Web Crawlers: Asynchronous Coroutines
Posted by zhangchen-sx
```python
# Basic usage: running a coroutine on an event loop
import asyncio

async def hello(name):
    print('hello to:', name)

# Calling the coroutine function does not run it; it returns a coroutine
# object, e.g. <coroutine object hello at 0x0000000005EDDE08>
c = hello('zc')

# Create an event loop object
loop = asyncio.get_event_loop()

# Register the coroutine object with the event loop and start the loop
loop.run_until_complete(c)  # prints: hello to: zc
```
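On Python 3.7 and later, `asyncio.run()` takes care of creating, running, and closing the event loop; a minimal equivalent of the snippet above:

```python
import asyncio

async def hello(name):
    print('hello to:', name)

# asyncio.run() creates the loop, runs the coroutine to completion,
# and closes the loop afterwards
asyncio.run(hello('zc'))
```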
```python
# Using a Task: wrapping a single coroutine
import asyncio

async def hello(name):
    print('hello to:', name)

c = hello('zc')

# Create an event loop object
loop = asyncio.get_event_loop()

# Wrap the coroutine in a Task object
task = loop.create_task(c)
print(task)   # the Task is still pending here
loop.run_until_complete(task)
print(task)   # the Task is now finished
```
```python
# Using a Future: asyncio.ensure_future also wraps the coroutine in a Task
import asyncio

async def hello(name):
    print('hello to:', name)

c = hello('zc')
loop = asyncio.get_event_loop()
# ensure_future works like loop.create_task for coroutines, but it also
# accepts Futures and Tasks, returning them unchanged
task = asyncio.ensure_future(c)
print(task)   # pending
loop.run_until_complete(task)
print(task)   # finished
```
```python
# Binding a callback to a future
import asyncio

def callback(task):
    # Runs when the task completes; task.result() is the coroutine's return value
    print('I am callback', task.result())

async def hello(name):
    print('hello to:', name)
    return name

c = hello('zc')
loop = asyncio.get_event_loop()  # create the loop instance
task = asyncio.ensure_future(c)
task.add_done_callback(callback)  # register the callback to run on completion
loop.run_until_complete(task)
print(task)
```
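`add_done_callback` always passes the finished task as the only argument. If the callback needs extra arguments, `functools.partial` is the usual way to bind them; a minimal sketch:

```python
import asyncio
from functools import partial

def callback(tag, task):
    # partial binds 'tag'; the task is still passed as the last argument
    print(tag, task.result())

async def hello(name):
    print('hello to:', name)
    return name

async def main():
    task = asyncio.ensure_future(hello('zc'))
    task.add_done_callback(partial(callback, 'finished:'))
    await task

asyncio.run(main())
```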
```python
# Multi-task async requests in a crawler
# aiohttp supports asynchronous HTTP requests: pip install aiohttp
import asyncio
import time
import aiohttp

async def get_page(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url=url) as response:
            # response.text() for text; response.read() for raw bytes;
            # response.json() for JSON
            page_text = await response.text()
            print('response:', page_text)
            # print('ok %s' % url)

start = time.time()
urls = [
    'http://127.0.0.1:5000/bobo',
    'http://127.0.0.1:5000/jay',
    'http://127.0.0.1:5000/tom',
]

tasks = []  # task list holding the Task objects
loop = asyncio.get_event_loop()
for url in urls:
    c = get_page(url)
    task = asyncio.ensure_future(c)
    tasks.append(task)

# Register the list of task objects with the event loop
loop.run_until_complete(asyncio.wait(tasks))
print('total time', time.time() - start)

# -- sample output (from a variant of get_page that logs the start
#    and end of each download) --
# downloading http://127.0.0.1:5000/bobo
# downloading http://127.0.0.1:5000/jay
# downloading http://127.0.0.1:5000/tom
# ok http://127.0.0.1:5000/bobo
# ok http://127.0.0.1:5000/jay
# ok http://127.0.0.1:5000/tom
# total time 2.0021142959594727
```

Each endpoint sleeps for 2 seconds on the server, so a total of roughly 2 seconds (rather than 6) confirms the three requests ran concurrently.
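`asyncio.gather` is a common alternative to `asyncio.wait`: it returns the results in input order, and the rewrite below also shares a single `ClientSession` across requests instead of opening one per request. A minimal sketch, assuming the local Flask test server shown below is running:

```python
import asyncio
import time
import aiohttp

async def get_page(session, url):
    # Reuse the shared session's connection pool
    async with session.get(url) as response:
        return await response.text()

async def main(urls):
    async with aiohttp.ClientSession() as session:
        # gather schedules all coroutines concurrently and returns
        # their results in the same order as the inputs
        return await asyncio.gather(*(get_page(session, u) for u in urls))

urls = [
    'http://127.0.0.1:5000/bobo',
    'http://127.0.0.1:5000/jay',
    'http://127.0.0.1:5000/tom',
]
start = time.time()
results = asyncio.run(main(urls))
print(results, 'total time', time.time() - start)
```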
```python
# A minimal Flask web server used as the test target
from flask import Flask
import time

app = Flask(__name__)

@app.route('/bobo')
def index_bobo():
    time.sleep(2)
    return 'Hello bobo'

@app.route('/jay')
def index_jay():
    time.sleep(2)
    return 'Hello jay'

@app.route('/tom')
def index_tom():
    time.sleep(2)
    return 'Hello tom'

if __name__ == '__main__':
    app.run(threaded=True)
```
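Note that `threaded=True` matters here: it lets Flask's development server handle each incoming request in its own thread, so the three 2-second sleeps overlap. Without it (older Flask versions served one request at a time by default), the server would process the requests sequentially and the async client would take about 6 seconds regardless.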
```python
# High-performance async IO against real websites
import asyncio
import time
import aiohttp

async def get_page(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url=url) as response:
            # response.read() for raw bytes; response.json() for JSON
            page_text = await response.text()
            # print('response:', page_text)
            print('ok %s' % url)

start = time.time()
urls = [
    'https://baidu.com',
    'https://y.qq.com',
    'https://www.taobao.com',
]

tasks = []  # task list holding the Task objects
loop = asyncio.get_event_loop()
for url in urls:
    c = get_page(url)
    task = asyncio.ensure_future(c)
    tasks.append(task)

# Register the list of task objects with the event loop
loop.run_until_complete(asyncio.wait(tasks))
print('total time', time.time() - start)
```
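The comments above mention `response.read()` and `response.json()` alongside `response.text()`; a minimal sketch of both, using httpbin.org purely as an illustrative public test service:

```python
import asyncio
import aiohttp

async def fetch():
    async with aiohttp.ClientSession() as session:
        # response.read() returns the raw response body as bytes
        async with session.get('https://httpbin.org/bytes/16') as response:
            raw = await response.read()
            print(len(raw), 'bytes')
        # response.json() parses a JSON response body
        async with session.get('https://httpbin.org/get') as response:
            data = await response.json()
            print(data['url'])

asyncio.run(fetch())
```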
Side note: Python's `and` and `or` return one of their operands rather than a strict boolean, so `0 and False` evaluates to `0` (short-circuits on the first falsy operand), while `0 or False` evaluates to `False` (returns the last operand when every operand is falsy).
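A quick demonstration of that rule:

```python
# 'and' returns the first falsy operand (or the last one if all are truthy);
# 'or' returns the first truthy operand (or the last one if all are falsy)
print(0 and False)       # 0
print(0 or False)        # False
print('' or 'default')   # default
```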