Crawlers: Asynchronous Coroutines

Posted by zhangchen-sx


# Basic usage
# Driving a coroutine with an event loop
import asyncio

async def hello(name):
    print('hello to:', name)

c = hello('zc')  # calling returns a coroutine object: <coroutine object hello at 0x0000000005EDDE08>
# Create an event loop object
loop = asyncio.get_event_loop()
# Register the coroutine object with the event loop, then start the loop
loop.run_until_complete(c)  # prints: hello to: zc
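On Python 3.7+ the loop setup and teardown above can be collapsed into a single call. A minimal sketch of the same example using asyncio.run():

import asyncio

async def hello(name):
    print('hello to:', name)

# asyncio.run() creates the event loop, runs the coroutine to completion, and closes the loop
asyncio.run(hello('zc'))  # prints: hello to: zc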
# Using a task (single-task coroutine)
import asyncio

async def hello(name):
    print('hello to:', name)

c = hello('zc')
# Create an event loop object
loop = asyncio.get_event_loop()
# Wrap the coroutine one level further, into a Task object
task = loop.create_task(c)
print(task)  # <Task pending ...>
loop.run_until_complete(task)
print(task)  # <Task finished ...>
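The two print(task) calls show the task going from pending to finished. A small sketch that makes the state change explicit with done() and result():

import asyncio

async def hello(name):
    print('hello to:', name)
    return name

loop = asyncio.get_event_loop()
task = loop.create_task(hello('zc'))
print(task.done())                 # False: the task has not run yet
loop.run_until_complete(task)
print(task.done(), task.result())  # True zc: finished, return value available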

# Using a future
import asyncio

async def hello(name):
    print('hello to:', name)

c = hello('zc')
loop = asyncio.get_event_loop()
task = asyncio.ensure_future(c)
print(task)  # <Task pending ...>
loop.run_until_complete(task)
print(task)  # <Task finished ...>
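One practical difference: loop.create_task() only accepts coroutines, while asyncio.ensure_future() also accepts an existing Task or Future and returns it unchanged. A sketch, using the same legacy loop API as the rest of this post:

import asyncio

async def hello(name):
    return name

loop = asyncio.get_event_loop()
task = asyncio.ensure_future(hello('zc'))  # coroutine -> wrapped in a new Task
same = asyncio.ensure_future(task)         # Task/Future -> passed through as-is
print(task is same)                        # True
loop.run_until_complete(task)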
# Binding a callback to a future
import asyncio

def callback(task):  # callback function
    print('I am callback', task.result())

async def hello(name):
    print('hello to:', name)
    return name

c = hello('zc')
loop = asyncio.get_event_loop()  # create the loop instance
task = asyncio.ensure_future(c)
# print(task)
task.add_done_callback(callback)  # register the callback to run once the task completes
loop.run_until_complete(task)
print(task)
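add_done_callback() passes only the finished task to the callback. When the callback needs extra arguments, the usual pattern is functools.partial; a minimal sketch:

import asyncio
import functools

def callback(prefix, task):  # arguments bound by partial come before the task
    print(prefix, task.result())

async def hello(name):
    return name

loop = asyncio.get_event_loop()
task = asyncio.ensure_future(hello('zc'))
task.add_done_callback(functools.partial(callback, 'done:'))  # prints: done: zc
loop.run_until_complete(task)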
# Multi-task asynchronous requests in a crawler
# aiohttp is a network-request module with async support: pip install aiohttp
import asyncio
import aiohttp
import time

async def get_page(url):
    print('downloading', url)
    async with aiohttp.ClientSession() as session:
        async with session.get(url=url) as response:
            page_text = await response.text()  # read() for raw bytes, json() for JSON
            # print('response data:', page_text)
            print('download ok', url)

start = time.time()
urls = [
    'http://127.0.0.1:5000/bobo',
    'http://127.0.0.1:5000/jay',
    'http://127.0.0.1:5000/tom',
]
tasks = []  # task list holding the task objects
loop = asyncio.get_event_loop()
for url in urls:
    c = get_page(url)
    task = asyncio.ensure_future(c)
    tasks.append(task)
# Register the list of task objects with the event loop
loop.run_until_complete(asyncio.wait(tasks))
print('total time:', time.time() - start)
# -- output --
# downloading http://127.0.0.1:5000/bobo
# downloading http://127.0.0.1:5000/jay
# downloading http://127.0.0.1:5000/tom
# download ok http://127.0.0.1:5000/bobo
# download ok http://127.0.0.1:5000/jay
# download ok http://127.0.0.1:5000/tom
# total time 2.0021142959594727
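asyncio.wait() returns (done, pending) sets in arbitrary order; asyncio.gather() is often more convenient because it returns the results in input order. A sketch of the same fetch, assuming the local Flask server below is running:

import asyncio
import aiohttp

async def get_page(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            return await response.text()

async def main(urls):
    # gather schedules the coroutines concurrently and keeps results in input order
    pages = await asyncio.gather(*(get_page(u) for u in urls))
    for url, page in zip(urls, pages):
        print(url, '->', page)

urls = ['http://127.0.0.1:5000/bobo', 'http://127.0.0.1:5000/jay', 'http://127.0.0.1:5000/tom']
asyncio.run(main(urls))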
# A simple Flask web server used for testing
from flask import Flask
import time

app = Flask(__name__)

@app.route('/bobo')
def index_bobo():
    time.sleep(2)
    return 'Hello bobo'

@app.route('/jay')
def index_jay():
    time.sleep(2)
    return 'Hello jay'

@app.route('/tom')
def index_tom():
    time.sleep(2)
    return 'Hello tom'

if __name__ == '__main__':
    app.run(threaded=True)  # threaded=True lets the dev server handle the three requests concurrently
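For contrast, a blocking client fetches the three URLs one after another, so each request waits out the server's full 2-second sleep. A sketch using requests (pip install requests), assuming the Flask server above is running:

import time
import requests  # blocking HTTP client

urls = ['http://127.0.0.1:5000/bobo', 'http://127.0.0.1:5000/jay', 'http://127.0.0.1:5000/tom']
start = time.time()
for url in urls:
    print(requests.get(url).text)  # each call blocks for ~2 s
print('total time:', time.time() - start)  # roughly 6 s serial vs ~2 s for the async version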
# High-performance async IO against real websites
import asyncio
import aiohttp
import time

async def get_page(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url=url) as response:
            page_text = await response.text()  # read() for raw bytes, json() for JSON
            # print('response data:', page_text)
            print('ok %s' % url)

start = time.time()
urls = [
    'https://baidu.com',
    'https://y.qq.com',
    'https://www.taobao.com',
]
tasks = []  # task list holding the task objects
loop = asyncio.get_event_loop()
for url in urls:
    c = get_page(url)
    task = asyncio.ensure_future(c)
    tasks.append(task)
# Register the list of task objects with the event loop
loop.run_until_complete(asyncio.wait(tasks))
print('total time:', time.time() - start)
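Against real sites it is usually wise to cap how many requests are in flight at once. A sketch using asyncio.Semaphore and a single shared ClientSession; the limit of 5 is an arbitrary illustration, not something from the original post:

import asyncio
import aiohttp

CONCURRENCY = 5  # hypothetical cap; tune for the target site

async def get_page(session, url, sem):
    async with sem:  # at most CONCURRENCY requests run concurrently
        async with session.get(url) as response:
            page_text = await response.text()
            print('ok %s' % url)
            return page_text

async def main(urls):
    sem = asyncio.Semaphore(CONCURRENCY)
    # one shared session reuses connections instead of opening a new one per request
    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.ensure_future(get_page(session, url, sem)) for url in urls]
        return await asyncio.gather(*tasks)

urls = ['https://baidu.com', 'https://y.qq.com', 'https://www.taobao.com']
pages = asyncio.run(main(urls))
print([len(p) for p in pages])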
