# -*- coding: utf-8 -*-
# 协程基础.py (coroutine basics)
import asyncio
import time


async def request(url):
    print("Requesting:", url)
    # r = await asyncio.sleep(3)
    time.sleep(3)  # blocking call; the async-aware version appears in 多任务异步协程.py
    print("Download finished:", url)


c = request("www.baidu.com")  # calling an async function returns a coroutine object

# 1. Instantiate the event loop
loop = asyncio.get_event_loop()
# 2. Create a task object: wrap the coroutine object in a task
task = loop.create_task(c)
# 3. Hand the task object to the event loop and run it to completion
loop.run_until_complete(task)
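# On Python 3.7+ the same flow is usually written with asyncio.run, which
# creates, runs, and closes the event loop in one call; a minimal sketch of
# that style (not part of the original script):
import asyncio


async def request(url):
    print("Requesting:", url)
    await asyncio.sleep(3)  # non-blocking pause instead of time.sleep
    print("Download finished:", url)


asyncio.run(request("www.baidu.com"))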
# -*- coding: utf-8 -*-
# 给任务对象绑定回调.py (binding a callback to a task object)
import asyncio
import time


async def request(url):
    print("Requesting:", url)
    # r = await asyncio.sleep(3)
    time.sleep(3)
    print("Download finished:", url)
    return 123


c = request("www.baidu.com")  # coroutine object returned by the async function


# The callback receives the task object; in a crawler the callback does the parsing
def parse(task):
    print("This is the callback function")
    print("The task's result is the coroutine's return value:", task.result())


# 1. Instantiate the event loop
loop = asyncio.get_event_loop()
# 2. Create a task object: wrap the coroutine object in a task
task = loop.create_task(c)
# Bind a callback to the task object; it fires when the task finishes
task.add_done_callback(parse)
# 3. Hand the task object to the event loop
loop.run_until_complete(task)
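# add_done_callback passes exactly one argument, the finished task. If the
# callback needs extra context, functools.partial is the standard way to bind
# it; a minimal sketch (the "tag" parameter is illustrative, not from the source):
import asyncio
import functools


async def request(url):
    await asyncio.sleep(1)
    return 123


def parse(tag, task):
    print(tag, task.result())


loop = asyncio.get_event_loop()
task = loop.create_task(request("www.baidu.com"))
task.add_done_callback(functools.partial(parse, "callback result:"))
loop.run_until_complete(task)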
# -*- coding: utf-8 -*-
# 多任务异步协程.py (multi-task async coroutines)
import asyncio
import time

urls = ['www.baidu.com', 'www.sogou.com', 'www.sina.com']
start = time.time()


async def request(url):
    print("Requesting:", url)
    # time.sleep(3)  # must be replaced with async-aware code
    await asyncio.sleep(3)  # awaitable coroutine; yields control so other tasks can run
    print("Download finished:", url)


loop = asyncio.get_event_loop()
# Task list holding the multiple task objects
tasks = []
for url in urls:
    c = request(url)  # coroutine object
    task = loop.create_task(c)
    tasks.append(task)
# asyncio.wait runs the tasks concurrently: total time is about 3s, not 9s
loop.run_until_complete(asyncio.wait(tasks))

print('Total time:', time.time() - start)
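# The same fan-out can be expressed with asyncio.gather, which also collects
# the coroutines' return values in order; a minimal sketch assuming Python 3.7+:
import asyncio
import time


async def request(url):
    await asyncio.sleep(3)
    return url


async def main(urls):
    # gather schedules all coroutines concurrently and preserves result order
    return await asyncio.gather(*(request(u) for u in urls))


start = time.time()
results = asyncio.run(main(['www.baidu.com', 'www.sogou.com', 'www.sina.com']))
print(results, 'Total time:', time.time() - start)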
# -*- coding: utf-8 -*-
# 多任务异步协程在爬虫中应用.py (multi-task async coroutines applied to crawling)
import asyncio
import time

import aiohttp  # differs from requests in that it supports async requests
import requests
from lxml import etree

# Single thread + multi-task async coroutines

# start = time.time()
# urls = [
#     'http://127.0.0.1:5000/bobo',
#     'http://127.0.0.1:5000/jay',
#     'http://127.0.0.1:5000/tom',
# ]
#
# async def get_pageText(url):
#     print("Downloading", url)
#     page_text = requests.get(url).text  # requests is blocking, so the tasks run serially instead of concurrently
#     print("Download finished", url)
#     # Returned value is handed to the callback
#     return page_text
#
#
# loop = asyncio.get_event_loop()
# tasks = []
# for url in urls:
#     c = get_pageText(url)
#     task = loop.create_task(c)
#     tasks.append(task)
# loop.run_until_complete(asyncio.wait(tasks))
#
# print('Total time:', time.time() - start)

start = time.time()
urls = [
    'http://127.0.0.1:5000/bobo',  # each page takes 2 seconds to respond
    'http://127.0.0.1:5000/jay',   # each page takes 2 seconds to respond
    'http://127.0.0.1:5000/tom',   # each page takes 2 seconds to respond
]

# When using a proxy:
# async with s.get(url=url, headers=headers, proxy="http://ip:port") as response:


async def get_pageText(url):
    # Open a client session s
    async with aiohttp.ClientSession() as s:
        # await is required wherever the request or the response crosses the network;
        # s.get takes the same keyword arguments as requests (headers, params, ...)
        async with s.get(url=url) as response:
            # Read the response body
            page_text = await response.text()
            # print(page_text)
            # page_text is handed to the callback for parsing
            return page_text


def parse(task):
    # Fetch the coroutine's return value from the finished task
    page_text = task.result()
    # # Instantiate an etree parser object
    # tree = etree.HTML(page_text)
    # page_data = tree.xpath('//*[@id="page"]/a[1]/span[1]/i/@class')[0]
    print(page_text, "starting to parse the page")


loop = asyncio.get_event_loop()
tasks = []
for url in urls:
    c = get_pageText(url)
    task = loop.create_task(c)
    # Bind the callback to every task object
    task.add_done_callback(parse)
    tasks.append(task)
loop.run_until_complete(asyncio.wait(tasks))

print('Total time:', time.time() - start)
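# The URLs above assume a local test server. A minimal Flask sketch matching
# the three routes and the roughly 2-second response time (the route bodies
# and return string are assumptions, not from the source):
import time

from flask import Flask

app = Flask(__name__)


@app.route('/bobo')
@app.route('/jay')
@app.route('/tom')
def index():
    time.sleep(2)  # simulate a slow page
    return 'Hello'


if __name__ == '__main__':
    app.run()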