# -*- coding: utf-8 -*- # 協程基礎.py (coroutine basics)
import asyncio
import time


async def request(url):
    """Demo coroutine: print start/finish markers around a 3-second wait.

    NOTE(review): time.sleep(3) blocks the event loop; the commented-out
    `await asyncio.sleep(3)` is the non-blocking form. Kept as-is because
    this demo runs a single task, where blocking is harmless.
    """
    print("正在請求:", url)
    # r = await asyncio.sleep(3)
    time.sleep(3)
    print("下載成功:", url)


# Calling an async function returns a coroutine object; nothing runs yet.
c = request("www.baidu.com")
# 1. Get the event loop.
loop = asyncio.get_event_loop()
# 2. Wrap the coroutine object in a Task so the loop can schedule it.
task = loop.create_task(c)
# 3. Drive the loop until the task completes.
loop.run_until_complete(task)
# -*- coding: utf-8 -*- # 給任務對象綁定回調.py (binding a callback to a task)
import asyncio
import time


async def request(url):
    """Print start/finish markers and return a value for the done-callback."""
    print("正在請求:", url)
    # r = await asyncio.sleep(3)
    time.sleep(3)  # NOTE(review): blocks the loop; acceptable for a one-task demo
    print("下載成功:", url)
    return 123


c = request("www.baidu.com")  # coroutine object returned by the async function


def parse(task):
    # Done-callback: receives the finished Task object. In a crawler this is
    # where the downloaded page would be parsed.
    print("這是回調函數")
    print("打印結果是協程函數的返回值", task.result())


# 1. Get the event loop.
loop = asyncio.get_event_loop()
# 2. Wrap the coroutine object in a Task.
task = loop.create_task(c)
# Bind the callback; it fires when the task finishes.
task.add_done_callback(parse)
# 3. Drive the loop until the task completes.
loop.run_until_complete(task)
# -*- coding: utf-8 -*- # 多任務異步協程.py (multi-task async coroutines)
import asyncio
import time

urls = ['www.baidu.com', 'www.sogou.com', 'www.sina.com']
start = time.time()


async def request(url):
    """Simulate a 3-second download without blocking the event loop."""
    print("正在請求:", url)
    # time.sleep(3) would block the loop; asyncio.sleep is the awaitable form.
    await asyncio.sleep(3)
    print("下載成功:", url)


loop = asyncio.get_event_loop()
# Collect one Task per URL so all downloads run concurrently.
tasks = []
for url in urls:
    c = request(url)  # coroutine object
    task = loop.create_task(c)
    tasks.append(task)
# asyncio.wait drives all tasks together: total time is ~3s, not 3s per URL.
loop.run_until_complete(asyncio.wait(tasks))

print('總共耗時:', time.time() - start)
# -*- coding: utf-8 -*- # 多任務異步協程在爬蟲中應用.py (multi-task async coroutines in a crawler)
import asyncio
import time

import requests
import aiohttp  # like requests, but supports async requests

# Single thread + multiple async coroutine tasks.
#
# A first attempt used requests inside the coroutine:
#
#     async def get_pageText(url):
#         print("正在下載", url)
#         page_text = requests.get(url).text  # synchronous: blocks the loop
#         print("下載完畢", url)
#         return page_text
#
# requests.get() does not support async I/O, so the tasks serialize and the
# concurrency is lost; aiohttp is the async replacement used below.

start = time.time()
urls = [
    'http://127.0.0.1:5000/bobo',  # each page takes ~2s to respond
    'http://127.0.0.1:5000/jay',   # each page takes ~2s to respond
    'http://127.0.0.1:5000/tom',   # each page takes ~2s to respond
]

# With a proxy, the request line becomes:
# async with await s.get(url=url, headers=headers, proxy="http://ip:port") as response:


async def get_pageText(url):
    """Download one page asynchronously and return its text."""
    # Open a client session (connection pool).
    async with aiohttp.ClientSession() as s:
        # `await` is required wherever network I/O happens (request + response).
        # Extra arguments (headers, params, ...) work just like requests.get().
        async with await s.get(url=url) as response:
            page_text = await response.text()
            # print(page_text)
            # The return value is handed to the bound done-callback for parsing.
            return page_text


from lxml import etree


def parse(task):
    # Done-callback: .result() is the coroutine's return value (the page text).
    page_text = task.result()
    # # Instantiate an etree parser and extract data, e.g.:
    # tree = etree.HTML(page_text)
    # page_data = tree.xpath('//*[@id="page"]/a[1]/span[1]/i/@class')[0]
    print(page_text, "開始對頁面進行解析")


loop = asyncio.get_event_loop()
tasks = []
for url in urls:
    c = get_pageText(url)
    task = loop.create_task(c)
    # Bind the parsing callback to every task.
    task.add_done_callback(parse)
    tasks.append(task)
loop.run_until_complete(asyncio.wait(tasks))

print('總共耗時:', time.time() - start)