# -*- coding: utf-8 -*-
# 協程基礎.py
import asyncio
import time
async def request(url, *, delay=3):
    """Simulate downloading *url* without blocking the event loop.

    Args:
        url: Address to "request"; it is only echoed in the progress messages.
        delay: Seconds to wait to imitate network latency (default 3).

    Returns:
        None.
    """
    print("正在請求:", url)
    # A blocking time.sleep() here would freeze the whole event loop;
    # awaiting asyncio.sleep() hands control back to the loop while waiting.
    await asyncio.sleep(delay)
    print("下載成功:", url)
# Calling an async function does not run its body; it only returns a
# coroutine object that still has to be scheduled.
c = request("www.baidu.com")

# 1. Create the event loop explicitly and register it as the current loop.
#    asyncio.get_event_loop() is deprecated when no loop exists yet and raises
#    RuntimeError on Python 3.14+, so the loop is built here instead.
#    The loop is deliberately left open and registered: later sections of this
#    file fetch the current loop again.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

# 2. Wrap the coroutine in a Task, which schedules it on the loop.
task = loop.create_task(c)

# 3. Run the loop until the task has finished.
loop.run_until_complete(task)
# -*- coding: utf-8 -*-
# 給任務對象綁定回調.py
import asyncio
import time
async def request(url, *, delay=3):
    """Simulate downloading *url* and return a fixed result.

    Args:
        url: Address to "request"; it is only echoed in the progress messages.
        delay: Seconds to wait to imitate network latency (default 3).

    Returns:
        The constant 123, standing in for a downloaded payload so that a
        done-callback has something to read from ``task.result()``.
    """
    print("正在請求:", url)
    # A blocking time.sleep() here would freeze the whole event loop;
    # awaiting asyncio.sleep() hands control back to the loop while waiting.
    await asyncio.sleep(delay)
    print("下載成功:", url)
    return 123
c = request("www.baidu.com") # calling the async function only returns a coroutine object; its body has not started yet
# A done-callback receives the finished task object as its only argument;
# in a crawler this is where the downloaded data would be parsed.
def parse(task):
    """Done-callback that prints the value returned by the finished task.

    Args:
        task: The completed task (any object exposing ``result()``).

    Returns:
        None.
    """
    print("這是回調函數")
    # task.result() is whatever the wrapped coroutine function returned.
    print("打印結果是協程函數的返回值", task.result())
# 1. Create the event loop explicitly and register it as the current loop.
#    asyncio.get_event_loop() is deprecated when no loop exists yet and raises
#    RuntimeError on Python 3.14+, so the loop is built here instead.
#    The loop is deliberately left open and registered: later sections of this
#    file fetch the current loop again.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

# 2. Wrap the coroutine object in a Task, which schedules it on the loop.
task = loop.create_task(c)

# Attach the callback; it is invoked with the task once the task is done.
task.add_done_callback(parse)

# 3. Run the loop until the task has finished.
loop.run_until_complete(task)
# -*- coding: utf-8 -*-
# 多任務異步協程.py
import asyncio
import time
# Addresses to "request"; one task is created per entry further down.
urls = [
    "www.baidu.com",
    "www.sogou.com",
    "www.sina.com",
]
# Reference timestamp for the total-elapsed-time report printed at the end.
start = time.time()
async def request(url, *, delay=3):
    """Simulate downloading *url* without blocking the event loop.

    Args:
        url: Address to "request"; it is only echoed in the progress messages.
        delay: Seconds to wait to imitate network latency (default 3).

    Returns:
        None.
    """
    print("正在請求:", url)
    # time.sleep() would block every other task; asyncio.sleep() is awaitable,
    # so the loop can run the other requests during the wait.
    await asyncio.sleep(delay)
    print("下載成功:", url)
# Create the event loop explicitly and register it as the current loop.
# asyncio.get_event_loop() is deprecated when no loop exists yet and raises
# RuntimeError on Python 3.14+, so the loop is built here instead.
# The loop is deliberately left open and registered: later sections of this
# file fetch the current loop again.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

# Task list: one task per URL so that all requests wait concurrently.
tasks = []
for url in urls:
    c = request(url)  # coroutine object, not started yet
    task = loop.create_task(c)
    tasks.append(task)

# asyncio.wait() completes once every task in the list has finished.
loop.run_until_complete(asyncio.wait(tasks))
print('總共耗時:', time.time() - start)
# -*- coding: utf-8 -*-
# 多任務異步協程在爬蟲中應用.py
import asyncio
import time
import requests
import aiohttp # 跟requests的區別就是支持異步請求
# 單線程 + 多任務異步協程
# start = time.time()
# urls = [
# 'http://127.0.0.1:5000/bobo',
# 'http://127.0.0.1:5000/jay',
# 'http://127.0.0.1:5000/tom',
# ]
#
# async def get_pageText(url):
# print("正在下載", url)
# page_text = requests.get(url).text # requests is synchronous: this does not raise, but it blocks the event loop, so the tasks run one after another
# print("下載完畢", url)
# # 返回給回調函數
# return page_text
#
#
# loop = asyncio.get_event_loop()
# tasks = []
# for url in urls:
# c = get_pageText(url)
# task = loop.create_task(c)
# tasks.append(task)
# loop.run_until_complete(asyncio.wait(tasks))
#
# print('總共耗時:', time.time() - start)
# Local test endpoints; per the original notes each page takes about
# 2 seconds to respond.
urls = [
    "http://127.0.0.1:5000/bobo",
    "http://127.0.0.1:5000/jay",
    "http://127.0.0.1:5000/tom",
]
# Reference timestamp for the total-elapsed-time report printed at the end.
start = time.time()
# When a proxy is needed, pass it straight to the request call, e.g.:
#   async with s.get(url=url, headers=headers, proxy="http://ip:port") as response:
async def get_pageText(url: str) -> str:
    """Download *url* with aiohttp and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded response body. When this coroutine runs as a task, a
        done-callback can read the value through ``task.result()``.
    """
    # Open a client session for this request.
    async with aiohttp.ClientSession() as session:
        # session.get() already returns an async context manager, so it is
        # entered directly; no extra ``await`` is needed in front of it.
        # Other options (headers, params, ...) go in the parentheses, much as
        # with requests.
        async with session.get(url=url) as response:
            # Reading the body involves network transfer, so it is awaited.
            page_text = await response.text()
            return page_text
from lxml import etree
def parse(task):
    """Done-callback that reads the finished task's result and prints it.

    Args:
        task: The completed task (any object exposing ``result()``).

    Returns:
        None.
    """
    # Value returned by the coroutine that the task wrapped.
    page_text = task.result()
    # Real parsing would go here, for example with lxml:
    #   tree = etree.HTML(page_text)
    #   page_data = tree.xpath('//*[@id="page"]/a[1]/span[1]/i/@class')[0]
    print(page_text, "開始對頁面進行解析")
# Create the event loop explicitly and register it as the current loop.
# asyncio.get_event_loop() is deprecated when no loop exists yet and raises
# RuntimeError on Python 3.14+, so the loop is built here instead.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

# One task per URL so that all downloads wait on the network concurrently.
tasks = []
for url in urls:
    c = get_pageText(url)  # coroutine object, not started yet
    task = loop.create_task(c)
    # Bind the callback to every task; it runs when that task is done.
    task.add_done_callback(parse)
    tasks.append(task)

# asyncio.wait() completes once every task in the list has finished.
loop.run_until_complete(asyncio.wait(tasks))
print('總共耗時:', time.time() - start)