python爬蟲多次請求超時的幾種重試方法


第一種方法

# Method 1: retry by nesting a second try/except inside the first handler.
# NOTE(review): `Dict` is not defined in this snippet; presumably a dict-like
# container imported elsewhere. Confirm, or use the builtin `dict()`.
headers = Dict()
url = 'https://www.baidu.com'
proxies = None  # no proxy; passed unchanged to both attempts
response = None  # stays None when both attempts fail
try:
    # First attempt. verify=False turns off TLS certificate verification.
    response = requests.get(url, headers=headers, verify=False, proxies=proxies, timeout=3)
except requests.exceptions.RequestException:
    # First failure: a logger call would go here.
    try:
        # Second and last attempt, with the same arguments.
        response = requests.get(url, headers=headers, verify=False, proxies=proxies, timeout=3)
    except requests.exceptions.RequestException:
        # Second failure: report it and give up.
        print('requests failed two time')

總結 :代碼比較冗余,重試try的次數越多,代碼行數越多,但是打印日志比較方便

第二種方法

def requestDemo(url,):
    """Send a GET request to *url*, retrying in a loop.

    At most three attempts are made. The loop stops at the first response
    whose status code is 200. An attempt that raises a request error is
    reported with ``print`` and the next attempt follows.

    Args:
        url: Address to request.

    Returns:
        The last response received, or ``None`` when every attempt raised.
    """
    # NOTE(review): `Dict` is not defined in this snippet; presumably a
    # dict-like container imported elsewhere. Confirm, or use `dict()`.
    headers = Dict()
    proxies = None  # no proxy
    trytimes = 3  # number of attempts
    response = None
    for i in range(trytimes):
        try:
            # verify=False turns off TLS certificate verification.
            response = requests.get(url, headers=headers, verify=False, proxies=proxies, timeout=3)
        except requests.exceptions.RequestException:
            # A logger call would go here instead of print.
            print(f'requests failed {i} time')
            continue
        # Only 200 ends the loop; any other status code (302, for example)
        # leads to another attempt.
        if response.status_code == 200:
            break
    return response

總結 :遍歷代碼明顯比第一個簡化了很多,打印日志也方便

第三種方法

def requestDemo(url, times=1):
    """Fetch *url* and return the response body, retrying by recursion.

    Any exception raised by the request or by the processing code inside the
    ``try`` triggers another call of this function, until three attempts
    have been made.

    Args:
        url: Address to request.
        times: 1-based number of the current attempt. Callers normally leave
            the default; the recursive call passes the incremented value.

    Returns:
        The response text on success, or the string ``'out of maxtimes'``
        once the third attempt has failed.
    """
    # NOTE(review): `Dict` is not defined in this snippet; presumably a
    # dict-like container imported elsewhere. Confirm, or use `dict()`.
    headers = Dict()
    proxies = None  # no proxy
    trytimes = 3  # maximum number of attempts
    try:
        # verify=False turns off TLS certificate verification.
        response = requests.get(url, headers=headers, verify=False, proxies=proxies, timeout=3)
        # `text` is a property of the response object, not a method.
        html = response.text
        # TODO: put the normal processing logic here.
        return html
    except Exception:
        # Broad on purpose: failures in the processing code are retried too.
        # A logger call would go here.
        if times < trytimes:
            return requestDemo(url, times + 1)
        return 'out of maxtimes'

總結 :遞歸 顯得比較高大上,中間處理代碼時有其它錯誤照樣可以進行重試; 缺點 不太好理解,容易出錯,另外try包含的內容過多時,對代碼運行速度不利。

第四種方法

# NOTE(review): the decorator expression is evaluated when this `def` runs,
# so `retry` has to be defined or imported before this point.
@retry(3)  # retry count: 3
def requestDemo(url):
    """Fetch *url* and return the response body as text.

    The function makes a single request and lets any exception propagate;
    retrying is left to the ``retry`` decorator applied above.

    Args:
        url: Address to request.

    Returns:
        The response text.
    """
    # NOTE(review): `Dict` is not defined in this snippet; presumably a
    # dict-like container imported elsewhere. Confirm, or use `dict()`.
    headers = Dict()
    proxies = None  # no proxy
    # verify=False turns off TLS certificate verification.
    response = requests.get(url, headers=headers, verify=False, proxies=proxies, timeout=3)
    # `text` is a property of the response object, not a method.
    html = response.text
    # TODO: put the normal processing logic here.
    return html
   
def retry(times):
    """Build a decorator that calls the wrapped function up to *times* times.

    The wrapped function is called with the original arguments. The first
    call that returns normally ends the loop and its value is returned. A
    call that raises an ``Exception`` is reported with ``print`` and followed
    by the next attempt. ``KeyboardInterrupt`` and ``SystemExit`` are not
    caught and propagate immediately.

    Args:
        times: Maximum number of calls to make.

    Returns:
        A decorator. The decorated function returns the wrapped function's
        result, or ``None`` when every attempt raised (or *times* is not
        positive).
    """
    import functools  # local import: this snippet has no import section of its own

    def wrapper(func):
        @functools.wraps(func)  # keep func.__name__ / __doc__ on the wrapper
        def inner_wrapper(*args, **kwargs):
            attempt = 0
            while attempt < times:
                try:
                    print(attempt)
                    return func(*args, **kwargs)
                except Exception:
                    # Log the failure; func.__name__ is the decorated function's name.
                    print("logdebug: {}()".format(func.__name__))
                    attempt += 1
            # Every attempt failed: best-effort contract, signal with None.
            return None
        return inner_wrapper
    return wrapper

總結 :裝飾器優點 多種函數復用,使用十分方便

第五種方法

#!/usr/bin/python
# -*- coding: utf-8 -*-
import requests
import time
import json
from lxml import etree
import warnings
warnings.filterwarnings("ignore")





def get_xiaomi():
    """Fetch the mi.com home page and return its ``<title>`` text.

    Up to five attempts are made. Each attempt prints its index, downloads
    the page, prints the serialized document and the extracted title. The
    loop ends early once the title contains "左左". An attempt that raises
    (timeout, connection error, missing ``<title>`` element, ...) counts as
    a failed attempt and the loop moves on to the next one.

    Returns:
        The title extracted by the last attempt, or the string "異常" when
        the last attempt raised.
    """
    # Request parameters are the same for every attempt, so build them once.
    url = "https://www.mi.com/"
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Connection": "keep-alive",
        "Host": "www.mi.com",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36",
    }
    result = "異常"
    for a in range(5):  # up to 5 attempts
        print(a)
        try:
            # verify=False turns off TLS certificate verification.
            response = requests.get(url, headers=headers, timeout=10, verify=False)
            html = etree.HTML(response.text)
            print(etree.tostring(html).decode("utf-8"))
            # [0] raises IndexError when the page has no <title> text.
            title = html.xpath('//head/title/text()')[0]
        except Exception:
            # This attempt failed; remember that and try again.
            result = "異常"
            continue
        print("title==", title)
        result = title
        if "左左" in title:
            break
    return result

# Script entry point: run the fetch once and show what it returned.
if __name__ == "__main__":
    outcome = get_xiaomi()
    print(outcome)

第六種方法

Python重試模塊retrying(使用前需先安裝並導入:pip install retrying,然後 from retrying import retry)

# Set the maximum number of retries
@retry(stop_max_attempt_number=5)
def get_proxies(self):
    """Request the proxy endpoint, print a progress message, then raise.

    The statements after the ``raise`` (building an http/https mapping from
    the response body) are unreachable as written.

    NOTE(review): `retry` here presumably comes from the `retrying` package
    (it is called with `stop_max_attempt_number`); confirm the import.
    NOTE(review): the function takes `self`, but no enclosing class is
    visible in this snippet.
    """
    r = requests.get('代理地址')
    print('正在獲取')
    # Raised unconditionally, so nothing below this line ever runs;
    # presumably deliberate, to exercise the retry decorator. Confirm.
    raise Exception("異常")
    print('獲取到最新代理 = %s' % r.text)
    params = dict()
    if r and r.status_code == 200:
        # Decode the response body and use it as the proxy address for both schemes.
        proxy = str(r.content, encoding='utf-8')
        params['http'] = 'http://' + proxy
        params['https'] = 'https://' + proxy
    # NOTE(review): `params` is built but never returned.
# NOTE(review): each @retry line below looks like a separate usage example
# meant to sit directly above a function of its own; left as laid out here,
# they would all stack on the next `def` in the file. Confirm intent.
# Set the maximum delay for the method; the default is 100 ms (this is the total time spent retrying the method)
@retry(stop_max_attempt_number=5,stop_max_delay=50)
# With the value set to 50, we find that the task ends before it has run 5 times!

# Add a wait time between executions of the method
@retry(stop_max_attempt_number=5,wait_fixed=2000)
# Random wait time
@retry(stop_max_attempt_number=5,wait_random_min=100,wait_random_max=2000)
# Increase the wait by a fixed amount on each call
@retry(stop_max_attempt_number=5,wait_incrementing_increment=1000)

# Retry depending on the exception; a simple example first.
def retry_if_io_error(exception):
    """Tell whether *exception* is an ``IOError`` instance.

    Returns:
        ``True`` for an ``IOError`` (or a subclass), ``False`` otherwise.
    """
    is_io_failure = isinstance(exception, IOError)
    return is_io_failure

@retry(retry_on_exception=retry_if_io_error)
def read_a_file():
    """Read the file named "file" in text mode and return its full contents."""
    with open("file", "r") as handle:
        contents = handle.read()
    return contents

read_a_file函數如果拋出了異常,retrying會把該異常傳給retry_on_exception指向的函數,根據它返回的是True還是False來決定:返回True則進行重試,運行完指定的重試次數後仍然失敗才拋出異常;返回False則不重試,直接拋出異常。
當時自己測試的時候網上一大堆抄來抄去的,意思是retry_on_exception指定一個函數,函數返回指定異常,會重試,不是異常會退出。真坑人啊!
來看看獲取代理的應用(僅僅是為了測試retrying模塊)


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM