Write a class in the middlewares module (middlewares.py), then enable it in settings.py via the DOWNLOADER_MIDDLEWARES = {} setting.
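For example, the settings.py entry might look like the following (a minimal sketch: "myproject" is a placeholder for your own Scrapy project package, and 543 is just a typical middleware priority value, not something mandated by this code):

    DOWNLOADER_MIDDLEWARES = {
        # Register the proxy middleware defined in middlewares.py;
        # replace "myproject" with your actual project package name.
        'myproject.middlewares.proxyMiddleware': 543,
    }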
The middleware code is as follows:
    import random

    import requests
    from scrapy.http import HtmlResponse

    ip_pool = []    # shared pool of proxy addresses
    pro_addr = ''   # proxy address currently in use

    class proxyMiddleware(object):
        def process_request(self, request, spider):
            global pro_addr, ip_pool
            if "jdzgb" in spider.name:
                # Refill the pool when it runs low.
                if len(ip_pool) < 3:
                    get_ip_url = "http://d.jghttp.golangapi.com/getipxxxxxx"  # URL that returns proxy IPs
                    ips = requests.get(get_ip_url).text.split('\n')
                    for i in ips[:-1]:
                        ip_pool.append(i.strip())
                if not pro_addr:
                    pro_addr = random.choice(ip_pool)
                # Keep testing proxies until one responds.
                while 1:
                    url = 'https://www.baidu.com'
                    proxies = {
                        "http": pro_addr,
                    }
                    try:
                        s = requests.session()
                        s.keep_alive = False  # close idle connections
                        response = s.get(url=url, proxies=proxies, timeout=4, verify=False)
                        code = response.status_code
                    except Exception as e:
                        print(e)
                        code = 0
                    print(code, pro_addr)
                    if code == 200 or code == 304:
                        request.meta['proxy'] = "http://" + pro_addr
                        # pro_addr = random.choice(ip_pool)
                        # Uncommenting the line above picks a different IP for
                        # every request; left commented, the same IP keeps
                        # being used until it expires or stops responding.
                        break
                    else:
                        # Drop the dead proxy, refill the pool if needed,
                        # and pick another one.
                        if pro_addr in ip_pool:
                            ip_pool.remove(pro_addr)
                        if len(ip_pool) < 3:
                            get_ip_url = "http://d.jghttp.golangapi.com/getipxxxxxxx"  # URL that returns proxy IPs
                            ips = requests.get(get_ip_url).text.split('\n')
                            for i in ips[:-1]:
                                ip_pool.append(i.strip())
                        pro_addr = random.choice(ip_pool)
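Since the pool-refill logic appears twice in process_request, it could be pulled out into a small helper. A minimal sketch under the same assumptions as above (refill_pool and POOL_MIN are hypothetical names introduced here, not part of the original code):

    import requests

    POOL_MIN = 3  # same refill threshold the middleware uses

    def refill_pool(ip_pool, get_ip_url):
        # Top up the shared pool when it runs low; the provider is assumed
        # to return one proxy per line, with a trailing empty line.
        if len(ip_pool) < POOL_MIN:
            ips = requests.get(get_ip_url).text.split('\n')
            for ip in ips[:-1]:
                ip_pool.append(ip.strip())

With this helper, both inline refill blocks in process_request would collapse to a single call, refill_pool(ip_pool, get_ip_url), which keeps the retry branch easier to read.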