Write a class in your project's middlewares.py, then enable it in settings.py through the DOWNLOADER_MIDDLEWARES dict, for example as shown below.
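A minimal sketch of the settings entry, assuming the project package is named myproject (a placeholder, adjust the path to wherever the class actually lives) and using the class name from the middleware code further down:

# settings.py -- 'myproject' is a placeholder package name
DOWNLOADER_MIDDLEWARES = {
    'myproject.middlewares.proxyMiddleware': 543,
}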
The middleware itself:
import random

import requests

ip_pool = []   # cached proxy addresses fetched from the provider
pro_addr = ''  # the proxy currently in use

class proxyMiddleware(object):
    def process_request(self, request, spider):
        global pro_addr, ip_pool
        if "jdzgb" in spider.name:
            # Top up the pool whenever it runs low.
            if len(ip_pool) < 3:
                get_ip_url = "http://d.jghttp.golangapi.com/getipxxxxxx"  # provider API that returns proxy IPs
                ips = requests.get(get_ip_url).text.split('\n')
                for i in ips[:-1]:
                    ip_pool.append(i.strip())
            if not pro_addr:
                pro_addr = random.choice(ip_pool)
            # Probe proxies until one answers, then attach it to the request.
            while True:
                url = 'https://www.baidu.com'
                proxies = {
                    "http": pro_addr,
                }
                try:
                    s = requests.session()
                    # requests.Session has no 'keep_alive' attribute; sending a
                    # 'Connection: close' header is what actually disables keep-alive
                    response = s.get(url=url, proxies=proxies, timeout=4,
                                     verify=False, headers={'Connection': 'close'})
                    code = response.status_code
                except Exception as e:
                    print(e)
                    code = 0
                print(code, pro_addr)
                if code == 200 or code == 304:
                    request.meta['proxy'] = "http://" + pro_addr
                    # pro_addr = random.choice(ip_pool)  # uncomment to use a different IP on
                    # every request; left commented, the same IP is reused until it expires
                    break
                else:
                    # Dead proxy: evict it, top up the pool if needed, and pick another.
                    if pro_addr in ip_pool:
                        ip_pool.remove(pro_addr)
                    if len(ip_pool) < 3:
                        get_ip_url = "http://d.jghttp.golangapi.com/getipxxxxxxx"  # provider API that returns proxy IPs
                        ips = requests.get(get_ip_url).text.split('\n')
                        for i in ips[:-1]:
                            ip_pool.append(i.strip())
                    pro_addr = random.choice(ip_pool)
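As written, the middleware is "sticky": one proxy is reused until its test request against baidu.com fails, at which point it is evicted from the pool and replaced; uncommenting the pro_addr = random.choice(ip_pool) line switches to per-request rotation at the cost of a probe on every request. One side effect of verify=False is that urllib3 emits an InsecureRequestWarning on every probe; if the noise bothers you, it can be silenced with a standard urllib3 call (not part of the original code):

import urllib3

# Suppress the InsecureRequestWarning triggered by verify=False above.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)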
