之前處理超時異常時都在 DownloaderMiddleware 中處理,但是總感覺很費勁
今天查 Scrapy 文檔發現可以在 Request 的 errback 回調中統一處理
from scrapy.spidermiddlewares.httperror import HttpError
from twisted.internet.error import DNSLookupError
from twisted.internet.error import TimeoutError, TCPTimedOutError
# Attach error_httpbin as the errback so download-layer failures (DNS errors,
# timeouts) are routed there; dont_filter=True also lets the same request be
# re-yielded from the errback without being dropped by the dupefilter.
yield scrapy.Request(url=full_url, errback=self.error_httpbin, dont_filter=True, callback=self.parse_list, meta={"hd": header})
def error_httpbin(self, failure):
    """Errback for Requests: retry when the download failed at the network layer.

    Args:
        failure: twisted.python.failure.Failure describing the error.
            ``failure.request`` is the original scrapy.Request; re-yielding
            it schedules a retry (it was created with dont_filter=True, so
            the dupefilter will not drop the duplicate).

    Yields:
        The original Request again, for DNS-lookup and timeout failures.
    """
    # if failure.check(HttpError):
    #     # These exceptions come from the HttpError spider middleware;
    #     # the non-200 response is available on the failure.
    #     response = failure.value.response
    #     self.logger.error('HttpError on %s', response.url)
    if failure.check(DNSLookupError):
        print("DNSLookupError------->")
        # failure.request is the original request; yield it to retry.
        request = failure.request
        yield request
        # self.logger.error('DNSLookupError on %s', request.url)
    elif failure.check(TimeoutError, TCPTimedOutError):
        print("timeout------->")
        request = failure.request
        yield request
        # self.logger.error('TimeoutError on %s', request.url)
特此記錄下,之前沒有用這種方式處理超時異常