Scrapy framework: handling exceptions and errors

In Scrapy, a Request accepts an errback in addition to its callback. The errback receives a Twisted Failure when the request fails, so you can log the error and react differently to non-200 responses, DNS lookup failures, and timeouts. The spider below registers an errback on every start request and exercises each of these cases.

import scrapy

from scrapy.spidermiddlewares.httperror import HttpError
from twisted.internet.error import DNSLookupError
from twisted.internet.error import TimeoutError, TCPTimedOutError

class ErrbackSpider(scrapy.Spider):
    name = "errback_example"
    start_urls = [
        "http://www.httpbin.org/",              	# 正常HTTP 200返回
        "http://www.httpbin.org/status/404",    	# 404 Not found error
        "http://www.httpbin.org/status/500",    	# 500服務器錯誤
        "http://www.httpbin.org:12345/",        	# 超時無響應錯誤
        "http://www.httphttpbinbin.org/",       	# DNS 錯誤
    ]

    def start_requests(self):
        for u in self.start_urls:
            yield scrapy.Request(u, callback=self.parse_httpbin,
                                    errback=self.errback_httpbin,
                                    dont_filter=True)

    def parse_httpbin(self, response):
        self.logger.info('Got successful response from %s', response.url)
        # Further processing of the successful response goes here.

    def errback_httpbin(self, failure):
        # Log every failure that reaches the errback.
        self.logger.error(repr(failure))

        # To react differently to specific exception types,
        # check which exception the Failure wraps.

        if failure.check(HttpError):
            # HttpError is raised by the HttpErrorMiddleware spider middleware;
            # the non-200 response is attached to the exception.
            response = failure.value.response
            self.logger.error('HttpError on %s', response.url)

        elif failure.check(DNSLookupError):
            # No response exists for a DNS failure; the original Request is on the Failure.
            request = failure.request
            self.logger.error('DNSLookupError on %s', request.url)

        elif failure.check(TimeoutError, TCPTimedOutError):
            # Twisted raises these when the connection or download times out.
            request = failure.request
            self.logger.error('TimeoutError on %s', request.url)
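
With Scrapy's default settings the timeout example (port 12345) can take several minutes to reach the errback, because DOWNLOAD_TIMEOUT defaults to 180 seconds and RetryMiddleware retries timed-out requests before giving up. An optional tweak for experimenting with this spider is to add custom_settings inside the class; the values below are illustrative assumptions, not recommendations:

    # Optional: fail fast so the errback cases show up quickly (illustrative values).
    custom_settings = {
        "DOWNLOAD_TIMEOUT": 10,   # seconds; the default is 180
        "RETRY_ENABLED": False,   # skip RetryMiddleware so failures go straight to the errback
    }

You can run the spider with scrapy runspider (or scrapy crawl errback_example inside a project) and watch the log to see which branch of the errback each URL hits.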

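The errback is not the only way to see non-200 responses. HttpErrorMiddleware can be told to pass selected status codes to the normal callback instead of raising HttpError, either through the HTTPERROR_ALLOWED_CODES setting or the handle_httpstatus_list key in Request.meta. Below is a small self-contained sketch of the meta approach; the spider name, callback name, and status codes are illustrative and not part of the example above:

import scrapy

class StatusSpider(scrapy.Spider):
    name = "status_example"   # hypothetical name for this sketch

    def start_requests(self):
        # Ask HttpErrorMiddleware to deliver these non-200 responses to the
        # callback instead of raising HttpError (codes chosen for illustration).
        yield scrapy.Request(
            "http://www.httpbin.org/status/404",
            callback=self.parse_status,
            meta={"handle_httpstatus_list": [404, 500]},
            dont_filter=True,
        )

    def parse_status(self, response):
        if response.status != 200:
            self.logger.warning("Got status %s from %s", response.status, response.url)

This is convenient when a non-200 status is an expected outcome you want to parse (for example, checking which pages return 404), while the errback remains the right place for network-level failures such as DNS errors and timeouts.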
