429 Too Many Requests (太多請求)
當你需要限制客戶端請求某個服務的數量,也就是限制請求速度時,該狀態碼就會非常有用。在此之前,也有一些類似的狀態碼,例如「509 Bandwidth Limit Exceeded」。
如果你希望限制客戶端對服務的請求數,可使用 429 狀態碼,同時包含一個 Retry-After 響應頭,用於告訴客戶端多長時間後可以再次請求服務。
middlewares.py # 當狀態碼是429的時候 爬蟲暫停60秒 再爬取
from scrapy import signals
import time
from scrapy.downloadermiddlewares.retry import RetryMiddleware
from scrapy.utils.response import response_status_message
class TooManyRequestsRetryMiddleware(RetryMiddleware):
    """Retry middleware that pauses the crawl when a response has status 429.

    On an HTTP 429 (Too Many Requests) response the crawler engine is paused,
    the calling thread sleeps for 60 seconds, the engine is resumed, and the
    request is handed to the inherited ``_retry`` logic. Responses whose
    status is listed in ``self.retry_http_codes`` are retried without any
    pause. Every other response is passed through unchanged.
    """

    def __init__(self, crawler):
        """Set up the parent middleware and remember the crawler.

        Args:
            crawler: The running crawler; its ``settings`` configure the
                parent class and its ``engine`` is paused on a 429.
        """
        super(TooManyRequestsRetryMiddleware, self).__init__(crawler.settings)
        self.crawler = crawler

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware from a crawler (alternate constructor)."""
        return cls(crawler)

    def process_response(self, request, response, spider):
        """Decide whether ``response`` should be retried.

        Args:
            request: The request that produced ``response``.
            response: The downloaded response to inspect.
            spider: The spider the request belongs to.

        Returns:
            Whatever ``self._retry`` returns when a retry is attempted and
            it yields a truthy value; otherwise the original ``response``.
        """
        # Requests that opted out of retrying are never touched.
        if request.meta.get('dont_retry', False):
            return response

        if response.status == 429:
            self.crawler.engine.pause()
            try:
                print("速度太快 暫停60秒")
                # If the rate limit is renewed in a minute, put 60 seconds,
                # and so on.
                # NOTE(review): time.sleep blocks the calling thread for the
                # whole wait - confirm this is acceptable for the crawl.
                time.sleep(60)
            finally:
                # Always resume, even if the sleep is interrupted, so the
                # engine is never left paused.
                self.crawler.engine.unpause()
        elif response.status not in self.retry_http_codes:
            return response

        # Shared retry path for 429 and for every configured retry code.
        reason = response_status_message(response.status)
        return self._retry(request, reason, spider) or response
settings.py
DOWNLOADER_MIDDLEWARES = {
    # Disable the stock retry middleware: it would otherwise handle the
    # retryable responses as well, retrying 429s immediately instead of
    # letting the pausing subclass below deal with them.
    'scrapy.downloadermiddlewares.retry.RetryMiddleware': None,
    # Enable the retry middleware that pauses the crawl on HTTP 429.
    'steam_market.middlewares.TooManyRequestsRetryMiddleware': 543,
}
# HTTP status codes that should be retried.
RETRY_HTTP_CODES = [429, 500, 403]
import random
# Base delay in seconds: the sum of three uniform draws from [0, 1),
# computed once when this settings module is loaded.
DOWNLOAD_DELAY = sum(random.random() for _ in range(3))
# After sending a request, wait a randomized interval before sending the
# next one.
RANDOMIZE_DOWNLOAD_DELAY = True
