我的環境:Celery 3.1.25、Python 3.6.9、Windows 10。
Celery tasks 的代碼如下,其中 QuotesSpider 是我的 Scrapy 項目中的爬蟲類名稱:
from celery_app import app
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from tutorial.spiders.quotes import QuotesSpider


def crawl_run():
    """Run QuotesSpider once through a CrawlerProcess.

    The spider is started with the single positional argument 'all'.

    NOTE: this is the first, problematic variant. According to the
    surrounding write-up, running it repeatedly as a scheduled task fails
    with a "reactor cannot be restarted" error.
    """
    project_settings = get_project_settings()
    crawler_process = CrawlerProcess(settings=project_settings)
    crawler_process.crawl(QuotesSpider, 'all')
    crawler_process.start()
    crawler_process.join()


@app.task(queue='default')
def execute_task():
    """Celery task (queue 'default') that runs the crawl and returns its result."""
    outcome = crawl_run()
    return outcome
後來發現,這樣寫在重複執行定時任務時會報錯,提示 reactor 不能重啟(twisted.internet.error.ReactorNotRestartable)。改成下面這樣就解決了;注意這個類要放在與項目 scrapy.cfg 同級的目錄下:
from crawler.tutorial.crawler.tutorial.spiders.quotes import QuotesSpider
from scrapy.utils.project import get_project_settings
import scrapy.crawler as crawler
from crochet import run_in_reactor, setup

# Let crochet start and own the Twisted reactor (it runs it in a background
# thread), so the reactor is started once per process instead of once per
# crawl. This is what avoids the "reactor cannot be restarted" error when
# the crawl is triggered repeatedly.
setup()
import os


@run_in_reactor
def _start_crawl():
    """Schedule one QuotesSpider crawl from inside the reactor thread.

    Twisted objects such as CrawlerRunner must only be used from the thread
    that runs the reactor; ``run_in_reactor`` takes care of that and hands
    the caller a crochet ``EventualResult`` wrapping the crawl's Deferred.
    """
    runner = crawler.CrawlerRunner(get_project_settings())
    # Pass the spider *class*, not an instance: the runner instantiates the
    # spider itself, and recent Scrapy versions reject spider objects.
    return runner.crawl(QuotesSpider, 'all')


class Scraper():
    """Entry point for running the QuotesSpider crawl from non-Twisted code."""

    def crawl_run(self, timeout=None):
        """Start the crawl, optionally blocking until it has finished.

        Args:
            timeout: ``None`` (default) schedules the crawl and returns
                immediately, as before. A number of seconds makes the call
                block until the crawl completes.

        Raises:
            crochet.TimeoutError: if ``timeout`` elapses before the crawl ends.
            Exception: any failure of the crawl itself is re-raised when
                waiting (``timeout`` given).
        """
        pending = _start_crawl()
        if timeout is not None:
            pending.wait(timeout)


if __name__ == '__main__':
    scraper = Scraper()
    # When run as a script we must wait for the crawl: the reactor runs in a
    # background thread and the process would otherwise exit right away.
    # Adjust the limit (in seconds) to the expected crawl duration.
    scraper.crawl_run(timeout=3600)