Scrapy用Pipeline寫入MySQL


 

編輯pipelines.py,添加自定義pipelines類:

# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html


# class HongxiuPipeline(object):
# def process_item(self, item, spider):
# return item
import datetime
from twisted.enterprise import adbapi


class HongxiuMysqlPipeline(object):
    """Scrapy item pipeline that writes scraped book items to MySQL.

    Uses ``twisted.enterprise.adbapi`` so inserts run asynchronously on a
    connection pool and do not block the crawl.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Read the MySQL connection parameters from the project settings.

        :param crawler: the running Crawler; its ``settings`` holds the
            ``MYSQL_*`` values defined in settings.py.
        :returns: a new pipeline instance.
        """
        # NOTE(review): MYSQL_DB_NAME exists in settings but is not read here;
        # the INSERT statement below qualifies the table with an explicit
        # schema name instead -- confirm that is intentional.
        cls.HOST = crawler.settings.get("MYSQL_HOST")
        cls.PORT = crawler.settings.get("MYSQL_PORT")
        cls.USER = crawler.settings.get("MYSQL_USER")
        cls.PASSWD = crawler.settings.get("MYSQL_PASSWORD")
        return cls()

    def open_spider(self, spider):
        """Create the asynchronous connection pool when the spider starts."""
        self.dbpool = adbapi.ConnectionPool('pymysql',
                                            host=self.HOST,
                                            port=self.PORT,
                                            user=self.USER,
                                            passwd=self.PASSWD,
                                            charset='utf8')

    def process_item(self, item, spider):
        """Schedule an asynchronous INSERT for the item and pass it on."""
        # Bug fix: attach the errback so database failures are reported via
        # handle_error instead of being silently dropped (the original never
        # wired handle_error to the Deferred returned by runInteraction).
        deferred = self.dbpool.runInteraction(self.insert_db, item)
        deferred.addErrback(self.handle_error)
        return item

    def handle_error(self, failure):
        # Called when an asynchronous insert fails; prints the Twisted Failure.
        print(failure)

    def close_spider(self, spider):
        # Release all pooled connections when the spider closes.
        self.dbpool.close()

    def insert_db(self, cur, item):
        """Run the INSERT for one item inside a pool transaction.

        :param cur: a DB-API cursor supplied by ``runInteraction``.
        :param item: the scraped item; must contain the keys listed below.
        """
        # Leading/trailing None map to columns filled by the database --
        # presumably an AUTO_INCREMENT id and a default timestamp; TODO
        # confirm against the table definition. (The original also computed
        # create_date/create_time here but never used them; removed.)
        values = (
            None,
            item['book_id'],
            item['book_name'],
            item['book_author'],
            item['book_type'],
            item['tag'],
            item['brief'],
            item['website'],
            None,
        )
        # 9 columns -> 9 placeholders; parameterized to avoid SQL injection.
        sql = 'INSERT INTO 庫名.表名 VALUES (%s'+',%s'*8+')'
        cur.execute(sql, values)

接着在settings.py中寫入相關配置參數,添加至item_pipelines中:

# MySQL connection parameters read by HongxiuMysqlPipeline.from_crawler().
MYSQL_DB_NAME = 'scrapy_db'
MYSQL_HOST = 'localhost'
MYSQL_PORT = 3306
MYSQL_USER = 'root'
MYSQL_PASSWORD = 'new.1234'

# Register the pipeline. Bug fix: the original referenced
# 'toscrape_book.pipelines.MySQLPipeline' -- a dotted path copied from a
# different project that does not match the class defined in this project's
# pipelines.py, so the pipeline would never run. The path below assumes the
# Scrapy project package is named 'hongxiu' -- TODO confirm the package name.
ITEM_PIPELINES = {
    'hongxiu.pipelines.HongxiuMysqlPipeline': 400,
}


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM