當 Scrapy 爬取完成以後,會發送詳細信息到郵箱
1.首先編寫郵件發送模塊
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
@file: emailHandler.py
@time: 2018/04/21
"""
# 郵件服務封裝
import smtplib
from email.mime.text import MIMEText
from email.utils import formataddr
# Shared crawl statistics. Other modules import these dicts and mutate them
# in place; EmailHandler.send_mail reads them to build the default report.

# Response counters keyed by status code.
status = dict()

# Item persistence counters.
pipeline_item = dict(
    item=0,         # items stored successfully
    item_error=0,   # items that failed to be stored
    error_info="",  # reason for the failure
)

# Crawl start / end timestamps.
spider_time = dict(start="", end="")
class EmailHandler(object):
    """Send plain-text e-mail through an authenticated SMTP-over-SSL session.

    The connection is opened and logged in when the instance is created and
    is reused by every ``send_mail`` call.
    """

    # type_ -> (SMTP host, implicit-TLS port).
    # smtplib.SMTP_SSL starts the TLS handshake immediately, so it must be
    # pointed at an SSL port; the plaintext port 25 previously used for 163
    # cannot complete that handshake.
    _SERVERS = {
        0: ('smtp.qq.com', 465),   # QQ mail
        1: ('smtp.163.com', 465),  # 163 mail
    }

    def __init__(self, user, password, type_=0):
        """Connect to the provider's SMTP server and log in.

        :param user: str, sender mailbox address (also the login name)
        :param password: str, authorization code issued for the sender
        :param type_: int, 0 for QQ mail, 1 for 163 mail
        :raises ValueError: if ``type_`` is not a supported provider
        """
        # Validate before touching the network so a bad type_ fails loudly
        # instead of leaving the instance without a ``server`` attribute.
        if type_ not in self._SERVERS:
            raise ValueError(
                "unsupported type_ %r (expected 0 for QQ or 1 for 163)" % (type_,)
            )
        host, port = self._SERVERS[type_]
        self.user = user
        self.password = password
        self.server = smtplib.SMTP_SSL(host, port)
        self.server.login(self.user, self.password)

    @staticmethod
    def _build_report():
        """Render the default mail body from the module-level crawl counters."""
        parts = ["\r\n"]
        for code, count in status.items():
            parts.append("狀態碼%s:%s次\r\n" % (code, count))
        parts.append("\r\n")
        parts.append(
            "存入數據庫成功條數:%s 條\r\n存入數據庫失敗條數:%s 條\r\n"
            % (pipeline_item.get("item"), pipeline_item.get('item_error'))
        )
        if pipeline_item.get('item_error') > 0:
            parts.append("失敗原因:%s" % pipeline_item.get('error_info'))
        parts.append("\r\n")
        parts.append(
            "爬蟲啟動時間:%s \r\n爬蟲結束時間:%s"
            % (spider_time.get("start"), spider_time.get("end"))
        )
        return "".join(parts)

    def send_mail(self, to, subject, content=None):
        """Send one UTF-8 plain-text message.

        :param to: str, recipient mailbox address
        :param subject: str, mail subject
        :param content: str, mail body; when empty or None the crawl report
            built from ``status``, ``pipeline_item`` and ``spider_time`` is
            sent instead
        :return: bool, True on success, False on any failure
        """
        # Best-effort by design: any failure (including one while building
        # the report) is reported on stdout and turned into False.
        try:
            if not content:
                content = self._build_report()
            msg = MIMEText(content, 'plain', 'utf-8')
            msg['From'] = formataddr(('', self.user))
            msg['To'] = formataddr(('', to))
            msg['Subject'] = subject
            self.server.sendmail(self.user, to, msg.as_string())
        except Exception as exc:
            print("【%s】郵件發送失敗,請檢查信息" % subject)
            # Surface the cause; it was previously discarded.
            print("    reason: %r" % (exc,))
            return False
        print("【%s】郵件發送成功" % subject)
        return True
2.提供郵件提示所有數據
2.1在下載中間件中添加如下代碼
from xxx.emailHandler import *
def process_response(self, request, response, spider):
    """Tally the response's status code in ``status`` and pass it through.

    :param request: the request that produced ``response`` (unused here)
    :param response: downloader response; only ``response.status`` is read
    :param spider: the running spider (unused here)
    :return: ``response``, unchanged
    """
    # Called with the response returned from the downloader.
    # Must either:
    # - return a Response object
    # - return a Request object
    # - or raise IgnoreRequest

    # The first response for a code counts as 1; it used to be stored as 0,
    # which left every tally one short.
    status[response.status] = status.get(response.status, 0) + 1
    return response
2.2 在管道中加入如下代碼
from xxx.middlewares import pipeline_item
def process_item(self, item, spider):
    """Store ``item`` in MongoDB and record the outcome in ``pipeline_item``.

    Connects with ``self.host`` / ``self.port``, authenticates against the
    hard-coded ``xiaosidb`` database with ``self.user`` / ``self.passwd``,
    then inserts ``dict(item)`` into ``self.db`` / ``self.table``.

    :param item: the scraped item; must be convertible with ``dict()``
    :param spider: the running spider (unused here)
    :return: ``item``, unchanged, whether or not the insert succeeded
    """
    client = MongoClient(self.host, self.port)
    try:
        db_auth = client.xiaosidb
        db_auth.authenticate(self.user, self.passwd)
        table = client[self.db][self.table]
        try:
            # NOTE(review): Collection.insert is the legacy PyMongo call;
            # kept because the PyMongo version in use is not visible here.
            table.insert(dict(item))
        except Exception as e:
            # Count the failure (the counter used to be incremented by 0)
            # and keep a printable reason for the report e-mail.
            pipeline_item['item_error'] += 1
            pipeline_item['error_info'] = str(e)
        else:
            pipeline_item['item'] += 1
    finally:
        # A client is opened per item, so release it per item as well.
        client.close()
    return item
2.3在spiders 爬蟲程序中加入如下代碼
from xxx.middlewares import *
def start_requests(self):
    """Record the crawl start time in ``spider_time['start']``.

    The value is the current local time formatted as ``YYYY-MM-DD HH:MM:SS``.
    """
    now = time.localtime()
    spider_time['start'] = time.strftime('%Y-%m-%d %H:%M:%S', now)
def close(spider, reason):
    """Record the crawl end time and e-mail the crawl report.

    The end time is stored in ``spider_time['end']`` as local time formatted
    ``YYYY-MM-DD HH:MM:SS``. The mail is sent without explicit content, so
    ``EmailHandler.send_mail`` builds the body itself.

    :param spider: the spider being closed (unused here)
    :param reason: why the spider closed (unused here)
    """
    spider_time['end'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    recipient = 'xxxx'  # placeholder: mailbox that receives the report
    # Placeholder credentials; type_ is left at its default.
    EmailHandler(user='xxxx', password='xxx').send_mail(recipient, '郵件標題')
3. 當爬蟲運行完畢以後,會自動發送郵件

