# 將數據存儲到CSV文件
# Scrape one Maoyan board page and append every movie found on it
# (title, cast line, release line) to a CSV file.
import csv
import re
import urllib.request

URL = 'https://maoyan.com/board/4?offset=10'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/68.0.3440.106 Safari/537.36'
}

# One match per movie entry; the three groups are the title attribute, the
# text of the "star" paragraph and the text of the "releasetime" paragraph.
# re.S lets ".*?" run across the line breaks inside an entry.
MOVIE_PATTERN = re.compile(
    r'<div class="movie-item-info">.*?title="(.*?)"'
    r'.*?class="star">(.*?)</p>'
    r'.*?releasetime">(.*?)</p>',
    re.S,
)


def fetch_html(page_url, request_headers):
    """Download ``page_url`` and return its body decoded as UTF-8.

    Args:
        page_url: Address of the page to download.
        request_headers: HTTP headers sent with the request.

    Returns:
        The response body as ``str``.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    req = urllib.request.Request(page_url, headers=request_headers)
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(req) as res:
        return res.read().decode('utf-8')


def parse_movies(html):
    """Extract the movie entries from ``html``.

    Returns:
        A list of ``[name, star, releasetime]`` lists with surrounding
        whitespace stripped from every field; empty when nothing matches.
    """
    return [
        [field.strip() for field in match]
        for match in MOVIE_PATTERN.findall(html)
    ]


def append_rows(rows, path='my1.csv'):
    """Append ``rows`` to the CSV file at ``path``.

    The file is opened once for the whole batch. When ``rows`` is empty the
    file is neither created nor touched.
    """
    if not rows:
        return
    # newline="" is required by the csv module to avoid blank lines on
    # platforms that translate line endings.
    # NOTE(review): the file is opened with the platform default encoding;
    # pass an explicit encoding if the output must be portable.
    with open(path, 'a', newline="") as f:
        csv.writer(f).writerows(rows)


def save_to_csv():
    """Fetch the board page, parse it and append the movies to ``my1.csv``."""
    append_rows(parse_movies(fetch_html(URL, HEADERS)))


if __name__ == "__main__":
    save_to_csv()
# 將數據存儲到MySQL中
# Scrape one Maoyan board page and insert every movie found on it
# (title, cast line, release date) into the MySQL table ``film``.
import re
import urllib.request
import warnings

URL = 'https://maoyan.com/board/4?offset=10'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/68.0.3440.106 Safari/537.36'
}

# One match per movie entry; the three groups are the title attribute, the
# text of the "star" paragraph and the text of the "releasetime" paragraph.
# re.S lets ".*?" run across the line breaks inside an entry.
MOVIE_PATTERN = re.compile(
    r'<div class="movie-item-info">.*?title="(.*?)"'
    r'.*?class="star">(.*?)</p>'
    r'.*?releasetime">(.*?)</p>',
    re.S,
)

# Parameterized insert; the driver fills in and escapes the %s placeholders.
INSERT_SQL = 'insert into film(name,star,releasetime) values(%s,%s,%s)'


def fetch_html(page_url, request_headers):
    """Download ``page_url`` and return its body decoded as UTF-8.

    Args:
        page_url: Address of the page to download.
        request_headers: HTTP headers sent with the request.

    Returns:
        The response body as ``str``.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    req = urllib.request.Request(page_url, headers=request_headers)
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(req) as res:
        return res.read().decode('utf-8')


def parse_film_rows(html):
    """Extract the movie entries from ``html`` as rows for ``INSERT_SQL``.

    Returns:
        A list of ``[name, star, releasetime]`` lists; empty when nothing
        matches. Name and star are stripped of surrounding whitespace.
        The release text is stripped and then cut to characters 5..14 --
        presumably this drops a 5-character label and keeps a 10-character
        date; confirm against the page markup.
    """
    return [
        [name.strip(), star.strip(), releasetime.strip()[5:15]]
        for name, star, releasetime in MOVIE_PATTERN.findall(html)
    ]


def save_to_mysql():
    """Fetch the board page, parse it and store the movies in MySQL.

    All rows are inserted and committed as one transaction; the cursor and
    the connection are closed even when an insert fails.
    """
    # Imported here so the parsing helpers above stay importable on machines
    # without the MySQL driver installed.
    import pymysql

    # Silences every warning process-wide, including driver warnings.
    warnings.filterwarnings("ignore")

    rows = parse_film_rows(fetch_html(URL, HEADERS))

    # Keyword arguments are required: recent PyMySQL releases reject
    # positional connection parameters.
    # NOTE(review): credentials are hard-coded; move them to configuration
    # before using this outside a local test database.
    db = pymysql.connect(
        host="localhost",
        user="root",
        password="123456",
        database="spiderdb",
        charset="utf8",
    )
    try:
        cursor = db.cursor()
        try:
            cursor.executemany(INSERT_SQL, rows)
        finally:
            cursor.close()
        db.commit()
    finally:
        db.close()


if __name__ == "__main__":
    save_to_mysql()
# 將數據存儲到MongoDB中（使用pymongo）
# Scrape one Maoyan board page and insert every movie found on it
# (title, cast line, release line) into the MongoDB collection
# ``spiderdb.film``.
import re
import urllib.request

URL = 'https://maoyan.com/board/4?offset=10'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/68.0.3440.106 Safari/537.36'
}

# One match per movie entry; the three groups are the title attribute, the
# text of the "star" paragraph and the text of the "releasetime" paragraph.
# re.S lets ".*?" run across the line breaks inside an entry.
MOVIE_PATTERN = re.compile(
    r'<div class="movie-item-info">.*?title="(.*?)"'
    r'.*?class="star">(.*?)</p>'
    r'.*?releasetime">(.*?)</p>',
    re.S,
)


def fetch_html(page_url, request_headers):
    """Download ``page_url`` and return its body decoded as UTF-8.

    Args:
        page_url: Address of the page to download.
        request_headers: HTTP headers sent with the request.

    Returns:
        The response body as ``str``.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    req = urllib.request.Request(page_url, headers=request_headers)
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(req) as res:
        return res.read().decode('utf-8')


def parse_film_documents(html):
    """Extract the movie entries from ``html`` as MongoDB documents.

    Returns:
        A list of dicts with the keys ``name``, ``star`` and
        ``releasetime``, each value stripped of surrounding whitespace;
        empty when nothing matches.
    """
    return [
        {
            "name": name.strip(),
            "star": star.strip(),
            "releasetime": releasetime.strip(),
        }
        for name, star, releasetime in MOVIE_PATTERN.findall(html)
    ]


def save_to_mongodb():
    """Fetch the board page, parse it and store the movies in MongoDB.

    The documents are sent in a single batch and the client is closed
    afterwards, even when the insert fails.
    """
    # Imported here so the parsing helpers above stay importable on machines
    # without the MongoDB driver installed.
    import pymongo

    documents = parse_film_documents(fetch_html(URL, HEADERS))
    if not documents:
        # insert_many() rejects an empty list, and there is nothing to store.
        return

    conn = pymongo.MongoClient("127.0.0.1", 27017)
    try:
        conn["spiderdb"]["film"].insert_many(documents)
    finally:
        conn.close()


if __name__ == "__main__":
    save_to_mongodb()
