爬取崔慶才大神的爬蟲教程,最後存儲到 MySQL


 1 # -*- coding: utf-8 -*-
 2 #coding:utf8
 3 import requests,time,unittest
 4 from lxml import etree
 5 import pymysql
 6 url ='http://cuiqingcai.com/1052.html'
 7 
 8 head = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
 9         "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
10         "Accept-Encoding":"gzip, deflate",
11         "Accept-Language":"zh-CN,zh;q=0.8"
12 
13         }
14 html = requests.get(url)
15 
16 selector = etree.HTML(str(html.text))
17 
18 r = selector.xpath("/html/body/section/div[3]/div/article/p/a/text()")
19 t = selector.xpath("/html/body/section/div[3]/div/article/p/a/@href")
20 #print(r)
21 #print(t)
22 now = str(time.strftime('%Y-%m-%d-%H-%M',time.localtime(time.time())))
23 
class datas(object):
    """Store the scraped link list in the MySQL table ``xxb``.

    Reads the module-level names ``r`` (link texts), ``t`` (link hrefs) and
    ``now`` (timestamp string of this run).
    """

    @staticmethod
    def _to_rows(dt, timestamp, remark='ggg'):
        """Build INSERT parameter tuples from ``(id, name, link)`` records.

        Fields are taken by position rather than by joining and re-splitting
        on commas, so a name that itself contains a comma stays intact.

        Args:
            dt: iterable of indexable records whose first three items are
                the id, the name and the link.
            timestamp: value stored in the ``time`` column of every row.
            remark: value stored in the ``remark`` column of every row.

        Returns:
            A list of ``(id, name, remark, link, time)`` tuples.
        """
        return [
            (str(each[0]), str(each[1]), remark, str(each[2]), timestamp)
            for each in dt
        ]

    def Data_storage(self, dt):
        """Insert every record of ``dt`` into table ``xxb``.

        All rows are sent through one connection with a parameterized
        statement, so quotes in scraped text cannot break or alter the SQL.
        The rows are committed together; if the insert fails nothing is
        committed and the connection is still closed.

        Args:
            dt: iterable of ``(id, name, link)`` records.
        """
        rows = self._to_rows(dt, str(now))
        if not rows:
            # Nothing to store; do not open a connection at all.
            return

        # Keep exposing the last processed record on the instance, as the
        # per-row loop used to do.
        self.uid, self.uname, _, self.ulink, _ = rows[-1]

        # NOTE(review): host and credentials are hard-coded here; consider
        # moving them to configuration.
        conn = pymysql.connect(host='192.168.191.1',user='root',passwd='123456789',db='data',port=3306,charset='utf8')
        try:
            with conn.cursor() as cur:
                # Placeholders are filled in by the driver, which escapes
                # each value.
                cur.executemany(
                    'INSERT INTO xxb(id,name,remark,link,time) '
                    'VALUES (%s,%s,%s,%s,%s)',
                    rows,
                )
            conn.commit()
        finally:
            # Release the connection even when the insert raised.
            conn.close()

    def Data_processing(self):
        """Pair the scraped link texts and hrefs with ids "1".."30" and store them.

        At most 30 records are produced; fewer if the page yielded fewer
        links.
        """
        dt = [
            (str(i), name, link)
            for i, (name, link) in zip(range(1, 31), zip(r, t))
        ]
        self.Data_storage(dt)
57 
# Script entry point: build the list of scraped links and write it to MySQL.
if __name__ == "__main__":
    scraper = datas()
    scraper.Data_processing()

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM