python 導入包import requests 以爬取騰訊招聘網的招聘信息為例:(完整代碼)
import requests
from lxml import etree
import pymysql
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'}
response = requests.get('https://hr.tencent.com/position.php?lid=&tid=&keywords=', headers=headers)
html = etree.HTML(response.content.decode())
# 職位名稱
list_zhiwei = html.xpath('//td[@class="l square"]/a/text()')
print(list_zhiwei)
# 職位類型
list_style = html.xpath('//tr[@class="even"]/td[2]/text()| //tr[@class="odd"]/td[2]/text()')
print(list_style)
# 人數
list_num = html.xpath('//tr[@class="even"]/td[3]/text()| //tr[@class="odd"]/td[3]/text()')
print(list_num)
# 地點
list_place = html.xpath('//tr[@class="even"]/td[4]/text()| //tr[@class="odd"]/td[4]/text()')
print(list_place)
# 發布時間
list_time = html.xpath('//tr[@class="even"]/td[5]/text()| //tr[@class="odd"]/td[5]/text()')
print(list_time)
接下來是入庫的過程,也是今天在實際應用中犯的錯誤,把錯誤代碼貼出來,下次切記別犯了!
# 入庫
conn = pymysql.connect(host = '127.0.0.1',port = 3306,database = 'python01',user = 'root',password = '123456',charset = 'utf8' )
cursor = conn.cursor()
for i in range(10):
name_z = list_zhiwei[i]
caregory = list_style[i]
num_people = list_num[i]
place = list_place[i]
add_time = list_time[i]
cursor.execute("insert into txZp values(0,'" + name_z + "','" + caregory + "','" + num_people + "','" + place + "','" + add_time + "')")
cursor.close()
conn.commit()
conn.close()
# 錯誤在於,關閉cursor對象和鏈接的縮進格式不正確,上面的實例對象建立和關閉沒有在同一層,導致最后報錯,mysql表格里也添加進去數據了,但是僅僅添加了一條
shift+Tab取消縮進后,錯誤消失,數據正常添加到mysql表格中,效果如下:
ps 縮進問題實在是不應該出現,寫一篇博客提醒自己,以后切記!!