你是否經歷過這個:
那就對了~
因為這個接口必須用 POST 方式請求,並且要帶上相關的 cookie 才能拿到數據~
所以,下面用一段簡單的代碼來爬取拉勾~~~
import json
import sys
import time

import requests

# Keys printed for every job posting, in output order. The spellings
# ("skillLables", "stationname") are the keys this script reads from the
# response and must be kept exactly as they are.
_JOB_FIELDS = (
    "companyFullName",
    "positionName",
    "salary",
    "companySize",
    "skillLables",
    "createTime",
    "district",
    "stationname",
)


def _parse_positions(body):
    """Return the job list found in a positionAjax response body, or None.

    Args:
        body: The response body as text.

    Returns:
        The value at ``content -> positionResult -> result`` of the decoded
        JSON, or ``None`` when the body is not valid JSON or that path is
        missing.
    """
    try:
        return json.loads(body)["content"]["positionResult"]["result"]
    except (ValueError, KeyError, TypeError):
        # ValueError covers json.JSONDecodeError; KeyError/TypeError cover a
        # payload without the expected nesting (presumably what the site
        # sends when it rejects the request -- not verifiable from here).
        return None


def main():
    """Print the job postings from result pages 1-4 of a Lagou search.

    For each page a fresh session first GETs the search page so the server
    issues cookies, then POSTs the search form to the positionAjax endpoint
    with those cookies.  For every posting in the reply the fields listed in
    ``_JOB_FIELDS`` are printed one per line.  A page whose reply carries no
    job list is reported on stderr and skipped instead of aborting the run.

    Raises:
        requests.RequestException: If a request fails or exceeds the
            3-second timeout.
        KeyError: If a returned posting lacks one of ``_JOB_FIELDS``.
    """
    url_start = "https://www.lagou.com/jobs/list_運維?city=%E6%88%90%E9%83%BD&cl=false&fromSearch=true&labelWords=&suginput="
    url_parse = "https://www.lagou.com/jobs/positionAjax.json?city=成都&needAddtionalResult=false"
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Referer': 'https://www.lagou.com/jobs/list_%E8%BF%90%E7%BB%B4?city=%E6%88%90%E9%83%BD&cl=false&fromSearch=true&labelWords=&suginput=',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
    }
    for page in range(1, 5):
        data = {
            'first': 'true',
            'pn': str(page),
            'kd': '運維',
        }
        # A new session per page, as in the original, so every POST carries
        # cookies obtained just before it.  The ``with`` block closes the
        # session's connections, which the original never did.
        with requests.Session() as session:
            # Request the search page first so the session receives cookies.
            session.get(url_start, headers=headers, timeout=3)
            # The session sends its own cookie jar automatically, so no
            # explicit ``cookies=`` argument is needed.
            response = session.post(url_parse, data=data, headers=headers, timeout=3)
        time.sleep(5)  # pause between pages

        response.encoding = response.apparent_encoding
        positions = _parse_positions(response.text)
        if positions is None:
            print(
                f"page {page}: no job list in response "
                f"(HTTP {response.status_code}), skipping: {response.text[:200]!r}",
                file=sys.stderr,
            )
            continue

        for job in positions:
            for field in _JOB_FIELDS:
                print(job[field])


if __name__ == '__main__':
    main()