知識點
- 動態數據抓包
- 動態頁面分析
- requests攜帶參數發送請求
- json數據解析
- Python 3.8:版本較新且穩定,用於運行代碼
- PyCharm 2021.2:輔助編寫代碼
- requests 第三方模塊
對於本篇文章有疑問的同學可以加【資料白嫖、解答交流群:910981974】
-
右鍵點擊檢查 或者 F12 打開
-
選擇 Network,然後刷新網頁
-
隨便點擊打開一個視頻
-
點擊搜到的內容
-
依次展開查看, 去找到我們需要的視頻地址
-
請求頭參數
-
請求參數
- 請求方式: POST
- 請求頭(偽裝):
# Request headers sent with the POST request (copied from the browser's
# network panel for the profile page named in 'Referer').
headers = {
    # The request body is sent as JSON.
    'content-type': 'application/json',
    # Placeholder text: replace it with the cookie of your own session.
    'Cookie': '你自己的cookie',
    'Host': 'www.kuaishou.com',
    'Origin': 'https://www.kuaishou.com',
    'Referer': 'https://www.kuaishou.com/profile/3xv78fxycm35nn4',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'
    ),
}
- 請求參數:
# JSON body of the GraphQL request: operation name, query text and variables.
data = {
    'operationName': "visionProfilePhotoList",
    # The query text is kept on one line so that it stays exactly as captured.
    'query': "query visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n type\n author {\n id\n name\n following\n headerUrl\n headerUrls {\n cdn\n url\n __typename\n }\n __typename\n }\n tags {\n type\n name\n __typename\n }\n photo {\n id\n duration\n caption\n likeCount\n realLikeCount\n coverUrl\n coverUrls {\n cdn\n url\n __typename\n }\n photoUrls {\n cdn\n url\n __typename\n }\n photoUrl\n liked\n timestamp\n expTag\n animatedCoverUrl\n stereoType\n videoRatio\n profileUserTopPhoto\n __typename\n }\n canAddComment\n currentPcursor\n llsid\n status\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n",
    'variables': {
        'userId': "3x9dquvtb9n9fps",
        # An empty cursor is what this example sends for its first request.
        'pcursor': "",
        'page': "profile",
    },
}
- 後續如果需要翻頁爬取,可以用遞歸實現:把本頁返回的 pcursor 作為參數請求下一頁
import os  # stdlib; only used to make sure the output folder exists

# Fetch one page of a profile's feed and save every video it lists.
url = 'https://www.kuaishou.com/graphql'
# Disguise: headers copied from a browser session.
headers = {
    # Declares the request body as a JSON string.
    'content-type': 'application/json',
    # Paste the cookie of your own logged-in session here. A real session
    # cookie is a credential and must not be published with the script.
    'Cookie': '你自己的cookie',
    'Host': 'www.kuaishou.com',
    'Origin': 'https://www.kuaishou.com',
    'Referer': 'https://www.kuaishou.com/profile/3xauthkq46ftgkg',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36',
}
# `data` is the GraphQL payload defined earlier in this script.
# The timeout keeps a stalled connection from hanging the script forever.
response = requests.post(url=url, headers=headers, json=data, timeout=30)
# Fail here on a non-2xx reply instead of later while indexing the JSON.
response.raise_for_status()
json_data = response.json()
feeds = json_data['data']['visionProfilePhotoList']['feeds']
# Cursor that has to be sent to request the next page.
pcursor = json_data['data']['visionProfilePhotoList']['pcursor']

# open() does not create missing folders, so create the target folder first.
os.makedirs('video', exist_ok=True)
for feed in feeds:
    # The response is JSON, so fields are read by key; CSS/XPath selectors
    # would only apply if the data were HTML page source.
    caption = feed['photo']['caption']  # title
    photoUrl = feed['photo']['photoUrl']  # video link
    # Strip characters that are not allowed in file names. The pattern must be
    # a raw string: without the r prefix Python collapses '\\' to a single
    # backslash, the regex then sees '\/' (an escaped slash) and backslashes
    # in the caption are never removed.
    caption = re.sub(r'[\\/:*?"<>|\n\t]', '', caption)
    print(caption, photoUrl)
    video_response = requests.get(url=photoUrl, timeout=30)
    # Do not save an error page under an .mp4 name.
    video_response.raise_for_status()
    video_data = video_response.content
    with open(f'video/{caption}.mp4', mode='wb') as f:
        f.write(video_data)
    print(caption, '下載完成!')
def get_page(pcursor):
    """Download the videos of one profile page, then recurse into the next page.

    Posts the GraphQL payload built from *pcursor* to the module-level ``url``
    with the module-level ``headers`` (using the ``requests`` and ``re``
    modules the rest of this script already relies on), saves every listed
    video into ``video/`` and calls itself with the cursor found in the
    response.

    Args:
        pcursor: Page cursor to request. The script starts with ``''``;
            ``None`` means there is no page left.

    Returns:
        0 once no further page is available.

    Raises:
        requests.HTTPError: If the server answers with a non-2xx status.
    """
    # Exit condition of the recursion, checked before any network work.
    if pcursor is None:
        print('全部下載完成')
        return 0

    import os  # local import: only needed to create the output folder

    data = {
        'operationName': "visionProfilePhotoList",
        'query': "query visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n type\n author {\n id\n name\n following\n headerUrl\n headerUrls {\n cdn\n url\n __typename\n }\n __typename\n }\n tags {\n type\n name\n __typename\n }\n photo {\n id\n duration\n caption\n likeCount\n realLikeCount\n coverUrl\n coverUrls {\n cdn\n url\n __typename\n }\n photoUrls {\n cdn\n url\n __typename\n }\n photoUrl\n liked\n timestamp\n expTag\n animatedCoverUrl\n stereoType\n videoRatio\n profileUserTopPhoto\n __typename\n }\n canAddComment\n currentPcursor\n llsid\n status\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n",
        # The cursor argument selects which page the server returns.
        'variables': {'userId': "3xauthkq46ftgkg", 'pcursor': pcursor, 'page': "profile"},
    }

    # Without this request the function would call itself with the same cursor
    # until Python raises RecursionError.
    response = requests.post(url=url, headers=headers, json=data, timeout=30)
    response.raise_for_status()
    page = response.json()['data']['visionProfilePhotoList']
    feeds = page['feeds']
    next_pcursor = page['pcursor']

    # open() does not create missing folders.
    os.makedirs('video', exist_ok=True)
    for feed in feeds:
        caption = feed['photo']['caption']
        photo_url = feed['photo']['photoUrl']
        # Raw string so that '\\' really matches a backslash in the caption.
        caption = re.sub(r'[\\/:*?"<>|\n\t]', '', caption)
        print(caption, photo_url)
        video_response = requests.get(url=photo_url, timeout=30)
        video_response.raise_for_status()
        with open(f'video/{caption}.mp4', mode='wb') as f:
            f.write(video_response.content)
        print(caption, '下載完成!')

    # NOTE(review): the end-of-list marker is assumed to be None, as in the
    # original check - confirm against a live response. An empty page or a
    # cursor that did not advance is also treated as the end, so the recursion
    # cannot spin forever on an unexpected marker.
    if not feeds or next_pcursor == pcursor:
        print('全部下載完成')
        return 0
    return get_page(next_pcursor)


# Start from the first page only when this file is run as a script.
if __name__ == '__main__':
    get_page('')