用 Python 爬取愛奇藝電視劇及劇集鏈接


 爬取的顯示結果如下:

 

話不多說,下面是 Python 代碼:

 1 import requests
 2 import re
 3 from bs4 import BeautifulSoup
 4 import json
 5 
 6 if __name__ == '__main__':
 7 
 8     for i in range(1,10):#翻頁數可自行選擇
 9         #獲取URL,並自動翻頁
10         url = 'http://list.iqiyi.com/www/2/-------------11-'+str(i)+'-1-iqiyi--.html'
11 
12         headers = {
13             'Access-Control-Allow-Credentials': 'true',
14             'Cache-Control': 'max-age=900',
15             'Content-Encoding': 'gzip',
16             'Content-Language': 'zh-CN',
17             'Content-Type': 'text/html; charset=UTF-8',
18             'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36',
19             'Referer': 'http://list.iqiyi.com/www/2/-------------11-4-1-iqiyi--.html',
20             'Upgrade-Insecure-Requests': '1'
21         }
22         target = requests.get(url=url,headers=headers).text
23         #爬取愛奇藝電視劇總網頁,解析HTML網頁
24         soup = BeautifulSoup(target,'html.parser') #html.parser解析HTML網頁
25         returnSoup = soup.find_all("div", attrs={"class": "wrapper-piclist"})[0]
26         returnSoup1= str(returnSoup).replace('\r\n','').replace('\n','').replace(' ','').replace('"rseat="bigTitle','')
27         href_title = re.findall('"data-widget-qidanadd="qidanadd"href="(.*?)"target="_blank"title=".*?"><imgalt="(.*?)"height="236"rseat="dsjp7"src=',str(returnSoup1))
28 
29 
30         for i in href_title:
31             href=i[0]
32             title=i[1]
33 
34             #轉到某個電視劇鏈接網頁,並解析
35             href1=str(href).split('#')[0]
36             url2=href1
37             target2 = requests.get(url=url2).text
38             soup2 = BeautifulSoup(target2,'html.parser')
39             returnsoup2 = soup2.find_all('div',attrs={'class':'site-piclist_pic'})
40 
41             #用正則表達式獲取劇集鏈接
42             result2 = re.findall('(?<=href=\").*?(?=\")',str(returnsoup2))
43             #用正則表達式獲取劇集名稱
44             title2 = re.findall('(?<=title=\").*?(?=\">)',str(returnsoup2))
45             j=len(title2)
46             #輸出爬取結果
47             for i in range(1,j-2):
48                 str1=''+str(i)+''
49                 print(result2[i])
50                 print(str1,title2[i])

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM