電影天堂電影鏈接爬取


 1 import requests,re
 2 
 3 
 4 def getdetail(url):
 5 
 6     response = requests.get(url)
 7     html = response.content.decode('gbk')
 8     # 電影詳情頁標題
 9     movie_title_name = re.search('<h1><font color=#07519a>(.*)</f',html)
10     movie_title = movie_title_name.group(1)
11     # 電影 磁力   magnet
12     movie_magnet_url = re.search('/><a href="(.*)"><str',html)
13     # print(movie_magnet.group(1))
14     movie_magnet = movie_magnet_url.group(1)
15     # torrent種子
16     movie_torrent_url = re.search('ddf"><a href="(.*)">ft',html)
17     movie_torrent = movie_torrent_url.group(1)
18     # print(movie_torrent.group(1))
19     # 這個列表用來title
20     movie_title_list = []
21     movie_title_list.append(movie_title)
22 
23     # 這個列表兩個下載的鏈接
24     movie_down_url = []
25     movie_down_url.append(movie_magnet)
26     movie_down_url.append(movie_torrent)
27     movie_down_url_all = []
28     movie_down_url_all.append(movie_down_url)
29 
30 
31     movie_dict = dict(zip(movie_title_list,movie_down_url_all))
32     print(movie_dict)
33 
34 
35 
def getpage(start=1, end=178):
    """Crawl the listing pages and hand every linked movie to ``getdetail``.

    For each page number in ``range(start, end)`` the listing page
    ``list_23_<n>.html`` is downloaded, the href of every anchor carrying
    ``class="ulink"`` is extracted, and ``getdetail`` is called with the
    absolute URL built from it.

    Args:
        start: First listing page number to fetch (inclusive).
        end: Page number to stop before (exclusive). With the defaults,
            pages 1 through 177 are crawled.
    """
    base_url = 'http://www.dytt8.net'
    # [^"]* instead of .*: a greedy dot would swallow everything up to the
    # last matching anchor when a line contains more than one <a> tag.
    link_pattern = re.compile(r'<a href="([^"]*)" class="ulink"')

    for page_no in range(start, end):
        list_url = '%s/html/gndy/dyzz/list_23_%s.html' % (base_url, page_no)
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(list_url, timeout=10)
        except requests.RequestException as exc:
            # One unreachable listing page should not end the whole crawl.
            print('skip %s: %s' % (list_url, exc))
            continue

        for movie_path in link_pattern.findall(response.text):
            getdetail(base_url + movie_path)
50 
51 
# Script entry point: start the crawl only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    getpage()

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM