爬取某電影網站(未寫完)

本文轉載自查看原文 2019-07-18 10:35 5120 python實例

 
import requests
import bs4
import lxml
import re
import time
from bs4 import BeautifulSoup
# Base address of the site being scraped
url = 'https://www.88ys.cc'
# Title of the film or TV series to search for
film = '家有女友'
# Proxy address as host:port (the original author notes it has expired and
# needs renewing)
proxy = '120.24.245.33:16818'
# Proxy mapping in the shape `requests` expects: one entry per URL scheme,
# each pointing at the same proxy host
proxies = {scheme: scheme + '://' + proxy for scheme in ('http', 'https')}
# Ask the server for gzip-compressed responses (faster transfer, per the
# original author's note)
headers = {"Accept-Encoding": "gzip"}
####Search results####
def search(timeout=10):
    """Search the site for ``film`` and return the links of the hits.

    Posts the module-level ``film`` title to the site's ``vod-search``
    endpoint and collects the ``href`` of every ``<a class="link-hover">``
    anchor found in the returned HTML.

    Args:
        timeout: Seconds to wait for the server. ``requests`` never times
            out by default, so without this a stalled connection would
            block the script indefinitely.

    Returns:
        list[str]: The captured ``href`` values in page order; empty when
        the page contains no matching anchor.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            when ``timeout`` elapses.
    """
    # Search endpoint
    url_search = url + '/index.php?m=vod-search'
    # Form fields submitted with the POST
    data = {
        'wd': film,
        'submit': '',
    }
    # (1) Direct connection
    r_s = requests.post(url_search, data=data, timeout=timeout)
    # (2) Through the proxy
    # r_s = requests.post(url_search, data=data, proxies=proxies,
    #                     headers=headers, timeout=timeout)
    # Force UTF-8 decoding of the response body
    r_s.encoding = 'utf-8'
    # Pull the href out of every result anchor
    return re.findall(r'<a class="link-hover" href="(.*?)"', r_s.text)
####Episode list####
def List(pat_search, timeout=10):
    """Return the episode links listed in the page's ``div#stab81``.

    Args:
        pat_search: Path of a search hit; it is appended to the
            module-level ``url`` to form the page address.
        timeout: Seconds to wait for the server. ``requests`` never times
            out by default, so without this a stalled connection would
            block the script indefinitely.

    Returns:
        list[str]: Every ``href`` value found inside the first
        ``<div id="stab81">`` (per the original author's note, the first
        video source), or an empty list when the page has no such div.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            when ``timeout`` elapses.
    """
    # Address of the search hit
    url_list = url + pat_search
    # r_list = requests.get(url_list, proxies=proxies, headers=headers,
    #                       timeout=timeout)
    r_list = requests.get(url_list, timeout=timeout)
    # Force UTF-8 decoding of the response body
    r_list.encoding = 'utf-8'
    soup = BeautifulSoup(r_list.text, 'lxml')
    stab81 = soup.find(name='div', attrs={'id': 'stab81'})
    if stab81 is None:
        # The page has no such container: report "no episodes" instead of
        # failing with IndexError.
        return []
    # Collect every href inside the serialized container
    return re.findall(r'href="(.*?)"', str(stab81))
####Text details of a search hit####
def _first_match(pattern, text):
    """Return the first capture of ``pattern`` in ``text``, or '' if none."""
    found = re.search(pattern, text)
    return found.group(1) if found else ''


def search_news(pat_search, timeout=10):
    """Scrape the title, type and language of one search hit.

    Args:
        pat_search: Path of a search hit; it is appended to the
            module-level ``url`` to form the page address.
        timeout: Seconds to wait for the server. ``requests`` never times
            out by default, so without this a stalled connection would
            block the script indefinitely.

    Returns:
        list[str]: ``[title, type, language]``, always three items. A field
        that cannot be found on the page is returned as ``''`` so the
        positions stay stable (previously a missing field raised
        ``IndexError``).

    Raises:
        requests.exceptions.RequestException: On connection failure or
            when ``timeout`` elapses.
    """
    r = requests.get(url + pat_search, timeout=timeout)
    # Force UTF-8 decoding of the response body
    r.encoding = 'utf-8'
    # The details live in the div(s) with class "ct-c"
    divs = BeautifulSoup(r.text, 'lxml').find_all('div', class_='ct-c')
    bs = str(divs)
    # First <h1> inside those divs, looked up on the parsed tree directly
    # rather than re-parsing the serialized text.
    h1 = next((div.h1 for div in divs if div.h1 is not None), None)
    return [
        # [0] title: first text run found between '>' and '<' in the <h1>
        _first_match(r'>(.*?)<', str(h1)),
        # [1] type
        _first_match(r'類型：</span>(.*?)</dd>', bs),
        # [2] language
        _first_match(r'語言：</span>(.*?)</dd>', bs),
    ]
####Main function (loop over the search results and their episode lists)####
def _for_():
    """Print the details and the episode links of every search hit.

    Calls ``search()`` once; for each returned link it fetches the result
    of ``search_news`` and then of ``List``, prints both in that order and
    sleeps three seconds before moving on.
    """
    for link in search():
        details = search_news(link)
        episodes = List(link)
        print(details)
        print(episodes)
        # Throttle the loop; per the original author's note, running too
        # fast makes the site force-close the connection.
        time.sleep(3)
# Run the main function.
# NOTE(review): this executes at module level, so it also runs when the file
# is imported, and it runs before `a()` further down is defined.
_for_()
####Unfinished: the browser inspector and the fetched source do not match####
def a():
    """Fetch one hard-coded player page and print its prettified HTML.

    Debug helper left unfinished by the original author, who notes that
    the HTML fetched here differs from what the browser inspector shows.
    """
    response = requests.get(url + '/vod-play-id-56106-src-1-num-1.html')
    response.encoding = 'utf-8'
    print(BeautifulSoup(response.text, 'lxml').prettify())
         
(未解決問題)
查看器和爬取源碼不一致
獲取視頻鏈接
電影下載操作

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱yoyou2525@163.com刪除。

猜您在找： 爬取電影網站 python爬取電影網站信息 電影網站推薦 Flask開發微電影網站(一) netflix中文電影網站 Django實現微電影網站 Flask開發微電影網站(三) 基於Spark的電影推薦系統（電影網站） 爬取4567電影網 Node.js 蠶食計劃（四）—— Express + SQL Server 搭建電影網站