# -*- coding: utf-8 -*-
"""Scrape episode pages of a show on http://www.yhdm.tv/ for video links.

Created on Fri Aug 28 17:21:10 2020

@author: Mto

Purpose: obtain the video files of a show.
Aug 28: the core functionality is basically implemented.
"""

import re

import requests
from bs4 import BeautifulSoup

BASE_URL = 'http://www.yhdm.tv'
DEFAULT_SHOW_URL = BASE_URL + '/show/4790.html'
USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) '
              'Gecko/20100101 Firefox/67.0')

# Without a timeout ``requests.get`` may block forever on a stalled server.
REQUEST_TIMEOUT = 30  # seconds

# Number of episodes processed by ``main`` by default.
MAX_EPISODES = 3

# A ``data-vid`` value is expected to begin with an https URL ending in
# ".mp4"; anything after the last ".mp4" is discarded by the match.
MP4_URL_PATTERN = re.compile(r'^https.*\.mp4')


def getHTML(url):
    """Fetch a page from the site and return the response.

    Args:
        url: Address of the page to request.

    Returns:
        The ``requests.Response`` with its encoding forced to UTF-8.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    header = {
        'Host': 'www.yhdm.tv',
        'User-agent': USER_AGENT,
    }
    r = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
    r.encoding = 'utf-8'
    return r


def GetMp4HTML(url):
    """Fetch a video URL, print the HTTP status code and return the response.

    Unlike ``getHTML`` no ``Host`` header is sent, so the URL may point at a
    host other than the site itself.

    Args:
        url: Address of the video file to request.

    Returns:
        The ``requests.Response`` with its encoding forced to UTF-8.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    header = {
        'User-agent': USER_AGENT,
    }
    r = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
    r.encoding = 'utf-8'
    print(r.status_code)
    return r


def processHTML(r):
    """Parse the text of a response with BeautifulSoup's ``html.parser``.

    Args:
        r: Object exposing the page markup as ``r.text``.

    Returns:
        The parsed ``BeautifulSoup`` document.
    """
    return BeautifulSoup(r.text, 'html.parser')


def GetLinksAndTitle(url1, title, links):
    """Collect episode titles and absolute episode links from a show page.

    Prints the show name taken from the page's ``<h1>``, then appends one
    entry per ``<a>`` found inside the first element whose ``style``
    attribute is ``display:block``.

    Args:
        url1: Address of the show page.
        title: List that receives the text of each episode anchor (mutated
            in place).
        links: List that receives the absolute URL of each episode (mutated
            in place).

    Raises:
        ValueError: If the page has no ``display:block`` element.
    """
    soup = processHTML(getHTML(url1))
    print('要下載的動漫為:' + soup.h1.string)

    containers = soup.find_all(style="display:block")
    if not containers:
        raise ValueError('no episode list found on ' + url1)

    for anchor in containers[0].find_all('a'):
        title.append(anchor.text)
        # The hrefs are prefixed with the site root to make them absolute.
        links.append(BASE_URL + anchor['href'])


def getmp4(link):
    """Extract the video file URL from an episode page.

    Args:
        link: Address of the episode page.

    Returns:
        The part of the player's ``data-vid`` attribute matched by
        ``MP4_URL_PATTERN``.

    Raises:
        ValueError: If the page has no ``div#playbox`` element or its
            ``data-vid`` attribute does not contain a matching URL.
    """
    soup = processHTML(getHTML(link))
    players = soup.select('div#playbox')
    if not players:
        raise ValueError('no div#playbox element found on ' + link)

    data_vid = players[0].attrs.get('data-vid', '')
    match = MP4_URL_PATTERN.search(data_vid)
    if match is None:
        raise ValueError(
            'no https .mp4 URL in data-vid %r on %s' % (data_vid, link))
    return match.group()


def download(mp4link, title):
    """Report the video link for an episode without downloading it.

    The actual transfer is deliberately disabled to spare the server. A real
    download would fetch ``mp4link`` with ``GetMp4HTML`` and write the
    response content to ``title + '.mp4'``.

    Args:
        mp4link: URL of the video file.
        title: Episode title used in the printed message.
    """
    print(mp4link)
    print(title + '模擬訪問成功,不下了,放過那個可憐的服務器吧')


def main(url=DEFAULT_SHOW_URL, max_episodes=MAX_EPISODES):
    """Resolve and report the video links of a show's first episodes.

    Args:
        url: Address of the show page.
        max_episodes: Upper bound on the number of episodes processed; shows
            with fewer episodes are handled without error.
    """
    title = []
    links = []
    GetLinksAndTitle(url, title, links)
    # Slicing caps the count and tolerates shows shorter than the limit.
    for episode_title, link in zip(title[:max_episodes], links[:max_episodes]):
        download(getmp4(link), episode_title)


if __name__ == '__main__':
    main()
