yhdm動漫爬蟲項目


# -*- coding: utf-8 -*-
"""
Created on Fri Aug 28 17:21:10 2020

@author: Mto
"""
"""
網址:http://www.yhdm.tv/
目的
獲取視頻文件
8月28日,代碼功能基本實現
"""
import requests
import re
from bs4 import BeautifulSoup

def getHTML(url, timeout=10):
    """Request a page from www.yhdm.tv and return the response.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; handed
            straight to ``requests.get``.

    Returns:
        The ``requests`` response object, with its text encoding forced to
        UTF-8.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    headers = {
        # The Host header is pinned to the one site this scraper targets.
        'Host':'www.yhdm.tv',
        'User-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0'
    }
    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=timeout)
    response.encoding = 'utf-8'
    return response


def GetMp4HTML(url, timeout=10):
    """Request a video URL and return the response.

    Unlike the page fetcher, no Host header is sent, so the URL may point at
    any server. The HTTP status code of the reply is printed.

    Args:
        url: Address of the video file to fetch.
        timeout: Seconds to wait for the server before giving up; handed
            straight to ``requests.get``.

    Returns:
        The ``requests`` response object, with its text encoding set to
        UTF-8.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    headers = {
        'User-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0'
    }
    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=timeout)
    response.encoding = 'utf-8'
    print(response.status_code)
    return response

      
def processHTML(r):
    """Parse the text of response ``r`` with bs4's built-in 'html.parser'.

    Returns the resulting ``BeautifulSoup`` document.
    """
    return BeautifulSoup(r.text, 'html.parser')


def GetLinksAndTitle(url1,title,links):
    """Collect the episode names and episode page URLs of one show.

    The show's heading is printed, then every link inside the first element
    styled ``display:block`` is recorded.

    Args:
        url1: URL of the show's page.
        title: List that receives the text of each episode link (appended
            in place).
        links: List that receives the absolute URL of each episode link
            (appended in place, same order as ``title``).

    Raises:
        ValueError: If the page contains no element styled
            ``display:block`` to read the episode links from.
    """
    from urllib.parse import urljoin

    soup = processHTML(getHTML(url1))

    # get_text() also copes with an <h1> holding nested tags, where .string
    # would be None and break the concatenation below.
    heading = soup.h1
    show_name = heading.get_text() if heading is not None else ''
    print('要下載的動漫為:' + show_name)

    blocks = soup.find_all(style="display:block")
    if not blocks:
        raise ValueError('no element styled "display:block" found on ' + str(url1))

    # href=True skips anchors without a target so title and links stay paired.
    for anchor in blocks[0].find_all('a', href=True):
        title.append(anchor.text)
        # urljoin leaves already-absolute hrefs alone and resolves relative
        # ones against the site root.
        links.append(urljoin('http://www.yhdm.tv', anchor['href']))
    
        
    
def getmp4(link):
    """Extract the video file URL from an episode page.

    The URL is read from the ``data-vid`` attribute of the first
    ``div#playbox`` element: the part from the leading ``https`` up to the
    last ``.mp4`` is returned.

    Args:
        link: URL of the episode page.

    Returns:
        The matched video URL as a string.

    Raises:
        ValueError: If the page has no ``div#playbox``, the div has no
            ``data-vid`` attribute, or the attribute does not start with an
            https URL containing ``.mp4``.
    """
    soup = processHTML(getHTML(link))
    players = soup.select('div#playbox')
    data_vid = players[0].attrs.get('data-vid', '') if players else ''
    # Raw string: '\.' in a normal literal is an invalid escape sequence.
    match = re.search(r'^https.*\.mp4', data_vid)
    if match is None:
        raise ValueError('no https .mp4 link in div#playbox data-vid on ' + str(link))
    return match.group()


def download(mp4link,title):
    """Simulate downloading one episode.

    Prints the video link, then a line made of ``title`` followed by a fixed
    notice saying the access was simulated and the file was not fetched.
    Nothing is written to disk.
    """
    # The real download is kept below, disabled on purpose to spare the
    # server:
    #     r = GetMp4HTML(mp4link)
    #     try:
    #         with open(title+'.mp4','wb') as f:
    #             f.write(r.content)
    #             print(title+'下載成功')
    #     except:
    #         print(title+'下載失敗')
    print(mp4link)
    skipped_notice = '模擬訪問成功,不下了,放過那個可憐的服務器吧'
    print(title + skipped_notice)
    
            
def main():
    """Look up the episodes of show 4790 and process the first three.

    For each of the first three episodes (or fewer, when the show has
    fewer) the video link is extracted and handed to ``download``.
    """
    title = []
    links = []
    url = 'http://www.yhdm.tv/show/4790.html'
    GetLinksAndTitle(url, title, links)
    # Slicing caps the run at three episodes without raising IndexError
    # when the show lists fewer than three.
    for episode_title, episode_link in zip(title[:3], links[:3]):
        mp4link = getmp4(episode_link)
        download(mp4link, episode_title)


# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM