Python爬蟲之梨視頻爬取


源代碼

注意動態加載地址:

#動態加載地址
#http://www.pearvideo.com/category_loading.jsp?reqType=5&categoryId=59&start=24
#http://www.pearvideo.com/category_loading.jsp?reqType=5&categoryId=59&start=36
#start代表本次加載的起始位置(偏移量),每次請求遞增12;
#掌握urlretrieve模塊的用法 #下載模塊
import requests
import re
import os
import time
from urllib.request import urlretrieve  #下載模塊
def video_DL(url):
    """Download every video linked from one Pear Video listing page.

    Fetches ``url``, collects the detail-page links it contains, then reads
    each detail page for the video's source address and title and saves the
    file as ``video/<title>.mp4`` relative to the current working directory.

    Args:
        url: Address of a page whose HTML contains
            ``class="vervideo-lilink actplay"`` anchors.

    Returns:
        None. Progress is printed and files are written to ``video/``.

    Raises:
        Network and file-system errors from ``requests.get``, ``os.makedirs``
        and ``urlretrieve`` are not caught here and propagate to the caller.
    """
    # Browser-like request header, sent with every page request below.
    header = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0"}
    # A timeout keeps a stalled connection from blocking the crawl forever.
    timeout = 30

    # Fetch the listing page and pull out the relative detail-page links.
    html = requests.get(url, headers=header, timeout=timeout).text
    reg = '<a href="(.*?)" class="vervideo-lilink actplay">'
    video_url = [
        "http://www.pearvideo.com/{}".format(i) for i in re.findall(reg, html)
    ]
    if not video_url:
        # Nothing to download, so do not create the output directory either.
        return

    # Create the output directory once instead of re-listing the working
    # directory on every iteration.
    path = "video"
    os.makedirs(path, exist_ok=True)

    for j in video_url:
        # Use the same header as the listing request for the detail page.
        purl = requests.get(j, headers=header, timeout=timeout).text
        # Playback address of the video file.
        purl_1 = re.findall('srcUrl="(.*?)"', purl)
        # Title of the video.
        video_name = re.findall('<h1 class="video-tt">(.*?)</h1>', purl)
        if not purl_1 or not video_name:
            # A page without the expected markup must not abort the batch.
            print("未找到視頻地址或標題,跳過:%s" % j)
            continue

        print("正在下載視頻:%s"%video_name[0])
        # Replace characters that are not allowed in file names; fall back to
        # the last URL segment if nothing usable is left of the title.
        safe_name = re.sub(r'[\\/:*?"<>|\r\n\t]', "_", video_name[0]).strip()
        if not safe_name:
            safe_name = j.rsplit("/", 1)[-1]
        urlretrieve(purl_1[0], os.path.join(path, "%s.mp4" % safe_name))
def download():
    """Walk the dynamic-loading endpoint of category 59 batch by batch.

    Builds the loading URL for the ``start`` values 12, 24, 36 and 48 in turn,
    waits one second, and passes each URL to ``video_DL``.
    """
    # Examples of the dynamic-loading address:
    # http://www.pearvideo.com/category_loading.jsp?reqType=5&categoryId=59&start=24
    # http://www.pearvideo.com/category_loading.jsp?reqType=5&categoryId=59&start=36
    template = "http://www.pearvideo.com/category_loading.jsp?reqType=5&categoryId=59&start={}"
    for start in range(12, 49, 12):
        batch_url = template.format(start)
        # Pause before each batch is fetched.
        time.sleep(1)
        # Hand the batch over to the download function defined above.
        video_DL(batch_url)
# Start the crawl at module level: runs when the file is executed or imported.
download()

下載截圖:

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM