03-06-10--Python爬取嗶哩嗶哩學習視頻


下載任意嗶哩嗶哩視頻

一 尋找任意一個視頻地址

例如這個:

image-20191210151853073

拿出窗口中的鏈接:https://www.bilibili.com/video/av76609390,修改源代碼中的url即可完成下載

#由於嗶哩嗶哩的視頻和音頻是分開的,所以下載下來的文件有兩個:一個音頻,一個視頻。若要將視頻和音頻合成,可參看另一篇博客

二 分析頁面

這個地址加載後會返回該視頻的視頻信息和清晰度等信息,我們只需要取出視頻和音頻的地址,直接下載即可。

下面這兩個地址就是一個視頻,一個音頻,分片下載的

image-20191210152224945

image-20191210152307384

我們用requests模塊模擬請求即可:打開文件,不停地發送請求、加載數據並寫入文件

def _stream_to_file(url, path, request_headers, size_label):
    """Stream the body of ``url`` into ``path`` and return the bytes written.

    Args:
        url: Address of the media stream to fetch.
        path: Destination file; an existing file is overwritten.
        request_headers: Headers sent with the request.
        size_label: Text printed in front of the reported Content-Length.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        IOError: Fewer bytes arrived than the Content-Length announced.
    """
    written = 0
    # stream=True defers the body so it is written chunk by chunk instead of
    # being held in memory; the context manager releases the connection.
    with requests.get(url, headers=request_headers, stream=True) as response:
        # Fail loudly rather than saving an error page as a media file.
        response.raise_for_status()
        declared = response.headers.get('content-length')
        print(size_label, declared if declared is not None else 'unknown')
        # 'wb' instead of 'ab': a rerun must replace an earlier download,
        # not append a second copy to it.
        with open(path, 'wb') as output:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                output.write(chunk)
                written += len(chunk)
    if declared is not None and written < int(declared):
        raise IOError(
            'incomplete download of %s: got %d of %s bytes'
            % (path, written, declared)
        )
    return written


def download_video(old_video_url, video_url, audio_url, video_name):
    """Download the separate video and audio streams of one video.

    The streams are saved as ``<video_name>_video.mp4`` and
    ``<video_name>_audio.mp4`` in the current directory; they are not merged.

    Args:
        old_video_url: Address of the video page, sent as the Referer header.
        video_url: Address of the video stream.
        audio_url: Address of the audio stream.
        video_name: Title used as the prefix of both output file names.

    Returns:
        ``video_name`` unchanged.
    """
    # Work on a copy so per-download headers never leak into the shared
    # module-level dict (the old code left a stale Range header behind).
    request_headers = dict(headers)
    request_headers['Referer'] = old_video_url
    print("開始下載視頻:%s" % video_name)
    _stream_to_file(video_url, '%s_video.mp4' % video_name,
                    request_headers, '%s視頻大小:' % video_name)
    _stream_to_file(audio_url, '%s_audio.mp4' % video_name,
                    request_headers, '%s音頻大小:' % video_name)
    return video_name

三 全部代碼

'''

通過該程序下載的視頻和音頻是分成兩個文件的,沒有合成,
視頻為:視頻名_video.mp4
音頻為:視頻名_audio.mp4
修改url的值,換成自己想下載的頁面即可
'''

# 導入requests模塊,模擬發送請求
import requests
# 導入json
import json
# 導入re
import re

# Request headers shared by the HTTP calls in this script.
headers = {}
headers['Accept'] = '*/*'
headers['Accept-Language'] = 'en-US,en;q=0.5'
# Desktop Chrome user agent, split across lines only for readability.
headers['User-Agent'] = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/63.0.3239.84 Safari/537.36'
)


def my_match(text, pattern):
    """Find ``pattern`` in ``text`` and decode its first group as JSON.

    Args:
        text: Text to search, e.g. the HTML of a video page.
        pattern: Regular expression whose first capture group is JSON.

    Returns:
        The object decoded from the captured JSON text.

    Raises:
        ValueError: ``pattern`` does not match ``text``, or the captured
            text is not valid JSON (``json.JSONDecodeError`` is a subclass).
    """
    match = re.search(pattern, text)
    if match is None:
        # Without this guard the failure surfaces as an opaque
        # AttributeError on ``None.group``.
        raise ValueError('pattern %r not found in text' % (pattern,))
    payload = match.group(1)
    # Echo the raw JSON followed by a blank line, as the script always did.
    print(payload)
    print()
    return json.loads(payload)


def _stream_to_file(url, path, request_headers, size_label):
    """Stream the body of ``url`` into ``path`` and return the bytes written.

    Args:
        url: Address of the media stream to fetch.
        path: Destination file; an existing file is overwritten.
        request_headers: Headers sent with the request.
        size_label: Text printed in front of the reported Content-Length.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        IOError: Fewer bytes arrived than the Content-Length announced.
    """
    written = 0
    # stream=True defers the body so it is written chunk by chunk instead of
    # being held in memory; the context manager releases the connection.
    with requests.get(url, headers=request_headers, stream=True) as response:
        # Fail loudly rather than saving an error page as a media file.
        response.raise_for_status()
        declared = response.headers.get('content-length')
        print(size_label, declared if declared is not None else 'unknown')
        # 'wb' instead of 'ab': a rerun must replace an earlier download,
        # not append a second copy to it.
        with open(path, 'wb') as output:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                output.write(chunk)
                written += len(chunk)
    if declared is not None and written < int(declared):
        raise IOError(
            'incomplete download of %s: got %d of %s bytes'
            % (path, written, declared)
        )
    return written


def download_video(old_video_url, video_url, audio_url, video_name):
    """Download the separate video and audio streams of one video.

    The streams are saved as ``<video_name>_video.mp4`` and
    ``<video_name>_audio.mp4`` in the current directory; they are not merged.

    Args:
        old_video_url: Address of the video page, sent as the Referer header.
        video_url: Address of the video stream.
        audio_url: Address of the audio stream.
        video_name: Title used as the prefix of both output file names.

    Returns:
        ``video_name`` unchanged.
    """
    # Work on a copy so per-download headers never leak into the shared
    # module-level dict (the old code left a stale Range header behind).
    request_headers = dict(headers)
    request_headers['Referer'] = old_video_url
    print("開始下載視頻:%s" % video_name)
    _stream_to_file(video_url, '%s_video.mp4' % video_name,
                    request_headers, '%s視頻大小:' % video_name)
    _stream_to_file(audio_url, '%s_audio.mp4' % video_name,
                    request_headers, '%s音頻大小:' % video_name)
    return video_name


if __name__ == '__main__':
    # Replace with the address of the video page you want to download.
    url = 'https://www.bilibili.com/video/av76609390'
    # Fetch the video page; both JSON blobs below are embedded in its HTML.
    res = requests.get(url, headers=headers)
    # Stop on an HTTP error instead of failing later in the regex search.
    res.raise_for_status()
    # Playback details (stream addresses) as JSON.
    playinfo = my_match(res.text, r'__playinfo__=(.*?)</script><script>')
    # Page state (title and other metadata) as JSON.
    initial_state = my_match(res.text, r'__INITIAL_STATE__=(.*?);\(function\(\)')
    # Several video formats are listed; take the first entry.
    # NOTE(review): the original author states the first entry is the highest
    # resolution (1080p) -- confirm against the actual response ordering.
    video_url = playinfo['data']['dash']['video'][0]['baseUrl']
    # Address of the first listed audio stream.
    audio_url = playinfo['data']['dash']['audio'][0]['baseUrl']
    video_name = initial_state['videoData']['title']
    # Print the title itself; the old code printed the literal text "video_name".
    print('視頻名字為:', video_name)
    print('視頻地址為:', video_url)
    print('音頻地址為:', audio_url)
    download_video(url, video_url, audio_url, video_name)


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM