Python 爬取 B 站(Bilibili)視頻


import json
import os
import subprocess
import time

import requests
import re



class BLBL(object):
    """Download one Bilibili video page as separate streams and merge them.

    The page at ``url`` is fetched with browser-like headers, the audio and
    video stream URLs are read from the ``window.__playinfo__`` JSON embedded
    in the page, both streams are saved into the current working directory,
    and they are finally muxed into ``<title>(合).mp4`` by the ``ffmpeg``
    executable, which must be installed and reachable through ``PATH``.
    """

    # Characters removed from the page title before it is used as a file
    # name: the ones the original code stripped ('/', ':', ' ') plus the
    # remaining characters that Windows forbids in file names.
    _TITLE_STRIP_TABLE = str.maketrans('', '', '\\/:*?"<>| ')

    def __init__(self, url, cookie, referer, timeout=30):
        """Store the request parameters.

        Args:
            url: Address of the video page to download.
            cookie: Value sent in the ``Cookie`` header of the page request.
            referer: Value sent in the ``Referer`` header of every request.
            timeout: Seconds passed as ``timeout`` to every HTTP request so
                a stalled connection cannot hang the script forever.
        """
        self.base_url = url
        # Cookie header value
        self.cookie = cookie
        # Referer header value
        self.referer = referer
        self.timeout = timeout
        # Request header values
        self.accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3'
        # NOTE(review): 'br' is advertised here; requests only decodes
        # brotli bodies when a brotli package is installed -- confirm.
        self.accept_Encoding = 'gzip, deflate, br'
        self.accept_Language = 'zh-CN,zh;q=0.9,en;q=0.8'
        # Header *value* only: the previous value started with the literal
        # text "User-Agent:", which produced a malformed header.
        self.user_agent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko)"

    def html(self):
        """Fetch the video page and return its HTML source as text.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        # These headers are required for the start page; without them the
        # site does not return the complete source (anti-scraping measure).
        base_headers = {
            'Accept': self.accept,
            'Accept-Encoding': self.accept_Encoding,
            'Accept-Language': self.accept_Language,
            'Cache-Control': 'no-cache',
            'Cookie': self.cookie,
            'Referer': self.referer,
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': self.user_agent
        }
        base_response = requests.get(
            self.base_url, headers=base_headers, timeout=self.timeout)
        base_response.raise_for_status()
        return base_response.text

    def xin_xi(self, html):
        """Extract the title and the stream URLs from the page source.

        Args:
            html: HTML source of the video page.

        Returns:
            A ``(title, audio_url, video_url)`` tuple. ``title`` has the
            characters that are unsafe in file names removed.

        Raises:
            IndexError: If the playinfo script or the title element is not
                present in ``html``.
        """
        playinfo = re.findall('<script>window.__playinfo__=(.*?)</script>', html, re.S)
        if not playinfo:
            raise IndexError(
                'window.__playinfo__ not found in page; '
                'check the URL, cookie and request headers')
        titles = re.findall('<span class="tit">(.*?)</span>', html)
        if not titles:
            raise IndexError('video title (<span class="tit">) not found in page')
        title = titles[0].translate(self._TITLE_STRIP_TABLE).strip()
        html_data = json.loads(playinfo[0])
        # Audio stream URL
        audio_url = html_data['data']['dash']['audio'][0]['backupUrl'][0]
        # Video stream URL
        video_url = html_data['data']['dash']['video'][0]['backupUrl'][0]
        return title, audio_url, video_url

    def _download(self, url, path, headers):
        """Stream the resource at ``url`` into the file ``path``."""
        # Streaming keeps memory use flat instead of holding a whole media
        # file in memory before writing it out.
        with requests.get(url, headers=headers, stream=True,
                          timeout=self.timeout) as response:
            # Fail loudly rather than saving an error page as media.
            response.raise_for_status()
            with open(path, mode='wb') as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)

    def video(self, html):
        """Download the audio and video streams of the page and merge them.

        Args:
            html: HTML source of the video page, as returned by ``html()``.
        """
        # Video name, audio URL and video URL
        title, audio_url, video_url = self.xin_xi(html)
        # Headers required when requesting the media download addresses
        download_headers = {
            'User-Agent': self.user_agent,
            'Referer': self.referer,
            'Origin': 'https://www.bilibili.com',
            'Accept': self.accept,
            'Accept-Encoding': self.accept_Encoding,
            'Accept-Language': self.accept_Language
        }
        print('正在保存:', title)
        self._download(audio_url, title + '.mp3', download_headers)
        self._download(video_url, title + '.mp4', download_headers)
        self.video_audio_merge_single(title)

    def run(self):
        """Fetch the page, download both streams and merge them."""
        html = self.html()
        self.video(html)
        # Two files are downloaded (one audio, one video); they are only a
        # complete video once merged, which requires ffmpeg to be installed
        # and available through the PATH environment variable.
        print('爬取成功')

    def video_audio_merge_single(self, video_name):
        """Mux ``<video_name>.mp4`` and ``<video_name>.mp3`` with ffmpeg.

        The result is written to ``<video_name>(合).mp4``. The two source
        files are removed only after ffmpeg has exited successfully.

        Args:
            video_name: File name stem shared by the two downloaded files.

        Raises:
            FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
            subprocess.CalledProcessError: If ffmpeg exits with a non-zero
                status; the source files are left in place in that case.
        """
        print("視頻合成開始:", video_name)
        video_path = f'{video_name}.mp4'
        audio_path = f'{video_name}.mp3'
        # An argument list (no shell) keeps titles containing shell
        # metacharacters, and the parentheses of the output name, from being
        # interpreted by a shell.
        command = [
            'ffmpeg',
            '-i', video_path,
            '-i', audio_path,
            '-vcodec', 'copy',
            '-acodec', 'copy',
            f'{video_name}(合).mp4',
        ]
        # Block until ffmpeg is done instead of sleeping a fixed 10 seconds,
        # so the inputs are never deleted while still being read.
        subprocess.run(command, check=True)
        print("視頻合成結束:", video_name)
        os.remove(audio_path)
        os.remove(video_path)

if __name__ == '__main__':
    # Address of the video page to download.
    url = 'https://www.bilibili.com/video/BV1yy4y1i766'
    referer = 'https://space.bilibili.com/'
    # TODO: paste the Cookie header value of a logged-in session here.
    cookie = ''
    blbl = BLBL(url, cookie, referer)
    blbl.run()

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM