嗶哩嗶哩視頻爬取源碼分享


背景:

  無意間發現B站有位老師的課程特別好(Python教學視頻),但單位的網絡限制了視頻網站的訪問,所以嘗試把視頻下載保存起來。經過一段時間的研究,終於完成了代碼的開發,大家有需要的可以拿去使用。後續我會進行延伸優化,爭取做到在前端頁面輸入視頻名稱作為爬取條件進行下載。

 

第一版:2021年02月23日

第二版:2021年02月24日

解決間歇不觸發點擊保存按鈕的問題

#_author_='Lucky';
#date: 2021/2/18
import time

from pywinauto import application
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By


def chrome_options_setting(web_driver):
    """
    Turn on Chrome's "ask where to save each file before downloading" option.

    Opens ``chrome://settings/downloads``, walks down the nested shadow roots
    of the settings page to the toggle, and clicks the toggle only when its
    ``aria-pressed`` attribute currently reads ``"false"``.

    :param web_driver: Selenium browser driver controlling Chrome
    :return: None
    """

    def shadow_root_of(host):
        # A shadow root is only reachable by asking the page's JavaScript for it.
        return web_driver.execute_script("return arguments[0].shadowRoot", host)

    web_driver.get("chrome://settings/downloads")
    time.sleep(2)  # fixed pause before touching the settings page

    root = shadow_root_of(web_driver.find_element(By.XPATH, "//settings-ui"))
    # XPath selectors cannot be used below a shadowRoot node, so everything
    # from here on is located with CSS selectors (ids written as "#id").
    host = root.find_element(By.CSS_SELECTOR, "#container").find_element(By.CSS_SELECTOR, "#main")

    # Each of these elements hosts the shadow root that contains the next one.
    for selector in (
        "settings-basic-page[role='main']",
        "settings-downloads-page",
        "settings-toggle-button",
    ):
        host = shadow_root_of(host).find_element(By.CSS_SELECTOR, selector)

    # Locate the toggle once and reuse it for both the state check and the click.
    toggle = shadow_root_of(host).find_element(
        By.CSS_SELECTOR, "#outerRow > cr-toggle[aria-describedby='sub-label-text']")
    if toggle.get_attribute("aria-pressed") == "false":
        toggle.click()


def file_name_save(url, xpath):
    """
    Load a page and return the text of the element located by ``xpath``.

    Works on the module-level ``bili_browser`` driver, which therefore has to
    exist before this function is called.

    :param url: address of the page to open
    :param xpath: XPath expression locating the element whose text is wanted
    :return: text of the located element (the caller uses it as a file name)
    """
    bili_browser.get(url)
    time.sleep(3)  # fixed pause, presumably to let the page finish rendering
    # find_element(By.XPATH, ...) replaces find_element_by_xpath, which newer
    # Selenium releases no longer provide.
    file_name = bili_browser.find_element(By.XPATH, xpath).text
    print(file_name)
    return file_name


def save_as_window(file_name):
    """
    Type ``file_name`` into the open "另存為" dialog and press its save button.

    Connects to the window whose title matches "另存為" with class "#32770",
    sets the text of its "Edit" control, then keeps pressing "保存(&S)" once a
    second until the dialog no longer exists.

    :param file_name: text to put into the dialog's file name field
    :return: None
    """
    app = application.Application().connect(title_re=u"另存為", class_name="#32770")
    save_as_spec = app.window(title=u"另存為", class_name="#32770")
    # print(save_as_spec.print_control_identifiers())

    # Set the edit control's text directly; simulating keystrokes with
    # type_keys(..., with_spaces=True) would be the alternative.
    save_as_spec["Edit"].set_text(file_name)

    # The save click does not always register, so retry until the dialog is
    # gone. Existence is checked BEFORE each click: clicking first and checking
    # afterwards can see a dialog that is still closing and then try to click
    # a button that has already disappeared on the next pass.
    # NOTE(review): file_name is entered as-is; if the dialog rejects it the
    # dialog stays open and this loop keeps running - confirm callers pass a
    # name the dialog accepts.
    while True:
        time.sleep(1)
        if not save_as_spec.exists():
            break
        save_as_spec['保存(&S)'].click()


def main_download(url):
    """
    Submit ``url`` to the xbeibeix.com Bilibili page and ALT+click the
    resulting "MP4地址" link.

    Works on the module-level ``bili_browser`` driver, which therefore has to
    exist before this function is called.

    :param url: video page address typed into the site's address field
    :return: None
    """
    address_xpath = "//*[@placeholder='輸入地址']"

    bili_browser.get("https://xbeibeix.com/api/bilibili")
    time.sleep(3)
    bili_browser.find_element(By.XPATH, address_xpath).clear()
    time.sleep(1)
    bili_browser.find_element(By.XPATH, address_xpath).send_keys(url)
    time.sleep(1)
    bili_browser.find_element(By.ID, 'button-1').click()
    time.sleep(2)
    mp4_link = bili_browser.find_element(By.XPATH, "//a[contains(text(),'MP4地址')]")

    alt_key = u'\ue00a'  # key code of the ALT modifier
    # Press ALT, click the link and release ALT in ONE performed chain. The
    # release used to sit in a separate chain that was never performed, so the
    # modifier was never actually released.
    # NOTE(review): ALT+click is expected to make Chrome download the link
    # target instead of navigating to it - relies on browser behavior.
    ActionChains(bili_browser).key_down(alt_key).click(mp4_link).key_up(alt_key).perform()


if __name__ == "__main__":
    # Module-level driver: file_name_save() and main_download() use this
    # global directly, so the name must stay ``bili_browser``.
    bili_browser = webdriver.Chrome()
    try:
        bili_browser.maximize_window()

        # Make Chrome ask where to save each file before downloading.
        chrome_options_setting(web_driver=bili_browser)
        time.sleep(2)

        # ``index`` is used both as the ?p= page number and as the position
        # of the entry inside the page's "multi_page" list.
        for index in range(456, 634):
            video_url = 'https://www.bilibili.com/video/BV197411G75w?p=' + str(index)
            file_name_xpath = '//*[@id="multi_page"]/div[2]/ul/li[%d]/a/div/div[1]/span[2]' % index

            video_name = file_name_save(url=video_url, xpath=file_name_xpath)
            print(file_name_xpath)

            main_download(video_url)
            time.sleep(1)

            # Handle the Windows 10 "Save As" window and set the file name.
            save_as_window(file_name=video_name)
            time.sleep(3)

        time.sleep(5)
    finally:
        # Always close the browser, even when a step above raised; otherwise
        # the browser session is left running after a failure.
        bili_browser.quit()


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM