python爬蟲實現各視頻網站vip付費電影下載
使用解析網站解析視頻播放地址:http://jx.618g.com/
爬取解析網站,獲得視頻片段
cmd命令合成很多ts文件為完整的MP4視頻文件:copy /b *.ts movie.mp4
可在cmd窗口下運行,需到文件目錄下,也可使用bat文件運行
為提高下載速度,使用進程池的方式下載
import requests
from multiprocessing import Pool
from fake_useragent import UserAgent
import os
# 單線程方式
# def download():
# headers = {
# 'User-Agent': UserAgent().chrome
# }
# i=0
# while True:
# base_url = 'https://youku.cdn7-okzy.com/20191203/16033_b28cd947/1000k/hls/0d2d28c500600%04d.ts' % i
# response = requests.get(base_url, headers=headers)
# # print(response.status_code)
# if response.status_code == 404:
# print('下載完成')
# break
# else:
# with open('./video/{}'.format(base_url[-10:]), 'wb')as f:
# f.write(response.content)
# i+=1
#
#
# if __name__ == '__main__':
# download()
# 使用進程池方式下載(更快,cpu占用更多)
def download(i, n, timeout=30):
    """Download one ``.ts`` segment and save it under ``./video/``.

    The segment URL is a fixed base URL with the index ``i`` zero-padded to
    four digits; the file is stored under the last ten characters of that URL.

    Args:
        i: Index of the segment to fetch.
        n: Total number of segments; only used to compute the progress
            percentage that is printed for every tenth segment.
        timeout: Seconds to wait for the server, so that a stalled
            connection cannot block a pool worker forever.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Nothing is written in that case, so an error page never ends up
            on disk disguised as a video segment.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    headers = {
        'User-Agent': UserAgent().chrome
    }
    base_url = 'https://youku.cdn7-okzy.com/20191203/16033_b28cd947/1000k/hls/0d2d28c500600%04d.ts' % i
    response = requests.get(base_url, headers=headers, timeout=timeout)
    # Refuse to save error bodies (e.g. the 404 returned past the last segment).
    response.raise_for_status()
    # The target directory may not exist yet on a fresh checkout.
    os.makedirs('./video', exist_ok=True)
    with open('./video/{}'.format(base_url[-10:]), 'wb') as f:
        f.write(response.content)
    # Guard against n == 0 and format the ratio numerically instead of slicing
    # str(float), which yields uneven decimals and breaks on exponent notation.
    if n > 0 and i % 10 == 0:
        # flush: the line has no newline, so it would otherwise sit in the buffer.
        print('\r下載進度:{:.1f}%'.format(i / n * 100), end='', flush=True)
if __name__ == '__main__':
    # Script entry point: fan the segment downloads out over a process pool,
    # wait for all of them, then report how many tasks failed.
    n = 2200
    # Create the output directory once up front; download() writes into it.
    os.makedirs('./video', exist_ok=True)
    po = Pool(5)  # pool with five worker processes
    # Keep the AsyncResult handles: apply_async stores a worker's exception in
    # the handle instead of raising it, so dropping them hides every failure.
    results = [po.apply_async(download, args=(i, n)) for i in range(0, n)]
    po.close()
    print('\r開始下載.....', end='', flush=True)
    po.join()
    # After join() every task has finished, so successful() is safe to call.
    failed = [i for i, result in enumerate(results) if not result.successful()]
    print('\n下載結束,失敗片段數:{}'.format(len(failed)))