1.視頻爬取
# 1. Source code for downloading the videos:
import os
import threading

import requests
from bs4 import BeautifulSoup

from bj.models import Video

# Root directory; one sub-directory per listing page is created below it.
repo_dir = './../tmp/video'


def get_response(url, timeout=30):
    """Fetch *url* and return the raw response body as bytes.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        The response body (``bytes``); used both for HTML pages and for the
        binary video files.
    """
    # Send a browser-like User-Agent so the site does not reject the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
    }
    return requests.get(url=url, headers=headers, timeout=timeout).content


def get_content_video(html):
    """Extract (description, mp4 url, video id) tuples from a listing page.

    Args:
        html: Page markup as returned by ``get_response``.

    Returns:
        A list of ``(name, video_url, video_id)`` tuples, one per
        ``.j-r-list-c`` element that contains both a link and a ``.j-video``
        node. ``video_url`` / ``video_id`` are ``None`` when the
        corresponding ``data-mp4`` / ``data-id`` attribute is absent.
    """
    soup = BeautifulSoup(html, 'html.parser')
    urlList = []
    for item in soup.select('.j-r-list-c'):
        link = item.find('a')
        players = item.select('.j-video')
        # Skip entries lacking a link or a player node instead of crashing
        # on ``None.text`` / ``[][0]``.
        if link is None or not players:
            continue
        player = players[0]
        urlList.append((link.text, player.get('data-mp4'), player.get('data-id')))
    return urlList


def download(urlList, page):
    """Download every video in *urlList* on its own thread and record it.

    Args:
        urlList: Tuples of ``(name, video_url, video_id)`` as produced by
            ``get_content_video``.
        page: Page number as a string; used as the sub-directory name under
            ``repo_dir``.
    """
    f_path = os.path.join(repo_dir, page)
    # os._exists() only checks names inside the os module, so it was always
    # False for a path; test the filesystem instead.
    if not os.path.exists(f_path):
        print('路徑不存在,馬上創建!')
    os.makedirs(f_path, exist_ok=True)

    for name, video_url, video_id in urlList:
        # Entries without a download url cannot be fetched.
        if video_url is None:
            continue
        # File name is "<video id>.<last three characters of the url>".
        f_path_video = os.path.join(f_path, '%s.%s' % (video_id, video_url[-3:]))

        # One thread per file so the downloads overlap.
        thread = threading.Thread(target=save_video, args=(f_path_video, video_url))
        thread.start()

        # NOTE(review): the row is written as soon as the thread is started,
        # not after the download has been confirmed to succeed.
        Video.objects.create(
            video_id=video_id,
            video_url=video_url,
            video_desc=name,
        )


def save_video(f_path_video, video_url):
    """Fetch *video_url* and write the bytes to *f_path_video*."""
    response = get_response(video_url)
    with open(f_path_video, 'wb') as f:
        f.write(response)


def main():
    """Crawl listing pages 1 through 49 and download their videos."""
    for i in range(1, 50):
        # i is an int; it must be converted before string concatenation.
        print("第" + str(i) + "頁")
        url = 'http://www.budejie.com/video/%s' % str(i)
        html = get_response(url)
        urlList = get_content_video(html)
        download(urlList, str(i))

#
# if __name__=="__main__":
#     main()


def test():
    """Crawl a single page as a quick trial run.

    The trailing number in the link is the page number. For the first page
    the ``1`` is optional, but it is written out here so the multi-page url
    form is easy to see.
    """
    url = 'http://www.budejie.com/video/1'
    html = get_response(url)
    urlList = get_content_video(html)
    download(urlList, str(1))
2. 切割視頻 - 視頻尾部多余部分的切割(這里需要安裝ffmpeg很簡單,問度娘)
1 import os 2 import subprocess 3 import datetime 4 def substring(date): 5 r=date.decode() 6 r=r.strip() 7 rlist=r.split(":") 8 result=(int(rlist[0])*60*60)+(int(rlist[1])*60)+(float(rlist[2])) 9 return result 10 11 12 def sub_video(): 13 # url="/home/facelive/Downloads/videos/" 14 # url2="/home/facelive/Downloads/sub_videos/" 15 16 # 硬盤路徑(原視頻存放路徑) 17 url="/media/facelive/Elements/videos/" 18 # 切割后的視頻存放路徑 19 url2="/media/facelive/Elements/sub_videos/" 20 fileList= os.listdir(url) 21 22 23 for file in fileList: 24 #獲取當前文件的視頻長度 25 strcmd=["ffmpeg -i "+url+file+" 2>&1 | grep 'Duration' | cut -d ' ' -f 4 | sed s/,//"] 26 result=subprocess.run(args=strcmd,stdout=subprocess.PIPE,shell=True) 27 date=result.stdout 28 print(type(date)) 29 print(date) 30 time=substring(date) 31 end=time-4 32 sub="ffmpeg -ss 0 -t "+str(end)+" -accurate_seek -i "+url+file+" -codec copy -avoid_negative_ts 1 "+url2+file+'' 33 34 videoresult=subprocess.run(args=sub,shell=True) 35 print(time) 36 print("視頻截取完成!!") 37 38 39 def test(): 40 url = "/home/facelive/Downloads/videos/" 41 fileList = os.listdir(url) 42 for file in fileList: 43 print(file)
3. 視頻加水印
import os
import subprocess
import datetime


def logo_video(url="/media/facelive/Elements/videos/",
               url3="/media/facelive/Elements/logo_videos/",
               logo="/home/facelive/Downloads/image/11.png"):
    """Overlay a logo image on every video in *url*.

    Args:
        url: Directory holding the source videos.
        url3: Directory that receives the watermarked copies (created if
            missing).
        logo: Path of the image passed to ffmpeg as the second input.
    """
    os.makedirs(url3, exist_ok=True)

    for file_name in os.listdir(url):
        source = os.path.join(url, file_name)
        if not os.path.isfile(source):
            continue

        # Argument list with no shell, so names containing spaces or shell
        # metacharacters are passed through unchanged. The overlay x offset
        # is W-w (main width minus logo width); no y offset is given.
        subprocess.run([
            "ffmpeg", "-i", source, "-i", logo,
            "-filter_complex", "overlay=W-w",
            os.path.join(url3, file_name),
        ])
    print("視頻logo完成!!")
轉載:https://blog.csdn.net/wsywb111/article/details/78855145
“
Python爬取百思不得姐的視頻+視頻的切割+給視頻添加水印
”
