將下載或上傳任務(一個文件或一個壓縮包)人為地劃分為幾個部分,每一個部分採用一個線程進行上傳或下載。如果碰到網絡故障,可以從已經上傳或下載的部分開始,繼續上傳或下載未完成的部分,而沒有必要從頭開始。這樣用戶可以節省時間,提高速度。
一、分割視頻
1、分割的每個小部分的大小:
size = 100 * 1024  # bytes per chunk (100 KiB)
2、獲取視頻大小:
當在請求上設置 stream=True 時,requests 不會立即下載響應體,只會先取得響應頭;響應體會推遲到訪問 Response.content 屬性時才下載。
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0'
}
# stream=True defers the body download, so only the response headers are
# fetched here. The `with` block closes the response afterwards; otherwise the
# unread streamed connection is never handed back to the connection pool.
with requests.get(url, headers=headers, stream=True) as resp:
    content_length = resp.headers['content-length']  # header value, still a str
3、分割視頻:
設置請求頭里面的Range參數
可以分割成多少個視頻:
# Number of chunks. Floor division means the final chunk absorbs the remainder;
# max(1, ...) keeps a file smaller than `size` from producing zero chunks
# (which would download nothing at all).
count = max(1, int(content_length) // size)
設置Range:
Range:告知服務端,客戶端想要從文件的指定位置開始下載。格式:'Range': 'bytes=start-end',其中 start 為開始位置,end 為結束位置(兩端的字節都包含在內)。
代碼:
# One {'Range': ...} header dict per chunk, in download order.
range_list = []  # was misspelled `range_liat`, so the append below raised NameError
for i in range(count):
    start = i * size  # start offset of this chunk
    if i == count - 1:
        # Last chunk: run to the end of the file.
        # NOTE(review): content_length is one past the last valid offset; this
        # relies on the server treating an oversized end as "to end of file".
        end = content_length
    else:
        end = start + size
    if i > 0:
        # Both Range bounds are inclusive and the previous chunk already ended
        # on byte i * size, so begin one byte later to avoid a duplicate byte.
        start += 1
    headers_range = {'Range': f'bytes={start}-{end}'}
    range_list.append(headers_range)
二、請求視頻
1、設置請求頭
for i, headers_range in enumerate(range_list):
    # Merge the common headers (User-Agent) into this chunk's Range header.
    # This mutates the dict stored in range_list.
    headers_range.update(headers)
    resp = requests.get(url, headers=headers_range)
    # Fail loudly on 4xx/5xx instead of saving an error page as chunk data.
    resp.raise_for_status()
2、保存視頻
# Store the chunk body in a file named after the chunk index.
chunk_bytes = resp.content
with open(f'{i}', 'wb') as chunk_file:
    chunk_file.write(chunk_bytes)
三、斷點續傳
確保下載文件的文件夾里沒有其他文件
1、獲取保存視頻的文件夾里面的文件的名稱:
import os

# Names of the entries currently present in the download folder.
f_list = os.listdir(path)
2、請求一小段視頻時,先判斷當前文件夾里是否存在,不存在才下載
# f_list is the os.listdir() result of the download folder; chunk files are
# named after their index, so a missing name means chunk i must be fetched.
if f'{i}' not in f_list:
    pass  # download chunk i here
四、合並視頻
遍歷小段視頻保存的文件夾,按順序保存到一個文件里就好了
import os


def file_merge(path, path_name):
    """Concatenate the numbered chunk files in *path* into one file.

    Chunk files are expected to be named ``0``, ``1``, ``2``, ... Only
    entries whose name consists of digits are counted as chunks, so a stale
    merged file or any other stray file in the folder no longer inflates the
    chunk count.

    :param path: folder holding the downloaded chunk files
    :param path_name: destination of the merged file (location + name + extension)
    :raises FileNotFoundError: if a chunk in the sequence is missing; raised
        before anything is written or deleted
    """
    chunk_count = sum(1 for name in os.listdir(path) if name.isdigit())
    chunk_paths = [os.path.join(path, f'{i}') for i in range(chunk_count)]

    # Validate up front: failing half-way through would already have deleted
    # the chunks merged so far.
    missing = [p for p in chunk_paths if not os.path.isfile(p)]
    if missing:
        raise FileNotFoundError(f'missing chunk file(s): {missing}')

    with open(path_name, 'wb') as fw:
        for chunk_path in chunk_paths:
            with open(chunk_path, 'rb') as fr:
                fw.write(fr.read())
            # The chunk is fully copied and closed; remove it.
            os.remove(chunk_path)
    print('合並完成:', path)
五、完整代碼:
1、requests版本,多進程,沒有進度條
import os
import time
from multiprocessing.pool import Pool

import requests


def get_range(url):
    """Build one request-header dict per chunk of the remote file.

    The file is cut into pieces of ``size`` bytes. Range bounds are inclusive
    on both ends: chunk 0 covers ``[0, size]`` and every later chunk starts
    one byte past the previous chunk's end. The last chunk runs to the final
    byte of the file and therefore absorbs the remainder.

    :param url: address of the file to download
    :return: list of header dicts (``Range`` plus ``User-Agent``), one per
        chunk, in chunk order; empty if the server reports a length of 0
    :raises requests.HTTPError: if the probe request fails
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
    }
    size = 1024 * 1000  # download the file in pieces of roughly 1000 KB

    # stream=True fetches only the headers; the `with` block closes the
    # response so the unread connection is released.
    with requests.get(url, headers=headers, stream=True) as res:
        res.raise_for_status()
        content_length = int(res.headers['Content-Length'])

    if content_length <= 0:
        return []

    # A file smaller than `size` must still yield one chunk.
    count = max(1, content_length // size)
    headers_list = []
    for i in range(count):
        start = i * size
        if i == count - 1:
            end = content_length - 1  # offset of the last byte of the file
        else:
            end = start + size
        if i > 0:
            start += 1  # the previous chunk already included byte i * size
        rang = {'Range': f'bytes={start}-{end}'}
        rang.update(headers)
        headers_list.append(rang)
    return headers_list


def down_file(url, headers, i, path):
    """Download one chunk and store it as ``<path>/<i>``.

    :param url: address of the file
    :param headers: request headers, including the chunk's ``Range``
    :param i: chunk index, used as the file name
    :param path: folder the chunk is saved in
    :raises requests.HTTPError: if the server answers with an error status
    """
    resp = requests.get(url, headers=headers)
    # Never store an error page as chunk data.
    resp.raise_for_status()
    with open(f'{path}/{i}', 'wb') as f:
        f.write(resp.content)


def file_merge(path, path_name):
    """Concatenate the numbered chunk files in *path* into *path_name*.

    Only entries whose name consists of digits count as chunks, so a merged
    file left in the same folder by an earlier run is ignored. The output is
    opened in write mode, so a re-run replaces it rather than appending.

    :param path: folder holding the chunk files
    :param path_name: destination of the merged file (location + name + extension)
    :raises FileNotFoundError: if a chunk in the sequence is missing; raised
        before anything is written or deleted
    """
    ts_list = os.listdir(path)
    ts_list.sort()
    print(ts_list)

    chunk_count = sum(1 for name in ts_list if name.isdigit())
    chunk_paths = [os.path.join(path, f'{i}') for i in range(chunk_count)]
    missing = [p for p in chunk_paths if not os.path.isfile(p)]
    if missing:
        raise FileNotFoundError(f'missing chunk file(s): {missing}')

    with open(path_name, mode='wb') as fw:
        for chunk_path in chunk_paths:
            with open(chunk_path, mode='rb') as fr:
                fw.write(fr.read())
            # The chunk is fully copied and closed; remove it.
            os.remove(chunk_path)
    print('合並完成:', path)


if __name__ == '__main__':
    start_time = time.time()
    url = 'https://pic.ibaotu.com/00/51/34/88a888piCbRB.mp4'
    header_list = get_range(url)
    path = './test'
    pool = Pool(8)  # process pool
    if not os.path.exists(path):
        os.mkdir(path)

    # List the folder once; chunks already on disk are skipped (resume).
    ts_list = os.listdir(path)
    pending = []
    for i, headers in enumerate(header_list):
        if f'{i}' not in ts_list:
            pending.append(
                pool.apply_async(down_file, args=(url, headers, i, path))
            )
    pool.close()
    pool.join()

    # apply_async swallows worker exceptions until .get() is called; surface
    # them here so an incomplete download is never merged.
    for result in pending:
        result.get()

    end_time = time.time()
    print(f"下載完成,共花費了{end_time - start_time}")
    file_merge('./test', "./test/merge.mp4")
2、asyncio版本,異步,有進度條
import asyncio
import os
import time

from aiohttp import ClientSession
from tqdm import tqdm

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
}
size = 1024 * 1000  # nominal length of each section, in bytes


def get_range(content_length):
    """Build the ``Range`` header for every section of the file.

    Range bounds are inclusive on both ends: section 0 covers ``[0, size]``
    and every later section starts one byte past the previous section's end.
    The last section runs to the final byte and absorbs the remainder.

    :param content_length: total length of the file (int or numeric string)
    :return: list of ``{'Range': 'bytes=start-end'}`` dicts in section order;
        empty if the length is 0
    """
    total = int(content_length)
    if total <= 0:
        return []

    # A file smaller than `size` must still yield one section.
    count = max(1, total // size)
    range_list = []
    for i in range(count):
        start = i * size
        if i == count - 1:
            end = total - 1  # offset of the last byte of the file
        else:
            end = start + size
        if i > 0:
            start += 1  # the previous section already included byte i * size
        range_list.append({'Range': f'bytes={start}-{end}'})
    return range_list


async def async_main(video_url, section_path):
    """Split the file into sections and download the missing ones concurrently.

    :param video_url: address of the file
    :param section_path: folder the sections are saved in
    """
    async with ClientSession() as session:
        # Probe request: only the headers are needed, the body is not read.
        async with session.get(video_url, headers=headers) as resp:
            resp.raise_for_status()
            content_length = resp.headers['Content-Length']  # total length

        range_list = get_range(content_length)
        sem = asyncio.Semaphore(80)  # cap on concurrent downloads
        if not os.path.exists(section_path):
            os.mkdir(section_path)

        # Progress bar
        with tqdm(total=int(content_length), unit='', ascii=True, unit_scale=True) as bar:
            # os.listdir returns bare file names, so compare against the bare
            # section index rather than a path that includes the folder.
            down_list = set(os.listdir(section_path))
            tasks = []
            for i, headers_range in enumerate(range_list):
                if f'{i}' not in down_list:
                    headers_range.update(headers)
                    tasks.append(
                        down_f(session, video_url, headers_range, i,
                               section_path, sem, bar)
                    )
                else:
                    # Already downloaded: advance by the real size on disk.
                    bar.update(os.path.getsize(os.path.join(section_path, f'{i}')))
            await asyncio.gather(*tasks)


async def down_f(session, video_url, headers_range, i, section_path, sem, bar):
    """Download one section and store it as ``<section_path>/<i>``.

    :param session: open aiohttp ClientSession
    :param video_url: address of the file
    :param headers_range: request headers, including the section's ``Range``
    :param i: section index, used as the file name
    :param section_path: folder the section is saved in
    :param sem: semaphore limiting concurrency
    :param bar: tqdm progress bar, advanced by the number of bytes received
    """
    async with sem:  # limit concurrency
        async with session.get(video_url, headers=headers_range) as resp:
            # Never store an error page as section data.
            resp.raise_for_status()
            # bytearray grows in place; repeated `bytes +=` copies the whole
            # buffer on every chunk.
            data = bytearray()
            async for chunk in resp.content.iter_chunked(1024):
                data += chunk
        # The file is written in one go once the section is complete.
        with open(f'{section_path}/{i}', 'wb') as f:
            f.write(data)
        bar.update(len(data))  # advance by the bytes actually received


def main(video_url, section_path):
    """Run the asynchronous download to completion."""
    # asyncio.run creates and closes the event loop itself.
    asyncio.run(async_main(video_url, section_path))


def file_merge(path, path_name):
    """Concatenate the numbered section files in *path* into *path_name*.

    Only entries whose name consists of digits count as sections, so a merged
    file left in the same folder by an earlier run is ignored. The output is
    opened in write mode, so a re-run replaces it rather than appending.

    :param path: folder holding the section files
    :param path_name: destination of the merged file (location + name + extension)
    :raises FileNotFoundError: if a section in the sequence is missing; raised
        before anything is written or deleted
    """
    ts_list = os.listdir(path)
    ts_list.sort()
    print(ts_list)

    chunk_count = sum(1 for name in ts_list if name.isdigit())
    chunk_paths = [os.path.join(path, f'{i}') for i in range(chunk_count)]
    missing = [p for p in chunk_paths if not os.path.isfile(p)]
    if missing:
        raise FileNotFoundError(f'missing chunk file(s): {missing}')

    with open(path_name, mode='wb') as fw:
        for chunk_path in chunk_paths:
            with open(chunk_path, mode='rb') as fr:
                fw.write(fr.read())
            # The section is fully copied and closed; remove it.
            os.remove(chunk_path)
    print('合並完成:', path)


if __name__ == '__main__':
    start_time = time.time()
    url = 'https://pic.ibaotu.com/00/51/34/88a888piCbRB.mp4'
    path = './test2'
    main(url, path)
    end_time = time.time()
    print(f"下載完成,共花費了{end_time - start_time}")
    # Merging is left disabled here, as in the original script:
    # file_merge('./test2', './test2/merge.mp4')
3.下載文件並顯示進度條
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import os
from urllib.request import urlopen

import requests
from tqdm import tqdm


def download_from_url(url, dst):
    """Download *url* to *dst* with a progress bar, resuming a partial file.

    If *dst* already exists and is shorter than the remote file, only the
    missing tail is requested and appended. If the server does not report a
    Content-Length, or answers a ranged request with anything other than
    206 Partial Content, the download restarts from byte 0 and *dst* is
    overwritten.

    :param url: address of the file to download
    :param dst: local path the file is written to
    :return: True if the file is complete on disk (already present or
        downloaded now), False if probing or downloading failed
    """
    # Probe the remote length; -1 means the server sent no Content-Length.
    try:
        with urlopen(url) as probe:
            file_size = int(probe.info().get('Content-Length', -1))
    except Exception as e:
        print(e)
        print("錯誤,訪問url: %s 異常" % url)
        return False

    size_known = file_size >= 0

    # Bytes already on disk (0 when the file does not exist yet).
    first_byte = os.path.getsize(dst) if os.path.exists(dst) else 0

    # Only a known remote size can prove the local file is complete. Without
    # this guard, the -1 sentinel made every download look finished.
    if size_known and first_byte >= file_size:
        print("文件已經存在,無需下載")
        return True

    if not size_known:
        # A partial file cannot be validated without the total size.
        first_byte = 0

    # Open-ended range: "from first_byte to the end of the file".
    header = {"Range": "bytes=%s-" % first_byte} if first_byte else {}

    # Best-effort by design: any failure is printed and reported as False.
    try:
        with requests.get(url, headers=header, stream=True) as req:
            req.raise_for_status()
            if first_byte and req.status_code != 206:
                # The server ignored Range and is sending the whole file;
                # appending it to the partial file would corrupt the result.
                first_byte = 0
            mode = 'ab' if first_byte else 'wb'
            with open(dst, mode) as f, tqdm(
                    total=file_size if size_known else None,
                    initial=first_byte,
                    unit='B',
                    unit_scale=True,
                    desc=url.split('/')[-1]) as pbar:
                for chunk in req.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
                        # The final chunk is usually shorter than 1024 bytes.
                        pbar.update(len(chunk))
    except Exception as e:
        print(e)
        return False
    return True


if __name__ == '__main__':
    url = "https://dl.360safe.com/360/inst.exe"
    download_from_url(url, "inst.exe")
原文鏈接:https://blog.csdn.net/m0_46652894/article/details/106155852