requests分塊下載文件



將下載或上傳任務(一個文件或一個壓縮包)人為地劃分為幾個部分,每一個部分采用一個線程進行上傳或下載。如果碰到網絡故障,可以從已經上傳或下載完成的部分之后繼續處理未完成的部分,而沒有必要從頭開始。這樣可以節省時間,提高速度。

一、分割視頻

1、分割的每個小部分的大小:

size = 100 * 1024  # bytes per chunk: 100 KiB

2、獲取視頻大小:
當在請求上設置 stream=True 時,requests 不會立即下載響應體,只先獲取響應頭;直到訪問 Response.content 屬性時才真正下載響應體。

# Headers sent with every request: a desktop Firefox User-Agent.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0'
}
# stream=True defers the body download, so only the response headers are needed here.
resp = requests.get(url, headers=headers, stream=True)
# Total size of the file in bytes, still the header's string value at this point.
content_length = resp.headers['content-length']

3、分割視頻:
設置請求頭里面的Range參數

可以分割成多少個視頻:

# Number of whole `size`-byte chunks in the file (integer division drops the remainder).
count = int(content_length) // size

設置Range:

Range:告知服務端,客戶端想要從文件的指定位置開始下載,格式:

'Range': 'bytes=start-end'

start 為開始位置,end 為結束位置(字節序號從 0 開始,兩端都包含在內)。

  代碼:

# One {'Range': ...} header dict per chunk, in download order.
range_list = []
# HTTP byte ranges are inclusive on both ends, so the last byte of the
# file sits at index length - 1.
last_byte = int(content_length) - 1
for i in range(count):
    # The first chunk starts at byte 0; every later chunk starts one byte
    # after the previous chunk's inclusive end.
    start = 0 if i == 0 else i * size + 1
    # The final chunk runs to the end of the file so the remainder is not lost.
    end = last_byte if i == count - 1 else (i + 1) * size
    range_list.append({'Range': f'bytes={start}-{end}'})

  

 

 

 

二、請求視頻

1、設置請求頭

for i, headers_range in enumerate(range_list):
    # Merge the shared headers (User-Agent) into this chunk's Range dict, in place.
    headers_range.update(headers)
    # Request chunk i using its Range header.
    resp = requests.get(url, headers=headers_range)

2、保存視頻

# Save the bytes of chunk i in a file named after its index.
with open(f'{i}', 'wb') as f:
    f.write(resp.content)

三、斷點續傳

確保下載文件的文件夾里沒有其他文件

1、獲取保存視頻的文件夾里面的文件的名稱:

import os
# Names of the entries currently in the download folder (bare names, no directory part).
f_list = os.listdir(path)


2、請求一小段視頻時,先判斷當前文件夾里是否存在,不存在才下載

# Fetch chunk i only when the folder listing (f_list) has no file named after its index.
if f'{i}' not in f_list:
    pass  # download and save chunk i here

四、合並視頻

遍歷小段視頻保存的文件夾,按順序保存到一個文件里就好了

import os

def file_merge(path, path_name):
    """
    Concatenate the numbered chunk files in ``path`` into one output file.

    Chunk files are the entries whose names are plain decimal numbers
    (``0``, ``1``, ``2`` ...). Every other entry in the folder is ignored,
    so a stray file cannot inflate the chunk count. Each chunk is deleted
    once its bytes have been written to the output.

    :param path: folder that holds the chunk files
    :param path_name: full path (folder + file name + extension) of the merged file
    :raises FileNotFoundError: if the chunk numbers do not form 0..n-1;
        nothing is written or deleted in that case
    """
    digits = set('0123456789')
    # Sort numerically: a plain string sort would put '10' before '2'.
    chunk_names = sorted(
        (name for name in os.listdir(path) if set(name) <= digits),
        key=int,
    )

    # Validate before touching anything, so an incomplete download never
    # costs the chunks that did arrive.
    indices = [int(name) for name in chunk_names]
    if indices != list(range(len(indices))):
        raise FileNotFoundError(
            f'chunk files in {path!r} are not numbered 0..{len(indices) - 1}: {chunk_names}'
        )

    with open(path_name, 'wb') as fw:
        for name in chunk_names:
            chunk_path = os.path.join(path, name)
            with open(chunk_path, 'rb') as fr:
                fw.write(fr.read())
            # The chunk is now part of the output, so it can go.
            os.remove(chunk_path)
    print('合並完成:', path)

 

五、完整代碼:

1、requests版本,多進程,沒有進度條

import os
import time
import requests
from multiprocessing.pool import Pool


def get_range(url):
    """
    Build the request headers for downloading ``url`` in fixed-size chunks.

    The file length is read from the ``Content-Length`` response header and
    split into consecutive byte ranges of at most 1000 KiB each. A file
    smaller than one chunk still yields a single range, so it is downloaded.

    :param url: address of the file to download
    :return: list of header dicts, one per chunk, each holding a ``Range``
        entry plus the shared ``User-Agent``
    :raises KeyError: if the response has no ``Content-Length`` header
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
    }
    size = 1024 * 1000  # bytes per chunk (1000 KiB)

    # stream=True defers the body download; only the headers are needed here.
    # The with-block closes the response so its connection is not left open.
    with requests.get(url, headers=headers, stream=True) as res:
        total = int(res.headers['Content-Length'])

    # HTTP byte ranges are inclusive on both ends, hence the "- 1".
    return [
        {'Range': f'bytes={start}-{min(start + size, total) - 1}', **headers}
        for start in range(0, total, size)
    ]


def down_file(url, headers, i, path):
    """
    Download one chunk and store it in ``path`` under its index.

    :param url: address of the video
    :param headers: request headers, including the ``Range`` of this chunk
    :param i: index of the chunk, used as the file name
    :param path: folder the chunk is saved in
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status
    """
    resp = requests.get(url, headers=headers)
    # Refuse to store an error page as chunk data: a file with this name
    # would later be treated as an already-downloaded chunk.
    resp.raise_for_status()
    with open(os.path.join(path, str(i)), 'wb') as f:
        f.write(resp.content)


def file_merge(path, path_name):
    """
    Concatenate the numbered chunk files in ``path`` into one output file.

    Chunk files are the entries whose names are plain decimal numbers
    (``0``, ``1``, ``2`` ...). Every other entry is ignored, including a
    previously merged output stored in the same folder. Each chunk is
    deleted once its bytes have been written to the output.

    :param path: folder that holds the chunk files
    :param path_name: full path (folder + file name + extension) of the merged file
    :raises FileNotFoundError: if the chunk numbers do not form 0..n-1;
        nothing is written or deleted in that case
    """
    digits = set('0123456789')
    # Sort numerically: a plain string sort would put '10' before '2'.
    chunk_names = sorted(
        (name for name in os.listdir(path) if set(name) <= digits),
        key=int,
    )
    print(chunk_names)

    # Validate before touching anything, so an incomplete download never
    # costs the chunks that did arrive.
    indices = [int(name) for name in chunk_names]
    if indices != list(range(len(indices))):
        raise FileNotFoundError(
            f'chunk files in {path!r} are not numbered 0..{len(indices) - 1}: {chunk_names}'
        )

    # Overwrite a stale output instead of appending to it, but only when
    # there is something to replace it with.
    mode = 'wb' if chunk_names else 'ab'
    with open(path_name, mode=mode) as fw:
        for name in chunk_names:
            chunk_path = os.path.join(path, name)
            with open(chunk_path, mode='rb') as fr:
                fw.write(fr.read())
            # The chunk is now part of the output, so it can go.
            os.remove(chunk_path)
    print('合並完成:', path)


if __name__ == '__main__':
    # Download the sample video in chunks with a pool of 8 worker
    # processes, then merge the chunks into one file.
    start_time = time.time()
    url = 'https://pic.ibaotu.com/00/51/34/88a888piCbRB.mp4'
    header_list = get_range(url)
    path = './test'
    os.makedirs(path, exist_ok=True)

    # One listing is enough: chunks already on disk are skipped (resume).
    existing = set(os.listdir(path))
    pool = Pool(8)  # process pool
    pending = [
        pool.apply_async(down_file, args=(url, headers, i, path))
        for i, headers in enumerate(header_list)
        if f'{i}' not in existing
    ]
    pool.close()
    pool.join()
    # apply_async hides worker exceptions until get() is called; surface
    # them here so a failed chunk stops the run before merging.
    for result in pending:
        result.get()

    end_time = time.time()
    print(f"下載完成,共花費了{end_time - start_time}")

    file_merge(path, f'{path}/merge.mp4')

 

2、asyncio版本,異步,有進度條

import asyncio
import os
import time

from tqdm import tqdm
from aiohttp import ClientSession

# Length of each downloaded chunk in bytes (1000 KiB).
size = 1000 * 1024

# Request headers shared by every HTTP call in this script.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'}


def get_range(content_length, chunk_size=None):
    """
    Split a file of ``content_length`` bytes into consecutive byte ranges.

    Every range covers at most ``chunk_size`` bytes; the last one is
    shorter when the length is not a multiple of the chunk size. A file
    smaller than one chunk yields a single range, and an empty file yields
    no ranges.

    :param content_length: total length in bytes (int or numeric string)
    :param chunk_size: bytes per chunk; defaults to the module-level ``size``
    :return: list of fresh ``{'Range': 'bytes=start-end'}`` dicts, in order
    """
    if chunk_size is None:
        chunk_size = size
    total = int(content_length)
    # HTTP byte ranges are inclusive on both ends, hence the "- 1".
    return [
        {'Range': f'bytes={start}-{min(start + chunk_size, total) - 1}'}
        for start in range(0, total, chunk_size)
    ]


async def async_main(video_url, section_path):
    """
    Download ``video_url`` into ``section_path`` as numbered chunk files.

    Chunks whose file already exists in ``section_path`` are skipped, so an
    interrupted run can be resumed.

    :param video_url: address of the video
    :param section_path: folder the chunk files are saved in
    """
    async with ClientSession() as session:
        # Only the length is needed from this first request; leave the
        # response context right away instead of holding it open for the
        # whole download.
        async with session.get(video_url, headers=headers) as resp:
            content_length = resp.headers['Content-Length']

        range_list = get_range(content_length)
        sem = asyncio.Semaphore(80)  # cap on simultaneous chunk downloads
        os.makedirs(section_path, exist_ok=True)

        # os.listdir returns bare file names, so the resume check has to
        # compare against the chunk index alone, not against a full path.
        down_list = set(os.listdir(section_path))

        with tqdm(total=int(content_length), unit='', ascii=True, unit_scale=True) as bar:
            tasks = []
            for i, headers_range in enumerate(range_list):
                if f'{i}' in down_list:
                    # Already on disk: only account for it in the progress bar.
                    bar.update(size)
                    continue
                headers_range.update(headers)
                tasks.append(
                    down_f(session, video_url, headers_range, i, section_path, sem, bar)
                )
            await asyncio.gather(*tasks)


async def down_f(session, video_url, headers_range, i, section_path, sem, bar):
    """
    Download one chunk and save it as ``{section_path}/{i}``.

    :param session: aiohttp session used for the request
    :param video_url: address of the video
    :param headers_range: request headers, including the ``Range`` of this chunk
    :param i: index of the chunk, used as the file name
    :param section_path: folder the chunk is saved in
    :param sem: semaphore that caps the number of simultaneous downloads
    :param bar: progress bar, advanced by the number of bytes received
    """
    async with sem:
        async with session.get(video_url, headers=headers_range) as resp:
            # Refuse to store an error page as chunk data.
            resp.raise_for_status()
            # Read the body in one call; growing a bytes object with +=
            # copies the whole buffer on every 1 KiB piece.
            data = await resp.read()

        # The file is created only after the full body has arrived, so a
        # failed download leaves no partial chunk behind.
        with open(f'{section_path}/{i}', 'wb') as f:
            f.write(data)
        bar.update(len(data))


def main(video_url, section_path):
    """
    Run the asynchronous chunk download to completion.

    :param video_url: address of the video
    :param section_path: folder the chunk files are saved in
    """
    # asyncio.run creates and closes its own event loop. Calling
    # get_event_loop() with no running loop is deprecated and fails on
    # recent Python versions.
    asyncio.run(async_main(video_url, section_path))


def file_merge(path, path_name):
    """
    Concatenate the numbered chunk files in ``path`` into one output file.

    Chunk files are the entries whose names are plain decimal numbers
    (``0``, ``1``, ``2`` ...). Every other entry is ignored, including a
    previously merged output stored in the same folder. Each chunk is
    deleted once its bytes have been written to the output.

    :param path: folder that holds the chunk files
    :param path_name: full path (folder + file name + extension) of the merged file
    :raises FileNotFoundError: if the chunk numbers do not form 0..n-1;
        nothing is written or deleted in that case
    """
    digits = set('0123456789')
    # Sort numerically: a plain string sort would put '10' before '2'.
    chunk_names = sorted(
        (name for name in os.listdir(path) if set(name) <= digits),
        key=int,
    )
    print(chunk_names)

    # Validate before touching anything, so an incomplete download never
    # costs the chunks that did arrive.
    indices = [int(name) for name in chunk_names]
    if indices != list(range(len(indices))):
        raise FileNotFoundError(
            f'chunk files in {path!r} are not numbered 0..{len(indices) - 1}: {chunk_names}'
        )

    # Overwrite a stale output instead of appending to it, but only when
    # there is something to replace it with.
    mode = 'wb' if chunk_names else 'ab'
    with open(path_name, mode=mode) as fw:
        for name in chunk_names:
            chunk_path = os.path.join(path, name)
            with open(chunk_path, mode='rb') as fr:
                fw.write(fr.read())
            # The chunk is now part of the output, so it can go.
            os.remove(chunk_path)
    print('合並完成:', path)


if __name__ == '__main__':
    # Download the sample video into ./test2 and report the elapsed time.
    url, path = 'https://pic.ibaotu.com/00/51/34/88a888piCbRB.mp4', './test2'
    start_time = time.time()
    main(url, path)
    end_time = time.time()
    print(f"下載完成,共花費了{end_time - start_time}")

    # Merging is left as a manual step:
    # file_merge('./test2', './test2/merge.mp4')

 

3、下載文件並顯示進度條(支持斷點續傳)

# !/usr/bin/python3
# -*- coding: utf-8 -*-

import os
from urllib.request import urlopen

import requests
from tqdm import tqdm
def download_from_url(url, dst):
    """
    Download ``url`` to ``dst`` with a progress bar, resuming a partial file.

    When ``dst`` already exists, only the missing tail is requested with a
    ``Range`` header and appended. If the server does not honour the range,
    or does not report the file length, the file is downloaded from
    scratch.

    @param: url to download file
    @param: dst place to put the file
    :return: bool -- True when ``dst`` is complete, False on any error
    """
    # Probe the remote length; -1 means the server did not report one.
    try:
        with urlopen(url) as probe:
            file_size = int(probe.info().get('Content-Length', -1))
    except (OSError, ValueError) as e:
        print(e)
        print("錯誤,訪問url: %s 異常" % url)
        return False

    # Bytes already on disk from an earlier, possibly interrupted, run.
    first_byte = os.path.getsize(dst) if os.path.exists(dst) else 0

    # A known length that is already fully on disk: nothing left to fetch.
    if 0 <= file_size <= first_byte:
        print("文件已經存在,無需下載")
        return True

    if file_size < 0:
        # Unknown length: completeness cannot be judged, so start over.
        first_byte = 0

    # Open-ended range: from the first missing byte to the end of the file.
    header = {"Range": "bytes=%s-" % first_byte}
    try:
        with requests.get(url, headers=header, stream=True) as req:
            req.raise_for_status()
            if req.status_code != 206:
                # The server ignored the Range header and is sending the
                # whole file; appending would corrupt dst, so restart.
                first_byte = 0
            mode = 'ab' if first_byte else 'wb'
            with tqdm(
                total=file_size if file_size >= 0 else None,
                initial=first_byte,
                unit='B', unit_scale=True, desc=url.split('/')[-1],
            ) as pbar, open(dst, mode) as f:
                for chunk in req.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
                        # The last chunk is usually shorter than 1024 bytes.
                        pbar.update(len(chunk))
    except OSError as e:
        # requests' exceptions derive from OSError, so this covers both
        # network and file errors.
        print(e)
        return False

    return True

if __name__ == '__main__':
    # Demo: fetch the installer into the current directory as inst.exe.
    url = "https://dl.360safe.com/360/inst.exe"
    download_from_url(url, dst="inst.exe")

 

原文鏈接:https://blog.csdn.net/m0_46652894/article/details/106155852

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM