python urllib2 實現大文件下載


 

使用urllib2下載並分塊copy:

# from urllib2 import urlopen # Python 2

from urllib.request import urlopen # Python 3



response = urlopen(url)

CHUNK = 16 * 1024

with open(file, 'wb') as f:

    while True:

        chunk = response.read(CHUNK)

        if not chunk:

            break

        f.write(chunk)

 

 

另一種大文件copy方式, shutil:

import shutil

try:

    from urllib.request import urlopen # Python 3

except ImportError:

    from urllib2 import urlopen # Python 2



def get_large_file(url, file, length=16*1024):

    req = urlopen(url)

    with open(file, 'wb') as fp:

        shutil.copyfileobj(req, fp, length)

 

關於shutil的一些介紹:https://www.cnblogs.com/zhangboblogs/p/7821702.html

 

使用urlib2並顯示下載進度,以視頻為例:

#coding:utf-8
import urllib 
import urllib2 
import requests
import random 
import uuid
import time
import sys
from threading import Thread

#img_url = "https://p.ssl.qhimg.com/dm/48_48_100/t017aee03b28107657b.jpg"


img="http://vip.zuiku8.com/1810/妖精的尾巴最終季-01.mp4"

my_headers={
    "User-Agent":"Mozilla/5.0 (Windows NT 6.3; Win64; x64) \
    AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36",
   'Referer':'https://www.bilibili.com/bangumi/play/ep250436',
}


def chunk_report(bytes_so_far, chunk_size, total_size):
   percent = float(bytes_so_far) / total_size
   percent = round(percent*100, 2)
   if percent %1==0:
      sys.stdout.write("Downloaded %d of %d bytes (%0.2f%%)\n" % 
           (bytes_so_far, total_size, percent))

   if bytes_so_far >= total_size:
      sys.stdout.write('\n')

def chunk_read(response, url,chunk_size=8192, report_hook=None):
   total_size = response.info().getheader('Content-Length').strip()
   total_size = int(total_size)
   bytes_so_far = 0
   path_name=url.split("/")[-1]
   path_name=path_name.replace("\n","")
   path_name=path_name.decode("utf-8")
   print path_name
   with open("%s" % path_name, "wb") as f:
      while 1:
         chunk = response.read(chunk_size)
         f.write(chunk)
         f.flush() 
         bytes_so_far += len(chunk)
         if not chunk:
            break
         if report_hook:
            report_hook(bytes_so_far, chunk_size, total_size)
   return bytes_so_far

def down_load(img):
        print img
        request =  urllib2.Request(url=img, headers=my_headers)
        response = urllib2.urlopen(request);
        chunk_read(response,img, report_hook=chunk_report)
        print "downloading with urllib  --->"

if __name__ == '__main__':
     down_load(img)

 

結果:

 

 

 如果想在一行顯示,打印時加\r,end為空 :

print ('\r downloading...{:.1f}'.format(percent), end="")

\r 是移至本行行首
\b 是退一個字符

 

 

此外,提一下urllib,之后沒有用它,是因為不支持https:

#!/usr/bin/python
#encoding:utf-8
import urllib
import os

img="http://vip.zuiku8.com/1810/妖精的尾巴最終季-01.mp4"

def Schedule(a,b,c):
   '''
   a:已經下載的數據塊
   b:數據塊的大小
   c:遠程文件的大小
   '''
   per = 100.0*a*b/c
   if per > 100:
      per = 100
   print '%.2f%%' % per


def main():
   path=img.split(".")[-1]
   urllib.urlretrieve(img,path,Schedule)

if __name__ == '__main__':
   main()

 

 

使用urllib2時,發現https下載不成功,添加了如下代碼:

import ssl
ssl._create_default_https_context = ssl._create_unverified_context

 

 

 

 

 

 

 

 

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM