用Python實現大文件分割


Python代碼如下:

import os
import sys

kilobytes = 1024
# NOTE: this is 1024 * 1000 bytes, i.e. neither 10**6 nor 2**20. The value is
# kept as-is so that the default chunk size below does not change.
megabytes = kilobytes * 1000
chunksize = 200 * megabytes  # default chunk size, in bytes


def split(fromfile, todir, chunksize=chunksize):
    """Split a file into numbered part files of at most ``chunksize`` bytes.

    The parts are written to ``todir`` as ``data0001``, ``data0002``, ... in
    the order they are read from ``fromfile``. Every part except possibly the
    last one is exactly ``chunksize`` bytes long.

    WARNING: if ``todir`` already exists, every entry directly inside it is
    removed with ``os.remove`` before the parts are written, so ``fromfile``
    must not be located inside ``todir``.

    Args:
        fromfile: Path of the file to split; it is read in binary mode.
        todir: Directory that receives the part files. It is created
            (including missing parent directories) when it does not exist.
        chunksize: Maximum size of each part in bytes; must be positive.

    Returns:
        The number of part files written (0 for an empty input file).

    Raises:
        ValueError: If ``chunksize`` is not positive.
        OSError: If the source cannot be opened, or the directory or a part
            file cannot be created, removed or written.
    """
    # Reject sizes that cannot produce sensible parts before any side effect:
    # read(0) returns nothing (zero parts, but the directory would already be
    # emptied) and a negative size makes read() load the entire file at once.
    if chunksize < 1:
        raise ValueError(
            'chunksize must be a positive integer, got %r' % (chunksize,))

    # Open the source *before* touching todir, so that a missing or unreadable
    # source does not wipe out the existing contents of the directory.
    with open(fromfile, 'rb') as inputfile:
        if not os.path.exists(todir):
            os.makedirs(todir)
        else:
            # Remove leftovers from a previous run.
            for fname in os.listdir(todir):
                os.remove(os.path.join(todir, fname))

        partnum = 0
        while True:
            chunk = inputfile.read(chunksize)
            if not chunk:  # end of file reached
                break
            partnum += 1
            filename = os.path.join(todir, 'data%04d' % partnum)
            with open(filename, 'wb') as fileobj:
                fileobj.write(chunk)
    return partnum


def main():
    """Ask for the source, target directory and chunk size, then split."""
    fromfile = input('File to be split?')
    todir = input('Directory to store part files?')
    size = int(input('Chunksize to be split?'))
    absfrom, absto = map(os.path.abspath, [fromfile, todir])
    print('Splitting', absfrom, 'to', absto, 'by', size)
    try:
        parts = split(fromfile, todir, size)
    except Exception:
        # Top-level boundary: report the failure instead of a traceback, but
        # let KeyboardInterrupt / SystemExit propagate.
        print('Error during split:')
        print(*sys.exc_info()[:2])
    else:
        print('split finished:', parts, 'parts are in', absto)


if __name__ == '__main__':
    main()

以data.txt文件為例,此文件是由Python隨機生成的數字構成的數據集,大小為1.1G,現將它按每塊128M分割成多個子文件(最後一個子文件可能小於128M),運行結果如下:

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM