"""Find files with identical content under a directory, based on MD5.

Duplicates can optionally be removed. Removal only moves a file to the
recycle bin (through ``send2trash``); nothing is deleted permanently.
"""
import functools
import hashlib
import os
import time

try:
    import send2trash
except ImportError:
    # Only needed when duplicates are actually trashed; scan-only mode
    # works without it. walk() raises ImportError if deletion is requested.
    send2trash = None


def _hash_file(path, hasher, chunk_size=40960):
    """Feed the file at ``path`` into ``hasher`` and return the hex digest.

    The file is read in ``chunk_size``-byte pieces so large files are never
    loaded into memory at once, and it is closed even if reading fails.
    """
    with open(path, 'rb') as f:
        for content in iter(lambda: f.read(chunk_size), b''):
            hasher.update(content)
    return hasher.hexdigest()


def getMD5(path):
    """Return the hexadecimal MD5 digest of the file at ``path``.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    return _hash_file(path, hashlib.md5())


def getSha512(path):
    """Return the hexadecimal SHA-512 digest of the file at ``path``.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    return _hash_file(path, hashlib.sha512())


def timer(func):
    """Decorator that prints how long each call to ``func`` took.

    The wrapped function's return value is passed through, and its metadata
    (name, docstring) is preserved via ``functools.wraps``. Nothing is
    printed when ``func`` raises.
    """
    @functools.wraps(func)
    def deco(*args, **kwargs):
        # perf_counter is monotonic, so system clock adjustments cannot
        # skew the measured interval.
        time1 = time.perf_counter()
        result = func(*args, **kwargs)
        time2 = time.perf_counter()
        use_time = round(time2 - time1, 1)
        print('Elapsed %ss' % (use_time))
        return result
    # Return the wrapper itself (not a call to it), so it replaces ``func``.
    return deco


@timer
def walk(path, delete=None):
    """Scan ``path`` recursively and report files with identical content.

    Files are grouped by MD5 digest; an MD5 match is confirmed with SHA-512
    before a file is treated as a duplicate. When a duplicate is found and
    deletion is enabled, the previously seen copy is sent to the recycle
    bin and the current file is kept.

    Args:
        path: Root directory to scan.
        delete: ``True`` to trash duplicates, ``False`` to only report
            them, ``None`` (default) to ask the user interactively.

    Returns:
        Total size of the duplicate files found, in MiB, rounded to two
        decimals.

    Raises:
        ImportError: if deletion is requested but ``send2trash`` is not
            installed.
    """
    if delete is None:
        answer = input('Want to delete duplicate file? y/n\n')
        delete = answer.lower() == 'y'
    if delete and send2trash is None:
        raise ImportError('send2trash is required to delete duplicate files')

    size = 0
    seen = {}  # MD5 digest -> path of the most recent file with that digest
    n = 1
    for folder, _subfolders, filenames in os.walk(path):
        for filename in filenames:
            print('\rHas scanned %s files' % n, end='')
            n += 1
            root = os.path.join(folder, filename)
            try:
                md5 = getMD5(root)
                previous = seen.get(md5)
                # MD5 collisions can be crafted, so confirm with SHA-512
                # before reporting (and possibly trashing) a file.
                duplicate = (previous is not None
                             and getSha512(root) == getSha512(previous))
                fsize = os.path.getsize(root) if duplicate else 0
            except OSError as err:
                # Unreadable or vanished files must not abort the scan.
                print('\nSkipped %s: %s' % (root, err))
                continue
            if duplicate:
                if delete:
                    # Moves the earlier copy to the recycle bin.
                    send2trash.send2trash(previous)
                print('\n%s\n%s\n' % (root, previous))
                size += fsize
            seen[md5] = root

    # Convert bytes to MiB for the summary.
    size = round(size / float(1024 * 1024), 2)
    if delete:
        print('\nSaved %sM space.' % size)
    else:
        print('\nWasted %sM space.' % size)
    return size


if __name__ == "__main__":
    x = input('Input path:')
    walk(x)