該python 腳本有以下三個功能:
1. 查找目錄下重複的文件(以文件名和文件大小均相同作為判斷依據),輸出時每組重複文件按修改時間升序排列
2. 列出每組重複文件中修改時間較舊、可刪除的文件(每組僅保留最新修改的一個)
3. 按目錄對重複文件進行統計,例如“目錄 /tmp 重複個數 5”是指 /tmp 目錄下有 5 個文件在其他地方也存在
python腳本
#!/usr/bin/env python #coding=utf-8 ''' Created on Nov 30, 2016 @author: fangcheng ''' from __future__ import print_function from operator import itemgetter import os import time 'tt為浮點型日期,換化為年月日時分秒格式時間' def timeYS(tt): t1 = time.localtime(tt) t2 = time.strftime("%Y-%m-%d %H:%M:%S",t1) return t2; class File(): ''' copy move remove ''' allfilecount = 0 rddfilecount = 0 singlefiles={} rddfiles={} rdddirs={} def __init__(self): ''' Constructor ''' def getFileMsg(self,filepath): ''' 以元組(filepath,ftime,size)形式輸出文件信息 ''' if os.path.isfile(filepath): size = os.path.getsize(filepath) #bytes B if size <= 1024: size ='{0}B'.format(size); elif size <= 1024*1024: size = size/1024 size ='{0}K'.format(size); else: size = size/1024/1024 size ='{0}M'.format(size); #filename = os.path.basename(filepath) ftime = timeYS(os.path.getmtime(filepath)) return (filepath,ftime,size) return () def setRedundanceFile(self,filepath): ''' 根據文件名稱和大小判斷文件是否重復,文件信息:元組(filepath,mtime,size) ,getFileMsg返回值 1. 遍歷某一目錄下所有文件 2. 將文件的名稱及大小組成一個字符串,做為 key 放入字典 dict1 ,其 value 為 文件信息 3. 每次放入時時判斷 key 是否存在,若存在,就將 文件信息 放入字典 dict2 4. 
dict2 的 key 為 文件名稱,value為 文件信息 列表 list1 ''' try: if os.path.isdir(filepath): for fil in os.listdir(filepath): fil = os.path.join(filepath,fil) self.setRedundanceFile(fil) elif os.path.isfile(filepath): self.allfilecount = self.allfilecount + 1 size = os.path.getsize(filepath) filename = os.path.basename(filepath) f = self.getFileMsg(filepath) filekey = '{0}_{1}'.format(filename, size) if self.singlefiles.has_key(filekey): self.rddfilecount = self.rddfilecount + 1 #增加規則:發現一個重復文件時,在父目錄下文件數加1,若是首次發現則取該文件在總文件列表的父目錄,其數目也加1 pardir = os.path.dirname(filepath) if self.rdddirs.has_key(pardir): self.rdddirs[pardir] = self.rdddirs.get(pardir)+1 else: self.rdddirs[pardir] = 1 if self.rddfiles.has_key(filekey) : self.rddfiles[filekey].append(f) else: self.rddfiles[filekey] = [f] f = self.singlefiles.get(filekey) self.rddfiles[filekey].append(f) #若是首次發現則取該文件在總文件列表的父目錄,其數目也加1 pardir = os.path.dirname(f[0]) if self.rdddirs.has_key(pardir): self.rdddirs[pardir] = self.rdddirs.get(pardir)+1 else: self.rdddirs[pardir] = 1 else: self.singlefiles[filekey]=f else: return except Exception as e: print(e) def showFileCount(self): print(self.allfilecount) def showRedundanceFile(self,filepath): ''' 根據文件名稱和大小判斷文件是否重復 ''' self.allfilecount = 0 self.rddfilecount = 0 self.singlefiles={} self.rddfiles={} self.setRedundanceFile(filepath) print('the total file num:{0},the redundance file num(not including the first file):{1}'.format(self.allfilecount,self.rddfilecount)) print('-----------------------------------------') for k in self.rddfiles.keys(): for l in sorted(self.rddfiles.get(k), key=itemgetter(1)): #按修改日期升序排列 print(l); print(''); print('------------------------------------------') def showCanRemoveFile(self,filepath): ''' 根據文件名稱和大小判斷文件是否重復 輸出按修改時間較舊的文件 ''' self.allfilecount = 0 self.rddfilecount = 0 self.singlefiles={} self.rddfiles={} rmlist = [] self.setRedundanceFile(filepath) for k in self.rddfiles.keys(): tmplist = sorted(self.rddfiles.get(k), key=itemgetter(1)) tmplist.pop() 
rmlist.extend(tmplist) for rl in rmlist: print(rl[0]) def rdddirstat(self): ''' 按目錄統計文件重復個數 輸出:目錄/tmp 重復個數5,是指/tmp目錄下有5個文件在其他地方也存在 ''' if len(self.rdddirs)> 0 : print('The redundance file statistics by dirs:') for rd in self.rdddirs.keys(): print('{0} {1}'.format(rd, self.rdddirs.get(rd))) else: print('There are no redundance files') if __name__ == '__main__': f = File() filepath = os.getcwd() #filepath = '/scripts' f.showRedundanceFile(filepath) #查看多余的文件 #f.showCanRemoveFile(filepath) #按修改時間給出比較舊的多余文件 f.rdddirstat() #按目錄統計重復文件個數
為腳本添加執行權限後,可直接在服務器上執行
chmod +x findrdd.py
linux上執行示例
[root@bak scripts]# ./findrdd.py the total file num:33,the redundance file num(not including the first file):5 ----------------------------------------- ('/scripts/bkapp.sh', '2016-03-09 16:31:03', '3K') ('/scripts/esgcc/bkapp.sh', '2016-03-10 11:06:06', '3K') ('/scripts/show_rollbak.txt', '2016-03-09 10:50:02', '2K') ('/scripts/esgcc/show_rollbak.txt', '2016-03-10 11:06:06', '2K') ('/scripts/esgcc/deploy.sh', '2016-03-10 11:36:19', '8K') ('/scripts/deploy.sh', '2016-03-11 11:42:04', '8K') ('/scripts/rollback.sh', '2016-03-10 10:22:33', '10K') ('/scripts/esgcc/rollback.sh', '2016-03-10 11:06:06', '10K') ('/scripts/show_deploy.txt', '2016-03-09 10:50:02', '2K') ('/scripts/esgcc/show_deploy.txt', '2016-03-10 11:06:06', '2K') ------------------------------------------ The redundance file statistics by dirs: /scripts 5 /scripts/esgcc 5
windows上執行示例(需要安裝python):
C:\Users\fei\Desktop\tmp>python findrdd.py the total file num:42,the redundance file num(not including the first file):10 ----------------------------------------- ('C:\\Users\\fei\\Desktop\\tmp\\build\\build\\src\\application\\application.css', '2016-11-22 13:11:51', '101B') ('C:\\Users\\fei\\Desktop\\tmp\\build\\project\\src\\application\\application.css', '2016-11-22 13:11:51', '101B') ('C:\\Users\\fei\\Desktop\\tmp\\build\\build\\classes\\application\\application.css', '2016-11-22 13:11:53', '101B') ('C:\\Users\\fei\\Desktop\\tmp\\build\\project\\src\\login\\Login.java', '2016-11-22 13:11:51', '3K') ('C:\\Users\\fei\\Desktop\\tmp\\build\\build\\src\\login\\Login.java', '2016-11-22 13:11:52', '3K') ('C:\\Users\\fei\\Desktop\\tmp\\build\\dist\\LoginCSS.jar', '2016-11-22 13:11:53', '55K') ('C:\\Users\\fei\\Desktop\\tmp\\build\\deploy\\LoginCSS.jar', '2016-11-22 13:11:54', '55K') ('C:\\Users\\fei\\Desktop\\tmp\\build\\project\\src\\login\\background.jpg', '2016-11-22 13:11:51', '51K') ('C:\\Users\\fei\\Desktop\\tmp\\build\\build\\src\\login\\background.jpg', '2016-11-22 13:11:52', '51K') ('C:\\Users\\fei\\Desktop\\tmp\\build\\build\\classes\\login\\background.jpg', '2016-11-22 13:11:53', '51K') ('C:\\Users\\fei\\Desktop\\tmp\\build\\project\\src\\application\\Main.java', '2016-11-22 13:11:50', '633B') ('C:\\Users\\fei\\Desktop\\tmp\\build\\build\\src\\application\\Main.java', '2016-11-22 13:11:51', '633B') ('C:\\Users\\fei\\Desktop\\tmp\\build\\project\\src\\login\\Test.java', '2016-11-22 13:11:51', '443B') ('C:\\Users\\fei\\Desktop\\tmp\\build\\build\\src\\login\\Test.java', '2016-11-22 13:11:52', '443B') ('C:\\Users\\fei\\Desktop\\tmp\\build\\project\\src\\login\\Login.css', '2016-11-22 13:11:51', '2K') ('C:\\Users\\fei\\Desktop\\tmp\\build\\build\\src\\login\\Login.css', '2016-11-22 13:11:52', '2K') ('C:\\Users\\fei\\Desktop\\tmp\\build\\build\\classes\\login\\Login.css', '2016-11-22 13:11:53', '2K') ------------------------------------------ The redundance file 
statistics by dirs: C:\Users\fei\Desktop\tmp\build\build\src\application 2 C:\Users\fei\Desktop\tmp\build\deploy 1 C:\Users\fei\Desktop\tmp\build\build\classes\application 1 C:\Users\fei\Desktop\tmp\build\project\src\login 4 C:\Users\fei\Desktop\tmp\build\dist 1 C:\Users\fei\Desktop\tmp\build\build\classes\login 2 C:\Users\fei\Desktop\tmp\build\project\src\application 2
腳本中第二個功能(輸出可刪除文件列表)的調用已被注釋掉。該篩選方式僅供參考:是否採用“最新修改的文件才是有效文件、其餘文件皆可刪除”的規則,需自行判斷。
