python 比較兩文件夾的內容,具有通用性。


#-*-coding:utf-8-*-    
 
#===============================================================================  
# 目錄對比工具(包含子目錄 ),並列出
# 1、A比B多了哪些文件  
# 2、B比A多了哪些文件  
# 3、二者相同的文件:文件大小相同 VS 文件大小不同  (Size相同文件不打印:與Size不同文件顯示未排序)
# 4、可通過修改,比較文件名且包含文件格式或者只比較文件名但不包括格式后綴。
#===============================================================================  
 
import os, time,difflib,sys


# Module-level lists labelled for the two sides being compared.
# NOTE(review): nothing in the code visible here reads or writes these lists;
# confirm they are unused before removing them.
AFILES = []  # files on the "EE" side
BFILES = []  # files on the "SVN" side
COMMON = []  # files present on both the "EE" and "SVN" sides
def getPrettyTime(state):
    """Format a stat result's modification time as 'yy-mm-dd HH:MM:SS'.

    ``state`` is any object exposing ``st_mtime`` (seconds since the epoch),
    such as the value returned by ``os.stat``. The time is rendered in the
    local time zone.
    """
    modified = time.localtime(state.st_mtime)
    return time.strftime('%y-%m-%d %H:%M:%S', modified)

# def getpathsize(dir): #獲取文件大小的函數,未用上,僅供學習.故注釋掉
#     size=0
#     for root, dirs, files in os.walk(dir):
#     #root:目錄:str 如: C:\CopySVN\SystemObject\TopoProcedure\Built-in\
#     #dirs:目錄名稱:列表: 如 ['Parsers']
#     #files:名稱:列表: 如 ['011D0961FB42416AA49D5E82945DE7E9.og',...]
#     #file:目錄:str, 如 011D0961FB42416AA49D5E82945DE7E9.og
#         for file in files:
#             path = os.path.join(root,file)
#             size = os.path.getsize(path)
#     return size

def _collect_relative_files(top):
    """Return the path of every file under *top*, relative to *top*."""
    found = []
    for root, _dirs, names in os.walk(top):
        for name in names:
            # Join with a real separator; plain string concatenation glued the
            # directory and file name together for anything below the top level.
            found.append(os.path.relpath(os.path.join(root, name), top))
    return found


def _sorted_lines(path):
    """Return the lines of the file at *path* as a sorted list of byte strings."""
    # Binary mode: no decoding errors on non-text files, and the handle is
    # always closed, even when reading fails.
    with open(path, 'rb') as handle:
        return sorted(handle)


def _write_listing(path, entries):
    """Replace the listing at *path* with *entries*, one entry per line.

    Any previous listing is removed first; no file is created when *entries*
    is empty.
    """
    if os.path.exists(path):
        os.remove(path)
    if entries:
        with open(path, 'a') as out:
            out.writelines(entry + '\n' for entry in entries)


def dirCompare(apath, bpath):
    """Compare two directory trees (including sub-directories).

    Files are matched by their path relative to each tree root. Results are
    written to the current working directory:

    * ``diff.txt``         - common files whose sizes differ (appended).
    * ``comment_diff.txt`` - common files of equal size whose sorted lines
      differ, either in count or in content (appended).
    * ``Aonly.txt``        - files found only under *apath* (rewritten).
    * ``Bonly.txt``        - files found only under *bpath* (rewritten).

    The total and "only" file counts of each tree are printed to stdout.

    Args:
        apath: root of the first tree (the "EE" side).
        bpath: root of the second tree (the "SVN" side).

    Raises:
        OSError / IOError: if a common file cannot be stat'ed or read, or a
            report file cannot be written.
    """
    afiles = _collect_relative_files(apath)
    bfiles = _collect_relative_files(bpath)
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print('%s All files numbers: %d' % (apath, len(afiles)))
    print('%s All files numbers: %d' % (bpath, len(bfiles)))

    setA = set(afiles)
    setB = set(bfiles)

    outdir = os.getcwd()
    size_report = os.path.join(outdir, 'diff.txt')
    content_report = os.path.join(outdir, 'comment_diff.txt')

    # NOTE: diff.txt and comment_diff.txt are opened in append mode and are
    # never cleared here, so results accumulate across runs.
    for rel in sorted(setA & setB):
        a_file = os.path.join(apath, rel)
        b_file = os.path.join(bpath, rel)
        sA = os.path.getsize(a_file)
        sB = os.path.getsize(b_file)

        if sA != sB:
            with open(size_report, 'a') as di:
                di.write("File Name=%s    EEresource file size:%d   !=  SVN file size:%d"
                         % (rel, sA, sB) + '\n')
            continue

        # Same size does not imply same content. Lines are compared after
        # sorting, so two files holding the same lines in a different order
        # are treated as equal.
        a_lines = _sorted_lines(a_file)
        b_lines = _sorted_lines(b_file)
        if len(a_lines) != len(b_lines):
            verdict = " lines size not equal "
        elif a_lines != b_lines:
            verdict = " content not equal "
        else:
            continue

        with open(content_report, 'a') as fp:
            fp.write(apath + "/" + rel + verdict + bpath + "/" + rel + '\n')

    # Files that exist in only one of the two trees.
    aonlyFiles = sorted(setA - setB)
    bonlyFiles = sorted(setB - setA)
    print('%s only files numbers: %d' % (apath, len(aonlyFiles)))
    print('%s only files numbers: %d' % (bpath, len(bonlyFiles)))

    _write_listing(os.path.join(outdir, 'Aonly.txt'), aonlyFiles)
    _write_listing(os.path.join(outdir, 'Bonly.txt'), bonlyFiles)

if __name__ == '__main__':
    # Command-line entry point: argv[1] is the "EE" folder, argv[2] the "SVN"
    # folder. Report a usage error instead of failing with a bare IndexError
    # when either argument is missing; extra arguments are ignored.
    if len(sys.argv) < 3:
        sys.stderr.write('usage: %s <FolderEE> <FolderSVN>\n' % sys.argv[0])
        sys.exit(2)
    FolderEE = sys.argv[1]
    FolderSVN = sys.argv[2]
    dirCompare(FolderEE, FolderSVN)
    print("done!")

ps:本文參考 http://www.cnblogs.com/luo-mao/p/5872532.html 貓兒爹,經修改符合自身使用,感謝作者。


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM