Python3.7 比較兩個Excel文件指定列的值的異同，並將核對結果寫入Excel中（含升級版本）

本文轉載自查看原文 2020-03-13 19:43 1461 xlutils/ xlwt/ xlrd/ Python

背景：

最近工作中需要核對客戶的歷史數據，

接近400個產品，需要核對的列有15列，行數有8000+行

肉眼核對簡直要吐血

心想着反正在學python呢

人生苦短

何不用python寫個腳本

幫助我核對

我省出時間來做些更有意義的事情呢

前提：

我需要核對的兩份Excel有以下特點：

有共同的主鍵
兩份Excel的結構不盡相同，僅需要核對源文件47列中的15列，這些列在目標文件中都能找到

主要思路：

將兩個文件的主鍵作為鍵，將要核對的列作為值，保存到字典中

然後再進行比較

實現代碼：

import xlrd
import xlwt
import time


# Module-level caches filled by compare_excel():
#   origin_dict maps primary key -> value of the checked column in the source file
#   target_dict maps primary key -> value of the checked column in the target file
origin_dict = dict()
target_dict = dict()

# The success/fail counters live inside compare_excel() rather than here:
# the function updates them with `+=`, which makes the names local to the
# function, so module-level counters would raise UnboundLocalError unless
# they were declared with `global` inside the function.


def compare_excel(ori_path, tar_path, col_ori, col_tar, res_path='result.xlsx'):
    """Compare one column of two Excel files, matching rows on the key in column 0.

    The first sheet of each workbook is read, skipping row 0 (treated as a
    header). For every key found in the source file, the value of ``col_ori``
    is compared with the value of ``col_tar`` stored under the same key in the
    target file, and one result row (key, verdict) is written to a new
    workbook that is saved once at the end.

    Args:
        ori_path: Path of the source Excel file.
        tar_path: Path of the target Excel file.
        col_ori: 0-based index of the column to check in the source file.
        col_tar: 0-based index of the column to check in the target file.
        res_path: Where the result workbook is saved. Defaults to
            'result.xlsx' for backward compatibility.
            NOTE(review): the workbook is produced by xlwt, which writes the
            legacy .xls format — consider passing a '.xls' path.

    Side effects:
        Refills the module-level ``origin_dict`` / ``target_dict``, writes
        ``res_path`` and prints one line per key plus a summary.
    """
    success = 0  # keys whose values match
    fail = 0  # keys whose values differ (or are missing in the target)

    # Open both inputs and create the workbook that receives the results.
    wb_ori = xlrd.open_workbook(ori_path)
    wb_tar = xlrd.open_workbook(tar_path)
    wb_res = xlwt.Workbook()

    sheet_ori = wb_ori.sheet_by_index(0)
    sheet_tar = wb_tar.sheet_by_index(0)
    sheet_res = wb_res.add_sheet('result')

    # Drop whatever a previous call left behind; otherwise keys from an
    # earlier pair of files would be compared again in this run.
    origin_dict.clear()
    target_dict.clear()

    # Build {key: value} for each file. The key is always taken from column 0.
    for row in range(1, sheet_ori.nrows):
        origin_dict[sheet_ori.cell_value(row, 0)] = sheet_ori.cell_value(row, col_ori)
    for row in range(1, sheet_tar.nrows):
        target_dict[sheet_tar.cell_value(row, 0)] = sheet_tar.cell_value(row, col_tar)

    try:
        # Result rows start at 1; row 0 of the result sheet is left empty.
        for row_res, (key, ori_value) in enumerate(origin_dict.items(), start=1):
            # A key missing from the target yields None and is reported as a difference.
            tar_value = target_dict.get(key)
            sheet_res.write(row_res, 0, key)
            if tar_value == ori_value:
                success += 1
                sheet_res.write(row_res, 1, '你倆長一樣')
                print('金融產品 %s 核對無差異' % key)
            else:
                fail += 1
                sheet_res.write(row_res, 1, '核對有差異：源文件的值為：%s，目標文件的值為:%s' % (ori_value, tar_value))
                print('金融產品 %s 核對有差異：源文件的值為：%s，目標文件的值為:%s' % (key, ori_value, tar_value))
        # Save exactly once, after every row has been written, so the file is
        # produced even when no difference was found.
        wb_res.save(res_path)
        print(time.strftime('%Y-%m-%d %H-%M-%S', time.localtime()) + '核對完成，共核對 %d 條，其中無差異 %d 條， 有差異 %d條' % (len(origin_dict), success, fail))
    except Exception as error:
        # Best-effort script: report the problem instead of aborting with a traceback.
        print(str(error))


# Check the 4th column of the source file against the 3rd column of the
# target file (column indices are 0-based).
compare_excel(
    ori_path='C111111.xlsx',
    tar_path='O222222.xlsx',
    col_ori=3,
    col_tar=2,
)

後續：

代碼還有很多優化的空間：

比如說源文件中有的鍵目標文件中沒有，如何提示？

比如說能否一次比較多個列的值

比如擴展成雙主鍵/多主鍵，該如何比對？

如果你有方法或思路，可以跟我這個小白一起交流

*************************************************************************************************************************************************************************************************

上面說的優化空間，今天解決了多主鍵的問題

多主鍵問題解決的關鍵

是把多主鍵拼接成一個字符串，然後再作為字典的鍵

進而比較其值

優化的點還包括：

將核對結果追加寫入源文件中

而非新建一個Excel

具體代碼如下：

import xlrd
import xlwt
import xlutils
import time
from datetime import datetime
from xlrd import xldate_as_tuple
from xlutils import copy

# Module-level caches filled by Excel_Compare():
#   dict_ori maps composite key -> value of the checked column in the source file
#   dict_tar maps composite key -> value of the checked column in the target file
dict_ori = dict()
dict_tar = dict()

def _key_text(value):
    """Render one key component as text so it can be joined with '--'."""
    # Numeric cells come back from xlrd as floats; show 1001.0 as '1001'.
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def _date_text(value, datemode):
    """Return an Excel date serial as 'YYYY-MM-DD'; text values pass through unchanged."""
    if isinstance(value, float):
        return datetime(*xldate_as_tuple(value, datemode)).strftime('%Y-%m-%d')
    return str(value)


def Excel_Compare(ori_path,tar_path,sheet_index_ori,col_ori,col_tar):
    """Compare one numeric column of two Excel files using a three-part key.

    Rows are matched on the key "<product id>--<account name>--<date>".
    Source key columns are 1 (product id), 4 (account name, only the part
    after the first '-' is used) and 11 (date). Target key columns are 1, 5
    and 15, read from the first sheet of the target file. Row 0 of each sheet
    is skipped as a header. Values are rounded to 2 decimals before being
    compared. The verdict for each source key is written into columns 20-23
    of the source sheet, starting at row 1, and the workbook is saved back
    to ``ori_path``.

    Args:
        ori_path: Path of the source file; it is overwritten with the results.
        tar_path: Path of the target file.
        sheet_index_ori: Index of the sheet to check in the source file.
        col_ori: 0-based index of the column to check in the source file.
        col_tar: 0-based index of the column to check in the target file.

    Raises:
        ValueError: If a checked cell cannot be converted to float (for
            example an empty cell), so the sheets must not contain blank rows.
        IndexError: If a source account name contains no '-'.

    Side effects:
        Refills the module-level ``dict_ori`` / ``dict_tar``, rewrites
        ``ori_path`` and prints one line per key plus a summary.
    """
    success = 0
    fail = 0
    # NOTE(review): empty-value keys are counted here but never reported in
    # the summary printed at the end — confirm whether that is intended.
    space = 0

    # Open both workbooks. No extra positional argument is passed: the second
    # parameter of xlrd.open_workbook is a log file object, not a file mode.
    wb_ori = xlrd.open_workbook(ori_path)
    wb_tar = xlrd.open_workbook(tar_path)
    # Writable copy of the source workbook; results are appended to it.
    wb_res = copy.copy(wb_ori)

    sheet_ori = wb_ori.sheet_by_index(sheet_index_ori)
    sheet_tar = wb_tar.sheet_by_index(0)
    sheet_res = wb_res.get_sheet(sheet_index_ori)

    # Drop whatever a previous call left behind; otherwise keys from an
    # earlier pair of files would be compared again in this run.
    dict_ori.clear()
    dict_tar.clear()

    # Source file: build {composite key: rounded value}.
    for row in range(1, sheet_ori.nrows):
        key_parts = (
            sheet_ori.cell_value(row, 1),
            # Account names look like "XX-xx"; only the "xx" part is compared.
            sheet_ori.cell_value(row, 4).split('-')[1],
            # Use the workbook's own date mode (1900- or 1904-based).
            _date_text(sheet_ori.cell_value(row, 11), wb_ori.datemode),
        )
        key_ori = '--'.join(_key_text(part) for part in key_parts)
        dict_ori[key_ori] = round(float(sheet_ori.cell_value(row, col_ori)), 2)

    # Target file: build {composite key: rounded value}.
    for row in range(1, sheet_tar.nrows):
        key_parts = (
            sheet_tar.cell_value(row, 1),
            sheet_tar.cell_value(row, 5),
            # Text dates are used as-is; numeric dates are formatted like the source.
            _date_text(sheet_tar.cell_value(row, 15), wb_tar.datemode),
        )
        key_tar = '--'.join(_key_text(part) for part in key_parts)
        dict_tar[key_tar] = round(float(sheet_tar.cell_value(row, col_tar)), 2)

    # Comparison: one result row per source key, starting at row 1.
    try:
        for row_res, key in enumerate(dict_ori, start=1):
            income_ori = dict_ori[key]
            income_tar = dict_tar.get(key)  # None when the key is missing in the target
            product_id_res, product_name_res, date_res = key.split('--')[:3]

            if income_tar == income_ori:  # values match
                success += 1
                verdict = '核對無誤'
                message = '金融產品:%s，賬套:%s，日期：%s的收益數據核對無差異' % (product_id_res,product_name_res,date_res)
            elif income_ori == 0.00 and income_tar is None:  # zero in source, absent in target
                space += 1
                verdict = '空值'
                message = '金融產品:%s，賬套:%s，日期：%s的數據為空' % (product_id_res,product_name_res,date_res)
            else:  # values differ
                fail += 1
                verdict = '數值有差異，源文件收益為：%s,目標文件的收益為:%s'%(income_ori,income_tar)
                message = '金融產品:%s，賬套:%s，日期：%s 的收益數據核對有差異，源文件的收益為%s,目標文件的收益為%s'%(product_id_res,product_name_res,date_res,income_ori,income_tar)

            sheet_res.write(row_res,20,product_id_res)
            sheet_res.write(row_res,21,product_name_res)
            sheet_res.write(row_res,22,date_res)
            sheet_res.write(row_res,23,verdict)
            print(message)
    except Exception as error:
        # Best-effort script: report the problem and still save what was written.
        print(str(error))
    # NOTE(review): the copy is an xlwt workbook, which writes the legacy .xls
    # format; saving it over an .xlsx path replaces the original — confirm.
    wb_res.save(ori_path)
    print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime())+'核對完成，共核對%d條，其中，無差異%d條，有差異%d條'%(success+fail,success,fail))


# Check column index 16 of sheet index 3 in the source file against column
# index 19 of the target file (all indices are 0-based).
Excel_Compare(
    ori_path='CashSequence111.xlsx',
    tar_path='CashSequence222.xlsx',
    sheet_index_ori=3,
    col_ori=16,
    col_tar=19,
)

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 Python比較兩個excel文檔內容的異同 VBA比較兩個Excel數據的異同 python 對比兩個excel文件中的列，如果相同，提取相同值所在行的另一列值 python 比較兩個excel文件中主要的差異配合Beyound Compare 使用更加完美 Python中讀取文件中的json串，並將其寫入到Excel表格中 EXCEL兩個列的交集 python讀取excel並將內容寫入txt excel比較兩列數據大小並填充較小值的顏色 Excel 如何查找兩個列的值滿足條件的行 python 中對比兩個Excel 文件內容