【Python】EXCEL轉Json

本文轉載自查看原文 2019-07-02 17:01 2055 python3進階

在網上查閱了好幾個 EXCEL 轉 Json 的代碼，有的僅支持單一層級的 Json（不支持多層級的 json），有的太過複雜、看得不是很懂，所以就自己寫了一個目前符合自己使用需求的代碼。
我先講下實現的方式。

如果遇到一個如下的json格式，我該怎么存到excel方便讀取出來呢？

{
    "name": "haha",
    "value": 12,
    "floor_area": 43.5,
    "categories": [
        {
            "id": 1,
            "extra_property": "xixi",
            "renovation_type": [
                1,
                2
            ],
            "subcategories": [
                {
                    "subcategory_id": 1,
                    "subcategory_value": 2
                }
            ]
        }
    ]
}

這是個多維 json，不太好存放在 excel 中，讀取出來也得花點心思，畢竟你不知道後期又會有什麼格式的 json 出現。為了應對千奇百怪的 json 格式，我想到如下方式：

上圖中特別標注的，可以划分為三個部分，以下對這三個部分進行講解。

區域一（功能校驗區域）：

user：登陸的用戶或操作的用戶

real_code：預計接口執行操作后需要返回的code值，如200、401等

real_message：預計接口執行操作後需要返回的 message，如“請求成功”、“無操作權限”等

區域二（參數類型區域）：

傳遞參數的類型無外乎這幾種：int、str、float、boolean。同樣我們需要告訴代碼這個參數是什么類型的參數，以免傳遞數據的時候發生參數類型錯誤。在代碼中我做了處理，可以識別常用的四種類型（不區分大小寫）。

區域三（參數區域）：

如果是數組類型的數據，用"[]"來標記，不同的數據存儲在不同的單元格中，如：

看到圖的時候你會問：為什么sheet里面會有參數的名稱？

我們可以看出“categories”是個哈希，底下還存在不同的參數。為了知道哪些參數是在“categories”下的，我們可以用一張與參數同名的 sheet 去存儲“categories”的參數，這樣就可以通過代碼方便地找到。

有可能“categories”存在多組數據，所以我們需要用"[]"來告知代碼。要讀取哪些數據，我們可以存儲對應的行號來指定，不同的行號可以用頓號（、）或逗號分隔。

附上代碼：

# -*- coding: utf-8 -*-
# !/usr/bin/python3

import os
import re
import xlrd
import json
import unicodedata

# Author tag of this script (spelled with single underscores in the original).
_author_ = 'garcia'


class DataCenter:
    """Turn rows of an Excel workbook into (possibly nested) JSON-like dicts.

    Sheet layout the code relies on:

    * row 0 holds the declared type of each parameter column
      (``int``, ``float``, ``boolean`` or ``str``; blank means ``str``);
    * row 1 holds the parameter names, a blank name repeating the previous one;
    * rows 2 and below hold the data.

    In the top-level sheet, column 0 is read as the function point, column 1
    as the user and columns 2-3 as two check values; parameters start at
    column 4.  In a child sheet parameters start at column 1.  A parameter
    whose name equals a sheet name refers to rows of that sheet by 1-based
    row number, and a name containing ``[]`` collects its values into a list.
    """

    def __init__(self):
        # Directory used when no explicit path is given (path is None);
        # can be overridden through the AUTO_DATACENTER environment variable.
        self.data_dir = os.getenv('AUTO_DATACENTER', '存放excel的文件夾地址')

        self.filename = ''
        self.path = ''
        # Sheet names of the last opened workbook.  A list (not a string) so
        # that ``name in self.sheetNames`` is a membership test, never a
        # substring test.
        self.sheetNames = []

        # Last opened workbook and the file it came from, so nested sheet
        # lookups do not re-parse the same file again and again.
        self._workbook = None
        self._workbook_file = None

    @staticmethod
    def is_number(val):
        """Return True when ``val`` can be interpreted as a number.

        ``val`` counts as a number when ``float(val)`` succeeds or when it is
        a single character with a Unicode numeric value.
        """
        try:
            float(val)
            return True
        except (TypeError, ValueError):
            pass

        try:
            unicodedata.numeric(val)
            return True
        except (TypeError, ValueError):
            pass

        return False

    def keep_integer_type_from_excel(self, value):
        """Return whole-number values as ``int``; anything else unchanged.

        Integers read from Excel arrive as floats (``12.0``); this restores
        them.  Strings are never converted.
        """
        if isinstance(value, str) or not self.is_number(value):
            return value
        try:
            is_whole = value % 1 == 0
        except TypeError:
            # Number-like object that does not support the modulo operator.
            return value
        return int(value) if is_whole else value

    def change_field_type(self, table, col, val):
        """Convert ``val`` to the type declared in row 0 of column ``col``.

        :param table: sheet object exposing ``cell(row, col).value``
        :param col: column index of the value
        :param val: raw cell value
        :return: the converted value; ``None`` for an empty cell or when the
                 conversion fails (the error is printed); the string
                 ``'字段類型錯誤！'`` when the declared type is not recognised
        """
        field_type = table.cell(0, col).value
        val = self.keep_integer_type_from_excel(val)
        if val == '' or val is None:
            return None

        if field_type is None:
            type_name = ''
        elif isinstance(field_type, str):
            # Type names are case-insensitive; surrounding blanks are ignored.
            type_name = field_type.strip().lower()
        else:
            return '字段類型錯誤！'

        try:
            if type_name == 'int':
                return int(val)
            if type_name == 'float':
                return float(val)
            if type_name == 'boolean':
                # bool('false') is True, so textual false values are handled
                # explicitly.  The result stays 0/1 as before.
                if isinstance(val, str) and val.strip().lower() in ('false', '0'):
                    return 0
                return int(bool(val))
            if type_name in ('str', ''):
                return str(val)
        except (TypeError, ValueError, OverflowError) as e:
            # Best effort: report the bad cell and treat it as empty.
            print(e)
            return None
        return '字段類型錯誤！'

    @staticmethod
    def unic(item):
        """Round-trip ``item`` through JSON, keeping non-ASCII text readable.

        :return: the JSON-normalised object (e.g. tuples become lists), or
                 ``item`` itself when it cannot be serialised
        """
        try:
            # json.dumps/json.loads take no ``encoding`` argument on current
            # Python 3; passing one raises TypeError.
            return json.loads(json.dumps(item, ensure_ascii=False))
        except (TypeError, ValueError):
            return item

    @staticmethod
    def get_sheet_names(wb):
        """
        Returns the names of all the worksheets in the current workbook.
        """
        return wb.sheet_names()

    @staticmethod
    def __convert_to_list(val):
        """Convert a row-reference cell into a list of ints.

        Accepts a number (``3`` or ``3.0``) or a string of numbers separated
        by ``,``, ``，`` or ``、``.  Blank pieces (e.g. from a trailing
        separator) are ignored.
        """
        if isinstance(val, (int, float)):
            return [int(val)]
        pieces = re.split(r'[,，、]', str(val))
        return [int(piece) for piece in map(str.strip, pieces) if piece]

    @staticmethod
    def _excludes_empty_cells(includeEmptyCells):
        """Return True when the caller asked to leave empty cells out.

        Historically only the string ``'False'`` was recognised; the boolean
        ``False`` and other spellings of the string are accepted as well.
        """
        if isinstance(includeEmptyCells, str):
            return includeEmptyCells.strip().lower() == 'false'
        return includeEmptyCells is False

    def get_table(self, sheet_name):
        """Return the sheet called ``sheet_name`` from the configured workbook.

        The workbook is looked up in ``self.path``, or in ``self.data_dir``
        when ``self.path`` is None, and is kept in memory until a different
        file is requested.  ``self.sheetNames`` is refreshed as a side effect.

        :raises ValueError: when the workbook has no sheet of that name
        :raises Exception: whatever ``xlrd.open_workbook`` raises for an
                           unreadable file
        """
        base_dir = self.data_dir if self.path is None else self.path
        file = os.path.join(base_dir, self.filename)

        cached = getattr(self, '_workbook', None)
        if cached is None or getattr(self, '_workbook_file', None) != file:
            cached = xlrd.open_workbook(file)
            self._workbook = cached
            self._workbook_file = file

        self.sheetNames = self.get_sheet_names(cached)
        if sheet_name not in self.sheetNames:
            raise ValueError('sheet %r not found in %s' % (sheet_name, file))
        return cached.sheet_by_index(self.sheetNames.index(sheet_name))

    @staticmethod
    def get_row_and_col(table):
        """Return ``(number_of_rows, number_of_columns)`` of ``table``."""
        return table.nrows, table.ncols

    @staticmethod
    def get_param(table, start_col, total_col):
        """Return the parameter names found in row 1, columns ``start_col``..``total_col - 1``.

        A blank name cell repeats the name to its left, which is how a list
        parameter spreads over several columns.

        :raises ValueError: when the very first name cell is blank
        """
        param_list = []
        for col in range(start_col, total_col):
            param = table.cell(1, col).value
            if param is None or param == '':
                if not param_list:
                    raise ValueError(
                        'parameter name missing in column %d' % col)
                param = param_list[-1]
            param_list.append(param)
        return param_list

    def _build_record(self, table, row, start_col, param_names, includeEmptyCells):
        """Assemble one dict from the cells of ``row``.

        ``param_names[i]`` names the cell in column ``start_col + i``.
        """
        skip_empty = self._excludes_empty_cells(includeEmptyCells)
        data_dic = {}
        for offset, name in enumerate(param_names):
            col = start_col + offset
            # Integers read from Excel come back as floats; change_field_type
            # restores the declared type.
            val = self.change_field_type(table, col, table.cell(row, col).value)
            is_empty = val == '' or val is None

            if isinstance(name, str) and '[]' in name:
                # List parameter: empty cells contribute nothing.
                if is_empty:
                    continue
                key = name[:name.index('[')]
                bucket = data_dic.setdefault(key, [])
                if key in self.sheetNames:
                    # Each referenced (1-based) row of the child sheet
                    # becomes one element of the list.
                    for row_number in self.__convert_to_list(val):
                        bucket.append(
                            self.get_child_param(key, row_number - 1, includeEmptyCells))
                else:
                    bucket.append(val)
            elif name in self.sheetNames:
                if not is_empty:
                    row_numbers = self.__convert_to_list(val)
                    if row_numbers:
                        # A single nested object: when several rows are
                        # listed the last one wins.
                        data_dic[name] = self.get_child_param(
                            name, row_numbers[-1] - 1, includeEmptyCells)
            elif is_empty and skip_empty:
                continue
            else:
                data_dic[name] = val
        return data_dic

    def get_child_param(self, param, row, includeEmptyCells):
        """Build the nested dict stored in row ``row`` of the sheet named ``param``.

        :param param: name of the child sheet
        :param row: 0-based row index inside that sheet
        :param includeEmptyCells: ``'False'``/``False`` to leave out empty cells
        :return: the dict for that row; an empty dict when ``param`` is not a
                 sheet of the current workbook
        """
        if param not in self.sheetNames:
            return {}

        table = self.get_table(param)
        _, child_total_col = self.get_row_and_col(table)
        child_param = self.get_param(table, 1, child_total_col)
        return self._build_record(table, row, 1, child_param, includeEmptyCells)

    def param_to_json(self, filename, sheet_name, includeEmptyCells, path=None):
        """
        Read the interface parameters stored in the given sheet.

        :param filename: workbook file name
        :param sheet_name: name of the sheet to read
        :param includeEmptyCells: ``'False'`` (or ``False``) to leave empty
                                  cells out of the result; anything else
                                  keeps them as ``None``
        :param path: directory of the workbook; ``None`` uses ``self.data_dir``
        :return: ``(user_list, all_data_list, function_point_list, check_list)``
                 with one entry per data row.  On error the message is
                 printed and the rows collected so far are returned.
        """
        function_point_list = []
        check_list = []
        user_list = []
        all_data_list = []
        try:
            self.filename = filename
            self.path = path
            # Drop any cached workbook so each call reads the file afresh.
            self._workbook = None
            table = self.get_table(sheet_name)

            total_row, total_col = self.get_row_and_col(table)
            param_list = self.get_param(table, 4, total_col)

            for row in range(2, total_row):
                data_dic = self._build_record(
                    table, row, 4, param_list, includeEmptyCells)
                print(data_dic)
                # Read every cell of the row before appending anything, so
                # the four lists cannot end up with different lengths.
                checks = [
                    self.keep_integer_type_from_excel(table.cell(row, 2).value),
                    self.keep_integer_type_from_excel(table.cell(row, 3).value),
                ]
                user = table.cell(row, 1).value
                function_point = table.cell(row, 0).value

                check_list.append(checks)
                all_data_list.append(data_dic)
                user_list.append(user)
                function_point_list.append(function_point)
        except Exception as e:
            print(e)

        return user_list, all_data_list, function_point_list, check_list


if __name__ == '__main__':
    # Demo run: read one sheet of one workbook, leaving empty cells out,
    # and print the four result lists.
    center = DataCenter()
    result = center.param_to_json('存放數據的excel名稱', 'sheet名', 'False')
    users, records, function_points, checks = result
    print(users, records, function_points, checks)

說到這，我們來講講它的缺點：

1、如果存在多張表的嵌套，代碼執行的時間比較長

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 python實現json轉excel python操作json文件：讀取、寫入、追加、刪除、excel轉json、json轉excel Json文件轉Excel Excel轉Json NPOI json轉Excel DataTable轉Excel ，Excel轉DataTable 【python】將excel轉成json python - xml轉excel [Python] dict轉json Python datetime 轉 JSON [轉]Vue導出json數據到Excel表格