解析使用 xml.etree.ElementTree 模塊,格式化輸出使用 xml.dom.minidom 模塊;ElementTree 比 dom 快,而 dom 生成簡單且會自動格式化。
<?xml version='1.0' encoding='utf-8'?> <baspools> <bas> <basprovider>0</basprovider> <portal_version>1</portal_version> <timeout>111</timeout> <retry>111</retry> <auth_type>111</auth_type> </bas> <bas> <basprovider>0</basprovider> <portal_version>1</portal_version> <timeout>5000</timeout> <retry>3</retry> <auth_type>0</auth_type> </bas> </baspools>
解析為dict:
{0: {'retry': '111', 'auth_type': '111', 'portal_version': '1', 'timeout': '111', 'basprovider': '0'}, 1: {'retry': '3', 'auth_type': '0', 'portal_version': '1', 'timeout': '5000', 'basprovider': '0'}}
將上述字典再還原為 xml:
執行代碼:
# -*- coding: utf-8 -*-
"""Round-trip a two-level XML document through a nested dict.

The XML is parsed with xml.etree.ElementTree, converted to
``{index: {tag: text}}``, rebuilt as an element tree, and written back
out pretty-printed through xml.dom.minidom.
"""
import time
import xml.etree.ElementTree as ET
import xml.dom.minidom as minidom

# Start of the timed run; paired with the final print in the __main__ block.
# time.clock() was removed in Python 3.8, so perf_counter() is used instead.
start = time.perf_counter()


def read_xml(in_path):
    """Parse the XML file at *in_path*.

    in_path: path of the XML file
    return: the parsed ElementTree
    """
    return ET.parse(in_path)


def creat_dict(root):
    """Convert the children of *root* into a nested dict.

    Each direct child of *root* becomes one entry keyed by its position
    (0, 1, ...). The value is a dict mapping the tag of every grandchild
    to its text. If a tag occurs more than once under the same child,
    the last occurrence wins.

    root: root element of the parsed tree
    return: ``{index: {tag: text}}``
    """
    return {
        index: {child.tag: child.text for child in node}
        for index, node in enumerate(root)
    }


def dict_to_xml(input_dict, root_tag, node_tag):
    """Build an element tree from a nested dict.

    A root element named *root_tag* is created. Every value of
    *input_dict* becomes one second-level element named *node_tag*
    (the outer keys are not written out), and every key/value pair of
    that inner dict becomes a third-level element whose tag is the key
    and whose text is the value. Inner pairs are emitted in the dict's
    own iteration order.

    return: the root Element of the new tree
    """
    root_name = ET.Element(root_tag)
    for fields in input_dict.values():
        node = ET.SubElement(root_name, node_tag)
        for tag, text in fields.items():
            ET.SubElement(node, tag).text = text
    return root_name


def out_xml(root, out_path=None):
    """Pretty-print *root* and write it to an XML file.

    root: root Element to serialise
    out_path: destination path; when omitted, the module-level name
        ``out_file`` (set by the __main__ block) is used, which is what
        the one-argument form has always relied on
    return: True once the file has been written
    """
    if out_path is None:
        out_path = out_file
    rough_string = ET.tostring(root, 'utf-8')
    document = minidom.parseString(rough_string)
    # Open the file as UTF-8 explicitly so the bytes on disk agree with
    # the encoding="utf-8" declaration that writexml puts in the header.
    with open(out_path, 'w', encoding='utf-8') as fs:
        document.writexml(fs, addindent=" ", newl="\n", encoding="utf-8")
    return True


if __name__ == '__main__':
    in_files = r"D:\baspool_read.xml"
    out_file = r"D:\baspool_out.xml"
    tree = read_xml(in_files)
    node_new = creat_dict(tree.getroot())  # convert the XML to a dict
    root = dict_to_xml(node_new, "baspools", "bas")  # convert the dict back to XML
    out_xml(root, out_file)  # write the XML to out_file
    end = time.perf_counter()
    print("read: %f s" % (end - start))
為解決字典無序導致生成的 xml 文件中參數位置不固定的問題,對 dict_to_xml() 函數進行修正(按鍵名倒序排序後再輸出):
def dict_to_xml(input_dict, root_tag, node_tag):
    """Build an element tree from a nested dict with a fixed field order.

    A root element named *root_tag* is created. Every value of
    *input_dict* becomes one second-level element named *node_tag*
    (the outer keys are not written out). Each key/value pair of that
    inner dict becomes a third-level element whose tag is the key and
    whose text is the value; the keys are emitted in descending sorted
    order so the output layout does not depend on dict ordering.

    return: the root Element of the new tree
    """
    root_name = ET.Element(root_tag)
    for fields in input_dict.values():
        node_name = ET.SubElement(root_name, node_tag)
        # Sort the keys themselves in reverse order, then look up each value.
        for tag in sorted(fields, reverse=True):
            leaf = ET.SubElement(node_name, tag)
            leaf.text = fields[tag]
    return root_name
