lxml / dom.minidom 读写XML字符 | Python

本文转载自查看原文 2018-05-17 17:22 887 Python

1.查找与修改：
from lxml import etree
# 1. 使用lxml库读写XML格式数据，测试速度比xml.dom.minidom解析的要快；
# 1.1 将string转换成python对象
#       e1 = etree.XML(xml_content) 或者 etree.fromstring(xml_content)
# 1.2 读取xml字符串中的指定节点文本与属性
#       a1 = e1.xpath("//SPEC_LIST/text()")  # 节点文本
#       a1 = e1.xpath("//SPEC_LIST/@clone")  # 节点属性
# 1.3 添加子节点
#       obj = e1.xpath("//SPEC_LIST")[0]  # 获取节点
#       spec = etree.SubElement(obj, "SPEC")  # 添加子节点
# 1.4 修改节点属性
#       obj = e1.xpath("//SPEC_LIST")[0]  # 获取节点
#       obj.attrib['cpu'] = "8"  # 设置节点属性，attrib属性是一个字典；
# 1.5 将python对象转换成string
#       content = etree.tounicode(e1)  # unicode
#       content = etree.tostring(e1)  # bytes

from lxml import etree

xml_string = """<CERT_LIST><CERT ip="192.168.100.17" password="test" username="test"/></CERT_LIST>
"""

# str -> element object
xml_obj = etree.XML(xml_string)

# Take the first <CERT> element found anywhere in the document.
node = xml_obj.xpath("//CERT")[0]

# An attribute XPath query returns a list of values; take the first match.
ip = node.xpath("./@ip")[0]
user_name = node.xpath("./@username")[0]

print(ip, user_name)

# Create a new child element under <CERT>.
new_node = etree.SubElement(node, "new_node")

# Set an attribute on the new child (attrib behaves like a dict).
new_node.attrib["new_attribute"] = "new_value"

# element object -> str
# etree.tounicode() is deprecated; tostring(..., encoding="unicode") is the
# documented replacement and yields the same text.
content = etree.tostring(xml_obj, encoding="unicode")
print(content)

# Expected output:
# <CERT_LIST><CERT ip="192.168.100.17" password="test" username="test"><new_node new_attribute="new_value"/></CERT></CERT_LIST>

示例

2.创建XML文档与添加元素节点

from lxml import etree


# Create the root element of a new document.
root = etree.Element("root")
print(root, dir(root))

# SubElement() creates a child element and attaches it to the parent in one step.
child1 = etree.SubElement(root, "sub1")

child2 = etree.SubElement(root, "sub2")
child2.text = "sub_text2"

child3 = etree.SubElement(root, "sub3")
child3.text = "sub_text3"

# addprevious/addnext reposition existing elements relative to child1:
# sub2 is moved directly before sub1, sub3 directly after it.
child1.addprevious(child2)
child1.addnext(child3)

# append(): build a detached element first, then attach it as the last child
# of sub1. (Creating it with SubElement() would already attach it, which
# would make the append() call below a no-op.)
sub_child1 = etree.Element("sub_child1")
child1.append(sub_child1)

# print(dir(child))

# 'addnext', 'addprevious', 'append', 'attrib', 'base', 'clear', 'cssselect', 'extend',
# 'find', 'findall', 'findtext', 'get', 'getchildren', 'getiterator', 'getnext', 'getparent',
# 'getprevious', 'getroottree', 'index', 'insert', 'items', 'iter', 'iterancestors',
# 'iterchildren', 'iterdescendants', 'iterfind', 'itersiblings', 'itertext', 'keys',
# 'makeelement', 'nsmap', 'prefix', 'remove', 'replace', 'set', 'sourceline', 'tag',
# 'tail', 'text', 'values', 'xpath'

# clear()
# Resets an element.  This function removes all subelements, clears all attributes and sets the text and tail properties to None.
# 重置元素：移除所有子元素，清除所有属性，并将 text 与 tail 置为 None。

# cssselect()
# css选择器方式

# extend()

# find()
# Finds the first matching subelement, by tag name or path.

# findall()
# Finds all matching subelements, by tag name or path.

# findtext()
# Finds text for the first matching subelement, by tag name or path.

# get()
# Gets an element attribute.

# getiterator()
# a sequence or iterator of all elements in the subtree

# getnext()
# Returns the following sibling of this element or None.

# getparent()
# Returns the parent of this element or None for the root element.

# getprevious()
# Returns the preceding sibling of this element or None.

# getroottree()
# Return an ElementTree for the root node of the document that contains this element.

# index(self, child, start=None, stop=None)
# Find the position of the child within the parent

# insert(self, index, element)
# Inserts a subelement at the given position in this element

# items()
# Gets element attributes, as a sequence.

# iter()
# Iterate over all elements in the subtree in document order

# iterancestors()
# Iterate over the ancestors of this element (from parent to parent).

# iterchildren()/iterdescendants()

# iterfind()

# itersiblings()

# itertext()

# keys()
# Gets a list of attribute names.

# makeelement()
# Creates a new element associated with the same document.

# remove(element)
# Removes a matching subelement.

# replace(self, old_element, new_element)

# set(self, key, value)
# Sets an element attribute.

# values()
# Gets element attribute values as a sequence of strings.

# xpath()

# attrib
# base
# nsmap
# prefix
# sourceline
# tag
# tail
# text

# Print the serialised tree (bytes), then the names exposed by the etree
# module, each on its own line.
print(etree.tostring(root), dir(etree), sep="\n")

# 'AncestorsIterator', 'AttributeBasedElementClassLookup', 'C14NError', 'CDATA', 'Comment', 'CommentBase',
# 'CustomElementClassLookup', 'DEBUG', 'DTD', 'DTDError', 'DTDParseError', 'DTDValidateError', 'DocInfo',
# 'DocumentInvalid', 'ETCompatXMLParser', 'ETXPath', 'Element', 'ElementBase', 'ElementChildIterator',
# 'ElementClassLookup', 'ElementDefaultClassLookup', 'ElementDepthFirstIterator', 'ElementNamespaceClassLookup',
# 'ElementTextIterator', 'ElementTree', 'Entity', 'EntityBase', 'Error', 'ErrorDomains', 'ErrorLevels', 'ErrorTypes',
# 'Extension', 'FallbackElementClassLookup', 'FunctionNamespace', 'HTML', 'HTMLParser', 'HTMLPullParser',
# 'LIBXML_COMPILED_VERSION', 'LIBXML_VERSION', 'LIBXSLT_COMPILED_VERSION', 'LIBXSLT_VERSION', 'LXML_VERSION',
# 'LxmlError', 'LxmlRegistryError', 'LxmlSyntaxError', 'NamespaceRegistryError', 'PI', 'PIBase', 'ParseError',
# 'ParserBasedElementClassLookup', 'ParserError', 'ProcessingInstruction', 'PyErrorLog', 'PythonElementClassLookup',
# 'QName', 'RelaxNG', 'RelaxNGError', 'RelaxNGErrorTypes', 'RelaxNGParseError', 'RelaxNGValidateError', 'Resolver',
# 'Schematron', 'SchematronError', 'SchematronParseError', 'SchematronValidateError', 'SerialisationError',
# 'SiblingsIterator', 'SubElement', 'TreeBuilder', 'XInclude', 'XIncludeError', 'XML', 'XMLDTDID', 'XMLID',
# 'XMLParser', 'XMLPullParser', 'XMLSchema', 'XMLSchemaError', 'XMLSchemaParseError', 'XMLSchemaValidateError',
# 'XMLSyntaxError', 'XMLTreeBuilder', 'XPath', 'XPathDocumentEvaluator', 'XPathElementEvaluator', 'XPathError',
# 'XPathEvalError', 'XPathEvaluator', 'XPathFunctionError', 'XPathResultError', 'XPathSyntaxError', 'XSLT',
# 'XSLTAccessControl', 'XSLTApplyError', 'XSLTError', 'XSLTExtension', 'XSLTExtensionError', 'XSLTParseError',
# 'XSLTSaveError', '_Attrib', '_BaseErrorLog', '_Comment', '_Document', '_DomainErrorLog', '_Element',
# '_ElementIterator', '_ElementMatchIterator', '_ElementStringResult', '_ElementTagMatcher', '_ElementTree',
# '_ElementUnicodeResult', '_Entity', '_ErrorLog', '_FeedParser', '_IDDict', '_ListErrorLog', '_LogEntry',
# '_ProcessingInstruction', '_RotatingErrorLog', '_SaxParserTarget', '_TargetParserResult', '_Validator',
# '_XPathEvaluatorBase', '_XSLTProcessingInstruction', '_XSLTResultTree', 'adopt_external_document',
# 'cleanup_namespaces', 'clear_error_log', 'dump', 'fromstring', 'fromstringlist', 'get_default_parser',
# 'htmlfile', 'iselement', 'iterparse', 'iterwalk', 'memory_debugger', 'parse', 'parseid', 'register_namespace',
# 'set_default_parser', 'set_element_class_lookup', 'strip_attributes', 'strip_elements', 'strip_tags', 'tostring',
# 'tostringlist', 'tounicode', 'use_global_python_log', 'xmlfile'


# tree = etree.ElementTree(root)
# tree.write("test.xml", pretty_print=True, xml_declaration=True, encoding='utf-8')

# NOTE: the xml.dom.minidom section below is optional and can be skipped.
import xml.dom.minidom
from xml.dom.minidom import parse, parseString

# --- Read data from XML nodes ---
# `certificate` is not defined in this snippet; it must already hold the XML
# text to parse (presumably a <CERT_LIST> document -- confirm with the caller).
doc = parseString(certificate)  # parse the XML text into a DOM document
for node in doc.getElementsByTagName("CERT_LIST"):
    for hostnode in node.getElementsByTagName("CERT"):
        # getAttribute() returns "" when the attribute is absent.
        ip = hostnode.getAttribute("ip")
        username = hostnode.getAttribute("username")
        password = hostnode.getAttribute("password")
        datacenter = hostnode.getAttribute("datacenter")
        cluster = hostnode.getAttribute("cluster")

# --- Write XML data ---
# `cpu`, `memory` and `disk` are likewise expected to be defined before this
# point; they are not assigned anywhere in this snippet.
impl = xml.dom.minidom.getDOMImplementation()
dom1 = impl.createDocument(None, 'SPEC_LIST', None)
root = dom1.documentElement
root.setAttribute("cpu", cpu)
root.setAttribute("memory", memory)
root.setAttribute("disk", disk)

specifications = dom1.toxml()  # serialise the DOM document back to an XML string

免责声明！

本站转载的文章为个人学习借鉴使用，本站对版权不负任何法律责任。如果侵犯了您的隐私权益，请联系本站邮箱yoyou2525@163.com删除。

猜您在找 Lxml读写XML字符 python 之模块之 xml.dom.minidom解析xml Python minidom模块(DOM写入和解析XML) Python3使用xml.dom.minidom和xml.etree模块儿解析xml文件，封装函数 python解析xml之lxml DOM4j读写XML（实例） python处理xml的常用包（lib.xml、ElementTree、lxml） python读写xml文件 python xml.dom模块解析xml 【python】lxml