1.查找与修改:
from lxml import etree
# 1.使用lxml库读写XML格式数据,测试速度比xml.dom.minidom解析的要快;
# 1.1 将string转换成python对象
#     e1 = etree.XML(xml_content) 或者 etree.fromstring(xml_content)
# 1.2 读取xml字符串中的指定节点文本与属性
#     a1 = e1.xpath("//SPEC_LIST/text()")  # 节点文本
#     a1 = e1.xpath("//SPEC_LIST/@clone")  # 节点属性
# 1.3 添加子节点
#     obj = e1.xpath("//SPEC_LIST")[0]  # 获取节点
#     spec = etree.SubElement(obj, "SPEC")  # 添加子节点
# 1.4 修改节点属性
#     obj = e1.xpath("//SPEC_LIST")[0]  # 获取节点
#     obj.attrib['cpu'] = "8"  # 设置节点属性,attrib属性是一个字典;
# 1.5 将python对象转换成string
#     content = etree.tounicode(e1)  # unicode
#     content = etree.tostring(e1)  # bytes

# Example: parse an XML string with lxml, read attributes via XPath, add a
# child element with an attribute, and serialize the tree back to text.
from lxml import etree

xml_string = """<CERT_LIST><CERT ip="192.168.100.17" password="test" username="test"/></CERT_LIST> """

# string > object
xml_obj = etree.XML(xml_string)

# get node (xpath() returns a list; take the first <CERT> match)
node = xml_obj.xpath("//CERT")[0]

# get attribute (an attribute XPath also returns a list of matches)
ip = node.xpath("./@ip")[0]
user_name = node.xpath("./@username")[0]
print(ip, user_name)

# add child node
new_node = etree.SubElement(node, "new_node")

# set attribute (attrib behaves like a dict)
new_node.attrib["new_attribute"] = "new_value"

# object > string
# etree.tounicode() is deprecated in lxml; tostring(..., encoding="unicode")
# is the documented replacement and also returns str rather than bytes.
content = etree.tostring(xml_obj, encoding="unicode")
print(content)
# Expected output of the second print:
# <CERT_LIST><CERT ip="192.168.100.17" password="test" username="test"><new_node new_attribute="new_value"/></CERT></CERT_LIST>
2.创建XML文档与添加元素节点
# Example: build an XML document from scratch with lxml, re-order sibling
# elements, and print the serialized result. The comment blocks below are
# reference notes listing the element and module APIs.
from lxml import etree

root = etree.Element("root")
print(root, dir(root))

child1 = etree.SubElement(root, "sub1")
child2 = etree.SubElement(root, "sub2")
child2.text = "sub_text2"
child3 = etree.SubElement(root, "sub3")
child3.text = "sub_text3"

# addprevious/addnext
# Both elements are already children of root, so these calls re-position
# them around child1 (expected sibling order afterwards: sub2, sub1, sub3).
child1.addprevious(child2)
child1.addnext(child3)

sub_child1 = etree.SubElement(child1, "sub_child1")
# append()
# NOTE: SubElement() above already attached sub_child1 to child1; this call
# is kept only to demonstrate the append() API.
child1.append(sub_child1)

# print(dir(child))
# 'addnext', 'addprevious', 'append', 'attrib', 'base', 'clear', 'cssselect', 'extend',
# 'find', 'findall', 'findtext', 'get', 'getchildren', 'getiterator', 'getnext', 'getparent',
# 'getprevious', 'getroottree', 'index', 'insert', 'items', 'iter', 'iterancestors',
# 'iterchildren', 'iterdescendants', 'iterfind', 'itersiblings', 'itertext', 'keys',
# 'makeelement', 'nsmap', 'prefix', 'remove', 'replace', 'set', 'sourceline', 'tag',
# 'tail', 'text', 'values', 'xpath'
#
# clear()
#   Resets an element. This function removes all subelements, clears all
#   attributes and sets the text and tail properties to None.
# cssselect()
#   CSS-selector style lookup.
# extend()
# find()
#   Finds the first matching subelement, by tag name or path.
# findall()
#   Finds all matching subelements, by tag name or path.
# findtext()
#   Finds text for the first matching subelement, by tag name or path.
# get()
#   Gets an element attribute.
# getiterator()
#   A sequence or iterator of all elements in the subtree.
# getnext()
#   Returns the following sibling of this element or None.
# getparent()
#   Returns the parent of this element or None for the root element.
# getprevious()
#   Returns the preceding sibling of this element or None.
# getroottree()
#   Return an ElementTree for the root node of the document that contains
#   this element.
# index(self, child, start=None, stop=None)
#   Find the position of the child within the parent.
# insert(self, index, element)
#   Inserts a subelement at the given position in this element.
# items()
#   Gets element attributes, as a sequence.
# iter()
#   Iterate over all elements in the subtree in document order.
# iterancestors()
#   Iterate over the ancestors of this element (from parent to parent).
# iterchildren()/iterdescendants()
# iterfind()
# itersiblings()
# itertext()
# keys()
#   Gets a list of attribute names.
# makeelement()
#   Creates a new element associated with the same document.
# remove(element)
#   Removes a matching subelement.
# replace(self, old_element, new_element)
# set(self, key, value)
#   Sets an element attribute.
# values()
#   Gets element attribute values as a sequence of strings.
# xpath()
# attrib
# base
# nsmap
# prefix
# sourceline
# tag
# tail
# text

print(etree.tostring(root))

print(dir(etree))
# 'AncestorsIterator', 'AttributeBasedElementClassLookup', 'C14NError', 'CDATA', 'Comment', 'CommentBase',
# 'CustomElementClassLookup', 'DEBUG', 'DTD', 'DTDError', 'DTDParseError', 'DTDValidateError', 'DocInfo',
# 'DocumentInvalid', 'ETCompatXMLParser', 'ETXPath', 'Element', 'ElementBase', 'ElementChildIterator',
# 'ElementClassLookup', 'ElementDefaultClassLookup', 'ElementDepthFirstIterator', 'ElementNamespaceClassLookup',
# 'ElementTextIterator', 'ElementTree', 'Entity', 'EntityBase', 'Error', 'ErrorDomains', 'ErrorLevels', 'ErrorTypes',
# 'Extension', 'FallbackElementClassLookup', 'FunctionNamespace', 'HTML', 'HTMLParser', 'HTMLPullParser',
# 'LIBXML_COMPILED_VERSION', 'LIBXML_VERSION', 'LIBXSLT_COMPILED_VERSION', 'LIBXSLT_VERSION', 'LXML_VERSION',
# 'LxmlError', 'LxmlRegistryError', 'LxmlSyntaxError', 'NamespaceRegistryError', 'PI', 'PIBase', 'ParseError',
# 'ParserBasedElementClassLookup', 'ParserError', 'ProcessingInstruction', 'PyErrorLog', 'PythonElementClassLookup',
# 'QName', 'RelaxNG', 'RelaxNGError', 'RelaxNGErrorTypes', 'RelaxNGParseError', 'RelaxNGValidateError', 'Resolver',
# 'Schematron', 'SchematronError', 'SchematronParseError', 'SchematronValidateError', 'SerialisationError',
# 'SiblingsIterator', 'SubElement', 'TreeBuilder', 'XInclude', 'XIncludeError', 'XML', 'XMLDTDID', 'XMLID',
# 'XMLParser', 'XMLPullParser', 'XMLSchema', 'XMLSchemaError', 'XMLSchemaParseError', 'XMLSchemaValidateError',
# 'XMLSyntaxError', 'XMLTreeBuilder', 'XPath', 'XPathDocumentEvaluator', 'XPathElementEvaluator', 'XPathError',
# 'XPathEvalError', 'XPathEvaluator', 'XPathFunctionError', 'XPathResultError', 'XPathSyntaxError', 'XSLT',
# 'XSLTAccessControl', 'XSLTApplyError', 'XSLTError', 'XSLTExtension', 'XSLTExtensionError', 'XSLTParseError',
# 'XSLTSaveError', '_Attrib', '_BaseErrorLog', '_Comment', '_Document', '_DomainErrorLog', '_Element',
# '_ElementIterator', '_ElementMatchIterator', '_ElementStringResult', '_ElementTagMatcher', '_ElementTree',
# '_ElementUnicodeResult', '_Entity', '_ErrorLog', '_FeedParser', '_IDDict', '_ListErrorLog', '_LogEntry',
# '_ProcessingInstruction', '_RotatingErrorLog', '_SaxParserTarget', '_TargetParserResult', '_Validator',
# '_XPathEvaluatorBase', '_XSLTProcessingInstruction', '_XSLTResultTree', 'adopt_external_document',
# 'cleanup_namespaces', 'clear_error_log', 'dump', 'fromstring', 'fromstringlist', 'get_default_parser',
# 'htmlfile', 'iselement', 'iterparse', 'iterwalk', 'memory_debugger', 'parse', 'parseid', 'register_namespace',
# 'set_default_parser', 'set_element_class_lookup', 'strip_attributes', 'strip_elements', 'strip_tags', 'tostring',
# 'tostringlist', 'tounicode', 'use_global_python_log', 'xmlfile'

# Writing the tree to a file (left disabled, as in the original notes):
# tree = etree.ElementTree(root)
# tree.write("test.xml", pretty_print=True, xml_declaration=True, encoding='utf-8')
# 以下模块可以无视
# Example: read attributes from, and build, XML with the stdlib
# xml.dom.minidom module.
# NOTE(review): `certificate`, `cpu`, `memory` and `disk` are not defined in
# this snippet; they are presumably supplied by the surrounding program
# (an XML string and attribute values) - confirm before running.
import xml.dom.minidom
from xml.dom.minidom import parse, parseString

# Read XML node data.
doc = parseString(certificate)  # convert the XML data into a DOM document
for node in doc.getElementsByTagName("CERT_LIST"):
    for hostnode in node.getElementsByTagName("CERT"):
        # Each value is rebound on every <CERT>, so after the loops only the
        # last element's values remain.
        ip = hostnode.getAttribute("ip")
        username = hostnode.getAttribute("username")
        password = hostnode.getAttribute("password")
        datacenter = hostnode.getAttribute("datacenter")
        cluster = hostnode.getAttribute("cluster")

# Write XML data.
# NOTE(review): the original line breaks were lost; this section is assumed
# to sit at top level, after the loops - confirm.
impl = xml.dom.minidom.getDOMImplementation()
dom1 = impl.createDocument(None, 'SPEC_LIST', None)
root = dom1.documentElement
root.setAttribute("cpu", cpu)
root.setAttribute("memory", memory)
root.setAttribute("disk", disk)
specifications = dom1.toxml()  # convert the DOM document back into XML text