go解析xml的三種方式


go解析xml的三種方式

之前項目中用到過xml解析,在這里記錄一下。

小文件簡單解析

demo.xml

<?xml version="1.0" encoding="UTF-8"?>
<config>
   <smtpServer>smtp.163.com</smtpServer>
   <smtpPort>25</smtpPort>
   <sender>user@163.com</sender>
   <senderPasswd>123456</senderPasswd>
   <receivers flag="true">
     <age>16</age>
     <user>Mike_Zhang@live.com</user>
     <user>test1@qq.com</user>
     <script>
     <![CDATA[
        function matchwo(a,b) {
            if (a < b && a < 0) then {
                return 1;
            } else {
                return 0;
            }
        }
        ]]>
     </script>
  </receivers>
 </config>

main.go

package main

import (
	"fmt"
	"io/ioutil"
	"encoding/xml"
)
/*
https://studygolang.com/static/pkgdoc/pkg/encoding_xml.htm
*/
// SConfig mirrors the <config> root element of demo.xml. Each field is
// populated from the child element named in its xml tag.
type SConfig struct {
	XMLName      xml.Name   `xml:"config"`       // requires the outermost element to be <config>
	SmtpServer   string     `xml:"smtpServer"`   // text of <smtpServer>
	SmtpPort     int        `xml:"smtpPort"`     // text of <smtpPort>, converted to int
	Sender       string     `xml:"sender"`       // text of <sender>
	SenderPasswd string     `xml:"senderPasswd"` // text of <senderPasswd>
	Receivers    SReceivers `xml:"receivers"`    // nested <receivers> element, decoded as a struct
}
   
  // SReceivers mirrors the <receivers> element nested inside <config>.
  type SReceivers struct {
	Age    int      `xml:"age"`
	Flag   string   `xml:"flag,attr"` // value of the flag attribute on <receivers>
	User   []string `xml:"user"`      // one entry per <user> child element
	Script string   `xml:"script"`    // text of <script>, including <![CDATA[ ... ]]> content
  }

// readXml loads the whole XML file at path into memory, decodes it into an
// SConfig and prints each decoded field to standard output. If the file
// cannot be read or decoded it prints the error and returns.
func readXml(path string) {
	// ioutil.ReadFile opens, reads and closes the file on its own, so there
	// is no file handle to manage here.
	content, readErr := ioutil.ReadFile(path)
	if readErr != nil {
		fmt.Println("讀文件出錯!", readErr)
		return
	}

	var cfg SConfig
	if decodeErr := xml.Unmarshal(content, &cfg); decodeErr != nil {
		fmt.Printf("error: %v", decodeErr)
		return
	}

	recv := cfg.Receivers
	fmt.Println("SmtpServer : ", cfg.SmtpServer)
	fmt.Println("SmtpPort : ", cfg.SmtpPort)
	fmt.Println("Sender : ", cfg.Sender)
	fmt.Println("SenderPasswd : ", cfg.SenderPasswd)
	fmt.Println("Receivers.Flag : ", recv.Flag)
	fmt.Println("Receivers.Age : ", recv.Age)
	fmt.Println("Receivers.Script : ", recv.Script)

	// One line per <user> entry, prefixed with its index.
	for idx := 0; idx < len(recv.User); idx++ {
		fmt.Println(idx, recv.User[idx])
	}
}

// main runs the small-file example against demo.xml in the working directory.
func main() {
	const configPath = "demo.xml"
	readXml(configPath)
}

輸出:

SmtpServer :  smtp.163.com
SmtpPort :  25
Sender :  user@163.com
SenderPasswd :  123456
Receivers.Flag :  true
Receivers.Age :  16
Receivers.Script :

        function matchwo(a,b) {
            if (a < b && a < 0) then {
                return 1;
            } else {
                return 0;
            }
        }


0 Mike_Zhang@live.com
1 test1@qq.com

參考博客

大文件解析

對於超大xml文件的讀取,採用事件驅動的方式逐個 Token 解析,不必一次把整個文件讀入內存,可以節省內存、提高效率:

demo.xml

<?xml version="1.0" encoding="UTF-8"?>
<config>
   <smtpServer>smtp.163.com</smtpServer>
   <smtpPort>25</smtpPort>
   <sender>user@163.com</sender>
  <senderPasswd>123456</senderPasswd>
   <receivers flag="true">
     <age>16</age>
     <user>Mike_Zhang@live.com</user>
     <user>test1@qq.com</user>
     <script>
     <![CDATA[
        function matchwo(a,b) {
            if (a < b && a < 0) then {
                return 1;
            } else {
                return 0;
            }
        }
        ]]>
     </script>
  </receivers>
 </config>

main.go

package main

import (
	"fmt"
	"encoding/xml"
	"bufio"
	"os"
	"io"
)
/*
解析超大 xml 文件
https://studygolang.com/static/pkgdoc/pkg/encoding_xml.htm
*/
// SConfig mirrors the <config> root element of demo.xml. Each field is
// populated from the child element named in its xml tag.
type SConfig struct {
	XMLName      xml.Name   `xml:"config"`       // requires the outermost element to be <config>
	SmtpServer   string     `xml:"smtpServer"`   // text of <smtpServer>
	SmtpPort     int        `xml:"smtpPort"`     // text of <smtpPort>, converted to int
	Sender       string     `xml:"sender"`       // text of <sender>
	SenderPasswd string     `xml:"senderPasswd"` // text of <senderPasswd>
	Receivers    SReceivers `xml:"receivers"`    // nested <receivers> element, decoded as a struct
}
   
  // SReceivers mirrors the <receivers> element nested inside <config>.
  type SReceivers struct {
	Age    int      `xml:"age"`
	Flag   string   `xml:"flag,attr"` // value of the flag attribute on <receivers>
	User   []string `xml:"user"`      // one entry per <user> child element
	Script string   `xml:"script"`    // text of <script>, including <![CDATA[ ... ]]> content
  }

// readXml streams the XML file at path token by token. It prints the name of
// every start element it meets; on the first <config> start element it
// decodes that element into an SConfig, prints the result (or the decode
// error) and returns.
//
// It also returns when the input ends without a <config> element, and it
// prints the error and returns when the tokenizer reports a problem, so
// malformed or unexpected input cannot keep the loop spinning.
func readXml(path string) {
	file, errOpen := os.Open(path)
	if errOpen != nil {
		fmt.Println("打開文件異常!", errOpen)
		return
	}
	defer file.Close()

	// Buffer the file so the decoder's many small reads do not each reach
	// the operating system.
	decoder := xml.NewDecoder(bufio.NewReader(file))

	for {
		t, err := decoder.Token()
		if err == io.EOF {
			// End of input. Token keeps returning io.EOF from here on, so
			// looping on it would never terminate.
			return
		}
		if err != nil {
			fmt.Println("解析錯誤:")
			fmt.Println(err)
			return
		}

		// Only start elements are of interest; skip character data,
		// comments, processing instructions and end elements.
		start, isStart := t.(xml.StartElement)
		if !isStart {
			continue
		}

		name := start.Name.Local
		fmt.Println(name)
		if name != "config" {
			continue
		}

		// DecodeElement consumes the input up to the matching </config>.
		var sConfig SConfig
		if configErr := decoder.DecodeElement(&sConfig, &start); configErr != nil {
			fmt.Println("解析錯誤:")
			fmt.Println(configErr)
		} else {
			fmt.Println(sConfig)
		}
		return
	}
}

// main runs the streaming example against demo.xml in the working directory.
func main() {
	const configPath = "demo.xml"
	readXml(configPath)
}

輸出:

config
{{ config} smtp.163.com 25 user@163.com 123456 {16 true [Mike_Zhang@live.com test1@qq.com]

        function matchwo(a,b) {
            if (a < b && a < 0) then {
                return 1;
            } else {
                return 0;
            }
        }

     }}

復雜結構解析

有的時候xml文件很復雜,嵌套很深,這個時候如果我們使用struct來映射就會很麻煩,好在有一個開源且很方便的解析工具 etree。這個 etree 和 Python 的 etree API 幾乎一樣,用起來簡單好用。

bookstores.xml

<bookstore xmlns:p="urn:schemas-books-com:prices">

  <book category="COOKING">
    <title lang="en">Everyday Italian</title>
    <author>Giada De Laurentiis</author>
    <year>2005</year>
    <p:price>30.00</p:price>
  </book>

  <book category="CHILDREN">
    <title lang="en">Harry Potter</title>
    <author>J K. Rowling</author>
    <year>2005</year>
    <p:price>29.99</p:price>
  </book>

  <book category="WEB">
    <title lang="en">XQuery Kick Start</title>
    <author>James McGovern</author>
    <author>Per Bothner</author>
    <author>Kurt Cagle</author>
    <author>James Linn</author>
    <author>Vaidyanathan Nagarajan</author>
    <year>2003</year>
    <p:price>49.99</p:price>
  </book>

  <book category="WEB">
    <title lang="en">Learning XML</title>
    <author>Erik T. Ray</author>
    <year>2003</year>
    <p:price>39.95</p:price>
  </book>

</bookstore>

main.go

package main
/*

使用 etree 解析復雜結構的 xml 文件
https://godoc.org/github.com/beevik/etree
https://pkg.go.dev/github.com/beevik/etree?tab=doc
https://github.com/beevik/etree
*/

import (
	"fmt"
	"github.com/beevik/etree"// go get github.com/beevik/etree
)

// readXml parses the XML file at path with etree and prints, for every <book>
// child of the <bookstore> root, the element tag, its title with the title's
// lang attribute, and each attribute set on the book element.
//
// It panics if the file cannot be read or parsed, or if the document has no
// <bookstore> root element.
func readXml(path string) {
	doc := etree.NewDocument()
	if err := doc.ReadFromFile(path); err != nil {
		panic(err)
	}

	root := doc.SelectElement("bookstore")
	if root == nil {
		// Without this guard a document with a different root would crash
		// on root.Tag with an uninformative nil-pointer dereference.
		panic(fmt.Sprintf("%s: no <bookstore> root element", path))
	}
	fmt.Println("ROOT element:", root.Tag)

	for _, book := range root.SelectElements("book") {
		fmt.Println("CHILD element:", book.Tag)
		if title := book.SelectElement("title"); title != nil {
			// "unknown" is printed when the title has no lang attribute.
			lang := title.SelectAttrValue("lang", "unknown")
			fmt.Printf("  TITLE: %s (%s)\n", title.Text(), lang)
		}
		for _, attr := range book.Attr {
			fmt.Printf("  ATTR: %s=%s\n", attr.Key, attr.Value)
		}
	}
}

// main runs the etree example against bookstores.xml in the working directory.
func main() {
	const storePath = "bookstores.xml"
	readXml(storePath)
}

輸出:

ROOT element: bookstore
CHILD element: book
  TITLE: Everyday Italian (en)
  ATTR: category=COOKING
CHILD element: book
  TITLE: Harry Potter (en)
  ATTR: category=CHILDREN
CHILD element: book
  TITLE: XQuery Kick Start (en)
  ATTR: category=WEB
CHILD element: book
  TITLE: Learning XML (en)
  ATTR: category=WEB


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM