XPathClass類,使用xpath返回屬性值或文本


1,需要添加文件HtmlAgilityPack.dll以及對它的引用

2,類代碼

public class XPathClass
    {
        /// <summary>
        /// Returns the inner text of the first node matched by an XPath expression.
        /// For example, for <c>&lt;span&gt;text&lt;/span&gt;</c> the result is "text".
        /// </summary>
        /// <param name="htmlSource">HTML source of the page.</param>
        /// <param name="xpath">XPath expression selecting the node.</param>
        /// <returns>
        /// The <c>InnerText</c> of the first matching node, or <c>null</c> when no node matches.
        /// </returns>
        /// <remarks>
        /// Arguments are not validated here; they are passed straight to HtmlAgilityPack.
        /// </remarks>
        public static string GetInnerText(string htmlSource, string xpath)
        {
            HtmlNode node = LoadDocument(htmlSource).DocumentNode.SelectSingleNode(xpath);
            return node != null ? node.InnerText : null;
        }

        /// <summary>
        /// Returns the value of an attribute on the first node matched by an XPath expression.
        /// For example, for <c>&lt;a href="#"&gt;&lt;/a&gt;</c> and attribute "href" the result is "#".
        /// </summary>
        /// <param name="htmlSource">HTML source of the page.</param>
        /// <param name="xpath">XPath expression selecting the node.</param>
        /// <param name="attrName">Name of the attribute to read.</param>
        /// <returns>
        /// The attribute value, or <c>null</c> when no node matches or the matched node
        /// does not carry the attribute.
        /// </returns>
        public static string GetAttribute(string htmlSource, string xpath, string attrName)
        {
            HtmlNode node = LoadDocument(htmlSource).DocumentNode.SelectSingleNode(xpath);
            return node != null ? ReadAttributeValue(node, attrName) : null;
        }

        /// <summary>
        /// Returns the inner text of every node matched by an XPath expression.
        /// </summary>
        /// <param name="htmlSource">HTML source of the page.</param>
        /// <param name="xpath">XPath expression selecting the nodes.</param>
        /// <returns>
        /// A list with one <c>InnerText</c> entry per matching node, in the order the nodes
        /// were returned by <c>SelectNodes</c>; <c>null</c> when <c>SelectNodes</c> returns null
        /// (no match).
        /// </returns>
        public static List<string> GetInnerTexts(string htmlSource, string xpath)
        {
            HtmlNodeCollection nodes = LoadDocument(htmlSource).DocumentNode.SelectNodes(xpath);
            if (nodes == null)
            {
                // Kept for backward compatibility: callers test for null to detect "no match".
                return null;
            }

            List<string> texts = new List<string>(nodes.Count);
            foreach (HtmlNode node in nodes)
            {
                texts.Add(node.InnerText);
            }
            return texts;
        }

        /// <summary>
        /// Returns the value of an attribute on every node matched by an XPath expression.
        /// For example, for <c>&lt;a href="#"&gt;&lt;/a&gt;</c> and attribute "href" the entry is "#".
        /// </summary>
        /// <param name="htmlSource">HTML source of the page.</param>
        /// <param name="xpath">XPath expression selecting the nodes.</param>
        /// <param name="attrName">Name of the attribute to read.</param>
        /// <returns>
        /// A list with one entry per matching node, in the order the nodes were returned by
        /// <c>SelectNodes</c>; an entry is <c>null</c> when that node does not carry the
        /// attribute. Returns <c>null</c> when <c>SelectNodes</c> returns null (no match).
        /// </returns>
        public static List<string> GetAttributes(string htmlSource, string xpath, string attrName)
        {
            HtmlNodeCollection nodes = LoadDocument(htmlSource).DocumentNode.SelectNodes(xpath);
            if (nodes == null)
            {
                // Kept for backward compatibility: callers test for null to detect "no match".
                return null;
            }

            List<string> values = new List<string>(nodes.Count);
            foreach (HtmlNode node in nodes)
            {
                // A null entry (rather than skipping) keeps list positions aligned with the nodes.
                values.Add(ReadAttributeValue(node, attrName));
            }
            return values;
        }

        /// <summary>
        /// Parses the given HTML into a new HtmlAgilityPack document.
        /// </summary>
        /// <param name="htmlSource">HTML source to parse.</param>
        /// <returns>The loaded document.</returns>
        private static HtmlAgilityPack.HtmlDocument LoadDocument(string htmlSource)
        {
            // Fully qualified so the name cannot be confused with another HtmlDocument type
            // (e.g. System.Windows.Forms.HtmlDocument) that the file may also import.
            HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
            document.LoadHtml(htmlSource);
            return document;
        }

        /// <summary>
        /// Reads an attribute value from a node without throwing when the attribute is absent.
        /// </summary>
        /// <param name="node">Node to read from; must not be null.</param>
        /// <param name="attrName">Name of the attribute to read.</param>
        /// <returns>The attribute value, or <c>null</c> when the node has no such attribute.</returns>
        private static string ReadAttributeValue(HtmlNode node, string attrName)
        {
            // The attribute indexer yields null for a missing attribute, so guard before .Value.
            HtmlAttribute attribute = node.Attributes[attrName];
            return attribute != null ? attribute.Value : null;
        }
    }

 

3,


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM