1,需要添加文件HtmlAgilityPack.dll以及對它的引用
2,類代碼
/// <summary>
/// Static helpers that parse an HTML string with HtmlAgilityPack and pull out
/// inner text or attribute values from the nodes selected by an XPath expression.
/// </summary>
public class XPathClass
{
    /// <summary>
    /// Returns the inner text of the first node matching <paramref name="xpath"/>;
    /// e.g. for <c>&lt;span&gt;text&lt;/span&gt;</c> the result is "text".
    /// </summary>
    /// <param name="htmlSource">HTML page source.</param>
    /// <param name="xpath">XPath expression selecting the node.</param>
    /// <returns>The node's inner text, or <c>null</c> when no node matches.</returns>
    public static string GetInnerText(string htmlSource, string xpath)
    {
        HtmlNode node = LoadDocument(htmlSource).DocumentNode.SelectSingleNode(xpath);
        return node != null ? node.InnerText : null;
    }

    /// <summary>
    /// Returns the value of an attribute on the first node matching <paramref name="xpath"/>;
    /// e.g. for <c>&lt;a href="#"&gt;&lt;/a&gt;</c> and attribute "href" the result is "#".
    /// </summary>
    /// <param name="htmlSource">HTML page source.</param>
    /// <param name="xpath">XPath expression selecting the node.</param>
    /// <param name="attrName">Name of the attribute to read.</param>
    /// <returns>
    /// The attribute value, or <c>null</c> when no node matches or the matched node
    /// does not carry the attribute.
    /// </returns>
    public static string GetAttribute(string htmlSource, string xpath, string attrName)
    {
        HtmlNode node = LoadDocument(htmlSource).DocumentNode.SelectSingleNode(xpath);
        return node != null ? ReadAttributeValue(node, attrName) : null;
    }

    /// <summary>
    /// Returns the inner text of every node matching <paramref name="xpath"/>, in the
    /// order the nodes are returned by the selection.
    /// </summary>
    /// <param name="htmlSource">HTML page source.</param>
    /// <param name="xpath">XPath expression selecting the nodes.</param>
    /// <returns>
    /// One inner-text entry per matched node, or <c>null</c> when the selection yields
    /// no node collection.
    /// </returns>
    public static List<string> GetInnerTexts(string htmlSource, string xpath)
    {
        HtmlNodeCollection nodes = LoadDocument(htmlSource).DocumentNode.SelectNodes(xpath);
        if (nodes == null)
        {
            // Kept as null (not an empty list) so existing callers that test for null still work.
            return null;
        }

        List<string> texts = new List<string>(nodes.Count);
        foreach (HtmlNode node in nodes)
        {
            texts.Add(node.InnerText);
        }
        return texts;
    }

    /// <summary>
    /// Returns the value of an attribute for every node matching <paramref name="xpath"/>;
    /// e.g. for <c>&lt;a href="#"&gt;&lt;/a&gt;</c> and attribute "href" the entry is "#".
    /// </summary>
    /// <param name="htmlSource">HTML page source.</param>
    /// <param name="xpath">XPath expression selecting the nodes.</param>
    /// <param name="attrName">Name of the attribute to read.</param>
    /// <returns>
    /// One entry per matched node, in selection order; an entry is <c>null</c> when that
    /// node does not carry the attribute, so positions stay aligned with the matched nodes.
    /// Returns <c>null</c> when the selection yields no node collection.
    /// </returns>
    public static List<string> GetAttributes(string htmlSource, string xpath, string attrName)
    {
        HtmlNodeCollection nodes = LoadDocument(htmlSource).DocumentNode.SelectNodes(xpath);
        if (nodes == null)
        {
            // Kept as null (not an empty list) so existing callers that test for null still work.
            return null;
        }

        List<string> values = new List<string>(nodes.Count);
        foreach (HtmlNode node in nodes)
        {
            values.Add(ReadAttributeValue(node, attrName));
        }
        return values;
    }

    /// <summary>Parses <paramref name="htmlSource"/> into a new HtmlAgilityPack document.</summary>
    private static HtmlAgilityPack.HtmlDocument LoadDocument(string htmlSource)
    {
        // Fully qualified on both sides so the name cannot be confused with another
        // HtmlDocument type in scope (e.g. System.Windows.Forms.HtmlDocument).
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(htmlSource);
        return doc;
    }

    /// <summary>Reads an attribute value from a node, yielding <c>null</c> when the attribute is absent.</summary>
    private static string ReadAttributeValue(HtmlNode node, string attrName)
    {
        // The attribute lookup can come back null for a missing attribute; dereferencing
        // .Value directly would then throw NullReferenceException.
        HtmlAgilityPack.HtmlAttribute attribute = node.Attributes[attrName];
        return attribute != null ? attribute.Value : null;
    }
}
3,