在學習HTML Xpath之前呢我們先來下載一下Dll文件
下載地址:http://htmlagilitypack.codeplex.com/
大家單擊如下圖片下載就行了
<ignore_js_op>
接下來就是在程序中引用一下,
<ignore_js_op>
然后就可以直接調用了,大家看看代碼吧
-
// HtmlDocument is the Html Agility Pack object used to work with an HTML document.
HtmlAgilityPack.HtmlDocument hd = new HtmlAgilityPack.HtmlDocument();

// Parse the HTML text held in strhtml.
hd.LoadHtml(strhtml);

// SelectSingleNode returns null when no node matches the XPath, so check the
// result before reading OuterHtml instead of risking a NullReferenceException.
HtmlAgilityPack.HtmlNode node = hd.DocumentNode.SelectSingleNode("//*[@id='e_font']");
string str = node != null ? node.OuterHtml : string.Empty;
這樣就可以得到一個標簽的Html代碼了
OuterHtml是取包含本身的Html如果是InnerHtml就是取的包含在這個標簽之內的所有Html代碼了
這點大家要注意了
如果大家想獲取Html代碼的Xpath路徑就是這部分
- //*[@id='e_font']
這個其實很簡單,只要大家安裝一個Firebug就行了,
看下圖片
<ignore_js_op>
大家只要進入選擇模式,然后選擇你要的內容,然后右鍵復制一下就行了。
然后放在SelectSingleNode()方法里就OK了
下面我說說幾個方法和屬性的意思吧。
方法
SelectNodes 獲取的是一個集合
SelectSingleNode 獲取一個標簽
SetAttributeValue 設置標簽的屬性值例如:SetAttributeValue("name","xpath-89");這說明把name屬性的值修改為xpath-89
屬性
OuterHtml 是取包含本身的Html
InnerHtml 取的包含在這個標簽之內的所有Html代碼了
XPath 獲取相對應的Xpath值
Attributes 獲取標簽的屬性集合,例如:Attributes["name"].Value 可以取得name屬性的值
也可以進行添加屬性例如:
-
// Look up the node addressed by the stored XPath, then attach a new attribute to it.
HtmlNode target = hd.DocumentNode.SelectSingleNode(item.Key);
target.Attributes.Add("xpathid", "xpath_1");
下面我寫了一個遞歸獲取Html頁面所有Xpath值的方法大家看一下吧
-
// Collected results: one ObjXpath per recorded node, with Key holding the node's XPath.
public List<ObjXpath> XpathList = new List<ObjXpath>();

// HTML source to analyse. Fill it in before calling SartNode, for example with the
// author's HttpHelper class (http://www.sufeinet.com/thread-3-1-1.html).
public string strhtml = "";

// Running counter used to number the entries added to XpathList.
private int Index = 0;
-
/// <summary>
/// Parses <c>strhtml</c> and rebuilds <c>XpathList</c> from scratch.
/// </summary>
private void SartNode()
{
    // HtmlDocument is the Html Agility Pack object used to work with an HTML document.
    HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();

    // Parse the HTML text.
    document.LoadHtml(strhtml);

    // Discard the results of any previous run.
    Index = 0;
    XpathList.Clear();

    // Start the walk at the document root. Setxpath records the children of the
    // node it receives, so passing each top-level node instead (as was done
    // before) silently left the top-level elements themselves, e.g. /html[1],
    // out of the list.
    Setxpath(document.DocumentNode);
}
-
/// <summary>
/// Walks the HTML DOM recursively and records the XPath of every child node.
/// </summary>
/// <param name="node">Node whose children are to be processed.</param>
private void Setxpath(HtmlNode node)
{
    foreach (HtmlNode child in node.ChildNodes)
    {
        // Nodes whose XPath contains '#' are skipped together with their subtree.
        if (child.XPath.IndexOf('#') >= 0)
        {
            continue;
        }

        // Record the node first, so parents are listed before their descendants.
        XpathList.Add(new ObjXpath() { id = Index.ToString(), Key = child.XPath, Value = "" });
        Index++;

        // Descend only when there is something below this node.
        if (child.ChildNodes.Count > 0)
        {
            Setxpath(child);
        }
    }
}
-
/// <summary>
/// One entry of the XPath list.
/// </summary>
public class ObjXpath
{
    /// <summary>Sequence number of the entry, stored as text.</summary>
    public string id { get; set; }

    /// <summary>XPath of the node.</summary>
    public string Key { get; set; }

    /// <summary>
    /// Content associated with the XPath. The traversal code shown in this
    /// article always stores an empty string here.
    /// </summary>
    public string Value { get; set; }
}
XpathList 就是獲取的所有Xpath值了,大家有興趣的話可以試試
我們先來看看效果吧
<ignore_js_op>
好了下面放出所有代碼給大家
-
using System ;
-
using System. Collections. Generic ;
-
using System. ComponentModel ;
-
using System. Data ;
-
using System. Drawing ;
-
using System.Linq ;
-
using System. Text ;
-
using System.Windows.Forms ;
-
using System. Text. RegularExpressions ;
-
using System. Threading ;
-
using HtmlAgilityPack ;
-
using System. IO ;
-
using System. Runtime. Serialization.Json ;
-
-
namespace AutoXpathTools
-
{
-
/// <summary>
/// Main window of the XPath tool: downloads a page, lists the XPath of every
/// element found in it, and shows the outer HTML of the XPath the user picks.
/// </summary>
public partial class Form1 : Form
{
    public Form1()
    {
        InitializeComponent();
    }

    #region Private fields and delegates

    // Delegate used to hand a string over to the UI thread.
    private delegate void SetListBox(string str);

    // XPaths found by the most recent scan; Key holds the XPath, Value is left empty.
    private List<ObjXpath> XpathList = new List<ObjXpath>();

    // Running counter used to number the entries added to XpathList.
    private int Index = 0;

    // Document loaded by btnGetXpath_Click and queried by button3_Click.
    private HtmlAgilityPack.HtmlDocument hd = new HtmlAgilityPack.HtmlDocument();

    #endregion

    /// <summary>
    /// Downloads the page whose URL is in textBox1, shows its source in txtXml
    /// and starts a background scan that lists every XPath in listBox1.
    /// </summary>
    private void btnGetXpath_Click(object sender, EventArgs e)
    {
        try
        {
            HttpHelper http = new HttpHelper();
            HttpItem item = new HttpItem() { URL = textBox1.Text.Trim(), IsToLower = false, Encoding = "gbk" };
            txtXml.Text = http.GetHtml(item);

            // Snapshot the text on the UI thread: the worker below must not read
            // a control property from another thread.
            string html = txtXml.Text;

            if (!string.IsNullOrWhiteSpace(html)
                && !string.Equals(html.Trim(), "error", StringComparison.OrdinalIgnoreCase))
            {
                // Load the document used later by button3_Click.
                hd.LoadHtml(html);

                // Drop the entries of the previous page so the list matches the
                // freshly loaded document (XpathList and Index are reset by SartNode).
                listBox1.Items.Clear();

                // NOTE(review): clicking again while a scan is still running starts a
                // second worker that shares XpathList and Index with the first one.
                Thread pingTask = new Thread(new ThreadStart(delegate
                {
                    SartNode(html);
                }));
                // Background thread, so an unfinished scan cannot keep the process
                // alive after the form is closed.
                pingTask.IsBackground = true;
                pingTask.Start();
            }
            else
            {
                txtXml.Text = "根據您的的ULR:" + textBox1.Text.Trim() + "無法得到任何內容";
            }
        }
        catch (Exception ex)
        {
            txtXml.Text = ex.Message.Trim();
        }
    }

    /// <summary>
    /// Parses <paramref name="strhtml"/> and rebuilds XpathList from scratch.
    /// Runs on the worker thread started by btnGetXpath_Click.
    /// </summary>
    /// <param name="strhtml">HTML source to analyse.</param>
    private void SartNode(string strhtml)
    {
        // A separate document instance is used for the scan, independent of the hd field.
        HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
        document.LoadHtml(strhtml);

        Index = 0;
        XpathList.Clear();

        // Start the walk at the document root. Setxpath records the children of the
        // node it receives, so passing each top-level node instead (as was done
        // before) left the top-level elements themselves, e.g. /html[1], out of the list.
        Setxpath(document.DocumentNode);
    }

    /// <summary>
    /// Walks the HTML DOM recursively, recording and displaying the XPath of
    /// every child node.
    /// </summary>
    /// <param name="node">Node whose children are to be processed.</param>
    private void Setxpath(HtmlNode node)
    {
        foreach (HtmlNode item in node.ChildNodes)
        {
            // Nodes whose XPath contains '#' are skipped together with their subtree.
            if (item.XPath.Contains("#"))
            {
                continue;
            }

            // Record the node first, so parents are listed before their descendants.
            XpathList.Add(new ObjXpath() { id = Index.ToString(), Key = item.XPath, Value = "" });
            UIContorol(item.XPath);
            Index++;

            if (item.ChildNodes.Count > 0)
            {
                Setxpath(item);
            }
        }
    }

    /// <summary>
    /// Appends an XPath to listBox1 and shows it in the status bar. May be called
    /// from any thread; the update itself is always performed on the UI thread.
    /// </summary>
    /// <param name="str">XPath to display.</param>
    private void UIContorol(string str)
    {
        if (InvokeRequired)
        {
            // The form may have been closed while the scan was still running.
            if (IsDisposed)
            {
                return;
            }

            // Re-enter this method on the UI thread through the SetListBox delegate.
            Invoke(new SetListBox(UIContorol), str);
            return;
        }

        listBox1.Items.Add(str);
        toolStripStatusLabel1.Text = str;
    }

    /// <summary>
    /// Copies the XPath selected in listBox1 into txtPath.
    /// </summary>
    private void listBox1_SelectedValueChanged(object sender, EventArgs e)
    {
        if (listBox1.SelectedItem != null)
        {
            txtPath.Text = listBox1.SelectedItem.ToString().Trim();
        }
    }

    /// <summary>
    /// Shows the outer HTML of the node addressed by the XPath in txtPath.
    /// </summary>
    private void button3_Click(object sender, EventArgs e)
    {
        string xpath = txtPath.Text.Trim();
        if (xpath.Length == 0)
        {
            // Nothing to look up.
            txtContents.Text = string.Empty;
            return;
        }

        try
        {
            // SelectSingleNode returns null when no node matches the XPath.
            HtmlNode node = hd.DocumentNode.SelectSingleNode(xpath);
            txtContents.Text = node != null ? node.OuterHtml : string.Empty;
        }
        catch (System.Xml.XPath.XPathException ex)
        {
            // The text box is editable, so the expression may be malformed.
            txtContents.Text = ex.Message;
        }
    }

    private void Form1_Load(object sender, EventArgs e)
    {
        // Intentionally empty. The commented-out login experiment that used to live
        // here was removed because it embedded an account name and password.
    }
}
-
/// <summary>
/// One entry of the XPath list built by Form1.
/// </summary>
public class ObjXpath
{
    /// <summary>Sequence number of the entry, stored as text.</summary>
    public string id { get; set; }

    /// <summary>XPath of the node.</summary>
    public string Key { get; set; }

    /// <summary>
    /// Content associated with the XPath. Form1 always stores an empty string here.
    /// </summary>
    public string Value { get; set; }
}
-
}
就到這里吧,大家可以下載我的源代碼試試手
打包下載:
<ignore_js_op> AutoXpathTools.zip (76.32 KB, 下載次數: 0)
如果你感覺可以的話就給我推薦一下吧。感謝大家