最近一直忙於學習抓包，一直沒有更新博客。要抓取別人的數據，首先要進入他們的網站，所以我就用客戶端模擬瀏覽器登錄。這就要用到 HtmlAgilityPack 這個庫了，可以在 VS 裡面直接安裝，有最新的版本。
// Client used by the login code below for every request. CNNWebClient derives from WebClient
// and assigns a cookie container to each HttpWebRequest it creates.
private CNNWebClient webClient;
// Login routine: body of a method that returns HttpMessage (the enclosing signature is not
// part of this snippet).
// Flow: download the login page, return early if the session is still valid, otherwise post
// the login form together with its hidden tokens and verify the result on the home page.
HtmlDocument html = new HtmlDocument();
string loginHtml = webClient.DownloadString("xxxxxx"); // login page URL
html.LoadHtml(loginHtml);

// The presence of this node is treated as the "already logged in" marker.
HtmlNode success = html.DocumentNode.SelectSingleNode("//*[@id=\"wrapper\"]/div[1]/div[2]/div/div[2]/a[2]");
if (success != null)
{
    // Session is still valid: report success without logging in again.
    return new HttpMessage() { Reslut = success.InnerText };
}

// Hidden form inputs (//*[@id="fm1"]/div[4]/input[1] and input[2]) whose values must be
// echoed back with the credentials.
HtmlNode ltNode = html.DocumentNode.SelectSingleNode("//*[@id=\"fm1\"]/div[4]/input[1]");
HtmlNode executionNode = html.DocumentNode.SelectSingleNode("//*[@id=\"fm1\"]/div[4]/input[2]");
if (ltNode == null || executionNode == null
    || ltNode.Attributes["value"] == null || executionNode.Attributes["value"] == null)
{
    // The page does not contain the expected login form; report failure with the page that
    // was received instead of failing with a NullReferenceException.
    return new HttpMessage() { Reslut = loginHtml, State = false };
}

string lt = ltNode.Attributes["value"].Value;
string execution = executionNode.Attributes["value"].Value;

// Build the form body. Every value is URL-encoded so that characters such as '&', '=', '+'
// or '%' cannot corrupt the application/x-www-form-urlencoded payload.
// NOTE(review): the original passed PassWord through UrlDecode; this assumes PassWord holds
// the raw password. If it is stored percent-encoded, decode it before encoding — confirm.
string postData = string.Format(
    "username={2}&password={3}&remember=1&lt={0}&execution={1}&_eventId=submit",
    HttpUtility.UrlEncode(lt),
    HttpUtility.UrlEncode(execution),
    HttpUtility.UrlEncode(UserName),
    HttpUtility.UrlEncode(PassWord));

// Indexer assignment replaces any previous value, so calling this code repeatedly on the
// same client never accumulates duplicate header values.
webClient.Headers["Content-Type"] = "application/x-www-form-urlencoded"; // required for the form POST
webClient.Headers["User-Agent"] = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)";
string postHtml = webClient.UploadString("xxxxxxxxxx", postData); // submit the login form

// Load the home page and look for the same marker node to decide whether login succeeded.
webClient.Headers["Content-Type"] = "text/html;charset=UTF-8";
HtmlDocument index = new HtmlDocument();
string indexRes = webClient.DownloadString("http://eip.chinatowercom.cn/");
index.LoadHtml(indexRes);

success = index.DocumentNode.SelectSingleNode("//*[@id=\"wrapper\"]/div[1]/div[2]/div/div[2]/a[2]");
if (success == null)
{
    // Marker missing: login failed; hand back the response of the login POST for diagnosis.
    return new HttpMessage() { Reslut = postHtml, State = false };
}

return new HttpMessage() { Reslut = success.InnerText };
這是登錄的方法。另外，客戶端登錄之後要保持 cookie 或者 session，這一點很重要：
/// <summary>
/// A <see cref="WebClient"/> that assigns one shared <see cref="CookieContainer"/> to every
/// HTTP request it creates, so cookies received in one response are sent with later requests,
/// and that applies a configurable timeout to those requests.
/// </summary>
public class CNNWebClient : WebClient
{
    /// <summary>Timeout, in seconds, used when none (or a non-positive one) is supplied.</summary>
    private const int DefaultTimeoutSeconds = 10;

    // One container for the lifetime of the client; this is what keeps the session alive
    // across requests.
    private readonly CookieContainer cookieContainer = new CookieContainer();

    private int _timeOut = DefaultTimeoutSeconds;

    /// <summary>
    /// Creates the request for <paramref name="address"/> and, when it is an HTTP request,
    /// attaches the shared cookie container and the configured timeouts.
    /// </summary>
    /// <param name="address">The URI the request is created for.</param>
    /// <returns>The request created by the base class, configured as described above.</returns>
    protected override WebRequest GetWebRequest(Uri address)
    {
        WebRequest request = base.GetWebRequest(address);

        HttpWebRequest httpRequest = request as HttpWebRequest;
        if (httpRequest != null)
        {
            httpRequest.CookieContainer = cookieContainer;

            // HttpWebRequest expects milliseconds; Timeout is expressed in seconds.
            int timeoutMilliseconds = 1000 * Timeout;
            httpRequest.Timeout = timeoutMilliseconds;
            httpRequest.ReadWriteTimeout = timeoutMilliseconds;
        }

        return request;
    }

    /// <summary>
    /// Request timeout in seconds. Assigning a value that is zero or negative resets it to
    /// the default of 10 seconds.
    /// </summary>
    public int Timeout
    {
        get
        {
            return _timeOut;
        }
        set
        {
            // The fallback and the normal assignment are mutually exclusive; previously the
            // fallback was always overwritten by the invalid value.
            _timeOut = value > 0 ? value : DefaultTimeoutSeconds;
        }
    }
}
這就是基本的登錄方法了，希望能給大家一點靈感。對於程序員來說，有時候一點靈感就足以解決困擾幾天的問題。
