前言
最近在研究模擬登錄的各種方法, 主要想要實現的兩個功能是:
1.點擊按鈕可以直接跳轉並登錄到某一個系統中。
2.抓取某一個系統中某一個頁面中的特定數據。
為此在網上查了許多資料,首先意識到自己對 HTTP 協議基礎知識的欠缺;初步了解後,明白想要實現模擬登錄,首先要學會抓包這項基本技能。關於抓包這裡就不詳細介紹了,向大家推薦一款不錯的抓包軟件 Fiddler。
首先客戶端向服務端請求無非兩種類型get或post,所以我們要了解在登錄某一個系統時post的地址,以及需要post的參數。這些都需要通過抓包來獲取。
一、簡單的表單提交
第一種功能跳轉就是簡單的表單的提交,所有系統的登錄都是表單的post提交,所以只需要在html中模擬寫出該系統的表單需要提交的參數並賦值,提交表單即可跳轉。
/// <summary>
/// Builds the HTML markup of a POST form whose hidden inputs carry the given
/// parameters, so that submitting the form posts them to the target system.
/// </summary>
/// <param name="model">
/// Carries the form's <c>action</c> URL and the <c>postDict</c> name/value pairs
/// that become hidden inputs. A null <c>postDict</c> produces a form with no
/// hidden inputs.
/// </param>
/// <returns>The form markup as a single string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="model"/> is null.</exception>
public string getPostFormHtml(HJFormModel model)
{
    if (model == null)
    {
        throw new ArgumentNullException("model");
    }

    StringBuilder sb = new StringBuilder();

    // Every dynamic value is HTML-encoded before it is placed inside a quoted
    // attribute: a raw quote, '<' or '&' would otherwise terminate the attribute
    // and corrupt (or inject into) the generated markup. The browser decodes the
    // entities again, so the submitted values are unchanged.
    sb.Append("<form method=\"post\" id=\"myHttpForm\" action=\"" + System.Net.WebUtility.HtmlEncode(model.action) + "\">");
    sb.Append(" <div>");
    sb.Append(" <br />");

    if (model.postDict != null)
    {
        foreach (var dic in model.postDict)
        {
            sb.Append(" <input type=\"hidden\" name=\"" + System.Net.WebUtility.HtmlEncode(dic.Key)
                + "\" value=\"" + System.Net.WebUtility.HtmlEncode(dic.Value) + "\" />");
        }
    }

    sb.Append(" <input type=\"submit\" value=\"登錄\" />");
    sb.Append(" </div>");
    sb.Append(" </form>");
    return sb.ToString();
}
/// <summary>
/// Request model describing an HTML form to generate: where it posts to and
/// which name/value pairs it submits.
/// </summary>
public class HJFormModel
{
    /// <summary>
    /// URL the form is submitted to (the form's <c>action</c> attribute).
    /// </summary>
    public string action { get; set; }

    /// <summary>
    /// Name/value pairs to submit with the form.
    /// </summary>
    public Dictionary<string, string> postDict { get; set; }
}
上面是生成表單的代碼,下面是請求參數類。做法就是從後台返回一段 html,生成一個表單,提交即可跳轉;其中 postDict 指需要提交的參數,這裡以鍵值對的形式賦值。如果遇到需要輸入驗證碼的登錄,那麼就需要在表單中增加一張指向驗證碼地址的圖片,由人工識別後填寫(目前沒有想到更好的方法)。
二、模擬登錄抓取頁面數據
第二種功能是抓取頁面的特定數據,其實就是獲取某一頁面所有的 html,再從中截取需要的部分。流程一樣,首先要登錄到系統中,下面附上公共類的代碼。
/// <summary>
/// Runs the whole flow described by <paramref name="Item"/>: fetch the initial
/// cookies, POST the login data, GET the target page and cut out the fragment
/// between the configured begin/finish markers.
/// </summary>
/// <param name="Item">URLs, cookie names, login data and fragment markers.</param>
/// <returns>
/// A result whose <c>html</c> is the extracted fragment, or a string starting with
/// "獲取失敗" when the page was empty or any step threw. <c>CookieCoken</c> is set
/// only when no exception occurred.
/// </returns>
public HJHttpResult GetHtml(HJHttpItem Item)
{
    HJHttpResult res = new HJHttpResult();
    try
    {
        CookieCollection cook = getCook(Item.GetCookieUrl, Item.Domain, Item.CookName);

        // The string form of the login data wins when both forms are supplied.
        if (!string.IsNullOrWhiteSpace(Item.postStingData))
        {
            postCookLogin(Item.PostUrl, cook, Item.postStingData);
        }
        else
        {
            postCookLogin(Item.PostUrl, cook, Item.postKeyData);
        }

        string html = getAllHtml(Item.GetHtmlUrl, cook);
        if (string.IsNullOrWhiteSpace(html))
        {
            res.html = "獲取失敗";
        }
        else
        {
            res.html = getCutOutHtml(Item.beginString, Item.finishString, html);
        }

        res.CookieCoken = cook;
        return res;
    }
    catch (Exception ex)
    {
        res.html = "獲取失敗" + ex.Message;
        return res;
    }
}

/// <summary>
/// Fetches another page with cookies from an earlier successful login and
/// returns the fragment between the two markers.
/// </summary>
/// <param name="beginString">Text immediately preceding the wanted fragment.</param>
/// <param name="finishString">Text immediately following the wanted fragment.</param>
/// <param name="Url">Address of the page to fetch.</param>
/// <param name="curCookies">Cookies to send with the request.</param>
/// <returns>
/// The extracted fragment, or a string starting with "獲取失敗,請重新Post登錄"
/// followed by the exception message when fetching or extraction throws.
/// </returns>
public string GetUrlHtml(string beginString, string finishString, string Url, CookieCollection curCookies)
{
    try
    {
        string pageHtml = getAllHtml(Url, curCookies);
        return getCutOutHtml(beginString, finishString, pageHtml);
    }
    catch (Exception ex)
    {
        return "獲取失敗,請重新Post登錄" + ex.Message;
    }
}

/// <summary>
/// Returns the part of <paramref name="html"/> located between the first
/// occurrence of <paramref name="beginString"/> and the first occurrence of
/// <paramref name="finishString"/> that follows it. Matching is ordinal.
/// </summary>
/// <param name="beginString">Marker that precedes the wanted fragment.</param>
/// <param name="finishString">Marker that follows the wanted fragment.</param>
/// <param name="html">Text to search.</param>
/// <returns>The text between the two markers (markers excluded).</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Either marker cannot be found (the finish marker is only looked for after the
/// begin marker).
/// </exception>
public string getCutOutHtml(string beginString, string finishString, string html)
{
    int beginIndex = html.IndexOf(beginString, StringComparison.Ordinal);
    if (beginIndex < 0)
    {
        // Without this check a missing marker would yield -1 + length and the
        // method would silently return an unrelated piece of the page.
        throw new ArgumentOutOfRangeException("beginString", "Begin marker was not found in the html.");
    }

    int contentStart = beginIndex + beginString.Length;

    // Search only after the begin marker, so an earlier occurrence of the finish
    // marker cannot produce a negative length.
    int contentEnd = html.IndexOf(finishString, contentStart, StringComparison.Ordinal);
    if (contentEnd < 0)
    {
        throw new ArgumentOutOfRangeException("finishString", "Finish marker was not found after the begin marker.");
    }

    return html.Substring(contentStart, contentEnd - contentStart);
}

/// <summary>
/// Issues a GET to <paramref name="Url"/> and collects the response cookies whose
/// names are listed in <paramref name="CookName"/>.
/// </summary>
/// <param name="Url">Address to request.</param>
/// <param name="Domain">Domain assigned to every returned cookie.</param>
/// <param name="CookName">Names of the cookies to keep.</param>
/// <returns>New cookies carrying the name and value received and the given domain.</returns>
public CookieCollection getCook(string Url, string Domain, string[] CookName)
{
    CookieCollection curCookies = new CookieCollection();

    HttpWebRequest reqget = (HttpWebRequest)WebRequest.Create(Url);
    // A container must be attached, otherwise the response exposes no cookies.
    reqget.CookieContainer = new CookieContainer();
    reqget.Method = "GET";

    using (HttpWebResponse respget = (HttpWebResponse)reqget.GetResponse())
    {
        foreach (Cookie ck in respget.Cookies)
        {
            if (Array.IndexOf(CookName, ck.Name) < 0)
            {
                continue;
            }

            Cookie cookget = new Cookie();
            cookget.Value = ck.Value;
            cookget.Name = ck.Name;
            cookget.Domain = Domain;
            curCookies.Add(cookget);
        }
    }

    return curCookies;
}

/// <summary>
/// POSTs an already encoded form body to the login address, sending the given cookies.
/// The response body is not read.
/// </summary>
/// <param name="Url">Address to post to.</param>
/// <param name="curCookies">Cookies to send with the request.</param>
/// <param name="postData">Form body in <c>name=value&amp;name=value</c> form.</param>
public void postCookLogin(string Url, CookieCollection curCookies, string postData)
{
    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(Url);
    req.CookieContainer = new CookieContainer();
    req.CookieContainer.Add(curCookies);
    req.Method = "POST";
    req.ContentType = "application/x-www-form-urlencoded";

    byte[] postBytes = Encoding.UTF8.GetBytes(postData);
    req.ContentLength = postBytes.Length;

    using (Stream postDataStream = req.GetRequestStream())
    {
        postDataStream.Write(postBytes, 0, postBytes.Length);
    }

    // Getting the response is what actually completes the request; it is
    // disposed immediately so the connection is released.
    using (HttpWebResponse resp = (HttpWebResponse)req.GetResponse())
    {
    }
}

/// <summary>
/// POSTs name/value pairs to the login address, sending the given cookies.
/// The pairs are encoded with <c>quoteParas</c> and sent through the string overload.
/// </summary>
/// <param name="Url">Address to post to.</param>
/// <param name="curCookies">Cookies to send with the request.</param>
/// <param name="postData">Form fields to submit.</param>
public void postCookLogin(string Url, CookieCollection curCookies, Dictionary<string, string> postData)
{
    postCookLogin(Url, curCookies, quoteParas(postData));
}

/// <summary>
/// GETs a page with the given cookies and returns its body as text.
/// </summary>
/// <param name="Url">Address of the page.</param>
/// <param name="curCookies">Cookies to send with the request.</param>
/// <returns>
/// The response body, or "錯誤" followed by the exception message when the
/// request fails with a <see cref="WebException"/>.
/// </returns>
public string getAllHtml(string Url, CookieCollection curCookies)
{
    CookieContainer cookCon = new CookieContainer();
    cookCon.Add(curCookies);
    HttpWebRequest webRequest = createBrowserGetRequest(Url, cookCon);

    try
    {
        using (HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse())
        using (Stream sream = webResponse.GetResponseStream())
        using (StreamReader streamReader = new StreamReader(sream))
        {
            return streamReader.ReadToEnd();
        }
    }
    catch (WebException ex)
    {
        return "錯誤" + ex.Message;
    }
}

/// <summary>
/// Downloads the resource at <paramref name="Url"/> (intended for a captcha image)
/// and writes it to <paramref name="savePath"/>.
/// </summary>
/// <param name="Url">Address of the image.</param>
/// <param name="cookCon">Cookie container attached to the request.</param>
/// <param name="savePath">File the downloaded bytes are written to.</param>
/// <returns>True when the file was written; false when downloading or writing threw.</returns>
public bool boolDowloadCheckImg(string Url, CookieContainer cookCon, string savePath)
{
    HttpWebRequest webRequest = createBrowserGetRequest(Url, cookCon);

    try
    {
        using (HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse())
        using (Stream sream = webResponse.GetResponseStream())
        using (MemoryStream buffer = new MemoryStream())
        {
            // Buffer the whole body first so a failed download never leaves a
            // partially written file behind.
            sream.CopyTo(buffer);
            File.WriteAllBytes(savePath, buffer.ToArray());
        }

        return true;
    }
    catch (Exception)
    {
        // Failure is reported through the return value only.
        return false;
    }
}

/// <summary>
/// Encodes name/value pairs as an <c>application/x-www-form-urlencoded</c> body:
/// <c>name=value</c> items joined with '&amp;', with no leading '&amp;'.
/// </summary>
/// <param name="paras">Pairs to encode.</param>
/// <returns>The encoded body; an empty string when there are no pairs.</returns>
/// <exception cref="ArgumentNullException"><paramref name="paras"/> is null.</exception>
public string quoteParas(Dictionary<string, string> paras)
{
    if (paras == null)
    {
        throw new ArgumentNullException("paras");
    }

    StringBuilder quotedParas = new StringBuilder();
    foreach (KeyValuePair<string, string> para in paras)
    {
        if (quotedParas.Length > 0)
        {
            quotedParas.Append('&');
        }

        // UrlEncode (not UrlPathEncode) is required here: it escapes '&', '=' and
        // '+', which would otherwise be read as field separators by the server.
        quotedParas.Append(HttpUtility.UrlEncode(para.Key));
        quotedParas.Append('=');
        quotedParas.Append(HttpUtility.UrlEncode(para.Value));
    }

    return quotedParas.ToString();
}

/// <summary>
/// Creates the GET request shared by the page and image downloads, with the
/// headers the original code sent.
/// </summary>
private HttpWebRequest createBrowserGetRequest(string Url, CookieContainer cookCon)
{
    HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(Url);
    webRequest.AllowWriteStreamBuffering = true;
    webRequest.Credentials = System.Net.CredentialCache.DefaultCredentials;
    webRequest.MaximumResponseHeadersLength = -1;
    webRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Maxthon; .NET CLR 1.1.4322)";
    webRequest.ContentType = "application/x-www-form-urlencoded";
    webRequest.Method = "GET";
    webRequest.Headers.Add("Accept-Language", "zh-cn");
    // Replaces the hand-written "Accept-Encoding: gzip,deflate" header: the
    // request still advertises compression, but the response stream is now
    // decompressed instead of being handed back as raw compressed bytes.
    webRequest.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
    webRequest.KeepAlive = true;
    webRequest.CookieContainer = cookCon;
    return webRequest;
}
}

/// <summary>
/// Input for the login-and-scrape flow.
/// </summary>
public class HJHttpItem
{
    /// <summary>
    /// URL requested first to obtain the cookies.
    /// </summary>
    public string GetCookieUrl { get; set; }

    /// <summary>
    /// Domain assigned to the collected cookies.
    /// </summary>
    public string Domain { get; set; }

    /// <summary>
    /// Marker preceding the HTML fragment to extract.
    /// </summary>
    public string beginString { get; set; }

    /// <summary>
    /// Marker following the HTML fragment to extract.
    /// </summary>
    public string finishString { get; set; }

    /// <summary>
    /// Names of the cookies to keep, e.g. {"",""}.
    /// </summary>
    public string[] CookName { get; set; }

    /// <summary>
    /// Login data as a ready-made string. Either this or <c>postKeyData</c> must be set.
    /// </summary>
    public string postStingData { get; set; }

    /// <summary>
    /// Login data as name/value pairs. Either this or <c>postStingData</c> must be set.
    /// </summary>
    public Dictionary<string, string> postKeyData { get; set; }

    /// <summary>
    /// URL the login data is posted to.
    /// </summary>
    public string PostUrl { get; set; }

    /// <summary>
    /// URL of the page whose HTML is fetched after login.
    /// </summary>
    public string GetHtmlUrl { get; set; }
}

/// <summary>
/// Output of the login-and-scrape flow.
/// </summary>
public class HJHttpResult
{
    /// <summary>
    /// Extracted HTML fragment, or a failure message.
    /// </summary>
    public string html { get; set; }

    /// <summary>
    /// Cookies used for the session.
    /// </summary>
    public CookieCollection CookieCoken { get; set; }
}
大致原理為:首先獲取相應登錄系統的 Cookie。這裡我們要理解一下 Cookie 中 SessionID 的用處:SessionID 由服務端生成並下發給客戶端,每次打開瀏覽器會更新 SessionID;SessionID 的目的是為了存貯 post 提交後登錄狀態的信息,如登錄的用戶,具體內容由系統而定。獲得 Cookie 後,利用這個 Cookie 和 postdata 向 post 地址提交,這時如果登錄成功,SessionID 就記錄下該賬號的登錄狀態,此時利用 SessionID 訪問該域下的其他頁面就可以了;把 html 獲取下來,再截取出想要的數據。同樣,遇到需要驗證碼的系統,可以先用 get 方法獲取驗證碼圖片,人工識別後填入,再 post 登錄。
三、小結
最近在這方面做了一些小研究,遇到的問題還有很多,比如如何從後台登錄成功後跳轉到瀏覽器中並保持登錄狀態。而且我做測試用的都是一些較為簡單的系統,需要的參數並不複雜。如果哪位大神對這方面有著較深的研究,或者有這方面比較完善的系統可以分享,歡迎指出思路或其他有錯誤的地方,感激不盡。