最近一直都在搞網站抓取方面的開發,閑着無聊逛逛論壇,發現有些帖子還是寫的相當不錯的,只是一篇一篇的點進去比較麻煩,於是就寫了個小軟件只是為了方便查看博客園和CSDN上的優秀文章。其實這個還可以拓展的,比如說可以添加RSS功能,查看新聞網站的新聞。代碼比較簡單,可以考慮用個工廠模式。
寫的比較亂,都不敢上代碼了。求大神噴!
2013-6-28號更新
1、添加了皮膚
2013-6-29號更新
1、解決了ListView控件添加數據閃爍問題。
2、取消皮膚,加快數據加載速度。
3、優化了瀏覽文章體驗
裡面有幾個類庫非常不錯,想要的可以拿去。
// Class description: HttpHelps class, used for HTTP access via POST or GET -
//   direct access, with cookies, with client certificates, etc.; a proxy can be configured.
// Important note (from the original author): please do not modify this class yourself,
//   because a modified copy cannot be upgraded to newer versions. If there really is a
//   problem, please submit a suggestion on the official site and it will be fixed promptly.
// Coding date:   2011-09-20
// Author:        蘇飛 (Su Fei)
// Contact:       361983679
// Official site: http://www.sufeinet.com/thread-3-1-1.html
// Modified date: 2013-04-14
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
using System.IO.Compression;
using System.Security.Cryptography.X509Certificates;
using System.Net.Security;

namespace Common.PageHelper
{
    /// <summary>
    /// Helper for issuing HTTP requests (GET/POST) described by an <see cref="HttpItem"/>
    /// and returning the outcome as an <see cref="HttpResult"/>.
    /// </summary>
    /// <remarks>
    /// The current request, response and encoding are kept in instance fields, so one
    /// instance should not be used for several requests at the same time.
    /// </remarks>
    public class HttpHelper
    {
        #region Predefined methods and fields

        // Encoding used to decode the response; overwritten from HttpItem.Encoding on every request.
        private Encoding encoding = Encoding.Default;
        // Request object used to issue the call; created in SetCer.
        private HttpWebRequest request = null;
        // Response object of the most recent call.
        private HttpWebResponse response = null;

        // Characters that can never be part of a charset name; used to cut trailing junk
        // (for example further attributes) that the meta-tag regex may capture.
        private static readonly char[] CharsetTerminators = new char[] { ' ', '\t', '\r', '\n', '/', '>' };

        /// <summary>
        /// Executes the prepared request and converts the response into an <see cref="HttpResult"/>.
        /// </summary>
        /// <param name="objhttpitem">Request options; ResultType and IsToLower are read here.</param>
        /// <returns>
        /// The populated result. When the request fails, <c>Html</c> carries the exception message.
        /// </returns>
        private HttpResult GetHttpRequestData(HttpItem objhttpitem)
        {
            HttpResult result = new HttpResult();
            try
            {
                #region Obtain the response
                using (response = (HttpWebResponse)request.GetResponse())
                {
                    result.StatusCode = response.StatusCode;
                    result.StatusDescription = response.StatusDescription;
                    result.Header = response.Headers;
                    if (response.Cookies != null)
                    {
                        result.CookieCollection = response.Cookies;
                    }
                    if (response.Headers["set-cookie"] != null)
                    {
                        result.Cookie = response.Headers["set-cookie"];
                    }

                    byte[] rawResponse = ReadResponseBytes(response);

                    // Hand back the raw bytes only when the caller asked for them.
                    if (objhttpitem.ResultType == ResultType.Byte)
                    {
                        result.ResultByte = rawResponse;
                    }

                    // No encoding configured: detect it from the page / headers.
                    if (encoding == null)
                    {
                        encoding = DetectEncoding(rawResponse, response.CharacterSet);
                    }

                    result.Html = encoding.GetString(rawResponse);
                }
                #endregion
            }
            catch (WebException ex)
            {
                result.Html = ex.Message;
                // ex.Response is null when no HTTP response was received at all
                // (timeout, DNS failure, refused connection), so it must be checked.
                response = ex.Response as HttpWebResponse;
                if (response != null)
                {
                    result.StatusCode = response.StatusCode;
                    result.StatusDescription = response.StatusDescription;
                    // Release the connection held by the error response.
                    response.Close();
                }
                else
                {
                    // There is no HTTP status to report; 0 avoids pretending the call returned OK.
                    result.StatusCode = (HttpStatusCode)0;
                    result.StatusDescription = ex.Status.ToString();
                }
            }
            catch (Exception ex)
            {
                // StatusCode keeps its default value here (unchanged legacy behaviour).
                result.Html = ex.Message;
            }

            if (objhttpitem.IsToLower)
            {
                result.Html = result.Html.ToLower();
            }
            return result;
        }

        /// <summary>
        /// Reads the complete response body, decompressing it when the Content-Encoding is gzip.
        /// </summary>
        /// <param name="webResponse">The response whose body is read.</param>
        /// <returns>The (decompressed) body bytes; an empty array when there is no body stream.</returns>
        private static byte[] ReadResponseBytes(HttpWebResponse webResponse)
        {
            using (Stream responseStream = webResponse.GetResponseStream())
            {
                if (responseStream == null)
                {
                    return new byte[0];
                }

                string contentEncoding = webResponse.ContentEncoding;
                bool isGzip = contentEncoding != null
                    && contentEncoding.Equals("gzip", StringComparison.OrdinalIgnoreCase);

                if (isGzip)
                {
                    using (GZipStream gzip = new GZipStream(responseStream, CompressionMode.Decompress))
                    using (MemoryStream unzipped = GetMemoryStream(gzip))
                    {
                        return unzipped.ToArray();
                    }
                }

                using (MemoryStream plain = GetMemoryStream(responseStream))
                {
                    return plain.ToArray();
                }
            }
        }

        /// <summary>
        /// Determines the text encoding of a response when the caller did not specify one.
        /// </summary>
        /// <remarks>
        /// Order of precedence: charset found in a &lt;meta&gt; tag, then the charset of the
        /// response header, then UTF-8. A charset name that the runtime does not recognise is
        /// skipped instead of aborting the whole request.
        /// </remarks>
        /// <param name="rawResponse">Raw body bytes.</param>
        /// <param name="headerCharset">Value of <c>HttpWebResponse.CharacterSet</c>; may be null or empty.</param>
        /// <returns>The encoding to decode the body with; never null.</returns>
        private static Encoding DetectEncoding(byte[] rawResponse, string headerCharset)
        {
            Match meta = Regex.Match(Encoding.Default.GetString(rawResponse), "<meta([^<]*)charset=([^<]*)[\"']", RegexOptions.IgnoreCase);
            // Invariant lower-casing: culture-sensitive ToLower() breaks names containing 'I' under tr-TR.
            string charter = meta.Groups[2].Value.ToLowerInvariant();
            charter = charter.Replace("\"", "").Replace("'", "").Replace(";", "").Trim();

            // The greedy capture can include later attributes (e.g. "utf-8 name=x"); keep the first token.
            int cut = charter.IndexOfAny(CharsetTerminators);
            if (cut >= 0)
            {
                charter = charter.Substring(0, cut);
            }

            // Legacy mapping kept from the original implementation.
            charter = charter.Replace("iso-8859-1", "gbk");

            Encoding detected = null;
            if (charter.Length > 2)
            {
                detected = TryGetEncoding(charter);
            }
            if (detected == null && !string.IsNullOrEmpty(headerCharset))
            {
                detected = TryGetEncoding(headerCharset.Trim().Trim('"', '\''));
            }
            return detected ?? Encoding.UTF8;
        }

        /// <summary>
        /// Resolves an encoding by name without throwing for unknown names.
        /// </summary>
        /// <param name="name">Charset name, e.g. "utf-8".</param>
        /// <returns>The encoding, or null when the name is empty or not supported.</returns>
        private static Encoding TryGetEncoding(string name)
        {
            if (string.IsNullOrEmpty(name))
            {
                return null;
            }
            try
            {
                return Encoding.GetEncoding(name);
            }
            catch (ArgumentException)
            {
                return null;
            }
            catch (NotSupportedException)
            {
                return null;
            }
        }

        /// <summary>
        /// Copies a stream into memory; written by hand because Stream.CopyTo is not
        /// available before .NET 4.0.
        /// </summary>
        /// <param name="streamResponse">The stream to drain.</param>
        /// <returns>A MemoryStream holding every byte read from <paramref name="streamResponse"/>.</returns>
        private static MemoryStream GetMemoryStream(Stream streamResponse)
        {
            MemoryStream copy = new MemoryStream();
            byte[] chunk = new byte[4096];
            int bytesRead;
            while ((bytesRead = streamResponse.Read(chunk, 0, chunk.Length)) > 0)
            {
                copy.Write(chunk, 0, bytesRead);
            }
            return copy;
        }

        /// <summary>
        /// Creates the request and applies every option of the <see cref="HttpItem"/> to it.
        /// </summary>
        /// <param name="objhttpItem">Request options.</param>
        private void SetRequest(HttpItem objhttpItem)
        {
            // Creates the HttpWebRequest (and attaches the client certificate if configured).
            SetCer(objhttpItem);

            // Custom headers.
            if (objhttpItem.Header != null && objhttpItem.Header.Count > 0)
            {
                foreach (string item in objhttpItem.Header.AllKeys)
                {
                    request.Headers.Add(item, objhttpItem.Header[item]);
                }
            }

            // Proxy.
            SetProxy(objhttpItem);

            // Request method (GET or POST) and timeouts.
            request.Method = objhttpItem.Method;
            request.Timeout = objhttpItem.Timeout;
            request.ReadWriteTimeout = objhttpItem.ReadWriteTimeout;

            request.Accept = objhttpItem.Accept;
            request.ContentType = objhttpItem.ContentType;
            request.UserAgent = objhttpItem.UserAgent;

            // Encoding used to decode the response; null enables auto-detection.
            encoding = objhttpItem.Encoding;

            SetCookie(objhttpItem);

            request.Referer = objhttpItem.Referer;
            request.AllowAutoRedirect = objhttpItem.Allowautoredirect;

            // Must come after all headers are set: writing the body sends the headers.
            SetPostData(objhttpItem);

            if (objhttpItem.Connectionlimit > 0)
            {
                request.ServicePoint.ConnectionLimit = objhttpItem.Connectionlimit;
            }
        }

        /// <summary>
        /// Creates the request for the configured URL and, when a certificate path is given,
        /// attaches that client certificate.
        /// </summary>
        /// <param name="objhttpItem">Request options; URL and CerPath are read.</param>
        private void SetCer(HttpItem objhttpItem)
        {
            if (!string.IsNullOrEmpty(objhttpItem.CerPath))
            {
                // Must be set before the connection is created.
                // SECURITY NOTE(review): this installs a process-wide callback that accepts EVERY
                // server certificate (see CheckValidationResult). Kept for backward compatibility.
                ServicePointManager.ServerCertificateValidationCallback = new System.Net.Security.RemoteCertificateValidationCallback(CheckValidationResult);
                request = (HttpWebRequest)WebRequest.Create(objhttpItem.URL);
                request.ClientCertificates.Add(new X509Certificate(objhttpItem.CerPath));
            }
            else
            {
                request = (HttpWebRequest)WebRequest.Create(objhttpItem.URL);
            }
        }

        /// <summary>
        /// Applies the cookie string and/or cookie collection to the request.
        /// </summary>
        /// <param name="objhttpItem">Request options; Cookie and CookieCollection are read.</param>
        private void SetCookie(HttpItem objhttpItem)
        {
            // Raw Cookie header.
            if (!string.IsNullOrEmpty(objhttpItem.Cookie))
            {
                request.Headers[HttpRequestHeader.Cookie] = objhttpItem.Cookie;
            }

            // Cookie objects.
            if (objhttpItem.CookieCollection != null)
            {
                request.CookieContainer = new CookieContainer();
                request.CookieContainer.Add(objhttpItem.CookieCollection);
            }
        }

        /// <summary>
        /// Writes the request body when the method contains "post" (case-insensitive).
        /// </summary>
        /// <remarks>
        /// For <c>PostDataType.FilePath</c> the file is read with the configured encoding, which
        /// therefore must not be null. String and file content are sent as Encoding.Default bytes.
        /// </remarks>
        /// <param name="objhttpItem">Request options; PostDataType, Postdata and PostdataByte are read.</param>
        private void SetPostData(HttpItem objhttpItem)
        {
            bool isPost = request.Method.Trim().IndexOf("post", StringComparison.OrdinalIgnoreCase) >= 0;
            if (!isPost)
            {
                return;
            }

            byte[] buffer = null;
            if (objhttpItem.PostDataType == PostDataType.Byte && objhttpItem.PostdataByte != null && objhttpItem.PostdataByte.Length > 0)
            {
                // Raw bytes supplied by the caller.
                buffer = objhttpItem.PostdataByte;
            }
            else if (objhttpItem.PostDataType == PostDataType.FilePath && !string.IsNullOrEmpty(objhttpItem.Postdata))
            {
                // Postdata is a file path; the reader is disposed even if reading fails.
                using (StreamReader reader = new StreamReader(objhttpItem.Postdata, encoding))
                {
                    buffer = Encoding.Default.GetBytes(reader.ReadToEnd());
                }
            }
            else if (!string.IsNullOrEmpty(objhttpItem.Postdata))
            {
                // Plain string body.
                buffer = Encoding.Default.GetBytes(objhttpItem.Postdata);
            }

            if (buffer != null)
            {
                request.ContentLength = buffer.Length;
                // The request stream must be closed before GetResponse is called.
                using (Stream requestStream = request.GetRequestStream())
                {
                    requestStream.Write(buffer, 0, buffer.Length);
                }
            }
        }

        /// <summary>
        /// Configures the proxy when a proxy address is given.
        /// </summary>
        /// <param name="objhttpItem">Request options; ProxyIp, ProxyUserName and ProxyPwd are read.</param>
        private void SetProxy(HttpItem objhttpItem)
        {
            if (!string.IsNullOrEmpty(objhttpItem.ProxyIp))
            {
                // Proxy server with its credentials.
                WebProxy myProxy = new WebProxy(objhttpItem.ProxyIp, false);
                myProxy.Credentials = new NetworkCredential(objhttpItem.ProxyUserName, objhttpItem.ProxyPwd);
                request.Proxy = myProxy;
                // Credentials of the request itself.
                request.Credentials = CredentialCache.DefaultNetworkCredentials;
            }
        }

        /// <summary>
        /// Certificate validation callback that accepts every server certificate.
        /// </summary>
        /// <param name="sender">Sender of the validation request.</param>
        /// <param name="certificate">Certificate presented by the server.</param>
        /// <param name="chain">Certificate chain.</param>
        /// <param name="errors">Policy errors found during validation (ignored).</param>
        /// <returns>Always true.</returns>
        public bool CheckValidationResult(object sender, X509Certificate certificate, X509Chain chain, SslPolicyErrors errors)
        {
            // SECURITY NOTE(review): always accepting disables TLS server authentication.
            return true;
        }

        #endregion

        #region Public entry point

        /// <summary>
        /// Performs the request described by <paramref name="objhttpItem"/> and returns the response data.
        /// </summary>
        /// <param name="objhttpItem">Request options.</param>
        /// <returns>
        /// The result of the call. If preparing the request fails, a result whose <c>Html</c> is the
        /// exception message and whose <c>StatusDescription</c> marks a configuration error is returned.
        /// </returns>
        public HttpResult GetHtml(HttpItem objhttpItem)
        {
            try
            {
                // Prepare the request.
                SetRequest(objhttpItem);
            }
            catch (Exception ex)
            {
                return new HttpResult()
                {
                    Cookie = "",
                    Header = null,
                    Html = ex.Message,
                    StatusDescription = "配置參考時報錯"
                };
            }

            // Execute it and read the data.
            return GetHttpRequestData(objhttpItem);
        }

        #endregion
    }

    /// <summary>
    /// Options describing one HTTP request.
    /// </summary>
    public class HttpItem
    {
        string _URL = string.Empty;
        /// <summary>
        /// Request URL; required.
        /// </summary>
        public string URL
        {
            get { return _URL; }
            set { _URL = value; }
        }

        string _Method = "GET";
        /// <summary>
        /// Request method; defaults to GET. For POST, Postdata (or PostdataByte) should be set.
        /// </summary>
        public string Method
        {
            get { return _Method; }
            set { _Method = value; }
        }

        int _Timeout = 100000;
        /// <summary>
        /// Request timeout, assigned to HttpWebRequest.Timeout; defaults to 100000.
        /// </summary>
        public int Timeout
        {
            get { return _Timeout; }
            set { _Timeout = value; }
        }

        int _ReadWriteTimeout = 30000;
        /// <summary>
        /// Timeout for writing POST data, assigned to HttpWebRequest.ReadWriteTimeout; defaults to 30000.
        /// </summary>
        public int ReadWriteTimeout
        {
            get { return _ReadWriteTimeout; }
            set { _ReadWriteTimeout = value; }
        }

        string _Accept = "text/html, application/xhtml+xml, */*";
        /// <summary>
        /// Accept header; defaults to "text/html, application/xhtml+xml, */*".
        /// </summary>
        public string Accept
        {
            get { return _Accept; }
            set { _Accept = value; }
        }

        string _ContentType = "text/html";
        /// <summary>
        /// Content-Type header of the request; defaults to "text/html".
        /// </summary>
        public string ContentType
        {
            get { return _ContentType; }
            set { _ContentType = value; }
        }

        string _UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)";
        /// <summary>
        /// User-Agent header; defaults to
        /// "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)".
        /// </summary>
        public string UserAgent
        {
            get { return _UserAgent; }
            set { _UserAgent = value; }
        }

        Encoding _Encoding = null;
        /// <summary>
        /// Encoding of the returned data; defaults to null, which enables auto-detection.
        /// Typical values are utf-8, gbk or gb2312.
        /// </summary>
        public Encoding Encoding
        {
            get { return _Encoding; }
            set { _Encoding = value; }
        }

        private PostDataType _PostDataType = PostDataType.String;
        /// <summary>
        /// Kind of POST data; defaults to <see cref="Common.PageHelper.PostDataType.String"/>.
        /// </summary>
        public PostDataType PostDataType
        {
            get { return _PostDataType; }
            set { _PostDataType = value; }
        }

        string _Postdata = string.Empty;
        /// <summary>
        /// POST body as a string (or a file path when PostDataType is FilePath).
        /// </summary>
        public string Postdata
        {
            get { return _Postdata; }
            set { _Postdata = value; }
        }

        private byte[] _PostdataByte = null;
        /// <summary>
        /// POST body as raw bytes; used when PostDataType is Byte.
        /// </summary>
        public byte[] PostdataByte
        {
            get { return _PostdataByte; }
            set { _PostdataByte = value; }
        }

        CookieCollection cookiecollection = null;
        /// <summary>
        /// Cookies to send, as a collection of cookie objects.
        /// </summary>
        public CookieCollection CookieCollection
        {
            get { return cookiecollection; }
            set { cookiecollection = value; }
        }

        string _Cookie = string.Empty;
        /// <summary>
        /// Cookies to send, as a raw Cookie header value.
        /// </summary>
        public string Cookie
        {
            get { return _Cookie; }
            set { _Cookie = value; }
        }

        string _Referer = string.Empty;
        /// <summary>
        /// Referer header (the previously visited address).
        /// </summary>
        public string Referer
        {
            get { return _Referer; }
            set { _Referer = value; }
        }

        string _CerPath = string.Empty;
        /// <summary>
        /// Absolute path of the client certificate.
        /// </summary>
        public string CerPath
        {
            get { return _CerPath; }
            set { _CerPath = value; }
        }

        private Boolean isToLower = false;
        /// <summary>
        /// Whether the returned text is converted to lower case; defaults to false.
        /// </summary>
        public Boolean IsToLower
        {
            get { return isToLower; }
            set { isToLower = value; }
        }

        private Boolean allowautoredirect = false;
        /// <summary>
        /// Whether redirects are followed (the result is then the redirected page); defaults to false.
        /// </summary>
        public Boolean Allowautoredirect
        {
            get { return allowautoredirect; }
            set { allowautoredirect = value; }
        }

        private int connectionlimit = 1024;
        /// <summary>
        /// Maximum number of connections; defaults to 1024. Applied only when greater than zero.
        /// </summary>
        public int Connectionlimit
        {
            get { return connectionlimit; }
            set { connectionlimit = value; }
        }

        private string proxyusername = string.Empty;
        /// <summary>
        /// User name for the proxy server.
        /// </summary>
        public string ProxyUserName
        {
            get { return proxyusername; }
            set { proxyusername = value; }
        }

        private string proxypwd = string.Empty;
        /// <summary>
        /// Password for the proxy server.
        /// </summary>
        public string ProxyPwd
        {
            get { return proxypwd; }
            set { proxypwd = value; }
        }

        private string proxyip = string.Empty;
        /// <summary>
        /// Address of the proxy server; the proxy is used only when this is non-empty.
        /// </summary>
        public string ProxyIp
        {
            get { return proxyip; }
            set { proxyip = value; }
        }

        private ResultType resulttype = ResultType.String;
        /// <summary>
        /// Kind of result requested (String or Byte); defaults to String.
        /// </summary>
        public ResultType ResultType
        {
            get { return resulttype; }
            set { resulttype = value; }
        }

        private WebHeaderCollection header = new WebHeaderCollection();
        /// <summary>
        /// Additional request headers.
        /// </summary>
        public WebHeaderCollection Header
        {
            get { return header; }
            set { header = value; }
        }
    }

    /// <summary>
    /// Data returned from one HTTP request.
    /// </summary>
    public class HttpResult
    {
        string _Cookie = string.Empty;
        /// <summary>
        /// Value of the Set-Cookie response header, if any.
        /// </summary>
        public string Cookie
        {
            get { return _Cookie; }
            set { _Cookie = value; }
        }

        CookieCollection cookiecollection = new CookieCollection();
        /// <summary>
        /// Cookies of the response as a collection of cookie objects.
        /// </summary>
        public CookieCollection CookieCollection
        {
            get { return cookiecollection; }
            set { cookiecollection = value; }
        }

        private string html = string.Empty;
        /// <summary>
        /// Response body decoded as text, or the exception message when the request failed.
        /// </summary>
        public string Html
        {
            get { return html; }
            set { html = value; }
        }

        private byte[] resultbyte = null;
        /// <summary>
        /// Raw response bytes; populated only when ResultType.Byte was requested, otherwise null.
        /// </summary>
        public byte[] ResultByte
        {
            get { return resultbyte; }
            set { resultbyte = value; }
        }

        private WebHeaderCollection header = new WebHeaderCollection();
        /// <summary>
        /// Response headers.
        /// </summary>
        public WebHeaderCollection Header
        {
            get { return header; }
            set { header = value; }
        }

        private string statusDescription = "";
        /// <summary>
        /// Status description of the response.
        /// </summary>
        public string StatusDescription
        {
            get { return statusDescription; }
            set { statusDescription = value; }
        }

        private HttpStatusCode statusCode = HttpStatusCode.OK;
        /// <summary>
        /// Status code of the response; defaults to OK.
        /// </summary>
        public HttpStatusCode StatusCode
        {
            get { return statusCode; }
            set { statusCode = value; }
        }
    }

    /// <summary>
    /// Kind of result to return.
    /// </summary>
    public enum ResultType
    {
        /// <summary>
        /// Only text is returned: only Html carries data.
        /// </summary>
        String,
        /// <summary>
        /// Text and bytes are returned: both ResultByte and Html carry data.
        /// </summary>
        Byte
    }

    /// <summary>
    /// Format of the POST data; the default is String.
    /// </summary>
    public enum PostDataType
    {
        /// <summary>
        /// String data; Encoding does not have to be set.
        /// </summary>
        String,
        /// <summary>
        /// Byte data; PostdataByte must be set, Encoding may be left null.
        /// </summary>
        Byte,
        /// <summary>
        /// File upload; Postdata must be the absolute file path and Encoding must be set.
        /// </summary>
        FilePath
    }
}