最近一直都在搞網站抓取方面的開發,閑着無聊逛逛論壇,發現有些帖子還是寫的相當不錯的,只是一篇一篇的點進去比較麻煩,於是就寫了個小軟件只是為了方便查看博客園和CSDN上的優秀文章。其實這個還可以拓展的,比如說可以添加RSS功能,查看新聞網站的新聞。代碼比較簡單,可以考慮用個工廠模式。
寫的比較亂,都不敢上代碼了。求大神噴!
2013-6-28號更新
1、添加了皮膚
2013-6-29號更新
1、解決了ListView控件添加數據閃爍問題。
2、取消皮膚加快數據加載速度
3、優化了瀏覽文章體驗


里面有幾個類庫非常不錯,想要的可以拿去。
///
/// 類說明:HttpHelper類,用來實現Http訪問,支持Post或者Get方式,可直接訪問,也可帶Cookie或帶證書訪問,並且可以設置代理
/// 重要提示:請不要自行修改本類,否則將無法升級到新版本。如果確實有什么問題請到官方網站提建議,
/// 我們一定會及時修改
/// 編碼日期:2011-09-20
/// 編 碼 人:蘇飛
/// 聯系方式:361983679
/// 官方網址:http://www.sufeinet.com/thread-3-1-1.html
/// 修改日期:2013-04-14
///
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
using System.IO.Compression;
using System.Security.Cryptography.X509Certificates;
using System.Net.Security;
namespace Common.PageHelper
{
/// <summary>
/// Issues one HTTP or HTTPS request described by an <see cref="HttpItem"/> and
/// returns the outcome as an <see cref="HttpResult"/>.
/// </summary>
/// <remarks>
/// The request being prepared and the working encoding are stored in instance
/// fields that every call to <see cref="GetHtml"/> overwrites, so a single
/// instance must not be used for overlapping calls.
/// </remarks>
public class HttpHelper
{
    #region Private state and helpers

    // Size of the scratch buffer used while draining a stream into memory.
    private const int CopyBufferSize = 8192;

    // Encoding used to decode the response body. SetRequest copies it from
    // HttpItem.Encoding on every call; null means "detect it from the response".
    private Encoding encoding = Encoding.Default;

    // Request currently being prepared and sent; created by SetCer.
    private HttpWebRequest request = null;

    /// <summary>
    /// Sends the prepared request and converts the response (or the failure)
    /// into an <see cref="HttpResult"/>.
    /// </summary>
    /// <param name="objhttpitem">Options of the current call (result type, lower-casing).</param>
    /// <returns>
    /// A result whose <c>Html</c> holds the decoded body on success or the
    /// exception message on failure. This method does not let exceptions escape.
    /// </returns>
    private HttpResult GetHttpRequestData(HttpItem objhttpitem)
    {
        HttpResult result = new HttpResult();
        try
        {
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                result.StatusCode = response.StatusCode;
                result.StatusDescription = response.StatusDescription;
                result.Header = response.Headers;
                if (response.Cookies != null)
                    result.CookieCollection = response.Cookies;
                if (response.Headers["set-cookie"] != null)
                    result.Cookie = response.Headers["set-cookie"];

                byte[] rawResponse = ReadResponseBody(response);

                // The raw bytes are only handed back when the caller asked for them.
                if (objhttpitem.ResultType == ResultType.Byte)
                    result.ResultByte = rawResponse;

                // No encoding supplied by the caller: work it out from the page itself.
                if (encoding == null)
                    encoding = DetectEncoding(rawResponse, response.CharacterSet);

                result.Html = encoding.GetString(rawResponse);
            }
        }
        catch (WebException ex)
        {
            result.Html = ex.Message;
            // ex.Response is null when no HTTP response was received at all
            // (timeout, name resolution failure, refused connection, ...), so it
            // must not be dereferenced blindly.
            using (WebResponse failed = ex.Response)
            {
                HttpWebResponse httpFailed = failed as HttpWebResponse;
                if (httpFailed != null)
                {
                    result.StatusCode = httpFailed.StatusCode;
                    result.StatusDescription = httpFailed.StatusDescription;
                }
                else
                {
                    // No HTTP status exists; StatusCode keeps its default and the
                    // transport-level failure reason is reported instead.
                    result.StatusDescription = ex.Status.ToString();
                }
            }
        }
        catch (Exception ex)
        {
            // Deliberate best effort: any other failure is reported through Html.
            result.Html = ex.Message;
        }

        if (objhttpitem.IsToLower)
            result.Html = result.Html.ToLower();
        return result;
    }

    /// <summary>
    /// Reads the whole response body into memory, decompressing it first when the
    /// response declares a <c>gzip</c> content encoding.
    /// </summary>
    /// <param name="response">Response whose body is read.</param>
    /// <returns>The (decompressed) body bytes; an empty array when there is no body stream.</returns>
    private static byte[] ReadResponseBody(HttpWebResponse response)
    {
        using (Stream body = response.GetResponseStream())
        {
            if (body == null)
                return new byte[0];

            bool gzipped = response.ContentEncoding != null
                && response.ContentEncoding.Equals("gzip", StringComparison.OrdinalIgnoreCase);

            if (gzipped)
            {
                using (GZipStream unzipped = new GZipStream(body, CompressionMode.Decompress))
                using (MemoryStream buffered = GetMemoryStream(unzipped))
                {
                    return buffered.ToArray();
                }
            }

            using (MemoryStream buffered = GetMemoryStream(body))
            {
                return buffered.ToArray();
            }
        }
    }

    /// <summary>
    /// Chooses the encoding for a response body: the charset of an HTML
    /// <c>meta</c> tag first, then the charset of the response header, then UTF-8.
    /// </summary>
    /// <param name="rawResponse">Undecoded response body.</param>
    /// <param name="headerCharset">Charset reported by the response; may be null or empty.</param>
    /// <returns>An encoding; never null.</returns>
    private static Encoding DetectEncoding(byte[] rawResponse, string headerCharset)
    {
        Match meta = Regex.Match(Encoding.Default.GetString(rawResponse), "<meta([^<]*)charset=([^<]*)[\"']", RegexOptions.IgnoreCase);

        // Invariant lower-casing keeps the "iso-8859-1" substitution below working
        // under every current culture.
        string charter = meta.Success ? meta.Groups[2].Value.ToLowerInvariant() : string.Empty;
        charter = charter.Replace("\"", "").Replace("'", "").Replace(";", "").Replace("iso-8859-1", "gbk").Trim();

        // The greedy capture can swallow attributes that follow the charset
        // (e.g. <meta charset="utf-8" name="x">); keep only the first token.
        int cut = charter.IndexOfAny(new char[] { ' ', '\t', '\r', '\n' });
        if (cut > 0)
            charter = charter.Substring(0, cut);

        Encoding detected = null;
        if (charter.Length > 2)
            detected = TryGetEncoding(charter);
        if (detected == null)
            detected = TryGetEncoding(headerCharset);
        if (detected == null)
            detected = Encoding.UTF8;
        return detected;
    }

    /// <summary>
    /// Looks up an encoding by name without throwing for unknown names.
    /// </summary>
    /// <param name="name">Charset name; may be null or empty.</param>
    /// <returns>The encoding, or null when the name is empty or not recognised.</returns>
    private static Encoding TryGetEncoding(string name)
    {
        if (string.IsNullOrEmpty(name))
            return null;
        try
        {
            return Encoding.GetEncoding(name);
        }
        catch (ArgumentException)
        {
            return null;
        }
        catch (NotSupportedException)
        {
            return null;
        }
    }

    /// <summary>
    /// Copies a stream into a new <see cref="MemoryStream"/> with a manual read
    /// loop (kept instead of <c>Stream.CopyTo</c> for frameworks older than .NET 4.0).
    /// </summary>
    /// <param name="streamResponse">Stream to drain; it is read to the end but not closed.</param>
    /// <returns>A memory stream holding every byte that was read.</returns>
    private static MemoryStream GetMemoryStream(Stream streamResponse)
    {
        MemoryStream copy = new MemoryStream();
        byte[] buffer = new byte[CopyBufferSize];
        int bytesRead;
        while ((bytesRead = streamResponse.Read(buffer, 0, buffer.Length)) > 0)
        {
            copy.Write(buffer, 0, bytesRead);
        }
        return copy;
    }

    /// <summary>
    /// Creates the request and applies every option of <paramref name="objhttpItem"/> to it.
    /// </summary>
    /// <param name="objhttpItem">Request description.</param>
    private void SetRequest(HttpItem objhttpItem)
    {
        // Creates the request object (and attaches the client certificate, if any).
        SetCer(objhttpItem);

        // Caller-supplied headers.
        if (objhttpItem.Header != null && objhttpItem.Header.Count > 0)
        {
            foreach (string item in objhttpItem.Header.AllKeys)
            {
                request.Headers.Add(item, objhttpItem.Header[item]);
            }
        }

        SetProxy(objhttpItem);

        request.Method = objhttpItem.Method;
        request.Timeout = objhttpItem.Timeout;
        request.ReadWriteTimeout = objhttpItem.ReadWriteTimeout;
        request.Accept = objhttpItem.Accept;
        request.ContentType = objhttpItem.ContentType;
        request.UserAgent = objhttpItem.UserAgent;

        // Remember the decoding choice for this call; null triggers auto-detection.
        encoding = objhttpItem.Encoding;

        SetCookie(objhttpItem);
        request.Referer = objhttpItem.Referer;
        request.AllowAutoRedirect = objhttpItem.Allowautoredirect;

        // Must come after Method is set: the body is only written for POST.
        SetPostData(objhttpItem);

        if (objhttpItem.Connectionlimit > 0)
            request.ServicePoint.ConnectionLimit = objhttpItem.Connectionlimit;
    }

    /// <summary>
    /// Creates the request for <c>objhttpItem.URL</c> and, when a certificate path
    /// is given, attaches that client certificate.
    /// </summary>
    /// <param name="objhttpItem">Request description.</param>
    private void SetCer(HttpItem objhttpItem)
    {
        bool useCertificate = !string.IsNullOrEmpty(objhttpItem.CerPath);

        if (useCertificate)
        {
            // Has to be installed before the request is created.
            // NOTE(security): this callback is process-wide and accepts every server
            // certificate, so it disables TLS validation for all requests in the
            // process. Kept for backward compatibility.
            ServicePointManager.ServerCertificateValidationCallback = new System.Net.Security.RemoteCertificateValidationCallback(CheckValidationResult);
        }

        request = (HttpWebRequest)WebRequest.Create(objhttpItem.URL);

        if (useCertificate)
            request.ClientCertificates.Add(new X509Certificate(objhttpItem.CerPath));
    }

    /// <summary>
    /// Applies the cookie string and/or the cookie collection to the request.
    /// </summary>
    /// <param name="objhttpItem">Request description.</param>
    private void SetCookie(HttpItem objhttpItem)
    {
        // Raw Cookie header.
        if (!string.IsNullOrEmpty(objhttpItem.Cookie))
            request.Headers[HttpRequestHeader.Cookie] = objhttpItem.Cookie;

        // Structured cookies.
        if (objhttpItem.CookieCollection != null)
        {
            request.CookieContainer = new CookieContainer();
            request.CookieContainer.Add(objhttpItem.CookieCollection);
        }
    }

    /// <summary>
    /// Writes the request body when the method contains "post" (case-insensitive).
    /// </summary>
    /// <remarks>
    /// The body comes from <c>PostdataByte</c>, from the file named by
    /// <c>Postdata</c>, or from the <c>Postdata</c> string, depending on
    /// <c>PostDataType</c>. Text is converted to bytes with <c>Encoding.Default</c>.
    /// </remarks>
    /// <param name="objhttpItem">Request description.</param>
    private void SetPostData(HttpItem objhttpItem)
    {
        if (request.Method.IndexOf("post", StringComparison.OrdinalIgnoreCase) < 0)
            return;

        byte[] buffer = null;
        if (objhttpItem.PostDataType == PostDataType.Byte && objhttpItem.PostdataByte != null && objhttpItem.PostdataByte.Length > 0)
        {
            // Raw bytes supplied by the caller.
            buffer = objhttpItem.PostdataByte;
        }
        else if (objhttpItem.PostDataType == PostDataType.FilePath && !string.IsNullOrEmpty(objhttpItem.Postdata))
        {
            // Postdata is a file path; the file is read with the call's encoding.
            using (StreamReader reader = new StreamReader(objhttpItem.Postdata, encoding))
            {
                buffer = Encoding.Default.GetBytes(reader.ReadToEnd());
            }
        }
        else if (!string.IsNullOrEmpty(objhttpItem.Postdata))
        {
            // Plain string body.
            buffer = Encoding.Default.GetBytes(objhttpItem.Postdata);
        }

        if (buffer == null)
            return;

        request.ContentLength = buffer.Length;
        // The request stream has to be closed so the connection can be reused.
        using (Stream body = request.GetRequestStream())
        {
            body.Write(buffer, 0, buffer.Length);
        }
    }

    /// <summary>
    /// Routes the request through the proxy named by <c>ProxyIp</c>, if one is given.
    /// </summary>
    /// <param name="objhttpItem">Request description.</param>
    private void SetProxy(HttpItem objhttpItem)
    {
        if (string.IsNullOrEmpty(objhttpItem.ProxyIp))
            return;

        WebProxy myProxy = new WebProxy(objhttpItem.ProxyIp, false);
        // Credentials presented to the proxy.
        myProxy.Credentials = new NetworkCredential(objhttpItem.ProxyUserName, objhttpItem.ProxyPwd);
        request.Proxy = myProxy;
        // Credentials presented to the target server.
        request.Credentials = CredentialCache.DefaultNetworkCredentials;
    }

    /// <summary>
    /// Server certificate validation callback that accepts every certificate.
    /// </summary>
    /// <param name="sender">Object that raised the validation.</param>
    /// <param name="certificate">Certificate presented by the server.</param>
    /// <param name="chain">Certificate chain.</param>
    /// <param name="errors">Policy errors found by the default validation; ignored.</param>
    /// <returns>Always <c>true</c>.</returns>
    public bool CheckValidationResult(object sender, X509Certificate certificate, X509Chain chain, SslPolicyErrors errors)
    {
        // NOTE(security): accepting unconditionally means invalid or forged
        // certificates are trusted.
        return true;
    }

    #endregion

    #region Public API

    /// <summary>
    /// Performs the request described by <paramref name="objhttpItem"/>.
    /// </summary>
    /// <param name="objhttpItem">Request description; <c>URL</c> is required.</param>
    /// <returns>
    /// The response data. When preparing the request fails, the returned result
    /// carries the exception message in <c>Html</c> and a fixed
    /// <c>StatusDescription</c>; no exception is thrown.
    /// </returns>
    public HttpResult GetHtml(HttpItem objhttpItem)
    {
        try
        {
            SetRequest(objhttpItem);
        }
        catch (Exception ex)
        {
            return new HttpResult() { Cookie = "", Header = null, Html = ex.Message, StatusDescription = "配置參考時報錯" };
        }
        return GetHttpRequestData(objhttpItem);
    }

    #endregion
}
/// <summary>
/// Describes one HTTP request for <c>HttpHelper.GetHtml</c>: target, headers,
/// body, cookies, proxy and how the response should be returned.
/// </summary>
public class HttpItem
{
    /// <summary>
    /// Creates a request description populated with the default values.
    /// </summary>
    public HttpItem()
    {
        URL = string.Empty;
        Method = "GET";
        Timeout = 100000;
        ReadWriteTimeout = 30000;
        Accept = "text/html, application/xhtml+xml, */*";
        ContentType = "text/html";
        UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)";
        Encoding = null;
        PostDataType = PostDataType.String;
        Postdata = string.Empty;
        PostdataByte = null;
        CookieCollection = null;
        Cookie = string.Empty;
        Referer = string.Empty;
        CerPath = string.Empty;
        IsToLower = false;
        Allowautoredirect = false;
        Connectionlimit = 1024;
        ProxyUserName = string.Empty;
        ProxyPwd = string.Empty;
        ProxyIp = string.Empty;
        ResultType = ResultType.String;
        Header = new WebHeaderCollection();
    }

    /// <summary>
    /// Address to request. Required; defaults to an empty string.
    /// </summary>
    public string URL { get; set; }

    /// <summary>
    /// HTTP method; defaults to "GET". A body is only sent when the method
    /// contains "post".
    /// </summary>
    public string Method { get; set; }

    /// <summary>
    /// Value assigned to the request's <c>Timeout</c>; defaults to 100000.
    /// </summary>
    public int Timeout { get; set; }

    /// <summary>
    /// Value assigned to the request's <c>ReadWriteTimeout</c>; defaults to 30000.
    /// </summary>
    public int ReadWriteTimeout { get; set; }

    /// <summary>
    /// Accept header; defaults to "text/html, application/xhtml+xml, */*".
    /// </summary>
    public string Accept { get; set; }

    /// <summary>
    /// Content-Type header of the request; defaults to "text/html".
    /// </summary>
    public string ContentType { get; set; }

    /// <summary>
    /// User-Agent header; defaults to an Internet Explorer 9 identification string.
    /// </summary>
    public string UserAgent { get; set; }

    /// <summary>
    /// Encoding used to decode the response. The default, null, lets the helper
    /// detect it from the response.
    /// </summary>
    public Encoding Encoding { get; set; }

    /// <summary>
    /// Selects where the POST body comes from; defaults to <c>PostDataType.String</c>.
    /// </summary>
    public PostDataType PostDataType { get; set; }

    /// <summary>
    /// POST body as text, or the path of the file to send when
    /// <see cref="PostDataType"/> is <c>FilePath</c>. Defaults to an empty string.
    /// </summary>
    public string Postdata { get; set; }

    /// <summary>
    /// POST body as raw bytes, used when <see cref="PostDataType"/> is <c>Byte</c>.
    /// Defaults to null.
    /// </summary>
    public byte[] PostdataByte { get; set; }

    /// <summary>
    /// Cookies sent through a cookie container; defaults to null (none).
    /// </summary>
    public CookieCollection CookieCollection { get; set; }

    /// <summary>
    /// Raw Cookie header value; defaults to an empty string (header not set).
    /// </summary>
    public string Cookie { get; set; }

    /// <summary>
    /// Referer header value; defaults to an empty string.
    /// </summary>
    public string Referer { get; set; }

    /// <summary>
    /// Path of a client certificate file. When non-empty the certificate is
    /// attached to the request. Defaults to an empty string.
    /// </summary>
    public string CerPath { get; set; }

    /// <summary>
    /// When true the returned text is converted to lower case; defaults to false.
    /// </summary>
    public bool IsToLower { get; set; }

    /// <summary>
    /// Whether redirects are followed automatically; defaults to false.
    /// </summary>
    public bool Allowautoredirect { get; set; }

    /// <summary>
    /// Connection limit applied to the request's service point when greater than
    /// zero; defaults to 1024.
    /// </summary>
    public int Connectionlimit { get; set; }

    /// <summary>
    /// User name for the proxy; defaults to an empty string.
    /// </summary>
    public string ProxyUserName { get; set; }

    /// <summary>
    /// Password for the proxy; defaults to an empty string.
    /// </summary>
    public string ProxyPwd { get; set; }

    /// <summary>
    /// Proxy address. A proxy is only used when this is non-empty; defaults to an
    /// empty string.
    /// </summary>
    public string ProxyIp { get; set; }

    /// <summary>
    /// Chooses whether the raw response bytes are returned in addition to the
    /// text; defaults to <c>ResultType.String</c>.
    /// </summary>
    public ResultType ResultType { get; set; }

    /// <summary>
    /// Additional request headers; defaults to an empty collection.
    /// </summary>
    public WebHeaderCollection Header { get; set; }
}
/// <summary>
/// Outcome of a request made through <c>HttpHelper.GetHtml</c>.
/// </summary>
public class HttpResult
{
    /// <summary>
    /// Creates a result populated with the default values.
    /// </summary>
    public HttpResult()
    {
        Cookie = string.Empty;
        CookieCollection = new CookieCollection();
        Html = string.Empty;
        ResultByte = null;
        Header = new WebHeaderCollection();
        StatusDescription = "";
        StatusCode = HttpStatusCode.OK;
    }

    /// <summary>
    /// Raw value of the response's set-cookie header; defaults to an empty string.
    /// </summary>
    public string Cookie { get; set; }

    /// <summary>
    /// Cookies of the response; defaults to an empty collection.
    /// </summary>
    public CookieCollection CookieCollection { get; set; }

    /// <summary>
    /// Decoded response text, or the exception message when the request failed.
    /// Defaults to an empty string.
    /// </summary>
    public string Html { get; set; }

    /// <summary>
    /// Raw response bytes. Only filled when the request asked for
    /// <c>ResultType.Byte</c>; otherwise null.
    /// </summary>
    public byte[] ResultByte { get; set; }

    /// <summary>
    /// Response headers; defaults to an empty collection.
    /// </summary>
    public WebHeaderCollection Header { get; set; }

    /// <summary>
    /// Status description of the response; defaults to an empty string.
    /// </summary>
    public string StatusDescription { get; set; }

    /// <summary>
    /// Status code of the response; defaults to <c>HttpStatusCode.OK</c>.
    /// </summary>
    public HttpStatusCode StatusCode { get; set; }
}
/// <summary>
/// Selects which representations of the response body are returned.
/// </summary>
public enum ResultType
{
    /// <summary>
    /// Text only: just <c>Html</c> is filled.
    /// </summary>
    String = 0,

    /// <summary>
    /// Text and raw bytes: both <c>Html</c> and <c>ResultByte</c> are filled.
    /// </summary>
    Byte = 1
}
/// <summary>
/// Selects the source of the POST body; the default is <see cref="String"/>.
/// </summary>
public enum PostDataType
{
    /// <summary>
    /// The body is the <c>Postdata</c> string; <c>Encoding</c> may be left unset.
    /// </summary>
    String = 0,

    /// <summary>
    /// The body is the <c>PostdataByte</c> array; <c>Encoding</c> may be left unset.
    /// </summary>
    Byte = 1,

    /// <summary>
    /// The body is read from a file: <c>Postdata</c> must hold the absolute path
    /// of the file and <c>Encoding</c> must be set.
    /// </summary>
    FilePath = 2
}
}
