本項目github地址:https://github.com/wangqifan/ZhiHu
什么是Httphelper?
HttpHelper是一個封裝好、拿來獲取網絡上資源的工具類。因為是用http協議,故取名HttpHelper。
httphelper出現的背景
使用WebClient可以很方便獲取網絡上的資源,例如
// WebClient implements IDisposable, so wrap it in a using block to release it deterministically.
string html;
using (WebClient client = new WebClient())
{
    html = client.DownloadString("https://www.baidu.com/");
}
這樣就可以拿到百度首頁的源代碼。由於WebClient封裝性太強,有時候不大靈活;當需要對底層有更細致的把控時,就需要打造自己的網絡資源獲取工具了。
HttpHelper初級
現在着手打造自己的下載工具,剛開始時候長這樣
/// <summary>
/// Minimal helper for fetching a web resource as text.
/// </summary>
public class HttpHelp
{
    /// <summary>
    /// Requests <paramref name="url"/> and returns the response body decoded as UTF-8.
    /// </summary>
    /// <param name="url">Address of the resource to download.</param>
    /// <returns>The complete response body as a string.</returns>
    /// <remarks>Nothing is caught here: any exception raised while requesting or reading propagates to the caller.</remarks>
    public static string DownLoadString(string url)
    {
        HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(url);

        // Stacked using statements dispose the reader, the stream and the response (in that order) on exit.
        using (HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse())
        using (Stream body = webResponse.GetResponseStream())
        using (StreamReader bodyReader = new StreamReader(body, Encoding.UTF8))
        {
            return bodyReader.ReadToEnd();
        }
    }
}
程序總會出現各種異常的,這個時候加個Try catch語句
/// <summary>
/// Helper for fetching a web resource as text without letting failures escape to the caller.
/// </summary>
public class HttpHelp
{
    /// <summary>
    /// Requests <paramref name="url"/> and returns the response body decoded as UTF-8.
    /// </summary>
    /// <param name="url">Address of the resource to download.</param>
    /// <returns>The response body, or an empty string when the request failed.</returns>
    public static string DownLoadString(string url)
    {
        string Source = string.Empty;
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream, Encoding.UTF8))
            {
                Source = reader.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            // Still best-effort (the caller gets an empty string), but report WHY the
            // request failed instead of silently discarding the exception.
            Console.WriteLine("出錯了,請求的URL為{0}", url);
            Console.WriteLine(ex.Message);
        }
        return Source;
    }
}
請求資源是I/O密集型,特別耗時,這個時候需要異步
/// <summary>
/// Asynchronously requests <paramref name="url"/> and returns the response body decoded as UTF-8.
/// </summary>
/// <param name="url">Address of the resource to download.</param>
/// <returns>A task producing the response body, or an empty string when the request failed.</returns>
public static async Task<string> DownLoadString(string url)
{
    string Source = string.Empty;
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

        // Use the real asynchronous I/O APIs rather than parking a thread-pool thread
        // inside Task.Run while it blocks on GetResponse()/ReadToEnd().
        using (HttpWebResponse response = (HttpWebResponse)await request.GetResponseAsync().ConfigureAwait(false))
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, Encoding.UTF8))
        {
            Source = await reader.ReadToEndAsync().ConfigureAwait(false);
        }
    }
    catch (Exception ex)
    {
        // Best-effort: the caller gets an empty string, but the cause is reported.
        Console.WriteLine("出錯了,請求的URL為{0}", url);
        Console.WriteLine(ex.Message);
    }
    return Source;
}
HttpHelper完善
為了欺騙服務器,讓服務器認為這個請求是瀏覽器發出的
// Send a desktop Firefox User-Agent string so the server treats the request as coming from a browser.
request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0";
有些資源是需要權限的,這個時候要偽裝成某個用戶,http協議是無狀態的,標記信息都在cookie上面,給請求加上cookie
// Attach the cookie copied from a logged-in browser session so the request is treated as that user.
request.Headers.Add("Cookie", "這里填cookie,從瀏覽器上面拷貝");
再完善下,設定個超時吧
// HttpWebRequest.Timeout is expressed in milliseconds: stop waiting for the response after 5 seconds.
request.Timeout = 5000;
有些網站提供資源是GZIP壓縮,這樣可以節省帶寬,所以請求頭再加個
// Advertise only the encodings the helper can decompress (gzip via GZipStream, deflate via DeflateStream).
// "br" (Brotli) is left out: a Brotli-compressed body would otherwise be read as garbage text.
request.Headers.Add("Accept-Encoding", " gzip, deflate");
相應地,得到的響應流要有相對應的解壓,這個時候httphelper變成這樣了
/// <summary>
/// Downloads <paramref name="url"/> as UTF-8 text, sending browser-like headers and
/// decompressing gzip/deflate response bodies.
/// </summary>
/// <param name="url">Address of the resource to download.</param>
/// <returns>The decoded response body, or an empty string when the request failed.</returns>
public static string DownLoadString(string url)
{
    string Source = string.Empty;
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0";
        request.Headers.Add("Cookie", "這里是Cookie");
        // Only advertise encodings that are decoded below; "br" (Brotli) is not handled here,
        // so offering it would let the server send a body this method cannot read.
        request.Headers.Add("Accept-Encoding", " gzip, deflate");
        request.KeepAlive = true; // enable persistent (keep-alive) connections
        request.Timeout = 5000;   // milliseconds; give up after 5 seconds instead of the 100-second default

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            // Fetch the response stream exactly once and wrap it in the matching decompressor.
            Stream body = response.GetResponseStream();
            string contentEncoding = response.ContentEncoding ?? string.Empty;

            if (contentEncoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                body = new GZipStream(body, CompressionMode.Decompress);
            }
            else if (contentEncoding.IndexOf("deflate", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                body = new DeflateStream(body, CompressionMode.Decompress);
            }

            // Disposing the reader also disposes the (possibly wrapped) stream underneath it.
            using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
            {
                Source = reader.ReadToEnd();
            }
        }
    }
    catch (Exception ex)
    {
        // Best-effort: the caller gets an empty string, but the cause is reported.
        Console.WriteLine("出錯了,請求的URL為{0}", url);
        Console.WriteLine(ex.Message);
    }
    return Source;
}
請求太頻繁會被服務器拒絕,返回429。這個時候需要設置代理:我們的請求會提交到代理服務器,代理服務器會向目標服務器請求,得到的響應由代理服務器返回給我們。只要不斷切換代理,服務器就不會因為請求太頻繁而拒絕掉程序的請求
var proxy = new WebProxy("Adress", 8080); // the second argument is the port number
request.Proxy = proxy; // route this HttpWebRequest through the proxy
原理是
我使用的是一家叫阿布雲的服務商,提供的服務比較穩定優質,就是有點貴。根據阿布雲官網的示例代碼,我將httphelp修改成了
/// <summary>
/// Downloads <paramref name="url"/> as UTF-8 text through an authenticated HTTP proxy,
/// sending browser-like headers and decompressing gzip/deflate response bodies.
/// </summary>
/// <param name="url">Address of the resource to download.</param>
/// <returns>The decoded response body, or an empty string when the request failed.</returns>
public static string DownLoadString(string url)
{
    string Source = string.Empty;
    try
    {
        string proxyHost = "http://proxy.abuyun.com";
        string proxyPort = "9020";

        // Proxy tunnel credentials. Real credentials must never be committed or published
        // with the source: fill these in locally or load them from configuration.
        string proxyUser = "這里是代理用戶名";
        string proxyPass = "這里是代理密碼";

        var proxy = new WebProxy();
        proxy.Address = new Uri(string.Format("{0}:{1}", proxyHost, proxyPort));
        proxy.Credentials = new NetworkCredential(proxyUser, proxyPass);

        // Static setting: applies to every request made through ServicePointManager, not just this one.
        ServicePointManager.Expect100Continue = false;

        Stopwatch watch = new Stopwatch();
        watch.Start();

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0";
        // A logged-in session cookie is a credential too: keep it out of source control.
        request.Headers.Add("Cookie", "這里是Cookie");
        request.Headers.Add("Upgrade-Insecure-Requests", "1");
        request.Headers.Add("Cache-Control", "no-cache");
        request.Accept = "*/*";
        request.Method = "GET";
        request.Referer = "https://www.zhihu.com/";
        // Only advertise encodings that are decoded below; "br" (Brotli) is not handled here.
        request.Headers.Add("Accept-Encoding", " gzip, deflate");
        request.KeepAlive = true; // enable persistent (keep-alive) connections
        request.Proxy = proxy;

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            // Fetch the response stream exactly once and wrap it in the matching decompressor.
            Stream body = response.GetResponseStream();
            string contentEncoding = response.ContentEncoding ?? string.Empty;

            if (contentEncoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                body = new GZipStream(body, CompressionMode.Decompress);
            }
            else if (contentEncoding.IndexOf("deflate", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                body = new DeflateStream(body, CompressionMode.Decompress);
            }

            // Disposing the reader also disposes the (possibly wrapped) stream underneath it.
            using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
            {
                Source = reader.ReadToEnd();
            }
        }

        // As in the original flow, the elapsed time is only reported for successful requests.
        watch.Stop();
        Console.WriteLine("請求網頁用了{0}毫秒", watch.ElapsedMilliseconds.ToString());
    }
    catch (Exception ex)
    {
        // Best-effort: the caller gets an empty string, but the cause is reported.
        Console.WriteLine("出錯了,請求的URL為{0}", url);
        Console.WriteLine(ex.Message);
    }
    return Source;
}