支持Cookie並開放了一些特殊設置項的HttpWebClient


  1 using System;
  2 using System.Collections.Generic;
  3 using System.Linq;
  4 using System.Text;
  5 using System.Net;
  6 using System.IO;
  7 using System.Collections.Specialized;
  8 using System.Web;
  9 
 10 namespace Common.Helpers
 11 {
 12     /// <summary>
 13     /// 網絡訪問輔助類
 14     /// </summary>
 15     public class HttpWebClient : WebClient
 16     {
 17         #region 公共屬性
 18         /// <summary>
 19         /// 瀏覽器用戶標識,默認采用Chrome的標識
 20         /// </summary>
 21         public string UserAgent { get; set; }
 22         /// <summary>
 23         /// Cookie容器
 24         /// </summary>
 25         public CookieContainer CookieContainer { get; set; }
 26         /// <summary>
 27         /// 如果 POST 請求需要 100-Continue 響應,則為 true;否則為 false。
 28         /// </summary>
 29         public bool Expect100Continue { get; set; }
 30 
 31         private WebResponse m_LastWebResponse = null;
 32         /// <summary>
 33         /// 最后一次的響應對象
 34         /// </summary>
 35         public WebResponse LastWebResponse { get { return this.m_LastWebResponse; } }
 36 
 37         private int m_Timeout = 120000;
 38         /// <summary>
 39         /// 超時時間,默認120000毫秒(120秒)
 40         /// </summary>
 41         public int Timeout
 42         {
 43             get { return m_Timeout; }
 44             set { m_Timeout = value; }
 45         }
 46 
 47         private HttpWebClientSetting m_HttpWebClientSetting = null;
 48         /// <summary>
 49         /// WebClient設置項,該屬性始終不會為null
 50         /// </summary>
 51         public HttpWebClientSetting HttpWebClientSetting
 52         {
 53             get
 54             {
 55                 if (m_HttpWebClientSetting == null)
 56                 {
 57                     m_HttpWebClientSetting = new HttpWebClientSetting();
 58                 }
 59                 return m_HttpWebClientSetting;
 60             }
 61             set
 62             {
 63                 m_HttpWebClientSetting = value ?? new HttpWebClientSetting();
 64             }
 65         }
 66         
 67 
 68         /// <summary>
 69         /// 預處理Web請求對象的委托方法(會在每次獲取WebRequest對象后調用),默認值為null
 70         /// </summary>
 71         public Action<HttpWebRequest> PrepareProcessWebRequest { get; set; }
 72         #endregion
 73 
 74         #region 構造方法
 75         public HttpWebClient()
 76             : this(new CookieContainer())
 77         {
 78         }
 79 
 80         public HttpWebClient(CookieContainer cookieContainer)
 81         {
 82             this.CookieContainer = cookieContainer;
 83             this.UserAgent = UserAgentValues.FireFox;
 84             this.Expect100Continue = false;
 85         }
 86         #endregion
 87 
 88         #region 重寫方法,增加對CookieContainer的支持
 89         protected override WebRequest GetWebRequest(Uri address)
 90         {
 91             if (!string.IsNullOrEmpty(this.UserAgent))
 92             {
 93                 this.Headers.Add(HttpRequestHeader.UserAgent, this.UserAgent);
 94             }
 95 
 96             WebRequest request = base.GetWebRequest(address);
 97             request.Timeout = this.Timeout;
 98             
 99             if (request is HttpWebRequest)
100             {
101                 HttpWebRequest httpRequest = request as HttpWebRequest;
102                 httpRequest.CookieContainer = this.CookieContainer;
103                 httpRequest.ServicePoint.Expect100Continue = this.Expect100Continue; // 取消100-continue
104 
105                 //讀取自定義設置項
106                 if (this.HttpWebClientSetting != null)
107                 {
108                     httpRequest.AllowAutoRedirect = this.HttpWebClientSetting.AllowAutoRedirect;
109                 }
110 
111                 //使用外部委托屬性處理Request對象
112                 if (this.PrepareProcessWebRequest != null)
113                 {
114                     this.PrepareProcessWebRequest(httpRequest);
115                 }
116             }
117             
118             return request;
119         }
120         #endregion
121 
122         #region 重寫方法,增加對響應對象的訪問
123         protected override WebResponse GetWebResponse(WebRequest request)
124         {
125             WebResponse response = base.GetWebResponse(request);
126             this.m_LastWebResponse = response;
127             return response;
128         }
129         #endregion
130 
131         #region (public) 向一個URL用POST提交數據,並返回其響應內容 PostData
132         /// <summary>
133         /// 向一個URL用POST提交數據,並返回其響應內容
134         /// ZhangQingFeng    2014-12-14    Add
135         ///    EditLog:
136         ///        ZhangQingFeng    2015-05-12    Edit        因WebClient的UpdateValues方法中固定為UTF-8格式進行UrlEncode,因此此處需用UploadString方式來間接實現    --見微軟WebClient類源碼UploadValuesInternal方法中
137         /// </summary>
138         /// <param name="url">請求的URL</param>
139         /// <param name="data">要提交的數據</param>
140         /// <param name="encoding">請求所使用的編碼</param>
141         /// <param name="responseEncoding">響應內容所使用的編碼,為null時使用請求的編碼</param>
142         /// <returns>響應的內容</returns>
143         public string PostData(string url, NameValueCollection data, Encoding encoding, Encoding responseEncoding)
144         {
145             WebClient client = this;
146 
147             /*
148             client.Encoding = encoding ?? Encoding.UTF8;
149 
150             byte[] response = client.UploadValues(url, "POST", data ?? new NameValueCollection());
151 
152             string html = string.Empty;
153 
154             if (responseEncoding == null)
155             {
156                 html = client.Encoding.GetString(response);
157             }
158             else
159             {
160                 html = responseEncoding.GetString(response);
161             }
162              */
163 
164             client.Encoding = encoding ?? Encoding.UTF8;
165             client.Headers.Add(HttpRequestHeader.ContentType, "application/x-www-form-urlencoded");
166 
167             string delimiter = String.Empty;
168             StringBuilder values = new StringBuilder();
169             foreach (string name in data.AllKeys)
170             {
171                 values.Append(delimiter);
172                 values.Append(HttpUtility.UrlEncode(name, encoding));
173                 values.Append("=");
174                 values.Append(HttpUtility.UrlEncode(data[name], encoding));
175                 delimiter = "&";
176             }
177 
178             byte[] arrData = client.UploadData(url, "POST", Encoding.ASCII.GetBytes(values.ToString()));
179             string html = (responseEncoding ?? client.Encoding).GetString(arrData);
180 
181             return html;
182         }
183 
184         /// <summary>
185         /// 向一個URL用POST提交數據,並返回其響應內容
186         /// ZhangQingFeng    2014-12-14    Add
187         /// </summary>
188         /// <param name="url">請求的URL</param>
189         /// <param name="data">要提交的數據</param>
190         /// <param name="encoding">請求和響應所使用的編碼</param>
191         /// <returns>響應的內容</returns>
192         public string PostData(string url, NameValueCollection data, Encoding encoding)
193         {
194             return PostData(url, data, encoding, null);
195         }
196 
197         /// <summary>
198         /// 向一個URL用POST提交數據,並返回其響應內容(使用this.Encoding來作請求編碼和響應編碼)
199         /// ZhangQingFeng    2014-12-14    Add
200         /// </summary>
201         /// <param name="url">請求的URL</param>
202         /// <param name="data">要提交的數據</param>
203         /// <returns>響應的內容</returns>
204         public string PostData(string url, NameValueCollection data)
205         {
206             return PostData(url, data, this.Encoding);
207         }
208         #endregion
209 
210         #region (public) 向一個URL用POST提交數據,並返回其響應內容 PostData
211         /// <summary>
212         /// 向一個URL用POST提交數據,並返回其響應內容
213         /// ZhangQingFeng    2014-12-14    Add
214         /// </summary>
215         /// <param name="url">請求的URL</param>
216         /// <param name="data">要提交的數據</param>
217         /// <param name="encoding">請求和響應內容所使用的編碼</param>
218         /// <returns>響應的內容</returns>
219         public string PostData(string url, Dictionary<string, string> data, Encoding encoding, Encoding responseEncoding)
220         {
221             NameValueCollection postData = new NameValueCollection();
222             if (data != null)
223             {
224                 foreach (var item in data)
225                 {
226                     postData.Add(item.Key, item.Value);
227                 }
228             }
229             return PostData(url, postData, encoding, responseEncoding);
230         }
231 
232 
233         /// <summary>
234         /// 向一個URL用POST提交數據,並返回其響應內容
235         /// ZhangQingFeng    2014-12-14    Add
236         /// </summary>
237         /// <param name="url">請求的URL</param>
238         /// <param name="data">要提交的數據</param>
239         /// <param name="encoding">請求和響應所使用的編碼</param>
240         /// <returns>響應的內容</returns>
241         public string PostData(string url, Dictionary<string, string> data, Encoding encoding)
242         {
243             return PostData(url, data, encoding, null);
244         }
245 
246         /// <summary>
247         /// 向一個URL用POST提交數據,並返回其響應內容(使用this.Encoding來作請求編碼和響應編碼)
248         /// ZhangQingFeng    2014-12-14    Add
249         /// </summary>
250         /// <param name="url">請求的URL</param>
251         /// <param name="data">要提交的數據</param>
252         /// <returns>響應的內容</returns>
253         public string PostData(string url, Dictionary<string, string> data)
254         {
255             return PostData(url, data, this.Encoding);
256         }
257         #endregion
258 
259         #region 輔助類
260         /// <summary>
261         /// 瀏覽器用戶標識類
262         /// </summary>
263         public class UserAgentValues
264         {
265             public static readonly string FireFox = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0";
266             public static readonly string Chrome = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36";
267             public static readonly string IE8 = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2;)";
268         }
269         #endregion
270     }
271 
272     /// <summary>
273     /// HttpWebClient對象設置類
274     /// </summary>
275     public class HttpWebClientSetting
276     {
277         private bool m_AllowAutoRedirect = true;
278         /// <summary>
279         /// 當響應內容為重定向時客戶端是否自動重定向(如果該屬性為true,則取到的響應則為重定向后的內容,否則則為響應原文),默認值為true
280         /// </summary>
281         public bool AllowAutoRedirect
282         {
283             get { return m_AllowAutoRedirect; }
284             set { m_AllowAutoRedirect = value; }
285         }
286     }
287 }
HttpWebClient

在做頁面抓取的過程中,發現自帶的WebClient不夠靈活,因此做了一個實現。

 

關於在PostData方法中不使用UploadValues()方法的原因:

1.查看微軟的源代碼實現時發現,無論請求時設置的Encoding是什么(例如GB2312),WebClient的UploadValues()在上傳內容時,內部都固定使用UTF-8編碼進行UrlEncode。因此,若服務端按GB2312等非UTF-8編碼解析,提交數據中的中文就會亂碼。PostData改為按指定編碼自行拼接並UrlEncode表單內容,再通過UploadData()上傳,以規避此問題。

 

關於HttpWebClientSetting中的AllowAutoRedirect屬性:

在WebClient發起請求時,若響應為重定向,WebClient默認會自動跟隨重定向,因此該類提供此設置項以控制訪問時是否自動重定向。(自動重定向時,發往重定向目標地址的第二次請求會將Referer頭置空;若將AllowAutoRedirect設置為false,然后手動從Response.Headers中取出Location地址,設置好Referer頭后再訪問,則可更真實地模擬瀏覽器訪問,從而避開一些網站的防抓取設置。)

 

關於HttpWebClient中的LastWebResponse屬性:

當存在多次重定向時,LastWebResponse記錄的是最后一次返回的響應對象;讀取該對象的ResponseUri屬性,即可取到最終返回響應的頁面真實地址,從而為下一次請求設置Referer頭作准備。

 

大約就是如此,后期如有Bug會繼續更新。


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM