1、WebRequest 是System.Net抽象類,子類(HttpWebRequest/HttpWebResponse、FileWebRequest、FtpWebRequest)
System.Net.WebRequest abstract
System.Net.HttpWebRequest/HttpWebResponse : WebRequest
System.Net.FileWebRequest : WebRequest
System.Net.FtpWebRequest : WebRequest
WebRequest的子類都用於從web獲取資源。HttpWebRequest利用HTTP 協議和服務器交互,通常是通過 GET 和 POST 兩種方式來對數據進行獲取和提交
/// <summary>
/// Sends an HTTP GET to http://www.baidu.com and prints the status description
/// followed by the response body.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // WebRequest.Create returns an HttpWebRequest for http:// URIs; the method defaults to GET.
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create("http://www.baidu.com");
    // The request method can be set explicitly:
    //request.Method = "POST";

    // using blocks release the response, stream and reader even when reading throws.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        Console.WriteLine(response.StatusDescription);

        // Read the whole response body as text.
        using (Stream dataStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(dataStream))
        {
            string responseFromServer = reader.ReadToEnd();
            Console.WriteLine(responseFromServer);
        }
    }
}
WebRequest“請求/響應”模型的abstract基類,可以用協議不可知的方式從Internet請求數據
注意:Create方法將運行時確定的WebRequest類的子類作為與requestUri最接近的注冊匹配項返回。例如,當以http://開頭的URI在requestUri中傳遞時,由Create返回一個HttpWebRequest。如果改為傳遞以file://開頭的URI,則Create方法將返回FileWebRequest實例。.NET Framework包括對http://和file:// URI方案的支持。
get
// GET: request the page and print its text.
WebRequest pageRequest = WebRequest.Create("http://www.baidu.com");
pageRequest.Method = "GET";
WebResponse pageResponse = pageRequest.GetResponse();
using (System.IO.StreamReader bodyReader = new System.IO.StreamReader(pageResponse.GetResponseStream()))
{
    // The remote page, read to the end as a single string.
    string pageText = bodyReader.ReadToEnd();
    Console.WriteLine(pageText);
}
post
// POST: send a JSON body and read the reply.
var jsonToPost = "{\"name\":\"admin\",\"pwd\":\"123456\"}";
var request = WebRequest.Create("http://www.sina.com");
request.Method = "POST";
// Declare the payload type; without a Content-Type the server cannot tell the body is JSON.
request.ContentType = "application/json; charset=utf-8";

// Write the UTF-8 encoded JSON into the request body.
using (var requestStream = request.GetRequestStream())
{
    var bytes = Encoding.UTF8.GetBytes(jsonToPost);
    requestStream.Write(bytes, 0, bytes.Length);
}

// Dispose the response as well as the reader so the connection is released.
using (var response = request.GetResponse())
using (var stream = new System.IO.StreamReader(response.GetResponseStream()))
{
    var content = stream.ReadToEnd(); // body returned by the POST
}
System.Net.HttpWebRequest/HttpWebResponse
HttpWebRequest httpReq;
HttpWebResponse httpResp;

string strBuff = "";
char[] cbuffer = new char[256];
int byteRead = 0;

string filename = @"c:\log.txt";

/// <summary>
/// Downloads the page at the URL entered in txtURL, decodes it as UTF-8 and
/// shows the page source in txtHTML.
/// </summary>
/// <remarks>
/// GetResponse/GetResponseStream throw on failure; production code should wrap
/// this call in a try/catch. It is left unhandled here to keep the sample short.
/// </remarks>
public void WriteStream()
{
    Uri httpURL = new Uri(txtURL.Text);

    // HttpWebRequest has no public constructor: create it through WebRequest.Create and cast.
    httpReq = (HttpWebRequest)WebRequest.Create(httpURL);

    // Collect the page in a local builder so a second call does not append to the
    // previous page and the loop does not re-copy the whole string for every chunk.
    StringBuilder pageText = new StringBuilder();

    // GetResponse returns a WebResponse; cast it to HttpWebResponse.
    httpResp = (HttpWebResponse)httpReq.GetResponse();
    try
    {
        // The body arrives as a Stream; a StreamReader decodes it as UTF-8.
        using (Stream respStream = httpResp.GetResponseStream())
        using (StreamReader respStreamReader = new StreamReader(respStream, Encoding.UTF8))
        {
            // Read fills cbuffer with up to cbuffer.Length chars and returns 0 at end of stream.
            while ((byteRead = respStreamReader.Read(cbuffer, 0, cbuffer.Length)) != 0)
            {
                pageText.Append(cbuffer, 0, byteRead);
            }
        }
    }
    finally
    {
        // Always release the response, even when reading fails.
        httpResp.Close();
    }

    strBuff = pageText.ToString();
    txtHTML.Text = strBuff;
}
2、System.Net.WebClient
WebClient很輕量級的訪問Internet資源的類,在指定uri后可以發送和接收數據。WebClient提供了 DownloadData,DownloadFile,UploadData,UploadFile 方法,同時提供了這些方法對應的異步方法,通過WebClient我們可以很方便地上傳和下載文件。
/// <summary>
/// Demonstrates the common WebClient calls: DownloadString, OpenRead,
/// DownloadFile and DownloadData, then prints the last response's headers.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // WebClient is IDisposable; the using block releases it when the demo finishes.
    using (WebClient wc = new WebClient())
    {
        wc.BaseAddress = "http://www.baidu.com/"; // base address the relative URIs below are combined with
        wc.Encoding = Encoding.UTF8; // encoding used for downloaded strings; without it Chinese text comes back garbled
        string str = wc.DownloadString("/"); // returns the resource as a string
        Console.WriteLine(str);

        //---------------------- OpenRead(): read the resource as a stream ----------------------
        // Headers holds the request headers sent with the request.
        wc.Headers.Add("Accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*");
        wc.Headers.Add("Accept-Language", "zh-cn");
        wc.Headers.Add("UA-CPU", "x86");
        // Accept-Encoding is deliberately not sent: this program does not decode gzip,
        // so a compressed reply might be unreadable.
        //wc.Headers.Add("Accept-Encoding","gzip, deflate");
        wc.Headers.Add("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)");

        // Dispose the stream and reader so the connection is released.
        using (Stream objStream = wc.OpenRead("?tn=98050039_dg&ch=1"))
        using (StreamReader _read = new StreamReader(objStream, Encoding.UTF8))
        {
            Console.Write(_read.ReadToEnd()); // print the text decoded as UTF-8
        }

        //------------------------ DownloadFile: save a remote file locally ------------------------
        wc.DownloadFile("http://www.baidu.com/img/shouye_b5486898c692066bd2cbaeda86d74448.jpg", @"D:\123.jpg");

        //------------------------ DownloadData: download into a byte array ------------------------
        byte[] bytes = wc.DownloadData("http://www.baidu.com/img/shouye_b5486898c692066bd2cbaeda86d74448.gif");
        // Disposing the FileStream flushes it and releases the file handle.
        using (FileStream fs = new FileStream(@"E:\123.gif", FileMode.Create))
        {
            fs.Write(bytes, 0, bytes.Length);
        }

        // Response headers of the most recent request.
        WebHeaderCollection whc = wc.ResponseHeaders;
        foreach (string s in whc)
        {
            Console.WriteLine(s + ":" + whc.Get(s));
        }
    }

    Console.ReadKey();
}
// Download a page as raw bytes, decode it, print it and save it to a file.
// WebClient is IDisposable, so it is wrapped in a using block.
using (WebClient MyWebClient = new WebClient())
{
    // Credentials used to authenticate the request to the Internet resource.
    MyWebClient.Credentials = CredentialCache.DefaultCredentials;
    Byte[] pageData = MyWebClient.DownloadData("http://www.163.com"); // download the raw bytes from the site
    // Use this line when the page is encoded as GB2312.
    // NOTE(review): Encoding.Default is the system default encoding, so this only decodes
    // GB2312 correctly where that default is GB2312 - confirm for your environment.
    string pageHtml = Encoding.Default.GetString(pageData);
    //string pageHtml = Encoding.UTF8.GetString(pageData); // use this line when the page is encoded as UTF-8
    Console.WriteLine(pageHtml); // print the downloaded content to the console
    using (StreamWriter sw = new StreamWriter("c:\\test\\ouput.html")) // write the content to a text file
    {
        sw.Write(pageHtml);
    }
}
System.Net.Http.HttpClient
HttpClient是.NET4.5引入的一個HTTP客戶端庫,其命名空間為 System.Net.Http 。.NET 4.5之前我們可能使用WebClient和HttpWebRequest來達到相同目的。HttpClient利用了最新的面向任務模式,使得處理異步請求非常容易。
下邊是一個使用控制台程序異步請求接口的栗子:
/// <summary>
/// Console entry point: issues one GET and one POST against the sample API
/// and waits for both to finish.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const string GetUrl = "http://xxxxxxx/api/UserInfo/GetUserInfos"; // lists users; called with GET
    const string PostUrl = "http://xxxxxxx/api/UserInfo/AddUserInfo"; // adds a user; called with POST

    // Start the GET request.
    System.Threading.Tasks.Task getTask = GetFunc(GetUrl);

    UserInfo user = new UserInfo { Name = "jack", Age = 23 };
    string userStr = JsonHelper.SerializeObject(user); // serialize to JSON
    // Start the POST request.
    System.Threading.Tasks.Task postTask = PostFunc(PostUrl, userStr);

    // Observe both requests so a failure surfaces here instead of being lost.
    System.Threading.Tasks.Task.WaitAll(getTask, postTask);
    Console.ReadLine();
}

/// <summary>
/// Sends a GET request and prints the response body.
/// </summary>
/// <param name="path">Absolute URL to request.</param>
/// <returns>A task that completes when the body has been printed.</returns>
static async System.Threading.Tasks.Task GetFunc(string path)
{
    // Message handler; it must be passed to HttpClient or AutomaticDecompression has no effect.
    HttpClientHandler handler = new HttpClientHandler() { AutomaticDecompression = DecompressionMethods.GZip };
    // Disposing the client also disposes the handler it was given.
    using (HttpClient httpClient = new HttpClient(handler))
    using (HttpResponseMessage response = await httpClient.GetAsync(path))
    {
        // Throws when the status code does not indicate success.
        response.EnsureSuccessStatusCode();
        // Read the body asynchronously as a string.
        string resultStr = await response.Content.ReadAsStringAsync();
        Console.WriteLine(resultStr);
    }
}

/// <summary>
/// Sends a POST request with a JSON body and prints the response body.
/// </summary>
/// <param name="path">Absolute URL to post to.</param>
/// <param name="data">Request body text (serialized JSON).</param>
/// <returns>A task that completes when the body has been printed.</returns>
static async System.Threading.Tasks.Task PostFunc(string path, string data)
{
    HttpClientHandler handler = new HttpClientHandler() { AutomaticDecompression = DecompressionMethods.GZip };
    using (HttpClient httpClient = new HttpClient(handler))
    // HttpContent is the base class for an HTTP entity body and its content headers.
    using (HttpContent httpContent = new StringContent(data, Encoding.UTF8, "text/json"))
    {
        //httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("BasicAuth", Ticket); // set an authorization header
        //httpContent.Headers.Add(string name,string value) // add a custom content header

        // Send the POST asynchronously.
        using (HttpResponseMessage response = await httpClient.PostAsync(path, httpContent))
        {
            response.EnsureSuccessStatusCode();
            string resultStr = await response.Content.ReadAsStringAsync();
            Console.WriteLine(resultStr);
        }
    }
}
注意:因為HttpClient有預熱機制,第一次進行訪問時比較慢,所以我們最好不要用到HttpClient就new一個出來,應該使用單例或其他方式獲取HttpClient的實例。上邊的栗子為了演示方便直接new的HttpClient實例。
HttpClient還有很多其他功能,如附帶Cookie,請求攔截等,可以參考https://www.cnblogs.com/wywnet/p/httpclient.html
// Synchronous GET: block on the request, then on reading the body.
using (HttpClient client = new HttpClient())
{
    HttpResponseMessage reply = client.GetAsync("http://www.baidu.com").GetAwaiter().GetResult();
    string content = reply.Content.ReadAsStringAsync().GetAwaiter().GetResult();
}
post
// Synchronous POST of a JSON body: block on the request, then on reading the reply.
using (var http = new HttpClient())
{
    var jsonToPost = "{\"name\":\"admin\",\"pwd\":\"123456\"}";
    // StringContent defaults to text/plain; state the JSON media type explicitly.
    using (var body = new StringContent(jsonToPost, System.Text.Encoding.UTF8, "application/json"))
    using (var response = http.PostAsync("http://www.baidu.com", body).GetAwaiter().GetResult())
    {
        var content = response.Content.ReadAsStringAsync().GetAwaiter().GetResult();
    }
}
WebBrowser
// Create the browser control, start loading the page, and register the completion handler.
WebBrowser web = new WebBrowser();
web.Navigate("http://www.xxx.com/ssc/");
web.DocumentCompleted += web_DocumentCompleted;

/// <summary>
/// Runs when the document has finished loading: appends the inner text of
/// every Table element to Kaijiang_xj.txt.
/// </summary>
void web_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    WebBrowser browser = (WebBrowser)sender;
    HtmlElementCollection tables = browser.Document.GetElementsByTagName("Table");
    foreach (HtmlElement table in tables)
    {
        File.AppendAllText("Kaijiang_xj.txt", table.InnerText);
    }
}
4. 三種方法的簡單比較:
1)。WebRequest 和 HttpWebResponse 最簡單直接。
2)。WebClient對WebRequest作了包裝,可以用於上傳與下載文件,使用起來方便。 但是如果需要設置HttpWebRequest的一些屬性,如timeout,cache-level,則無法直接做到,需要用戶繼承WebClient並重寫GetWebRequest方法。
3)。WebBrowser 最強大,但是耗資源最多。集成了Js引擎,依賴於OS的IE內核,能自動執行返回結果中的JS腳本。但是,一般只能用於winForm程序中。 如果需要在console程序中WebBrowser,請參考:
5. web blogs:
WebBrowser is actually in the System.Windows.Forms namespace and is a visual control that you can add to a form. It is primarily a wrapper around the Internet Explorer browser (MSHTML). It allows you to easily display and interact programmatically with a web page. You call the Navigate method passing a web URL, wait for it to complete downloading and display and then interact with the page using the object model it provides.
HttpWebRequest is a concrete class that allows you to request in code any sort of file over HTTP. You usually receive it as a stream of bytes. What you do with it after that is up to your application.
HttpWebResponse allows you to process the response from a web server that was previously requested using HttpWebRequest.
WebRequest and WebResponse are the abstract base classes that the HttpWebRequest and HttpWebResponse inherit from. You can't create these directly. Other classes that inherit from these include Ftp and File classes.
WebClient I have always seen as a nice helper class that provides simpler ways to, for example, download or upload a file from a web url (e.g. DownloadFile and DownloadString methods). I have heard that it actually uses HttpWebRequest / HttpWebResponse behind the scenes for certain methods.
If you need more fine grained control over web requests and responses, HttpWebRequest / HttpWebResponse are probably the way to go. Otherwise WebClient is generally simpler and will do the job.
1). http://www.pin5i.com/showtopic-24684.html
2). http://hi.baidu.com/javaecho/blog/item/079c6d2a0d4efd5d4fc226b1.html
System.Net
============================================
Html Agility Pack
Install-Package HtmlAgilityPack
以指定的Stream對象為主的有:
(1)public void Load(Stream stream) ///從指定的Stream對象中加載html;
(2)public void Load(Stream stream, bool detectEncodingFromByteOrderMarks) ///指定是否根據字節順序標記(BOM)檢測編碼格式
(3)public void Load(Stream stream, Encoding encoding) ///指定編碼格式
(4)public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
(5)public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
以指定的物理路徑為主的有:
(1)public void Load(string path)
(2)public void Load(string path, bool detectEncodingFromByteOrderMarks) ///指定是否根據字節順序標記(BOM)檢測編碼格式
(3)public void Load(string path, Encoding encoding) ///指定編碼格式
(4)public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
(5)public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
// Parse an HTML fragment, then read the attributes of one of its nodes.
string html = "<div id=\"demo\"><span style=\"color:red;\"><h1>Hello World!</h1></span></div>";
HtmlDocument doc = new HtmlDocument();
doc.LoadHtml(html);

// NOTE(review): the original snippet never showed where `node` comes from; the <div>
// is selected here so the sample is self-contained - confirm the intended node.
HtmlNode node = doc.DocumentNode.SelectSingleNode("//div[@id='demo']");

// NOTE(review): the sample markup has no title attribute; the indexer is presumed to
// return null for a missing attribute, so guard before reading Value - confirm.
HtmlAttribute titleAttr = node.Attributes["title"];
string titleValue = titleAttr != null ? titleAttr.Value : null;

// Print every attribute of the node as name=value.
foreach (HtmlAttribute attr in node.Attributes)
{
    Console.WriteLine("{0}={1}", attr.Name, attr.Value);
}
// Compare OuterHtml and InnerHtml on the <div id="demo"> node.
string html = "<div id=\"demo\"><span style=\"color:red;\"><h1>Hello World!</h1></span></div>";
HtmlDocument doc = new HtmlDocument();
doc.LoadHtml(html);
// HtmlDocument exposes its root as DocumentNode; select the <div> that the expected
// outputs below describe.
HtmlNode node = doc.DocumentNode.SelectSingleNode("//div[@id='demo']");
Console.WriteLine(node.OuterHtml); // prints: <div id="demo"><span style="color:red;"><h1>Hello World!</h1></span></div>
Console.WriteLine(node.InnerHtml); // prints: <span style="color:red;"><h1>Hello World!</h1></span>
獲取父節點的系列方法:
1)public IEnumerable<HtmlNode> Ancestors()
獲取當前節點的父節點列表(不包含自身)。
2)public IEnumerable<HtmlNode> Ancestors(string name)
以指定一個名稱來獲取父節點的列表(不包含自身)。
3)public IEnumerable<HtmlNode> AncestorsAndSelf()
獲取當前節點的父節點列表(包含自身)。
4)public IEnumerable<HtmlNode> AncestorsAndSelf(string name)
以指定一個名稱來獲取父節點的列表(包含自身)。
獲取子節點的系列方法:
1)public IEnumerable<HtmlNode> DescendantNodes()
獲取當前節點下的所有子節點的列表,包括子節點的子節點(不包含自身)。
2)public IEnumerable<HtmlNode> DescendantNodesAndSelf()
獲取當前節點下的所有子節點的列表,包括子節點的子節點(包含自身)。
3)public IEnumerable<HtmlNode> Descendants()
獲取當前節點下的所有后代節點的列表,包括子節點的子節點(不包含自身)。
4)public IEnumerable<HtmlNode> DescendantsAndSelf()
獲取當前節點下的直接子節點的列表(包含自身)。
5)public IEnumerable<HtmlNode> Descendants(string name)
獲取當前節點下的以指定名稱的子節點列表。
6)public IEnumerable<HtmlNode> DescendantsAndSelf(string name)
獲取當前節點下的以指定名稱的子節點的列表(包含自身)。
7)public HtmlNode Element(string name)
獲取第一個符合指定名稱的直接子節點的節點元素。
8)public IEnumerable<HtmlNode> Elements(string name)
獲取符合指定名稱的所有直接子節點的節點列表。
9)public HtmlNodeCollection SelectNodes(string xpath)
獲取符合指定的xpath的子節點列表。
10)public HtmlNode SelectSingleNode(string xpath)
獲取符合指定的xpath的單個子節點元素。