C#快速獲取指定網頁源碼的幾種方式,並通過字符串截取函數 或 正則 取指定內容(IP)


// Approach 1: read the page source only from its start up to the closing </title> tag (for testing).
// In the author's tests this approach was slightly faster than the WebClient one.
            string url = "http://www.ip.cn";
            HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
            req.Method = "GET";
            req.ContentType = "application/x-www-form-urlencoded";

            const string titleEnd = "</title>";
            StringBuilder pageHead = new StringBuilder();

            // The using blocks release the response, its stream and the reader even when a read throws.
            using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
            using (Stream ReceiveStream = res.GetResponseStream())
            using (StreamReader sr = new StreamReader(ReceiveStream, Encoding.UTF8))
            {
                char[] read = new char[256];
                int count;
                while ((count = sr.Read(read, 0, read.Length)) > 0)
                {
                    // Only the new chunk plus the last (titleEnd.Length - 1) characters of the
                    // earlier text can contain a tag that was not visible before this read.
                    int searchFrom = Math.Max(0, pageHead.Length - (titleEnd.Length - 1));
                    pageHead.Append(read, 0, count);

                    string window = pageHead.ToString(searchFrom, pageHead.Length - searchFrom);
                    if (window.IndexOf(titleEnd, StringComparison.Ordinal) != -1)
                    {
                        // Stop before requesting another chunk that would never be used.
                        break;
                    }
                }
            }

            string strResult = pageHead.ToString();
            textBoxTest.Text = strResult;


// Approach 2: download the whole page with WebClient and decode the bytes.

Byte[] pageData;
// WebClient is IDisposable; the using block releases it once the download has finished or failed.
using (WebClient MyClient = new WebClient())
{
    MyClient.Credentials = CredentialCache.DefaultCredentials;
    // The Host header has to name the server that is actually requested, so it is derived
    // from url (host plus any non-default port) instead of being hard-coded to another site.
    MyClient.Headers.Add("Host", new Uri(url).Authority);
    MyClient.Headers.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36");
    pageData = MyClient.DownloadData(url);
}
// For a GB2312-encoded page, decode with Encoding.Default.GetString(pageData) instead.
string pageHtml = Encoding.UTF8.GetString(pageData); // UTF-8



/// <summary>
/// Extracts the text of <paramref name="TxtStr"/> that lies between the first occurrence of
/// <paramref name="FirstStr"/> and the next occurrence of <paramref name="SecondStr"/> after it.
/// </summary>
/// <param name="TxtStr">Text to search, e.g. downloaded page source.</param>
/// <param name="FirstStr">Marker that precedes the wanted text.</param>
/// <param name="SecondStr">Marker that follows the wanted text.</param>
/// <returns>
/// The text between the two markers, or an empty string when an argument is null, when
/// <paramref name="FirstStr"/> contains <paramref name="SecondStr"/>, or when either marker is missing.
/// </returns>
private string GetStr(string TxtStr, string FirstStr, string SecondStr)
        {
            if (TxtStr == null || FirstStr == null || SecondStr == null)
            {
                return "";
            }

            // Markers where the start contains the end are rejected (this also covers an empty end marker).
            if (FirstStr.IndexOf(SecondStr, StringComparison.Ordinal) != -1)
            {
                return "";
            }

            int FirstSite = TxtStr.IndexOf(FirstStr, StringComparison.Ordinal);
            if (FirstSite == -1)
            {
                return "";
            }

            // Look for the end marker only after the whole start marker. Searching from
            // FirstSite + 1 could match inside the start marker and produce a negative length.
            int contentStart = FirstSite + FirstStr.Length;
            int SecondSite = TxtStr.IndexOf(SecondStr, contentStart, StringComparison.Ordinal);
            if (SecondSite == -1)
            {
                return "";
            }

            return TxtStr.Substring(contentStart, SecondSite - contentStart);
        }

// Regex extraction: take the text that lies between "A" and "B".
// The lookbehind (?<=A) and lookahead (?=B) keep both markers out of the match, and .*? stops
// at the first "B". With the markers written inline the pattern is simply "(?<=A).*?(?=B)".
string betweenPattern = "(?<=" + "A" + ").*?(?=" + "B" + ")";
Match betweenMatch = Regex.Match(title, betweenPattern);
string title2 = betweenMatch.Value;

 //自定義函數
/// <summary>
/// Requests <paramref name="url"/> with HTTP GET, reads the response as UTF-8 text until
/// <paramref name="endTag"/> has been seen (or the response ends), and returns the first text
/// found between <paramref name="startStr"/> and <paramref name="endStr"/>.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="endTag">Marker that stops the download as soon as it appears in the text read so far.</param>
/// <param name="startStr">Left delimiter; inserted unescaped into a regex lookbehind.</param>
/// <param name="endStr">Right delimiter; inserted unescaped into a regex lookahead.</param>
/// <returns>The matched text, or an empty string when nothing matches.</returns>
/// <remarks>
/// The delimiters are treated as regular-expression fragments, and the match does not span line
/// breaks because no RegexOptions are passed. Every exception is logged through LogAdd and rethrown.
/// </remarks>
private string MyGetTitle(string url, string endTag, string startStr, string endStr)
        {
            try
            {
                // HttpWebRequest is obtained through WebRequest.Create and a cast, not through a constructor.
                HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
                req.Method = "GET";
                req.ContentType = "text/html;charset=UTF-8";

                string strResult;
                // GetResponseStream throws ProtocolViolationException when there is no response stream.
                // The using blocks release the response, stream and reader even when reading throws.
                using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
                using (Stream ReceiveStream = res.GetResponseStream())
                using (StreamReader sr = new StreamReader(ReceiveStream, Encoding.UTF8))
                {
                    strResult = ReadUntilContains(sr, endTag);
                }

                return Regex.Match(strResult, "(?<=" + startStr + ").*?(?=" + endStr + ")").Value;
            }
            catch (Exception ex)
            {
                LogAdd(ListBoxDownLog, "異常:" + ex.Message);
                throw;
            }

        }

// Reads 256-character chunks from reader until the accumulated text contains endTag or the
// reader is exhausted, and returns everything read so far.
private static string ReadUntilContains(TextReader reader, string endTag)
        {
            if (endTag == null)
            {
                throw new ArgumentNullException("endTag");
            }

            StringBuilder text = new StringBuilder();
            char[] buffer = new char[256];
            int count;
            // Read(buffer, index, count) stores up to count characters into buffer starting at
            // buffer[index] and returns how many were read; 0 means the end of the stream.
            while ((count = reader.Read(buffer, 0, buffer.Length)) > 0)
            {
                // Only the new chunk plus the last (endTag.Length - 1) characters of the earlier
                // text can contain an occurrence that was not visible before this read.
                int searchFrom = Math.Max(0, text.Length - (endTag.Length - 1));
                text.Append(buffer, 0, count);

                string window = text.ToString(searchFrom, text.Length - searchFrom);
                if (window.IndexOf(endTag, StringComparison.Ordinal) != -1)
                {
                    break;
                }
            }

            return text.ToString();
        }

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM