C# 關於爬取網站數據遇到csrf-token的分析與解決


需求

某航空公司物流單信息查詢,是一個POST請求。通過後台模擬POST HTTP請求發現無法獲取頁面數據,通過查看航空公司網站後,發現網站使用了防御CSRF攻擊的機制,請求會直接返回40X錯誤。

關於CSRF

讀者自行百度

網站HTTP請求分析 

Headers

Form Data

在head里包含了cookie 與 x-csrf-token  formdata 里包含了_csrf (與head里的值是一樣的).

 

這里通過查看該網站的JS源代碼發現_csrf 來自於網頁的head標簽里

猜測cookie與 x-csrf-token是有一定的有效期,並且他們共同作用來防御CSRF攻擊。

解決方案

1,首先請求一下該航空公司的網站,獲取cookie與_csrf

2,然后C# 模擬http分別在head和formdata里加入如上參數,發起請求

 

 代碼

 

 /// <summary>
 /// Fetches a page once and keeps the two pieces of state needed to replay a
 /// CSRF-protected request against the same site: the raw cookie string returned
 /// by the GET, and the <c>content</c> values of every
 /// <c>&lt;meta name="_csrf..." content="..."/&gt;</c> tag found in the HTML.
 /// </summary>
 public class CSRFToken
    {
        // Lazy quantifiers keep each match inside a single <meta .../> tag. With greedy
        // ".*" several tags on one line collapse into one match and the content values
        // of unrelated tags in between are collected as well.
        private static readonly Regex CsrfMetaTagRegex =
            new Regex(@"<meta name=""_csrf.*?"" content="".*?""/>");

        // Extracts the text between content=" and the next double quote.
        private static readonly Regex ContentValueRegex =
            new Regex("(?<=content=\").*?(?=\")", RegexOptions.None);

        string cookie; // cookie of the site the requests are sent to

        // Token key and value for the site, in document order. Never null, so callers
        // can test Count instead of hitting a NullReferenceException after a failed fetch.
        List<string> csrfs = new List<string>();

        /// <summary>
        /// Requests <paramref name="url"/> and extracts the cookie and the CSRF meta values.
        /// This is best-effort: a blank URL, a failed request or an unexpected page leave
        /// the token list empty instead of throwing.
        /// </summary>
        /// <param name="url">Address of the page that carries the CSRF meta tags.</param>
        public CSRFToken(string url)
        {
            if (string.IsNullOrWhiteSpace(url))
            {
                return;
            }

            try
            {
                var http = new HttpHelper(url);
                string responseCookie;
                string html = http.CreateGetHttpResponseForPC(out responseCookie);
                this.cookie = responseCookie;

                // No body to scan (for example the helper reported a failed request).
                if (string.IsNullOrEmpty(html))
                {
                    return;
                }

                foreach (Match metaTag in CsrfMetaTagRegex.Matches(html))
                {
                    foreach (Match content in ContentValueRegex.Matches(metaTag.Value))
                    {
                        csrfs.Add(content.Value);
                    }
                }
            }
            catch (Exception e)
            {
                // Still best-effort, but leave a trace instead of hiding the failure entirely.
                System.Diagnostics.Trace.TraceWarning(
                    "CSRFToken: failed to load CSRF data from {0}: {1}", url, e);
            }
        }

        /// <summary>Returns the cookie string captured from the GET response (may be null or empty).</summary>
        public String getCookie()
        {
            return cookie;
        }

        /// <summary>Overrides the stored cookie string.</summary>
        public void setCookie(String cookie)
        {
            this.cookie = cookie;
        }

        /// <summary>
        /// Returns the extracted CSRF meta <c>content</c> values in document order;
        /// empty when nothing was found or the fetch failed.
        /// </summary>
        public List<string> getCsrf_token()
        {
            return csrfs;
        }
    }

httpHelper

  /// <summary>
  /// Sends a form-encoded POST to <c>_baseIPAddress</c> and returns the response body
  /// decoded as UTF-8.
  /// </summary>
  /// <param name="headers">
  /// Extra request headers (e.g. Cookie, X-CSRF-TOKEN); may be null or empty. Referer,
  /// User-Agent, Accept, Content-Type and Host are routed to the matching
  /// <see cref="HttpWebRequest"/> properties because they cannot be set through
  /// <c>Headers.Add</c>.
  /// </param>
  /// <param name="parameters">
  /// Form fields; may be null or empty. Keys and values are URL-encoded here, so pass
  /// them unencoded.
  /// </param>
  /// <returns>The response body, or null when the request fails with a <see cref="WebException"/>.</returns>
  public string CreatePostHttpResponse(IDictionary<string, string> headers, IDictionary<string, string> parameters)
        {
            // NOTE(review): both ServicePointManager settings are process-wide. What
            // CheckValidationResult accepts is not visible here - confirm it does not
            // simply trust every certificate.
            ServicePointManager.ServerCertificateValidationCallback = new RemoteCertificateValidationCallback(CheckValidationResult);
            ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12 | SecurityProtocolType.Tls11;

            // Direct cast: a non-HTTP address fails here with a clear InvalidCastException
            // instead of a NullReferenceException on the next line.
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(_baseIPAddress);
            request.ProtocolVersion = HttpVersion.Version10;
            request.Method = "POST";
            request.ContentType = "application/x-www-form-urlencoded";
            request.UserAgent = DefaultUserAgent;

            if (headers != null)
            {
                foreach (KeyValuePair<string, string> header in headers)
                {
                    ApplyRequestHeader(request, header.Key, header.Value);
                }
            }

            // Write the form body when there is data to post.
            if (parameters != null && parameters.Count > 0)
            {
                var pairs = new List<string>(parameters.Count);
                foreach (KeyValuePair<string, string> field in parameters)
                {
                    // Encode both sides so '&', '=', '+', spaces and non-ASCII text in a
                    // value cannot corrupt the key=value&key=value framing.
                    pairs.Add(WebUtility.UrlEncode(field.Key) + "=" + WebUtility.UrlEncode(field.Value));
                }

                byte[] data = Encoding.UTF8.GetBytes(string.Join("&", pairs));
                request.ContentLength = data.Length;
                using (Stream stream = request.GetRequestStream())
                {
                    stream.Write(data, 0, data.Length);
                }
            }

            try
            {
                // using blocks release the connection even if reading the body throws.
                using (var response = (HttpWebResponse)request.GetResponse())
                using (Stream body = response.GetResponseStream())
                using (var reader = new StreamReader(body, Encoding.UTF8))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (WebException ex)
            {
                // The error response holds a connection as well; close it before giving up.
                if (ex.Response != null)
                {
                    ex.Response.Close();
                }
                return null;
            }
        }

        // Sets one request header, using the dedicated HttpWebRequest property for the
        // headers that Headers.Add rejects; everything else goes through Headers.Add.
        private static void ApplyRequestHeader(HttpWebRequest request, string name, string value)
        {
            if (string.Equals(name, "Referer", System.StringComparison.OrdinalIgnoreCase))
            {
                request.Referer = value;
            }
            else if (string.Equals(name, "User-Agent", System.StringComparison.OrdinalIgnoreCase))
            {
                request.UserAgent = value;
            }
            else if (string.Equals(name, "Accept", System.StringComparison.OrdinalIgnoreCase))
            {
                request.Accept = value;
            }
            else if (string.Equals(name, "Content-Type", System.StringComparison.OrdinalIgnoreCase))
            {
                request.ContentType = value;
            }
            else if (string.Equals(name, "Host", System.StringComparison.OrdinalIgnoreCase))
            {
                request.Host = value;
            }
            else
            {
                request.Headers.Add(name, value);
            }
        }

   /// <summary>
   /// Sends a GET to <c>_baseIPAddress</c> and returns the response body decoded as UTF-8.
   /// </summary>
   /// <param name="cookie">
   /// Receives the raw value of the response's Set-Cookie header (null when the server
   /// sent none), or an empty string when the request fails.
   /// </param>
   /// <returns>The response body, or null when the request fails with a <see cref="WebException"/>.</returns>
   public string CreateGetHttpResponse(out string cookie)
        {
            // NOTE(review): both ServicePointManager settings are process-wide. What
            // CheckValidationResult accepts is not visible here - confirm it does not
            // simply trust every certificate.
            ServicePointManager.ServerCertificateValidationCallback = new RemoteCertificateValidationCallback(CheckValidationResult);
            ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12 | SecurityProtocolType.Tls11;

            // Direct cast: a non-HTTP address fails here with a clear InvalidCastException
            // instead of a NullReferenceException on the next line.
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(_baseIPAddress);
            request.ProtocolVersion = HttpVersion.Version10;
            request.Method = "GET";
            request.ContentType = "application/x-www-form-urlencoded";
            request.UserAgent = DefaultUserAgent;

            try
            {
                // using blocks release the connection even if reading the body throws.
                using (var response = (HttpWebResponse)request.GetResponse())
                {
                    cookie = response.Headers["Set-Cookie"];

                    using (Stream body = response.GetResponseStream())
                    using (var reader = new StreamReader(body, Encoding.UTF8))
                    {
                        return reader.ReadToEnd();
                    }
                }
            }
            catch (WebException ex)
            {
                // The error response holds a connection as well; close it before giving up.
                if (ex.Response != null)
                {
                    ex.Response.Close();
                }
                cookie = "";
                return null;
            }
        }

爬取程序

 

 

爬取結果

瀏覽器結果

注意事項與結論

1,不同的網站,獲取csrf的方式不一樣,無論怎么做,只要信息傳到前台我們都可以有相應的方法來獲取。

2,請求時候的http驗證可能不一樣,測試的某航空公司物流信息的時候,http請求的安全協議是TLS 1.2。

 ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12 | SecurityProtocolType.Tls11; 還有其他參數比如UserAgent后台可能也會驗證

3,基於如上航空公司,發現它的cookie和csrf_token一定時間內不會改變,那么當實際爬取的時候可以考慮緩存cookie以及csrf_token,只有當請求失敗的時候,才重新獲取

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2026 CODEPRJ.COM