自己做Fiddler,實現http網站的抓取


---恢復內容開始---

由於工作需要,要做一個小程序來實現網站監控:當瀏覽器瀏覽到目標網站時,自動爬取頁面數據。由於https存在證書驗證,需要用別的方式處理,暫時還沒研究,但必須保證網站仍能正常訪問。

我在官方Demo的基礎上進行了修改,官方Demo地址:http://fiddler.wikidot.com/fiddlercore-demo

這邊我來介紹下我的Demo。

首先要去下載FiddlerCore4.Dll,百度上很多。

先上代碼:

主界面代碼

using System;
using System.Collections.Generic;
using System.IO;
using System.Reflection;
using System.Threading;
using Fiddler;
using System.Net;
using System.Text.RegularExpressions;
using System.Text;
using System.Diagnostics;
using System.Runtime.InteropServices;

namespace FiddlerDemo
{
    /// <summary>
    /// Console entry point. Installs a native console control handler so that
    /// FiddlerCore is shut down when the console is terminated, then hands
    /// control to <see cref="FiddlerHelp.StartSession"/>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Optional proxy instance disposed by <see cref="DoQuit"/>. Nothing in this
        /// class assigns it; it is only disposed when some other code has set it.
        /// </summary>
        public static Proxy oProxy;

        #region Clean up when the console is closed abnormally, so the machine is not left affected

        /// <summary>Managed signature of the Win32 console control handler callback.</summary>
        /// <param name="CtrlType">Numeric code of the control event that was raised.</param>
        /// <returns>The value handed back to the operating system for this event.</returns>
        public delegate bool ControlCtrlDelegate(int CtrlType);

        [DllImport("kernel32.dll")]
        private static extern bool SetConsoleCtrlHandler(ControlCtrlDelegate HandlerRoutine, bool Add);

        // Held in a static field so the delegate passed to native code is not garbage collected.
        private static ControlCtrlDelegate cancelHandler = new ControlCtrlDelegate(HandlerRoutine);

        // Win32 console control event codes (see the HandlerRoutine documentation).
        private const int CtrlCEvent = 0;        // Ctrl+C
        private const int CtrlBreakEvent = 1;    // Ctrl+Break
        private const int CtrlCloseEvent = 2;    // console window close button
        private const int CtrlLogoffEvent = 5;   // user is logging off
        private const int CtrlShutdownEvent = 6; // system is shutting down

        /// <summary>
        /// Console control callback: runs <see cref="DoQuit"/> for every event that
        /// ends the process, so FiddlerCore is shut down before the process exits.
        /// </summary>
        /// <param name="CtrlType">Numeric code of the control event that was raised.</param>
        /// <returns>
        /// Always <c>false</c>, so the event is passed on to the next handler
        /// (ultimately the default one) after the cleanup has run.
        /// </returns>
        public static bool HandlerRoutine(int CtrlType)
        {
            switch (CtrlType)
            {
                case CtrlCEvent:
                case CtrlBreakEvent:
                case CtrlCloseEvent:
                case CtrlLogoffEvent:
                case CtrlShutdownEvent:
                    DoQuit();
                    break;
            }
            return false;
        }
        #endregion

        /// <summary>
        /// Registers the console control handler and starts the capture session.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            SetConsoleCtrlHandler(cancelHandler, true);
            FiddlerHelp fh = new FiddlerHelp();
            fh.StartSession();
        }

        /// <summary>
        /// Handler with the <see cref="ConsoleCancelEventHandler"/> signature that quits
        /// the application. Nothing in this class subscribes it.
        /// </summary>
        static void Console_CancelKeyPress(object sender, ConsoleCancelEventArgs e)
        {
            DoQuit();
        }

        /// <summary>
        /// Announces the shutdown, disposes <see cref="oProxy"/> when it is set and
        /// shuts FiddlerCore down.
        /// </summary>
        public static void DoQuit()
        {
            WriteCommandResponse("Shutting down...");
            if (null != oProxy)
            {
                oProxy.Dispose();
            }
            Fiddler.FiddlerApplication.Shutdown();
            // NOTE(review): presumably gives the shutdown a moment to complete before the process exits.
            Thread.Sleep(500);
        }

        /// <summary>
        /// Writes a line to the console in yellow and then restores the previous foreground color.
        /// </summary>
        /// <param name="s">Text to write.</param>
        public static void WriteCommandResponse(string s)
        {
            ConsoleColor oldColor = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine(s);
            Console.ForegroundColor = oldColor;
        }
    }
}

處理類:

using Fiddler;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Configuration;
using System.Xml;


namespace FiddlerDemo
{
    /// <summary>
    /// One watch rule read from the XML configuration.
    /// </summary>
    public class HtmlConfig
    {
        /// <summary>Text that a session's full URL must contain for the rule to apply.</summary>
        public string sWeb { get; set; }

        /// <summary>Regular expression applied to the response body of a matching session.</summary>
        public string sRegex { get; set; }
    }

    /// <summary>
    /// Starts FiddlerCore, prints the regex match of every response whose URL
    /// matches a configured watch rule, and runs an interactive command loop.
    /// </summary>
    public class FiddlerHelp
    {
        /// <summary>
        /// Optional proxy instance disposed by <see cref="DoQuit"/>. Nothing in this
        /// class assigns it.
        /// </summary>
        public static Proxy oProxy;

        /// <summary>Watch rules filled in by <see cref="LoadHtmlConfig"/>.</summary>
        public static List<HtmlConfig> dicHtml = new List<HtmlConfig> { };

        /// <summary>
        /// Loads the watch rules, hooks the FiddlerCore events, starts the proxy on
        /// port 8877 and blocks in the command loop until the user presses Q.
        /// </summary>
        public void StartSession()
        {
            LoadHtmlConfig();

            // Set the display name.
            Fiddler.FiddlerApplication.SetAppDisplayName("FiddlerCoreDemoApp");

            // Shared between the FiddlerCore callbacks and the command loop;
            // every access goes through lock (oAllSessions).
            List<Fiddler.Session> oAllSessions = new List<Fiddler.Session>();

            Fiddler.FiddlerApplication.OnNotification += delegate(object sender, NotificationEventArgs oNEA)
            {
                Console.WriteLine("** NotifyUser: " + oNEA.NotifyString);
            };

            Fiddler.FiddlerApplication.BeforeRequest += delegate(Fiddler.Session oS)
            {
                // Buffering must be enabled for the response to be available (and
                // modifiable) in the BeforeResponse handler instead of being streamed
                // to the client as it arrives.
                oS.bBufferResponse = true;
                lock (oAllSessions)
                {
                    oAllSessions.Add(oS);
                }
            };

            Fiddler.FiddlerApplication.BeforeResponse += delegate(Fiddler.Session oS)
            {
                // Decode the response so the body can be read as text.
                oS.utilDecodeResponse();
                ReportWatchedContent(oS);
                // The session was already recorded in BeforeRequest; adding it again
                // here would double the session count.
            };

            Fiddler.FiddlerApplication.AfterSessionComplete += delegate(Fiddler.Session oS)
            {
                int iCount;
                lock (oAllSessions)
                {
                    iCount = oAllSessions.Count;
                    // Empty the list once it holds more than 1000 sessions.
                    if (iCount > 1000)
                    {
                        oAllSessions.Clear();
                    }
                }
                Console.Title = ("Session list contains: " + iCount.ToString() + " sessions");
            };

            Console.CancelKeyPress += new ConsoleCancelEventHandler(Console_CancelKeyPress);

            FiddlerApplication.Prefs.SetBoolPref("fiddler.network.streaming.abortifclientaborts", true);

            // Startup configuration.
            Fiddler.CONFIG.IgnoreServerCertErrors = false;
            int iPort = 8877;
            Fiddler.FiddlerApplication.Startup(iPort, true, false, true);

            RunCommandLoop(oAllSessions);
        }

        /// <summary>
        /// For every watch rule whose <see cref="HtmlConfig.sWeb"/> occurs in the session
        /// URL, prints the session summary and the regex match found in the response body.
        /// </summary>
        /// <param name="oS">Session whose response has been received.</param>
        private static void ReportWatchedContent(Fiddler.Session oS)
        {
            foreach (HtmlConfig item in dicHtml)
            {
                if (!oS.fullUrl.Contains(item.sWeb))
                {
                    continue;
                }

                Console.WriteLine("{0}:HTTP {1} for {2}", oS.id, oS.responseCode, oS.fullUrl);
                string sHtmlBody = oS.GetResponseBodyAsString();
                if (!string.IsNullOrEmpty(sHtmlBody))
                {
                    Console.Write("獲取的內容為:" + MatchRegex(sHtmlBody, item.sRegex) + "\n");
                }
            }
        }

        /// <summary>
        /// Reads single-key commands from the console until Q is pressed.
        /// </summary>
        /// <param name="oAllSessions">Session list shared with the FiddlerCore callbacks.</param>
        private void RunCommandLoop(List<Fiddler.Session> oAllSessions)
        {
            bool bDone = false;
            do
            {
                Console.WriteLine("\nEnter a command [C=Clear; L=List; G=Collect Garbage; W=write SAZ; R=read SAZ;\n\tS=Toggle Forgetful Streaming; T=Trust Root Certificate; Q=Quit]:>");
                ConsoleKeyInfo cki = Console.ReadKey();
                Console.WriteLine();
                switch (Char.ToLower(cki.KeyChar))
                {
                    case 'c':
                        lock (oAllSessions)
                        {
                            oAllSessions.Clear();
                        }
                        WriteCommandResponse("Clear...");
                        FiddlerApplication.Log.LogString("Cleared session list.");
                        break;

                    case 'd':
                        FiddlerApplication.Log.LogString("FiddlerApplication::Shutdown.");
                        FiddlerApplication.Shutdown();
                        break;

                    case 'g':
                        Console.WriteLine("Working Set:\t" + Environment.WorkingSet.ToString("n0"));
                        Console.WriteLine("Begin GC...");
                        GC.Collect();
                        Console.WriteLine("GC Done.\nWorking Set:\t" + Environment.WorkingSet.ToString("n0"));
                        break;

                    case 'q':
                        bDone = true;
                        DoQuit();
                        break;

                    case 'r':
                        WriteCommandResponse("This demo was compiled without SAZ_SUPPORT defined");
                        break;

                    case 'w':
                        WriteCommandResponse("This demo was compiled without SAZ_SUPPORT defined");
                        break;

                    case 't':
                        try
                        {
                            WriteCommandResponse("Result: " + Fiddler.CertMaker.trustRootCert().ToString());
                        }
                        catch (Exception eX)
                        {
                            WriteCommandResponse("Failed: " + eX.ToString());
                        }
                        break;

                    // Forgetful streaming
                    case 's':
                        bool bForgetful = !FiddlerApplication.Prefs.GetBoolPref("fiddler.network.streaming.ForgetStreamedData", false);
                        FiddlerApplication.Prefs.SetBoolPref("fiddler.network.streaming.ForgetStreamedData", bForgetful);
                        Console.WriteLine(bForgetful
                            ? "FiddlerCore will immediately dump streaming response data."
                            : "FiddlerCore will keep a copy of streamed response data.");
                        break;
                }
            } while (!bDone);
        }

        /// <summary>
        /// Reads the watched sites and their extraction regular expressions from
        /// WatchHtml.xml and appends them to <see cref="dicHtml"/>.
        /// </summary>
        /// <exception cref="XmlException">
        /// The file is not well-formed XML or has no <c>Root</c> element.
        /// </exception>
        private void LoadHtmlConfig()
        {
            // Prefer the copy two directories up (the debugging layout); otherwise
            // use the file next to the executable. File is fully qualified because
            // this file does not import System.IO.
            string sPath = System.IO.File.Exists(@"..\..\WatchHtml.xml")
                ? @"..\..\WatchHtml.xml"
                : @"WatchHtml.xml";

            XmlDocument xmlDoc = new XmlDocument();
            // Without this call the document is empty and no Root node can be found.
            xmlDoc.Load(sPath);

            XmlNode xn = xmlDoc.SelectSingleNode("Root");
            if (xn == null)
            {
                throw new XmlException("WatchHtml.xml does not contain a Root element: " + sPath);
            }

            // Only element children carry rules; comments and whitespace nodes are skipped.
            foreach (XmlElement xe in xn.ChildNodes.OfType<XmlElement>())
            {
                HtmlConfig htmlConfig = new HtmlConfig
                {
                    sWeb = xe.GetAttribute("Web"),
                    sRegex = xe.GetAttribute("Regex")
                };
                dicHtml.Add(htmlConfig);
            }
        }

        /// <summary>
        /// Ctrl+C / Ctrl+Break handler registered by <see cref="StartSession"/>; quits the application.
        /// </summary>
        public void Console_CancelKeyPress(object sender, ConsoleCancelEventArgs e)
        {
            DoQuit();
        }

        /// <summary>
        /// Quits: disposes <see cref="oProxy"/> when it is set and shuts FiddlerCore down.
        /// </summary>
        public void DoQuit()
        {
            if (null != oProxy)
            {
                oProxy.Dispose();
            }
            Fiddler.FiddlerApplication.Shutdown();
            // NOTE(review): presumably gives the shutdown a moment to complete.
            Thread.Sleep(500);
        }

        /// <summary>
        /// Writes a line to the console in yellow and then restores the previous foreground color.
        /// </summary>
        /// <param name="s">Text to write.</param>
        public static void WriteCommandResponse(string s)
        {
            ConsoleColor oldColor = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine(s);
            Console.ForegroundColor = oldColor;
        }

        /// <summary>
        /// Returns the first match of a regular expression in an HTML page. Line breaks
        /// in the page are replaced by spaces and the result is trimmed before matching.
        /// </summary>
        /// <param name="sHtml">The HTML page that was captured.</param>
        /// <param name="sRegex">The regular expression to apply (case-insensitive).</param>
        /// <returns>
        /// The matched text; an empty string when nothing matches; <c>null</c> when
        /// <paramref name="sHtml"/> is null or empty, or when <paramref name="sRegex"/>
        /// is null or not a valid pattern.
        /// </returns>
        public static string MatchRegex(string sHtml, string sRegex)
        {
            if (string.IsNullOrEmpty(sHtml))
            {
                return null;
            }

            try
            {
                string sFlattened = sHtml.Replace('\r', ' ').Replace('\n', ' ').Trim();
                Match result = Regex.Match(sFlattened, sRegex, RegexOptions.IgnoreCase | RegexOptions.Multiline);
                return result.Success ? result.Value : string.Empty;
            }
            catch (ArgumentException)
            {
                // A null or malformed pattern from the configuration must not break
                // the capture callback; report "no result" instead.
                return null;
            }
        }
    }
}

XML文件內容(WatchHtml.xml:根節點為Root,其下每個子節點用Web屬性指定要監聽的網址,用Regex屬性指定提取數據的正則表達式)

實現效果

 

 

 

 

 

 

---恢復內容結束---


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM