c#: WebBrowser控制台輸出


這次仍是處理與視頻下載相關的問題。

有些網站的頁面內容是在頁面加載后由js動態生成的,那么其原始的html便不能直接使用;我們需要的是頁面渲染后的代碼。

c#中,我用WebBrowser這個控件處理。設置項目類型為控制台程序,加Form承載WebBrowser實現。

記錄代碼以做備忘:

using System;
using System.IO;
using System.Net;
using System.Runtime.InteropServices;
using System.Text;
using System.Windows.Forms;
using Microsoft.Win32;

namespace crpj
{
    /// <summary>
    /// Host window for the WebBrowser control. The window is never made visible.
    /// The type is COM-visible, which is required for it to be assigned to
    /// <c>WebBrowser.ObjectForScripting</c>.
    /// </summary>
    [ComVisible(true)]
    public class Form : System.Windows.Forms.Form
    {
        /// <summary>
        /// Downloads the resource at <paramref name="url"/> and returns it decoded as UTF-8 text.
        /// </summary>
        /// <param name="url">Address of the resource to download.</param>
        /// <returns>The response body as a string.</returns>
        public string GetHtmlCode(string url)
        {
            WebClient client = new WebClient { Encoding = Encoding.UTF8 };
            using (client)
            {
                string html = client.DownloadString(url);
                return html;
            }
        }

        /// <summary>
        /// Ignores the requested visibility and always keeps the window hidden.
        /// </summary>
        /// <param name="value">Requested visibility; not used.</param>
        protected override void SetVisibleCore(bool value)
        {
            const bool alwaysHidden = false;
            base.SetVisibleCore(alwaysHidden);
        }
    }

    /// <summary>
    /// Console program that loads a URL in a hidden WebBrowser control and writes the
    /// rendered (post-JavaScript) HTML of the page to standard output as UTF-8.
    /// </summary>
    class Program
    {
        private static Timer tmrGet = new Timer();
        private static Timer tmrExit = new Timer();
        private static WebBrowser browser = new WebBrowser();
        // Extra wait, in milliseconds, between document completion and output (0 = none).
        private static int delay = 0;
        // Optional JavaScript to inject into the page once it has loaded.
        private static string jsCode;
        // Set once the HTML has been emitted so that repeated events cannot output twice.
        private static bool outputDone = false;

        // Feature id that disables the navigation "click" sound.
        const int FEATURE_DISABLE_NAVIGATION_SOUNDS = 21;
        const int SET_FEATURE_ON_PROCESS = 0x00000002;

        // Watchdog: give up waiting for the page after this many milliseconds.
        const int ExitTimeoutMs = 90000;
        // Time given to an injected script before the DOM is captured.
        const int ScriptSettleMs = 500;
        // FEATURE_BROWSER_EMULATION value selecting IE11 rendering.
        const int BrowserEmulationIE11 = 11001;

        [DllImport("urlmon.dll")]
        [PreserveSig]
        [return: MarshalAs(UnmanagedType.Error)]
        static extern int CoInternetSetFeatureEnabled(
            int FeatureEntry,
            [MarshalAs(UnmanagedType.U4)] int dwFlags,
            bool fEnable);

        /// <summary>
        /// Main entry point of the application.
        /// Arguments: <c>url [delay [jscode]]</c> where <c>delay</c> is a non-negative
        /// number of milliseconds and <c>jscode</c> is a script body to run in the page.
        /// </summary>
        /// <param name="args">Command-line arguments.</param>
        [STAThread]
        static void Main(string[] args)
        {
            if (args.Length == 0)
            {
                Console.WriteLine("error: You must provide at least one URL.");
                return;
            }

            // Validate the optional arguments up front instead of crashing with a
            // FormatException after the browser has already been set up.
            if (args.Length > 1)
            {
                if (!int.TryParse(args[1], out delay) || delay < 0)
                {
                    Console.WriteLine("error: delay must be a non-negative integer (milliseconds).");
                    return;
                }
            }
            if (args.Length > 2)
            {
                jsCode = args[2];
            }

            CoInternetSetFeatureEnabled(
                FEATURE_DISABLE_NAVIGATION_SOUNDS,
                SET_FEATURE_ON_PROCESS,
                true);
            CheckAndSetBrowserEmulation();

            var form = new Form();
            form.Controls.Add(browser);
            browser.ObjectForScripting = form;
            browser.ScriptErrorsSuppressed = true;
            browser.DocumentCompleted += browser_DocumentCompleted;

            // Pages may need time for their scripts to initialise, so the content can be
            // captured after a delay; the interval is set when the document completes.
            tmrGet.Tick += new EventHandler(tmrGet_Tick);

            // Some pages never raise DocumentCompleted, or take very long to do so.
            // This watchdog outputs whatever is available after ExitTimeoutMs (90 s) and exits.
            tmrExit.Tick += new EventHandler(tmrExit_Tick);
            tmrExit.Interval = ExitTimeoutMs;
            tmrExit.Start();

            browser.Navigate(args[0]);

            Application.Run(form);
        }

        /// <summary>Watchdog tick: output what has been rendered so far and exit.</summary>
        static void tmrExit_Tick(object sender, EventArgs e)
        {
            OutputHtml();
        }

        /// <summary>
        /// Makes the WebBrowser control render with the IE11 engine by registering this
        /// executable under FEATURE_BROWSER_EMULATION for the current user. An existing
        /// value is left untouched. Failure is reported on standard error and is not fatal.
        /// </summary>
        static void CheckAndSetBrowserEmulation()
        {
            try
            {
                string keyName = @"SOFTWARE\Microsoft\Internet Explorer\MAIN\FeatureControl\FEATURE_BROWSER_EMULATION";
                // CreateSubKey opens the key for writing and creates it when it does not
                // exist yet; OpenSubKey would return null in that case.
                using (var key = Registry.CurrentUser.CreateSubKey(keyName))
                {
                    if (key == null)
                    {
                        Console.Error.WriteLine("warning: could not open FEATURE_BROWSER_EMULATION.");
                        return;
                    }

                    string valueName = Path.GetFileName(Application.ExecutablePath);
                    if (key.GetValue(valueName) == null)
                    {
                        key.SetValue(valueName, BrowserEmulationIE11, RegistryValueKind.DWord);
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort only: rendering still works, just with the default engine.
                Console.Error.WriteLine("warning: could not set FEATURE_BROWSER_EMULATION: " + ex.Message);
            }
        }

        /// <summary>Delay elapsed: capture the page and exit.</summary>
        static void tmrGet_Tick(object sender, EventArgs e)
        {
            tmrGet.Stop();
            OutputHtml();
        }

        /// <summary>
        /// Writes the rendered HTML to standard output (UTF-8) and ends the message loop.
        /// Only the first call has any effect. If no document is available the process
        /// exit code is set to 1 and nothing is written.
        /// </summary>
        static void OutputHtml()
        {
            if (outputDone)
            {
                return;
            }
            outputDone = true;

            tmrExit.Stop();
            tmrGet.Stop();

            // UTF-8 avoids mangling Korean and other non-ASCII text.
            Console.OutputEncoding = Encoding.UTF8;

            string html = GetRenderedHtml();
            if (html != null)
            {
                Console.Write(html);
            }
            else
            {
                Environment.ExitCode = 1;
            }

            Application.Exit();
        }

        /// <summary>
        /// Returns the outer HTML of the live document's root element, or null when the
        /// browser has no document or no html element yet.
        /// </summary>
        static string GetRenderedHtml()
        {
            // browser.DocumentText does not reflect the body after scripts have run,
            // so the live DOM is serialised instead.
            var document = browser.Document;
            if (document == null)
            {
                return null;
            }

            var roots = document.GetElementsByTagName("html");
            if (roots.Count == 0)
            {
                return null;
            }

            return roots[0].OuterHtml;
        }

        /// <summary>
        /// Injects <paramref name="jsCode"/> into the current document wrapped in a
        /// function named <c>_func</c> and invokes it. Does nothing when the document
        /// or its body is not available.
        /// </summary>
        /// <param name="jsCode">JavaScript statements forming the function body.</param>
        static void ExecJS(string jsCode)
        {
            var document = browser.Document;
            if (document == null || document.Body == null)
            {
                return;
            }

            var script = document.CreateElement("script");
            if (script == null)
            {
                return;
            }

            script.SetAttribute("type", "text/javascript");
            script.SetAttribute("text", "function _func() {" + jsCode + "}");
            document.Body.AppendChild(script);
            document.InvokeScript("_func");
        }

        /// <summary>
        /// Runs when a document finishes loading. For the top-level document it injects
        /// the optional script and then outputs the page, either immediately or after the
        /// configured delay (plus a short settle time when a script was injected).
        /// </summary>
        static void browser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            if (outputDone)
            {
                return;
            }

            // Ignore completions of frames and of intermediate states.
            if (browser.ReadyState != WebBrowserReadyState.Complete || e.Url != browser.Url)
            {
                return;
            }

            int wait = delay;

            // Inject the user script if one was supplied.
            if (!string.IsNullOrEmpty(jsCode))
            {
                ExecJS(jsCode);
                // The settle time is served by the timer rather than Thread.Sleep:
                // sleeping here would block the UI thread and its message loop, which
                // the page's scripts depend on to make progress.
                wait += ScriptSettleMs;
            }

            if (wait == 0)
            {
                OutputHtml();
            }
            else
            {
                // Restart so the wait is measured from the most recent top-level completion.
                tmrGet.Stop();
                tmrGet.Interval = wait;
                tmrGet.Start();
            }
        }
    }
}
 

如此處理,可能得到所需要的html代碼。

其在控制台輸出圖示效果:

 

並基於此思路,設計進程輸出管理器:

    /// <summary>
    /// Runs a child process with redirected standard output and standard error,
    /// forwards each received line to subscribers, and raises
    /// <see cref="AllDataReceived"/> once with the complete standard output after the
    /// process has exited and its output stream has been fully read.
    /// </summary>
    internal class ProcessOutputMgr : IDisposable
    {
        // Per-instance lock: guards allData and the completion flags, and serialises
        // the forwarding of stdout/stderr lines to subscribers.
        private readonly object syncObj = new Object();
        private readonly Process process = new Process();
        private readonly StringBuilder allData = new StringBuilder();
        // True once the Exited event has been seen.
        private bool processExited = false;
        // True once the end of standard output (a null Data line) has been seen.
        private bool outputClosed = false;
        // True once AllDataReceived has been raised, so it is raised only once.
        private bool allDataRaised = false;

        /// <summary>
        /// Prepares (but does not start) the process.
        /// </summary>
        /// <param name="fileName">Executable to run.</param>
        /// <param name="args">Command-line arguments passed to the executable.</param>
        public ProcessOutputMgr(string fileName, string args)
        {
            var startInfo = new ProcessStartInfo(fileName);
            startInfo.WindowStyle = ProcessWindowStyle.Hidden;
            startInfo.Arguments = args;
            startInfo.UseShellExecute = false;
            startInfo.CreateNoWindow = true;
            // crpj always writes UTF-8; decode stdout as UTF-8 to avoid garbled text.
            startInfo.StandardOutputEncoding = Encoding.UTF8;
            startInfo.RedirectStandardOutput = true;
            startInfo.RedirectStandardError = true;

            process.StartInfo = startInfo;
            // Required: without this the Exited event is never raised.
            process.EnableRaisingEvents = true;
            process.Exited += process_Exited;
            process.OutputDataReceived += process_OutputDataReceived;
            process.ErrorDataReceived += process_ErrorDataReceived;
        }

        /// <summary>Raised for every line read from standard output (Data is null at end of stream).</summary>
        public event DataReceivedEventHandler OutputDataReceived;

        /// <summary>Raised for every line read from standard error (Data is null at end of stream).</summary>
        public event DataReceivedEventHandler ErrorDataReceived;

        /// <summary>Raised once with all standard-output lines after the process has finished.</summary>
        public event Action<string> AllDataReceived;

        /// <summary>
        /// Starts the process and begins asynchronous reading of both output streams.
        /// </summary>
        /// <returns>The value returned by <c>Process.Start()</c>.</returns>
        public bool Start()
        {
            bool result = process.Start();
            process.BeginOutputReadLine();
            process.BeginErrorReadLine();
            return result;
        }

        /// <summary>Blocks until the process has exited.</summary>
        public void WaitForExit()
        {
            process.WaitForExit();
        }

        /// <summary>Blocks until the process has exited or the timeout elapses.</summary>
        /// <param name="milliseconds">Maximum time to wait.</param>
        /// <returns>True if the process exited within the timeout.</returns>
        public bool WaitForExit(int milliseconds)
        {
            return process.WaitForExit(milliseconds);
        }

        /// <summary>Releases the underlying <see cref="Process"/> object.</summary>
        public void Dispose()
        {
            process.Dispose();
        }

        private void process_Exited(object sender, EventArgs e)
        {
            string data;
            lock (syncObj)
            {
                processExited = true;
                data = TakeAllDataIfComplete();
            }
            RaiseAllData(data);
        }

        private void process_OutputDataReceived(object sender, DataReceivedEventArgs e)
        {
            string data = null;
            lock (syncObj)
            {
                var handler = OutputDataReceived;
                if (handler != null)
                    handler(sender, e);

                if (e.Data != null)
                {
                    allData.AppendLine(e.Data);
                }
                else
                {
                    // A null line marks the end of standard output.
                    outputClosed = true;
                    data = TakeAllDataIfComplete();
                }
            }
            RaiseAllData(data);
        }

        private void process_ErrorDataReceived(object sender, DataReceivedEventArgs e)
        {
            lock (syncObj)
            {
                var handler = ErrorDataReceived;
                if (handler != null)
                    handler(sender, e);
            }
        }

        // Returns the collected output the first time both "process exited" and
        // "stdout closed" hold, otherwise null. Must be called with syncObj held.
        private string TakeAllDataIfComplete()
        {
            if (allDataRaised || !processExited || !outputClosed)
                return null;

            allDataRaised = true;
            return allData.ToString();
        }

        // Raises AllDataReceived outside the lock; a null argument means "not ready yet".
        private void RaiseAllData(string data)
        {
            if (data == null)
                return;

            var handler = AllDataReceived;
            if (handler != null)
                handler(data);
        }
    }

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM