主要思想:通過後台WebView載入指定網頁,再提取出WebView中的內容
關鍵代碼:
var html = await webView.InvokeScriptAsync("eval", new string[] { "document.documentElement.outerHTML;" });
有一個很簡單的思路,
訂閱WebView的NavigationCompleted事件,然後讓WebView導航(Navigate)到指定的網址,事件觸發時執行這行代碼
除此之外,這裡還有一個異步的方法,用到了TaskCompletionSource這個東西
首先,創建一個TaskCompletionSource:
TaskCompletionSource<string> completionSource = new TaskCompletionSource<string>();
因為返回的東西是string(html),所以泛型T設置成string
然後使用lambda的形式訂閱NavigationCompleted事件:
// Numbered listing: the leading numbers are listing line numbers that the surrounding text refers to.
1 webView.NavigationCompleted += async (sender, args) =>
2 {
3     if (args.Uri != uri) // ignore completions reported for any other URI
4         return;
5     await Task.Delay(200); // short pause so the page's scripts get time to load after navigation completes
6     var html = await sender.InvokeScriptAsync("eval", new string[] { "document.documentElement.outerHTML;" }); // serialise the current DOM to a string
7     webView.NavigateToString(""); // navigate to an empty page so the loaded content stops running
8     webView = null;
9     completionSource.SetResult(html); // completes the task, ending the caller's await
10 };
Line5的延遲200ms,是為了在Navigation完成之後,再給頁面裡的其他元素(比如一些js腳本)留出一些加載時間(按理說,前面事件訂閱的寫法裡也應該加上這個延遲)
Line7的導航到空是為了防止WebView里的東西繼續運行從而導致一些靈異事件(尤其是一些帶視頻的網頁,咳咳)
Line9,給Task設置個Result,await就會結束
最后:
1 return completionSource.Task;
封裝成類:
/// <summary>
/// Loads a web page in a background <see cref="WebView"/> and extracts the
/// HTML of the loaded document (<c>document.documentElement.outerHTML</c>).
/// Offers an event-based instance API and a task-based static API.
/// </summary>
public class WebHelper
{
    // Script evaluated inside the page to serialise the current DOM.
    private const string OuterHtmlScript = "document.documentElement.outerHTML;";

    // Pause after NavigationCompleted so that page scripts get time to run.
    private const int ScriptSettleDelayMilliseconds = 200;

    /// <summary>
    /// Event data describing the outcome of a page load.
    /// </summary>
    public class WebLoadedArgs : EventArgs
    {
        /// <summary>True when the HTML was retrieved; false when navigation failed.</summary>
        public bool Success { get; private set; }

        /// <summary>Status reported by the WebView for the navigation.</summary>
        public WebErrorStatus WebErrorStatus { get; private set; }

        /// <summary>HTML of the loaded page; null when <see cref="Success"/> is false.</summary>
        public string Html { get; private set; }

        /// <summary>Creates event data for a failed load.</summary>
        /// <param name="webErrorStatus">Error status reported by the WebView.</param>
        public WebLoadedArgs(WebErrorStatus webErrorStatus)
        {
            WebErrorStatus = webErrorStatus;
            Success = false;
        }

        /// <summary>Creates event data for a successful load.</summary>
        /// <param name="Html">HTML extracted from the page.</param>
        /// <param name="webErrorStatus">Status reported by the WebView.</param>
        public WebLoadedArgs(string Html, WebErrorStatus webErrorStatus)
        {
            this.Html = Html;
            WebErrorStatus = webErrorStatus;
            Success = true;
        }
    }

    /// <summary>Address passed to the constructor.</summary>
    public string Url { get; private set; }

    /// <summary>Raised once, when the page has loaded or the navigation has failed.</summary>
    public event EventHandler<WebLoadedArgs> WebLoaded;

    private WebView webView;

    // Ensures WebLoaded is raised at most once even if both WebView events fire.
    private bool loadReported;

    /// <summary>
    /// Starts loading <paramref name="Url"/> in a background WebView; the outcome
    /// is delivered through <see cref="WebLoaded"/>.
    /// </summary>
    /// <param name="Url">Absolute address of the page to load.</param>
    public WebHelper(string Url)
    {
        this.Url = Url;
        webView = new WebView(WebViewExecutionMode.SeparateThread);

        // Attach the handlers before starting the navigation so no event can be missed.
        webView.NavigationCompleted += WebView_NavigationCompleted;
        webView.NavigationFailed += WebView_NavigationFailed;
        webView.Navigate(new Uri(Url));
    }

    private void WebView_NavigationFailed(object sender, WebViewNavigationFailedEventArgs e)
    {
        ReportLoaded(new WebLoadedArgs(e.WebErrorStatus));
    }

    private async void WebView_NavigationCompleted(WebView sender, WebViewNavigationCompletedEventArgs args)
    {
        if (!args.IsSuccess)
        {
            // A failed navigation must not be reported as a success.
            ReportLoaded(new WebLoadedArgs(args.WebErrorStatus));
            return;
        }

        string html;
        try
        {
            html = await sender.InvokeScriptAsync("eval", new string[] { OuterHtmlScript });
        }
        catch (Exception)
        {
            // This is an async void handler: an escaping exception would be unobservable
            // by callers, so a script failure is surfaced as a failed load instead.
            ReportLoaded(new WebLoadedArgs(WebErrorStatus.Unknown));
            return;
        }

        ReportLoaded(new WebLoadedArgs(html, args.WebErrorStatus));
    }

    // Raises WebLoaded at most once and releases the WebView.
    private void ReportLoaded(WebLoadedArgs e)
    {
        if (loadReported)
        {
            return;
        }

        loadReported = true;

        if (webView != null)
        {
            webView.NavigationCompleted -= WebView_NavigationCompleted;
            webView.NavigationFailed -= WebView_NavigationFailed;
            webView = null;
        }

        // Copy to a local so a missing subscriber does not cause a NullReferenceException.
        EventHandler<WebLoadedArgs> handler = WebLoaded;
        if (handler != null)
        {
            handler(this, e);
        }
    }

    /// <summary>
    /// Asynchronously loads a page and returns its HTML.
    /// </summary>
    /// <param name="Url">Absolute address of the page to load.</param>
    /// <param name="Timeout">Timeout in seconds.</param>
    /// <returns>HTML content of the page.</returns>
    public static Task<string> LoadWebAsync(string Url, int Timeout)
    {
        return LoadWebAsync(Url, "", Timeout);
    }

    /// <summary>
    /// Asynchronously loads a page and returns its HTML.
    /// </summary>
    /// <param name="Url">Absolute address of the page to load.</param>
    /// <param name="Referer">Value for the Referer request header, used to get past
    /// hot-link checks; null or empty sends no Referer header.</param>
    /// <param name="TimeOut">Timeout in seconds.</param>
    /// <returns>HTML content of the page.</returns>
    /// <exception cref="WebException">The navigation failed.</exception>
    /// <exception cref="TimeoutException">No result was produced within <paramref name="TimeOut"/> seconds.</exception>
    public static Task<string> LoadWebAsync(string Url, string Referer, int TimeOut)
    {
        WebView webView = new WebView(WebViewExecutionMode.SeparateThread);
        Uri uri = new Uri(Url);
        HttpRequestMessage requestMessage = new HttpRequestMessage(HttpMethod.Get, uri);

        // Skip the header when no referer was supplied rather than sending an empty value.
        if (!string.IsNullOrEmpty(Referer))
        {
            requestMessage.Headers.Add("Referer", Referer);
        }

        TaskCompletionSource<string> completionSource = new TaskCompletionSource<string>();
        DispatcherTimer timer = new DispatcherTimer();
        timer.Interval = TimeSpan.FromSeconds(TimeOut);

        // Shared clean-up: stop the timeout timer (dropping the reference alone would
        // leave it ticking) and navigate to an empty page so the loaded content stops running.
        Action release = () =>
        {
            timer.Stop();
            if (webView != null)
            {
                webView.NavigateToString("");
                webView = null;
            }
        };

        webView.NavigationCompleted += async (sender, args) =>
        {
            // Ignore events after the task is settled, and completions for other URIs
            // (for example the empty page loaded by the clean-up).
            // NOTE(review): if the server redirects, args.Uri may differ from the requested
            // URI and the task would then only end through the timeout - confirm.
            if (completionSource.Task.IsCompleted || args.Uri != uri)
            {
                return;
            }

            if (!args.IsSuccess)
            {
                release();
                completionSource.TrySetException(CreateNavigationException(args.WebErrorStatus));
                return;
            }

            try
            {
                await Task.Delay(ScriptSettleDelayMilliseconds);
                var html = await sender.InvokeScriptAsync("eval", new string[] { OuterHtmlScript });
                release();
                completionSource.TrySetResult(html);
            }
            catch (Exception ex)
            {
                // Route script failures to the returned task instead of letting them
                // escape from an async void lambda.
                release();
                completionSource.TrySetException(ex);
            }
        };

        webView.NavigationFailed += (sender, args) =>
        {
            release();
            completionSource.TrySetException(CreateNavigationException(args.WebErrorStatus));
        };

        timer.Tick += (sender, args) =>
        {
            release();
            // TrySet*: the task may already have been settled by one of the navigation events.
            completionSource.TrySetException(new TimeoutException());
        };

        // Start only after every handler is attached.
        webView.NavigateWithHttpRequestMessage(requestMessage);
        timer.Start();
        return completionSource.Task;
    }

    // Builds the exception used for failed navigations. The raw WebErrorStatus value is
    // carried in WebException.Status through a cast, as before; the two enums are
    // unrelated, so cast Status back to WebErrorStatus to interpret it.
    private static WebException CreateNavigationException(WebErrorStatus status)
    {
        return new WebException("Navigation failed: " + status, (WebExceptionStatus)status);
    }
}
使用方法:
(事件訂閱的方式)
// Start the load; the result is delivered through the WebLoaded event.
WebHelper helper = new WebHelper("http://www.baidu.com/");
helper.WebLoaded += WebHelper_WebLoaded;

/// <summary>
/// Receives the outcome of the background page load.
/// </summary>
private void WebHelper_WebLoaded(object sender, WebHelper.WebLoadedArgs e)
{
    // Nothing to read when the load did not succeed.
    if (!e.Success)
    {
        return;
    }

    string html = e.Html;
}
(異步的方式)
var html = await WebHelper.LoadWebAsync("http://www.baidu.com", 120);
