我們在做 Web 測試時,經常會使用 WebBrowser 來進行一些自動化的任務。而有些網頁會用 IFrame 去嵌套別的頁面,這些頁面可能不在相同的域名下,這時就會出現跨域問題,無法直接在 WebBrowser 中獲取到 IFrame 中的元素。下面來做個試驗:自己寫個頁面嵌套一個百度的首頁,然後在我們自己的頁面上輸入要查詢的詞,最後在百度上自動完成搜索。
<!DOCTYPE html>
<!-- Host page for the cross-domain experiment: a third-party page in an iframe next to a local text box. -->
<html lang="zh-Hant" xmlns="http://www.w3.org/1999/xhtml">
<head>
    <meta charset="utf-8" />
    <!-- A non-empty title is required for a conforming document. -->
    <title>WebBrowser 跨域 IFrame 測試</title>
</head>
<body>
    <!-- The only frame on the page; the WinForms test code addresses it as frames[0]. -->
    <iframe id="baidu" style="float:left;" width="500" height="500" src="http://www.baidu.com"></iframe>
    <div>
        <!-- The WinForms test code reads this element's value through the id "search". -->
        測試值:<input id="search" type="text" />
    </div>
</body>
</html>
下面再建一個簡單的WinForm工程測試一下,界面如下:
下面就是WebBrowser的測試代碼:
using System;
using System.Windows.Forms;

namespace WebBrowserTest
{
    /// <summary>
    /// Test form, first (naive) version: drives the embedded page through the
    /// managed <see cref="HtmlDocument"/> wrappers only.
    /// </summary>
    /// <remarks>
    /// This version is kept as the failing example: the surrounding article reports an
    /// unhandled UnauthorizedAccessException when the frame hosts a page from another domain.
    /// </remarks>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>Loads the address typed into the text box.</summary>
        private void button1_Click(object sender, EventArgs e)
        {
            string address = textBox1.Text;
            webBrowser1.Navigate(address);
        }

        /// <summary>
        /// Copies the value of the local "search" input into the "kw" element of the
        /// first frame and then clicks that frame's "su" element.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            HtmlDocument hostPage = webBrowser1.Document;
            HtmlWindowCollection childFrames = hostPage.Window.Frames;

            HtmlElement localInput = hostPage.GetElementById("search");
            string testValue = localInput.GetAttribute("value");

            // The frame document is fetched separately for each step.
            HtmlElement keywordBox = childFrames[0].Document.GetElementById("kw");
            keywordBox.SetAttribute("value", testValue);

            HtmlElement submitButton = childFrames[0].Document.GetElementById("su");
            submitButton.InvokeMember("click");
        }
    }
}
運行我們的測試程序並加載之前自己寫的頁面後,在頁面上輸入要查詢的詞,點擊測試按鈕,就會看到程序拋出未處理的 UnauthorizedAccessException 錯誤:
下面來編寫一個 Helper 類來解決這個問題,主要原理大致就是利用 IWebBrowser2 這個接口來獲取 IFrame 中的 DOM,IWebBrowser2 中的 Document 可以轉換為 IHTMLDocument、IHTMLDocument2、IHTMLDocument3。
using System;
using System.Runtime.InteropServices;
using System.Windows.Forms;
using mshtml;

namespace WebBrowserTest
{
    // This is the COM IServiceProvider interface, not System.IServiceProvider .Net interface!
    [ComImport(), ComVisible(true), Guid("6D5140C1-7436-11CE-8034-00AA006009FA"),
     InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown)]
    public interface IServiceProvider
    {
        // PreserveSig: the HRESULT is returned to the caller instead of being turned into an exception.
        [return: MarshalAs(UnmanagedType.I4)]
        [PreserveSig]
        int QueryService(ref Guid guidService, ref Guid riid, [MarshalAs(UnmanagedType.Interface)] out object ppvObject);
    }

    // Result type of IWebBrowser2.QueryStatusWB.
    public enum OLECMDF
    {
        OLECMDF_DEFHIDEONCTXTMENU = 0x20,
        OLECMDF_ENABLED = 2,
        OLECMDF_INVISIBLE = 0x10,
        OLECMDF_LATCHED = 4,
        OLECMDF_NINCHED = 8,
        OLECMDF_SUPPORTED = 1
    }

    // Command identifiers accepted by IWebBrowser2.QueryStatusWB / ExecWB.
    public enum OLECMDID
    {
        OLECMDID_PAGESETUP = 8,
        OLECMDID_PRINT = 6,
        OLECMDID_PRINTPREVIEW = 7,
        OLECMDID_PROPERTIES = 10,
        OLECMDID_SAVEAS = 4
    }

    // Execution option passed to IWebBrowser2.ExecWB.
    public enum OLECMDEXECOPT
    {
        OLECMDEXECOPT_DODEFAULT,
        OLECMDEXECOPT_PROMPTUSER,
        OLECMDEXECOPT_DONTPROMPTUSER,
        OLECMDEXECOPT_SHOWHELP
    }

    // Managed declaration of the COM IWebBrowser2 interface.
    // Do not reorder, add or remove members: for COM-imported interfaces the declaration
    // order defines the method slots used at run time.
    [ComImport, Guid("D30C1661-CDAF-11d0-8A3E-00C04FC9E26E"),
     TypeLibType(TypeLibTypeFlags.FOleAutomation | TypeLibTypeFlags.FDual | TypeLibTypeFlags.FHidden)]
    public interface IWebBrowser2
    {
        [DispId(100)]
        void GoBack();
        [DispId(0x65)]
        void GoForward();
        [DispId(0x66)]
        void GoHome();
        [DispId(0x67)]
        void GoSearch();
        [DispId(0x68)]
        void Navigate([In] string Url, [In] ref object flags, [In] ref object targetFrameName, [In] ref object postData, [In] ref object headers);
        [DispId(-550)]
        void Refresh();
        [DispId(0x69)]
        void Refresh2([In] ref object level);
        [DispId(0x6a)]
        void Stop();
        [DispId(200)]
        object Application { [return: MarshalAs(UnmanagedType.IDispatch)] get; }
        [DispId(0xc9)]
        object Parent { [return: MarshalAs(UnmanagedType.IDispatch)] get; }
        [DispId(0xca)]
        object Container { [return: MarshalAs(UnmanagedType.IDispatch)] get; }
        [DispId(0xcb)]
        object Document { [return: MarshalAs(UnmanagedType.IDispatch)] get; }
        [DispId(0xcc)]
        bool TopLevelContainer { get; }
        [DispId(0xcd)]
        string Type { get; }
        [DispId(0xce)]
        int Left { get; set; }
        [DispId(0xcf)]
        int Top { get; set; }
        [DispId(0xd0)]
        int Width { get; set; }
        [DispId(0xd1)]
        int Height { get; set; }
        [DispId(210)]
        string LocationName { get; }
        [DispId(0xd3)]
        string LocationURL { get; }
        [DispId(0xd4)]
        bool Busy { get; }
        [DispId(300)]
        void Quit();
        [DispId(0x12d)]
        void ClientToWindow(out int pcx, out int pcy);
        [DispId(0x12e)]
        void PutProperty([In] string property, [In] object vtValue);
        [DispId(0x12f)]
        object GetProperty([In] string property);
        [DispId(0)]
        string Name { get; }
        [DispId(-515)]
        int HWND { get; }
        [DispId(400)]
        string FullName { get; }
        [DispId(0x191)]
        string Path { get; }
        [DispId(0x192)]
        bool Visible { get; set; }
        [DispId(0x193)]
        bool StatusBar { get; set; }
        [DispId(0x194)]
        string StatusText { get; set; }
        [DispId(0x195)]
        int ToolBar { get; set; }
        [DispId(0x196)]
        bool MenuBar { get; set; }
        [DispId(0x197)]
        bool FullScreen { get; set; }
        [DispId(500)]
        void Navigate2([In] ref object URL, [In] ref object flags, [In] ref object targetFrameName, [In] ref object postData, [In] ref object headers);
        [DispId(0x1f5)]
        OLECMDF QueryStatusWB([In] OLECMDID cmdID);
        [DispId(0x1f6)]
        void ExecWB([In] OLECMDID cmdID, [In] OLECMDEXECOPT cmdexecopt, ref object pvaIn, IntPtr pvaOut);
        [DispId(0x1f7)]
        void ShowBrowserBar([In] ref object pvaClsid, [In] ref object pvarShow, [In] ref object pvarSize);
        [DispId(-525)]
        WebBrowserReadyState ReadyState { get; }
        [DispId(550)]
        bool Offline { get; set; }
        [DispId(0x227)]
        bool Silent { get; set; }
        [DispId(0x228)]
        bool RegisterAsBrowser { get; set; }
        [DispId(0x229)]
        bool RegisterAsDropTarget { get; set; }
        [DispId(0x22a)]
        bool TheaterMode { get; set; }
        [DispId(0x22b)]
        bool AddressBar { get; set; }
        [DispId(0x22c)]
        bool Resizable { get; set; }
    }

    /// <summary>
    /// Utility for IE cross domain access: obtains the document of a frame window even
    /// when the normal <c>IHTMLWindow2.document</c> access is refused.
    /// </summary>
    /// <remarks>The class name keeps its original spelling because callers reference it.</remarks>
    class CorssDomainHelper
    {
        // Read-only so the identifiers cannot be overwritten; QueryService takes them by
        // reference, so the method works on local copies.
        private static readonly Guid IID_IWebBrowserApp = new Guid("0002DF05-0000-0000-C000-000000000046");
        private static readonly Guid IID_IWebBrowser2 = new Guid("D30C1661-CDAF-11D0-8A3E-00C04FC9E26E");

        /// <summary>
        /// Returns the document hosted by <paramref name="htmlWindow"/>.
        /// </summary>
        /// <param name="htmlWindow">The frame window; may be null.</param>
        /// <returns>
        /// The frame's document as <see cref="IHTMLDocument3"/>, or null in case of failure
        /// (null window, unexpected error, or the browser object could not be obtained).
        /// </returns>
        public static IHTMLDocument3 GetDocumentFromWindow(IHTMLWindow2 htmlWindow)
        {
            if (htmlWindow == null)
            {
                return null;
            }

            // First try the usual way to get the document.
            try
            {
                IHTMLDocument2 doc = htmlWindow.document;
                return (IHTMLDocument3)doc;
            }
            catch (COMException)
            {
                // Probably never raised here, but handled like the access-denied case:
                // fall through to the IWebBrowser2 route below.
            }
            catch (UnauthorizedAccessException)
            {
                // Expected for a frame from another domain: fall through to the IWebBrowser2 route.
            }
            catch (Exception)
            {
                // Any other failure is not a cross-domain refusal; give up.
                return null;
            }

            // At this point the error was E_ACCESSDENIED because the frame contains a document
            // from another domain. IE tries to prevent a cross frame scripting security issue.
            try
            {
                // Convert IHTMLWindow2 to IWebBrowser2 using IServiceProvider.
                IServiceProvider sp = (IServiceProvider)htmlWindow;

                // Local copies: the static fields are read-only and cannot be passed by ref.
                Guid serviceId = IID_IWebBrowserApp;
                Guid interfaceId = IID_IWebBrowser2;

                // Use IServiceProvider.QueryService to get IWebBrowser2 object.
                object browserObject;
                int hr = sp.QueryService(ref serviceId, ref interfaceId, out browserObject);

                // QueryService is PreserveSig, so a failure arrives as a negative HRESULT
                // (or a null object) rather than as an exception.
                if (hr < 0 || browserObject == null)
                {
                    Console.WriteLine("QueryService for IWebBrowser2 failed (HRESULT 0x{0:X8}).", hr);
                    return null;
                }

                // Get the document from IWebBrowser2.
                IWebBrowser2 browser = (IWebBrowser2)browserObject;
                return (IHTMLDocument3)browser.Document;
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }

            return null;
        }
    }
}
最後將我們的運行代碼改為如下形式,調用 Helper 類中的 GetDocumentFromWindow 方法:
using System;
using System.Windows.Forms;
using mshtml;

namespace WebBrowserTest
{
    /// <summary>
    /// Test form, final version: reaches the cross-domain frame through
    /// <c>CorssDomainHelper.GetDocumentFromWindow</c> instead of the managed frame document.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>Loads the address typed into the text box.</summary>
        private void button1_Click(object sender, EventArgs e)
        {
            this.webBrowser1.Navigate(this.textBox1.Text);
        }

        /// <summary>
        /// Copies the value of the local "search" input into the "kw" element of the
        /// first frame and clicks that frame's "su" element. Every lookup is checked, so a
        /// page that is not loaded or does not have the expected structure produces a
        /// message instead of an unhandled exception.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            HtmlDocument doc = this.webBrowser1.Document;
            if (doc == null || doc.Window == null)
            {
                ShowTestFailure("No page is loaded yet.");
                return;
            }

            HtmlWindowCollection frames = doc.Window.Frames;
            if (frames == null || frames.Count == 0)
            {
                ShowTestFailure("The loaded page does not contain any frame.");
                return;
            }

            HtmlElement searchInput = doc.GetElementById("search");
            if (searchInput == null)
            {
                ShowTestFailure("The loaded page has no element with id \"search\".");
                return;
            }

            String testValue = searchInput.GetAttribute("value");

            // The helper returns null both for a null window (failed "as" cast) and when
            // the frame document cannot be obtained.
            IHTMLDocument3 baiduDoc = CorssDomainHelper.GetDocumentFromWindow(frames[0].DomWindow as IHTMLWindow2);
            if (baiduDoc == null)
            {
                ShowTestFailure("The document of the first frame could not be accessed.");
                return;
            }

            IHTMLElement keywordBox = baiduDoc.getElementById("kw");
            IHTMLElement submitButton = baiduDoc.getElementById("su");
            if (keywordBox == null || submitButton == null)
            {
                ShowTestFailure("The frame page has no \"kw\" or \"su\" element (is it fully loaded?).");
                return;
            }

            keywordBox.setAttribute("value", testValue);
            submitButton.click();
        }

        // Reports why the automated search could not be carried out.
        private static void ShowTestFailure(string reason)
        {
            MessageBox.Show(reason, "WebBrowserTest", MessageBoxButtons.OK, MessageBoxIcon.Warning);
        }
    }
}
最後運行一下程序,可以看到我們已經可以正常獲取到百度頁面上的元素了。
補充一下路過秋天說的問題:
其實關於這些接口我也沒有很深入地研究過,不過網上倒是能搜到很多相關資料介紹這些接口的不同,我這裡給一個鏈接:
http://hi.baidu.com/christole/item/1c8dfd1a791a53643f87ced8
然後關於我上面的代碼為什麼要使用 IHTMLDocument3,而不是其它兩個接口:因為 IHTMLDocument3 這個接口裡面定義了我需要的 getElementById 這個方法。
通過查看 MSDN,你可以找到你需要的屬性或者方法,然後直接在代碼裡面轉換為你需要的類型使用就可以了,它們之間都是可以互相轉化的。比如上面我用完了 getElementById 方法,還需要查看網頁的 title,那麼可以將上面的 baiduDoc 變量強制轉為 IHTMLDocument2,然後就可以直接使用它的 title 屬性了。
參考鏈接:
http://msdn.microsoft.com/en-us/library/aa752052(v=vs.85).aspx
http://codecentrix.blogspot.com/2007/10/when-ihtmlwindow2getdocument-returns.html
http://msdn.microsoft.com/en-us/library/aa752641(v=VS.85).aspx