一、問題:
當我們拿到搜狗搜索結果中的某個鏈接時,顯示的是一個中轉鏈接 https://www.sogou.com/link?url=xxx,而跳轉後的真實鏈接是:https://baike.sogou.com/xxx
例:
跳轉後的鏈接:https://baike.sogou.com/v7677690.htm?fromTitle=text%28%29
這裡我們需要得到跳轉後的鏈接,此方法也可用於360搜索的鏈接轉換
二、代碼
添加Nuget:HtmlAgilityPack程序包
引用:using System.IO; using System.Net; using HtmlAgilityPack;
/// <summary>
/// Fetches <paramref name="url"/> and returns the text found between the first and the
/// last double quote inside the first top-level <c>script</c> element of the response.
/// Intended for search-engine redirect pages whose script carries the destination URL
/// as a quoted string.
/// </summary>
/// <param name="url">The redirect link to request.</param>
/// <returns>The quoted text extracted from the first script element.</returns>
/// <exception cref="System.InvalidOperationException">
/// The response has no matching script element, or the script does not contain a
/// pair of double quotes.
/// </exception>
public static string ResultOfApi(string url)
{
    // Send the request with browser-like headers.
    HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);
    request.Method = "GET";
    // Header values must not contain spaces around '/', '-', '+' or '='.
    request.ContentType = "application/x-www-form-urlencoded";
    request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36";
    request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8";
    request.KeepAlive = true;

    // Read the response body; the using blocks release the connection even if reading fails.
    string html;
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream body = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(body))
    {
        html = reader.ReadToEnd();
    }

    // Load the page source and locate the first top-level script element.
    var doc = new HtmlDocument();
    doc.LoadHtml(html);
    var scriptNode = doc.DocumentNode.SelectSingleNode(@"/script[1]");
    if (scriptNode == null)
    {
        throw new System.InvalidOperationException(
            "The response for '" + url + "' does not contain a top-level script element.");
    }

    // The target is everything between the first and the last double quote of the script.
    string script = scriptNode.InnerHtml;
    int firstQuote = script.IndexOf('"');
    int lastQuote = script.LastIndexOf('"');
    if (firstQuote < 0 || lastQuote <= firstQuote)
    {
        throw new System.InvalidOperationException(
            "The script in the response for '" + url + "' does not contain a quoted URL.");
    }

    return script.Substring(firstQuote + 1, lastQuote - firstQuote - 1);
}