前言:
本人一直使用網易雲音樂播放器,對網易雲音樂十分的熱衷,里面的歌單功能非常便捷,能快速找到符合自己喜好的歌曲信息。此文章如有侵權,請留言即刻刪除文章。
請求數據說明:以web請求的方式獲取網易雲音樂歌單列表:歌單名稱,鏈接,播放量及創建歌單人名。
一. 了解request請求
什么是request請求:當我們訪問一個網站時所輸入的網站地址就是一種request請求。請求的格式應遵照被請求方的格式要求。
例如用谷歌瀏覽器訪問網易雲音樂網址時:
按f12進入控制台 如圖選中NetWork選項
請求頭信息:包含請求地址,請求方式(Get/Post),請求網頁類型Accept,以及最重要的User-Agent(訪問瀏覽器版本)。
二. 提供信息支持
打開Visual Studio=>文件=>新建項目=>新建控制台應用程序
選擇控制台應用程序=>
選中項目右擊=>添加=>類取名為RequestOptions:
RequestOptions:設置請求頭信息參數
/// <summary>
/// Settings that describe a single HTTP request: target, method, headers,
/// timeouts and an optional request body.
/// </summary>
public class RequestOptions
{
    /// <summary>
    /// HTTP method of the request, e.g. GET or POST.
    /// </summary>
    public string Method { get; set; }

    /// <summary>
    /// Address to request.
    /// </summary>
    public Uri Uri { get; set; }

    /// <summary>
    /// Value for the Referer header (the page the request claims to come from).
    /// </summary>
    public string Referer { get; set; }

    /// <summary>
    /// Request timeout, in milliseconds.
    /// </summary>
    public int TimeOut = 5000;

    /// <summary>
    /// Whether to use a persistent (keep-alive) connection.
    /// </summary>
    public bool KeepAlive = true;

    /// <summary>
    /// Whether redirects are followed automatically; disabled by default.
    /// </summary>
    public bool AllowAutoRedirect = false;

    /// <summary>
    /// Maximum number of connections.
    /// The misspelled name is kept because existing callers reference it.
    /// </summary>
    public int ConntectionLimit = int.MaxValue;

    /// <summary>
    /// Number of request attempts.
    /// </summary>
    public int RequestNum = 3;

    /// <summary>
    /// Value for the Accept header, i.e. the response media types the client accepts.
    /// </summary>
    public string Accept = "*/*";

    /// <summary>
    /// Value for the Content-Type header.
    /// </summary>
    public string ContentType = "application/x-www-form-urlencoded";

    /// <summary>
    /// Backing store for additional request headers.
    /// </summary>
    public WebHeaderCollection header = new WebHeaderCollection();

    /// <summary>
    /// Additional request headers; reads and writes the <c>header</c> field.
    /// </summary>
    public WebHeaderCollection webHeader
    {
        get { return header; }
        set { header = value; }
    }

    /// <summary>
    /// Cookie string to send with the request.
    /// </summary>
    public string RequestCookies { get; set; }

    /// <summary>
    /// Request body (form/XHR parameters) to send with the request.
    /// </summary>
    public string XHRParams { get; set; }
}
新建RequestHelper類:編寫請求方法,並對返回的數據信息進行處理
/// <summary>
/// Issues HTTP requests described by a <see cref="RequestOptions"/> and
/// returns the response body as text.
/// </summary>
public class RequestHelper
{
    /// <summary>
    /// Sends the request described by <paramref name="options"/> and returns the
    /// response body decoded as UTF-8, transparently un-gzipping / inflating it
    /// when the server reports a compressed Content-Encoding.
    /// </summary>
    /// <param name="options">Request settings (address, method, headers, body).</param>
    /// <returns>The response body as a string.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
    /// <exception cref="WebException">The request failed or timed out.</exception>
    public static string RequestAction(RequestOptions options)
    {
        if (options == null)
        {
            throw new ArgumentNullException("options");
        }

        // Hook for routing through a proxy: replace null with GetWebProxy().
        IWebProxy proxy = null;
        bool hasBody = !string.IsNullOrEmpty(options.XHRParams);

        var request = (HttpWebRequest)WebRequest.Create(options.Uri);
        request.Accept = options.Accept;
        request.ServicePoint.Expect100Continue = false;
        // Disable Nagle's algorithm to reduce latency.
        request.ServicePoint.UseNagleAlgorithm = false;
        // Only buffer the write stream when there is a body to send.
        request.AllowWriteStreamBuffering = hasBody;
        // Advertise compressed responses; they are decoded in OpenDecodedStream.
        request.Headers.Add(HttpRequestHeader.AcceptEncoding, "gzip,deflate");
        request.ContentType = options.ContentType;
        request.AllowAutoRedirect = options.AllowAutoRedirect;
        // Present the request as coming from a desktop Chrome browser.
        request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36";
        request.Timeout = options.TimeOut;
        request.KeepAlive = options.KeepAlive;
        if (!string.IsNullOrEmpty(options.Referer))
        {
            request.Referer = options.Referer;
        }
        request.Method = options.Method;
        if (proxy != null)
        {
            request.Proxy = proxy;
        }
        if (!string.IsNullOrEmpty(options.RequestCookies))
        {
            request.Headers[HttpRequestHeader.Cookie] = options.RequestCookies;
        }
        request.ServicePoint.ConnectionLimit = options.ConntectionLimit;
        if (options.webHeader != null && options.webHeader.Count > 0)
        {
            request.Headers.Add(options.webHeader);
        }

        if (hasBody)
        {
            byte[] payload = Encoding.UTF8.GetBytes(options.XHRParams);
            request.ContentLength = payload.Length;
            // Dispose the request stream so the body is flushed and the stream is released.
            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(payload, 0, payload.Length);
            }
        }

        string result;
        using (var response = (HttpWebResponse)request.GetResponse())
        using (Stream body = OpenDecodedStream(response))
        using (var reader = new StreamReader(body, Encoding.UTF8))
        {
            result = reader.ReadToEnd();
        }

        request.Abort();
        return result;
    }

    /// <summary>
    /// Returns the response stream, wrapped in a decompressor when the
    /// Content-Encoding header names gzip or deflate.
    /// </summary>
    private static Stream OpenDecodedStream(HttpWebResponse response)
    {
        string contentEncoding = response.ContentEncoding ?? string.Empty;
        Stream raw = response.GetResponseStream();

        // Ordinal, case-insensitive match: ToLower() is culture-sensitive and
        // would miss "GZIP" under e.g. Turkish casing rules.
        if (contentEncoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            return new GZipStream(raw, CompressionMode.Decompress);
        }
        if (contentEncoding.IndexOf("deflate", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            return new DeflateStream(raw, CompressionMode.Decompress);
        }
        return raw;
    }

    /// <summary>
    /// Builds the proxy to route requests through, or returns null when no
    /// proxy host is configured or the configured address is invalid.
    /// </summary>
    private static IWebProxy GetWebProxy()
    {
        // Proxy host and port; leave the host empty to use no proxy.
        string proxyHost = "";
        string proxyPort = "";
        // Credentials for proxies that require authentication:
        //string proxyUser = "xxx";
        //string proxyPass = "xxx";

        if (string.IsNullOrEmpty(proxyHost))
        {
            return null;
        }

        try
        {
            // A bare "host:port" is not a valid absolute URI, so add a scheme
            // unless the configured host already carries one.
            string address = proxyHost.Contains("://")
                ? string.Format("{0}:{1}", proxyHost, proxyPort)
                : string.Format("http://{0}:{1}", proxyHost, proxyPort);

            var webProxy = new System.Net.WebProxy();
            webProxy.Address = new Uri(address);
            // Uncomment when the proxy requires authentication.
            //webProxy.Credentials = new System.Net.NetworkCredential(proxyUser, proxyPass);
            return webProxy;
        }
        catch (UriFormatException ex)
        {
            // Placeholders added: without them the timestamp and message were dropped.
            Console.WriteLine("獲取代理信息異常 {0} {1}", DateTime.Now.ToString(), ex.Message);
            return null;
        }
    }
}
現在通用的請求方法已經編寫完畢,下面在Main方法中調用上面編寫的方法:
項目添加引用:HtmlAgilityPack支持
選擇項目點擊右鍵=>選擇NuGet包管理:
搜索HtmlAgilityPack
選擇安裝即可,
此項目主要使用的Xpath選擇匹配:參考文檔:http://www.w3school.com.cn/xpath/xpath_syntax.asp
xpath可視化工具:HtmlAgilityPack Tester
鏈接:https://pan.baidu.com/s/1_in8Y9qFYzKQtnc-eLrb2w
提取碼:co03
Main方法:
/// <summary>
/// Downloads the NetEase Cloud Music "hot playlists" page, extracts every
/// playlist entry and prints its name, link, play count and creator.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // Page to request.
    var uri = new Uri(@"https://music.163.com/discover/playlist/?cat=%E5%85%A8%E9%83%A8&order=hot");

    // Fetch the response body. HTTP method names are case-sensitive, so use the canonical "GET".
    var simpleCrawlResult = RequestHelper.RequestAction(new RequestOptions() { Uri = uri, Method = "GET" });

    HtmlDocument htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(simpleCrawlResult);

    // Locate the playlist <ul>; it is missing when the page layout changes
    // or the request was answered with an error/verification page.
    HtmlNode listNode = htmlDoc.DocumentNode.SelectSingleNode(@"/html[1]/body[1]/div[3]/div[1]/ul[1]");
    HtmlNodeCollection playList = listNode == null ? null : listNode.SelectNodes("li");

    if (playList != null)
    {
        foreach (var playActicle in playList)
        {
            // Playlist name
            string playName = ReadTitle(playActicle, "p[1]/a[1]");
            // Playlist link
            string playHref = ReadAttribute(playActicle, "p[1]/a[1]", "href");
            // Play count
            string playCount = ReadText(playActicle, "div[1]/div[1]/span[2]");
            // Creator
            string createBy = ReadTitle(playActicle, "p[2]/a[1]");
            // Creator profile link
            string createHref = ReadAttribute(playActicle, "p[2]/a[1]", "href");

            // Print the entry to the console.
            Console.WriteLine("歌單:" + playName + "\t鏈接:" + playHref + "\t播放量:" + playCount + "\t創建者:" + createBy + "\t創建者鏈接:" + createHref);
            Console.WriteLine("==============================");
        }
    }

    Console.Read();
}

/// <summary>
/// Returns the title attribute of the node at <paramref name="xpath"/>, falling
/// back to its inner text; returns an empty string when the node is missing.
/// </summary>
private static string ReadTitle(HtmlNode item, string xpath)
{
    HtmlNode node = item.SelectSingleNode(xpath);
    if (node == null)
    {
        return string.Empty;
    }
    // Read the attribute directly: an XPath ending in "@title" yields the
    // owning element in HtmlAgilityPack, not the attribute value.
    return node.GetAttributeValue("title", null) ?? node.InnerText;
}

/// <summary>
/// Returns the named attribute of the node at <paramref name="xpath"/>, or an
/// empty string when the node or the attribute is missing.
/// </summary>
private static string ReadAttribute(HtmlNode item, string xpath, string attributeName)
{
    HtmlNode node = item.SelectSingleNode(xpath);
    return node == null ? string.Empty : node.GetAttributeValue(attributeName, "");
}

/// <summary>
/// Returns the inner text of the node at <paramref name="xpath"/>, or an empty
/// string when the node is missing.
/// </summary>
private static string ReadText(HtmlNode item, string xpath)
{
    HtmlNode node = item.SelectSingleNode(xpath);
    return node == null ? string.Empty : node.InnerText;
}
運行結果展示:
總結:
爬蟲是批量獲取信息的一種工具,方便快捷,能獲取大量數據。但有的網站進行了反爬蟲處理,如果請求量過大,可能會導致ip被封,還有的網站進行了請求驗證,需做驗證,自行體會,爬蟲雖好但不要侵犯他人隱私哦!