C# 簡單獲取知軒藏書仙草榜


這個網站的 URL 格式統一:書的詳情頁就是 http://www.zxcs.me/post/{書的編號}。

書的評價鏈接是 http://www.zxcs.me/content/plugins/cgz_xinqing/cgz_xinqing_action.php?action=show&id={書的編號}&m={0-1隨機浮點數},返回的數據是以逗號分隔的五個數字(格式如 0,0,0,0,0),其中第一個是仙草數,最後一個是毒草數。

步驟1、先請求詳情頁獲取書的信息:編號,書名,作者,類別等;

步驟2、根據編號獲取評價;

步驟3、存儲,排序。

請求以及正則提取的類代碼:

using System;
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;

namespace ZXCSGET
{
    /// <summary>
    /// Minimal crawler for zxcs.me: downloads a book's detail page and extracts
    /// title, author and category with regular expressions, and downloads the
    /// raw rating string for a book from the rating plugin endpoint.
    /// </summary>
    public class CrawlerTest
    {
        /// <summary>
        /// Matches the book title: the text between "《" and "》" at the start of
        /// the page's &lt;title&gt; element.
        /// </summary>
        private static readonly Regex TitleRegex = new Regex(@"(?<=\<title\>《).*?(?=》)");

        /// <summary>
        /// Matches the author: the text between the author label and the fixed
        /// site suffix that closes the page's &lt;title&gt; element.
        /// </summary>
        private static readonly Regex AuthorRegex = new Regex(@"(?<=作者:).*?(?=[ ]-[ ]知軒藏書-藏盡網絡中最好的精校小說\</title>)");

        /// <summary>
        /// Matches the category: the text that follows the fixed keyword phrase
        /// up to (not including) the next double quote.
        /// </summary>
        private static readonly Regex CategoryRegex = new Regex(@"(?<=精校電子書,).*?(?="")");

        /// <summary>
        /// Base URL of a book detail page; the book id is appended to it.
        /// </summary>
        private static readonly string url_1 = @"http://www.zxcs.me/post/";

        /// <summary>
        /// Base URL of the rating plugin.
        /// </summary>
        private static readonly string pluginpath = @"http://www.zxcs.me/content/plugins/cgz_xinqing/";

        // One shared generator instead of "new Random()" per call: time-seeded
        // instances created in quick succession can yield the same value.
        // System.Random is not thread-safe, so access goes through RandomGate.
        private static readonly Random SharedRandom = new Random();
        private static readonly object RandomGate = new object();

        /// <summary>
        /// Requests the detail page of a book and extracts its information.
        /// </summary>
        /// <param name="id">Book id; appended to the detail page base URL.</param>
        /// <param name="reContent1">Receives the book title (empty string when the pattern does not match).</param>
        /// <param name="reContent2">Receives the author (empty string when the pattern does not match).</param>
        /// <param name="reContent3">Receives the category (empty string when the pattern does not match).</param>
        /// <returns>
        /// <c>true</c> when the page was downloaded; <c>false</c> when any exception
        /// occurred, in which case the <c>ref</c> arguments are left untouched.
        /// </returns>
        public static bool RequestUrl1(int id, ref string reContent1, ref string reContent2, ref string reContent3)
        {
            try
            {
                string html = DownloadString(
                    url_1 + id.ToString(System.Globalization.CultureInfo.InvariantCulture));

                // Match.Value is the whole match, i.e. the same as Groups[0].Value.
                reContent1 = TitleRegex.Match(html).Value;
                reContent2 = AuthorRegex.Match(html).Value;
                reContent3 = CategoryRegex.Match(html).Value;
                return true;
            }
            catch (Exception)
            {
                // Deliberate best-effort contract: every failure is reported as false.
                return false;
            }
        }

        /// <summary>
        /// Requests the rating information of a book.
        /// </summary>
        /// <param name="id">Book id.</param>
        /// <param name="grass">
        /// Receives the raw response body, a comma-separated string in the form
        /// <c>0,0,0,0,0</c>. Left untouched on failure.
        /// </param>
        /// <returns><c>true</c> when the request succeeded; <c>false</c> when any exception occurred.</returns>
        public static bool RequestUrl2(int id, ref string grass)
        {
            try
            {
                double nonce;
                lock (RandomGate)
                {
                    nonce = SharedRandom.NextDouble();
                }

                // Format with the invariant culture so the decimal separator is
                // always '.', regardless of the machine's regional settings.
                System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
                string url = pluginpath
                    + "cgz_xinqing_action.php?action=show&id=" + id.ToString(invariant)
                    + "&m=" + nonce.ToString(invariant);

                grass = DownloadString(url);
                return true;
            }
            catch (Exception)
            {
                // Deliberate best-effort contract: every failure is reported as false.
                return false;
            }
        }

        /// <summary>
        /// Performs an HTTP GET on <paramref name="url"/> and returns the response
        /// body decoded as UTF-8. The response, its stream and the reader are
        /// disposed even when reading fails. Exceptions propagate to the caller.
        /// </summary>
        private static string DownloadString(string url)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = WebRequestMethods.Http.Get;
            request.ContentType = "text/html";

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream respStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(respStream, Encoding.UTF8))
            {
                return reader.ReadToEnd();
            }
        }
    }
}

我是按編號 0-20000 循環獲取書的信息,然後再獲取評價;WinForm 單線程跑起來很慢。存儲也沒寫,直接排序輸出了 top20。後面有空再改進吧。

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM