前言:想利用小程序導航頁面來提升網站的流量,於是找到了 www.xcxdh666.com 這個小程序導航網站。
分析網頁
1 發現網站其實也是用異步分頁請求加載數據的 ,所以根本用不着xpath 解析html,直接分析其請求url

2 點擊「加載更多」找到請求,發現其實就 pageNum、category 兩個參數

3 所以直接請求 url 並帶入參數,分析其返回的 json 結果

編寫代碼
1 首先建立接收類型
/// <summary>
/// A single mini-program entry, used as the element type of <c>Result.dataList</c>.
/// Member names are lowercase/camelCase; presumably they mirror the JSON keys of the
/// remote page-list response, so do not rename them without checking the serializer setup.
/// </summary>
public class XcxApplet
{
    /// <summary>Numeric identifier of the entry.</summary>
    public int id { get; set; }

    /// <summary>Category label; the crawler substitutes a default when it is null or empty.</summary>
    public string categoryName { get; set; }

    /// <summary>Display name; the crawler uses it to detect already-stored entries.</summary>
    public string name { get; set; }

    /// <summary>Image path that the crawler appends to the remote image host (presumably the scan/QR code image).</summary>
    public string saomaUrl { get; set; }

    /// <summary>Text stored by the crawler as the entry's summary.</summary>
    public string sum { get; set; }

    /// <summary>Logo image path that the crawler appends to the remote image host.</summary>
    public string logoUrl { get; set; }
}
/// <summary>
/// Envelope type that one page of the remote page-list response is deserialized into.
/// Member names are lowercase/camelCase; presumably they mirror the JSON keys.
/// </summary>
public class Result
{
    /// <summary>Entries contained in this page.</summary>
    public List<XcxApplet> dataList { get; set; }

    /// <summary>Category value carried by the response (presumably the requested category filter).</summary>
    public string category { get; set; }

    /// <summary>Status code carried by the response; its meaning is not visible here.</summary>
    public int status { get; set; }

    /// <summary>Page number carried by the response.</summary>
    public int pageNum { get; set; }
}
2 封裝請求頁面方法
/// <summary>
/// Sends an HTTP POST to <paramref name="posturl"/> and returns the response body as text.
/// </summary>
/// <param name="posturl">Absolute URL to post to.</param>
/// <param name="postData">
/// Form-encoded request body (<c>application/x-www-form-urlencoded</c>); when null or empty
/// the request is sent without a body.
/// </param>
/// <returns>
/// The response body decoded as UTF-8, or <see cref="string.Empty"/> when the URL is not an
/// HTTP(S) URL or any error occurs. This method never returns null.
/// </returns>
public static string GetPostPage(this string posturl, string postData)
{
    Encoding encoding = Encoding.UTF8;
    byte[] data = string.IsNullOrEmpty(postData) ? null : encoding.GetBytes(postData);
    try
    {
        // Configure the request.
        var request = WebRequest.Create(posturl) as HttpWebRequest;
        if (request == null) return string.Empty;

        request.CookieContainer = new CookieContainer();
        request.AllowAutoRedirect = true;
        request.Method = "POST";
        request.ContentType = "application/x-www-form-urlencoded";

        if (data != null)
        {
            request.ContentLength = data.Length;
            // Dispose the request stream even if the write fails.
            using (Stream outstream = request.GetRequestStream())
            {
                outstream.Write(data, 0, data.Length);
            }
        }

        // Dispose the response, its stream and the reader so the underlying connection is
        // released; callers invoke this method repeatedly in a loop.
        using (var response = request.GetResponse() as HttpWebResponse)
        {
            if (response == null) return string.Empty;

            using (Stream instream = response.GetResponseStream())
            {
                if (instream == null) return string.Empty;

                using (var reader = new StreamReader(instream, encoding))
                {
                    // Return the response payload (HTML or JSON text).
                    return reader.ReadToEnd();
                }
            }
        }
    }
    catch (Exception)
    {
        // Deliberately best-effort: callers treat an empty string as "request failed".
        return string.Empty;
    }
}
3 圖片url處理 思路就是要將其返回的url 請求下載到本地或者上傳到自己對應的圖片服務器,
我這里是用七牛雲存儲img的 ,這里你可以改成下載到本地 返回本地的url就好。
/// <summary>
/// Downloads the image at <paramref name="imgurl"/> and re-uploads it to the Qiniu bucket,
/// using the last path segment of the URL as the storage key.
/// </summary>
/// <param name="imgurl">Absolute URL of the source image.</param>
/// <returns>
/// The public URL of the uploaded image under <c>http://img.siyouku.cn/</c>, or an empty
/// string when the input is null/empty, the download or upload fails, or the upload
/// response carries no key.
/// </returns>
public string QiniuUplod(string imgurl)
{
    if (string.IsNullOrEmpty(imgurl)) return "";

    // NOTE(review): placeholders - load the real keys from configuration, never from source.
    var accessKey = "你的accesskey";
    var secretKey = "你的secretkey";
    // This Mac is required to generate the upload token.
    Mac mac = new Mac(accessKey, secretKey);
    string bucket = "siyouku";
    // Everything after the last '/' (the whole string when there is no '/').
    string saveKey = imgurl.Substring(imgurl.LastIndexOf('/') + 1);
    // Make sure AK and BUCKET are correct before use, otherwise this call throws
    // (e.g. code 612/631 errors).
    Qiniu.Common.Config.AutoZone(accessKey, bucket, false);
    // Upload policy, see https://developer.qiniu.com/kodo/manual/put-policy
    PutPolicy putPolicy = new PutPolicy();
    // Scope is the bare bucket. The original assigned "BUCKET:KEY" (the overwrite form)
    // first and then immediately replaced it with the bucket, so the bucket-only scope is
    // the behavior that was actually in effect. Use bucket + ":" + saveKey to allow
    // overwriting an existing file of the same name.
    putPolicy.Scope = bucket;
    // Validity period of the upload policy (and of the token generated from it).
    putPolicy.SetExpires(3600);
    // Generate the upload token, see https://developer.qiniu.com/kodo/manual/upload-token
    string jstr = putPolicy.ToJsonString();
    string token = Auth.CreateUploadToken(mac, jstr);
    try
    {
        var wReq = System.Net.WebRequest.Create(imgurl) as System.Net.HttpWebRequest;
        if (wReq == null) return "";

        // Dispose the response as well as its stream so the connection is released.
        using (var resp = wReq.GetResponse() as System.Net.HttpWebResponse)
        {
            if (resp == null) return "";

            using (var stream = resp.GetResponseStream())
            {
                // Do not use UploadManager.UploadStream: this stream is not seekable
                // (Stream.Length is unavailable). Use FormUploader or ResumableUploader.
                FormUploader fu = new FormUploader();
                var result = fu.UploadStream(stream, saveKey, token);
                var x = Newtonsoft.Json.JsonConvert.DeserializeObject<QiniuResult>(result.Text);
                // Without a key the upload did not succeed; do not hand back a bare host URL.
                if (x == null || string.IsNullOrEmpty(x.key)) return "";
                return $"http://img.siyouku.cn/{x.key}";
            }
        }
    }
    catch (Exception)
    {
        // Best-effort: callers store an empty URL when the transfer fails.
        return "";
    }
}
4 最后是請求主體方法
/// <summary>
/// Crawls the paged listing at <c>https://www.xcxdh666.com/pageList.htm</c>, mirrors each
/// entry's images through <c>QiniuUplod</c>, and stores entries whose name is not yet present
/// in <c>Db.Applet</c>. Changes are saved once per page.
/// </summary>
/// <returns>A content result reporting the total elapsed time in milliseconds.</returns>
public ActionResult GetxcxList()
{
    // Number of listing pages to fetch (pageNum 0..53).
    const int pageCount = 54;

    // Measure how long the whole crawl takes.
    Stopwatch watch = Stopwatch.StartNew();
    for (int j = 0; j < pageCount; j++)
    {
        string url = $"https://www.xcxdh666.com/pageList.htm?pageNum={j}";
        var str = url.GetPostPage(null);
        // GetPostPage signals failure with an empty string (not null); skip such pages
        // instead of deserializing nothing or reprocessing the previous page's data.
        if (string.IsNullOrEmpty(str)) continue;

        var result = str.JsonConvert<Result>();
        if (result == null || result.dataList == null) continue;

        foreach (var item in result.dataList)
        {
            string appletName = item.name;
            // Skip entries that are already stored.
            if (Db.Applet.Any(a => a.Name == appletName)) continue;

            var applet = new Applet()
            {
                CategoryName = string.IsNullOrEmpty(item.categoryName) ? "其它" : item.categoryName,
                Name = appletName,
                SaomiaoUrl = QiniuUplod($"http://img.xcxdh666.com/wxappnav/{item.saomaUrl}"),
                Summary = item.sum,
                LogoUrl = QiniuUplod($"http://img.xcxdh666.com/wxappnav/{item.logoUrl}"),
                SortNum = j,
                CreateUser = "wenqing",
                CreateTime = DateTime.Now
            };
            Db.Applet.Add(applet);
        }
        Db.SaveChanges();
    }
    watch.Stop();
    return Content("爬取完成!本次請求總共耗時:"+ watch.ElapsedMilliseconds);
}
}
ok 到這里就全部抓取完成
這里附上 展示地址 http://siyouku.cn/Applet
本文永久更新地址:http://siyouku.cn/article/6806.html
