// Collected from the web: image-crawling example.
// Requires the AngleSharp NuGet package.
using AngleSharp;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
namespace CoreConsoleApp
{
/// <summary>
/// Console crawler: loads a range of listing pages with AngleSharp, finds the
/// images inside <c>.panel-body li</c> elements and saves them under a
/// per-day / per-page folder on the local disk.
/// </summary>
public class Program
{
    // Root folder that receives the downloaded images.
    private const string ImageRoot = "D:\\Image";

    // Content length that Download treats as "remote file does not exist".
    // NOTE(review): presumably the size of the target site's "not found"
    // placeholder response - confirm against the real site.
    private const long MissingFileContentLength = 745;

    /// <summary>
    /// Entry point. Crawls pages 1 .. pageIndex - 1 and downloads every image found.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    public static void Main(string[] args)
    {
        // Configuration with the default loader so documents can be fetched over HTTP.
        var config = Configuration.Default.WithDefaultLoader();
        var context = BrowsingContext.New(config);

        // Exclusive upper bound of the page numbers to crawl.
        int pageIndex = 50;

        // One client for the whole run instead of one undisposed client per image.
        using (WebClient client = new WebClient())
        {
            for (int i = 1; i < pageIndex; i++)
            {
                var address = @"https://www.qwe.com?pageIndex=" + i;
                Uri pageUri = new Uri(address);

                // GetAwaiter().GetResult() surfaces the real exception instead of
                // an AggregateException (Main stays synchronous on purpose).
                using (var document = context.OpenAsync(address).GetAwaiter().GetResult())
                {
                    // Every list item inside the panel body is expected to hold one image.
                    var cells = document.QuerySelectorAll(".panel-body li");

                    // Folder name: "<page> - <title>", stripped of characters that
                    // are not legal in a path segment.
                    string fileName = SanitizeFileName(i + " - " + (document.Title ?? string.Empty));
                    string localPath = Path.Combine(ImageRoot, DateTime.Now.ToString("yyyyMMdd"), fileName);

                    foreach (var item in cells)
                    {
                        var image = item.QuerySelector("img");
                        if (image == null)
                        {
                            continue; // list item without an image
                        }

                        string imageUrl = image.GetAttribute("src");
                        if (string.IsNullOrWhiteSpace(imageUrl))
                        {
                            continue; // image without a usable source
                        }

                        // Resolve relative sources against the page address;
                        // absolute sources are returned unchanged.
                        Uri imageUri;
                        if (!Uri.TryCreate(pageUri, imageUrl, out imageUri)
                            || (imageUri.Scheme != Uri.UriSchemeHttp && imageUri.Scheme != Uri.UriSchemeHttps))
                        {
                            Console.WriteLine("Skipping unsupported image url: " + imageUrl);
                            continue;
                        }

                        // Last path segment only, so query strings and fragments
                        // never end up in the file name.
                        string remotePath = imageUri.AbsolutePath;
                        string imageName = SanitizeFileName(remotePath.Substring(remotePath.LastIndexOf('/') + 1));
                        if (imageName.Length == 0)
                        {
                            Console.WriteLine("Skipping image without a file name: " + imageUrl);
                            continue;
                        }

                        // No-op when the folder already exists; done here so pages
                        // without images do not leave empty folders behind.
                        Directory.CreateDirectory(localPath);
                        string filepath = Path.Combine(localPath, imageName);

                        try
                        {
                            // Alternative: Download(imageUri.AbsoluteUri, filepath) supports resuming.
                            client.DownloadFile(imageUri, filepath);
                        }
                        catch (WebException ex)
                        {
                            // A single broken image must not abort the whole crawl.
                            Console.WriteLine("Failed to download " + imageUri + ": " + ex.Message);
                        }
                    }
                }
            }
        }

        Console.ReadLine();
    }

    /// <summary>
    /// Removes '|' and every character that is invalid in a file name, then trims
    /// trailing spaces and dots (which make a path segment unusable on Windows).
    /// </summary>
    /// <param name="name">Raw name, e.g. a page title or a URL segment.</param>
    /// <returns>The cleaned name; may be empty.</returns>
    private static string SanitizeFileName(string name)
    {
        string cleaned = name.Replace("|", "");
        foreach (char invalid in Path.GetInvalidFileNameChars())
        {
            cleaned = cleaned.Replace(invalid.ToString(), "");
        }
        return cleaned.TrimEnd(' ', '.');
    }

    /// <summary>
    /// Downloads a file over HTTP, resuming a previous partial download when the
    /// local file already exists and is shorter than the remote one.
    /// </summary>
    /// <param name="url">HTTP address of the remote file.</param>
    /// <param name="localfile">Path of the local file to write.</param>
    /// <returns>
    /// <c>true</c> when the transfer completed; <c>false</c> when the remote file is
    /// considered missing, the local file is already at least as long as the
    /// remote one, or the transfer failed.
    /// </returns>
    public static bool Download(string url, string localfile)
    {
        long remoteFileLength = GetHttpLength(url); // 0 when the length could not be read
        System.Console.WriteLine("remoteFileLength=" + remoteFileLength);
        if (remoteFileLength == MissingFileContentLength)
        {
            System.Console.WriteLine("遠程文件不存在.");
            return false;
        }

        long startPosition; // offset at which writing (and the remote read) starts
        FileStream writeStream;
        if (File.Exists(localfile))
        {
            // Partial (or complete) file from an earlier run: continue after its last byte.
            writeStream = File.OpenWrite(localfile);
            startPosition = writeStream.Length;
            if (startPosition >= remoteFileLength)
            {
                System.Console.WriteLine("本地文件長度" + startPosition + "已經大於等於遠程文件長度" + remoteFileLength);
                writeStream.Close();
                return false;
            }
            writeStream.Seek(startPosition, SeekOrigin.Begin);
        }
        else
        {
            writeStream = new FileStream(localfile, FileMode.Create);
            startPosition = 0;
        }

        try
        {
            using (writeStream)
            {
                HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(url);
                if (startPosition > 0)
                {
                    // long overload: an int cast would overflow for offsets above 2 GB.
                    myRequest.AddRange(startPosition);
                }

                using (HttpWebResponse response = (HttpWebResponse)myRequest.GetResponse())
                using (Stream readStream = response.GetResponseStream())
                {
                    if (startPosition > 0 && response.StatusCode != HttpStatusCode.PartialContent)
                    {
                        // The server ignored the Range header and is sending the whole
                        // file; appending would corrupt it, so start over from byte 0.
                        writeStream.SetLength(0);
                        startPosition = 0;
                    }

                    byte[] btArray = new byte[8192]; // transfer buffer
                    long currPostion = startPosition;
                    int lastPercent = -1;
                    int contentSize;
                    while ((contentSize = readStream.Read(btArray, 0, btArray.Length)) > 0)
                    {
                        writeStream.Write(btArray, 0, contentSize);
                        currPostion += contentSize;

                        // Progress can only be computed when the remote length is known;
                        // this also avoids a division by zero.
                        if (remoteFileLength > 0)
                        {
                            int percent = (int)(currPostion * 100 / remoteFileLength);
                            if (percent != lastPercent)
                            {
                                System.Console.WriteLine("percent=" + percent + "%");
                                lastPercent = percent;
                            }
                        }
                    }
                }
            }
            return true;
        }
        catch (Exception ex)
        {
            // Contract: any failure during the transfer is reported as 'false'
            // rather than thrown; the streams are already closed by the using blocks.
            System.Console.WriteLine("Download failed: " + ex.Message);
            return false;
        }
    }

    /// <summary>
    /// Reads the remote file length from the Content-Length of the HTTP response.
    /// </summary>
    /// <param name="url">HTTP address of the remote file.</param>
    /// <returns>
    /// The reported content length when the server answers 200 OK; 0 when the
    /// status differs or the request fails.
    /// </returns>
    private static long GetHttpLength(string url)
    {
        try
        {
            HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
            using (HttpWebResponse rsp = (HttpWebResponse)req.GetResponse())
            {
                return rsp.StatusCode == HttpStatusCode.OK ? rsp.ContentLength : 0;
            }
        }
        catch (Exception ex)
        {
            // Best effort: callers treat 0 as "length unknown".
            System.Console.WriteLine("Could not read remote length: " + ex.Message);
            return 0;
        }
    }
}
}
