运行效果如图:

代码如下:
using System;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

namespace ConsoleApplication2
{
    /// <summary>
    /// Console crawler: downloads the cnblogs.com home page for each page index, extracts
    /// every post-title anchor with a regular expression, prints the title and link, and
    /// appends them to a text file.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            // The original author notes that the trailing '?' is required: without it every
            // request came back as 404 (cause unknown to the author).
            string host = "https://www.cnblogs.com/?";
            int num = 0;        // running count of titles found so far
            int pagSize = 100;  // maximum number of pages to crawl (10 would mean the first 10 pages)

            // Sample title tag:
            //   <a class="titlelnk" href="https://www.xxxxx.html" target="_blank">Some title</a>
            // Group 1 captures the href, group 2 captures the title text. ".*?" is a lazy
            // "match anything" because neither the URL nor the title is known in advance.
            string pater = "<a class=\"titlelnk\" href=\"(.*?)\" target=\"_blank\">(.*?)</a>";
            Regex regex = new Regex(pater);

            // Inclusive upper bound so that exactly pagSize pages are visited
            // (the previous "i < pagSize" stopped one page short).
            for (int i = 1; i <= pagSize; i++)
            {
                // The browser address of page N is https://www.cnblogs.com/#pN.
                // NOTE(review): "#p" + i is a URL fragment, and fragments are not transmitted
                // in an HTTP request, so presumably every iteration downloads the same first
                // page. Confirm, and switch to the site's real paging URL if so.
                string url = host + "#p" + i;
                string html = GetHtmlString(url);
                if (string.IsNullOrEmpty(html))
                {
                    // Download failed (already logged by GetHtmlString) or returned nothing.
                    continue;
                }

                foreach (Match ma in regex.Matches(html))
                {
                    // The capture groups are already available on the match itself;
                    // there is no need to run the pattern a second time.
                    string title = ma.Groups[2].Value;
                    string titlelnk = ma.Groups[1].Value;

                    num++;
                    Console.WriteLine($"-------------------------------第{num}个标题------------------------");
                    Console.WriteLine(title + "Url:" + titlelnk);
                    Console.WriteLine("--------------------------------------------------------------------------");
                    File.AppendAllText(@"d:\cnblog.txt", title + " " + titlelnk + "\r\n");
                }
            }

            Console.WriteLine("结束一共爬了" + num + "个标题");
            Console.ReadKey();
        }

        /// <summary>
        /// Downloads the resource at <paramref name="url"/> and returns its body decoded as UTF-8.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        /// <returns>
        /// The response body as a string, or <c>null</c> if any exception occurs
        /// (the exception is written to the console).
        /// </returns>
        public static string GetHtmlString(string url)
        {
            try
            {
                WebRequest request = WebRequest.Create(url);
                // The timeout must be configured before the request is sent;
                // assigning it after GetResponse() has no effect.
                request.Timeout = 3000;

                // Dispose the response as well as the stream/reader so the underlying
                // connection is released.
                using (WebResponse response = request.GetResponse())
                using (Stream stream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(stream, Encoding.UTF8))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                // Best-effort crawl: log the failure and let the caller skip this page.
                Console.WriteLine(ex.ToString());
                return null;
            }
        }
    }
}