C# 微信 生活助手 空氣質量 天氣預報等 效果展示 數據抓取 (二)
第一次在博客園寫博客,寫得不好,請大家見諒。最近把工作辭了,然後感冒發燒輸了一個星期的液,感覺很煩躁,心情不是很好,在帝都感覺壓力大。廢話不說了,開始正題吧!
還沒有完全完成,後續考慮開源!
可以關注微信公眾帳號體驗一下,先看下效果吧。
先介紹下工具:我用的有 HttpWatch 和 Fiddler。
國家環保部的數據鏈接 http://datacenter.mep.gov.cn/report/air_daily/airDairyCityHour.jsp
原以為直接發 GET 請求就可以了,試了一下,發現獲取不到數據;然後看了下頁面源碼,發現查詢其實是通過下面這個表單以 POST 方式提交的:
<script type="text/javascript"> function submitForm(){ document.citydayform.submit(); } </script>

<form name="citydayform" action="http://datacenter.mep.gov.cn:80/report/air_daily/airDairyCityHour.jsp" method="post" style="margin: 0px auto 0px auto;" onsubmit="return checkForm1();"> <table width="95%" border="1" align="center" cellpadding="0" cellspacing="0" bordercolorlight="#d9d7b2" bordercolordark="#ffffff" class="font"> <tr> <td width="5%" height="30" bgcolor="#FFFFEF" align="right" class="STYLE1"> 城市: </td> <td width="8%" bgcolor="#FFFFFF"> <input type="text" name='city' value="" /> </td> <td width="5%" height="30" bgcolor="#FFFFEF" align="right" class="STYLE1 STYLE1"> 時間: </td> <td width="30%" bgcolor="#FFFFFF"> 從: <input type="text" id="startdate" name="startdate" readonly="true" onfocus="WdatePicker({ el: this.id, dateFmt: 'yyyy-MM-dd HH:mm', skin: 'whyGreen',maxDate:'#F{$dp.$D(\'enddate\')}' })" style="text-align: center;cursor: pointer;width: 160px;height: 21px;margin-top:0px;" class="Wdate" value="2015-04-16 22:00" /> 到: <input type="text" id="enddate" name="enddate" readonly="true" onfocus="WdatePicker({ el: this.id, dateFmt: 'yyyy-MM-dd HH:mm', skin: 'whyGreen',minDate:'#F{$dp.$D(\'startdate\')}' })" style="text-align: center;cursor: pointer;width: 160px;height: 21px;margin-top:0px;" class="Wdate" value="2015-04-16 22:00" /> </td> <td width="8%" bgcolor="#FFFFFF"> <div align="center"> <input type="image" src="/report/main/images/search.gif" width="83" height="19" /> </div> </td> </tr> </table> </form>
模型

/// <summary>
/// A single air-quality reading: the time it applies to, the city it belongs to,
/// and the reported index, level and primary pollutant.
/// </summary>
public partial class AirQuality
{
    /// <summary>Identifier of this record.</summary>
    public int Id { get; set; }

    /// <summary>Timestamp of the reading.</summary>
    public System.DateTime Time { get; set; }

    /// <summary>Air quality index value.</summary>
    public int AQI { get; set; }

    /// <summary>Numeric code of the city the reading belongs to.</summary>
    public int CityCode { get; set; }

    /// <summary>Air-quality level text for the reading.</summary>
    public string Level { get; set; }

    /// <summary>Primary pollutant text for the reading.</summary>
    public string PrimaryPollutant { get; set; }
}
抓取代碼,通過正則匹配數據

// Pages through the hourly report: POST the query for the current page, extract
// rows with the regex, convert each row to an AirQuality, and stop at the first
// page that yields no matches.
while (true)
{
    // Resolve the encoding outside the retry loop so that an unsupported code page
    // surfaces as an exception instead of being retried forever.
    Encoding responseEncoding = Encoding.GetEncoding("GB2312");

    // Format with the invariant culture: under some cultures the ':' time separator
    // or the calendar would change and the server would receive a different string.
    string param = string.Format(
        System.Globalization.CultureInfo.InvariantCulture,
        "city=&startdate={0}&enddate={1}&page={2}",
        lastGrabHBTime.AddHours(1).ToString("yyyy-MM-dd HH:mm", System.Globalization.CultureInfo.InvariantCulture),
        currentTime.ToString("yyyy-MM-dd HH:mm", System.Globalization.CultureInfo.InvariantCulture),
        pageIndex);

    #region 數據抓取
    // Keep retrying on network-level failures only. Any other exception
    // (bad URL, bad arguments, ...) is not transient and must propagate rather
    // than spin in this loop indefinitely.
    while (true)
    {
        try
        {
            res = HttpHelper.PostRequest(Constant.HBUrl, param, responseEncoding);
            break;
        }
        catch (System.Net.WebException)
        {
            Thread.Sleep(1000);
        }
        catch (System.IO.IOException)
        {
            Thread.Sleep(1000);
        }
    }
    #endregion

    // Strip line breaks and tabs so the row regex can match across the original
    // line structure. ('|' inside a character class is a literal pipe, not
    // alternation, so it is intentionally not listed here.)
    // NOTE(review): the trailing Replace appears to swap a space for a space, i.e.
    // a no-op as written; presumably it once replaced "&nbsp;" or U+00A0 - confirm.
    res = Regex.Replace(res, "[\r\n\t]", "").Replace(" ", " ");

    var matches = regex.Matches(res);
    if (matches.Count == 0)
    {
        break;
    }

    foreach (Match match in matches)
    {
        var group = match.Groups;
        if (group.Count != 10)
        {
            continue;
        }

        // Rows for cities without a configuration entry are skipped; the original
        // dereferenced the FirstOrDefault result and crashed with a null reference.
        string rowCityName = group[1].Value;
        var matchedCity = cityConfigs.FirstOrDefault(p => p.City == rowCityName);
        if (matchedCity == null)
        {
            continue;
        }

        // Rows whose time or AQI cell is not parseable are skipped instead of
        // aborting the whole grab.
        DateTime rowTime;
        int rowAqi;
        bool parsed =
            DateTime.TryParse(
                group[3].Value,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out rowTime)
            && int.TryParse(
                group[5].Value,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out rowAqi);
        if (!parsed)
        {
            continue;
        }

        airQualities.Add(new AirQuality()
        {
            Time = rowTime,
            AQI = rowAqi,
            Level = group[7].Value,
            PrimaryPollutant = group[9].Value,
            CityCode = matchedCity.Code
        });
    }

    pageIndex++;
    // Short pause between pages.
    Thread.Sleep(100);
}
POST請求代碼

/// <summary>
/// Sends a form-encoded HTTP POST, using UTF-8 for both the request body and the
/// response text.
/// </summary>
/// <param name="url">Absolute URL to post to.</param>
/// <param name="param">Request body, already in <c>key=value&amp;key=value</c> form.</param>
/// <param name="cookie">Optional value for the Cookie request header; skipped when null or empty.</param>
/// <returns>The response body as a string.</returns>
public static string PostRequest(string url, string param, string cookie = "")
{
    return PostRequest(url, param, Encoding.UTF8, cookie);
}

/// <summary>
/// Sends a form-encoded HTTP POST and returns the response body as text.
/// </summary>
/// <param name="url">Absolute URL to post to.</param>
/// <param name="param">Request body, already in <c>key=value&amp;key=value</c> form.</param>
/// <param name="encoding">Encoding used to encode the request body and to decode the response.</param>
/// <param name="cookie">Optional value for the Cookie request header; skipped when null or empty.</param>
/// <returns>The response body as a string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="encoding"/> is null.</exception>
public static string PostRequest(string url, string param, Encoding encoding, string cookie = "")
{
    if (encoding == null)
    {
        throw new ArgumentNullException("encoding");
    }

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(new Uri(url));
    request.ContentType = "application/x-www-form-urlencoded";
    request.UserAgent = "Mozilla/5.0 (MSIE 9.0; Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko";
    // HTTP method tokens are case-sensitive; send the canonical upper-case verb.
    request.Method = "POST";
    request.Timeout = 30000;
    request.KeepAlive = false;

    if (!string.IsNullOrEmpty(cookie))
    {
        request.Headers[HttpRequestHeader.Cookie] = cookie;
    }

    byte[] bytes = encoding.GetBytes(param);
    request.ContentLength = bytes.Length;
    using (Stream stream = request.GetRequestStream())
    {
        stream.Write(bytes, 0, bytes.Length);
    }

    // Dispose the response (not only its stream) so the underlying connection is
    // released even when reading throws.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(stream, encoding))
    {
        return reader.ReadToEnd();
    }
}