源碼以及最新穩定爬全國移動,聯通詳單賬單軟件購買 !請點擊此!
整體效果如下:
所有運營商抓取到的數據都放到了同一個庫的三個表裡面,供後期做數據分析用。
下面分享幾段核心源代碼,給正在研究這個的朋友們。
簡單架構:
爬蟲核心代碼:
代碼有些亂,但基本能把聯通上的數據全部抓全,大家自己優化代碼吧。
(原文地址:http://www.cnblogs.com/x-poior/p/5641437.html)
using Crawler.Common;
using Crawler.Interface;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using YXRepository.Log;
using YXRepository.Model;

namespace Crawler
{
    /// <summary>
    /// Crawler for the China Unicom (10010.com) self-service site: login helpers,
    /// account information, monthly bills and call-detail records.
    /// Every HTTP exchange is recorded through <see cref="CollectJsonLog"/>.
    /// </summary>
    public class CrawlerUNC : CrawlerBase, ICrawler
    {
        /// <summary>Number of call-detail records requested per page.</summary>
        private const int DetailPageSize = 50;

        HttpHelperNew hhn;

        // Initialised here as well as in the constructor so that the static
        // CollectJsonLog cannot hit a null list before any instance exists.
        static IList<String> loglist = new List<string>();

        private string currentUVC
        {
            get { return HttpHelperNew.UNCuacverifykey; }
        }

        /// <summary>
        /// Creates a China Unicom crawler and sets up the login-related URLs.
        /// Constructing a new crawler starts a fresh request log.
        /// </summary>
        /// <param name="number">Phone number to log in with.</param>
        /// <param name="pwd">Service password for that phone number.</param>
        public CrawlerUNC(string number, string pwd)
        {
            hhn = new HttpHelperNew();
            loglist = new List<string>();
            currentPhoneNumber = number;
            currentPhoneServicePwd = pwd;
            loginIsNeedVerifyImgRequestUrl = "https://uac.10010.com/portal/Service/CheckNeedVerify";
            loginVerifyImgRequestUrl = "https://uac.10010.com/portal/Service/CreateImage";
            loginToVerifyImgRequestUrl = "https://uac.10010.com/portal/Service/CtaIdyChk";
        }

        /// <summary>
        /// Asks the server whether logging in requires an image verification code.
        /// </summary>
        /// <returns>
        /// <c>false</c> only when the response contains <c>"resultCode":"false"</c>;
        /// <c>true</c> otherwise.
        /// </returns>
        public bool IsLoginNeedVerify()
        {
            string _url = loginIsNeedVerifyImgRequestUrl;
            string queryData = "callback=jQuery17205245009011952871_" + TimeStamp.GetTimeStamp_13()
                + "&userName=" + currentPhoneNumber
                + "&pwdType=01&_=" + TimeStamp.GetTimeStamp_13();
            string retString = hhn.HttpGet(_url, queryData, HttpForType.聯通);
            CollectJsonLog(_url, queryData, 0, retString);

            return !retString.Contains(@"""resultCode"":""false""");
        }

        /// <summary>
        /// Submits the image verification code entered by the user.
        /// </summary>
        /// <param name="imgcode">Verification code read from the captcha image.</param>
        /// <returns><c>true</c> when the response contains <c>"resultCode":"true"</c>.</returns>
        public bool IsLoginImgVerifyOk(string imgcode)
        {
            currentLoginImgCode = imgcode;
            string _url = loginToVerifyImgRequestUrl;
            string queryData = "callback=jQuery17208163765012834383_1463034583178&verifyCode=" + currentLoginImgCode + "&verifyType=1&_=1463034805373";
            string retString = hhn.HttpGet(_url, queryData);
            CollectJsonLog(_url, queryData, 0, retString);

            return retString.Contains(@"""resultCode"":""true""");
        }

        /// <summary>
        /// Returns the login captcha as a <c>data:image/png;base64,</c> URI, or an empty
        /// string when no image payload is available.
        /// </summary>
        public string GetLoginImg()
        {
            loginVerifyImgStream = string.Empty;
            string queryData = "t=1463034742570";
            string part1 = "data:image/png;base64,";
            // NOTE(review): no request is issued here, so the payload is always empty and
            // this method always returns string.Empty - confirm whether the download
            // step is missing.
            string part2 = "";
            CollectJsonLog(loginVerifyImgRequestUrl, queryData, 0, part2);

            if (!string.IsNullOrEmpty(part2))
            {
                loginVerifyImgStream = part1 + part2;
            }
            return loginVerifyImgStream;
        }

        /// <summary>Logs out. Currently a no-op that always reports success.</summary>
        public bool LogOut()
        {
            bool retValue = true;
            return retValue;
        }

        /// <summary>
        /// Requests the login URL, repeating every 500 ms while the response reports
        /// that the province's system is being upgraded.
        /// </summary>
        /// <param name="loginret">Always set to an empty string.</param>
        /// <returns>Always <c>false</c> in the visible implementation.</returns>
        public bool Login(out string loginret)
        {
            loginret = string.Empty;
            // NOTE(review): the login URL is empty and retValue is never set to true -
            // the real login request/response handling appears to be missing.
            loginRequestUrl = "";
            bool retValue = false;
            string retString = string.Empty;
            do
            {
                retString = hhn.HttpGet(loginRequestUrl, "", HttpForType.聯通);
                Thread.Sleep(500);
            } while (retString.Contains(@"所屬省份系統正在升級"));
            CollectJsonLog(loginRequestUrl, "", 0, retString);
            return retValue;
        }

        /// <summary>
        /// Loads the China Unicom captcha page and adds the <c>WT_FPC</c> cookie to the
        /// HTTP helper's cookie container.
        /// </summary>
        public void UNCInitPage()
        {
            string url = "https://login.10010.com/captchazh.htm?type=05";
            string retS = hhn.HttpGet(url, "", HttpForType.聯通);
            CollectJsonLog(url, "", 0, retS);

            // The result is not used below; the call is kept because its side effects,
            // if any, are not visible from this file.
            string wt_fpc = JsHelper.GetJsMethd("GetWT_FPC", null);

            // A fixed WT_FPC value is sufficient: the server does not validate it.
            Cookie wtcookie = new Cookie()
            {
                Expires = DateTime.Now.AddYears(10),
                Path = "/",
                Domain = ".10010.com",
                Name = "WT_FPC",
                Value = "id=2c78d939da42319e6221460629342754:lv=1460686951978:ss=1460685811376"
            };
            hhn.cookie.Add(wtcookie);
        }

        /// <summary>Sends the query SMS. Currently a no-op that always reports success.</summary>
        public bool SendQuerySms()
        {
            return true;
        }

        /// <summary>Generic query hook. Not implemented; always returns <c>null</c>.</summary>
        public IList<T> GetQueryData<T>(T temp)
        {
            return null;
        }

        /// <summary>
        /// Fetches the personal-information JSON for the logged-in user.
        /// </summary>
        /// <returns>The raw response, or an empty string when not logged in.</returns>
        private string getMyDetails()
        {
            if (!checkLogin)
            {
                return "";
            }

            string infoUrl = "http://iservice.10010.com/e3/static/query/searchPerInfo/?_=1464073258330&menuid=000100020001";
            string retString = hhn.HttpPost(infoUrl, "", HttpForType.聯通);
            CollectJsonLog(infoUrl, "", 1, retString);
            return retString;
        }

        /// <summary>
        /// Collects the subscriber profile: name/gender, personal details, balances,
        /// points and number attribution.
        /// </summary>
        /// <returns>A populated <see cref="TXInfoModel"/>; fields that cannot be parsed keep their default value.</returns>
        public TXInfoModel GetInfo()
        {
            TXInfoModel tim = new TXInfoModel();

            #region Part 1: name and gender
            string infoUrl = "https://uac.10010.com/cust/infomgr/anonymousInfoAJAX";
            string retString = hhn.HttpGet(infoUrl, "");
            CollectJsonLog(infoUrl, "", 0, retString);

            tim.CustomerName = Utilities.QuMiddle(retString, @"name"":""", @"""");
            tim.CustomerSex = Utilities.QuMiddle(retString, @"sex"":""", @"""") == "1" ? "男" : "女";
            #endregion

            #region Part 2: personal details
            string retString2 = getMyDetails();
            tim.Email = Utilities.QuMiddle(retString2, @"sendemail"":""", @"""");
            DateTime innettime;
            DateTime.TryParse(Utilities.QuMiddle(retString2, @"opendate"":""", @""""), out innettime);
            tim.InNetTime = innettime;
            tim.Grade = ""; // star-rating score
            tim.IDCard = Utilities.QuMiddle(retString2, @"certnum"":""", @"""");
            tim.PhoneNumber = Utilities.QuMiddle(retString2, @"usernumber"":""", @"""");
            tim.ProviderName = "中國聯通:" + Utilities.QuMiddle(retString2, @"brand"":""", @"""")
                + "-" + Utilities.QuMiddle(retString2, @"productName"":""", @"""");
            tim.RegAddress = Utilities.QuMiddle(retString2, @"certaddr"":""", @"""");
            tim.ContactNum = Utilities.QuMiddle(retString2, @"usernumber"":""", @"""");
            tim.NetAge = "";
            tim.PhoneStatus = Utilities.QuMiddle(retString2, @"subscrbstat"":""", @"""");
            tim.RealNameInfo = Utilities.QuMiddle(retString2, @"certtype"":""", @"""");
            tim.StarLevel = Utilities.QuMiddle(retString2, @"custlvl"":""", @"""");
            tim.LevelInfo = "";
            tim.ZipCode = "";
            #endregion

            #region Part 3: call-fee balance / account balance
            string infoUrl11 = "http://iservice.10010.com/e3/static/query/accountBalance/search?_=1464858050369&menuid=000100010002";
            string balancePostData = "type=onlyAccount";
            string retString11 = hhn.HttpPost(infoUrl11, balancePostData, HttpForType.聯通);
            // Log what was really sent: a POST carrying balancePostData.
            CollectJsonLog(infoUrl11, balancePostData, 1, retString11);

            // TryParse (like the other fields in this method) so a response without a
            // balance field leaves 0 instead of throwing; invariant culture because the
            // value is machine-formatted.
            decimal curFee;
            decimal.TryParse(Utilities.QuMiddle(retString11, @"userbalance"":""", @""""),
                NumberStyles.Number, CultureInfo.InvariantCulture, out curFee);
            tim.CurFee = curFee;

            decimal curFeeTotal;
            decimal.TryParse(Utilities.QuMiddle(retString11, @"acctbalance"":""", @""""),
                NumberStyles.Number, CultureInfo.InvariantCulture, out curFeeTotal);
            tim.CurFeeTotal = curFeeTotal;
            #endregion

            #region Part 4: points
            string infoUrl22 = "http://iservice.10010.com/e3/static/query/headerView";
            string retString22 = hhn.HttpPost(infoUrl22, "", HttpForType.聯通);
            CollectJsonLog(infoUrl22, "", 1, retString22);

            // Some China Unicom card types return JSON without a points field, hence TryParse.
            // NOTE(review): the key is spelled "sore" - confirm this matches the real
            // response and is not a typo for "score".
            int score;
            int.TryParse(Utilities.QuMiddle(retString22, @"sore"":""", @""""), out score);
            tim.PointValue = score;
            #endregion

            #region Part 5: number attribution lookup
            tim.PhoneAttribution = PhoneAttribution.getGuiShuDiNet(tim.PhoneNumber);
            #endregion

            return tim;
        }

        /// <summary>
        /// Requests the bill summary for each of the last five months.
        /// </summary>
        /// <returns>
        /// The list of bill models. NOTE(review): the visible code never builds a
        /// <see cref="TXZhangDanModel"/> from the responses, so the list is always empty.
        /// </returns>
        public IList<TXZhangDanModel> GetZhangDan()
        {
            IList<TXZhangDanModel> listZD = new List<TXZhangDanModel>();
            string[] temptimes;
            List<string> tempss = GetZhangDanPostData(out temptimes);

            if (!checkLogin)
            {
                return listZD;
            }

            foreach (var data in tempss)
            {
                string PostdataS = data;
                // NOTE(review): infoUrl is not declared in this method or anywhere else
                // in the visible part of this class - confirm where the bill URL comes from.
                string retS = hhn.HttpPost(infoUrl, PostdataS, HttpForType.聯通);
                CollectJsonLog(infoUrl, PostdataS, 1, retS);

                // Start of the per-month parsing: the raw "billcycle" key/value pair.
                string temps1 = new Regex(@"""billcycle""(:)("".*?"")").Match(retS).ToString();
            }
            return listZD;
        }

        /// <summary>
        /// Downloads the call-detail records of the last five months, page by page.
        /// </summary>
        /// <returns>All parsed call-detail records; empty when not logged in.</returns>
        /// <exception cref="Exception">
        /// Thrown when the server reports that the detail endpoint was accessed too frequently.
        /// </exception>
        public IList<TXXiangDanModel> GetXiangDan()
        {
            IList<TXXiangDanModel> listXD = new List<TXXiangDanModel>();
            List<string> tempss = GetXiangDanPostData();

            if (!checkLogin)
            {
                return listXD;
            }

            foreach (var data in tempss)
            {
                string[] range = data.Split('&');
                string beginDate = range[0];
                string endDate = range[1];

                // Probe request used to learn the month's total record count.
                // NOTE(review): the original used PostdataS here before it was ever
                // assigned; page 1 of the same query is assumed - confirm.
                string PostdataS = BuildDetailQuery(beginDate, endDate, "1");
                string retS = hhn.HttpPost(callListRequestUrl, PostdataS, HttpForType.聯通);
                CollectJsonLog(callListRequestUrl, PostdataS, 1, retS);

                while (retS.Contains("暫時無法為您提供服務"))
                {
                    // Back off before retrying rather than after.
                    Thread.Sleep(500);
                    retS = hhn.HttpPost(callListRequestUrl, PostdataS, HttpForType.聯通);
                }

                if (retS.Contains("系統檢測您的訪問過於頻繁"))
                {
                    throw new Exception("訪問獲取詳單鏈接過於頻繁!請明天再試");
                }

                // Total record count for the month determines the page numbers to fetch.
                string currentMonthTotalNum = Utilities.QuMiddle(retS, @"totalRecord"":", @",""");
                List<String> curCurorlist = GetAllcurCuror(currentMonthTotalNum);

                foreach (var curcuror in curCurorlist)
                {
                    PostdataS = BuildDetailQuery(beginDate, endDate, curcuror);
                    retS = hhn.HttpPost(callListRequestUrl, PostdataS, HttpForType.聯通);
                    CollectJsonLog(callListRequestUrl, PostdataS, 1, retS);

                    AppendDetailRecords(retS, listXD);
                }
            }
            return listXD;
        }

        /// <summary>Builds the POST body for one page of call-detail records.</summary>
        private static string BuildDetailQuery(string beginDate, string endDate, string pageNo)
        {
            return "beginDate=" + beginDate
                + "&endDate=" + endDate
                + "&pageNo=" + pageNo
                + "&pageSize=" + DetailPageSize.ToString(CultureInfo.InvariantCulture);
        }

        /// <summary>Finds every <c>"field":"value"</c> pair for the given key in a JSON response.</summary>
        private static MatchCollection MatchField(string json, string field)
        {
            return Regex.Matches(json, "\"" + field + "\"(:)(\".*?\")");
        }

        /// <summary>Extracts the value part of a pair found by <see cref="MatchField"/>.</summary>
        private static string ReadField(Match match, string field)
        {
            return Utilities.QuMiddle(match.Value, field + "\":\"", "\"");
        }

        /// <summary>
        /// Parses one page of call-detail JSON and appends a model per record.
        /// The page is skipped when the per-field match counts disagree, because the
        /// fields are paired up by position.
        /// </summary>
        private static void AppendDetailRecords(string json, IList<TXXiangDanModel> target)
        {
            MatchCollection stlist = MatchField(json, "calldate");      // start date
            MatchCollection stlist2 = MatchField(json, "calltime");     // start time
            MatchCollection cplist = MatchField(json, "homeareaName");  // call location
            MatchCollection cmlist = MatchField(json, "calltypeName");  // call mode
            MatchCollection anlist = MatchField(json, "othernum");      // other party's number
            MatchCollection ctilist = MatchField(json, "calllonghour"); // call duration
            MatchCollection ctylist = MatchField(json, "landtype");     // call type
            MatchCollection cflist = MatchField(json, "totalfee");      // call fee

            int count = stlist.Count;
            // stlist2 is included in the check: it is indexed below alongside the others.
            bool aligned = stlist2.Count == count
                && cplist.Count == count
                && cmlist.Count == count
                && anlist.Count == count
                && ctilist.Count == count
                && ctylist.Count == count
                && cflist.Count == count;
            if (!aligned)
            {
                return;
            }

            for (int i = 0; i < count; i++)
            {
                TXXiangDanModel record = new TXXiangDanModel()
                {
                    anotherNm = ReadField(anlist[i], "othernum"),
                    commFee = decimal.Parse(ReadField(cflist[i], "totalfee"), CultureInfo.InvariantCulture),
                    commMode = ReadField(cmlist[i], "calltypeName"),
                    commPlac = ReadField(cplist[i], "homeareaName"),
                    commTime = ReadField(ctilist[i], "calllonghour"),
                    commType = ReadField(ctylist[i], "landtype"),
                    startTime = DateTime.Parse(
                        ReadField(stlist[i], "calldate") + " " + ReadField(stlist2[i], "calltime"),
                        CultureInfo.InvariantCulture)
                };
                target.Add(record);
            }
        }

        /// <summary>
        /// Builds the list of page numbers ("1", "2", ...) needed to page through a
        /// month's call-detail records at <see cref="DetailPageSize"/> records per page.
        /// </summary>
        /// <param name="totalNum">Total record count for the month, as text.</param>
        /// <returns>Page numbers in ascending order; empty when the count is missing, unparsable or not positive.</returns>
        private List<String> GetAllcurCuror(string totalNum)
        {
            List<string> retlist = new List<string>();

            int total;
            if (!int.TryParse(totalNum, out total) || total <= 0)
            {
                return retlist;
            }

            // e.g. 201 records -> 4 full pages + 1 partial page = 5 pages.
            int pageCount = total / DetailPageSize + (total % DetailPageSize == 0 ? 0 : 1);
            for (int page = 1; page <= pageCount; page++)
            {
                retlist.Add(page.ToString(CultureInfo.InvariantCulture));
            }
            return retlist;
        }

        /// <summary>
        /// Builds the POST bodies for the bills of the last five months, newest first
        /// (e.g. <c>billdate=201604&amp;querycode=0001&amp;querytype=0001</c>).
        /// </summary>
        /// <param name="startendS">
        /// Receives, per month, "start&amp;end" where start is the first day of the month
        /// and end is the current time for the current month or the last day of the
        /// month otherwise.
        /// </param>
        /// <returns>One POST body per month.</returns>
        private List<string> GetZhangDanPostData(out string[] startendS)
        {
            List<string> retlist = new List<string>();
            startendS = new string[5];
            DateTime nowtime = DateTime.Now;

            for (int i = 0; i < 5; i++)
            {
                DateTime month = nowtime.AddMonths(-i);
                DateTime monthStart = new DateTime(month.Year, month.Month, 1);

                // Current month ends "now"; earlier months end on their last day.
                DateTime monthEnd = i == 0 ? month : monthStart.AddMonths(1).AddDays(-1);

                startendS[i] = monthStart.ToString() + "&" + monthEnd.ToString();
                retlist.Add("billdate=" + month.ToString("yyyyMM", CultureInfo.InvariantCulture) + "&querycode=0001&querytype=0001");
            }
            return retlist;
        }

        /// <summary>
        /// Builds the date ranges for the call details of the last five months, newest
        /// first, each formatted as <c>2016-04-01&amp;2016-04-30</c>. The current month
        /// ends today; earlier months end on their last day.
        /// </summary>
        /// <returns>Five "begin&amp;end" date-range strings.</returns>
        private List<string> GetXiangDanPostData()
        {
            List<string> retlist = new List<string>();
            DateTime today = DateTime.Now;
            DateTime firstOfThisMonth = new DateTime(today.Year, today.Month, 1);

            for (int i = 0; i < 5; i++)
            {
                // AddMonths handles year roll-over and month lengths (including leap years).
                DateTime monthStart = firstOfThisMonth.AddMonths(-i);
                DateTime monthEnd = i == 0 ? today : monthStart.AddMonths(1).AddDays(-1);

                retlist.Add(monthStart.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture)
                    + "&" + monthEnd.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture));
            }
            return retlist;
        }

        /// <summary>
        /// Appends one request/response record to the shared log.
        /// </summary>
        /// <param name="url">Requested URL.</param>
        /// <param name="data">Query string or POST body that was sent.</param>
        /// <param name="method1">1 for POST; any other value is logged as GET.</param>
        /// <param name="responseS">Raw response text.</param>
        public static void CollectJsonLog(string url, string data, int method1, string responseS)
        {
            string method = method1 == 1 ? "Post" : "Get";
            loglist.Add(string.Format("【請求url:{0} , 請求數據:{1} , 請求方式:{2}, 返回數據:{3} 】", url, data, method, responseS));
        }

        /// <summary>Returns the shared request/response log.</summary>
        public IList<String> GetAllJsonLog()
        {
            return loglist;
        }
    }
}