1、開發思路:根據入參(站點ID、類型ID)在 apiSetting.json 配置文件中查找對應的靜態文件存儲地址,從而實現不同站點的靜態頁生成功能。頁面由無頭瀏覽器(PhantomJS)渲染,渲染得到的 HTML 字符串再用正則把資源路徑替換為固定地址,使靜態頁在本地也能正常訪問。
2、已發現問題:如果頁面的 js 在載入時進行某些重寫 DOM 的操作,已用正則替換掉的動態路徑代碼會被覆蓋,導致本地訪問無效。這一點只能由站點開發方重新優化頁面來避免。不過這僅影響本地訪問;如果靜態頁面部署到服務器並使用相對路徑,則不受影響。
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.Mvc;

namespace QuartZNetService.Controllers
{
    /// <summary>
    /// Renders a page with PhantomJS and stores the resulting HTML as a static file
    /// in the folder configured for the requested site/type in apiSetting.json.
    /// </summary>
    public class BuildStaticController : Controller
    {
        /// <summary>
        /// Full path of the apiSetting.json configuration file.
        /// </summary>
        public static string jsonUrl = AppDomain.CurrentDomain.BaseDirectory + "apiSetting.json";

        // Origin prepended to site-relative script sources so the saved page loads them remotely.
        private const string SiteOrigin = "https://www.xxx.cn";

        // Script tags that already mention this domain are left untouched.
        private const string SiteDomainMarker = "xxx.cn";

        // Text codes.js prints when the page could not be opened.
        private const string RenderFailureMarker = "Unable to post!";

        // Matches <script ... src="..."> tags; group 1 is the src value.
        private static readonly Regex ScriptSrcRegex =
            new Regex("<script[^>]*?src=\"([^>]*?)\"[^>]*?>", RegexOptions.IgnoreCase);

        // Matches a leading URI scheme such as "http:" or "data:".
        private static readonly Regex SchemeRegex =
            new Regex("^[a-z][a-z0-9+.-]*:", RegexOptions.IgnoreCase);

        /// <summary>
        /// HTTP settings forwarded to the PhantomJS script.
        /// </summary>
        public class HttpConfig
        {
            /// <summary>
            /// Cookie information for the site.
            /// </summary>
            public string Cookie { get; set; }

            /// <summary>
            /// Referer of the page.
            /// </summary>
            public string Referer { get; set; }

            /// <summary>
            /// Content type; defaults to text/html with UTF-8 charset.
            /// </summary>
            public string ContentType { get; set; }

            public string Accept { get; set; }

            public string AcceptEncoding { get; set; }

            /// <summary>
            /// Timeout in milliseconds; defaults to 100000.
            /// </summary>
            public int Timeout { get; set; }

            public string UserAgent { get; set; }

            /// <summary>
            /// Whether POST data is gzip-compressed.
            /// </summary>
            public bool GZipCompress { get; set; }

            public bool KeepAlive { get; set; }

            public string CharacterSet { get; set; }

            public HttpConfig()
            {
                this.Timeout = 100000;
                this.ContentType = "text/html; charset=" + Encoding.UTF8.WebName;
                this.UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36";
                this.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
                this.AcceptEncoding = "gzip,deflate";
                this.GZipCompress = false;
                this.KeepAlive = true;
                this.CharacterSet = "UTF-8";
            }
        }

        /// <summary>
        /// Renders <paramref name="url"/> with PhantomJS (after AJAX content has had time to load),
        /// rewrites resource URLs so the page works when opened locally, and writes the HTML to
        /// the folder configured for the given site and type.
        /// </summary>
        /// <param name="url">Address of the page to render.</param>
        /// <param name="sitId">Site ID; selects the output root from apiSetting.json.</param>
        /// <param name="typeId">Type ID; selects the output sub-folder (may match nothing, giving an empty folder).</param>
        /// <param name="fileName">Name of the file to write inside the output folder.</param>
        /// <param name="config">HTTP settings (user agent, accept, referer) passed to the script.</param>
        /// <param name="interval">Milliseconds the script waits after page load before capturing the DOM.</param>
        /// <returns>JSON <c>true</c> on success, otherwise the error message as a JSON string.</returns>
        public JsonResult Do(string url, string sitId, string typeId, string fileName, HttpConfig config, int interval = 3000)
        {
            try
            {
                if (string.IsNullOrWhiteSpace(url))
                {
                    throw new ArgumentException("url is required.");
                }
                if (string.IsNullOrWhiteSpace(fileName))
                {
                    throw new ArgumentException("fileName is required.");
                }
                if (config == null)
                {
                    config = new HttpConfig();
                }

                JObject settings = JObject.Parse(Readjson(sitId, typeId));
                string siteRoot = settings["url"].ToString();
                string folder = settings["folder"].ToString();
                if (siteRoot.Length == 0)
                {
                    throw new InvalidOperationException("No site is configured for sitId '" + sitId + "'.");
                }

                // Resolve the destination first so a bad path fails before the slow render.
                string targetPath = ResolveTargetPath(siteRoot, folder, fileName);
                string html = RewriteResourceUrls(RenderPage(url, interval, config));

                using (StreamWriter writer = new StreamWriter(targetPath, false, Encoding.UTF8))
                {
                    writer.Write(html);
                }

                return Json(true, JsonRequestBehavior.AllowGet);
            }
            catch (Exception ex)
            {
                return Json(ex.Message, JsonRequestBehavior.AllowGet);
            }
        }

        /// <summary>
        /// Reads apiSetting.json and returns the output root and folder for the given IDs.
        /// </summary>
        /// <param name="sitId">Site ID matched against the "sit" entries.</param>
        /// <param name="typeId">Type ID matched against the "type" entries.</param>
        /// <returns>
        /// A JSON object string with <c>url</c> and <c>folder</c>; each is empty when nothing matched.
        /// When several entries match, the last one wins.
        /// </returns>
        public static string Readjson(string sitId, string typeId)
        {
            string url = "";
            string folder = "";

            using (StreamReader file = System.IO.File.OpenText(jsonUrl))
            using (JsonTextReader reader = new JsonTextReader(file))
            {
                JObject root = (JObject)JToken.ReadFrom(reader);
                url = FindLastValue(root["sit"], "sitId", sitId, "sitUrl") ?? url;
                folder = FindLastValue(root["type"], "typeId", typeId, "folder") ?? folder;
            }

            return JsonConvert.SerializeObject(new { url = url, folder = folder });
        }

        // Returns valueName of the last object in entries whose keyName equals keyValue, or null.
        private static string FindLastValue(JToken entries, string keyName, string keyValue, string valueName)
        {
            string found = null;
            if (entries == null)
            {
                return found;
            }

            foreach (JToken token in entries)
            {
                JObject entry = token as JObject;
                if (entry == null)
                {
                    continue;
                }

                JToken key = entry[keyName];
                JToken value = entry[valueName];
                if (key != null && value != null && key.ToString() == keyValue)
                {
                    found = value.ToString();
                }
            }

            return found;
        }

        // Builds the output file path and rejects file names that escape the configured folder.
        private static string ResolveTargetPath(string siteRoot, string folder, string fileName)
        {
            string root = Path.GetFullPath(siteRoot + folder);
            string separator = Path.DirectorySeparatorChar.ToString();
            string rootWithSeparator = root.EndsWith(separator, StringComparison.Ordinal) ? root : root + separator;

            string target = Path.GetFullPath(rootWithSeparator + fileName);
            if (!target.StartsWith(rootWithSeparator, StringComparison.OrdinalIgnoreCase))
            {
                throw new InvalidOperationException("fileName must stay inside the configured output folder.");
            }

            return target;
        }

        // Runs webTools\phantomjs.exe with codes.js and returns the HTML it prints to stdout.
        private static string RenderPage(string url, int interval, HttpConfig config)
        {
            string toolsDirectory = AppDomain.CurrentDomain.BaseDirectory + @"webTools\";

            ProcessStartInfo startInfo = new ProcessStartInfo(toolsDirectory + "phantomjs.exe");
            startInfo.WorkingDirectory = toolsDirectory;
            // Every argument is quoted: the user agent and accept values contain spaces, and the
            // URL comes from the request, so unquoted values would be split into extra arguments.
            startInfo.Arguments = string.Join(" ", new[]
            {
                QuoteArgument(toolsDirectory + "codes.js"),
                QuoteArgument(url),
                QuoteArgument(interval.ToString()),
                QuoteArgument(config.UserAgent),
                QuoteArgument(config.Accept),
                QuoteArgument(config.Referer)
            });
            startInfo.CreateNoWindow = true;
            startInfo.RedirectStandardOutput = true;
            startInfo.RedirectStandardInput = true;
            startInfo.UseShellExecute = false;

            using (Process process = Process.Start(startInfo))
            using (StreamReader reader = new StreamReader(process.StandardOutput.BaseStream, Encoding.UTF8))
            {
                // Drain stdout before waiting, otherwise a full pipe buffer would block the child.
                string output = reader.ReadToEnd();
                process.WaitForExit();

                if (process.ExitCode != 0 || output.Trim() == RenderFailureMarker)
                {
                    throw new InvalidOperationException("PhantomJS could not render '" + url + "'.");
                }

                return output;
            }
        }

        // Points site-relative script sources at the site origin and upgrades protocol-relative URLs to https.
        private static string RewriteResourceUrls(string html)
        {
            string rewritten = ScriptSrcRegex.Replace(html, tag =>
            {
                Group src = tag.Groups[1];
                bool alreadyOnSite = tag.Value.IndexOf(SiteDomainMarker, StringComparison.Ordinal) >= 0;
                if (alreadyOnSite || !IsSiteRelative(src.Value))
                {
                    return tag.Value;
                }

                // Paths without a leading slash are treated as root-relative.
                string prefix = src.Value.StartsWith("/", StringComparison.Ordinal) ? SiteOrigin : SiteOrigin + "/";
                return tag.Value.Insert(src.Index - tag.Index, prefix);
            });

            return rewritten
                .Replace("src=\"//", "src=\"https://")
                .Replace("href=\"//", "href=\"https://")
                .Replace("\"//images", "\"https://images");
        }

        // True when src has neither a scheme (http:, data:, ...) nor a protocol-relative "//" prefix.
        private static bool IsSiteRelative(string src)
        {
            return src.Length > 0
                && !src.StartsWith("//", StringComparison.Ordinal)
                && !SchemeRegex.IsMatch(src);
        }

        // Quotes one value for a Windows command line (backslash/double-quote escaping rules).
        private static string QuoteArgument(string value)
        {
            if (string.IsNullOrEmpty(value))
            {
                return "\"\"";
            }

            StringBuilder quoted = new StringBuilder("\"");
            int pendingBackslashes = 0;
            foreach (char c in value)
            {
                if (c == '\\')
                {
                    pendingBackslashes++;
                    continue;
                }

                if (c == '"')
                {
                    // Backslashes before a quote must be doubled, plus one to escape the quote itself.
                    quoted.Append('\\', pendingBackslashes * 2 + 1);
                }
                else
                {
                    quoted.Append('\\', pendingBackslashes);
                }

                quoted.Append(c);
                pendingBackslashes = 0;
            }

            // Trailing backslashes precede the closing quote, so they are doubled as well.
            quoted.Append('\\', pendingBackslashes * 2);
            quoted.Append('"');
            return quoted.ToString();
        }
    }
}
codes.js 配置
// PhantomJS script: opens a URL, waits for AJAX content, then prints the rendered DOM to stdout.
// Usage: phantomjs codes.js <url> <intervalMs> [userAgent] [accept] [referer]
var page = require('webpage').create(),
    system = require('system');

// Set before any output so every console.log is emitted as UTF-8.
phantom.outputEncoding = "UTF-8";

var url = system.args[1];

// Arguments arrive as strings; fall back to 3000 ms when the value is missing or invalid.
var interval = parseInt(system.args[2], 10);
if (isNaN(interval) || interval < 0) {
    interval = 3000;
}

var headers = {
    "Accept-Language": "zh-CN,en;q=0.7,en-US;q=0.3",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache"
};
if (system.args[4]) {
    headers["Accept"] = system.args[4];
}
if (system.args[5]) {
    headers["Referer"] = system.args[5];
}

// The user agent is a page setting; only override it when one was supplied so the
// remaining default page settings stay intact.
if (system.args[3]) {
    page.settings.userAgent = system.args[3];
}

// Request settings are passed to open() rather than assigned to page.settings.
var settings = {
    operation: "GET",
    encoding: "UTF-8",
    headers: headers
};

if (!url) {
    console.log('Unable to post!');
    phantom.exit(1);
} else {
    page.open(url, settings, function (status) {
        if (status !== 'success') {
            console.log('Unable to post!');
            // Non-zero exit code lets the caller detect the failure.
            phantom.exit(1);
        } else {
            // Give AJAX requests time to finish before capturing the DOM.
            setTimeout(function () {
                console.log(page.content);
                phantom.exit();
            }, interval);
        }
    });
}
apiSetting.json 配置
{ "sit": [ { "sitId": "1", "sitUrl": "D://" }, { "sitId": "60", "sitUrl": "D://" } ], "type": [ { "typeId": "1", "folder": "zmPC" }, { "typeId": "60", "folder": "zmCP" } ] }