相信很多人在爬今日头条的时候就遇到了一个难题,就是找到了消息头后,有3个加密的参数(as ,cp,_signature),如下图:
找到的请求URL是:
以上请求共有 8 个参数:
max_behot_time、category、utm_source、widen、tadrequire、as、cp、_signature;
max_behot_time 可以看出是10位数字的时间戳;
category 是对应的频道名,可以在首页找到
utm_source 固定是 toutiao,widen 固定是 1,tadrequire 固定是 false,剩下的就是 as、cp、_signature 三个加密参数了;
要破解这三个参数,我们需要找下JS的代码里是否有相关信息,于是通过搜索“_signature”找到了对应的JS加密代码:
function e(t) { var e = ascp.getHoney() , i = ""; window.TAC && (i = TAC.sign("refresh" === t ? 0 : r.params.max_behot_time_tmp)), r.params = _.extend({}, r.params, { as: e.as, cp: e.cp, max_behot_time: "refresh" === t ? 0 : r.params.max_behot_time_tmp, _signature: i
可以看到as和cp是由ascp.getHoney函数传递的,我们继续找下getHoney这个函数;
/**
 * Attaches an `ascp` object to the first argument of the IIFE (`window`).
 *
 * `ascp.getHoney()` returns `{ as, cp }`, two 15-character tokens built from
 * the current Unix time in seconds:
 *   - the upper-case hex form of the timestamp, and
 *   - the upper-case result of the external `md5()` applied to the timestamp.
 * When the hex form is not exactly 8 characters long a fixed pair is returned.
 */
!function (root) {
    // Concatenates five character pairs: left[leftStart + k] followed by
    // right[rightStart + k], for k = 0..4.
    function pairUp(left, leftStart, right, rightStart) {
        var out = "";
        var k = 0;
        while (k < 5) {
            out += left[leftStart + k] + right[rightStart + k];
            k += 1;
        }
        return out;
    }

    var api = {};

    api.getHoney = function () {
        var seconds = Math.floor((new Date).getTime() / 1e3);
        var hexTime = seconds.toString(16).toUpperCase();
        // md5 is evaluated before the length check, exactly as in the original.
        var digest = md5(seconds).toString().toUpperCase();

        if (hexTime.length != 8) {
            return {
                as: "479BB4B7254C150",
                cp: "7E0AC8874BB0985"
            };
        }

        var head = digest.slice(0, 5);
        var tail = digest.slice(-5);
        var mixedAs = pairUp(head, 0, hexTime, 0);
        var mixedCp = pairUp(hexTime, 3, tail, 0);

        return {
            as: "A1" + mixedAs + hexTime.slice(-3),
            cp: hexTime.slice(0, 3) + mixedCp + "E1"
        };
    };

    root.ascp = api;
}(window, document);
根据以上JS代码,可以写成python代码:
def getHoney(t=None):
    """Build the ``as`` and ``cp`` request parameters (port of the site's JS ``getHoney``).

    Args:
        t: Unix timestamp in seconds. Defaults to the current time, which is
            what the original JavaScript always uses; passing a value makes
            the result reproducible.

    Returns:
        Tuple ``(eas, ecp)`` of two 15-character strings.
    """
    # Imported locally so the snippet is runnable on its own.
    import hashlib
    import time

    if t is None:
        t = int(time.time())
    t = int(t)

    e = '%X' % t  # upper-case hex form of the timestamp
    if len(e) != 8:
        # Same fixed fallback the JavaScript returns when the hex timestamp is
        # not 8 characters long (the indexing below would fail or misalign).
        return "479BB4B7254C150", "7E0AC8874BB0985"

    # Upper-case MD5 hex digest of the decimal timestamp string.
    i = hashlib.md5(str(t).encode('utf-8')).hexdigest().upper()
    n = i[:5]   # first five digest characters
    a = i[-5:]  # last five digest characters

    # Interleave digest characters with timestamp characters.
    s = ''.join(n[x] + e[x] for x in range(5))
    r = ''.join(e[x + 3] + a[x] for x in range(5))

    eas = 'A1' + s + e[-3:]
    ecp = e[:3] + r + 'E1'
    return eas, ecp
接下来就是破解_signature,经过搜索找不到对应的JS脚本,百度很久后找到了个大牛(太阳雨2012 )自己破解的JS代码,
/*
 * Standalone script that reproduces the Toutiao feed request parameters
 * `as`, `cp` and `_signature` outside a browser.
 * Entry point: get_as_cp_signature(), which returns a JSON string holding
 * those three keys.
 *
 * Layout, in order:
 *   1. A stub `navigator` object (Windows / Trident user agent strings) plus
 *      `window = this; window.navigator = navigator`, so later code can read
 *      those globals.
 *   2. A JSON polyfill: JSON.stringify / JSON.parse are defined only when they
 *      are not already functions. The fallback JSON.parse validates the text
 *      with regular expressions and then calls eval().
 *   3. A CryptoJS core (Base, WordArray, Hex / Latin1 / Utf8 encoders,
 *      BufferedBlockAlgorithm, Hasher) followed by the MD5 algorithm, exposed
 *      as CryptoJS.MD5 and CryptoJS.HmacMD5.
 *   4. get_as_cp_signature():
 *      - getHoney() derives `as` / `cp` from the current Unix time in seconds:
 *        the upper-case hex timestamp is interleaved with the first / last
 *        five characters of the upper-case MD5 hex digest of the decimal
 *        timestamp. A fixed pair is returned when the hex form is not
 *        8 characters long.
 *      - c() is a stack-based interpreter over the opcode string handed to
 *        r(); e() and a() create operator / constructor helpers through the
 *        Function constructor and cache them in `b` and `k`.
 *      - The URI-decoded program is run with `[TAC = {}]` as its arguments;
 *        its last instruction stores a `sign` property, and TAC.sign(0) is
 *        then used as `_signature`.
 *
 * NOTE(review): `navigator`, `window` and `TAC` are assigned without a
 * declaration, and helper code is generated through `Function`; this would
 * presumably fail under strict mode or a CSP that forbids dynamic code.
 * NOTE(review): in this collapsed rendering the `// WT-JS_DEBUG ...` comment
 * swallows the rest of its physical line; it presumably sat on its own line
 * in the original file - restore the line breaks before executing.
 */
navigator = { // WT-JS_DEBUG v1.7.5 - NLiger2018 appCodeName: "Mozilla", appMinorVersion: "0", appName: "Netscape", appVersion: "5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko", browserLanguage: "zh-CN", cookieEnabled: true, cpuClass: "x86", language: "zh-CN", maxTouchPoints: 0, msManipulationViewsEnabled: true, msMaxTouchPoints: 0, msPointerEnabled: true, onLine: true, platform: "Win32", pointerEnabled: true, product: "Gecko", systemLanguage: "zh-CN", userAgent: "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko", userLanguage: "zh-CN", vendor: "", vendorSub: "", webdriver: false }, window = this, window.navigator = navigator; if (typeof JSON !== "object") { JSON = {}; }(function () { "use strict"; var rx_one = /^[\],:{}\s]*$/; var rx_two = /\\(?:["\\\/bfnrt]|u[0-9a-fA-F]{4})/g; var rx_three = /"[^"\\\n\r]*"|true|false|null|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?/g; var rx_four = /(?:^|:|,)(?:\s*\[)+/g; var rx_escapable = /[\\"\u0000-\u001f\u007f-\u009f\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g; var rx_dangerous = /[\u0000\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g; function f(n) { return n < 10 ? "0" + n : n; } function this_value() { return this.valueOf(); } if (typeof Date.prototype.toJSON !== "function") { Date.prototype.toJSON = function () { return isFinite(this.valueOf()) ? 
this.getUTCFullYear() + "-" + f(this.getUTCMonth() + 1) + "-" + f(this.getUTCDate()) + "T" + f(this.getUTCHours()) + ":" + f(this.getUTCMinutes()) + ":" + f(this.getUTCSeconds()) + "Z" : null; }; Boolean.prototype.toJSON = this_value; Number.prototype.toJSON = this_value; String.prototype.toJSON = this_value; } var gap; var indent; var meta; var rep; function quote(string) { rx_escapable.lastIndex = 0; return rx_escapable.test(string) ? "\"" + string.replace(rx_escapable, function (a) { var c = meta[a]; return typeof c === "string" ? c : "\\u" + ("0000" + a.charCodeAt(0).toString(16)).slice(-4); }) + "\"" : "\"" + string + "\""; } function str(key, holder) { var i; var k; var v; var length; var mind = gap; var partial; var value = holder[key]; if (value && typeof value === "object" && typeof value.toJSON === "function") { value = value.toJSON(key); } if (typeof rep === "function") { value = rep.call(holder, key, value); } switch (typeof value) { case "string": return quote(value); case "number": return isFinite(value) ? String(value) : "null"; case "boolean": case "null": return String(value); case "object": if (!value) { return "null"; } gap += indent; partial = []; if (Object.prototype.toString.apply(value) === "[object Array]") { length = value.length; for (i = 0; i < length; i += 1) { partial[i] = str(i, value) || "null"; } v = partial.length === 0 ? "[]" : gap ? "[\n" + gap + partial.join(",\n" + gap) + "\n" + mind + "]" : "[" + partial.join(",") + "]"; gap = mind; return v; } if (rep && typeof rep === "object") { length = rep.length; for (i = 0; i < length; i += 1) { if (typeof rep[i] === "string") { k = rep[i]; v = str(k, value); if (v) { partial.push(quote(k) + (gap ? ": " : ":") + v); } } } } else { for (k in value) { if (Object.prototype.hasOwnProperty.call(value, k)) { v = str(k, value); if (v) { partial.push(quote(k) + (gap ? ": " : ":") + v); } } } } v = partial.length === 0 ? "{}" : gap ? 
"{\n" + gap + partial.join(",\n" + gap) + "\n" + mind + "}" : "{" + partial.join(",") + "}"; gap = mind; return v; } } if (typeof JSON.stringify !== "function") { meta = { "\b": "\\b", "\t": "\\t", "\n": "\\n", "\f": "\\f", "\r": "\\r", "\"": "\\\"", "\\": "\\\\" }; JSON.stringify = function (value, replacer, space) { var i; gap = ""; indent = ""; if (typeof space === "number") { for (i = 0; i < space; i += 1) { indent += " "; } } else if (typeof space === "string") { indent = space; } rep = replacer; if (replacer && typeof replacer !== "function" && (typeof replacer !== "object" || typeof replacer.length !== "number")) { throw new Error("JSON.stringify"); } return str("", { "": value }); }; } if (typeof JSON.parse !== "function") { JSON.parse = function (text, reviver) { var j; function walk(holder, key) { var k; var v; var value = holder[key]; if (value && typeof value === "object") { for (k in value) { if (Object.prototype.hasOwnProperty.call(value, k)) { v = walk(value, k); if (v !== undefined) { value[k] = v; } else { delete value[k]; } } } } return reviver.call(holder, key, value); } text = String(text); rx_dangerous.lastIndex = 0; if (rx_dangerous.test(text)) { text = text.replace(rx_dangerous, function (a) { return "\\u" + ("0000" + a.charCodeAt(0).toString(16)).slice(-4); }); } if (rx_one.test(text.replace(rx_two, "@").replace(rx_three, "]").replace(rx_four, ""))) { j = eval("(" + text + ")"); return (typeof reviver === "function") ? 
walk({ "": j }, "") : j; } throw new SyntaxError("JSON.parse"); }; } }()); var CryptoJS = CryptoJS || (function (Math, undefined) { var C = {}; var C_lib = C.lib = {}; var Base = C_lib.Base = (function () { function F() {}; return { extend: function (overrides) { F.prototype = this; var subtype = new F(); if (overrides) { subtype.mixIn(overrides); } if (!subtype.hasOwnProperty('init') || this.init === subtype.init) { subtype.init = function () { subtype.$super.init.apply(this, arguments); }; } subtype.init.prototype = subtype; subtype.$super = this; return subtype; }, create: function () { var instance = this.extend(); instance.init.apply(instance, arguments); return instance; }, init: function () {}, mixIn: function (properties) { for (var propertyName in properties) { if (properties.hasOwnProperty(propertyName)) { this[propertyName] = properties[propertyName]; } } if (properties.hasOwnProperty('toString')) { this.toString = properties.toString; } }, clone: function () { return this.init.prototype.extend(this); } }; }()); var WordArray = C_lib.WordArray = Base.extend({ init: function (words, sigBytes) { words = this.words = words || []; if (sigBytes != undefined) { this.sigBytes = sigBytes; } else { this.sigBytes = words.length * 4; } }, toString: function (encoder) { return (encoder || Hex).stringify(this); }, concat: function (wordArray) { var thisWords = this.words; var thatWords = wordArray.words; var thisSigBytes = this.sigBytes; var thatSigBytes = wordArray.sigBytes; this.clamp(); if (thisSigBytes % 4) { for (var i = 0; i < thatSigBytes; i++) { var thatByte = (thatWords[i >>> 2] >>> (24 - (i % 4) * 8)) & 0xff; thisWords[(thisSigBytes + i) >>> 2] |= thatByte << (24 - ((thisSigBytes + i) % 4) * 8); } } else if (thatWords.length > 0xffff) { for (var i = 0; i < thatSigBytes; i += 4) { thisWords[(thisSigBytes + i) >>> 2] = thatWords[i >>> 2]; } } else { thisWords.push.apply(thisWords, thatWords); } this.sigBytes += thatSigBytes; return this; }, clamp: function () 
{ var words = this.words; var sigBytes = this.sigBytes; words[sigBytes >>> 2] &= 0xffffffff << (32 - (sigBytes % 4) * 8); words.length = Math.ceil(sigBytes / 4); }, clone: function () { var clone = Base.clone.call(this); clone.words = this.words.slice(0); return clone; }, random: function (nBytes) { var words = []; var r = (function (m_w) { var m_w = m_w; var m_z = 0x3ade68b1; var mask = 0xffffffff; return function () { m_z = (0x9069 * (m_z & 0xFFFF) + (m_z >> 0x10)) & mask; m_w = (0x4650 * (m_w & 0xFFFF) + (m_w >> 0x10)) & mask; var result = ((m_z << 0x10) + m_w) & mask; result /= 0x100000000; result += 0.5; return result * (Math.random() > .5 ? 1 : -1); } }); for (var i = 0, rcache; i < nBytes; i += 4) { var _r = r((rcache || Math.random()) * 0x100000000); rcache = _r() * 0x3ade67b7; words.push((_r() * 0x100000000) | 0); } return new WordArray.init(words, nBytes); } }); var C_enc = C.enc = {}; var Hex = C_enc.Hex = { stringify: function (wordArray) { var words = wordArray.words; var sigBytes = wordArray.sigBytes; var hexChars = []; for (var i = 0; i < sigBytes; i++) { var bite = (words[i >>> 2] >>> (24 - (i % 4) * 8)) & 0xff; hexChars.push((bite >>> 4).toString(16)); hexChars.push((bite & 0x0f).toString(16)); } return hexChars.join(''); }, parse: function (hexStr) { var hexStrLength = hexStr.length; var words = []; for (var i = 0; i < hexStrLength; i += 2) { words[i >>> 3] |= parseInt(hexStr.substr(i, 2), 16) << (24 - (i % 8) * 4); } return new WordArray.init(words, hexStrLength / 2); } }; var Latin1 = C_enc.Latin1 = { stringify: function (wordArray) { var words = wordArray.words; var sigBytes = wordArray.sigBytes; var latin1Chars = []; for (var i = 0; i < sigBytes; i++) { var bite = (words[i >>> 2] >>> (24 - (i % 4) * 8)) & 0xff; latin1Chars.push(String.fromCharCode(bite)); } return latin1Chars.join(''); }, parse: function (latin1Str) { var latin1StrLength = latin1Str.length; var words = []; for (var i = 0; i < latin1StrLength; i++) { words[i >>> 2] |= 
(latin1Str.charCodeAt(i) & 0xff) << (24 - (i % 4) * 8); } return new WordArray.init(words, latin1StrLength); } }; var Utf8 = C_enc.Utf8 = { stringify: function (wordArray) { try { return decodeURIComponent(escape(Latin1.stringify(wordArray))); } catch (e) { throw new Error('Malformed UTF-8 data'); } }, parse: function (utf8Str) { return Latin1.parse(unescape(encodeURIComponent(utf8Str))); } }; var BufferedBlockAlgorithm = C_lib.BufferedBlockAlgorithm = Base.extend({ reset: function () { this._data = new WordArray.init(); this._nDataBytes = 0; }, _append: function (data) { if (typeof data == 'string') { data = Utf8.parse(data); } this._data.concat(data); this._nDataBytes += data.sigBytes; }, _process: function (doFlush) { var data = this._data; var dataWords = data.words; var dataSigBytes = data.sigBytes; var blockSize = this.blockSize; var blockSizeBytes = blockSize * 4; var nBlocksReady = dataSigBytes / blockSizeBytes; if (doFlush) { nBlocksReady = Math.ceil(nBlocksReady); } else { nBlocksReady = Math.max((nBlocksReady | 0) - this._minBufferSize, 0); } var nWordsReady = nBlocksReady * blockSize; var nBytesReady = Math.min(nWordsReady * 4, dataSigBytes); if (nWordsReady) { for (var offset = 0; offset < nWordsReady; offset += blockSize) { this._doProcessBlock(dataWords, offset); } var processedWords = dataWords.splice(0, nWordsReady); data.sigBytes -= nBytesReady; } return new WordArray.init(processedWords, nBytesReady); }, clone: function () { var clone = Base.clone.call(this); clone._data = this._data.clone(); return clone; }, _minBufferSize: 0 }); var Hasher = C_lib.Hasher = BufferedBlockAlgorithm.extend({ cfg: Base.extend(), init: function (cfg) { this.cfg = this.cfg.extend(cfg); this.reset(); }, reset: function () { BufferedBlockAlgorithm.reset.call(this); this._doReset(); }, update: function (messageUpdate) { this._append(messageUpdate); this._process(); return this; }, finalize: function (messageUpdate) { if (messageUpdate) { this._append(messageUpdate); } 
var hash = this._doFinalize(); return hash; }, blockSize: 512 / 32, _createHelper: function (hasher) { return function (message, cfg) { return new hasher.init(cfg).finalize(message); }; }, _createHmacHelper: function (hasher) { return function (message, key) { return new C_algo.HMAC.init(hasher, key).finalize(message); }; } }); var C_algo = C.algo = {}; return C; }(Math)); (function (Math) { var C = CryptoJS; var C_lib = C.lib; var WordArray = C_lib.WordArray; var Hasher = C_lib.Hasher; var C_algo = C.algo; var T = []; (function () { for (var i = 0; i < 64; i++) { T[i] = (Math.abs(Math.sin(i + 1)) * 0x100000000) | 0; } }()); var MD5 = C_algo.MD5 = Hasher.extend({ _doReset: function () { this._hash = new WordArray.init([0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476]); }, _doProcessBlock: function (M, offset) { for (var i = 0; i < 16; i++) { var offset_i = offset + i; var M_offset_i = M[offset_i]; M[offset_i] = ((((M_offset_i << 8) | (M_offset_i >>> 24)) & 0x00ff00ff) | (((M_offset_i << 24) | (M_offset_i >>> 8)) & 0xff00ff00)); } var H = this._hash.words; var M_offset_0 = M[offset + 0]; var M_offset_1 = M[offset + 1]; var M_offset_2 = M[offset + 2]; var M_offset_3 = M[offset + 3]; var M_offset_4 = M[offset + 4]; var M_offset_5 = M[offset + 5]; var M_offset_6 = M[offset + 6]; var M_offset_7 = M[offset + 7]; var M_offset_8 = M[offset + 8]; var M_offset_9 = M[offset + 9]; var M_offset_10 = M[offset + 10]; var M_offset_11 = M[offset + 11]; var M_offset_12 = M[offset + 12]; var M_offset_13 = M[offset + 13]; var M_offset_14 = M[offset + 14]; var M_offset_15 = M[offset + 15]; var a = H[0]; var b = H[1]; var c = H[2]; var d = H[3]; a = FF(a, b, c, d, M_offset_0, 7, T[0]); d = FF(d, a, b, c, M_offset_1, 12, T[1]); c = FF(c, d, a, b, M_offset_2, 17, T[2]); b = FF(b, c, d, a, M_offset_3, 22, T[3]); a = FF(a, b, c, d, M_offset_4, 7, T[4]); d = FF(d, a, b, c, M_offset_5, 12, T[5]); c = FF(c, d, a, b, M_offset_6, 17, T[6]); b = FF(b, c, d, a, M_offset_7, 22, T[7]); a = FF(a, b, 
c, d, M_offset_8, 7, T[8]); d = FF(d, a, b, c, M_offset_9, 12, T[9]); c = FF(c, d, a, b, M_offset_10, 17, T[10]); b = FF(b, c, d, a, M_offset_11, 22, T[11]); a = FF(a, b, c, d, M_offset_12, 7, T[12]); d = FF(d, a, b, c, M_offset_13, 12, T[13]); c = FF(c, d, a, b, M_offset_14, 17, T[14]); b = FF(b, c, d, a, M_offset_15, 22, T[15]); a = GG(a, b, c, d, M_offset_1, 5, T[16]); d = GG(d, a, b, c, M_offset_6, 9, T[17]); c = GG(c, d, a, b, M_offset_11, 14, T[18]); b = GG(b, c, d, a, M_offset_0, 20, T[19]); a = GG(a, b, c, d, M_offset_5, 5, T[20]); d = GG(d, a, b, c, M_offset_10, 9, T[21]); c = GG(c, d, a, b, M_offset_15, 14, T[22]); b = GG(b, c, d, a, M_offset_4, 20, T[23]); a = GG(a, b, c, d, M_offset_9, 5, T[24]); d = GG(d, a, b, c, M_offset_14, 9, T[25]); c = GG(c, d, a, b, M_offset_3, 14, T[26]); b = GG(b, c, d, a, M_offset_8, 20, T[27]); a = GG(a, b, c, d, M_offset_13, 5, T[28]); d = GG(d, a, b, c, M_offset_2, 9, T[29]); c = GG(c, d, a, b, M_offset_7, 14, T[30]); b = GG(b, c, d, a, M_offset_12, 20, T[31]); a = HH(a, b, c, d, M_offset_5, 4, T[32]); d = HH(d, a, b, c, M_offset_8, 11, T[33]); c = HH(c, d, a, b, M_offset_11, 16, T[34]); b = HH(b, c, d, a, M_offset_14, 23, T[35]); a = HH(a, b, c, d, M_offset_1, 4, T[36]); d = HH(d, a, b, c, M_offset_4, 11, T[37]); c = HH(c, d, a, b, M_offset_7, 16, T[38]); b = HH(b, c, d, a, M_offset_10, 23, T[39]); a = HH(a, b, c, d, M_offset_13, 4, T[40]); d = HH(d, a, b, c, M_offset_0, 11, T[41]); c = HH(c, d, a, b, M_offset_3, 16, T[42]); b = HH(b, c, d, a, M_offset_6, 23, T[43]); a = HH(a, b, c, d, M_offset_9, 4, T[44]); d = HH(d, a, b, c, M_offset_12, 11, T[45]); c = HH(c, d, a, b, M_offset_15, 16, T[46]); b = HH(b, c, d, a, M_offset_2, 23, T[47]); a = II(a, b, c, d, M_offset_0, 6, T[48]); d = II(d, a, b, c, M_offset_7, 10, T[49]); c = II(c, d, a, b, M_offset_14, 15, T[50]); b = II(b, c, d, a, M_offset_5, 21, T[51]); a = II(a, b, c, d, M_offset_12, 6, T[52]); d = II(d, a, b, c, M_offset_3, 10, T[53]); c = II(c, d, a, b, M_offset_10, 
15, T[54]); b = II(b, c, d, a, M_offset_1, 21, T[55]); a = II(a, b, c, d, M_offset_8, 6, T[56]); d = II(d, a, b, c, M_offset_15, 10, T[57]); c = II(c, d, a, b, M_offset_6, 15, T[58]); b = II(b, c, d, a, M_offset_13, 21, T[59]); a = II(a, b, c, d, M_offset_4, 6, T[60]); d = II(d, a, b, c, M_offset_11, 10, T[61]); c = II(c, d, a, b, M_offset_2, 15, T[62]); b = II(b, c, d, a, M_offset_9, 21, T[63]); H[0] = (H[0] + a) | 0; H[1] = (H[1] + b) | 0; H[2] = (H[2] + c) | 0; H[3] = (H[3] + d) | 0; }, _doFinalize: function () { var data = this._data; var dataWords = data.words; var nBitsTotal = this._nDataBytes * 8; var nBitsLeft = data.sigBytes * 8; dataWords[nBitsLeft >>> 5] |= 0x80 << (24 - nBitsLeft % 32); var nBitsTotalH = Math.floor(nBitsTotal / 0x100000000); var nBitsTotalL = nBitsTotal; dataWords[(((nBitsLeft + 64) >>> 9) << 4) + 15] = ((((nBitsTotalH << 8) | (nBitsTotalH >>> 24)) & 0x00ff00ff) | (((nBitsTotalH << 24) | (nBitsTotalH >>> 8)) & 0xff00ff00)); dataWords[(((nBitsLeft + 64) >>> 9) << 4) + 14] = ((((nBitsTotalL << 8) | (nBitsTotalL >>> 24)) & 0x00ff00ff) | (((nBitsTotalL << 24) | (nBitsTotalL >>> 8)) & 0xff00ff00)); data.sigBytes = (dataWords.length + 1) * 4; this._process(); var hash = this._hash; var H = hash.words; for (var i = 0; i < 4; i++) { var H_i = H[i]; H[i] = (((H_i << 8) | (H_i >>> 24)) & 0x00ff00ff) | (((H_i << 24) | (H_i >>> 8)) & 0xff00ff00); } return hash; }, clone: function () { var clone = Hasher.clone.call(this); clone._hash = this._hash.clone(); return clone; } }); function FF(a, b, c, d, x, s, t) { var n = a + ((b & c) | (~b & d)) + x + t; return ((n << s) | (n >>> (32 - s))) + b; } function GG(a, b, c, d, x, s, t) { var n = a + ((b & d) | (c & ~d)) + x + t; return ((n << s) | (n >>> (32 - s))) + b; } function HH(a, b, c, d, x, s, t) { var n = a + (b ^ c ^ d) + x + t; return ((n << s) | (n >>> (32 - s))) + b; } function II(a, b, c, d, x, s, t) { var n = a + (c ^ (b | ~d)) + x + t; return ((n << s) | (n >>> (32 - s))) + b; } C.MD5 = 
Hasher._createHelper(MD5); C.HmacMD5 = Hasher._createHmacHelper(MD5); }(Math)); function get_as_cp_signature() { var getHoney = function () { var t = Math.floor((new Date).getTime() / 1e3), e = t.toString(16).toUpperCase(), i = CryptoJS.MD5(t + '').toString().toUpperCase(); if (8 != e.length) return { as: "479BB4B7254C150", cp: "7E0AC8874BB0985" }; for (var n = i.slice(0, 5), a = i.slice(-5), s = "", o = 0; 5 > o; o++) s += n.substr(o, 1) + e.substr(o, 1); for (var r = "", c = 0; 5 > c; c++) r += e.substr(c + 3, 1) + a.substr(c, 1); return { as: "A1" + s + e.slice(-3), cp: e.slice(0, 3) + r + "E1" } } function e(e, a, r) { return (b[e] || (b[e] = t("x,y", "return x " + e + " y")))(r, a) } function a(e, a, r) { return (k[r] || (k[r] = t("x,y", "return new x[y](" + Array(r + 1).join(",x[++y]").substr(1) + ")")))(e, a) } function r(e, a, r) { var n, t, s = {}, b = s.d = r ? r.d + 1 : 0; for (s["$" + b] = s, t = 0; t < b; t++) s[n = "$" + t] = r[n]; for (t = 0, b = s.length = a.length; t < b; t++) s[t] = a[t]; return c(e, 0, s) } function c(t, b, k) { function u(e) { v[x++] = e } function f() { return g = t.charCodeAt(b++) - 32, t.substring(b, b += g) } function l() { try { y = c(t, b, k) } catch (e) { h = e, y = l } } for (var h, y, d, g, v = [], x = 0;;) switch (g = t.charCodeAt(b++) - 32) { case 1: u(!v[--x]); break; case 4: v[x++] = f(); break; case 5: u(function (e) { var a = 0, r = e.length; return function () { var c = a < r; return c && u(e[a++]), c } }(v[--x])); break; case 6: y = v[--x], u(v[--x](y)); break; case 8: if (g = t.charCodeAt(b++) - 32, l(), b += g, g = t.charCodeAt(b++) - 32, y === c) b += g; else if (y !== l) return y; break; case 9: v[x++] = c; break; case 10: u(s(v[--x])); break; case 11: y = v[--x], u(v[--x] + y); break; case 12: for (y = f(), d = [], g = 0; g < y.length; g++) d[g] = y.charCodeAt(g) ^ g + y.length; u(String.fromCharCode.apply(null, d)); break; case 13: y = v[--x], h = delete v[--x][y]; break; case 14: v[x++] = 
t.charCodeAt(b++) - 32; break; case 59: u((g = t.charCodeAt(b++) - 32) ? (y = x, v.slice(x -= g, y)) : []); break; case 61: u(v[--x][t.charCodeAt(b++) - 32]); break; case 62: g = v[--x], k[0] = 65599 * k[0] + k[1].charCodeAt(g) >>> 0; break; case 65: h = v[--x], y = v[--x], v[--x][y] = h; break; case 66: u(e(t.substr(b++, 1), v[--x], v[--x])); break; case 67: y = v[--x]; d = v[--x]; g = v[--x]; u(g.x === c ? r(g.y, y, k) : g.apply(d, y)); break; case 68: u(e((g = t.substr(b++, 1)) < "<" ? (b--, f()) : g + g, v[--x], v[--x])); break; case 70: u(!1); break; case 71: v[x++] = n; break; case 72: v[x++] = +f(); break; case 73: u(parseInt(f(), 36)); break; case 75: if (v[--x]) { b++; break } case 74: g = t.charCodeAt(b++) - 32 << 16 >> 16, b += g; break; case 76: u(k[t.charCodeAt(b++) - 32]); break; case 77: y = v[--x], u(v[--x][y]); break; case 78: g = t.charCodeAt(b++) - 32, u(a(v, x -= g + 1, g)); break; case 79: g = t.charCodeAt(b++) - 32, u(k["$" + g]); break; case 81: h = v[--x], v[--x][f()] = h; break; case 82: u(v[--x][f()]); break; case 83: h = v[--x], k[t.charCodeAt(b++) - 32] = h; break; case 84: v[x++] = !0; break; case 85: v[x++] = void 0; break; case 86: u(v[x - 1]); break; case 88: h = v[--x], y = v[--x], v[x++] = h, v[x++] = y; break; case 89: u(function () { function e() { return r(e.y, arguments, k) } return e.y = f(), e.x = c, e }()); break; case 90: v[x++] = null; break; case 91: v[x++] = h; break; case 93: h = v[--x]; break; case 0: return v[--x]; default: u((g << 16 >> 16) - 16) } } var n = window; var t = n.Function, s = Object.keys || function (e) { var a = {}, r = 0; for (var c in e) a[r++] = c; return a.length = r, a }, b = {}, k = {}; 
r(decodeURIComponent("gr%24Daten%20%D0%98b%2Fs!l%20y%CD%92y%C4%B9g%2C(lfi~ah%60%7Bmv%2C-n%7CjqewVxp%7Brvmmx%2C%26eff%7Fkx%5B!cs%22l%22.Pq%25widthl%22%40q%26heightl%22vr*getContextx%24%222d%5B!cs%23l%23%2C*%3B%3F%7Cu.%7Cuc%7Buq%24fontl%23vr(fillTextx%24%24%E9%BE%98%E0%B8%91%E0%B8%A0%EA%B2%BD2%3C%5B%23c%7Dl%232q*shadowBlurl%231q-shadowOffsetXl%23%24%24limeq%2BshadowColorl%23vr%23arcx88802%5B%25c%7Dl%23vr%26strokex%5B%20c%7Dl%22v%2C)%7DeOmyoZB%5Dmx%5B%20cs!0s%24l%24Pb%3Ck7l%20l!r%26lengthb%25%5El%241%2Bs%24j%02l%20%20s%23i%241ek1s%24gr%23tack4)zgr%23tac%24!%20%2B0o!%5B%23cj%3Fo%20%5D!l%24b%25s%22o%20%5D!l%22l%24b*b%5E0d%23%3E%3E%3Es!0s%25yA0s%22l%22l!r%26lengthb%3Ck%2Bl%22%5El%221%2Bs%22j%05l%20%20s%26l%26z0l!%24%20%2B%5B%22cs'(0l%23i'1ps9wxb%26s()%20%26%7Bs)%2Fs(gr%26Stringr%2CfromCharCodes)0s*yWl%20._b%26s%20o!%5D)l%20l%20Jb%3Ck%24.aj%3Bl%20.Tb%3Ck%24.gj%2Fl%20.%5Eb%3Ck%26i%22-4j!%1F%2B%26%20s%2ByPo!%5D%2Bs!l!l%20Hd%3E%26l!l%20Bd%3E%26%2Bl!l%20%3Cd%3E%26%2Bl!l%206d%3E%26%2Bl!l%20%26%2B%20s%2Cy%3Do!o!%5D%2Fq%2213o!l%20q%2210o!%5D%2Cl%202d%3E%26%20s.%7Bs-yMo!o!%5D0q%2213o!%5D*Ld%3Cl%204d%23%3E%3E%3Eb%7Cs!o!l%20q%2210o!%5D%2Cl!%26%20s%2FyIo!o!%5D.q%2213o!%5D%2Co!%5D*Jd%3Cl%206d%23%3E%3E%3Eb%7C%26o!%5D%2Bl%20%26%2B%20s0l-l!%26l-l!i'1z141z4b%2F%40d%3Cl%22b%7C%26%2Bl-l(l!b%5E%26%2Bl-l%26zl'g%2C)gk%7Dejo%7B%7Fcm%2C)%7Cyn~Lij~em%5B%22cl%24b%25%40d%3Cl%26zl'l%20%24%20%2B%5B%22cl%24b%25b%7C%26%2Bl-l%258d%3C%40b%7Cl!b%5E%26%2B%20q%24sign%20"), [TAC = {}]); var params = getHoney(); params._signature = TAC.sign(0); return JSON.stringify(params); }
将以上代码,保存为JS文件,通过安装execjs 库即可运行JS代码(其实破解as,cp也可以直接复制源JS代码运行破解,不需要自行对代码翻译破解),
python编译JS代码:
import execjs


def get_js(self, js_path=r"E:\toutiao\toutiao-TAC.sign.js"):
    """Run ``get_as_cp_signature`` from the saved JavaScript file.

    Args:
        self: Unused; kept because the snippet is written as a method.
        js_path: Location of the JavaScript file. Defaults to the path
            hard-coded in the original snippet.

    Returns:
        Whatever the JavaScript function returns; in the accompanying script
        that is a JSON string with the keys ``as``, ``cp`` and ``_signature``.
    """
    # The context manager closes the handle even if reading fails; the
    # original left the file open.
    with open(js_path, 'r', encoding='UTF-8') as f:
        source = f.read()
    ctx = execjs.compile(source)
    return ctx.call('get_as_cp_signature')
完成破解后,即可正常爬取数据,完整代码:
#!coding=utf-8
import requests
import re
import json
import math
import os
import random
import time
from requests.packages.urllib3.exceptions import InsecureRequestWarning
import pandas as pd
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)  # silence the warning caused by verify=False
import hashlib
import execjs


class toutiao(object):
    """Crawler for one Toutiao channel feed that stores the articles in a CSV file.

    Each feed request needs three generated parameters (``as``, ``cp``,
    ``_signature``); they are obtained by running a saved JavaScript file
    through ``execjs``.
    """

    # Default location of the signing script (the path the original hard-coded).
    js_path = r"E:\toutiao\toutiao-TAC.sign.js"
    # Compiled JavaScript context, created lazily by get_js().
    _js_ctx = None
    # Number of feed pages requested per getdata() call.
    _PAGES = 30
    # Column order of the resulting CSV.
    _COLUMNS = ('title', 'source', 'source_url', 'comments_count', 'tag',
                'chinese_tag', 'label', 'abstract', 'behot_time', 'nowtime',
                'duration')

    def __init__(self, path, url, js_path=None):
        """
        Args:
            path: Directory in which ``toutiao.csv`` is written.
            url: Channel page URL; must contain ``ch/<channel>/``.
            js_path: Optional override for the signing script location.

        Raises:
            ValueError: If no channel name can be extracted from ``url``.
        """
        # Validate the URL before opening a session so nothing leaks on failure.
        match = re.search('ch/(.*?)/', url)
        if match is None:
            raise ValueError('cannot find "ch/<channel>/" in url: %r' % (url,))
        self.channel = match.group(1)
        self.path = path  # directory the CSV is saved to
        self.url = url
        if js_path is not None:
            self.js_path = js_path
        self.s = requests.session()
        headers = {'Accept': '*/*',
                   'Accept-Language': 'zh-CN',
                   'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko',
                   'Connection': 'Keep-Alive',
                   }
        self.s.headers.update(headers)

    def closes(self):
        """Close the underlying HTTP session."""
        self.s.close()

    def getdata(self):
        """Fetch the feed pages, collect every non-ad item and write ``toutiao.csv``."""
        # Load the channel page once before calling the API; the body is unused.
        self.s.get(url=self.url, verify=False)
        self.s.headers.update({'referer': self.url})
        # Always '0': the signing script signs the value 0, so every request
        # asks for the refresh feed instead of paging by timestamp.
        max_behot_time = '0'
        rows = {name: [] for name in self._COLUMNS}

        for _ in range(self._PAGES):
            Honey = json.loads(self.get_js())
            url = 'https://www.toutiao.com/api/pc/feed/?category={}&utm_source=toutiao&widen=1&max_behot_time={}&max_behot_time_tmp={}&tadrequire=true&as={}&cp={}&_signature={}'.format(
                self.channel, max_behot_time, max_behot_time,
                Honey['as'], Honey['cp'], Honey['_signature'])
            req = self.s.get(url=url, verify=False)
            time.sleep(random.random() * 2 + 2)  # randomized pause between pages
            print(req.text)
            print(url)
            j = json.loads(req.text)
            # Iterate over what was actually returned; the original indexed
            # items 0..9 and crashed on shorter pages.
            for item in j.get('data') or []:
                self._append_item(rows, item, time.time())
            # NOTE(review): the source lost its indentation; this pause is
            # assumed to be per page rather than per item.
            time.sleep(2)
            print('------------' + str((j.get('next') or {}).get('max_behot_time')))

        for name in self._COLUMNS:
            print(rows[name])
        df = pd.DataFrame(data=rows)
        df.to_csv(os.path.join(self.path, 'toutiao.csv'), encoding='GB18030', index=False)

    @staticmethod
    def _append_item(rows, item, now):
        """Append one feed item to the column lists in ``rows``; ads are skipped."""
        if item['tag'] == 'ad':
            return
        behot = int(item['behot_time'])
        rows['title'].append(item['title'])
        rows['source'].append(item['source'])  # author / account name
        rows['source_url'].append('https://www.toutiao.com/' + item['source_url'])
        # Optional fields: fall back to a default instead of a bare except.
        rows['comments_count'].append(item.get('comments_count', 0))
        rows['tag'].append(item['tag'])
        rows['chinese_tag'].append(item.get('chinese_tag', ''))
        rows['label'].append(item.get('label', ''))
        rows['abstract'].append(item.get('abstract', ''))
        rows['behot_time'].append(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(behot)))  # publish time
        rows['nowtime'].append(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(now)))  # crawl time
        rows['duration'].append(now - behot)  # seconds since publication

    def getHoney(self, t):
        """Pure-Python port of the JavaScript ``getHoney``.

        Args:
            t: Unix timestamp in seconds.

        Returns:
            Tuple ``(eas, ecp)``.
        """
        e = '%X' % t  # upper-case hex timestamp
        if len(e) != 8:
            # Same fixed fallback as the JavaScript implementation.
            return "479BB4B7254C150", "7E0AC8874BB0985"
        i = hashlib.md5(str(t).encode('utf-8')).hexdigest().upper()
        n = i[:5]   # first five digest characters
        a = i[-5:]  # last five digest characters
        s = ''.join(n[x] + e[x] for x in range(5))
        r = ''.join(e[x + 3] + a[x] for x in range(5))
        eas = 'A1' + s + e[-3:]
        ecp = e[:3] + r + 'E1'
        return eas, ecp

    def get_js(self):
        """Return the JSON string produced by ``get_as_cp_signature`` in the JS file."""
        if self._js_ctx is None:
            # Read and compile once; the original reopened the file for every
            # page and never closed it.
            with open(self.js_path, 'r', encoding='UTF-8') as f:
                self._js_ctx = execjs.compile(f.read())
        return self._js_ctx.call('get_as_cp_signature')


if __name__ == '__main__':
    path = r'E:\toutiao'
    url = 'https://www.toutiao.com/ch/news_tech/'
    t = toutiao(path, url)
    try:
        t.getdata()
    finally:
        # Release the session even when the crawl fails part-way.
        t.closes()