目錄
websocket--hook
代碼不完整,僅提供大致思路
原理:
瀏覽器(客戶端):在瀏覽器中注入一段JS代碼,與服務端建立連接。調用瀏覽器中的js方法,把返回的數據發送給服務端
node啟動js代碼,監聽某端口(服務端):服務端把python發過來的參數轉發給瀏覽器(客戶端)處理,並接收處理結果,再把接收到的結果返回給python處理
python(調用者):把參數發送給node,接收node傳回來的數據
優點:
1.對於js混淆加密較深的,可以采用此方法。
2.不用扣js加密代碼,直接調用瀏覽器環境
缺點:
1.如果有selenium檢測,要想使用此方法,必須先繞過selenium檢測,否則只能使用真機進行js注入
2.需要node環境,寫一個websocket服務端和客戶端
3.速度沒有直接破解js快
服務端--WebSocketServer.js
let iconv = require('iconv-lite')
var ws = require("nodejs-websocket");
console.log("開始建立連接...")

// Relay server: once a peer has announced itself with "Browser" or "Python",
// every further text frame it sends is forwarded verbatim to all other open
// connections.
var server = ws.createServer(function (conn) {
    // Roles announced on THIS connection, mapped to its handshake key.
    // The map is created per connection, so in practice it only records
    // whether `conn` has registered yet.
    let cached = {};

    conn.on("text", function (msg) {
        if (!msg) return;
        var key = conn.key;

        // First message from a browser or Python peer: remember the role and
        // do not forward the registration frame itself.
        if (msg === "Browser" || msg === "Python") {
            cached[msg] = key;
            return;
        }

        // Frames from peers that never registered are dropped.
        if (!Object.values(cached).includes(key)) return;

        // Snapshot the other peers first, then relay the payload to each.
        var targets = server.connections.filter(function (peer) {
            return peer.key !== key;
        });
        targets.forEach(function (peer) {
            peer.send(msg);
        });
    });

    conn.on("close", function (code, reason) {
        // Nothing to clean up: all state for this peer lives in this closure.
    });

    // The error event carries a single Error object; include its message so
    // failures can be diagnosed instead of only seeing a fixed string.
    conn.on("error", function (err) {
        console.log("異常關閉", err && err.message)
    });

    // NOTE: "connection" is a server-level event in nodejs-websocket; new
    // peers are already handled by this createServer callback, so no
    // per-connection "connection" listener is registered here.
}).listen(10512)
console.log("WebSocket建立完畢")
客戶端注入JS代碼
createSocket();

/**
 * Connect to the local relay, register as the "Browser" peer and answer each
 * incoming message by issuing a GET request from inside the page.
 *
 * The socket is published as `window.ws`. When the connection closes, a new
 * attempt is scheduled after 60 seconds.
 */
function createSocket() {
    var socket = new WebSocket('ws://127.0.0.1:10512/');
    window.ws = socket;

    socket.onopen = function (e) {
        console.log("連接服務器成功");
        // Registration frame expected by the relay.
        window.ws.send("Browser");
    };

    socket.onclose = function (e) {
        console.log("服務器關閉");
        setTimeout(createSocket, 60000);
    };

    socket.onerror = function () {
        console.log("連接出錯");
    };

    socket.onmessage = function (e) {
        // `glb` is not defined in this snippet -- presumably a global provided
        // by the page being driven; verify before reuse.
        var request = new glb.XMLHttpRequest();

        request.onreadystatechange = function () {
            // Only act once the request has completed.
            if (request.readyState != 4) return;
            if (request.status != 200) {
                alert("Problem retrieving XML data");
                return;
            }
            // Parse and re-serialise so that only well-formed JSON is relayed.
            var parsed = JSON.parse(request.responseText);
            // Send the result back through the relay (to the Python side).
            window.ws.send(JSON.stringify(parsed));
        };

        // The message payload is used directly as the URL to fetch.
        request.open('GET', e.data, true);
        request.send(null);
    };
}
python開端口
# -*- coding: utf-8 -*-
from sanic import Sanic
from sanic.response import json
import os
import urllib3
from toutiao2_文件方式.get_data import get_data
from toutiao2_文件方式.get_user_id import get_user
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
app = Sanic(__name__)
# An explicit route name keeps this route distinct from the other handler
# that is also called ``captcha_server``.
@app.route("/get_user_id", methods=["GET"], name="get_user_id")
def captcha_server(request):
    """Handle ``GET /get_user_id?media_id=...``.

    Reads the first ``media_id`` query value, passes it to ``get_user_id`` and
    always answers with a JSON response:

    * 400 when ``media_id`` is missing,
    * 500 with the error text when the lookup raises,
    * 200 with ``{"result": <return value of get_user_id>}`` otherwise.
    """
    try:
        media_id = request.args['media_id'][0]
    except (KeyError, IndexError):
        return json({"error": "missing query parameter: media_id"}, status=400)

    try:
        result = get_user_id(media_id)
    except Exception as exc:  # handler boundary: report instead of swallowing
        return json({"error": str(exc)}, status=500)
    return json({"result": result})
# An explicit route name keeps this route distinct from the other handler
# that is also called ``captcha_server``.
@app.route("/get_data", methods=["GET"], name="get_data")
def captcha_server(request):
    """Handle ``GET /get_data?user_id=...&offset=...``.

    Reads the first ``user_id`` and ``offset`` query values, passes them to
    ``get_res`` and always answers with a JSON response:

    * 400 when either parameter is missing,
    * 500 with the error text when the fetch raises,
    * 200 with ``{"result": <return value of get_res>}`` otherwise.
    """
    try:
        user_id = request.args['user_id'][0]
        offset = request.args['offset'][0]
    except (KeyError, IndexError):
        return json(
            {"error": "missing query parameter: user_id and offset are required"},
            status=400,
        )

    try:
        result = get_res(user_id, offset)
    except Exception as exc:  # handler boundary: report instead of swallowing
        return json({"error": str(exc)}, status=500)
    return json({"result": result})
def get_user_id(media_id):
    """Forward *media_id* to ``get_user`` and return whatever it returns."""
    return get_user(media_id)
def get_res(user_id, offset):
    """Forward *user_id* and *offset* to ``get_data`` and return its result."""
    return get_data(user_id, offset)
if __name__ == "__main__":
    # Start the HTTP service on the loopback interface, port 4007.
    app.run(port=4007, host="127.0.0.1")
get_data.py 文件方式
# -*- coding: utf-8 -*-
import time
from ws4py.client.threadedclient import WebSocketClient
import _locale
_locale._getdefaultlocale = (lambda *args: ['zh_CN', 'utf8'])
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class CG_Client(WebSocketClient):
    """Relay client that registers as the ``Python`` peer and stores one reply."""

    def opened(self):
        """Register with the relay by sending the literal text ``Python``."""
        # Not read anywhere in this module; kept for callers that may rely on it.
        self.max_cursor = 0
        self.send("Python")

    def closed(self, code, reason=None):
        """Ignore close notifications."""
        pass

    def received_message(self, resp):
        """Decode the reply as UTF-8, write it via ``write_data`` and close."""
        data = resp.data.decode("utf-8")
        write_data(data)
        # Close through ``self``: the client instance is a local variable of
        # ``get_data``, so a module-level ``ws`` name does not exist here.
        self.close()
def write_data(data):
    """Overwrite ``./data.txt`` with the text *data*, encoded as UTF-8."""
    # The ``with`` block closes the file; no explicit close() is needed.
    with open('./data.txt', 'w', encoding='utf-8') as f:
        f.write(data)
def get_data(user_id, offset):
    """Ask the browser, through the relay, to fetch one page of a user's feed.

    Builds the feed API path for *user_id* / *offset*, sends it over a fresh
    ``CG_Client`` connection and blocks in ``run_forever`` until that
    connection ends. The reply is handled by ``CG_Client.received_message``;
    this function itself returns ``None``.

    Args:
        user_id: Account id to query; converted with ``str``.
        offset: Paging offset; converted with ``str``.

    Raises:
        Exception: Anything raised while sending or running is re-raised after
            the socket has been closed. ``KeyboardInterrupt`` is reported and
            swallowed.
    """
    from urllib.parse import quote

    # Percent-encode caller-supplied values so characters such as '&' or '#'
    # cannot alter the query string. Plain numeric ids are left unchanged.
    user_id = quote(str(user_id), safe='')
    offset = quote(str(offset), safe='')
    real_arg = f"/api/feed_backflow/profile_share/v1/?category=profile_article&visited_uid={user_id}&stream_api_version=82&request_source=1&offset={offset}&user_id={user_id}&appId=1286&appType=mobile_detail_web&isAndroid=true&isIOS=false&isMobile=true&cookie_enabled=true&screen_width=288&screen_height=511&browser_language=zh-CN&browser_platform=MacIntel&browser_name=firefox&browser_version=85.0.4183.83&browser_online=true&timezone_name=Asia%2FShanghai"

    ws = CG_Client('ws://127.0.0.1:10512/')
    ws.connect()
    try:
        # Presumably gives the "Python" registration frame time to reach the
        # relay before the request itself is sent -- TODO confirm.
        time.sleep(0.1)
        ws.send(real_arg)
        ws.run_forever()
    except KeyboardInterrupt:
        print('異常關閉')
        ws.close()
    except Exception:
        # Do not leave the socket open when sending or running fails.
        ws.close()
        raise
get_user_id.py 文件方式
# -*- coding: utf-8 -*-
import time
from ws4py.client.threadedclient import WebSocketClient
import _locale
_locale._getdefaultlocale = (lambda *args: ['zh_CN', 'utf8'])
import io
import sys
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
# media_id = sys.argv[1].split(',', 1)[0] # sys.argv--> [get_attention.py,user_id,cursor]
class CG_Client(WebSocketClient):
    """Relay client that registers as the ``Python`` peer and stores one reply."""

    def opened(self):
        """Register with the relay by sending the literal text ``Python``."""
        # Not read anywhere in this module; kept for callers that may rely on it.
        self.max_cursor = 0
        self.send("Python")

    def closed(self, code, reason=None):
        """Ignore close notifications."""
        pass

    def received_message(self, resp):
        """Decode the reply as UTF-8, write it via ``write_user`` and close."""
        data = resp.data.decode("utf-8")
        write_user(data)
        # Close through ``self``: the client instance is a local variable of
        # ``get_user``, so a module-level ``ws`` name does not exist here.
        self.close()
def write_user(data):
    """Overwrite ``./user.txt`` with the text *data*, encoded as UTF-8."""
    # The ``with`` block closes the file; no explicit close() is needed.
    with open('./user.txt', 'w', encoding='utf-8') as f:
        f.write(data)
def get_user(media_id):
    """Ask the browser, through the relay, to fetch a profile by media id.

    Builds the profile API path for *media_id*, sends it over a fresh
    ``CG_Client`` connection and blocks in ``run_forever`` until that
    connection ends. The reply is handled by ``CG_Client.received_message``;
    this function itself returns ``None``.

    Args:
        media_id: Media id to look up; converted with ``str``.

    Raises:
        Exception: Anything raised while sending or running is re-raised after
            the socket has been closed. ``KeyboardInterrupt`` is reported and
            swallowed.
    """
    from urllib.parse import quote

    # Percent-encode the caller-supplied value so characters such as '&' or
    # '#' cannot alter the query string. Plain numeric ids are unchanged.
    media_id = quote(str(media_id), safe='')
    real_arg = f"/user/profile/homepage/share/v7/?media_id={media_id}&request_source=1&appId=1286&appType=mobile_detail_web&isAndroid=true&isIOS=false&isMobile=true&cookie_enabled=true&screen_width=393&screen_height=882&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=85.0.4183.83&browser_online=true&timezone_name=Asia%2FShanghai"

    ws = CG_Client('ws://127.0.0.1:10512/')
    ws.connect()
    try:
        # Presumably gives the "Python" registration frame time to reach the
        # relay before the request itself is sent -- TODO confirm.
        time.sleep(0.1)
        ws.send(real_arg)
        ws.run_forever()
    except KeyboardInterrupt:
        print('異常關閉')
        ws.close()
    except Exception:
        # Do not leave the socket open when sending or running fails.
        ws.close()
        raise
get_data.py 終端方式
# -*- coding: utf-8 -*-
import time
from ws4py.client.threadedclient import WebSocketClient
import _locale
_locale._getdefaultlocale = (lambda *args: ['zh_CN', 'utf8'])
import io
import sys
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Re-wrap stdout so everything printed by this script is encoded as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
# Expected command line: [script, user_id, cursor]. Only the text before the
# first comma of the first argument is used as the user id.
user_id = sys.argv[1].partition(',')[0]
offset = str(sys.argv[2])
class CG_Client(WebSocketClient):
    """Relay client that registers as the ``Python`` peer and prints one reply."""

    def opened(self):
        """Report the connection and register by sending the text ``Python``."""
        print("連接成功")
        # Not read anywhere in this script; kept as in the original.
        self.max_cursor = 0
        self.send("Python")

    def closed(self, code, reason=None):
        """Print the close code and reason."""
        print("Closed down:", code, reason)

    def received_message(self, resp):
        """Decode the reply as UTF-8, print it and close this connection."""
        data = resp.data.decode("utf-8")
        print(data)
        # Close through ``self`` so the class does not depend on a
        # module-level ``ws`` variable being bound to this instance.
        self.close()
# Create and connect the client before the guarded region so that ``ws`` is
# always bound when an exception handler runs.
ws = CG_Client('ws://127.0.0.1:10512/')
ws.connect()
try:
    real_arg = f"/api/feed_backflow/profile_share/v1/?category=profile_article&visited_uid={user_id}&stream_api_version=82&request_source=1&offset={offset}&user_id={user_id}&appId=1286&appType=mobile_detail_web&isAndroid=true&isIOS=false&isMobile=true&cookie_enabled=true&screen_width=288&screen_height=511&browser_language=zh-CN&browser_platform=MacIntel&browser_name=firefox&browser_version=85.0.4183.83&browser_online=true&timezone_name=Asia%2FShanghai"
    # Presumably gives the "Python" registration frame time to reach the
    # relay before the request itself is sent -- TODO confirm.
    time.sleep(0.1)
    ws.send(real_arg)
    ws.run_forever()
except KeyboardInterrupt:
    ws.close()
except Exception:
    # Do not leave the socket open when sending or running fails.
    ws.close()
    raise
get_user_id.py 終端方式
# -*- coding: utf-8 -*-
import time
from ws4py.client.threadedclient import WebSocketClient
import _locale
_locale._getdefaultlocale = (lambda *args: ['zh_CN', 'utf8'])
import io
import sys
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Re-wrap stdout so everything printed by this script is encoded as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
# Only the text before the first comma of the first command-line argument is
# used as the media id.
media_id = sys.argv[1].partition(',')[0]
class CG_Client(WebSocketClient):
    """Relay client that registers as the ``Python`` peer and prints one reply."""

    def opened(self):
        """Report the connection and register by sending the text ``Python``."""
        print("連接成功")
        # Not read anywhere in this script; kept as in the original.
        self.max_cursor = 0
        self.send("Python")

    def closed(self, code, reason=None):
        """Print the close code and reason."""
        print("Closed down:", code, reason)

    def received_message(self, resp):
        """Decode the reply as UTF-8, print it and close this connection."""
        data = resp.data.decode("utf-8")
        print(data)
        # Close through ``self`` so the class does not depend on a
        # module-level ``ws`` variable being bound to this instance.
        self.close()
# Create and connect the client before the guarded region so that ``ws`` is
# always bound when an exception handler runs.
ws = CG_Client('ws://127.0.0.1:10512/')
ws.connect()
try:
    real_arg = f"/user/profile/homepage/share/v7/?media_id={media_id}&request_source=1&appId=1286&appType=mobile_detail_web&isAndroid=true&isIOS=false&isMobile=true&cookie_enabled=true&screen_width=393&screen_height=882&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=85.0.4183.83&browser_online=true&timezone_name=Asia%2FShanghai"
    # Presumably gives the "Python" registration frame time to reach the
    # relay before the request itself is sent -- TODO confirm.
    time.sleep(0.1)
    ws.send(real_arg)
    ws.run_forever()
except KeyboardInterrupt:
    ws.close()
except Exception:
    # Do not leave the socket open when sending or running fails.
    ws.close()
    raise
爬蟲調用者
import time
import requests
import json
import urllib3
from toutiao2_文件方式.get_user_id import get_user, CG_Client
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def open_user():
    """Return the JSON document stored in ``./user.txt`` (read as UTF-8)."""
    # The ``with`` block closes the file; no explicit close() is needed.
    with open('./user.txt', 'r', encoding='utf-8') as f:
        return json.load(f)
def open_data():
    """Return the JSON document stored in ``./data.txt`` (read as UTF-8)."""
    # The ``with`` block closes the file; no explicit close() is needed.
    with open('./data.txt', 'r', encoding='utf-8') as f:
        return json.load(f)
# Exchange a media_id for a user_id
def start_ocean_toutiao_user_id(media_id):
    """Resolve *media_id* to a user id through the local HTTP service.

    Triggers ``/get_user_id`` on 127.0.0.1:4007, waits two seconds, then reads
    the reply with ``open_user``. The HTTP response body is not used.

    Args:
        media_id: Media id to look up; must be convertible with ``int``.

    Returns:
        The ``user_id`` from the reply's ``data`` object when its ``media_id``
        equals *media_id*; ``None`` (after printing a notice) when the reply
        has no ``data``/``media_id`` or the ids differ.
    """
    data = {
        'media_id': media_id,
    }
    requests.get('http://127.0.0.1:4007/get_user_id', params=data, timeout=3)
    # Presumably leaves time for the reply file to be written -- TODO confirm.
    time.sleep(2)
    response = open_user()

    # A reply without a "data" object or "media_id" counts as a mismatch
    # instead of raising on the chained lookup or on int(None).
    payload = response.get('data') or {}
    res_media_id = payload.get('media_id')
    if res_media_id is None or int(res_media_id) != int(media_id):
        print('media不對應,請檢查')
        return None
    return payload.get('user_id')
# Fetch data through the websocket relay
def start_ocean_toutiao_data(user_id, offset):
    """Fetch one page of feed data for *user_id* through the local service.

    Triggers ``/get_data`` on 127.0.0.1:4007 and then reads the reply with
    ``open_data``. The HTTP response body is not used.

    Args:
        user_id: User id to query; ``None`` short-circuits the call.
        offset: Paging offset forwarded unchanged as a query parameter.

    Returns:
        The parsed reply from ``open_data``, or ``None`` (after printing a
        notice) when *user_id* is ``None``.
    """
    if user_id is None:
        print('沒有獲取到user_id,請檢查原因。可能消息堆積錯誤')
        return None
    data = {
        'user_id': user_id,
        'offset': offset
    }
    requests.get('http://127.0.0.1:4007/get_data', params=data, timeout=3)
    response = open_data()
    return response
def get_response(media_id,offset):
    """Resolve *media_id* to a user id, fetch that user's data and return it.

    Both intermediate results are printed: first the user id, then the data.
    """
    resolved_id = start_ocean_toutiao_user_id(media_id)
    print(resolved_id)
    result = start_ocean_toutiao_data(resolved_id, offset)
    print(result)
    return result
if __name__ == '__main__':
    # Manual smoke run: a single iteration with hard-coded sample ids.
    # NOTE(review): the original indentation was lost; the statements below
    # are assumed to all belong to the loop body -- confirm.
    for i in range(1):
        offset = 1587744000
        # media_id = 6860767764
        media_id = 6989633739
        # Step by step: resolve the user id, then fetch and print the data.
        user_id = start_ocean_toutiao_user_id(media_id)
        print(user_id)
        # user_id = 6860406890
        data = start_ocean_toutiao_data(user_id, offset)
        print(data)
        # Same sequence again through the combined helper.
        get_response(media_id, offset)
        pass