瀏覽器抓取真實直播源地址(純前端JS解析)
網上搜索到的各平台直播源地址滿天飛,但經常會失效,因為官方會定期升級系統、修改各種參數或鏈接,讓直播源無法長期使用。所以敝人一直崇尚「授人以魚不如授人以漁」:與其直接把直播源給別人,不如教大家如何去爬取直播源,就算失效了也不怕。
0. 前言
本人業餘時間喜歡用虎牙看直播,所以第一個想到的便是如何抓取虎牙的直播源。
在抓取之前,需要先了解視頻直播源的分類和區別,可以自行了解 hls、flv、m3u8 等知識。
Tips: 本教程只是教大家如何利用前端調試技巧和爬蟲基本操作,不作為商業用途,各位童鞋耗子尾汁。
1. 瀏覽器抓取流程
首先打開虎牙官網,隨便找個直播間:https://m.huya.com/949527 ,這裡使用的是手機端的網頁(因為手機端的頁面比較簡單)。
隨便看了下,沒有 ajax 請求,那麼地址一定是隨頁面一起帶進來的。現在大部分直播網頁都是 SSR(服務器端渲染),所以只能去頁面源代碼裡找找:
好家伙!直接就找到了一個很像地址的東西 liveLineUrl,是一個 m3u8 的地址:
這個網站可以測試播放源是不是好的,來!試一下!
就很完美!
但是就這么簡單的嗎?
我又試了一下我經常看的【一起看】的直播間,來看看電影啥的,結果:
這是咋回事。。。然後對比前後兩個鏈接發現了問題,下面是【一起看】的鏈接:
然後想到 liveLineUrl 這個參數不是全局變量嗎?在控制台打印看一下,再仔細對比,發現參數變了:原來的 fm 參數已經變成了 seqid:
先試下控制台打印的能不能播放:
行,司馬懿出來了,現在只用分析如何破解參數即可。
2. 參數解析
按 Ctrl + Shift + F 搜索 liveLineUrl,然後找到這裡處理 url 的 js,打個斷點調試一下,看看是怎麼處理的:
斷點進入 Object(m.default)(window.liveLineUrl)
可以看到這里就是處理參數的地方,最后返回的就是解析后的參數字符串:
我整理了下解析函數,重新實現了一下:
/**
 * Rebuild a stream URL that carries an `fm` parameter into one signed with a
 * freshly computed `wsSecret` and a `seqid`.
 *
 * The signature input is `${prefix}_0_${streamName}_${seqid}_${wsTime}`, where
 * `prefix` is the part of the Base64-decoded `fm` value before its first "_",
 * and `streamName` is the last path segment without its `.flv` / `.m3u8`
 * extension. The result is hashed with `md5.hex`.
 *
 * Relies on two names being in scope at call time: `Base64.decode` and
 * `md5.hex` (js-md5).
 *
 * @param {string} url - Stream URL, e.g. "//host/path/name.m3u8?wsTime=...&fm=...".
 * @returns {string} The re-signed URL. If `url` has no query string, or lacks
 *   `fm` or `wsTime`, it cannot be re-signed and is returned unchanged
 *   (NOTE(review): per the accompanying article, URLs that already carry
 *   `seqid` instead of `fm` are playable as-is — confirm for your streams).
 * @throws {URIError} If the `fm` value is not valid percent-encoding.
 */
function parseUrl(url){
    const queryStart = url.indexOf("?");
    if (queryStart === -1) {
        return url;
    }
    const mainUrl = url.slice(0, queryStart);

    // Parse by hand instead of URLSearchParams: the values must be forwarded
    // exactly as received (still percent-encoded), not decoded and re-encoded.
    const paramsObj = Object.create(null);
    for (const pair of url.slice(queryStart + 1).split("&")) {
        // Split on the FIRST "=" only, so values that contain "=" survive.
        const eq = pair.indexOf("=");
        if (eq !== -1) {
            paramsObj[pair.slice(0, eq)] = pair.slice(eq + 1);
        }
    }

    // `wsSecret` is pulled out only so the stale signature is not forwarded.
    const {fm, wsTime, wsSecret: staleSecret, ...others} = paramsObj;
    if (fm === undefined || wsTime === undefined) {
        return url;
    }

    // Strip only a trailing extension; the dot is literal.
    const streamName = mainUrl.split("/").pop().replace(/\.(flv|m3u8)$/, "");

    const prefix = Base64.decode(decodeURIComponent(fm)).split("_")[0];
    const time = Math.trunc(1e4 * Date.now() + 1e4 * Math.random());
    const newWsSecret = md5.hex(`${prefix}_0_${streamName}_${time}_${wsTime}`);

    const rest = Object.entries(others)
        .map(([key, value]) => `&${key}=${value}`)
        .join("");
    return `${mainUrl}?wsSecret=${newWsSecret}&wsTime=${wsTime}&u=0&seqid=${time}${rest}`;
}
其中用到了 Base64 和 MD5 相關函數:
// md5下載:https://raw.githubusercontent.com/emn178/js-md5/master/src/md5.js
/**
 * Small Base64 codec that treats text as UTF-8.
 *
 * - `encode(text)`  : string -> Base64 string (text is UTF-8 encoded first).
 * - `decode(base64)`: Base64 string -> string (bytes are UTF-8 decoded after).
 * - `_utf8_encode` / `_utf8_decode` convert between a JS string and a
 *   "byte string" (one char code 0-255 per UTF-8 byte).
 *
 * All locals are declared, so the object is safe in strict mode and in
 * ES modules.
 */
let Base64 = {
    // Index 0-63 is the Base64 alphabet; index 64 ("=") marks padding.
    _keyStr: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=",

    /**
     * Encode a string as Base64 (of its UTF-8 bytes).
     * CRLF line endings are normalized to LF before encoding.
     * @param {string} e - Text to encode.
     * @returns {string} Padded Base64 text.
     */
    encode: function(e) {
        const key = Base64._keyStr;
        const bytes = Base64._utf8_encode(e);
        let out = "";
        for (let pos = 0; pos < bytes.length; pos += 3) {
            const b0 = bytes.charCodeAt(pos);
            // charCodeAt past the end yields NaN, which the bit operators treat as 0.
            const b1 = bytes.charCodeAt(pos + 1);
            const b2 = bytes.charCodeAt(pos + 2);
            const s0 = b0 >> 2;
            const s1 = (b0 & 3) << 4 | b1 >> 4;
            let s2 = (b1 & 15) << 2 | b2 >> 6;
            let s3 = b2 & 63;
            if (Number.isNaN(b1)) {
                s2 = 64;
                s3 = 64;
            } else if (Number.isNaN(b2)) {
                s3 = 64;
            }
            out += key.charAt(s0) + key.charAt(s1) + key.charAt(s2) + key.charAt(s3);
        }
        return out;
    },

    /**
     * Decode Base64 text into a string (bytes interpreted as UTF-8).
     * Characters outside the Base64 alphabet are ignored, and missing
     * trailing "=" padding is tolerated.
     * @param {string} e - Base64 text.
     * @returns {string} Decoded text.
     */
    decode: function(e) {
        const key = Base64._keyStr;
        let input = e.replace(/[^A-Za-z0-9+\/=]/g, "");
        // Without this, a short final group would read "" and indexOf("")
        // returns 0, appending spurious NUL characters to the output.
        while (input.length % 4 !== 0) {
            input += "=";
        }
        let bytes = "";
        for (let pos = 0; pos < input.length; pos += 4) {
            const s0 = key.indexOf(input.charAt(pos));
            const s1 = key.indexOf(input.charAt(pos + 1));
            const s2 = key.indexOf(input.charAt(pos + 2));
            const s3 = key.indexOf(input.charAt(pos + 3));
            bytes += String.fromCharCode(s0 << 2 | s1 >> 4);
            if (s2 !== 64) {
                bytes += String.fromCharCode((s1 & 15) << 4 | s2 >> 2);
            }
            if (s3 !== 64) {
                bytes += String.fromCharCode((s2 & 3) << 6 | s3);
            }
        }
        return Base64._utf8_decode(bytes);
    },

    /**
     * Convert a JS string to a UTF-8 byte string.
     * @param {string} e - Text to convert; "\r\n" is normalized to "\n".
     * @returns {string} One character (code 0-255) per UTF-8 byte.
     */
    _utf8_encode: function(e) {
        const text = e.replace(/\r\n/g, "\n");
        let bytes = "";
        for (let pos = 0; pos < text.length; pos++) {
            const cp = text.codePointAt(pos);
            if (cp < 128) {
                bytes += String.fromCharCode(cp);
            } else if (cp < 2048) {
                bytes += String.fromCharCode(cp >> 6 | 192, cp & 63 | 128);
            } else if (cp < 65536) {
                bytes += String.fromCharCode(cp >> 12 | 224, cp >> 6 & 63 | 128, cp & 63 | 128);
            } else {
                // Astral code point: 4-byte sequence, and it occupied two UTF-16 units.
                bytes += String.fromCharCode(
                    cp >> 18 | 240,
                    cp >> 12 & 63 | 128,
                    cp >> 6 & 63 | 128,
                    cp & 63 | 128
                );
                pos++;
            }
        }
        return bytes;
    },

    /**
     * Convert a UTF-8 byte string back to a JS string.
     * Input is not validated; malformed sequences produce unspecified
     * characters rather than an exception.
     * @param {string} e - One character (code 0-255) per UTF-8 byte.
     * @returns {string} Decoded text.
     */
    _utf8_decode: function(e) {
        let text = "";
        let pos = 0;
        while (pos < e.length) {
            const lead = e.charCodeAt(pos);
            if (lead < 128) {
                text += String.fromCharCode(lead);
                pos += 1;
            } else if (lead > 191 && lead < 224) {
                const c2 = e.charCodeAt(pos + 1);
                text += String.fromCharCode((lead & 31) << 6 | c2 & 63);
                pos += 2;
            } else if (lead > 239) {
                const c2 = e.charCodeAt(pos + 1);
                const c3 = e.charCodeAt(pos + 2);
                const c4 = e.charCodeAt(pos + 3);
                const cp = (lead & 7) << 18 | (c2 & 63) << 12 | (c3 & 63) << 6 | c4 & 63;
                // fromCodePoint throws above U+10FFFF; substitute U+FFFD instead.
                text += cp > 0x10FFFF ? "\uFFFD" : String.fromCodePoint(cp);
                pos += 4;
            } else {
                const c2 = e.charCodeAt(pos + 1);
                const c3 = e.charCodeAt(pos + 2);
                text += String.fromCharCode((lead & 15) << 12 | (c2 & 63) << 6 | c3 & 63);
                pos += 3;
            }
        }
        return text;
    }
}
3. 源碼及播放器實現
來吧,直接上全部代碼:
<!doctype html>
<!--
  Demo page: re-signs a stream URL with parseUrl() and plays it with video.js.
  Requires a local copy of js-md5 at js/md5.js (it provides the global `md5`).
-->
<html lang="zh-CN">
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
    <meta name="applicable-device" content="pc,mobile">
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/video.js@7.4.1/dist/video-js.min.css">
    <title>虎牙直播</title>
</head>
<body>
    <video id="player" class="video-js vjs-16-9 vjs-big-play-centered" controls preload="auto" data-setup="{}">
        <!-- Placeholder source; it is replaced by the script below. -->
        <source src="https://bitdash-a.akamaihd.net/content/sintel/hls/playlist.m3u8" type="application/x-mpegURL">
        <p class="vjs-no-js">
            To view this video please enable JavaScript, and consider upgrading to a web browser that
            <a href="https://videojs.com/html5-video-support/" target="_blank" rel="noopener noreferrer">supports HTML5 video</a>
        </p>
    </video>
    <script src="https://cdn.jsdelivr.net/npm/video.js@7.4.1/dist/video.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/@videojs/http-streaming@1.10.3/dist/videojs-http-streaming.min.js"></script>
    <script src="js/md5.js"></script>
    <script>
        "use strict";

        // Small Base64 codec that treats text as UTF-8. Only decode() is
        // called on this page; encode() is kept so the object stays complete.
        const Base64 = {
            // Index 0-63 is the Base64 alphabet; index 64 ("=") marks padding.
            _keyStr: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=",

            // string -> Base64 of its UTF-8 bytes.
            encode: function(e) {
                const key = Base64._keyStr;
                const bytes = Base64._utf8_encode(e);
                let out = "";
                for (let pos = 0; pos < bytes.length; pos += 3) {
                    const b0 = bytes.charCodeAt(pos);
                    // charCodeAt past the end yields NaN, which the bit operators treat as 0.
                    const b1 = bytes.charCodeAt(pos + 1);
                    const b2 = bytes.charCodeAt(pos + 2);
                    const s0 = b0 >> 2;
                    const s1 = (b0 & 3) << 4 | b1 >> 4;
                    let s2 = (b1 & 15) << 2 | b2 >> 6;
                    let s3 = b2 & 63;
                    if (Number.isNaN(b1)) {
                        s2 = 64;
                        s3 = 64;
                    } else if (Number.isNaN(b2)) {
                        s3 = 64;
                    }
                    out += key.charAt(s0) + key.charAt(s1) + key.charAt(s2) + key.charAt(s3);
                }
                return out;
            },

            // Base64 -> string; ignores foreign characters, tolerates missing padding.
            decode: function(e) {
                const key = Base64._keyStr;
                let input = e.replace(/[^A-Za-z0-9+\/=]/g, "");
                while (input.length % 4 !== 0) {
                    input += "=";
                }
                let bytes = "";
                for (let pos = 0; pos < input.length; pos += 4) {
                    const s0 = key.indexOf(input.charAt(pos));
                    const s1 = key.indexOf(input.charAt(pos + 1));
                    const s2 = key.indexOf(input.charAt(pos + 2));
                    const s3 = key.indexOf(input.charAt(pos + 3));
                    bytes += String.fromCharCode(s0 << 2 | s1 >> 4);
                    if (s2 !== 64) {
                        bytes += String.fromCharCode((s1 & 15) << 4 | s2 >> 2);
                    }
                    if (s3 !== 64) {
                        bytes += String.fromCharCode((s2 & 3) << 6 | s3);
                    }
                }
                return Base64._utf8_decode(bytes);
            },

            // JS string -> UTF-8 byte string ("\r\n" is normalized to "\n").
            _utf8_encode: function(e) {
                const text = e.replace(/\r\n/g, "\n");
                let bytes = "";
                for (let pos = 0; pos < text.length; pos++) {
                    const cp = text.codePointAt(pos);
                    if (cp < 128) {
                        bytes += String.fromCharCode(cp);
                    } else if (cp < 2048) {
                        bytes += String.fromCharCode(cp >> 6 | 192, cp & 63 | 128);
                    } else if (cp < 65536) {
                        bytes += String.fromCharCode(cp >> 12 | 224, cp >> 6 & 63 | 128, cp & 63 | 128);
                    } else {
                        // Astral code point: 4 bytes, and it occupied two UTF-16 units.
                        bytes += String.fromCharCode(
                            cp >> 18 | 240,
                            cp >> 12 & 63 | 128,
                            cp >> 6 & 63 | 128,
                            cp & 63 | 128
                        );
                        pos++;
                    }
                }
                return bytes;
            },

            // UTF-8 byte string -> JS string (input is not validated).
            _utf8_decode: function(e) {
                let text = "";
                let pos = 0;
                while (pos < e.length) {
                    const lead = e.charCodeAt(pos);
                    if (lead < 128) {
                        text += String.fromCharCode(lead);
                        pos += 1;
                    } else if (lead > 191 && lead < 224) {
                        const c2 = e.charCodeAt(pos + 1);
                        text += String.fromCharCode((lead & 31) << 6 | c2 & 63);
                        pos += 2;
                    } else if (lead > 239) {
                        const c2 = e.charCodeAt(pos + 1);
                        const c3 = e.charCodeAt(pos + 2);
                        const c4 = e.charCodeAt(pos + 3);
                        const cp = (lead & 7) << 18 | (c2 & 63) << 12 | (c3 & 63) << 6 | c4 & 63;
                        text += cp > 0x10FFFF ? "\uFFFD" : String.fromCodePoint(cp);
                        pos += 4;
                    } else {
                        const c2 = e.charCodeAt(pos + 1);
                        const c3 = e.charCodeAt(pos + 2);
                        text += String.fromCharCode((lead & 15) << 12 | (c2 & 63) << 6 | c3 & 63);
                        pos += 3;
                    }
                }
                return text;
            }
        };

        // Re-sign a stream URL carrying `fm`: compute a new wsSecret from
        // `${prefix}_0_${streamName}_${seqid}_${wsTime}` and append seqid.
        // URLs without a query string, `fm` or `wsTime` are returned unchanged.
        function parseUrl(url) {
            const queryStart = url.indexOf("?");
            if (queryStart === -1) {
                return url;
            }
            const mainUrl = url.slice(0, queryStart);

            // Values are forwarded exactly as received (still percent-encoded).
            const paramsObj = Object.create(null);
            for (const pair of url.slice(queryStart + 1).split("&")) {
                const eq = pair.indexOf("=");
                if (eq !== -1) {
                    paramsObj[pair.slice(0, eq)] = pair.slice(eq + 1);
                }
            }

            // `wsSecret` is pulled out only so the stale signature is not forwarded.
            const {fm, wsTime, wsSecret: staleSecret, ...others} = paramsObj;
            if (fm === undefined || wsTime === undefined) {
                return url;
            }

            const streamName = mainUrl.split("/").pop().replace(/\.(flv|m3u8)$/, "");
            const prefix = Base64.decode(decodeURIComponent(fm)).split("_")[0];
            const time = Math.trunc(1e4 * Date.now() + 1e4 * Math.random());
            const newWsSecret = md5.hex(`${prefix}_0_${streamName}_${time}_${wsTime}`);

            const rest = Object.entries(others)
                .map(([key, value]) => `&${key}=${value}`)
                .join("");
            return `${mainUrl}?wsSecret=${newWsSecret}&wsTime=${wsTime}&u=0&seqid=${time}${rest}`;
        }

        // Sample liveLineUrl copied from a room page.
        // NOTE(review): wsTime/wsSecret look time-limited, so this sample has
        // probably expired — paste a fresh liveLineUrl before testing.
        const liveLineUrl = "//al.hls.huya.com/src/1423787831-1423787831-6115122170587774976-2847699118-10057-A-0-1-imgplus_2000.m3u8?wsSecret=f9aaf4fcbe42e724d152c265cf1837fb&wsTime=5ff71b32&fm=RFdxOEJjSjNoNkRKdDZUWV8kMF8kMV8kMl8kMw%3D%3D&ctype=tars_mobile&txyp=o%3Aj10%3B&fs=bgct&&sphdcdn=al_7-tx_3-js_3-ws_7-bd_2-hw_2&sphdDC=huya&sphd=264_*-265_*&t=103";

        let streamUrl = parseUrl(liveLineUrl);
        // A protocol-relative URL resolves to file:// when this page is opened
        // from disk, so pin it to https unless the page itself is on http(s).
        if (streamUrl.startsWith("//") && !/^https?:$/.test(window.location.protocol)) {
            streamUrl = `https:${streamUrl}`;
        }

        const player = videojs("#player");
        // Pass the MIME type explicitly rather than relying on extension sniffing.
        player.src({ src: streamUrl, type: "application/x-mpegURL" });
        const started = player.play();
        if (started !== undefined) {
            // Browsers may block autoplay; the user can still press play.
            started.catch((err) => {
                console.warn("Playback did not start automatically:", err);
            });
        }
    </script>
</body>
</html>
看看諸葛亮彈琴退仲達
:
4. 總結
- 目前發現虎牙【一起看】欄目下的直播間需要把 url 做第二次解析,普通直播間的地址可以直接拿來播放;
- 有人可能會問為啥要用移動端的頁面解析,而不用 PC 端的。其實 PC 端也可以,分析源碼可以找到一個 config 對象,裡面就包含了所需的信息,看到這些參數熟悉不?(同樣地,【一起看】欄目的地址需要二次解析):