python selenium 獲取接口數據。
Selenium沒有直接提供獲取接口響應內容的函數,但是可以通過Chrome DevTools Protocol(CDP)提供的API獲取,使用的命令是Network.getResponseBody。
Chrome DevTools Protocol中Network域的API文檔:https://chromedevtools.github.io/devtools-protocol/tot/Network/
Network.getResponseBody文檔說明:
Network.getResponseBody的參數是requestId。requestId是webdriver為每個請求自動生成的唯一ID,拿到requestId就能拿到該請求返回的內容。
如何獲取requestId?創建webdriver對象時,在配置信息中開啟performance日志,即可獲取每個請求的日志信息,然後通過檢索日志信息找到對應的requestId。
獲取日志信息的webdriver創建代碼(注意,必須傳入配置信息才能獲取日志信息):
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
import time

from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Log levels requested for each channel. The 'performance' channel is the one
# read back below through driver.get_log('performance').
logging_prefs = {
    'browser': 'ALL',
    'driver': 'ALL',
    'performance': 'ALL',
}

# Chrome-specific options: turn on network events in the performance log.
chrome_options = {
    'perfLoggingPrefs': {
        'enableNetwork': True,
    },
    'w3c': False,
}

# These capabilities must be passed when creating the driver; without them the
# log information cannot be retrieved.
caps = {
    'browserName': 'chrome',
    'loggingPrefs': logging_prefs,
    'goog:chromeOptions': chrome_options,
}

driver = webdriver.Chrome(desired_capabilities=caps)
driver.get('https://partner.oceanengine.com/union/media/login/')

# A pause is required here: the log can only be fetched after the page's
# requests have finished, otherwise an error reports that no log information
# is available.
time.sleep(3)

request_log = driver.get_log('performance')
|
打印request_log可以看到它是一個數組。接著遍歷request_log,檢索目標url對應的requestId。例如,要獲取https://s3.pstatp.com/bytecom/resource/union_web2/media/manifest.json對應的requestId,並且獲取接口內容:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
# json.loads is used below; the driver-setup snippet this fragment continues
# from does not import json, so import it here to avoid a NameError.
import json

# URL of the request whose response body we want to read.
TARGET_URL = "https://s3.pstatp.com/bytecom/resource/union_web2/media/manifest.json"

# Walk the performance log (request_log and driver come from the setup snippet)
# looking for the entry that describes the request to TARGET_URL.
for entry in request_log:
    # Each entry's 'message' field is a JSON string; the event parameters
    # are nested under message -> params.
    message = json.loads(entry['message'])['message']['params']

    # Use .get() so that events without a 'request' field are skipped
    # instead of raising KeyError.
    request = message.get('request')
    if request is None:
        continue

    url = request.get('url')
    if url == TARGET_URL:
        # Found the matching request: this is its requestId.
        request_id = message['requestId']
        print(request_id)

        # Fetch the response content for that requestId through CDP.
        content = driver.execute_cdp_cmd(
            'Network.getResponseBody', {'requestId': request_id}
        )
        print(content)
        break
|
完整代碼:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
import json
import time

from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# URL of the request whose response body we want to read.
TARGET_URL = "https://s3.pstatp.com/bytecom/resource/union_web2/media/manifest.json"

# Capabilities that enable logging. They must be passed when the driver is
# created, otherwise the performance log cannot be retrieved.
caps = {
    'browserName': 'chrome',
    'loggingPrefs': {
        'browser': 'ALL',
        'driver': 'ALL',
        'performance': 'ALL',
    },
    'goog:chromeOptions': {
        'perfLoggingPrefs': {
            'enableNetwork': True,
        },
        'w3c': False,
    },
}

driver = webdriver.Chrome(desired_capabilities=caps)
try:
    driver.get('https://partner.oceanengine.com/union/media/login/')

    # A pause is required here: the log can only be fetched after the page's
    # requests have finished, otherwise an error reports that no log
    # information is available.
    time.sleep(3)

    request_log = driver.get_log('performance')
    print(request_log)

    for entry in request_log:
        # Each entry's 'message' field is a JSON string; the event parameters
        # are nested under message -> params.
        message = json.loads(entry['message'])['message']['params']

        # Use .get() so that events without a 'request' field are skipped
        # instead of raising KeyError.
        request = message.get('request')
        if request is None:
            continue

        url = request.get('url')
        if url == TARGET_URL:
            # Found the matching request: this is its requestId.
            request_id = message['requestId']
            print(request_id)

            # Fetch the response content for that requestId through CDP.
            content = driver.execute_cdp_cmd(
                'Network.getResponseBody', {'requestId': request_id}
            )
            print(content)
            break
finally:
    # Always end the session so the browser and driver processes are not
    # left running, even if one of the calls above raised.
    driver.quit()
|
轉:https://blog.csdn.net/mxdzchallpp/article/details/106475193