selenium自帶抓包的使用,desired_capabilities


###

做爬蟲的時候,有時候遇到需要的數據在加載資源當中,通常做法是拼接url,然后獲取數據,但首先需要進行分析,如果拼接中的參數有加密的情況時,如果不能模擬算法生成正確的參數,那就很頭疼。而訪問performance,可以獲得加載網站時的資源請求信息,可以通過這一特點,獲取url和數據。

 

#### 

 

# from selenium import webdriver
# from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
#
# capabilities = DesiredCapabilities.CHROME
# capabilities['loggingPrefs'] = {'browser': 'ALL'}
#
# driver = webdriver.Chrome(desired_capabilities=capabilities)
#
# driver.get('https://www.baidu.com')
#
# # print console log messages
# for entry in driver.get_log('browser'):
#     print(entry)
"""
entry格式:
{'level': 'SEVERE', 'message': 'https://open.ccod.com/WARTC/cphoneRTC/verto-min.js 2086:28 "INVALID METHOD OR NON-EXISTANT CALL REFERENCE IGNORED" "verto.clientReady"', 'source': 'console-api', 'timestamp': 1626147049481}
其中source:

console-api 控制台日志
network 網絡日志
"""

#
# import time
#
# from selenium import webdriver
# from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# import json
#
#
# class Mychrome:
#
#     def __init__(self):
#         self.options = webdriver.ChromeOptions()
#         self.flash_urls = []
#         self.set_browser()
#
#     def set_browser(self):
#
#         prefs = {
#             "profile.managed_default_content_settings.images": 1,
#
#         }
#         if self.flash_urls is not None and len(self.flash_urls) != 0:
#             prefs['profile.managed_plugins_allowed_for_urls'] = self.flash_urls
#         self.options.add_experimental_option('prefs', prefs)
#         self.options.add_experimental_option('w3c', False)
#
#         # 方法1
#         # capabilities = DesiredCapabilities.CHROME
#         # capabilities['loggingPrefs'] = {"performance","all"}
#         # self.driver = webdriver.Chrome(
#         #     desired_capabilities=capabilities
#         # )
#
#         # 方法2
#         # self.options.add_experimental_option("excludeSwitches", ['enable-automation'])  # window.navigator.webdriver設置為undefined,逃過網站的防爬檢查,headless無效
#         desired_capabilities = self.options.to_capabilities()  # 將功能添加到options中
#         desired_capabilities['loggingPrefs'] = {
#             "performance": "ALL"  # 添加日志
#         }
#         self.driver = webdriver.Chrome(
#             desired_capabilities=desired_capabilities
#         )
#
#     def gethtml(self):
#         url = 'http://www.baidu.com'
#         self.driver.get(url)
#         print(self.driver.get_log('performance'))
#         print('-' * 60)
#         print(self.driver.get_log('performance'))
#         for entry in self.driver.get_log('performance'):
#             params = json.loads(entry.get('message')).get('message')
#             print(params.get('request'))  # 請求連接 包含錯誤連接
#             print(params.get('response'))  # 響應連接 正確有返回值得連接
#
#
# if __name__ == '__main__':
#     browser = Mychrome().gethtml()


import json
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import time

caps = {
    'browserName': 'chrome',
    'loggingPrefs': {
        'browser': 'ALL',
        'driver': 'ALL',
        'performance': 'ALL',
    },
    'goog:chromeOptions': {
        'perfLoggingPrefs': {
            'enableNetwork': True,
        },
        'w3c': False,
    },
}
driver = webdriver.Chrome(desired_capabilities=caps)

driver.get('https://www.kancloud.cn/ccjin/yingq/1631612')  # 免費天氣接口
# driver.get('https://www.tianqiapi.com/free/day?appid=23035354&appsecret=8YvlPNrz')  # 免費天氣接口
# 必須等待一定的時間,不然會報錯提示獲取不到日志信息,因為絮叨等所有請求結束才能獲取日志信息
time.sleep(3)

driver.find_element_by_xpath('//*[@id="main"]/div/div[2]/div[2]/div/div[3]/span[2]/a').click()

time.sleep(3)

driver.find_element_by_xpath('//*[@id="main"]/div/div[2]/div[2]/div/div[3]/span[2]/a').click()

time.sleep(3)

request_log = driver.get_log('performance')
# print(request_log)
print("len(request_log)", len(request_log))

for i in range(len(request_log)):
    message = json.loads(request_log[i]['message'])
    message = message['message']['params']
    # .get() 方式獲取是了避免字段不存在時報錯
    request = message.get('request')
    if (request is None):
        continue

    url = request.get('url')
    print("url", url)
    if (url == "https://www.tianqiapi.com/free/day?appid=23035354&appsecret=8YvlPNrz"):
        # 得到requestId
        print(message['requestId'])
        # 通過requestId獲取接口內容
        content = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': message['requestId']})
        print(content)
        break


driver.close()

 

 

 

####


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM