BrowserMob Proxy介紹
BrowserMob Proxy 會提供一個 ProxyServer,用於轉發代理與攔截流量。這個 server 既可以 standalone 部署以支持遠程調用,也可以 embed 進代碼中。由於 BrowserMob 是用 Java 開發的,JVM 系語言可以做到真正的 embedded;Python 等非 JVM 系語言只能配置其可執行文件的路徑,通過子進程的方式來模擬 embedded。
安裝
pip install browsermob-proxy
下載 browsermob-proxy 的二進制文件(BrowserMob 由 Java 開發,運行它需要已安裝 Java),解壓後記下 bin 目錄中啟動腳本的路徑,後面的代碼會用到
調試
from browsermobproxy import Server
# Start the proxy server; change the path to wherever the downloaded
# browsermob-proxy launcher lives (the leading '**' looks like a placeholder).
server = Server(r'**\browsermob-proxy-2.1.4\bin\browsermob-proxy.bat')
server.start()
# Create a proxy on the running server and print the address it reports.
proxy = server.create_proxy()
print('proxy', proxy.proxy)
使用
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from browsermobproxy import Server
import time
import requests
import json
class BaseFramework(object):
    """Drive Chrome through a BrowserMob proxy and pass captured HAR entries to hooks.

    Constructing an instance starts the BrowserMob server, creates a proxy on
    it and launches Chrome configured to use that proxy.  ``run`` records a
    HAR while a caller-supplied function drives the browser, then hands each
    entry's request/response pair to ``process_request`` and
    ``process_response``, which subclasses are expected to override.

    An instance is single-use: ``run`` shuts down the proxy, the browser and
    the server when it finishes.
    """

    def __init__(self):
        """Start the proxy server, create a proxy and launch Chrome behind it."""
        # Path to the browsermob-proxy.bat launcher (the leading '*' looks
        # like a placeholder for the real install directory).
        self.server = Server(r'*\browsermob-proxy-2.1.4\bin\browsermob-proxy.bat')
        self.server.start()
        try:
            self.proxy = self.server.create_proxy()
            self.query_url = 'http://www.baidu.com'
            chrome_options = Options()
            chrome_options.add_argument('--ignore-certificate-errors')
            chrome_options.add_argument('--proxy-server={0}'.format(self.proxy.proxy))
            # Optional headless mode:
            # chrome_options.add_argument('--headless')
            # Path to chromedriver.exe.  Raw string: '\c' is not a valid escape
            # sequence, so the non-raw form triggers a warning on newer Pythons.
            # NOTE(review): newer Selenium releases replace executable_path
            # with a Service object - confirm against the installed version.
            self.browser = webdriver.Chrome(executable_path=r"*\chromedriver.exe", options=chrome_options)
        except Exception:
            # Do not leave the server process running if setup fails part-way.
            self.server.stop()
            raise

    def process_request(self, request, response):
        """Hook called once per HAR entry; does nothing in the base class.

        Args:
            request: The entry's ``'request'`` value.
            response: The entry's ``'response'`` value.
        """
        pass

    def process_response(self, response, request):
        """Hook called once per HAR entry, after ``process_request``; does nothing here.

        Note the argument order is the reverse of ``process_request``.

        Args:
            response: The entry's ``'response'`` value.
            request: The entry's ``'request'`` value.
        """
        pass

    def run(self, func, *args):
        """Record a HAR while ``func(*args)`` runs, then dispatch every entry to the hooks.

        The proxy, the browser and the server are shut down afterwards, even
        when ``func`` or one of the hooks raises.

        Args:
            func: Callable that drives the browser.
            *args: Positional arguments forwarded to ``func``.
        """
        try:
            self.proxy.new_har(options={
                'captureContent': True,
                'captureHeaders': True
            })
            func(*args)
            result = self.proxy.har
            for entry in result['log']['entries']:
                request = entry['request']
                response = entry['response']
                self.process_request(request, response)
                self.process_response(response, request)
        finally:
            self._shutdown()

    def _shutdown(self):
        """Close the proxy, quit the browser and stop the server, each independently."""
        try:
            self.proxy.close()
        finally:
            try:
                # quit() rather than close(): close() only closes the current
                # window and leaves the driver session running.
                self.browser.quit()
            finally:
                self.server.stop()

    def check_params(self, params: list) -> str:
        """Unimplemented placeholder; as written it returns None despite the ``str`` annotation."""
        pass

    def check_mongo(self, track_data: dict) -> str:
        """Unimplemented placeholder; as written it returns None despite the ``str`` annotation."""
        pass
class Framework(BaseFramework):
    """Example subclass: loads a page and inspects the captured responses."""

    def load(self, url):
        """Open ``url`` in the browser, then sleep for three seconds."""
        self.browser.get(url)
        time.sleep(3)

    def process_request(self, request, response):
        """Per-entry request hook; intentionally does nothing."""
        return None

    def process_response(self, response, request):
        """Per-entry response hook; only entries whose request URL contains '**' are of interest."""
        # Uncomment to list every captured URL while looking for the target:
        # print(request['url'])
        is_target = '**' in request['url']
        if not is_target:
            return
        # Once the URL carrying the needed data has been found, parse it here.
if __name__ == '__main__':
    # Bind the instance to its own lowercase name so the Framework class is
    # not shadowed by an instance of itself.
    framework = Framework()
    url = "**"  # presumably a placeholder for the page to load - replace it
    # run() records traffic while load(url) drives the browser, then
    # dispatches the captured entries to the process_* hooks.
    framework.run(framework.load, url)