一、常用基础设置
# Baseline Selenium/Chrome setup: build the launch options, start the
# browser, then hide the navigator.webdriver flag on every new document.
from selenium import webdriver  # Selenium browser automation entry point

from auth_proxy import proxyauth_plugin_path

options = webdriver.ChromeOptions()  # holds every Chrome launch setting

# Command-line switches, added in the same relative order as before.
for chrome_switch in (
    '--incognito',                       # incognito (private) mode
    # '--headless',                      # headless mode (uncomment to enable)
    '--proxy-server=http://host:port',   # proxy without username/password
    "disable-blink-features=AutomationControlled",  # remove webdriver traces
    '--disable-gpu',                     # works around a known bug
    '--no-sandbox',                      # disable the sandbox; addresses the
                                         # "DevToolsActivePort file doesn't
                                         # exist" error
):
    options.add_argument(chrome_switch)

# Authenticated (private) proxy: load the generated extension instead.
# options.add_extension(proxyauth_plugin_path)

# Browser profile preferences, collected in a single literal.
prefs = {
    # Do not load images, which speeds up page loads.
    'profile.managed_default_content_settings.images': 2,
    # Suppress the "save password" prompt shown after logging in.
    "credentials_enable_service": False,
    "profile.password_manager_enabled": False,
    # Disable browser notification pop-ups.
    'profile.default_content_setting_values': {'notifications': 2},
}

# The first two options hide the "Chrome is being controlled by automated
# test software" notice; the third installs the preferences above.
experimental_options = (
    ('useAutomationExtension', False),
    ('excludeSwitches', ['enable-automation']),
    ('prefs', prefs),
)
for option_name, option_value in experimental_options:
    options.add_experimental_option(option_name, option_value)

# NOTE(review): `executable_path` is only accepted by older Selenium
# releases - confirm against the installed Selenium version.
driver = webdriver.Chrome(
    executable_path="xxx/chromedriver.exe",
    options=options,
)
driver.maximize_window()  # maximize the window once the browser is up

# Evade webdriver detection: this script is registered to run on every new
# document and makes navigator.webdriver report undefined.
stealth_script = {
    "source": """ Object.defineProperty(navigator,'webdriver',{ get: () => undefined }) """
}
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", stealth_script)
二、用于设置上述私密代理的插件代码(auth_proxy.py)
from selenium import webdriver


def create_proxyauth_extension(proxy_host, proxy_port, proxy_username,
                               proxy_password, scheme='http',
                               plugin_path=None):
    """Write a Chrome extension zip that configures an authenticated proxy.

    The archive contains a ``manifest.json`` and a ``background.js``. The
    script sets a fixed proxy server and answers proxy authentication
    requests with the supplied credentials.

    Every value is embedded into the generated JavaScript as a JSON literal,
    so quotes, backslashes or newlines in the host, username or password
    cannot break or alter the script.

    Args:
        proxy_host (str): Domain or IP address, e.g. ``proxy.domain.com``.
        proxy_port (int): Proxy port.
        proxy_username (str): Authentication username.
        proxy_password (str): Authentication password.
        scheme (str): Proxy scheme, ``http`` by default.
        plugin_path (str): Where to write the zip. Defaults to
            ``vimm_chrome_proxyauth_plugin.zip`` in the working directory.

    Returns:
        str: The path of the written extension archive (``plugin_path``).
    """
    import json
    import string
    import zipfile

    if plugin_path is None:
        plugin_path = 'vimm_chrome_proxyauth_plugin.zip'

    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """

    # The placeholders carry no surrounding quotes: json.dumps() below
    # produces complete, correctly escaped literals for each value.
    background_js = string.Template(
        """
        var config = {
            mode: "fixed_servers",
            rules: {
                singleProxy: {
                    scheme: ${scheme},
                    host: ${host},
                    port: parseInt(${port})
                },
                bypassList: ["foobar.com"]
            }
        };

        chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

        function callbackFn(details) {
            return {
                authCredentials: {
                    username: ${username},
                    password: ${password}
                }
            };
        }

        chrome.webRequest.onAuthRequired.addListener(
            callbackFn,
            {urls: ["<all_urls>"]},
            ['blocking']
        );
        """
    ).substitute(
        host=json.dumps(proxy_host),
        port=json.dumps(proxy_port),
        username=json.dumps(proxy_username),
        password=json.dumps(proxy_password),
        scheme=json.dumps(scheme),
    )

    with zipfile.ZipFile(plugin_path, 'w') as zp:
        zp.writestr("manifest.json", manifest_json)
        zp.writestr("background.js", background_js)

    return plugin_path


# Placeholder values: replace every argument below with the real proxy
# settings before running. `端口号` is a stand-in for the integer port and
# is not defined anywhere, so the module fails to import until it is replaced.
proxyauth_plugin_path = create_proxyauth_extension(
    proxy_host="http-xxxxxxxx.com",  # proxy server
    proxy_port=端口号,  # proxy port
    proxy_username="用户名",  # authentication username
    proxy_password="密码",  # authentication password
)
注意:运行前请务必将 create_proxyauth_extension 调用中的代理服务器域名、端口号(整数)、用户名和密码替换为实际值,否则脚本无法运行。