使用Selenium訪問出現彈窗


大部分商業網站需要我們登錄後才能爬取內容,所以對於爬蟲來說,生成 cookies 給代理使用成為了一件必須要做的事情。今天我們交流一下使用 selenium 訪問目標網站時遇到的一些問題。

因為業務需求,我們需要採集小紅書的一些數據,程序在掛上代理訪問目標網站的時候彈出了驗證框,如圖所示:

fccfb608-f39d-4671-8ceb-0340549e140d.png

這個問題以前從來沒有遇到過,我原以為是代理的問題,咨詢客服後才知道這是瀏覽器驅動和版本的問題,更新到新版本後就解決了。下面我們分享如何使用 chrome driver 來進行登錄和 cookie 的生成。

import json
import os
import random
import time
import zipfile

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def _load_user_agents(filename='useragents.txt'):
    """Return the stripped, non-empty lines of *filename*.

    The file is closed as soon as it has been read. A missing file yields an
    empty list, in which case no User-Agent override is applied.
    """
    try:
        with open(filename, encoding='utf-8') as handle:
            return [line.strip() for line in handle if line.strip()]
    except FileNotFoundError:
        return []


class GenCookies(object):
    """Log in through a Chrome session behind an authenticated proxy and
    return the cookies of that session.

    Usage: ``GenCookies(username=..., password=...).main()``.
    """

    # Candidate User-Agent strings, one per line in useragents.txt.
    USER_AGENT = _load_user_agents()

    # 16yun proxy configuration
    PROXY_HOST = 't.16yun.cn'  # proxy host
    PROXY_PORT = 31111  # proxy port
    PROXY_USER = 'USERNAME'  # proxy username
    PROXY_PASS = 'PASSWORD'  # proxy password

    # Name of the generated extension archive (written to the working directory).
    PLUGIN_FILE = 'proxy_auth_plugin.zip'

    # NOTE(review): this is a Manifest V2 extension; recent Chrome releases
    # are phasing MV2 out -- confirm it still loads on the target Chrome.
    _MANIFEST_JSON = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """

    # Placeholders: host (JS string literal), port (int), username and
    # password (JS string literals).
    _BACKGROUND_JS_TEMPLATE = """
    var config = {
        mode: "fixed_servers",
        rules: {
            singleProxy: {
                scheme: "http",
                host: %s,
                port: %d
            },
            bypassList: ["localhost"]
        }
    };

    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

    function callbackFn(details) {
        return {
            authCredentials: {
                username: %s,
                password: %s
            }
        };
    }

    chrome.webRequest.onAuthRequired.addListener(
        callbackFn,
        {urls: ["<all_urls>"]},
        ['blocking']
    );
    """

    @classmethod
    def _write_proxy_extension(cls):
        """Write the proxy-auth extension archive and return its path."""
        # json.dumps emits valid, fully escaped JavaScript string literals,
        # so quotes or backslashes in the credentials cannot break the script.
        background_js = cls._BACKGROUND_JS_TEMPLATE % (
            json.dumps(cls.PROXY_HOST),
            int(cls.PROXY_PORT),
            json.dumps(cls.PROXY_USER),
            json.dumps(cls.PROXY_PASS),
        )
        with zipfile.ZipFile(cls.PLUGIN_FILE, 'w') as archive:
            archive.writestr("manifest.json", cls._MANIFEST_JSON)
            archive.writestr("background.js", background_js)
        return cls.PLUGIN_FILE

    @classmethod
    def get_chromedriver(cls, use_proxy=False, user_agent=None):
        """Create a Chrome WebDriver.

        :param use_proxy: when true, install the generated extension that
            routes traffic through the configured proxy and answers its
            authentication challenge.
        :param user_agent: optional User-Agent string to send.
        :return: the started ``webdriver.Chrome`` instance. The chromedriver
            binary is expected next to this file.
        """
        chrome_options = webdriver.ChromeOptions()
        if use_proxy:
            chrome_options.add_extension(cls._write_proxy_extension())
        if user_agent:
            chrome_options.add_argument('--user-agent=%s' % user_agent)

        driver_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'chromedriver')
        try:
            # Selenium 4: the driver path goes through a Service object and
            # the options are passed as ``options=``.
            return webdriver.Chrome(
                service=Service(driver_path), options=chrome_options)
        except TypeError:
            # Selenium 3 does not accept ``service=``.
            return webdriver.Chrome(
                executable_path=driver_path, options=chrome_options)

    def __init__(self, username, password):
        """Start a proxied browser for the given account.

        :param username: login name typed into the sign-in form.
        :param password: password typed into the sign-in form.
        """
        self.url = 'https://passport.weibo.cn/signin/login?entry=mweibo&r=https://m.weibo.cn/'
        # Send ONE User-Agent, not the whole list read from the file.
        user_agent = random.choice(self.USER_AGENT) if self.USER_AGENT else None
        self.browser = self.get_chromedriver(use_proxy=True, user_agent=user_agent)
        self.wait = WebDriverWait(self.browser, 20)
        self.username = username
        self.password = password

    def open(self):
        """Open the sign-in page, fill in the credentials and submit.

        :return: None
        """
        self.browser.delete_all_cookies()
        self.browser.get(self.url)
        username = self.wait.until(EC.presence_of_element_located((By.ID, 'loginName')))
        password = self.wait.until(EC.presence_of_element_located((By.ID, 'loginPassword')))
        submit = self.wait.until(EC.element_to_be_clickable((By.ID, 'loginAction')))
        username.send_keys(self.username)
        password.send_keys(self.password)
        time.sleep(1)
        submit.click()

    def password_error(self):
        """Report whether the page shows the wrong-credentials message.

        :return: truthy if the message appears within 5 seconds, else False.
        """
        # NOTE(review): the expected text must match what the page actually
        # renders -- confirm the character variant against the live site.
        try:
            return WebDriverWait(self.browser, 5).until(
                EC.text_to_be_present_in_element((By.ID, 'errorMsg'), '用戶名或密碼錯誤'))
        except TimeoutException:
            return False

    def get_cookies(self):
        """Return the cookies of the current browser session.

        :return: list of cookie dicts as reported by the driver.
        """
        return self.browser.get_cookies()

    def close(self):
        """Quit the browser and end the driver session."""
        self.browser.quit()

    def main(self):
        """Run the login flow and return its result.

        The browser is always shut down before returning, so no Chrome or
        chromedriver process is left behind.

        :return: ``{'status': 2, 'content': <message>}`` when the credentials
            are rejected, otherwise ``{'status': 1, 'content': <cookies>}``.
        """
        try:
            self.open()
            if self.password_error():
                return {
                    'status': 2,
                    'content': '用戶名或密碼錯誤'
                }
            # No captcha required: the login succeeded directly.
            cookies = self.get_cookies()
            return {
                'status': 1,
                'content': cookies
            }
        finally:
            self.close()


if __name__ == '__main__':
    result = GenCookies(
        username='180000000',
        password='16yun',
    ).main()
    print(result)





免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM