使用Selenium访问出现弹窗


大部分商业网站需要我们登录后才能爬取内容,所以对于爬虫来说,生成cookies给代理使用成为了一个必须要做的事情。今天我们交流下关于使用selenium访问目标网站遇到的一些问题。

因为业务需求,我们需要采集小红书的一些数据,程序在挂上代理访问目标网站的时候弹出了验证框,如图所示:

fccfb608-f39d-4671-8ceb-0340549e140d.png

这个问题以前从来没有遇到过,我以为是代理的问题,咨询客服后才知道,这是我的浏览器驱动和浏览器版本的问题,更新到新版本之后就解决了。下面分享一下使用chrome driver进行登录并生成cookie的代码。

import json
import os
import random
import time
import zipfile

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def _load_user_agents(path):
    """Return the raw lines of the user-agent list file at *path*.

    The lines are returned exactly as ``readlines()`` yields them (trailing
    newlines included). The file is closed before returning. Any error raised
    by ``open`` (e.g. a missing file) propagates to the caller.
    """
    with open(path) as ua_file:
        return ua_file.readlines()


class GenCookies(object):
    """Log in through a proxied Chrome session and collect the cookies.

    The login page driven by this class is the Weibo mobile sign-in page
    configured in ``__init__``.
    """

    # Raw lines of useragents.txt, read once when the class is defined.
    # The path is relative to the current working directory.
    USER_AGENT = _load_user_agents('useragents.txt')

    # 16yun proxy configuration
    PROXY_HOST = 't.16yun.cn'  # proxy host
    PROXY_PORT = 31111  # proxy port
    PROXY_USER = 'USERNAME'  # proxy username
    PROXY_PASS = 'PASSWORD'  # proxy password

    @classmethod
    def _pick_user_agent(cls):
        """Return one random, stripped entry of ``USER_AGENT`` (or None).

        ``USER_AGENT`` holds every line of the file; passing the whole list
        to ``--user-agent`` would send the list's repr as the header value.
        """
        agents = cls.USER_AGENT
        if isinstance(agents, str):
            # Tolerate a subclass overriding USER_AGENT with a single string.
            agents = [agents]
        candidates = [line.strip() for line in agents if line.strip()]
        if not candidates:
            return None
        return random.choice(candidates)

    @classmethod
    def get_chromedriver(cls, use_proxy=False, user_agent=None):
        """Create a Chrome WebDriver instance.

        :param use_proxy: when true, write ``proxy_auth_plugin.zip`` into the
            current working directory and load it as an extension that
            configures the proxy and answers its authentication challenge.
        :param user_agent: optional User-Agent string passed to Chrome.
        :return: the ``webdriver.Chrome`` instance.
        """
        chrome_options = webdriver.ChromeOptions()

        if use_proxy:
            # NOTE(review): this is a Manifest V2 extension; recent Chrome
            # releases may refuse to load it -- confirm against the Chrome
            # version in use.
            manifest_json = """
            {
                "version": "1.0.0",
                "manifest_version": 2,
                "name": "Chrome Proxy",
                "permissions": [
                    "proxy",
                    "tabs",
                    "unlimitedStorage",
                    "storage",
                    "<all_urls>",
                    "webRequest",
                    "webRequestBlocking"
                ],
                "background": {
                    "scripts": ["background.js"]
                },
                "minimum_chrome_version":"22.0.0"
            }
            """

            # json.dumps emits quoted, escaped literals, so quotes or
            # backslashes in the credentials cannot break the script.
            background_js = """
            var config = {
                mode: "fixed_servers",
                rules: {
                    singleProxy: {
                        scheme: "http",
                        host: %s,
                        port: parseInt(%s)
                    },
                    bypassList: ["localhost"]
                }
            };

            chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

            function callbackFn(details) {
                return {
                    authCredentials: {
                        username: %s,
                        password: %s
                    }
                };
            }

            chrome.webRequest.onAuthRequired.addListener(
                callbackFn,
                {urls: ["<all_urls>"]},
                ['blocking']
            );
            """ % (
                json.dumps(cls.PROXY_HOST),
                json.dumps(cls.PROXY_PORT),
                json.dumps(cls.PROXY_USER),
                json.dumps(cls.PROXY_PASS),
            )

            pluginfile = 'proxy_auth_plugin.zip'
            with zipfile.ZipFile(pluginfile, 'w') as zp:
                zp.writestr("manifest.json", manifest_json)
                zp.writestr("background.js", background_js)
            chrome_options.add_extension(pluginfile)

        if user_agent:
            chrome_options.add_argument('--user-agent=%s' % user_agent)

        # The chromedriver binary is expected next to this script.
        path = os.path.dirname(os.path.abspath(__file__))
        # NOTE(review): positional executable path and ``chrome_options=`` are
        # the Selenium 3 calling convention; newer Selenium releases expect
        # ``service=``/``options=`` -- confirm the installed version.
        driver = webdriver.Chrome(
            os.path.join(path, 'chromedriver'),
            chrome_options=chrome_options)
        return driver

    def __init__(self, username, password):
        """Start a proxied browser and store the login credentials.

        :param username: account name typed into the login form.
        :param password: account password typed into the login form.
        """
        self.url = 'https://passport.weibo.cn/signin/login?entry=mweibo&r=https://m.weibo.cn/'
        self.browser = self.get_chromedriver(
            use_proxy=True, user_agent=self._pick_user_agent())
        self.wait = WebDriverWait(self.browser, 20)
        self.username = username
        self.password = password

    def open(self):
        """Open the login page, fill in the credentials and submit the form.

        :return: None
        """
        self.browser.delete_all_cookies()
        self.browser.get(self.url)
        username = self.wait.until(EC.presence_of_element_located((By.ID, 'loginName')))
        password = self.wait.until(EC.presence_of_element_located((By.ID, 'loginPassword')))
        submit = self.wait.until(EC.element_to_be_clickable((By.ID, 'loginAction')))
        username.send_keys(self.username)
        password.send_keys(self.password)
        time.sleep(1)
        submit.click()

    def password_error(self):
        """Report whether the page shows the wrong-credentials message.

        Waits up to 5 seconds for the ``errorMsg`` element to contain the
        error text.

        :return: the wait result when the text appears, False on timeout.
        """
        try:
            return WebDriverWait(self.browser, 5).until(
                EC.text_to_be_present_in_element((By.ID, 'errorMsg'), '用户名或密码错误'))
        except TimeoutException:
            return False

    def get_cookies(self):
        """Return the cookies currently held by the browser session."""
        return self.browser.get_cookies()

    def close(self):
        """Quit the browser so the Chrome/chromedriver processes do not linger."""
        self.browser.quit()

    def main(self):
        """Run the login flow and return its outcome.

        :return: ``{'status': 2, 'content': <error text>}`` when the
            wrong-credentials message is shown, otherwise
            ``{'status': 1, 'content': <cookies>}``.
        """
        self.open()
        if self.password_error():
            return {
                'status': 2,
                'content': '用户名或密码错误'
            }
        # If no captcha is required, the login succeeds directly.
        cookies = self.get_cookies()
        return {
            'status': 1,
            'content': cookies
        }


if __name__ == '__main__':
    generator = GenCookies(
        username='180000000',
        password='16yun',
    )
    try:
        result = generator.main()
    finally:
        # Always shut the browser down, even if the login flow raised.
        generator.close()
    print(result)





免责声明!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系本站邮箱yoyou2525@163.com删除。



 
粤ICP备18138465号  © 2018-2025 CODEPRJ.COM