使用Selenium反爬(美團)


美團的反爬機制非常完善。在用 Selenium 登錄美團的時候,發現網站能檢測到自動化工具並彈出滑塊,然後無論怎麼滑動都通過不了。經過一番搜索後發現,很多網站對 Selenium 都有檢測機制,例如檢測瀏覽器中是否存在 Selenium 特有的標識。接下來我們簡單分享一下如何使用代理訪問美團進行數據采集。

示例如下:

 

# -*- coding:UTF-8 -*-
"""Selenium demo: open a login page in Chrome through a local proxy and crawl.

The script starts Chrome via chromedriver with all traffic routed through the
HTTP proxy at 127.0.0.1:9000, fills in the login form, attempts the slider
verification when it is shown, and then hands over to three placeholder hooks
(navigate / parse / save) that are left for the user to implement.
"""
import time
import re
from datetime import date, timedelta

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

TB_LOGIN_URL = 'https://meituan.com'
# The chromedriver location differs between Windows and Mac installations.
CHROME_DRIVER = '/usr/local/bin/chromedriver'


class SessionException(Exception):
    """Raised when the login session cannot be established."""

    def __init__(self, message):
        # Pass the message (not the exception object itself) to the base class
        # so that ``args`` and ``repr()`` carry the real text.
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message


class Crawler:
    """Drives a Chrome session through login and the crawl hooks."""

    def __init__(self):
        # Created lazily by __init_browser(); None while no session is open.
        self.browser = None

    def start(self, username, password):
        """Run the whole flow: open browser, log in, crawl, save.

        :param username: account name typed into the login form
        :param password: password typed into the login form
        :raises SessionException: if slider verification or login fails
        """
        try:
            print("初始化瀏覽器")
            self.__init_browser()
            print("切換至密碼輸入框")
            self.__switch_to_password_mode()
            time.sleep(0.5)
            print("輸入用戶名")
            self.__write_username(username)
            time.sleep(2.5)
            print("輸入密碼")
            self.__write_password(password)
            time.sleep(3.5)
            print("程序模擬解鎖")
            if self.__lock_exist():
                self.__unlock()
            print("開始發起登錄請求")
            self.__submit()
            time.sleep(4.5)
            # Login succeeded; request the target page directly.
            print("登錄成功,跳轉至目標頁面")
            self.__navigate_to_target_page()
            time.sleep(6.5)
            print("解析頁面文本")
            crawler_list = self.__parse_page_content()
            # Connect to the database and persist the parsed data.
            print("保存數據到mysql數據庫")
            self.__save_list_to_db(crawler_list)
        finally:
            # Always release Chrome and the chromedriver process, including
            # when a step above raised.
            if self.browser is not None:
                self.browser.quit()
                self.browser = None

    def __is_element_exist(self, selector):
        """Return True if an element matching the CSS selector is present.

        :param selector: CSS selector to look up
        :return: True when found, False when Selenium reports no such element
        """
        try:
            self.browser.find_element(By.CSS_SELECTOR, selector)
        except NoSuchElementException:
            return False
        return True

    def __switch_to_password_mode(self):
        """Switch from the QR-code login panel to the password form.

        Clicks the switch control only when the QR-code panel is displayed.
        """
        if self.browser.find_element(By.ID, 'J_QRCodeLogin').is_displayed():
            self.browser.find_element(By.ID, 'J_Quick2Static').click()

    def __write_username(self, username):
        """Clear the account field and type the account name.

        :param username: account name to enter
        """
        username_input_element = self.browser.find_element(By.ID, 'TPL_username_1')
        username_input_element.clear()
        username_input_element.send_keys(username)

    def __write_password(self, password):
        """Clear the password field and type the password.

        :param password: password to enter
        """
        password_input_element = self.browser.find_element(By.ID, 'TPL_password_1')
        password_input_element.clear()
        password_input_element.send_keys(password)

    def __lock_exist(self):
        """Tell whether the slider verification is present and displayed.

        :return: True when the slider wrapper exists and is displayed
        """
        return self.__is_element_exist('#nc_1_wrapper') and \
            self.browser.find_element(By.ID, 'nc_1_wrapper').is_displayed()

    def __unlock(self):
        """Attempt the slider verification by dragging the handle.

        :raises SessionException: if the page shows a slider error message
        """
        bar_element = self.browser.find_element(By.ID, 'nc_1_n1z')
        # Drag the handle 800 px to the right in a single motion.
        ActionChains(self.browser).drag_and_drop_by_offset(bar_element, 800, 0).perform()
        time.sleep(1.5)
        # The screenshot is taken unconditionally, before checking for errors.
        self.browser.get_screenshot_as_file('error.png')
        if self.__is_element_exist('.errloading > span'):
            error_message_element = self.browser.find_element(
                By.CSS_SELECTOR, '.errloading > span')
            error_message = error_message_element.text
            # Presumably resets the page's captcha widget before we bail out.
            self.browser.execute_script('noCaptcha.reset(1)')
            raise SessionException('滑動驗證失敗, message = ' + error_message)

    def __submit(self):
        """Click the login button and surface any error the page reports.

        :raises SessionException: if the login message box is present
        """
        self.browser.find_element(By.ID, 'J_SubmitStatic').click()
        time.sleep(0.5)
        if self.__is_element_exist("#J_Message"):
            error_message_element = self.browser.find_element(
                By.CSS_SELECTOR, '#J_Message > p')
            error_message = error_message_element.text
            raise SessionException('登錄出錯, message = ' + error_message)

    def __navigate_to_target_page(self):
        """Placeholder hook: navigate to the page to crawl (not implemented)."""
        pass

    def __parse_page_content(self):
        """Placeholder hook: parse the page data (not implemented, returns None)."""
        pass

    def __save_list_to_db(self, crawler_list):
        """Placeholder hook: persist the parsed data (not implemented).

        :param crawler_list: whatever __parse_page_content() returned
        """
        pass

    def __init_browser(self):
        """Create the Chrome session, configure it, and open the login URL."""
        options = Options()
        # options.add_argument("--headless")
        # Image content setting; 1 presumably means "allow" -- TODO confirm.
        prefs = {"profile.managed_default_content_settings.images": 1}
        options.add_experimental_option("prefs", prefs)
        # Route all browser traffic through the local HTTP proxy.
        options.add_argument('--proxy-server=http://127.0.0.1:9000')
        options.add_argument('disable-infobars')
        options.add_argument('--no-sandbox')
        # NOTE(review): `executable_path` is the Selenium 3 style; newer
        # Selenium 4 releases expect a `Service` object instead -- confirm the
        # installed version before changing this call.
        self.browser = webdriver.Chrome(executable_path=CHROME_DRIVER, options=options)
        self.browser.implicitly_wait(3)
        self.browser.maximize_window()
        self.browser.get(TB_LOGIN_URL)


if __name__ == '__main__':
    # Run from the command line; replace the placeholders with real credentials.
    Crawler().start('username', 'password')


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM