selenium
概念:用來完成瀏覽器自動化相關的操作。可以通過代碼的形式制定一些基於瀏覽器自動化的相關操作(行為動作),當代碼執行後,瀏覽器就會自動觸發相關的事件。 環境安裝:(1) pip install selenium;(2) 下載對應瀏覽器的驅動程序。 編碼流程:(1) 導包:from selenium import webdriver;(2) 實例化某一款瀏覽器對象;(3) 制定相關的行為動作。
訪問百度
# Demo: open Baidu in Chrome, type a query into the search box and submit it.
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By

# Start a Chrome session driven by the chromedriver binary next to this script.
bro = webdriver.Chrome(executable_path='./chromedriver.exe')
try:
    bro.get('https://www.baidu.com')  # open the Baidu home page
    sleep(2)

    # Locate the search input by its id and type the query.
    # find_element(By.ID, ...) is used instead of find_element_by_id because
    # newer Selenium releases no longer provide the find_element_by_* helpers.
    tag_input = bro.find_element(By.ID, 'kw')
    tag_input.send_keys('人民幣')
    sleep(2)

    # Locate the search button by its id and click it to submit the query.
    btn = bro.find_element(By.ID, 'su')
    btn.click()
    sleep(2)
finally:
    # Always close the browser, even when one of the steps above raised.
    bro.quit()
滑動
# Demo: scroll a page with JavaScript, click a link, then dump the page HTML.
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By

bro = webdriver.Chrome(executable_path='./chromedriver.exe')
try:
    bro.get('https://xueqiu.com/')
    sleep(2)

    # Scroll down by executing JavaScript in the page.
    # document.body.scrollHeight is the full scrollable height of the page
    # body, so scrollTo(0, scrollHeight) jumps to the current bottom.
    js = 'window.scrollTo(0,document.body.scrollHeight)'
    for _ in range(4):
        bro.execute_script(js)
        sleep(2)  # pause between scrolls, as in the original step sequence

    # Locate the "load more" link by XPath and click it.
    # find_element(By.XPATH, ...) replaces find_element_by_xpath, which newer
    # Selenium releases no longer provide.
    a_tag = bro.find_element(
        By.XPATH, '//*[@id="app"]/div[3]/div/div[1]/div[2]/div[2]/a')
    a_tag.click()
    sleep(5)

    # page_source is the HTML of the page as currently rendered in the
    # browser, i.e. including the dynamically loaded content.
    print(bro.page_source)
finally:
    # Always close the browser, even when one of the steps above raised.
    bro.quit()
PhantomJs
# PhantomJS is a browser without a visible UI (no installation required).
# Demo: load a page in PhantomJS, take screenshots before and after scrolling,
# then dump the page HTML.
from time import sleep

from selenium import webdriver

# NOTE(review): newer Selenium releases dropped webdriver.PhantomJS -- confirm
# that the installed Selenium version still provides it before running this.
bro = webdriver.PhantomJS(executable_path=r'C:\Users\Administrator\Desktop\爬蟲+數據\爬蟲day03\phantomjs-2.1.1-windows\bin\phantomjs.exe')
try:
    bro.get('https://xueqiu.com/')
    sleep(2)

    # There is no window to look at, so screenshots show what was rendered.
    bro.save_screenshot('./1.png')

    # Scroll down to the bottom of the page by executing JavaScript.
    js = 'window.scrollTo(0,document.body.scrollHeight)'
    for _ in range(4):
        bro.execute_script(js)
        sleep(2)

    bro.save_screenshot('./2.png')

    # Left disabled in the original notes: clicking the "load more" link.
    # a_tag = bro.find_element_by_xpath('//*[@id="app"]/div[3]/div/div[1]/div[2]/div[2]/a')
    # bro.save_screenshot('./2.png')
    # a_tag.click()

    sleep(2)

    # page_source is the HTML of the page as currently rendered (dynamic).
    print(bro.page_source)
finally:
    # Always shut the PhantomJS process down, even when a step above raised.
    bro.quit()
谷歌無頭瀏覽器
# Demo: run the Baidu search in Chrome without a visible window.
from time import sleep

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Build an options object that makes Chrome start in headless (no UI) mode.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

bro = webdriver.Chrome(executable_path='./chromedriver.exe',
                       options=chrome_options)
try:
    bro.get('https://www.baidu.com')
    sleep(2)

    # No window is shown, so save a screenshot of the loaded page.
    bro.save_screenshot('1.png')

    # Locate the search input by its id and type the query.
    # find_element(By.ID, ...) replaces find_element_by_id, which newer
    # Selenium releases no longer provide.
    tag_input = bro.find_element(By.ID, 'kw')
    tag_input.send_keys('人民幣')
    sleep(2)

    # Locate the search button by its id and click it.
    btn = bro.find_element(By.ID, 'su')
    btn.click()
    sleep(2)

    print(bro.page_source)
finally:
    # Always close the browser, even when one of the steps above raised;
    # a leaked headless Chrome has no window to close by hand.
    bro.quit()
前進和後退
# Demo: browser history navigation (back and forward).
from time import sleep

from selenium import webdriver

bro = webdriver.Chrome(executable_path='./chromedriver.exe')
try:
    # Visit three pages in order so that there is history to navigate.
    for page_url in ('https://www.baidu.com',
                     'http://www.goubanjia.com/',
                     'https://www.taobao.com'):
        bro.get(page_url)
        sleep(1)

    bro.back()      # go one step back in the session history
    sleep(1)
    bro.forward()   # and one step forward again
    sleep(1)

    print(bro.page_source)
finally:
    # Always close the browser, even when one of the steps above raised.
    bro.quit()
動作鏈一
# Demo: action chain that presses the mouse on an element and moves it in
# small horizontal steps.
from time import sleep

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

bro = webdriver.Chrome(executable_path='./chromedriver.exe')
try:
    url = 'https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
    bro.get(url=url)

    # The target element lives inside an iframe, so the driver has to switch
    # into that frame before the element can be located.
    bro.switch_to.frame('iframeResult')

    # find_element(By.ID, ...) replaces find_element_by_id, which newer
    # Selenium releases no longer provide.
    source_tag = bro.find_element(By.ID, 'draggable')

    # Create the action chain and queue a click-and-hold on the element.
    action = ActionChains(bro)
    action.click_and_hold(source_tag)

    for _ in range(4):
        # perform() is what actually executes the queued actions.
        action.move_by_offset(20, 0).perform()
        sleep(1)
finally:
    # Always close the browser, even when one of the steps above raised.
    bro.quit()
動作鏈二
# Demo: action chain that drags one element onto another, with Chrome started
# without the 'enable-automation' switch.
from time import sleep

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By

# Exclude the 'enable-automation' switch; per the original notes this is meant
# to keep Selenium from being detected by the page.
option = ChromeOptions()
option.add_experimental_option('excludeSwitches', ['enable-automation'])

bro = webdriver.Chrome(executable_path='./chromedriver.exe', options=option)
try:
    url = 'https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
    bro.get(url=url)

    # The elements live inside an iframe, so the driver has to switch into
    # that frame before they can be located.
    bro.switch_to.frame('iframeResult')

    # find_element(By.ID, ...) replaces find_element_by_id, which newer
    # Selenium releases no longer provide.
    source_tag = bro.find_element(By.ID, 'draggable')
    target_tag = bro.find_element(By.ID, 'droppable')

    # Create the action chain, queue the drag-and-drop, then execute it.
    action = ActionChains(bro)
    action.drag_and_drop(source_tag, target_tag)
    action.perform()

    sleep(3)  # leave the result visible for a moment before closing
finally:
    # The original left quit() commented out, which leaks the browser and the
    # chromedriver process on every run; close them unconditionally instead.
    bro.quit()