selenium代碼實例


# 環境安裝:pip install selenium
#  編碼流程:
        1.導包:from selenium import webdriver
        2. 實例化某一款瀏覽器對象
        3.自指定自動化操作代碼

# 使用下面的方法,查找指定的元素進行操作
    find_element_by_id            根據id找節點
    find_elements_by_name         根據name找
    find_elements_by_xpath        根據xpath查找
    find_elements_by_tag_name     根據標籤名找
    find_elements_by_class_name   根據class名字查找
# Save a screenshot of the current page to the given path.
# NOTE(review): `browser` is not defined in this fragment; presumably it is the
# same WebDriver instance as `driver` below — confirm and unify the name.
browser.save_screenshot(r'phantomjs\baidu.png')
# Quit the driver program.
driver.quit()
# 自動打開百度搜索人民幣
# Open Baidu automatically and search for '人民幣'.
from selenium import webdriver
from time import sleep

bro = webdriver.Chrome(executable_path=r'C:\Users\Administrator\Desktop\chromedriver_win32\chromedriver.exe')
try:
    bro.get(url='https://www.baidu.com/')
    sleep(2)
    # Locate the input element whose id is 'kw'.
    text_input = bro.find_element_by_id('kw')
    # send_keys types text into the input tag.
    text_input.send_keys('人民幣')
    sleep(2)
    # Click the element whose id is 'su' (presumably the search button — verify).
    bro.find_element_by_id('su').click()
    sleep(3)
    # Get the source of the current page (the data after rendering).
    print(bro.page_source)
finally:
    # Always close the browser, even when one of the steps above raised.
    bro.quit()
#獲取豆瓣電影中更多電影詳情數據
# Fetch more movie detail data from Douban Movies.
from selenium import webdriver
from time import sleep

# Headless Chrome
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

url = 'https://movie.douban.com/typerank?type_name=%E6%83%8A%E6%82%9A&type=19&interval_id=100:90&action='
bro = webdriver.Chrome(executable_path=r'C:\Users\Administrator\Desktop\chromedriver_win32\chromedriver.exe',chrome_options=chrome_options)
try:
    bro.get(url)
    sleep(3)
    # Execute JS to scroll down to the bottom of the page; repeated three
    # times with pauses in between.
    bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(3)
    bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(3)
    bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(2)
    # Save the page source as it stands after the scrolling above.
    page_text = bro.page_source
    with open('./douban.html','w',encoding='utf-8') as fp:
        fp.write(page_text)
    sleep(1)
finally:
    # Always close the browser, even when one of the steps above raised.
    bro.quit()
# Log in to QQ Zone and save the page shown after login.
from selenium import webdriver
from time import sleep
bro = webdriver.Chrome(executable_path=r'C:\Users\Administrator\Desktop\chromedriver_win32\chromedriver.exe')
url = 'https://qzone.qq.com/'
try:
    bro.get(url=url)
    sleep(2)
    # Switch into a specific iframe; the elements used below are looked up
    # after this switch.
    bro.switch_to.frame('login_frame')
    bro.find_element_by_id('switcher_plogin').click()
    sleep(2)

    # NOTE(review): credentials are hard-coded here; these look like
    # placeholders — never commit real account data this way.
    bro.find_element_by_id('u').send_keys('332424')
    bro.find_element_by_id('p').send_keys('dsaafa020@')

    bro.find_element_by_id('login_button').click()

    # Pause before reading the page source.
    sleep(5)

    page_text = bro.page_source
    with open('qq.html','w',encoding='utf-8') as fp:
        fp.write(page_text)
finally:
    # Always close the browser, even when one of the steps above raised.
    bro.quit()

PhantomJS使用(做無頭瀏覽器)(被棄用)

PhantomJS的作者ariya在PhantomJS的GitHub頁面的issue #15344中寫道:由於缺乏積極的貢獻,我將會存檔該項目。如果將來我們又重新開發這個項目的話,這個項目還會被取出來。因此,所有的之前的關於PhantomJS 2.5(由 @Vitallium 提起)和PhantomJS 2.1.x(由 @pixiuPL 提起)的計劃也會廢棄。接下來,為了防止混淆,上述被廢棄的版本的源碼和二進制包也會被刪除。在未來的通知之前,PhantomJS 2.1.1將會是已知最後的穩定版本。

# Fetch more movie detail data from Douban Movies, driving PhantomJS
# instead of Chrome.
from selenium import webdriver
from time import sleep

url = 'https://movie.douban.com/typerank?type_name=%E6%83%8A%E6%82%9A&type=19&interval_id=100:90&action='
bro = webdriver.PhantomJS(executable_path=r'C:\Users\Administrator\Desktop\爬蟲+數據\day_03_爬蟲\phantomjs-2.1.1-windows\bin\phantomjs.exe')
try:
    bro.get(url)
    sleep(3)
    # Scroll to the bottom of the page three times, pausing in between.
    bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(3)
    bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(3)
    bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(2)
    # Save the page source as it stands after the scrolling above.
    page_text = bro.page_source

    with open('./douban.html','w',encoding='utf-8') as fp:
        fp.write(page_text)

    sleep(1)
finally:
    # Always shut the driver down, even when one of the steps above raised.
    bro.quit()

 # 爬取微信公眾號文章

from selenium import webdriver
from lxml import etree
# sleep is used below but was not imported in this snippet.
from time import sleep

# Headless Chrome
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# URL of the official account's article history, captured with Fiddler.
# The literal is split into adjacent pieces only to keep the lines short;
# Python joins them into one string.
url = ('https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5NzU0MzU0Nw==&scene=124&uin=MzQxNDc2MTIxOQ%3D%3D&key=5fa6'
       '7e91c99877c92cab8f76d9eba741f20e126dcf62c0a8a42af6c159ae91cc6d9b27dd799b89357259a82e1375e1f275a1960f43e003ac9b5baba11703172d08c'
       '866f9bd6aa20534932779237f7fe8&devicetype=Windows+7&version=62080085&lang=zh_CN&a8scene=7&pass_ticket=bB%2BcRIlVVqJKLAN%2FLxVVoWiJ'
       'XecI7JA3Ttwfs%2FWX0zIjxaW1KxSt6Z2wvmXr8tv0&winzoom=1')
bro = webdriver.Chrome(executable_path=r'C:\Users\Administrator\Desktop\chromedriver_win32\chromedriver.exe',chrome_options=chrome_options)
try:
    bro.get(url)
    sleep(3)
    # Scroll to the bottom of the page three times, pausing in between.
    bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(3)
    bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(3)
    bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(2)
    page_text = bro.page_source
    with open('./douban.html','w',encoding='utf-8') as fp:
        fp.write(page_text)
    sleep(1)
finally:
    # Always close the browser, even when one of the steps above raised.
    bro.quit()

# Re-read the saved history page and parse it.
with open('./douban.html','r',encoding="utf-8") as f:
    text_html=f.read()
etree_page=etree.HTML(text_html)
# Collect the links of all articles.
# NOTE(review): this reads an attribute literally named 'hrefs' on the
# matching div elements — verify against the saved page markup.
div_list=etree_page.xpath("//div[@class='weui_media_box appmsg js_appmsg']/@hrefs")
# Download every article of the official account.
for url in div_list:
    bro = None
    try:
        bro = webdriver.Chrome(executable_path=r'C:\Users\Administrator\Desktop\chromedriver_win32\chromedriver.exe',chrome_options=chrome_options)
        bro.get(url)
        page_text = bro.page_source
        t=etree.HTML(page_text)
        # The stripped text of the h2 with id 'activity-name' becomes the
        # file name; a page without that element raises IndexError here and
        # is reported by the handler below.
        text=t.xpath("//h2[@id='activity-name']/text()")[0].strip()
        filename=r"C:\Users\Administrator\Desktop\html\%s.html" % text
        with open(filename,'w',encoding='utf-8') as fp:
            fp.write(page_text)
        print(page_text)
    except Exception as e:
        # Best effort: report the failure and continue with the next article.
        print(e)
    finally:
        # Close this article's browser whether or not the download succeeded.
        if bro is not None:
            bro.quit()

  

  

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM