一、自動登錄抽屜新熱榜
from selenium import webdriver
import time

# Demo 1: log in to dig.chouti.com automatically through a Chrome window
# driven by Selenium.
#
# NOTE(review): the phone number and password below are hard-coded in the
# source. Move them to environment variables or a local config file before
# sharing or committing this script.

driver = webdriver.Chrome(r'D:\BaiduNetdiskDownload\chromedriver_win32\chromedriver.exe')

# Maximize the browser window
driver.maximize_window()

try:
    # Implicit wait is configured before the first request so that every
    # later find_element_* call polls for up to 10 seconds.
    driver.implicitly_wait(10)
    driver.get('https://dig.chouti.com/')
    time.sleep(5)

    # 1. Click the login button
    login_btn = driver.find_element_by_id('login_btn')
    login_btn.click()
    time.sleep(2)

    # 2. Type the user name (phone number)
    phone = driver.find_element_by_class_name('login-phone')
    phone.send_keys('15622792660')

    # 3. Type the password
    pwd = driver.find_element_by_class_name('pwd-password-input')
    pwd.send_keys('kermit46709394')

    # 4. Submit the login form
    login_submit = driver.find_element_by_class_name('btn-large')
    login_submit.click()

    # Keep the window open for a while so the result can be inspected
    time.sleep(20)

# Catch any exception and print it
except Exception as e:
    print(e)

finally:
    # quit() ends the whole WebDriver session and stops the chromedriver
    # process; close() would only close the current window.
    driver.quit()
二、selenium選擇器之Xpath
from selenium import webdriver

# Demo 2: locating elements with XPath selectors.

driver = webdriver.Chrome(r'D:\BaiduNetdiskDownload\chromedriver_win32\chromedriver.exe')

try:
    # Implicit wait: configured before the get request
    driver.implicitly_wait(5)

    driver.get('https://doc.scrapy.org/en/latest/_static/selectors-sample1.html')

    # Explicit wait: used after the get request
    # wait.until(...)

    # Markup of the page being queried:
    #
    # <html>
    #  <head>
    #   <base href='http://example.com/' />
    #   <title>Example website</title>
    #  </head>
    #  <body>
    #   <div id='images'>
    #    <a href='image1.html'>Name: My image 1 <br /><img src='image1_thumb.jpg' /></a>
    #    <a href='image2.html'>Name: My image 2 <br /><img src='image2_thumb.jpg' /></a>
    #    <a href='image3.html'>Name: My image 3 <br /><img src='image3_thumb.jpg' /></a>
    #    <a href='image4.html'>Name: My image 4 <br /><img src='image4_thumb.jpg' /></a>
    #    <a href='image5.html'>Name: My image 5 <br /><img src='image5_thumb.jpg' /></a>
    #   </div>
    #  </body>
    # </html>

    # Locate elements using XPath syntax

    # "/" starts at the root node
    html = driver.find_element_by_xpath('/html')
    # html = driver.find_element_by_xpath('/head')  # raises an error
    print(html.tag_name)

    # "//" matches a node anywhere in the document
    div = driver.find_element_by_xpath('//div')
    print(div.tag_name)

    # "@" filters on an attribute: find the div whose id is "images"
    div = driver.find_element_by_xpath('//div[@id="images"]')
    print(div.tag_name)
    print(div.text)

    # First <a> node
    a = driver.find_element_by_xpath('//a')
    print(a.tag_name)

    # All <a> nodes
    a_s = driver.find_elements_by_xpath('//a')
    print(a_s)

    # href attribute of the first <a> node
    # get_attribute: read one attribute of a node
    href = driver.find_element_by_xpath('//a').get_attribute('href')
    print(href)

finally:
    # quit() ends the whole WebDriver session and stops the chromedriver
    # process; close() would only close the current window.
    driver.quit()
三、selenium剩余操作
# Demo 3: remaining Selenium operations.
# Every example in this section is intentionally commented out; uncomment one
# example at a time to run it.

# ---------------------------------------------------------------------------
# Click and clear operations
# ---------------------------------------------------------------------------
# from selenium import webdriver
# from selenium.webdriver.common.keys import Keys
# import time
#
# driver = webdriver.Chrome(r'D:\BaiduNetdiskDownload\chromedriver_win32\chromedriver.exe')
#
# try:
#     driver.implicitly_wait(10)
#     # 1. Send a request to jd.com
#     driver.get('https://www.jd.com/')
#     # Find the search box and type the first book title
#     input_tag = driver.find_element_by_id('key')
#     input_tag.send_keys('圍城')
#     # Press Enter
#     input_tag.send_keys(Keys.ENTER)
#     time.sleep(2)
#     # Find the search box again, clear it and type the second book title
#     input_tag = driver.find_element_by_id('key')
#     input_tag.clear()
#     input_tag.send_keys('墨菲定律')
#     # Find the search button and click it
#     button = driver.find_element_by_class_name('button')
#     button.click()
#     time.sleep(10)
#
# finally:
#     driver.close()

# ---------------------------------------------------------------------------
# Reading cookies (for reference)
# ---------------------------------------------------------------------------
# from selenium import webdriver
# import time
#
# driver = webdriver.Chrome(r'D:\BaiduNetdiskDownload\chromedriver_win32\chromedriver.exe')
#
# try:
#     driver.implicitly_wait(10)
#     driver.get('https://www.zhihu.com/explore')
#     print(driver.get_cookies())
#
#     time.sleep(10)
# finally:
#     driver.close()

# ---------------------------------------------------------------------------
# Tabs
# ---------------------------------------------------------------------------
# Tab management: tabs can be opened via JavaScript (window.open) or via
# keyboard shortcuts such as Ctrl+T; the JavaScript way is the most portable.
# import time
# from selenium import webdriver
#
# browser = webdriver.Chrome()
# try:
#     browser.get('https://www.baidu.com')
#
#     # execute_script: run JavaScript code
#     # Alert popup
#     # browser.execute_script('alert("tank")')
#     # Open a new browser window
#     browser.execute_script(
#         '''
#         window.open();
#         '''
#     )
#     time.sleep(1)
#     print(browser.window_handles)  # all open tabs
#     # Switch to the second window
#     # New API:
#     browser.switch_to.window(browser.window_handles[1])
#     # Old API:
#     # browser.switch_to_window(browser.window_handles[1])
#
#     # Load taobao in the second window
#     browser.get('https://www.taobao.com')
#     time.sleep(5)
#
#     # Switch back to the first window
#     browser.switch_to_window(browser.window_handles[0])
#     browser.get('https://www.sina.com.cn')
#
#     time.sleep(10)
# finally:
#     browser.close()

# ---------------------------------------------------------------------------
# ActionChains
# ---------------------------------------------------------------------------
# from selenium import webdriver
# from selenium.webdriver import ActionChains
# import time
#
# driver = webdriver.Chrome()
# driver.implicitly_wait(10)
# driver.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
#
# try:
#
#     # driver.switch_to_frame('iframeResult')
#     # Switch into the frame whose id is iframeResult
#     driver.switch_to.frame('iframeResult')
#
#     # Source element
#     draggable = driver.find_element_by_id('draggable')
#
#     # Target element
#     droppable = driver.find_element_by_id('droppable')
#
#     # ActionChains must be given the driver object;
#     # keep the resulting action-chain object in a variable
#     actions = ActionChains(driver)
#
#     # Approach 1: robotic
#     # Move the source image onto the target image instantly
#     # actions.drag_and_drop(draggable, droppable)  # queue an action
#     # actions.perform()  # run the queued actions
#
#
#     # Approach 2: imitate a human
#     source = draggable.location['x']
#     target = droppable.location['x']
#     print(source, target)
#
#     distance = target - source
#     print(distance)
#
#     # perform: every action has to be executed with perform()
#
#     # Click and hold the source image
#     ActionChains(driver).click_and_hold(draggable).perform()
#
#     s = 0
#     while s < distance:
#         # Move by a small offset
#         ActionChains(driver).move_by_offset(xoffset=2, yoffset=0).perform()
#         s += 2
#
#     # Release the mouse button
#     ActionChains(driver).release().perform()
#
#     time.sleep(10)
#
#
# finally:
#     driver.close()

# ---------------------------------------------------------------------------
# Forward / back navigation
# ---------------------------------------------------------------------------
# from selenium import webdriver
# import time
#
# driver = webdriver.Chrome()
#
# try:
#     driver.implicitly_wait(10)
#     driver.get('https://www.jd.com/')
#     driver.get('https://www.baidu.com/')
#     driver.get('https://www.cnblogs.com/')
#
#     time.sleep(2)
#
#     # Go back
#     driver.back()
#     time.sleep(1)
#     # Go forward
#     driver.forward()
#     time.sleep(1)
#     driver.back()
#     time.sleep(10)
#
# finally:
#     driver.close()
四、破解登陸
from selenium import webdriver
from selenium.webdriver import ChromeOptions
import time

# Demo 4: skip the login form by reusing a Chrome user profile and a
# session cookie.
#
# Steps:
#   1. In the file explorer, enable "show hidden files".
#   2. Go to C:\Users\administortra\AppData\Local\Google\Chrome\User Data
#      and delete the Default entry.
#   3. Reopen the browser and log in to the Baidu account
#      - this creates a fresh Default cache entry.
#   4. Add the cookies.
#   5. Close Chrome, then run this program.

# Options object that carries the browser start-up arguments
options = ChromeOptions()

# Location where the profile (and its cookies) is stored
# 'C:\Users\administortra\AppData\Local\Google\Chrome\User Data'
profile_directory = r'--user-data-dir=C:\Users\administortra\AppData\Local\Google\Chrome\User Data'

# Register the user-data directory argument
options.add_argument(profile_directory)

# Hand the options to the driver; chrome_options is the keyword that
# receives the options object
driver = webdriver.Chrome(chrome_options=options)

try:
    driver.implicitly_wait(10)
    driver.get('https://www.baidu.com/')

    # Cookie to supply:
    #   BDUSS:*****

    # Add the user's cookie; the "name" and "value" keys must be lowercase
    driver.add_cookie({"name": "BDUSS", "value": "用戶session字符串"})

    # Reload the page so the cookie takes effect
    driver.refresh()

    time.sleep(10)

finally:
    # quit() ends the whole WebDriver session and stops the chromedriver
    # process; close() would only close the current window.
    driver.quit()
五、selenium爬取京東商品信息
# Demo 5: scrape product information from JD.com with Selenium.
# The whole example is commented out; uncomment it to run.
#
# ''''''
# '''
# Scrape JD.com product information:
#     Request URL:
#         https://www.jd.com/
#     Fields extracted:
#         1. product detail page
#         2. product name
#         3. product price
#         4. number of reviews
#         5. seller
# '''
# from selenium import webdriver
# from selenium.webdriver.common.keys import Keys
# import time
#
# driver = webdriver.Chrome()
#
# try:
#     driver.implicitly_wait(10)
#     # 1. Send a request to the JD home page
#     driver.get('https://www.jd.com/')
#
#     # 2. Type the product name and press Enter to search
#     input_tag = driver.find_element_by_id('key')
#     input_tag.send_keys('macbook')
#     input_tag.send_keys(Keys.ENTER)
#     time.sleep(2)
#
#     # Scroll the page with JavaScript so that all products get loaded
#     js_code = '''
#         window.scrollTo(0,5000);
#     '''
#     driver.execute_script(js_code)  # run the JavaScript code
#
#     # Wait for the data to load
#     time.sleep(2)
#
#     # 3. Find all product containers
#     # good_div = driver.find_element_by_id('J_goodsList')
#     good_list = driver.find_elements_by_class_name('gl-item')
#     n = 1
#     for good in good_list:
#         # Look the fields up with CSS / class selectors
#         # Product link
#         good_url = good.find_element_by_css_selector(
#             '.p-img a').get_attribute('href')
#
#         # Product name
#         good_name = good.find_element_by_css_selector(
#             '.p-name em').text.replace("\n", "--")
#
#         # Product price
#         good_price = good.find_element_by_class_name(
#             'p-price').text.replace("\n", ":")
#
#         # Number of reviews
#         good_commit = good.find_element_by_class_name(
#             'p-commit').text.replace("\n", " ")
#
#         # Seller
#         good_from = good.find_element_by_class_name(
#             'J_im_icon').text.replace("\n", " ")
#
#         good_content = f'''
#                     商品鏈接: {good_url}
#                     商品名稱: {good_name}
#                     商品價格: {good_price}
#                     評價人數: {good_commit}
#                     商品商家: {good_from}
#                     \n
#                     '''
#         print(good_content)
#         with open('jd.txt', 'a', encoding='utf-8') as f:
#             f.write(good_content)
#
#     # next_tag = driver.find_element_by_link_text('下一頁')
#     # next_tag.click()
#
#     time.sleep(10)
#
#
# finally:
#     driver.close()
六、破解極驗滑動驗證
# Demo 6: solving the GeeTest slider captcha on the cnblogs login page.
#
# cnblogs login url:
#   https://account.cnblogs.com/signin?returnUrl=https%3A%2F%2Fwww.cnblogs.com%2F
#
# Procedure:
#   1. Type the user name and password, then click login.
#   2. When the slider captcha pops up, capture the picture with the gap and
#      the complete picture.
#   3. Compare the two pictures pixel by pixel to get the slide distance.
#   4. Build a track that imitates a human drag.
#   5. Perform the drag.

import random
import time

from PIL import Image  # pip3 install pillow
from selenium import webdriver  # drives the browser
from selenium.webdriver import ActionChains  # drags the slider

option = webdriver.ChromeOptions()
option.add_argument('disable-infobars')

driver = webdriver.Chrome(chrome_options=option)


def get_snap(driver):
    """Return the captcha picture cropped out of a full-page screenshot.

    The screenshot is written to ``snap.png`` in the working directory, then
    cropped to the bounding box of the element with class
    ``geetest_canvas_img``.

    :param driver: Selenium WebDriver showing the captcha.
    :return: cropped PIL image.
    """
    # Full-window screenshot taken by Selenium itself
    driver.save_screenshot('snap.png')

    img = driver.find_element_by_class_name('geetest_canvas_img')

    # NOTE(review): location/size are used directly as screenshot pixel
    # coordinates; confirm this holds on displays with a scaling factor.
    left = img.location['x']
    upper = img.location['y']
    right = left + img.size['width']
    lower = upper + img.size['height']

    img_obj = Image.open('snap.png')

    # Crop the screenshot down to the captcha picture
    return img_obj.crop((left, upper, right, lower))


def get_image1(driver):
    """Capture the captcha with the ``geetest_canvas_fullbg`` canvas shown.

    :param driver: Selenium WebDriver showing the captcha.
    :return: cropped PIL image (see ``get_snap``).
    """
    time.sleep(0.2)

    js_code = '''
    var x = document.getElementsByClassName('geetest_canvas_fullbg')[0].style.display="block";
    console.log(x)
    '''
    driver.execute_script(js_code)

    # Wait after toggling the canvas (same order as get_image2) so the
    # screenshot is not taken before the page has been repainted.
    time.sleep(1)

    return get_snap(driver)


def get_image2(driver):
    """Capture the captcha with the ``geetest_canvas_fullbg`` canvas hidden.

    :param driver: Selenium WebDriver showing the captcha.
    :return: cropped PIL image (see ``get_snap``).
    """
    time.sleep(0.2)

    js_code = '''
    var x = document.getElementsByClassName('geetest_canvas_fullbg')[0].style.display="none";
    console.log(x)
    '''
    driver.execute_script(js_code)

    # Give the page time to repaint before taking the screenshot
    time.sleep(1)

    return get_snap(driver)


def get_distance(image1, image2):
    """Return the x offset at which the two pictures first differ.

    Columns are scanned left to right starting at x = 60; a pixel counts as
    different when any of its R, G, B channels differs by 60 or more.

    :param image1: first PIL image; its size bounds the scan.
    :param image2: second PIL image, compared at the same coordinates.
    :return: ``x - 7`` for the first differing column, or ``None`` when no
        pixel differs by the threshold.
    """
    # First column to inspect
    start = 60

    # Per-channel colour difference that marks the gap
    color_num = 60

    # Load the pixel accessors once instead of once per pixel
    pixels1 = image1.load()
    pixels2 = image2.load()
    width, height = image1.size

    for x in range(start, width):
        for y in range(height):
            rgb1 = pixels1[x, y]
            rgb2 = pixels2[x, y]

            r = abs(rgb1[0] - rgb2[0])
            g = abs(rgb1[1] - rgb2[1])
            b = abs(rgb1[2] - rgb2[2])

            if r >= color_num or g >= color_num or b >= color_num:
                # NOTE(review): the 7px correction is an empirical constant
                # from the original code; presumably it compensates for the
                # slider's own margin. Confirm before changing it.
                return x - 7

    return None


def get_stacks(distance):
    """Build a drag track that overshoots the gap and then moves back.

    Uniformly accelerated / decelerated motion:
        v = v0 + a * t
        s = v * t + 0.5 * a * (t ** 2)

    :param distance: slide distance in pixels.
    :return: dict with ``'forward_stacks'`` (list of per-step x offsets whose
        sum reaches ``distance + 20``) and ``'back_stacks'`` (fixed list of
        negative offsets summing to -20).
    """
    # Overshoot by 20px; back_stacks below undoes exactly that amount
    distance += 20

    # Initial velocity
    v0 = 0

    # Candidate accelerations / decelerations
    a_list = [3, 4, 5]

    # Duration of one step
    t = 0.2

    # Distance covered so far
    s = 0

    # Offsets of the forward movement
    forward_stacks = []

    # Accelerate for the first 3/5 of the way, decelerate afterwards
    mid = distance * 3 / 5

    while s < distance:
        if s < mid:
            a = random.choice(a_list)
        else:
            a = -random.choice(a_list)

        v = v0

        # Offset covered in this step, rounded to whole pixels
        stack = round(v * t + 0.5 * a * (t ** 2))

        s += stack
        v0 = v + a * t
        forward_stacks.append(stack)

    back_stacks = [-1, -1, -2, -3, -2, -3, -2, -2, -3, -1]

    return {'forward_stacks': forward_stacks, 'back_stacks': back_stacks}


def main():
    """Log in to cnblogs and drag the GeeTest slider into the gap.

    Uses the module-level ``driver``; the WebDriver session is always shut
    down when the function exits.

    :raises RuntimeError: when the two captcha pictures show no gap.
    """
    try:
        driver.get('https://passport.cnblogs.com/user/signin')
        driver.implicitly_wait(5)

        # 1. Type the user name and password, then click login
        # NOTE(review): credentials are hard-coded; move them out of the
        # source before sharing or committing this script.
        username = driver.find_element_by_id('LoginName')
        password = driver.find_element_by_id('Password')
        login_button = driver.find_element_by_class_name('ladda-label')
        time.sleep(1)
        username.send_keys('_tank_')
        time.sleep(1)
        password.send_keys('k46709394.')

        # Wait until the credentials have been typed before clicking login,
        # otherwise the captcha dialog does not pop up
        time.sleep(1)
        login_button.click()
        # time.sleep(3)

        # 2. Click the slider button so the pictures get rendered
        geetest_button = driver.find_element_by_class_name('geetest_slider_button')
        geetest_button.click()
        time.sleep(0.2)

        # 3. Capture the complete picture
        image1 = get_image1(driver)

        # 4. Capture the picture with the gap
        image2 = get_image2(driver)

        # 5. Compare the two pictures to get the slide distance
        distance = get_distance(image1, image2)
        if distance is None:
            # Fail with a clear message instead of a TypeError in get_stacks
            raise RuntimeError('no gap found between the two captcha pictures')

        # 6. Build a human-like drag track
        stacks = get_stacks(distance)

        # 7. Drag the slider along the track
        forward_stacks = stacks['forward_stacks']
        back_stacks = stacks['back_stacks']

        slider_button = driver.find_element_by_class_name('geetest_slider_button')
        time.sleep(0.2)

        ActionChains(driver).click_and_hold(slider_button).perform()
        time.sleep(0.2)

        for forward_stack in forward_stacks:
            ActionChains(driver).move_by_offset(xoffset=forward_stack, yoffset=0).perform()
            time.sleep(0.1)

        for back_stack in back_stacks:
            ActionChains(driver).move_by_offset(xoffset=back_stack, yoffset=0).perform()
            time.sleep(0.1)

        time.sleep(0.2)

        # Small wiggle before releasing the mouse button
        ActionChains(driver).move_by_offset(xoffset=5, yoffset=0).perform()
        ActionChains(driver).move_by_offset(xoffset=-5, yoffset=0).perform()
        ActionChains(driver).release().perform()

        time.sleep(50)

    finally:
        # quit() ends the whole WebDriver session and stops the chromedriver
        # process; close() would only close the current window.
        driver.quit()


if __name__ == '__main__':
    main()