import selenium
from selenium import webdriver
import time
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By #按照什么方式查找,By.ID,By.CSS_SELECTOR
from selenium.webdriver.common.keys import Keys #鍵盤按鍵操作
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait #等待頁面加載某些元素
from selenium.webdriver.chrome.options import Options
# Keyword typed into the site's search box.
kw = "手機"

# Chrome command-line switches, applied in this order:
#   --disable-gpu                       per the original author's note, recommended
#                                       by Google's documentation to avoid a bug
#   blink-settings=imagesEnabled=false  do not load images, which speeds up pages
#   --headless                          run without a visible window; per the
#                                       original note, Chrome fails to start on a
#                                       display-less Linux system without it
chrome_options = Options()
for _switch in (
    "--disable-gpu",
    "blink-settings=imagesEnabled=false",
    "--headless",
):
    chrome_options.add_argument(_switch)

# The chromedriver executable path is passed positionally, exactly as before.
driver = webdriver.Chrome(
    r"D:\脫產三期視頻\爬蟲3\chromedriver.exe",
    options=chrome_options,
)
# Search the site for `kw` and scrape the first page of results into `products`.

# Field name -> CSS selector, relative to one result card (class "gl-item").
# The fields are: product link, price, title and comment count.
_CARD_SELECTORS = {
    "url": ".p-img a",
    "price": ".p-price i",
    "title": ".p-name a",
    "commit": ".p-commit strong a",
}


def _scrape_card(card):
    """Extract one product record from a single result card.

    Args:
        card: the WebElement of one "gl-item" result card.

    Returns:
        A dict with the keys "url" (the link's href attribute), "price",
        "title" and "commit" (the elements' text), or None when the card is
        missing any of those elements.
    """
    tags = {}
    for field, selector in _CARD_SELECTORS.items():
        # find_elements returns an empty list instead of raising, so an
        # incomplete card can be skipped without aborting the whole scrape.
        matches = card.find_elements(By.CSS_SELECTOR, selector)
        if not matches:
            return None
        tags[field] = matches[0]
    return {
        "url": tags["url"].get_attribute("href"),
        "price": tags["price"].text,
        "title": tags["title"].text,
        "commit": tags["commit"].text,
    }


products = []
try:
    driver.get("https://jd.com")
    # Implicit wait: element lookups keep retrying for up to 3 seconds.
    driver.implicitly_wait(3)

    # Type the keyword into the search box and submit it with ENTER.
    kw_input = driver.find_element(By.ID, "key")
    kw_input.send_keys(kw)
    kw_input.send_keys(Keys.ENTER)

    # Every result card that holds a product's details.
    items = driver.find_elements(By.CLASS_NAME, "gl-item")

    # Explicit wait: up to 10 seconds, polling every 0.5 seconds.
    wait = WebDriverWait(driver, 10, 0.5)
    if items:
        # These waits are page-level (not tied to a particular card), so one
        # pass before the loop is equivalent to repeating them for every card.
        for _selector in _CARD_SELECTORS.values():
            wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, _selector))
            )

    for item in items:
        record = _scrape_card(item)
        if record is not None:
            products.append(record)

    # TODO: pagination is not implemented. The original notes say the next page
    # is reached by clicking the ">" link, and that the result list is then
    # refreshed in place, so the implicit wait does not help: the "gl-item"
    # cards must be explicitly waited for and re-queried after the click.
finally:
    # Always shut the browser down, even if scraping failed part-way, so the
    # headless Chrome process is not left running.
    driver.quit()
def _parse_commit_count(text):
    """Convert a comment-count label such as "500+" or "1.2萬+" to a float.

    Args:
        text: the label exactly as scraped from the page.

    Returns:
        The count as a float. A label containing the ten-thousand unit
        ("萬", or its simplified form "万") is multiplied by 10000. A label
        that is empty once "+" and whitespace are stripped yields 0.0.

    Raises:
        ValueError: if the remaining text is not a valid number.
    """
    count = text.strip().strip("+")
    if not count:
        return 0.0
    for unit in ("萬", "万"):
        if unit in count:
            return float(count.strip(unit)) * 10000
    return float(count)


# Show the raw labels before conversion.
print([i["commit"] for i in products])

# Replace each textual label with its numeric value, in place.
for i in products:
    i["commit"] = _parse_commit_count(i["commit"])

# Ascending by comment count, so the last entry has the highest count.
res = sorted(products, key=lambda d: d["commit"])
# print([i["commit"] for i in res])
if res:
    print("恭喜銷量冠軍:", res[-1])
else:
    # Nothing was scraped; avoid an IndexError on res[-1].
    print("No products were scraped; nothing to rank.")