'''
Crawl product information from JD.com.

Request URL:
    https://www.jd.com/
Information listed for extraction:
    1. Product detail page link
    2. Product name
    3. Product price
    4. Number of reviews
    5. Seller (listed here, but not extracted by the code below)
'''
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time


def _scrape_current_page(driver):
    '''Print every product on the current result page and append it to jd.txt.

    Args:
        driver: Selenium WebDriver currently showing a search result page.
    '''
    # Scroll the window via JS so that all products on the page get loaded.
    js_code = '''
    window.scrollTo(0,5000);
    '''
    driver.execute_script(js_code)

    # Wait for the additional data to load.
    time.sleep(2)

    # Each product on the page is an element with class "gl-item".
    good_list = driver.find_elements(By.CLASS_NAME, 'gl-item')

    # Open the output file once per page instead of once per product.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        for good in good_list:
            # Product detail link
            good_url = good.find_element(
                By.CSS_SELECTOR, '.p-img a').get_attribute('href')

            # Product name (internal line breaks flattened to "--")
            good_name = good.find_element(
                By.CSS_SELECTOR, '.p-name em').text.replace("\n", "--")

            # Product price
            good_price = good.find_element(
                By.CLASS_NAME, 'p-price').text.replace("\n", ":")

            # Number of reviews
            good_commit = good.find_element(
                By.CLASS_NAME, 'p-commit').text.replace("\n", " ")

            good_content = f'''
            商品鏈接: {good_url}
            商品名稱: {good_name}
            商品價格: {good_price}
            評價人數: {good_commit}
            \n
            '''
            print(good_content)
            f.write(good_content)


def get_good(driver, max_pages=None):
    '''Scrape every search result page, then shut the driver down.

    Pages are walked iteratively: scrape the current page, click the
    "next page" control (class "pn-next"), repeat.  The driver is always
    shut down exactly once when this function returns or raises.

    Args:
        driver: Selenium WebDriver already showing the first result page.
        max_pages: Optional upper bound on the number of pages to scrape;
            None (the default) means "until there is no usable next button".
    '''
    try:
        page = 0
        while True:
            _scrape_current_page(driver)
            page += 1

            if max_pages is not None and page >= max_pages:
                break

            # find_elements returns an empty list (instead of raising) when
            # the next-page control is absent, which ends the crawl cleanly.
            next_tags = driver.find_elements(By.CLASS_NAME, 'pn-next')
            if not next_tags:
                break
            next_tag = next_tags[0]

            # NOTE(review): assumes the site marks the last page's button
            # with an extra "disabled" class - confirm against the live page.
            css_classes = (next_tag.get_attribute('class') or '').split()
            if 'disabled' in css_classes:
                break

            next_tag.click()

            # Give the next page time to start loading.
            time.sleep(2)

        # Keep the browser open briefly after the last page, as the original
        # did before closing (now once, not once per page).
        time.sleep(10)

    finally:
        # quit() ends the whole WebDriver session (browser and driver
        # process); close() would only close the current window.
        driver.quit()


def main():
    '''Ask for a search term, search JD.com for it and scrape the results.'''
    good_name = input('請輸入爬取商品信息:').strip()

    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(10)
        # 1. Open the JD home page.
        driver.get('https://www.jd.com/')

        # 2. Type the product name into the search box and press Enter.
        input_tag = driver.find_element(By.ID, 'key')
        input_tag.send_keys(good_name)
        input_tag.send_keys(Keys.ENTER)
        time.sleep(2)
    except Exception:
        # get_good() owns the shutdown once it runs; if we never get that
        # far, release the browser here so it is not leaked.
        driver.quit()
        raise

    get_good(driver)


if __name__ == '__main__':
    main()