# 爬取京東數據


'''
爬取京東商品信息:
請求url:
https://www.jd.com/
提取商品信息:
1.商品詳情頁
2.商品名稱
3.商品價格
4.評價人數
5.商品商家(目前的代碼尚未提取此項)
'''
import time

from openpyxl import Workbook
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys


def _format_good(good):
    """Build the text record for one search-result item element.

    Reads the product link, name, price and review count from the child
    elements of ``good`` and returns them as one multi-line string.
    """
    # Product link, taken from the anchor inside the image container.
    good_url = good.find_element(
        By.CSS_SELECTOR, '.p-img a').get_attribute('href')

    # Product name; line breaks inside the name are flattened to "--".
    good_name = good.find_element(
        By.CSS_SELECTOR, '.p-name em').text.replace("\n", "--")

    # Product price; line breaks are flattened to ":".
    good_price = good.find_element(
        By.CLASS_NAME, 'p-price').text.replace("\n", ":")

    # Review count; line breaks are flattened to a space.
    good_commit = good.find_element(
        By.CLASS_NAME, 'p-commit').text.replace("\n", " ")

    return (
        f'\n商品鏈接: {good_url}\n'
        f'商品名稱: {good_name}\n'
        f'商品價格: {good_price}\n'
        f'評價人數: {good_commit}\n\n\n'
    )


def get_good(driver):
    """Scrape every result page reachable from the current search page.

    For each page the window is scrolled down, every ``gl-item`` element
    is formatted, printed and appended to ``JD.csv``, and then the
    ``pn-next`` button is clicked. Pages are walked in a loop (not by
    recursion) so the call depth stays constant and the driver is closed
    exactly once.

    Args:
        driver: A Selenium WebDriver already showing a search result page.

    Raises:
        Any Selenium or I/O error raised while scraping is propagated
        after the driver has been closed.
    """
    try:
        while True:
            # Scroll down via JS so the rest of the product list is loaded.
            driver.execute_script('window.scrollTo(0,5000);')

            # Give the page time to load the additional items.
            time.sleep(2)

            good_list = driver.find_elements(By.CLASS_NAME, 'gl-item')

            # Explicit UTF-8 so the Chinese text does not depend on the
            # platform's default encoding; opened once per page.
            with open('JD.csv', 'a', encoding='utf-8') as f:
                for good in good_list:
                    good_content = _format_good(good)
                    print(good_content)
                    f.write(good_content)

            try:
                next_tag = driver.find_element(By.CLASS_NAME, 'pn-next')
            except NoSuchElementException:
                # No next-page button: nothing more to crawl.
                break

            # NOTE(review): presumably the site keeps the button on the last
            # page and only adds a "disabled" class - confirm on the live page.
            if 'disabled' in (next_tag.get_attribute('class') or ''):
                break

            next_tag.click()
            time.sleep(2)
    finally:
        driver.close()


if __name__ == '__main__':
    # Script entry point: ask for a keyword, search for it on the JD home
    # page, then hand the result page over to get_good().
    good_name = input('請輸入爬取商品信息:').strip()

    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(10)

        # 1. Open the JD home page.
        driver.get('https://www.jd.com/')

        # 2. Type the product name into the search box and press Enter.
        input_tag = driver.find_element(By.ID, 'key')
        input_tag.send_keys(good_name)
        input_tag.send_keys(Keys.ENTER)
        time.sleep(2)
    except Exception:
        # get_good() closes the driver itself, so only clean up here when
        # setup fails before it is reached.
        driver.close()
        raise

    get_good(driver)


# 免責聲明!
#
# 本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
# 粵ICP備18138465號   © 2018-2025 CODEPRJ.COM