爬蟲之selenium爬取京東商品信息


import json
import time
from selenium import webdriver

"""
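Workflow
    1. Create the Chrome driver object and maximize the window
    2. Open the JD product list page
    3. Scroll down step by step (so every lazily loaded part of the page is rendered)
    4. Collect the list of product <li> elements
    5. For each <li>, extract the product name, price and number of reviews
    6. Write the extracted records to a text file
    7. Click "next page" and repeat until there is no next page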
"""
# Crawl a JD (京東) product list with Selenium: for every result page, scroll
# through it, read each product's name, price and review count, and append
# one JSON object per product to '京東.txt'. Stops when the "next page" link
# can no longer be found or clicked.
driver = webdriver.Chrome()
try:
    driver.maximize_window()
    time.sleep(1)
    # JD product list page
    driver.get('https://list.jd.com/list.html?cat=9987,653,655&ev=exbrand%5F12669&sort=sort_rank_asc&trans=1&JL=3_%E5%93%81%E7%89%8C_%E9%AD%85%E6%97%8F%EF%BC%88MEIZU%EF%BC%89#J_crumbsBar')

    # Open the output file once for the whole crawl. Re-opening it with mode
    # 'w' for every page would truncate it each time and keep only the last
    # page's records.
    with open('京東.txt', 'w', encoding='utf-8') as f:
        while True:
            time.sleep(1)
            # Scroll down in 500px steps, pausing after each one, so that
            # content which is only rendered on scroll has time to load.
            for i in range(16):
                driver.execute_script('window.scrollTo(0,{})'.format(i * 500))
                time.sleep(1)

            # The generic find_element(s)(by, value) form is used because the
            # find_element(s)_by_xpath helpers were removed in Selenium 4.3;
            # 'xpath' is the locator strategy string behind By.XPATH.
            lis = driver.find_elements('xpath', '//ul[@class="gl-warp clearfix"]/li')
            for li in lis:
                # Price, number of reviews, product name
                price = li.find_element('xpath', './/strong[@class="J_price"]').text
                buyers = li.find_element('xpath', './div/div[5]/strong').text
                name = li.find_element('xpath', './div/div[4]//em').text

                # Write the record as one JSON object per line, followed by a comma
                dic = {'name': name, 'price': price, 'buyers': buyers}
                json.dump(dic, f, ensure_ascii=False)
                f.write(',\n')

            try:
                # Move on to the next result page.
                driver.find_element('xpath', '//a[@class="pn-next"]').click()
            except Exception as e:
                # Deliberately broad: any failure to find or click the link
                # (e.g. no further page) ends the crawl instead of crashing.
                print(e)
                break

    time.sleep(1)
finally:
    # Always end the browser session, even if scraping failed midway;
    # quit() also shuts down the driver process, not just the window.
    driver.quit()

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM