能繞赤道(4萬公里)5圈的車是什麼樣的?Python爬取懂車帝網站數據,並做數據可視化展示二手車概況


知識點:

  • requests 發送網絡請求
  • parsel 解析數據
  • csv 保存數據

第三方庫

  • requests >>> pip install requests
  • parsel >>> pip install parsel

開發環境:

  • 版本:anaconda5.2.0(python3.6.5)
  • 編輯器:pycharm

image

 

爬蟲代碼

導入模塊

import requests
import parsel
import csv

 

發送請求

# First page of the used-car listing; the percent-encoded sh_city_name value
# decodes to "全国". A plain string is enough here: nothing is interpolated.
url = 'https://www.dongchedi.com/usedcar/x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x?sh_city_name=%E5%85%A8%E5%9B%BD&page=1'
# Without a timeout a stalled connection would block the script indefinitely.
response = requests.get(url, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were listings.
response.raise_for_status()
html_data = response.text

 

解析數據 篩選數據

# Parse the downloaded HTML into a selector tree.
selector = parsel.Selector(html_data)
# Every <li> matched by this path is treated as one listing card.
lis = selector.css('#__next > div:nth-child(2) > div.new-main.new > div > div > div.wrap > ul > li')
for li in lis:
    title = li.css('dl dt p::text').get()
    # All <dd> rows of the card.
    info_list = li.css('dl dd')
    # The <dd> that is the second child of <dl> carries three '|'-separated
    # fields, read below as car age, mileage and city.
    info = [
        part.strip()
        for part in ''.join(info_list.css('dd:nth-child(2)::text').getall()).split('|')
    ]
    if len(info) < 3:
        # Card without the expected three fields; skip it rather than
        # crash with IndexError.
        continue
    car_age = info[0]
    # Strip the unit suffix in both Traditional and Simplified spelling: the
    # request URL asks for "全国" (Simplified), so the page text is presumably
    # Simplified as well and the Traditional form alone would never match.
    mileage = info[1].replace('萬公里', '').replace('万公里', '')
    city = info[2]

 

做一個判斷:有懂車帝認證的車源會多一個 dd 標籤,價格所在的位置也隨之後移

# A card with four <dd> rows has one extra row whose <span> text is read as
# the certification label; that shifts price and original price one child
# position further down.
if len(info_list) == 4:
    dcd_auth = info_list.css('dd span::text').get()
    price = info_list.css('dd:nth-child(4)::text').get(default='')
    original_price = info_list.css('dd:nth-child(5)::text').get(default='')
else:
    dcd_auth = '無認證'
    price = info_list.css('dd:nth-child(3)::text').get(default='')
    original_price = info_list.css('dd:nth-child(4)::text').get(default='')
# default='' above keeps a missing node from surfacing as
# "'NoneType' object has no attribute 'replace'" below.
# NOTE(review): no unit suffix is stripped from the prices here; if the page
# appends one, remove it before the value goes into the price columns —
# confirm against the live page.
price = price.strip()
# Drop the label prefix in both Traditional and Simplified spelling; the page
# is presumably served in Simplified Chinese (the request URL asks for "全国").
original_price = (
    original_price
    .replace('新車含稅價: ', '')
    .replace('新车含税价: ', '')
    .strip()
)
print(title, car_age, mileage, city, dcd_auth, price, original_price)

 

保存數據

# Append mode keeps whatever an earlier run already stored in dcd.csv.
# The handle is left open on purpose: rows are presumably written through
# csv_write afterwards; call csv_dcd.close() once scraping is finished.
csv_dcd = open('dcd.csv', mode='a', encoding='utf-8', newline='')
csv_write = csv.writer(csv_dcd)
# A file opened for appending starts positioned at its end, so position 0
# means the file is empty. Writing the header only then avoids a duplicate
# header row in the middle of the data on every re-run.
if csv_dcd.tell() == 0:
    csv_write.writerow(['品牌', '車齡', '里程(萬公里)', '城市', '認證', '售價(萬元)', '原價(萬元)'])
    # Push the header to disk right away so a later open() sees a non-empty file.
    csv_dcd.flush()

 

image

數據可視化

導入模塊

import pandas as pd
from pyecharts.charts import *
from pyecharts.commons.utils import JsCode
from pyecharts import options as opts

 

讀取數據

# Load the scraped listings from dcd.csv into a DataFrame.
df = pd.read_csv('dcd.csv', encoding='utf-8')
# Bare expression: in a notebook cell this displays the first rows.
df.head()

 

各省市二手車數量柱狀圖

# Bar chart of the number of listings per city.
# NOTE(review): `counts` is not defined in this section; presumably a pandas
# Series of listing counts indexed by city — confirm where it is built.
bar = (
    Bar(init_opts=opts.InitOpts(height='500px', width='1000px', theme='dark'))
    .add_xaxis(counts.index.tolist())
    .add_yaxis(
        '城市二手車數量',
        counts.values.tolist(),
        label_opts=opts.LabelOpts(is_show=True, position='top'),
        itemstyle_opts=opts.ItemStyleOpts(
            # JavaScript snippet passed through to ECharts: a linear gradient
            # from (0, 0) to (0, 1) between the two listed colours.
            color=JsCode("""new echarts.graphic.LinearGradient(
            0, 0, 0, 1,[{offset: 0,color: 'rgb(255,99,71)'}, {offset: 1,color: 'rgb(32,178,170)'}])
            """
            )
        ),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='各個城市二手車數量柱狀圖'),
        # The x axis lists cities, so it is labelled "city name" (it used to
        # read "book title", left over from another chart).
        xaxis_opts=opts.AxisOpts(
            name='城市名稱',
            type_='category',
            axislabel_opts=opts.LabelOpts(rotate=90),
        ),
        # NOTE(review): the y range is fixed at 0-1400; confirm that no city
        # exceeds it for the data being plotted.
        yaxis_opts=opts.AxisOpts(
            name='數量',
            min_=0,
            max_=1400.0,
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(type_='dash'),
            ),
        ),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
    )
    .set_series_opts(
        # Reference lines for the series' average, maximum and minimum.
        markline_opts=opts.MarkLineOpts(
            data=[
                opts.MarkLineItem(type_='average', name='均值'),
                opts.MarkLineItem(type_='max', name='最大值'),
                opts.MarkLineItem(type_='min', name='最小值'),
            ]
        )
    )
)
bar.render_notebook()

 

image

各省市二手車平均價格柱狀圖

# Bar chart of the average price per city, built step by step instead of as
# one chained expression.
# NOTE(review): `means` is not defined in this section; presumably a pandas
# Series of mean prices indexed by city — confirm where it is built.
bar_fill = JsCode("""new echarts.graphic.LinearGradient(
            0, 0, 0, 1,[{offset: 0,color: 'rgb(255,99,71)'}, {offset: 1,color: 'rgb(32,178,170)'}])
            """
)

bar = Bar(init_opts=opts.InitOpts(height='500px', width='1000px', theme='dark'))
bar.add_xaxis(means.index.tolist())
bar.add_yaxis(
    '城市二手車平均價格',
    means.values.tolist(),
    label_opts=opts.LabelOpts(is_show=True, position='top'),
    itemstyle_opts=opts.ItemStyleOpts(color=bar_fill),
)
bar.set_global_opts(
    title_opts=opts.TitleOpts(title='各個城市二手車平均價格柱狀圖'),
    xaxis_opts=opts.AxisOpts(
        name='城市名稱',
        type_='category',
        axislabel_opts=opts.LabelOpts(rotate=90),
    ),
    yaxis_opts=opts.AxisOpts(
        name='平均價格',
        min_=0,
        max_=40.0,
        splitline_opts=opts.SplitLineOpts(
            is_show=True,
            linestyle_opts=opts.LineStyleOpts(type_='dash'),
        ),
    ),
    tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
)
# Reference lines for the series' average, maximum and minimum.
bar.set_series_opts(
    markline_opts=opts.MarkLineOpts(
        data=[
            opts.MarkLineItem(type_='average', name='均值'),
            opts.MarkLineItem(type_='max', name='最大值'),
            opts.MarkLineItem(type_='min', name='最小值'),
        ]
    )
)
bar.render_notebook()

 

image

二手車品牌占比情況

# Ring-shaped pie chart with its title placed in the centre hole.
# NOTE(review): `datas_pair_1` is not defined in this section; presumably a
# list of (label, value) pairs — confirm where it is built.
pie1 = Pie(init_opts=opts.InitOpts(theme='dark', width='1000px', height='600px'))
# Inner and outer radius given as percentages produce the ring shape.
pie1.add('', datas_pair_1, radius=['35%', '60%'])
# Label each slice as "name:percentage%".
pie1.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{d}%"))
pie1.set_global_opts(
    title_opts=opts.TitleOpts(
        title="懂車帝二手車\n\n數量占比區間",
        pos_left='center',
        pos_top='center',
        title_textstyle_opts=opts.TextStyleOpts(
            color='#F0F8FF',
            font_size=20,
            font_weight='bold',
        ),
    )
)
pie1.render_notebook()

 

image

二手車里程區間

# Ring-shaped pie chart for the mileage buckets, title in the centre hole.
# NOTE(review): this reuses the names `pie1` and `datas_pair_1`; presumably
# `datas_pair_1` is rebound to the mileage buckets before this cell runs —
# confirm.
pie1 = Pie(init_opts=opts.InitOpts(theme='dark', width='1000px', height='600px'))
pie1.add('', datas_pair_1, radius=['35%', '60%'])
# Label each slice as "name:percentage%".
pie1.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{d}%"))
pie1.set_global_opts(
    title_opts=opts.TitleOpts(
        title="懂車帝二手車\n\n里程占比區間",
        pos_left='center',
        pos_top='center',
        title_textstyle_opts=opts.TextStyleOpts(
            color='#F0F8FF',
            font_size=20,
            font_weight='bold',
        ),
    )
)
pie1.render_notebook()

 

image


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM