Python爬取二手房源數據,可視化分析二手房市場行情數據


本文重點

  1. 系統分析網頁性質
  2. 結構化的數據解析
  3. csv數據保存

環境介紹

  • python 3.8
  • pycharm 專業版 >>> 激活碼

模塊使用

  • requests >>> pip install requests
  • parsel >>> pip install parsel
  • csv

 

 

爬蟲代碼實現步驟: 發送請求 >>> 獲取數據 >>> 解析數據 >>> 保存數據

導入模塊

import requests # 數據請求模塊 第三方模塊 pip install requests
import parsel # 數據解析模塊
import re
import csv

 

發送請求, 對於房源列表頁發送請求

# Listing page to request (the path ends in pg1).
url = 'https://bj.lianjia.com/ershoufang/pg1/'
# Send a browser User-Agent header so the request presents itself as a
# regular browser instead of a Python script.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/94.0.4606.61 Safari/537.36'
    ),
}
response = requests.get(url, headers=headers)

 

獲取數據

# Print the body text of the listing-page response for inspection.
print(response.text)

 

解析數據

# Wrap the listing-page body in a Selector and collect every detail-page link.
selector_1 = parsel.Selector(response.text)
href = selector_1.css('div.leftContent li div.title a::attr(href)').getall()

# NOTE: csv_writer is not created in this snippet. The csv.DictWriter built in
# the "save data" step must already exist before this loop runs.
for link in href:
    # Fetch and parse one detail page per listing.
    html_data = requests.get(url=link, headers=headers).text
    selector = parsel.Selector(html_data)

    # .get() returns None when a CSS selector matches nothing, so every field
    # below may be None; csv.DictWriter writes None as an empty cell.
    title = selector.css('.title h1::text').get()  # title
    area = selector.css('.areaName .info a:nth-child(1)::text').get()  # district
    community_name = selector.css('.communityName .info::text').get()  # community
    room = selector.css('.room .mainInfo::text').get()  # layout
    room_type = selector.css('.type .mainInfo::text').get()  # orientation

    # The floor text looks like '中樓層/共5層': keep the part after the last '/'
    # and extract the number of storeys from it. A raw string is used so that
    # \d is a regex escape rather than an invalid string escape.
    # NOTE(review): the pattern relies on the character '層'; confirm it
    # matches the characters the site actually serves.
    floor_text = selector.css('.room .subInfo::text').get()
    floor_match = None
    if floor_text is not None:
        floor_match = re.search(r'共(\d+)層', floor_text.split('/')[-1])
    height = floor_match.group(1) if floor_match else None  # total storeys

    # Decoration state is the last '/'-separated piece of the sub-info text.
    decoration_text = selector.css('.type .subInfo::text').get()
    sub_info = decoration_text.split('/')[-1] if decoration_text is not None else None

    # Elevator info is stored exactly as scraped. The original author's disabled
    # code suggests it can be None or a "no data" placeholder.
    Elevator = selector.css('.content li:nth-child(12)::text').get()  # elevator

    # The original called .replace('', '') here, which changes nothing.
    # NOTE(review): a unit suffix was presumably meant to be stripped — confirm.
    house_area = selector.css('.content li:nth-child(3)::text').get()  # floor area

    price = selector.css('.price .total::text').get()  # price

    # Strip the '年建' suffix so only the construction year remains.
    built_text = selector.css('.area .subInfo::text').get()
    date = built_text.replace('年建', '') if built_text is not None else None

    dit = {
        '標題': title,
        '市區': area,
        '小區': community_name,
        '戶型': room,
        '朝向': room_type,
        '樓層': height,
        '裝修情況': sub_info,
        '電梯': Elevator,
        '面積(㎡)': house_area,
        '價格(萬元)': price,
        '年份': date,
    }
    csv_writer.writerow(dit)
    print(title, area, community_name, room, room_type, height, sub_info, Elevator, house_area, price, date,
          sep='|')

 

保存數據

# Open the output CSV in append mode; newline='' leaves line-ending handling
# to the csv module.
# NOTE: the visible code never closes f.
f = open('二手房數據.csv', mode='a', encoding='utf-8', newline='')
# One column per key of the row dicts produced by the scraping loop.
csv_writer = csv.DictWriter(
    f,
    fieldnames=[
        '標題', '市區', '小區', '戶型', '朝向', '樓層',
        '裝修情況', '電梯', '面積(㎡)', '價格(萬元)', '年份',
    ],
)
# Write the header row. Because the file is opened in append mode, a header
# is added on every run.
csv_writer.writeheader()

 

數據可視化

導入所需模塊

import pandas as pd
from pyecharts.charts import Map
from pyecharts.charts import Bar
from pyecharts.charts import Line
from pyecharts.charts import Grid
from pyecharts.charts import Pie
from pyecharts.charts import Scatter
from pyecharts import options as opts

 

讀取數據

# Load the scraped listings into a DataFrame and preview the first rows.
# NOTE(review): this reads '鏈家.csv', whereas the scraper snippet writes
# '二手房數據.csv' — confirm which file is intended.
df = pd.read_csv('鏈家.csv', encoding='utf-8')
df.head()

 

各城區二手房數量北京市地圖

# Map of listing counts per district. `region` and `count` are not defined in
# the visible snippets and must be prepared beforehand.
# NOTE(review): appending '' leaves every name unchanged; a suffix was
# presumably intended here — confirm against the names the map expects.
new = [name + '' for name in region]
m = Map()
m.add('', [list(pair) for pair in zip(new, count)], '北京')
m.set_global_opts(
    title_opts=opts.TitleOpts(title='北京市二手房各區分布'),
    visualmap_opts=opts.VisualMapOpts(max_=3000),
)
m.render_notebook()

 

各城區二手房數量-平均價格柱狀圖

# Combined chart: listing count per district as bars (left axis) with the
# price series drawn as a line on a secondary axis.
# `df_price`, `region` and `count` are not defined in the visible snippets.
# The original evaluated df_price.values.tolist() once more as a bare
# statement whose result was discarded; that dead statement is dropped.
price = [round(x, 2) for x in df_price.values.tolist()]

bar = (
    Bar()
    .add_xaxis(region)
    .add_yaxis('數量', count, label_opts=opts.LabelOpts(is_show=True))
    # Secondary y-axis (index 1) used by the price line below.
    .extend_axis(
        yaxis=opts.AxisOpts(
            name="價格(萬元)",
            type_="value",
            min_=200,
            max_=900,
            interval=100,
            axislabel_opts=opts.LabelOpts(formatter="{value}"),
        )
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='各城區二手房數量-平均價格柱狀圖'),
        tooltip_opts=opts.TooltipOpts(
            is_show=True, trigger="axis", axis_pointer_type="cross"
        ),
        xaxis_opts=opts.AxisOpts(
            type_="category",
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
        ),
        yaxis_opts=opts.AxisOpts(
            name='數量',
            axistick_opts=opts.AxisTickOpts(is_show=True),
            splitline_opts=opts.SplitLineOpts(is_show=False),
        ),
    )
)

line2 = (
    Line()
    .add_xaxis(xaxis_data=region)
    .add_yaxis(
        series_name="價格",
        yaxis_index=1,  # bind the line to the secondary axis
        y_axis=price,
        label_opts=opts.LabelOpts(is_show=True),
        z=10,
    )
)

# Overlay the line on the bar chart, then place the result in a grid.
bar.overlap(line2)
grid = Grid()
grid.add(bar, opts.GridOpts(pos_left="5%", pos_right="20%"), is_control_axis_index=True)
grid.render_notebook()

 

# Bar chart of the '價格(萬元)' column against the '小區' column of top_price.
# `top_price` is not defined in the visible snippets.
# NOTE(review): the series name and the x-axis name are both '數量' although the
# bars hold prices and the x-axis holds community names — confirm the labels.
area0 = top_price['小區'].values.tolist()
count = top_price['價格(萬元)'].values.tolist()

bar = Bar()
bar.add_xaxis(area0)
bar.add_yaxis('數量', count, category_gap='50%')
bar.set_global_opts(
    yaxis_opts=opts.AxisOpts(name='價格(萬元)'),
    xaxis_opts=opts.AxisOpts(name='數量'),
)
bar.render_notebook()

 

散點圖

# Scatter plot of price against floor area, taken straight from df's columns.
s = Scatter()
s.add_xaxis(df['面積(㎡)'].values.tolist())
s.add_yaxis('', df['價格(萬元)'].values.tolist())
# Use a numeric x-axis.
s.set_global_opts(xaxis_opts=opts.AxisOpts(type_='value'))
s.render_notebook()

 

房屋朝向占比

# Ring chart of the share of each house orientation.
# `df_direction` is not defined in the visible snippets; its index supplies the
# labels and its values supply the counts.
directions = df_direction.index.tolist()
count = df_direction.values.tolist()

c1 = (
    Pie(init_opts=opts.InitOpts(width='800px', height='600px'))
    .add(
        '',
        [list(z) for z in zip(directions, count)],
        radius=['20%', '60%'],
        center=['40%', '50%'],
        label_opts=opts.LabelOpts(is_show=True),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='房屋朝向占比', pos_left='33%', pos_top="5%"),
        legend_opts=opts.LegendOpts(
            type_="scroll", pos_left="80%", pos_top="25%", orient="vertical"
        ),
    )
    # The label position belongs to LabelOpts. The original passed
    # position="outside" to set_series_opts itself, where it is only copied
    # onto the series as an extra key and never reaches the label settings.
    .set_series_opts(
        label_opts=opts.LabelOpts(formatter='{b}:{c} ({d}%)', position="outside"),
    )
)
c1.render_notebook()

 

裝修情況/有無電梯玫瑰圖(組合圖)

# Combined chart: horizontal bars for decoration state plus a rose pie
# overlaid on the same canvas.
# `df_fitment` and `df_direction` are not defined in the visible snippets.
fitment = df_fitment.index.tolist()
count1 = df_fitment.values.tolist()

# NOTE(review): the pie is titled '有/無電梯' (elevator) but is fed from
# df_direction (orientation) — confirm which data set is intended.
directions = df_direction.index.tolist()
count2 = df_direction.values.tolist()

bar = (
    Bar()
    .add_xaxis(fitment)
    .add_yaxis('', count1, category_gap='50%')
    .reversal_axis()  # draw the bars horizontally
    .set_series_opts(label_opts=opts.LabelOpts(position='right'))
    .set_global_opts(
        xaxis_opts=opts.AxisOpts(name='數量'),
        title_opts=opts.TitleOpts(
            title='裝修情況/有無電梯玫瑰圖(組合圖)', pos_left='33%', pos_top="5%"
        ),
        legend_opts=opts.LegendOpts(
            type_="scroll", pos_left="90%", pos_top="58%", orient="vertical"
        ),
    )
)

c2 = (
    Pie(init_opts=opts.InitOpts(width='800px', height='600px'))
    .add(
        '',
        [list(z) for z in zip(directions, count2)],
        radius=['10%', '30%'],
        center=['75%', '65%'],
        rosetype="radius",
        label_opts=opts.LabelOpts(is_show=True),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='有/無電梯', pos_left='33%', pos_top="5%"),
        legend_opts=opts.LegendOpts(
            type_="scroll", pos_left="90%", pos_top="15%", orient="vertical"
        ),
    )
    # The label position belongs to LabelOpts. The original passed
    # position="outside" to set_series_opts itself, where it is only copied
    # onto the series as an extra key and never reaches the label settings.
    .set_series_opts(
        label_opts=opts.LabelOpts(formatter='{b}:{c} \n ({d}%)', position="outside"),
    )
)

# Add the pie series to the bar chart and render the combined figure.
bar.overlap(c2)
bar.render_notebook()

 

二手房樓層分布柱狀縮放圖

# Bar chart of listing counts per floor value, with a slider for zooming.
# `df_floor` is not defined in the visible snippets; its index supplies the
# categories and its values supply the counts.
floor = df_floor.index.tolist()
count = df_floor.values.tolist()

bar = Bar()
bar.add_xaxis(floor)
bar.add_yaxis('數量', count)
bar.set_global_opts(
    title_opts=opts.TitleOpts(title='二手房樓層分布柱狀縮放圖'),
    yaxis_opts=opts.AxisOpts(name='數量'),
    xaxis_opts=opts.AxisOpts(name='樓層'),
    datazoom_opts=opts.DataZoomOpts(type_='slider'),
)
bar.render_notebook()

 

房屋面積分布縱向柱狀圖

# Horizontal bar chart of listing counts per floor-area bucket.
# `df_area` is not defined in the visible snippets; its index supplies the
# categories and its values supply the counts.
area = df_area.index.tolist()
count = df_area.values.tolist()

bar = Bar()
bar.add_xaxis(area)
bar.add_yaxis('數量', count)
# Swap the axes so the bars run horizontally, with value labels on the right.
bar.reversal_axis()
bar.set_series_opts(label_opts=opts.LabelOpts(position="right"))
bar.set_global_opts(
    title_opts=opts.TitleOpts(title='房屋面積分布縱向柱狀圖'),
    yaxis_opts=opts.AxisOpts(name='面積(㎡)'),
    xaxis_opts=opts.AxisOpts(name='數量'),
)
bar.render_notebook()

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM