Scraping Beijing Beike Zhaofang (ke.com) Housing Data with Python

一. Topic Background

  Beike Zhaofang's business covers second-hand homes, new homes, rentals, and commercial office space. The platform hosts comprehensive, verified listings, providing house hunters with a safe and reliable home-buying experience. This project crawls data from the Beijing Beike Zhaofang site.

  The expected goals of the data analysis are:

  1. Visualize the crawled listing data.

  2. Categorize the second-hand listings and visualize each category.

 

二. Crawler Design

  1. Crawler name:

  Crawling and visualizing Beijing Beike Zhaofang data.

  2. Content to crawl and data characteristics:

  The target site is the Beijing Beike Zhaofang site. The crawler sends HTTP requests with Requests and parses the returned HTML with lxml XPath to extract the Beijing listing data.

  3. Overview of the approach

  Analyze the site's page structure to locate the target data, tailor the extraction logic to each field, save the crawled data to a CSV file, and then visualize the data in that file. A minimal sketch of this pipeline follows the step list.

  Step 1: Analyze the site
  Step 2: Send requests and retrieve the page data
  Step 3: Extract the Beijing listing data
  Step 4: Draw bar charts and other plots
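
  As a preview, here is a minimal, hypothetical sketch of the fetch → parse → save pipeline; the URL, XPath, and output file name are illustrative placeholders, and the real implementation appears in Section 四:

import requests
import pandas as pd
from lxml import etree

# Sketch: fetch one listing page, extract the listing titles, save them to CSV.
resp = requests.get('http://bj.ke.com/ershoufang/pg1/',
                    headers={'User-Agent': 'Mozilla/5.0'})
html = etree.HTML(resp.text)
titles = html.xpath('//ul[@class="sellListContent"]//li//div[@class="title"]/a/text()')
pd.DataFrame({'title': titles}).to_csv('preview.csv', index=False, encoding='utf_8_sig')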

 

三. Structural Analysis of the Target Pages

  1. Page structure and characteristics

  Inspect the page source with the browser's "Inspect Element" tool and watch the request/response traffic in the "Network" tab (press F12 to open the developer tools), as shown below:

  [Screenshot: HTML page structure analysis of the site]
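
  Complementing the devtools inspection, a quick one-off check with Requests can confirm what the server actually returns for a listing page; this is a sketch, and the URL and User-Agent are assumptions:

import requests

# Verify the status code and content type the crawler will receive.
resp = requests.get('http://bj.ke.com/ershoufang/pg1/',
                    headers={'User-Agent': 'Mozilla/5.0'})
print(resp.status_code, resp.headers.get('Content-Type'))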

四. Crawler Program Design

1. Data crawling

(1) Crawling the Beijing listing data

import requests
import time
from multiprocessing import Pool
from lxml import etree
import pandas as pd
import os
import random

# Shared request headers. The Cookie below was captured from a logged-in
# browser session; it expires over time and must be refreshed for the
# crawler to keep working.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
    'Cookie': 'lianjia_uuid=e6a91b7a-b6a4-40b5-88c6-ff67759cbc8a; crosSdkDT2019DeviceId=-51npj6--xbmlw5-f22i5qg8bh36ouv-yttqkmwdf; _ga=GA1.2.121082359.1579583230; ke_uuid=6de1afa21a5799c0874702af39248907; __xsptplus788=788.1.1579583230.1579583347.4%234%7C%7C%7C%7C%7C%23%23Q6jl-k46IlXjCORdTOp6O3JyzHokoUrb%23; select_city=110000; digv_extends=%7B%22utmTrackId%22%3A%2280418605%22%7D; lianjia_ssid=a4ab1bc0-cb04-492f-960c-342c66065da0; Hm_lvt_9152f8221cb6243a53c83b956842be8a=1583897013,1583932737; User-Realip=111.196.247.121; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%2216fc67f100b140-06f07f8f707639-33365a06-1049088-16fc67f100c603%22%2C%22%24device_id%22%3A%2216fc67f100b140-06f07f8f707639-33365a06-1049088-16fc67f100c603%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_utm_source%22%3A%22baidu%22%2C%22%24latest_utm_medium%22%3A%22pinzhuan%22%2C%22%24latest_utm_campaign%22%3A%22wybeijing%22%2C%22%24latest_utm_content%22%3A%22biaotimiaoshu%22%2C%22%24latest_utm_term%22%3A%22biaoti%22%7D%7D; Hm_lpvt_9152f8221cb6243a53c83b956842be8a=1583933576; srcid=eyJ0Ijoie1wiZGF0YVwiOlwiMjAxZjBjNWU1ZWE1ZGVmYjQxZDFlYTE4MGVkNWI1OGRjYzk5Mzc2MjEwNTcyMWI3ODhiNTQyNTExOGQ1NTVlZDNkMTY2MWE2YWI5YWRlMGY0NDA3NjkwNWEyMzRlNTdhZWExNDViNGFiNWVmMmMyZWJlZGY1ZjM2Y2M0NWIxMWZlMWFiOWI2MDJiMzFmOTJmYzgxNzNiZTIwMzE1ZGJjNTUyMWE2ZjcxYzZmMTFhOWIyOWU2NzJkZTkyZjc3ZDk1MzhiNjhhMTQyZDQ2YmEyNjJhYzJmNjdjNmFjM2I5YzU0MzdjMDkwYWUwMzZmZjVjYWZkZTY5YjllYzY0NzEwMWY2OTc1NmU1Y2ExNzNhOWRmZTdiNGY4M2E1Zjc2NDZmY2JkMGM2N2JiMjdmZTJjNjI2MzNkMjdlNDY4ODljZGRjMjc3MTQ0NDUxMDllZThlZDVmZmMwMjViNjc2ZjFlY1wiLFwia2V5X2lkXCI6XCIxXCIsXCJzaWduXCI6XCJkMDI2MDk0N1wifSIsInIiOiJodHRwczovL2JqLmtlLmNvbS9lcnNob3VmYW5nLzE5MTExMzE5NTEwMTAwMTcxNzU5Lmh0bWwiLCJvcyI6IndlYiIsInYiOiIwLjEifQ=='
}

# Build the listing-page URL for a given page number and return the
# detail-page URL of every listing on that page.
def get_home_url(page):
    url = 'http://bj.ke.com/ershoufang/pg{}/'.format(page)
    text = requests.get(url, headers=HEADERS).text
    html = etree.HTML(text)
    detail_url = html.xpath('//ul[@class="sellListContent"]//li[@class="clear"]/a/@href')
    return detail_url
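
# Usage sketch (hypothetical): collect the detail-page URLs from page 1.
# urls = get_home_url(1)
# print(len(urls), urls[:2])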

# Fetch one listing's detail page and extract its fields.
def get_home_detail_infos(detail_url):
    detail_text = requests.get(detail_url, headers=HEADERS).text
    html = etree.HTML(detail_text)
    all_data = []
    # Parse the fields of interest
    # Location (district and area)
    home_location = html.xpath('//div[@data-component="overviewIntro"]//div[@class="content"]//div[@class="areaName"]/span[@class="info"]/a/text()')
    all_data.append(home_location)
    # Community (xiaoqu) name
    local_name = html.xpath('//div[@data-component="overviewIntro"]//div[@class="content"]//div[@class="communityName"]/a/text()')[0]
    all_data.append(local_name)
    # Total price
    total_price = html.xpath('//div[@data-component="overviewIntro"]//div[@class="content"]//div[@class="price "]/span[@class="total"]/text()')[0]
    all_data.append(total_price)
    # Unit price
    unit_price = html.xpath('//div[@data-component="overviewIntro"]//div[@class="content"]//div[@class="price "]//div[@class="unitPrice"]/span/text()')[0]
    all_data.append(unit_price)
    # Basic home attributes
    home_style = html.xpath('//div[@class="introContent"]//div[@class="base"]//div[@class="content"]/ul/li/text()')
    all_data.append(home_style)
    # Transaction attributes
    transaction_info = html.xpath('//div[@class="introContent"]//div[@class="transaction"]//div[@class="content"]/ul/li/text()')
    all_data.append(transaction_info)
    # Community average price
    xiaoqu_price = html.xpath('//div[@class="xiaoquCard"]//div[@class="xiaoqu_main fl"]//span[@class="xiaoqu_main_info price_red"]/text()')[0].replace(' ','')
    all_data.append(xiaoqu_price)
    # Year the community was built
    xiaoqu_built_time = html.xpath('//div[@class="xiaoquCard"]//div[@class="xiaoqu_main fl"]//span[@class="xiaoqu_main_info"]/text()')[0].replace(' ','').replace('\n','')
    all_data.append(xiaoqu_built_time)
    # Building type of the community
    xiaoqu_built_style = html.xpath('//div[@class="xiaoquCard"]//div[@class="xiaoqu_main fl"]//span[@class="xiaoqu_main_info"]/text()')[1].replace(' ','').replace('\n','')
    all_data.append(xiaoqu_built_style)
    # Total number of floors in the community
    xiaoqu_total_ceng = html.xpath('//div[@class="xiaoquCard"]//div[@class="xiaoqu_main fl"]//span[@class="xiaoqu_main_info"]/text()')[2].replace(' ','').replace('\n','')
    all_data.append(xiaoqu_total_ceng)
    return all_data
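
# Each [0] index above raises IndexError when a page lacks that element
# (a delisted property, a CAPTCHA page, or a layout change). A small
# hypothetical helper like this could make the parser more forgiving:
def first(nodes, default=''):
    # Return the first XPath match, or a default when nothing matched.
    return nodes[0] if nodes else default
# e.g. total_price = first(html.xpath('...//span[@class="total"]/text()'))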

# Save the data to a CSV file (pandas DataFrame.to_csv).
# mode='a' appends a batch on every call; header=False keeps the column names
# from being re-written with each batch (so the file ends up header-less).
def save_data(data):
    data_frame = pd.DataFrame(data,columns=['小區位置','小區名稱','房屋總價','房屋單價','房屋基本信息','房屋交易信息','小區均價','小區建造時間','小區房屋類型','小區層數'])
    print(data_frame)
    data_frame.to_csv('beijing_fang111.csv',header=False,index=False,mode='a',encoding='utf_8_sig')
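
# A hedged variant (hypothetical name) that writes the header exactly once,
# using the already-imported os module, so the CSV gets a column row:
def save_data_with_header(data):
    columns = ['小區位置','小區名稱','房屋總價','房屋單價','房屋基本信息','房屋交易信息','小區均價','小區建造時間','小區房屋類型','小區層數']
    data_frame = pd.DataFrame(data, columns=columns)
    # Only write the header before the output file exists.
    write_header = not os.path.exists('beijing_fang111.csv')
    data_frame.to_csv('beijing_fang111.csv', header=write_header,
                      index=False, mode='a', encoding='utf_8_sig')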

def main(page):
    print('Crawling page {}...'.format(page))
    # choice_time = random.choice(range(0,5))
    # print(choice_time)

    urls = get_home_url(page)
    for url in urls:
        print('Crawling listing details from {}'.format(url))
        all_data = get_home_detail_infos(detail_url=url)
        data = []
        data.append(all_data)
        save_data(data)

if __name__ == "__main__":
    # Listing pages are numbered from 1 (pg1, pg2, ...).
    page = range(1, 101)
    print('Crawler started')
    pool = Pool(processes=4)
    pool.map(main, page)
    # proxies = proxy.get_proxy_random()
    # pool.apply_async(main,args=(page,proxies,))
    pool.close()
    pool.join()
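
# The site enforces anti-crawling limits (see the summary). One hedged
# mitigation, using the imported-but-unused time and random modules, is to
# sleep for a random interval before each request; the bounds are guesses.
def polite_get(url):
    time.sleep(random.uniform(1.0, 3.0))
    return requests.get(url, headers=HEADERS)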

# [Screenshot: crawl results]

# Process the data

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import csv
data = pd.read_excel(r"F:\0_個人學習\beike_find_house.xlsx", header=None)
data.columns = ['區/縣','區域','小區','總價','單價','房屋戶型','樓層','總面積','戶型結構','套內面積','建築類型','朝向','建築結構','裝修情況','梯戶比例','供暖方式','配備電梯','產權年限','s','交易權屬','u','形式','是否滿五','產權形式','是否有房本','小區均價','小區建成','style','總棟數'] 
data.head()
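
# Equivalent load (sketch): the crawler wrote a header-less CSV, so the same
# frame could be read straight from its output file instead of the xlsx:
# data = pd.read_csv('beijing_fang111.csv', header=None, encoding='utf_8_sig')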

# [Screenshot: results]

# Data cleaning

# Rows with missing fields came back from the page with values shifted into
# neighbouring columns; the cascade of conditional re-assignments below shifts
# them back into place.
# Rows whose 戶型結構 actually holds an orientation ('南北') are shifted one slot:
data['裝修情況'] = data.apply(lambda x:x['建築類型'] if ('南北' in str(x['戶型結構'])) else x['裝修情況'],axis=1)
data['建築結構'] = data.apply(lambda x:x['套內面積'] if ('南北' in str(x['戶型結構'])) else x['建築結構'],axis=1)
data['朝向'] = data.apply(lambda x:x['戶型結構'] if ('南北' in str(x['戶型結構'])) else x['朝向'],axis=1)
data['套內面積'] = data.apply(lambda x:'' if ('南北' in str(x['戶型結構'])) else x['套內面積'],axis=1)
# Further re-alignment passes. (Note: '' is a substring of every string, so the
# ('' in ...) tests below are always true and these assignments always apply.)
data['裝修情況'] = data.apply(lambda x:x['朝向'] if ('' in str(x['戶型結構'])) else x['裝修情況'],axis=1)
data['建築結構'] = data.apply(lambda x:x['建築類型'] if ('' in str(x['戶型結構'])) else x['建築結構'],axis=1)
data['朝向'] = data.apply(lambda x:x['套內面積'] if ('' in str(x['戶型結構'])) else x['朝向'],axis=1)
data['套內面積'] = data.apply(lambda x:'' if ('' in str(x['戶型結構'])) else x['套內面積'],axis=1)
data['套內面積'] = data.apply(lambda x:'' if ('暫無數據' in str(x['套內面積'])) else x['套內面積'],axis=1)
data['裝修情況'] = data.apply(lambda x:x['裝修情況'] if ('' in str(x['套內面積'])) else x['建築結構'],axis=1)
data['建築結構'] = data.apply(lambda x:x['建築結構'] if ('' in str(x['套內面積'])) else x['朝向'],axis=1)
data['朝向'] = data.apply(lambda x:x['朝向'] if ('' in str(x['套內面積'])) else x['建築類型'],axis=1)
data['建築類型'] = data.apply(lambda x:x['建築類型'] if ('' in str(x['套內面積'])) else x['套內面積'],axis=1)
data['套內面積'] = data.apply(lambda x:x['套內面積'] if ('' in str(x['套內面積'])) else '無信息',axis=1)
data['裝修情況'] = data.apply(lambda x:x['建築結構'] if (('') in str(x['裝修情況'])) else x['裝修情況'],axis=1)
data['建築結構'] = data.apply(lambda x:x['朝向'] if (('') in str(x['裝修情況'])) else x['建築結構'],axis=1)
data['朝向'] = data.apply(lambda x:x['建築類型'] if (('') in str(x['裝修情況'])) else x['朝向'],axis=1)
# Rows whose 朝向 still holds a structure value ('...結構') get one more shift:
data['建築結構'] = data.apply(lambda x:x['朝向'] if ('結構' in str(x['朝向'])) else x['建築結構'],axis=1)
data['朝向'] = data.apply(lambda x:x['建築類型'] if ('結構' in str(x['朝向'])) else x['朝向'],axis=1)

# Split 樓層 into the floor band and the total floor count, and tidy the
# remaining numeric fields.
data['總樓層'] = data.apply(lambda x:str(x[6])[3:].strip('(共').strip('層)'),axis=1)
data['樓層'] = data.apply(lambda x:str(x[6])[:3],axis=1)
data['總面積'] = data.apply(lambda x:str(x[7]).strip('平米'),axis=1)  # drop the unit suffix if present
data['小區均價'] = data.apply(lambda x:str(x[-5]).strip('元/㎡\n').strip('\n'),axis=1)
data['小區建成'] = data.apply(lambda x:str(x[-4])[:4],axis=1)
data['總棟數'] = data.apply(lambda x:str(x[-2])[:-1],axis=1)
data.to_csv('after_deal_data.csv',encoding='utf_8_sig')
need_data = data[['區/縣','區域','小區','總價','單價','房屋戶型','樓層','總面積','朝向','建築結構','裝修情況','交易權屬','形式','是否滿五','產權形式','是否有房本','小區均價','小區建成','總棟數']]
need_data.head()
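
# Sanity check (sketch): count leftover '暫無數據' placeholders per retained
# column after the re-alignment above.
print((need_data.astype(str) == '暫無數據').sum())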

# [Screenshot: results]

need_data.info()

# [Screenshot: info() output]

need_data.describe()

# [Screenshot: describe() output]

plt.rcParams['font.sans-serif'] = ['SimHei'] # Step 1: use a font that can render the Chinese labels
plt.rcParams['axes.unicode_minus'] = False   # Step 2: keep minus signs on the axes rendering correctly
fig, ax=plt.subplots()

'''
Listing counts by district/county
'''
need_data['區/縣'].value_counts().plot(kind='bar',color=['green','red','blue','grey','pink'],alpha=0.5)
plt.title('北京二手房各區、縣房源分布信息!',fontsize=15)
plt.xlabel('區、縣名稱',fontsize=15)
plt.ylabel('房源數量',fontsize=15)
plt.grid(linestyle=":", color="r")
plt.xticks(rotation=60)
plt.legend()
plt.show()

# [Screenshot: results]

'''
Average unit price by district/county
'''
need_data.groupby('區/縣')['單價'].mean().sort_values(ascending=True).plot(kind='barh',color=['r','g','y','b'],alpha=0.5)
plt.title('北京二手房各區、縣房屋均價分布信息!',fontsize=15)
plt.xlabel('房屋均價',fontsize=15)
plt.ylabel('區、縣名稱',fontsize=15)
plt.grid(linestyle=":", color="r")
plt.legend()
plt.show()

# [Screenshot: results]

'''
Listing counts by house layout (rooms/halls/baths)
'''
room_style = need_data['房屋戶型'].value_counts()
print(room_style)
need_data['房屋戶型'].value_counts()[:10].plot(kind='bar',color='grey')
plt.title('北京二手房房屋戶型情況',fontsize=15,color='red')
plt.xlabel('房屋戶型',fontsize=15)
plt.ylabel('房源數量',fontsize=15)
plt.grid(linestyle=":", color="r")
plt.legend()
plt.xticks(rotation=60)
# ax.spines['top'].set_visible(False)
# ax.spines['right'].set_visible(False)
plt.show()

# Inspect the unusual '1室0廳2衛' layout listings
need_data[need_data.房屋戶型 == '1室0廳2衛']

# Min and max total price among Beijing second-hand listings, with their details
total_price_min = need_data['總價'].min()
total_price_min_room_info = need_data[need_data.總價==total_price_min]
print('Lowest total price: \n{}'.format(total_price_min))
print('Listing with the lowest total price: \n{}'.format(total_price_min_room_info))
total_price_max = need_data['總價'].max()
total_price_max_room_info = need_data[need_data.總價==total_price_max]
print('Highest total price: \n{}'.format(total_price_max))
print('Listing with the highest total price: \n{}'.format(total_price_max_room_info))

#  Scatter plot of total area vs. total price
home_area = need_data['總面積'].apply(lambda x:float(x))
# print(home_area.head())
total_price = need_data['總價']
# print(total_price.head())
plt.scatter(home_area,total_price,s=3)
plt.title('北京二手房總面積與總價散點圖',fontsize=15)
plt.xlabel('房屋面積',fontsize=15)
plt.ylabel('房源總價',fontsize=15)
plt.grid(linestyle=":", color="r")
plt.show()
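
# Quantify the area-price relationship (sketch): Pearson correlation between
# total area and total price (assumes 總價 casts cleanly to float).
print(home_area.corr(total_price.astype(float)))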

#  Inspect the outliers: listings with a large area but a relatively low price
area_max = home_area.max()
area_max_room_info = need_data[home_area==area_max]
print('Listing with the largest area: \n{}'.format(area_max_room_info))

'''
Listing counts by decoration type
'''
need_data['裝修情況'].value_counts().plot(kind='bar',color=['g','r','y','b'],alpha=0.5)
plt.title('北京二手房裝修情況的房源分布信息!',fontsize=15)
plt.xlabel('裝修類型',fontsize=15)
plt.ylabel('房源數量',fontsize=15)
plt.grid(linestyle=":", color="r")
plt.legend()
plt.xticks(rotation=0)
plt.show()

'''
Average unit price by decoration type
'''
need_data.groupby('裝修情況')['單價'].mean().plot(kind='bar',color=['g','r','y','b'],alpha=0.5)
plt.title('北京二手房裝修與房屋均價分布信息!',fontsize=15)
plt.xlabel('裝修類型',fontsize=15)
plt.ylabel('房屋均價',fontsize=15)
plt.grid(linestyle=":", color="r")
plt.legend()
plt.xticks(rotation=0)
plt.show()

# Cleaning the community average price field.
# Some rows have '暫無數據' (no data) for the community average, so fall back
# to the listing's own unit price in that case.
need_data = need_data.copy()
need_data['小區均價'] = need_data.apply(lambda x: x['單價'] if ('暫無數據' in str(x['小區均價'])) else x['小區均價'],axis=1)
avg_price = need_data['小區均價'].astype('float')
print('Highest community average price: {}'.format(avg_price.max()))
print('Lowest community average price: {}'.format(avg_price.min()))

# Listing(s) in the community with the lowest average price
need_data[need_data['小區均價'].astype('float')==avg_price.min()]

# Listing(s) in the community with the highest average price
need_data[need_data['小區均價'].astype('float')==avg_price.max()]

# Identify the rows lacking a community construction year (2 rows) for removal
need_data[need_data.小區建成=='暫無數據']

# Drop the two rows whose construction year is '暫無數據'
# (row labels 1931 and 2527 were identified in the step above)
try:
    need_data = need_data.drop([1931,2527])
except KeyError:
    print('Rows already dropped!')
need_data[need_data.小區建成=='暫無數據']
# Convert the construction date to a datetime and keep only the year
built_year = pd.to_datetime(need_data.小區建成).dt.year
# Scatter plot: community construction year vs. community average price
plt.scatter(built_year,need_data['小區均價'].astype(float),s=6)
plt.title('北京二手房小區建成年份與均價分布信息!',fontsize=15)
plt.xlabel('小區建成年份',fontsize=15)
plt.ylabel('房屋均價',fontsize=15)
plt.grid(linestyle=":", color="r")
plt.xticks(rotation=0)
plt.show()
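
# Sketch: average community price by construction decade, reusing the
# built_year series computed above.
decade = (built_year // 10) * 10
print(need_data['小區均價'].astype(float).groupby(decade).mean())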

# Examine the forms of property ownership (the data shows two kinds)
need_data['產權形式'].value_counts()

# Floor-band analysis: one row has the unknown band '未知(' and is dropped
need_data['樓層'].value_counts()
need_data[need_data['樓層']=='未知(']
try:
    need_louceng_data = need_data.drop(1340)
except KeyError:
    print('Unknown-floor row already dropped!')
need_louceng_data[need_louceng_data['樓層']=='未知(']
plt.figure(figsize=(7,7))
need_louceng_data['樓層'].value_counts().plot(kind='pie',autopct='%1.1f%%',shadow=False,startangle=150)
plt.title('北京二手房樓層房源數量分布圖',fontsize=15)
plt.xticks(rotation=30)
plt.grid(linestyle=":", color="g")
plt.show()

avg_price_louceng = need_louceng_data.groupby('樓層')['單價'].mean()
avg_price_louceng.plot(kind='bar',color=['g','r','y','b'],alpha=0.5)
plt.title('北京二手房樓層與房屋均價分布信息!',fontsize=15)
plt.xlabel('樓層信息',fontsize=15)
plt.ylabel('房屋均價',fontsize=15)
plt.grid(linestyle=":", color="g",alpha=0.4)
plt.legend()
plt.xticks(rotation=0)
plt.show()

五. Summary

This crawler targeted the Beijing Beike Zhaofang site, and the work did not go entirely smoothly because the site enforces anti-crawling limits. The visualizations mostly turned out well and achieved the results I expected. Many sites now serve their data as JSON or load it dynamically with JavaScript, so I plan to study those techniques next.
