城市餐飲店鋪選址的分析
基於以下這份上海餐飲數據(上海餐飲數據.xlsx)進行分析:

從 “口味”、“人均消費”、“性價比” 三個維度對不同菜系進行比較,並篩選出可開店鋪的餐飲類型
① 計算出三個維度的指標得分;② 評價方法:口味 → 得分越高越好;性價比 → 得分越高越好;人均消費 → 價格適中即可;③ 製作散點圖,x軸為“人均消費”,y軸為“性價比得分”,點的大小為“口味得分”;繪製柱狀圖,分別顯示“口味得分”、“性價比得分” * (用bokeh做圖)
① 數據清洗,清除空值、為0的數據;② 口味指標計算方法 → 口味評分字段,按照餐飲類別分組算均值,再做標準化處理;③ 人均消費指標計算方法 → 人均消費字段,按照餐飲類別分組算均值,再做標準化處理;④ 性價比指標計算方法 → 性價比 = (口味 + 環境 + 服務)/人均消費,按照餐飲類別分組算均值,再做標準化處理;⑤ 數據計算之前,檢查一下數據分布,去除異常值(以外限為標準,即小於 Q1 − 3×IQR 或大於 Q3 + 3×IQR 的數據) * 這裡排除了高端奢侈餐飲的數據干擾;⑥ 注意,這裡先分別計算三個指標,再合併數據(merge)作圖,目的是讓指標之間的噪音數據不相互影響
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, output_file, show

warnings.filterwarnings('ignore')

# (1) Load the data.
os.chdir(r'C:\Users\Administrator\Desktop\python數據分析\項目07城市餐飲店鋪選址分析')
# `sheet_name` is the keyword pandas supports; the old `sheetname` spelling
# raises a TypeError on current pandas releases.
df1 = pd.read_excel('上海餐飲數據.xlsx', sheet_name=0, header=0)

# (2) Compute the taste, per-capita spend and value-for-money indicators.
# Work on an explicit copy so that cleaning and adding a column never operate
# on a view of df1.
data1 = df1[['類別', '口味', '環境', '服務', '人均消費']].dropna().copy()
# Drop rows whose taste score or per-capita spend is zero (or negative).
data1 = data1[(data1['口味'] > 0) & (data1['人均消費'] > 0)].copy()
# Value for money = (taste + environment + service) / per-capita spend.
data1['性價比'] = (data1['口味'] + data1['環境'] + data1['服務']) / data1['人均消費']


def f1():
    """Draw box plots of the three raw indicators to inspect outliers.

    Reads the module-level ``data1`` frame and draws one box plot each for
    taste, per-capita spend and value for money on a 1x3 matplotlib grid.
    """
    fig, axes = plt.subplots(1, 3, figsize=(10, 4))
    data1.boxplot(column=['口味'], ax=axes[0])
    data1.boxplot(column=['人均消費'], ax=axes[1])
    data1.boxplot(column=['性價比'], ax=axes[2])


def f2(data, col):
    """Return the category column and ``col`` with outliers of ``col`` removed.

    A row is kept when its ``col`` value lies strictly between
    ``Q1 - 3 * IQR`` and ``Q3 + 3 * IQR``.

    Parameters:
        data: DataFrame containing a '類別' column and the column ``col``.
        col: name of the numeric column to filter on.

    Returns:
        A DataFrame with only the '類別' and ``col`` columns, so each
        indicator is cleaned independently of the others.
    """
    q1 = data[col].quantile(q=0.25)
    q3 = data[col].quantile(q=0.75)
    iqr = q3 - q1
    t1 = q1 - 3 * iqr
    t2 = q3 + 3 * iqr
    return data[(data[col] > t1) & (data[col] < t2)][['類別', col]]


# Clean each indicator separately so that outliers in one indicator do not
# remove rows from the others.
data_kw = f2(data1, '口味')
data_rj = f2(data1, '人均消費')
data_xjb = f2(data1, '性價比')


def f3(data, col):
    """Average ``col`` per category and add a min-max normalised score.

    Parameters:
        data: DataFrame containing a '類別' column and the column ``col``.
        col: name of the numeric column to aggregate.

    Returns:
        A DataFrame indexed by category with the mean of ``col`` and a
        ``<col>_norm`` column, sorted by the normalised score in descending
        order.
    """
    col_name = col + '_norm'
    data_gp = data.groupby('類別').mean()
    data_gp[col_name] = (
        (data_gp[col] - data_gp[col].min())
        / (data_gp[col].max() - data_gp[col].min())
    )
    data_gp.sort_values(by=col_name, inplace=True, ascending=False)
    return data_gp


# Normalised scores for each indicator.
data_kw_score = f3(data_kw, '口味')
data_rj_score = f3(data_rj, '人均消費')
data_xjb_score = f3(data_xjb, '性價比')

# Merge the three score tables on the category index.
data_final_q1 = pd.merge(data_kw_score, data_rj_score,
                         left_index=True, right_index=True)
data_final_q1 = pd.merge(data_final_q1, data_xjb_score,
                         left_index=True, right_index=True)

# (3) Draw the charts.
output_file('project07_h1.html')

# Marker size is driven by the normalised taste score.
data_final_q1['size'] = data_final_q1['口味_norm'] * 40
data_final_q1.index.name = 'type'
# Rename the columns to ASCII names by explicit mapping rather than by
# position, so a change in merge order cannot silently mislabel a column.
data_final_q1 = data_final_q1.rename(columns={
    '口味': 'kw',
    '口味_norm': 'kw_norm',
    '人均消費': 'price',
    '人均消費_norm': 'price_norm',
    '性價比': 'xjb',
    '性價比_norm': 'xjb_norm',
})
source = ColumnDataSource(data_final_q1)

# Scatter plot: x = mean per-capita spend, y = value-for-money score.
# NOTE: plot_width/plot_height are the pre-3.0 Bokeh names; Bokeh 3.x uses
# width/height instead.
result = figure(plot_width=800, plot_height=300, title='餐飲類型得分',
                x_axis_label='人均消費', y_axis_label='性價比得分')
result.circle(x='price', y='xjb_norm', source=source,
              line_color='black', line_dash=[6, 4], fill_alpha=0.6,
              size='size')

# The category names form the categorical x-range shared by both bar charts.
data_type = data_final_q1.index.tolist()

# Bar chart 1: taste score per category.
kw = figure(plot_width=800, plot_height=300, title='口味得分',
            x_range=data_type)
kw.vbar(x='type', top='kw_norm', source=source,
        width=0.8, alpha=0.7, color='red')

# Bar chart 2: per-capita spend score per category.
price = figure(plot_width=800, plot_height=300, title='人均消費得分',
               x_range=data_type)
price.vbar(x='type', top='price_norm', source=source,
           width=0.8, alpha=0.7, color='green')

# Stack the three figures in a single layout, one per row.
p = gridplot([[result], [kw], [price]])
show(p)
print('finish')
添加Tool工具、十字標線
from bokeh.layouts import gridplot
from bokeh.models import BoxAnnotation
from bokeh.models import HoverTool

# Hover tooltip showing the category and its scores; the @fields refer to
# columns of the ColumnDataSource used by the figures.
hover = HoverTool(tooltips=[("餐飲類型", '@type'),
                            ('人均消費', '@price'),
                            ('性價比得分', '@xjb_norm'),
                            ('口味得分', '@kw_norm'),
                            ])

# Add the tools to result, kw and price: pass this list as `tools=tools` in
# the figure(...) call that creates each of them.
tools = [hover, 'box_select, reset, xwheel_zoom, pan, crosshair']

# Shade the 40-80 per-capita-spend band on the scatter plot to mark the
# mid-price range.
price_mid = BoxAnnotation(left=40, right=80, fill_alpha=0.1, fill_color='navy')
result.add_layout(price_mid)

# Stack the three figures in a single layout, one per row.
p = gridplot([[result], [kw], [price]])
show(p)
如下:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from bokeh.layouts import gridplot
from bokeh.models import BoxAnnotation
from bokeh.models import ColumnDataSource
from bokeh.models import HoverTool
from bokeh.plotting import figure, output_file, show

warnings.filterwarnings('ignore')

# (1) Load the data.
os.chdir(r'C:\Users\Administrator\Desktop\python數據分析\課程資料\【非常重要】python數據分析_項目資料\項目07城市餐飲店鋪選址分析')
# `sheet_name` is the keyword pandas supports; the old `sheetname` spelling
# raises a TypeError on current pandas releases.
df1 = pd.read_excel('上海餐飲數據.xlsx', sheet_name=0, header=0)

# (2) Compute the taste, per-capita spend and value-for-money indicators.
# Work on an explicit copy so that cleaning and adding a column never operate
# on a view of df1.
data1 = df1[['類別', '口味', '環境', '服務', '人均消費']].dropna().copy()
# Drop rows whose taste score or per-capita spend is zero (or negative).
data1 = data1[(data1['口味'] > 0) & (data1['人均消費'] > 0)].copy()
# Value for money = (taste + environment + service) / per-capita spend.
data1['性價比'] = (data1['口味'] + data1['環境'] + data1['服務']) / data1['人均消費']


def f1():
    """Draw box plots of the three raw indicators to inspect outliers.

    Reads the module-level ``data1`` frame and draws one box plot each for
    taste, per-capita spend and value for money on a 1x3 matplotlib grid.
    """
    fig, axes = plt.subplots(1, 3, figsize=(10, 4))
    data1.boxplot(column=['口味'], ax=axes[0])
    data1.boxplot(column=['人均消費'], ax=axes[1])
    data1.boxplot(column=['性價比'], ax=axes[2])


def f2(data, col):
    """Return the category column and ``col`` with outliers of ``col`` removed.

    A row is kept when its ``col`` value lies strictly between
    ``Q1 - 3 * IQR`` and ``Q3 + 3 * IQR``.

    Parameters:
        data: DataFrame containing a '類別' column and the column ``col``.
        col: name of the numeric column to filter on.

    Returns:
        A DataFrame with only the '類別' and ``col`` columns, so each
        indicator is cleaned independently of the others.
    """
    q1 = data[col].quantile(q=0.25)
    q3 = data[col].quantile(q=0.75)
    iqr = q3 - q1
    t1 = q1 - 3 * iqr
    t2 = q3 + 3 * iqr
    return data[(data[col] > t1) & (data[col] < t2)][['類別', col]]


# Clean each indicator separately so that outliers in one indicator do not
# remove rows from the others.
data_kw = f2(data1, '口味')
data_rj = f2(data1, '人均消費')
data_xjb = f2(data1, '性價比')


def f3(data, col):
    """Average ``col`` per category and add a min-max normalised score.

    Parameters:
        data: DataFrame containing a '類別' column and the column ``col``.
        col: name of the numeric column to aggregate.

    Returns:
        A DataFrame indexed by category with the mean of ``col`` and a
        ``<col>_norm`` column, sorted by the normalised score in descending
        order.
    """
    col_name = col + '_norm'
    data_gp = data.groupby('類別').mean()
    data_gp[col_name] = (
        (data_gp[col] - data_gp[col].min())
        / (data_gp[col].max() - data_gp[col].min())
    )
    data_gp.sort_values(by=col_name, inplace=True, ascending=False)
    return data_gp


# Normalised scores for each indicator.
data_kw_score = f3(data_kw, '口味')
data_rj_score = f3(data_rj, '人均消費')
data_xjb_score = f3(data_xjb, '性價比')

# Merge the three score tables on the category index.
data_final_q1 = pd.merge(data_kw_score, data_rj_score,
                         left_index=True, right_index=True)
data_final_q1 = pd.merge(data_final_q1, data_xjb_score,
                         left_index=True, right_index=True)

# (3) Draw the charts.
output_file('project07_h1.html')

# Marker size is driven by the normalised taste score.
data_final_q1['size'] = data_final_q1['口味_norm'] * 40
data_final_q1.index.name = 'type'
# Rename the columns to ASCII names by explicit mapping rather than by
# position, so a change in merge order cannot silently mislabel a column.
data_final_q1 = data_final_q1.rename(columns={
    '口味': 'kw',
    '口味_norm': 'kw_norm',
    '人均消費': 'price',
    '人均消費_norm': 'price_norm',
    '性價比': 'xjb',
    '性價比_norm': 'xjb_norm',
})
source = ColumnDataSource(data_final_q1)

# Hover tooltip showing the category and its scores.
hover = HoverTool(tooltips=[("餐飲類型", '@type'),
                            ('人均消費', '@price'),
                            ('性價比得分', '@xjb_norm'),
                            ('口味得分', '@kw_norm'),
                            ])
# The same tool list is passed to all three figures.
tools = [hover, 'box_select, reset, xwheel_zoom, pan, crosshair']

# Scatter plot: x = mean per-capita spend, y = value-for-money score.
# NOTE: plot_width/plot_height are the pre-3.0 Bokeh names; Bokeh 3.x uses
# width/height instead.
result = figure(plot_width=800, plot_height=300, title='餐飲類型得分',
                x_axis_label='人均消費', y_axis_label='性價比得分',
                tools=tools)
result.circle(x='price', y='xjb_norm', source=source,
              line_color='black', line_dash=[6, 4], fill_alpha=0.6,
              size='size')
# Shade the 40-80 per-capita-spend band to mark the mid-price range.
price_mid = BoxAnnotation(left=40, right=80, fill_alpha=0.1, fill_color='navy')
result.add_layout(price_mid)

# The category names form the categorical x-range shared by both bar charts.
data_type = data_final_q1.index.tolist()

# Bar chart 1: taste score per category.
kw = figure(plot_width=800, plot_height=300, title='口味得分',
            x_range=data_type, tools=tools)
kw.vbar(x='type', top='kw_norm', source=source,
        width=0.8, alpha=0.7, color='red')

# Bar chart 2: per-capita spend score per category.
price = figure(plot_width=800, plot_height=300, title='人均消費得分',
               x_range=data_type, tools=tools)
price.vbar(x='type', top='price_norm', source=source,
           width=0.8, alpha=0.7, color='green')

# Stack the three figures in a single layout, one per row.
p = gridplot([[result], [kw], [price]])
show(p)
print('finish')

2. 選擇一個餐飲類型,在 QGIS 中將上海劃分成格網空間,結合 Python 輔助做空間指標評價,得到餐飲選址位置
這裡以“素菜館”為例。課程數據:① net_population.shp → 投影坐標系,上海1km²格網內的人口密度數據;② road.shp → 投影坐標系,上海道路數據。要求:① 通過空間分析,分別計算每個格網內的幾個指標:人口密度指標、道路密度指標、餐飲熱度指標、同類競品指標;② 評價方法:人口密度指標 → 得分越高越好;道路密度指標 → 得分越高越好;餐飲熱度指標 → 得分越高越好;同類競品指標 → 得分越低越好;綜合指標 = 人口密度指標*0.4 + 餐飲熱度指標*0.3 + 道路密度指標*0.2 + 同類競品指標*0.1;③ 最後得到較好選址的網格位置的中心坐標,以及所屬區域 * 可以用bokeh製作散點圖
------->>> ------->>> ① 道路密度指標計算方法 → 網格內道路長度 ② 餐飲熱度指標計算方法 → 網格內餐飲poi計數 ③ 同類競品指標計算方法 → 網格內素菜館poi計數 ④ 餐飲poi數據記得投影 ⑤ 可以以“net_population.shp”為網格基礎數據,做空間統計 ⑥ 在qgis做空間統計之后,網格數據導出點數據,投影成wgs84地理坐標系,導出excel數據,在python做指標標准化等 ⑦ 在bokeh中做散點圖時,注意添加一個size字段,通過最終評分來賦值 ⑧ 在bokeh中做散點圖時,可以給TOP10的點用顏色區分
課程數據都是投影坐標系,因此需要先將其他數據(如餐飲poi數據)也改為投影坐標系。

(1)人口密度指標

Z值就是人口密度數量;

(2)道路指標
矢量 -- 分析工具--計算線條總長度

把上海餐飲數據轉換為.csv格式; 然后點擊加載(添加文本數據圖層)

但它是WGS 84的坐標, 選中-右鍵-另存為-(改為投影坐標)EPSG:32651,WGS84 / UTM zone 51N

(3)餐飲熱度指標



(4)同類競品指標
canyin --屬性表 --



最后再把它導出來:
再來一個


把格子轉為點,因為它是沒有經緯度的。 --->> 矢量 -- 幾何工具--多邊形質心


轉換為經緯度 -- 轉為EPSG:4326--WGS84的
再重新計算下
把原來lng字段給刪掉,重新建立一個。



如上圖都計算好了:人口密度、餐飲.....
最后再把它導出來。用python進行作圖分析
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from bokeh.models import ColumnDataSource
from bokeh.models import HoverTool  # used below; the original never imported it
from bokeh.plotting import figure, output_file, show

warnings.filterwarnings('ignore')


def _min_max_norm(series, reverse=False):
    """Return the min-max normalised values of ``series`` in [0, 1].

    Parameters:
        series: numeric pandas Series.
        reverse: when True, the largest raw value maps to 0 and the smallest
            to 1 (for indicators where lower is better).

    Returns:
        A Series with the same index as ``series``. If every value is
        identical the range is zero, so all scores are returned as 0.0
        instead of dividing by zero.
    """
    low = series.min()
    high = series.max()
    span = high - low
    if span == 0:
        return pd.Series(0.0, index=series.index)
    if reverse:
        return (high - series) / span
    return (series - low) / span


# (1) Load the data.
os.chdir(r'C:\Users\Administrator\Desktop\python數據分析\項目\07餐飲')
# `sheet_name` is the keyword pandas supports; the old `sheetname` spelling
# raises a TypeError on current pandas releases.
df2 = pd.read_excel('result_point.xlsx', sheet_name=0, header=0)
# Cells left empty by the spatial statistics are treated as 0.
df2.fillna(0, inplace=True)
# Assumes the sheet has exactly these six columns in this order.
df2.columns = ['人口密度', '道路長度', '餐飲計數', '素菜餐飲計數', 'lng', 'lat']

# (2) Indicator statistics.
# Min-max normalisation. The original wrote `(x - min) / max - min`, which
# by operator precedence evaluates as `((x - min) / max) - min`; the range
# must be parenthesised: `(x - min) / (max - min)`.
df2['rkmd_norm'] = _min_max_norm(df2['人口密度'])                # population density
df2['cyrd_norm'] = _min_max_norm(df2['餐飲計數'])                # restaurant heat
df2['tljp_norm'] = _min_max_norm(df2['素菜餐飲計數'], reverse=True)  # competitors: fewer is better
df2['dlmi_norm'] = _min_max_norm(df2['道路長度'])                # road density

# Weighted overall score.
df2['final_score'] = (
    df2['rkmd_norm'] * 0.4
    + df2['cyrd_norm'] * 0.3
    + df2['tljp_norm'] * 0.1
    + df2['dlmi_norm'] * 0.2
)
data_final_q2 = df2.sort_values(by='final_score', ascending=False).reset_index()

# (3) Draw the spatial scatter plot.
data_final_q2['size'] = data_final_q2['final_score'] * 20
# Highlight the ten best-scoring cells in red. Use a single .loc assignment
# instead of chained `['color'].iloc[:10] = ...`, which may write to a
# temporary copy.
data_final_q2['color'] = 'green'
data_final_q2.loc[data_final_q2.index[:10], 'color'] = 'red'

source = ColumnDataSource(data_final_q2)

output_file('project07_h2.html')

# The latitude label was misspelled as "維度" (dimension) in the original.
hover = HoverTool(tooltips=[("經度", "@lng"),
                            ("緯度", "@lat"),
                            ("最終得分", "@final_score"),
                            ])
# NOTE: plot_width/plot_height are the pre-3.0 Bokeh names; Bokeh 3.x uses
# width/height instead.
p = figure(plot_width=800, plot_height=800, title="空間散點圖",
           tools=[hover, 'box_select, reset, wheel_zoom, pan, crosshair'])
# One square marker per grid cell, sized by score and coloured by rank.
p.square(x='lng', y='lat', source=source,
         line_color='black', fill_alpha=0.5, size='size', color='color')
p.ygrid.grid_line_dash = [6, 4]
p.xgrid.grid_line_dash = [6, 4]

show(p)
print('finish')

