2020東京奧運會獎牌榜可視化分析(pyecharts)


數據獲取和處理

從網頁中獲取各國的獎牌數量和排名以及獎牌類型(json格式)。

# Medal table: download the overall standings (JSON) for every country.
url = 'https://app-sc.miguvideo.com/vms-livedata/olympic-medal/total-table/15/110000004609'
# A timeout keeps the script from hanging forever on a dead connection, and
# raise_for_status() surfaces HTTP errors instead of failing later on a
# confusing KeyError while parsing an error page.
response = requests.get(url, timeout=10)
response.raise_for_status()
data = response.json()

# Extract rank, Chinese country name, country id and the gold / silver /
# bronze / total medal counts from the JSON payload.
# All rows are collected first and the frame is built once: DataFrame.append
# was removed in pandas 2.0 and copied the whole frame on every iteration.
df00 = pd.DataFrame(
    [
        [item['rank'], item['countryName'], item['countryId'],
         item['goldMedalNum'], item['silverMedalNum'],
         item['bronzeMedalNum'], item['totalMedalNum']]
        for item in data['body']['allMedalData']
    ],
    columns=['rank', 'C_name', 'countryId', 'goldMedalNum',
             'silverMedalNum', 'bronzeMedalNum', 'totalMedalNum'],
)
medal_count_columns = ['goldMedalNum', 'silverMedalNum', 'bronzeMedalNum', 'totalMedalNum']
df00[medal_count_columns] = df00[medal_count_columns].astype(int)

# Weighted medal score: a gold counts 1, a silver 2/3 and a bronze 1/3.
df00['totalMedalNum2'] = (
    df00['goldMedalNum']
    + df00['silverMedalNum'] * 2 / 3
    + df00['bronzeMedalNum'] * 1 / 3
)
# S_level is the weighted score relative to the best-scoring country,
# stored as a string with two decimals (this is what the charts display).
df00['S_level'] = df00['totalMedalNum2'] / np.max(df00['totalMedalNum2'])
df00['S_level'] = df00['S_level'].apply(lambda x: '%.2f' % x)
# Order by total medal count (not by the official gold-first rank).
df00.sort_values('totalMedalNum', ascending=False, inplace=True)
# Lookup table used to obtain each country's English name.
with open('./國家名中英文對照表.txt', 'r', encoding='utf-8') as fp:
    name_list = fp.readlines()

# Each useful line has the form "<Chinese name>:<English name>".
# Lines without a separator (e.g. blank lines) are skipped, and the split is
# limited to the first colon so every row has exactly two fields.
# The frame is built in one go because DataFrame.append was removed in
# pandas 2.0.
df01 = pd.DataFrame(
    [line.strip().split(':', 1) for line in name_list if ':' in line],
    columns=['C_name', 'E_name'],
)

# Attach the English names to the medal table; countries missing from the
# lookup table keep their row and get a missing E_name (left join).
df02 = pd.merge(df00, df01, how='left', on='C_name')
# Medal details: download the per-medal records (JSON).
url = 'https://app-sc.miguvideo.com/vms-livedata/olympic-medal/detail-total/15/110000004609'
# Bound the wait and fail loudly on HTTP errors rather than on a later
# KeyError while parsing an unexpected payload.
response2 = requests.get(url, timeout=10)
response2.raise_for_status()
data2 = response2.json()

# Extract country name, country id, sport, event group, medal-winning entry
# name and medal type for every record.
# Rows are gathered in a list and the frame is created once: DataFrame.append
# was removed in pandas 2.0 and was quadratic when used in a loop.
df03 = pd.DataFrame(
    [
        [item['countryName'], item['countryId'],
         item['bigItemName'], item['minorItemName'],
         item['sportsName'], item['medalType']]
        for item in data2['body']['medalTableDetail']
    ],
    columns=['countryName', 'countryId', 'bigItemName',
             'minorItemName', 'sportsName', 'medalType'],
)
# Human-readable medal label; values other than 1/2/3 are left unchanged.
df03['medalType2'] = df03['medalType'].replace({1: 'Gold', 2: 'Silver', 3: 'Bronze'})

數據可視化

繪製獎牌數量世界地圖

def wmap_plot(datas):
    """Build a world map coloured by each country's total medal count.

    Args:
        datas: DataFrame with an ``E_name`` column (English country name used
            as the map region) and a ``totalMedalNum`` column.

    Returns:
        The configured pyecharts ``Map`` chart.
    """
    # Countries without an English name (no match in the lookup table) cannot
    # be placed on the map, so they are left out of the series.
    located = datas.dropna(subset=['E_name'])
    # pyecharts expects native Python values, hence the explicit int().
    pairs = [
        [name, int(total)]
        for name, total in zip(located['E_name'], located['totalMedalNum'])
    ]
    # The colour scale still spans the full table; fall back to 0 when the
    # table is empty instead of raising.
    scale_max = int(datas['totalMedalNum'].max()) if len(datas) else 0

    w_map = Map()
    w_map.add('獎牌數', pairs, maptype='world', is_map_symbol_show=False)
    w_map.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    w_map.set_global_opts(
        title_opts=opts.TitleOpts(title='2020年東京奧運會獎牌總數分布圖'),
        visualmap_opts=opts.VisualMapOpts(max_=scale_max),
        legend_opts=opts.LegendOpts(is_show=False),
    )
    return w_map

繪製各國獎牌統計柱狀圖(前20名)

def bar_plot(datas, n=20):
    """Build a stacked medal bar chart with the S_level line overlaid.

    Only the first ``n`` rows of ``datas`` are plotted, in the order given;
    the function does not sort.

    Args:
        datas: DataFrame with ``C_name``, ``goldMedalNum``,
            ``silverMedalNum``, ``bronzeMedalNum`` and ``S_level`` columns.
        n: Number of leading rows (countries) to plot.

    Returns:
        The bar chart with the line chart overlapped on a second y-axis.
    """
    top = datas[:n]
    countries = top['C_name'].tolist()

    bar = Bar()
    bar.add_xaxis(countries)
    # Legend labels: the original spelling "Metal" was a typo for "Medal".
    bar.add_yaxis('GoldMedal', top['goldMedalNum'].tolist(), stack='stack1')
    bar.add_yaxis('SilverMedal', top['silverMedalNum'].tolist(), stack='stack1')
    bar.add_yaxis('BronzeMedal', top['bronzeMedalNum'].tolist(), stack='stack1')
    bar.set_series_opts(label_opts=opts.LabelOpts(position='inside', font_size=8))
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title='2020年東京奧運會獎牌榜'),
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=45)),
    )

    # Second y-axis (index 1) carries the S_level line so its scale is
    # independent of the medal counts.
    bar.extend_axis(yaxis=opts.AxisOpts(name='S_level', type_='value'))
    line = Line()
    line.add_xaxis(countries)
    line.add_yaxis(
        'S_level',
        yaxis_index=1,
        y_axis=top['S_level'].tolist(),
        label_opts=opts.LabelOpts(position='top'),
    )

    return bar.overlap(line)

繪製前10名的獎牌類型佔比分析圖

def pie_plot(datas, country_name, countryId):
    """Build a sunburst chart of one country's medals by sport and colour.

    The inner ring has one segment per ``bigItemName``; each segment has three
    children (Gold, Silver, Bronze) sized by the number of medals of that
    colour.

    Args:
        datas: Medal-detail DataFrame with ``countryId``, ``bigItemName`` and
            ``medalType2`` columns (``medalType2`` holding 'Gold' / 'Silver' /
            'Bronze').
        country_name: Display name used for the series and the chart title.
        countryId: Value matched against the ``countryId`` column.

    Returns:
        The configured pyecharts ``Sunburst`` chart. A country without any
        matching rows yields a chart with no segments.
    """
    medal_kinds = ('Gold', 'Silver', 'Bronze')
    country_rows = datas[datas['countryId'] == countryId]

    sport_items = []
    # groupby iterates sports in sorted order and yields nothing for an empty
    # selection, so no special-casing is required.
    for sport, rows in country_rows.groupby('bigItemName'):
        tally = rows['medalType2'].value_counts()
        # A colour the country never won in this sport is absent from the
        # tally; default it to 0 instead of raising KeyError. int() converts
        # numpy scalars to the native values pyecharts expects.
        per_kind = [int(tally.get(kind, 0)) for kind in medal_kinds]
        sport_items.append(
            opts.SunburstItem(
                name=sport,
                value=sum(per_kind),
                children=[
                    opts.SunburstItem(name=kind, value=count)
                    for kind, count in zip(medal_kinds, per_kind)
                ],
            )
        )

    sunburst = (
        Sunburst(init_opts=opts.InitOpts(width="1000px", height="600px"))
        .add(series_name=country_name, data_pair=sport_items, radius=['20%', "80%"])
        .set_global_opts(title_opts=opts.TitleOpts(title="{}獲獎項目比例分析".format(country_name)))
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}", font_size=10))
    )
    return sunburst

def tpie(data01, data02, n=10):
    """Build a timeline with one medal sunburst per leading country.

    Args:
        data01: Medal-table DataFrame with ``C_name`` and ``countryId``
            columns; its first ``n`` rows are used, in the order given.
        data02: Medal-detail DataFrame passed through to ``pie_plot``.
        n: Number of leading countries to include (defaults to 10, the
            previously hard-coded value).

    Returns:
        The pyecharts ``Timeline`` holding one sunburst per country.
    """
    timeline = Timeline()
    leaders = data01[:n]
    for name, country_id in zip(leaders['C_name'], leaders['countryId']):
        chart = pie_plot(datas=data02, country_name=name, countryId=country_id)
        # The timeline tick label is the country's display name.
        timeline.add(chart, str(name))
    return timeline

繪製圖形標頭Title

def title_plot():
    """Return a data-less Pie chart that only displays the page headline.

    The chart carries no series; it is used purely as a container for a
    large, centred title.
    """
    headline_style = opts.TextStyleOpts(font_size=36, color='#000000')
    headline = opts.TitleOpts(
        title="2020東京奧運會獎牌榜分析",
        title_textstyle_opts=headline_style,
        pos_left='center',
        pos_top='middle',
    )

    # Fixed chart id, as in the original.
    banner = Pie(init_opts=opts.InitOpts(chart_id=1))
    banner.set_global_opts(title_opts=headline)
    return banner

頁面布局Page

def page():
    """Assemble all charts into one draggable-layout pyecharts ``Page``.

    Reads the module-level ``df02`` (medal table) and ``df03`` (medal
    details) frames and adds, in order: the headline, the world map, the
    stacked bar chart and the per-country sunburst timeline.

    Returns:
        The populated ``Page`` object (not yet rendered).
    """
    dashboard = Page(layout=Page.DraggablePageLayout, page_title="2020東京奧運會獎牌榜")
    charts = [
        title_plot(),
        wmap_plot(datas=df02),
        bar_plot(datas=df02),
        tpie(data01=df02, data02=df03),
    ]
    dashboard.add(*charts)
    return dashboard
# Disabled usage example from the original article: render a draft HTML file,
# then apply a saved layout configuration to produce the final HTML.
# NOTE(review): `page` below presumably refers to the object returned by
# page(), not to the function itself - confirm before enabling.
# page.render('2020東京奧運會獎牌榜-test.html')
# page.save_resize_html(source='2020東京奧運會獎牌榜-test.html',
#                       cfg_file='chart_config2.json',
#                       dest='2020東京奧運會獎牌榜.html'
#                      )


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM