泰坦尼克號獲救率數據分析報告,用數據揭露真相。
一,船上乘客生存率分析報告

泰坦尼克號的生存率僅有38%,可見此次事件救援不力,救生艇嚴重不足,而且泰坦尼克號撞的是冰山,海水冰冷,沒有救生艇可上,在水裡凍死的乘客不少。
二,哪個年齡段存活率最高(青年人(18歲以下),中年人(18到50歲),老年人(50歲以上))


數據分析:從圖中可以看到,年輕人獲救率最高,為50%,老年人獲救率最低,為39%,中年人死亡人數最多。發生生命危險時,自救能力最強的中年人還是起到了中流砥柱的作用。不要再叫猥瑣油膩中年男了哦,他們才是社會的扛把子。
三,女性乘客和男性乘客獲救率分析


從圖中可以看到,女性的獲救率遠遠高於男性,女士優先不是一句空話。
四,船上一等艙二等艙三等艙的乘客貧富差距情況


從圖中可以看出,一等艙占全船24%的人數,消費額卻是全船總金額的67%。嗯,符合著名的二八法則,80%的財富掌握在20%的人手裡。
五,艙位和獲救率關係分析

從圖中可以看出,一等艙的獲救率是三等艙的三倍左右,說明發生沉船事故時,一等艙的乘客是先上救生艇的。有錢能使鬼推磨,古人誠不欺我也。
附上本人源代碼:
import pandas as pd
import matplotlib.pyplot as plt#導入繪制函數
import numpy as np#導入數組庫
from pylab import *#轉義漢字
mpl.rcParams['font.sans-serif'] = ['SimHei']#
mpl.rcParams['axes.unicode_minus'] = False#
from pyecharts import Pie,Bar,Gauge,EffectScatter,WordCloud,Map,Grid,Line,Timeline
import random
# Load the passenger records once at import time; every analysis function
# below reads this module-level frame. The path is relative, so the script
# must be run from the directory that contains Titanic.csv.
df_Titanic = pd.read_csv('Titanic.csv')
# Debug aid: uncomment to inspect the loaded frame.
# print(df_Titanic)
# 1. What is the overall survival rate?
def Rescued_rate():
    """Print the overall survival rate and render it as a pie chart.

    Reads the module-level ``df_Titanic`` frame, computes the share of rows
    whose ``Survived`` value equals 1 (rounded to two decimals), prints it,
    and writes a pyecharts pie chart of survival vs. death rate to
    ``Titanic_1.html``.
    """
    total = df_Titanic['Survived'].count()
    survivors = df_Titanic[df_Titanic['Survived'] == 1]['Survived'].count()
    survival_rate = round(survivors / total, 2)
    print('一,存活率為:{}'.format(survival_rate))

    # Round the complement too: a bare ``1 - rate`` can carry binary float
    # noise (e.g. 0.30000000000000004) straight into the chart labels.
    death_rate = round(1 - survival_rate, 2)

    # Visualise survival vs. death rate.
    attr = ['獲救率', '死亡率']
    values = [survival_rate, death_rate]
    pie = Pie('Titanic生存率報告')
    pie.add('生存率', attr, values, is_label_show=True)
    # File name follows the Titanic_<n>.html pattern used by the other
    # reports (the original spelling was 'TItanic_1.html').
    pie.render('Titanic_1.html')


Rescued_rate()
# 2. Which age band has the highest survival rate?
def age_survived():
    """Print and chart the survival rate of three age bands.

    The bands are: age <= 18, 18 < age < 50, and age >= 50. For each band the
    function counts the passengers and the survivors in ``df_Titanic``,
    prints the three survival rates (rounded to two decimals), and writes a
    pyecharts bar chart with survival rate, death rate and head count per
    band to ``Titanic_2.html``.
    """
    age = df_Titanic['Age']
    outcome = df_Titanic['Survived']
    alive = outcome == 1

    # One boolean mask per band, in display order: young, middle, old.
    band_masks = [age <= 18, (age > 18) & (age < 50), age >= 50]

    head_counts = [outcome[mask].count() for mask in band_masks]
    saved_counts = [outcome[mask & alive].count() for mask in band_masks]
    survival_rates = [
        round(saved / heads, 2)
        for saved, heads in zip(saved_counts, head_counts)
    ]

    print('二,年輕人,中年人,老年人生存幾率分別為{},{},{}'.format(*survival_rates))

    # Comparison chart: stacked survival/death rate plus an unstacked head count.
    labels = ['青少年', '中年人', '老年人']
    death_rates = [(1 - rate) for rate in survival_rates]
    chart = Bar('Titanic不同年齡段獲救率對比')
    chart.add('獲救率', labels, survival_rates,
              mark_point=['average', 'max', 'min'], is_stack=True)
    chart.add('死亡率', labels, death_rates,
              mark_line=['min', 'max'], is_stack=True)
    chart.add('人數', labels, head_counts,
              mark_point=['average', 'max', 'min'], is_stack=False)
    chart.render('Titanic_2.html')


age_survived()
# 3. Which is higher: the female or the male survival rate?
def Rescued_rate_man():
    """Compare female and male survival rates and chart them.

    Counts passengers and survivors per sex in ``df_Titanic``, prints which
    sex has the higher survival rate (rates rounded to two decimals; a tie
    is reported as the male rate being higher), and writes a pyecharts bar
    chart with the rates and head counts to ``Titanic_3.html``.
    """
    sex = df_Titanic['Sex']
    alive = df_Titanic['Survived'] == 1
    is_male = sex == 'male'
    is_female = sex == 'female'

    men_total = sex[is_male].count()
    men_saved = sex[is_male & alive].count()
    women_total = sex[is_female].count()
    women_saved = sex[is_female & alive].count()

    men_rate = round(men_saved / men_total, 2)
    women_rate = round(women_saved / women_total, 2)

    verdict = '三,女性獲救率高' if women_rate > men_rate else '三,男性獲救率高'
    print(verdict)

    # Bar chart: survival rate (stacked series) and head count per sex.
    labels = ['女性', '男性']
    chart = Bar('Titanic——男性女性獲救率報告')
    chart.add('獲救率', labels, [women_rate, men_rate],
              mark_point=['average', 'max', 'min'], is_stack=True)
    chart.add('人數', labels, [women_total, men_total],
              mark_point=['average', 'max', 'min'], is_stack=False)
    chart.render('Titanic_3.html')


Rescued_rate_man()
# 4. The wealth gap on board
def wealth_gap():
    """Print and chart fare statistics for the three passenger classes.

    For each ``Pclass`` value 1, 2 and 3 in ``df_Titanic`` the function
    computes the mean fare (rounded to two decimals), the passenger count
    and the fare total. It prints the three mean fares together with the
    standard deviation of all fares, then writes a pyecharts bar chart of
    the per-class figures to ``Titanic_4.html``.
    """
    fare = df_Titanic['Fare']
    outcome = df_Titanic['Survived']

    # One boolean mask per cabin class, in order: first, second, third.
    class_masks = [df_Titanic['Pclass'] == level for level in (1, 2, 3)]

    mean_fares = [round(fare[mask].mean(), 2) for mask in class_masks]
    fare_spread = round(fare.std(), 2)
    head_counts = [outcome[mask].count() for mask in class_masks]
    fare_totals = [fare[mask].sum() for mask in class_masks]

    print('四,一等艙人均消費:{},二等艙人均消費:{},三等艙人均消費:{},人均消費標准差:{}'.format(
        mean_fares[0], mean_fares[1], mean_fares[2], fare_spread))

    # Three unstacked series side by side for each class.
    labels = ['一等艙', '二等艙', '三等艙']
    chart = Bar('Titanic——貧富差距報告')
    chart.add('人均消費', labels, mean_fares,
              mark_point=['average', 'max', 'min'], is_stack=False)
    chart.add('艙位人數', labels, head_counts,
              mark_point=['average', 'max', 'min'], is_stack=False)
    chart.add('消費總額', labels, fare_totals,
              mark_point=['average', 'max', 'min'], is_stack=False)
    chart.render('Titanic_4.html')


wealth_gap()
# 5. Is the first-class survival rate higher than the third-class one?
def Survival_comparison():
    """Compare survival rates across the three passenger classes.

    For each ``Pclass`` value 1, 2 and 3 in ``df_Titanic`` the function
    computes the passenger count and the survival rate (rounded to two
    decimals). If the rates strictly decrease from first to third class it
    prints them; otherwise it prints a fixed "no clear relation" message.
    A pyecharts bar chart of the rates and the class sizes (in hundreds of
    passengers) is written to ``Titanic_5.html``.
    """
    outcome = df_Titanic['Survived']
    alive = outcome == 1

    class_sizes = []
    class_rates = []
    for level in (1, 2, 3):
        in_class = df_Titanic['Pclass'] == level
        size = outcome[in_class].count()
        saved = outcome[in_class & alive].count()
        class_sizes.append(size)
        class_rates.append(round(saved / size, 2))

    svl_1, svl_2, svl_3 = class_rates
    if svl_1 > svl_2 > svl_3:
        print('五,一等艙二等艙三等艙的獲救率分別為:{},{},{},一等艙獲救率最高'.format(svl_1, svl_2, svl_3))
    else:
        print('獲救率和艙位關系不大')

    # Relationship between cabin class and survival rate. Head counts are
    # divided by 100 so they share a readable axis with the 0-1 rates.
    attr = ['一等艙', '二等艙', '三等艙']
    sizes_in_hundreds = [size / 100 for size in class_sizes]
    bar = Bar('Titanic——艙位和獲救率關系')
    bar.add('獲救率', attr, class_rates,
            mark_point=['average', 'max', 'min'], is_stack=False)
    bar.add('艙位人數(/百人)', attr, sizes_in_hundreds,
            mark_point=['average', 'max', 'min'], is_stack=False)
    # Write to its own file: the original reused 'Titanic_4.html' and so
    # overwrote the wealth-gap chart produced by wealth_gap().
    bar.render('Titanic_5.html')


Survival_comparison()
# 6. Does travelling with family affect the survival rate?
def family_survived():
    """Compare survival rates of passengers with and without family aboard.

    A passenger counts as travelling with family when ``SibSp`` or ``Parch``
    is greater than zero, and as travelling alone when both are zero. The
    function computes the survival rate of each group from ``df_Titanic``
    (rounded to two decimals) and prints both rates when the with-family
    rate is strictly higher; otherwise it prints a fixed message.
    """
    siblings = df_Titanic['SibSp']
    parents = df_Titanic['Parch']
    outcome = df_Titanic['Survived']
    alive = outcome == 1

    # "> 0" rather than "== 1": passengers with two or more relatives aboard
    # are travelling with family too and were previously left out.
    with_family = (siblings > 0) | (parents > 0)
    alone = (siblings == 0) & (parents == 0)

    family_yes = outcome[with_family].count()  # passengers with family
    family_no = outcome[alone].count()         # passengers without family

    # Build the group mask first, then intersect with the survivors. The
    # original wrote ``A | B & C``; because ``&`` binds tighter than ``|``
    # that counted every SibSp == 1 passenger as a survivor.
    family_survive = outcome[with_family & alive].count()
    family_no_survive = outcome[alone & alive].count()

    family_survive_odds = round(family_survive / family_yes, 2)
    familyno_survive_odds = round(family_no_survive / family_no, 2)

    if family_survive_odds > familyno_survive_odds:
        print('六,帶家屬的生存率為{},不帶家屬的生存率為{},帶家屬的生存率高一些'.format(family_survive_odds, familyno_survive_odds))
    else:
        print('帶家屬的乘客獲救幾率和其他乘客一樣')


family_survived()
# 7. Does the port of embarkation affect the survival rate?
def port_survived():
    """Print the survival rate for each embarkation port (S, C, Q).

    For every port code the function counts the passengers in
    ``df_Titanic`` whose ``Embarked`` value matches and how many of them
    have ``Survived`` equal to 1, then prints the three rates rounded to
    two decimals.
    """
    outcome = df_Titanic['Survived']
    alive = outcome == 1

    port_rates = []
    for code in ('S', 'C', 'Q'):
        boarded_here = df_Titanic['Embarked'] == code
        saved = outcome[alive & boarded_here].count()
        boarded = outcome[boarded_here].count()
        port_rates.append(round(saved / boarded, 2))

    s_odds, c_odds, q_odds = port_rates
    print('七,s,c,q港口進入的乘客的生存率分別為{},{},{}'.format(s_odds, c_odds, q_odds))


port_survived()
