# 導入 pandas
import pandas as pd
# Sample data: 20 countries with their life expectancy and GDP figures.
# The three lists are parallel: position i in each list refers to the same
# country.
countries = [
    'Albania', 'Algeria', 'Andorra', 'Angola', 'Antigua and Barbuda',
    'Argentina', 'Armenia', 'Australia', 'Austria', 'Azerbaijan',
    'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus',
    'Belgium', 'Belize', 'Benin', 'Bhutan', 'Bolivia',
]

life_expectancy_values = [
    74.7, 75.0, 83.4, 57.6, 74.6,
    75.4, 72.3, 81.5, 80.2, 70.3,
    72.1, 76.4, 68.1, 75.2, 69.8,
    79.4, 70.8, 62.7, 67.3, 70.6,
]

gdp_values = [
    1681.61390973, 2155.48523109, 21495.80508273, 562.98768478,
    13495.1274663, 9388.68852258, 1424.19056199, 24765.54890176,
    27036.48733192, 1945.63754911, 21721.61840978, 13373.21993972,
    483.97086804, 9783.98417323, 2253.46411147, 25034.66692293,
    3680.91642923, 366.04496652, 1175.92638695, 1132.21387981,
]

# Convert the plain Python lists into pandas Series.
life_expectancy = pd.Series(life_expectancy_values)
gdp = pd.Series(gdp_values)
# pandas 的 Series 和 NumPy 數組有很多一樣的操作:
# (1) 截取部分
# Access a single element of the Series.
print(life_expectancy[0])
# Result: 74.7

# Slice a range of elements; the printed result keeps the labels 3, 4, 5.
print(gdp[3:6])
# Result:
# 3      562.987685
# 4    13495.127466
# 5     9388.688523
# dtype: float64
# (2) 循環:
# Iterating over a Series yields its values one at a time.
for country_life_expectancy in life_expectancy:
    print('Examining life expectancy {}'.format(country_life_expectancy))
# Result:
# Examining life expectancy 74.7
# Examining life expectancy 75.0
# Examining life expectancy 83.4
# Examining life expectancy 57.6
# Examining life expectancy 74.6
# Examining life expectancy 75.4
# ...
# Examining life expectancy 67.3
# Examining life expectancy 70.6
# (3) 常用函數:
# Results below are the values recorded in the original write-up; newer
# Python/pandas versions may print more digits.

print(life_expectancy.mean())  # arithmetic mean
# Result:
# 72.87

# Standard deviation. Per the pandas documentation, Series.std() defaults to
# ddof=1, i.e. the sample standard deviation.
print(life_expectancy.std())
# Result:
# 6.21399947487

print(gdp.max())  # largest value
# Result:
# 27036.4873319

print(gdp.sum())  # sum of all values
# Result:
# 182957.59833
# (4) 向量化運算:
# Two small integer Series used to demonstrate vectorized operations.
a = pd.Series([1, 2, 3, 4])
b = pd.Series([1, 2, 1, 2])

# Element-wise addition of two Series.
print(a + b)
# Result:
# 0    2
# 1    4
# 2    4
# 3    6
# dtype: int64

# Multiplying by a scalar applies to every element.
print(a * 2)
# Result:
# 0    2
# 1    4
# 2    6
# 3    8
# dtype: int64

# A comparison produces a boolean Series.
print(a >= 3)
# Result:
# 0    False
# 1    False
# 2     True
# 3     True
# dtype: bool

# Indexing with a boolean Series keeps only the elements where it is True.
print(a[a >= 3])
# Result:
# 2    3
# 3    4
# dtype: int64