給定兩組序列數據,求兩者的線性相關係數。
1:使用numpy
import random

import numpy as np

# Two sample series of 20 random integers each, drawn from [0, 10].
a = [random.randint(0, 10) for t in range(20)]
b = [random.randint(0, 10) for t in range(20)]

# Stack both series into one matrix first: each row is one variable.
ab = np.array([a, b])

# Covariance matrix of the two rows.
print(np.cov(ab))

# Correlation-coefficient matrix; the off-diagonal entry is corr(a, b).
print(np.corrcoef(ab))
2:使用pandas
import pandas as pd

# Compute the covariance and the correlation coefficient with pandas.
# A DataFrame is used as the data structure; `ab` (the matrix built in the
# numpy example) is transposed so that each series becomes one column.
dfab = pd.DataFrame(ab.T, columns=['A', 'B'])

# Covariance of columns A and B.
print(dfab.A.cov(dfab.B))

# Correlation coefficient of columns A and B.
print(dfab.A.corr(dfab.B))
3:使用原生函數
import random
import math

# Two sample series of 20 random integers each, drawn from [0, 10].
a = [random.randint(0, 10) for t in range(20)]
b = [random.randint(0, 10) for t in range(20)]


def mean(x):
    """Return the arithmetic mean of the values in x."""
    return sum(x) / len(x)


def de_mean(x):
    """Return a list with each element's deviation from the mean of x."""
    x_bar = mean(x)
    return [x_i - x_bar for x_i in x]


# Helper functions: dot product and sum of squares.
def dot(v, w):
    """Return the sum of element-wise products of v and w.

    Pairs are formed with zip, so extra elements of the longer
    argument are ignored.
    """
    return sum(v_i * w_i for v_i, w_i in zip(v, w))


def sum_of_squares(v):
    """Return the sum of the squared elements of v."""
    return dot(v, v)


def variance(x):
    """Return the variance of x, dividing by n - 1."""
    n = len(x)
    deviations = de_mean(x)
    return sum_of_squares(deviations) / (n - 1)


def standard_deviation(x):
    """Return the standard deviation of x (square root of variance(x))."""
    return math.sqrt(variance(x))


def covariance(x, y):
    """Return the covariance of x and y, dividing by len(x) - 1."""
    n = len(x)
    return dot(de_mean(x), de_mean(y)) / (n - 1)


def correlation(x, y):
    """Return the correlation coefficient of x and y.

    Returns 0 when either series has zero standard deviation, because
    the ratio would otherwise divide by zero.
    """
    stdev_x = standard_deviation(x)
    stdev_y = standard_deviation(y)
    if stdev_x > 0 and stdev_y > 0:
        return covariance(x, y) / stdev_x / stdev_y
    else:
        return 0


print(a)
print(b)
print(standard_deviation(a))
print(standard_deviation(b))
print(correlation(a, b))
4:使用 R、SPSS、Excel