import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import seaborn as sns
plt.rcParams['font.sans-serif'] = ['KaiTi', 'SimHei', 'FangSong']  # CJK fonts: prefer KaiTi, fall back to SimHei, then FangSong
plt.rcParams['font.size'] = 12  # base font size
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly with CJK fonts
df = pd.read_csv("...//data2.csv")
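# (Optional addition, not in the original) Quick sanity check of the loaded data.
df.head()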
# Simple linear regression
reg = LinearRegression()
reg.fit(df[["xxx"]], df[["yyy"]])
reg.coef_  # fitted slope
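# (Optional sketch, not in the original) The intercept is available alongside the slope;
# printing both gives the fitted equation yyy = intercept + coef * xxx.
print(reg.intercept_, reg.coef_)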
# Scatter plot of the relationship between the two variables
plt.title("xxx vs yyy")
plt.xlabel("xxx")
plt.ylabel("yyy")
plt.scatter(df["xxx"], df["yyy"])
plt.savefig("xxx vs yyy.png", dpi=40)
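# (Optional sketch, not in the original; the output filename below is hypothetical)
# Overlay the fitted line from `reg` on the same scatter to visualize the fit.
plt.figure()
plt.title("xxx vs yyy")
plt.xlabel("xxx")
plt.ylabel("yyy")
plt.scatter(df["xxx"], df["yyy"])
plt.plot(df["xxx"], reg.predict(df[["xxx"]]), color="red", label="fitted line")
plt.legend()
plt.savefig("xxx vs yyy fit.png", dpi=40)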
# Slicing: the target is the 5th column, the features are the first four columns
y = df.iloc[:, 4]
y = y * 10  # rescale the target by a factor of 10
x = df.iloc[:, 0:4]
# Pairwise regression plots of each feature against the target
pair_fig = sns.pairplot(df, x_vars=["xx1", "xx2", "xx3"], y_vars="yyy", kind="reg", height=5, aspect=0.7)
pair_fig.savefig("yyy vs xxxs.png", dpi=600)
# Randomly split into training and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
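# (Optional, not in the original) Fixing random_state makes the split reproducible:
# x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)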
clf = LinearRegression()
# clf = LinearRegression(fit_intercept=False)  # this would fit a model without an intercept
# Fit the model on the training set
clf.fit(x_train, y_train)
# Predict y from the test-set x
clf.predict(x_test)
# Evaluate the model: R^2 score on the test set
clf.score(x_test, y_test)
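# (Optional sketch, not in the original) Additional error metrics on the same test split.
from sklearn.metrics import mean_absolute_error, mean_squared_error
y_pred = clf.predict(x_test)
print("MAE:", mean_absolute_error(y_test, y_pred))
print("MSE:", mean_squared_error(y_test, y_pred))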
# Coefficients of the multiple linear regression
clf.coef_
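# (Optional sketch, not in the original) Pair each coefficient with its feature name,
# and show the intercept separately.
print(list(zip(x.columns, clf.coef_)))
print(clf.intercept_)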