import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import seaborn as sns
# Global matplotlib settings so that Chinese labels and minus signs render.
plt.rcParams.update({
    # Chinese font fallback order: KaiTi is preferred; if it is not found,
    # SimHei is used, then FangSong.
    'font.sans-serif': ['KaiTi', 'SimHei', 'FangSong'],
    # Base font size.
    'font.size': 12,
    # Display the minus sign correctly.
    'axes.unicode_minus': False,
})
# Load the dataset.
df = pd.read_csv("...//data2.csv")
# Simple linear regression of column "yyy" on the single column "xxx".
# Both arguments are selected with a one-element list, so each is passed to
# fit() as a one-column DataFrame. fit() returns the estimator itself.
reg = LinearRegression().fit(df[["xxx"]], df[["yyy"]])
# Fitted coefficient. As a bare expression this is only shown by an
# interactive session (e.g. as the last line of a notebook cell).
reg.coef_
# Scatter plot of the relationship between the two variables.
plt.scatter(df["xxx"], df["yyy"])
# Label the axes that the scatter call just drew on.
ax = plt.gca()
ax.set_title("xxx vs yyy")
ax.set_xlabel("xxx")
ax.set_ylabel("yyy")
# Save the current figure at 40 dpi.
plt.savefig("xxx vs yyy.png", dpi=40)
# Slice the frame by column position.
# x: the first four columns (positions 0-3).
x = df.iloc[:, :4]
# y: the fifth column (position 4), multiplied by 10.
# NOTE(review): the reason for the factor of 10 is not evident from this
# file -- confirm it is intended.
y = df.iloc[:, 4] * 10
# One regression panel of "yyy" against each of "xx1", "xx2" and "xx3".
pair_fig = sns.pairplot(
    data=df,
    kind="reg",
    y_vars="yyy",
    x_vars=["xx1", "xx2", "xx3"],
    aspect=0.7,
    height=5,
)
# Save the grid at 600 dpi.
# NOTE(review): the file name has no extension, so the output format is left
# to matplotlib's default -- confirm this is intended.
pair_fig.savefig("yyy vs xxxs", dpi=600)
# Randomly split the data into a training set (80%) and a test set (20%).
# A fixed random_state makes the shuffle deterministic, so the score and
# coefficients reported below are reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Multiple linear regression.
# Use LinearRegression(fit_intercept=False) instead to fit without an
# intercept.
clf = LinearRegression()

# Fit the model on the training set.
clf.fit(x_train, y_train)

# Predict y from the test-set features. The result is bound to a name and
# printed so it is not silently discarded outside an interactive session.
y_pred = clf.predict(x_test)
print("predictions:", y_pred)

# Evaluate the model: R^2 score on the test set.
r2 = clf.score(x_test, y_test)
print("R^2:", r2)

# Coefficients estimated by the multiple linear regression.
print("coefficients:", clf.coef_)