Module Import Summary
############## Modules in sklearn ############################################
### 001 KNN
from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(X_train, y_train)
accuracy = clf.score(X_test, y_test)  # mean accuracy on the test set
accuracy
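# To get the predicted labels themselves rather than the accuracy, call predict;
# a minimal sketch reusing the same fitted classifier:
y_predict = clf.predict(X_test)  # array of predicted class labels for X_test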
### 002 Datasets
from sklearn import datasets  # built-in toy datasets
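# A minimal sketch of loading a built-in dataset; the iris dataset is used here
# purely as an illustration, the other loaders (e.g. load_digits) work the same way:
iris = datasets.load_iris()
X = iris.data    # feature matrix, shape (150, 4)
y = iris.target  # class labels, shape (150,)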
### 003 Linear regression
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X_train_std, y_train)
lin_reg.score(X_test_std, y_test)
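# After fitting, the learned parameters can be inspected
# (standard LinearRegression attributes):
lin_reg.coef_       # one weight per feature
lin_reg.intercept_  # bias term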
############### sklearn.model_selection ######################################
### 001 Train/test split
from sklearn.model_selection import train_test_split
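# Typical usage; the test_size and random_state values below are just illustrative:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)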
### 002 Grid search
from sklearn.model_selection import GridSearchCV  # grid search over hyperparameters
clf = KNeighborsClassifier()
param_grid = [{
    'n_neighbors': [i for i in range(1, 11)],  # n_neighbors must be >= 1
    'weights': ['uniform', 'distance']
}]
gs_clf = GridSearchCV(clf, param_grid=param_grid)
gs_clf.fit(X_train, y_train)       # the search must be fitted before best_estimator_ exists
best_clf = gs_clf.best_estimator_  # best model found by the search
best_clf.score(X_test, y_test)
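# The chosen hyperparameters and the cross-validated score of the best model
# can also be read off the fitted search object:
gs_clf.best_params_  # e.g. {'n_neighbors': ..., 'weights': ...}
gs_clf.best_score_   # mean cross-validation score of the best estimator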
############### sklearn.metrics ##############################################
### 001 Accuracy
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_predict)  # fraction of correctly classified samples
### 002 Mean squared error
from sklearn.metrics import mean_squared_error
mean_squared_error(y_test, y_predict)
### 003 Mean absolute error
from sklearn.metrics import mean_absolute_error
mean_absolute_error(y_test, y_predict)
### 004 R^2 score
from sklearn.metrics import r2_score
r2_score(y_test, y_predict)
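# A tiny worked example on made-up numbers, just to show what the three metrics return
# (the values below are illustrative, not from any real model):
y_true_demo = [3.0, -0.5, 2.0, 7.0]
y_pred_demo = [2.5,  0.0, 2.0, 8.0]
mean_squared_error(y_true_demo, y_pred_demo)   # 0.375
mean_absolute_error(y_true_demo, y_pred_demo)  # 0.5
r2_score(y_true_demo, y_pred_demo)             # about 0.9486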
############### sklearn.preprocessing ########################################
### 001 Standardization: instantiate, fit, transform
from sklearn.preprocessing import StandardScaler
std_scaler = StandardScaler()
std_scaler.fit(X_train)                     # learn mean and variance from the training set only
X_train_std = std_scaler.transform(X_train)
X_test_std = std_scaler.transform(X_test)   # reuse the training-set statistics on the test set
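# Equivalent shortcut for the training set; the fitted scaler must still be
# reused (not refitted) when transforming the test set:
X_train_std = std_scaler.fit_transform(X_train)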
############### sklearn.preprocessing / sklearn.pipeline #####################
### 001 Polynomial regression: instantiate, fit, transform
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=2)
poly.fit(X)
X2 = poly.transform(X)
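# What the transform produces: with degree=2 and the default include_bias=True,
# a single feature x is expanded to [1, x, x^2]. A tiny illustration:
import numpy as np
poly_demo = PolynomialFeatures(degree=2)
poly_demo.fit_transform(np.array([[2.0], [3.0]]))  # [[1., 2., 4.], [1., 3., 9.]]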
from sklearn.pipeline import Pipeline  # Pipeline usage
from sklearn.preprocessing import StandardScaler
pipe_reg = Pipeline([
    ('poly', PolynomialFeatures(degree=2)),
    ('scaler', StandardScaler()),
    ('lin_reg', LinearRegression())
])
pipe_reg.fit(X, y)
y_predict = pipe_reg.predict(X)
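# Because the final step is a regressor, the pipeline itself exposes score(),
# which here returns the R^2 on the given data:
pipe_reg.score(X, y)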
############################ Ridge regression ################################
from sklearn.linear_model import Ridge
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
def pipeRegression(degree, alpha):
    return Pipeline([
        ("poly", PolynomialFeatures(degree=degree)),
        ("scaler", StandardScaler()),
        ("lin_reg", Ridge(alpha=alpha))
    ])
ridge1 = pipeRegression(50, 0.000001)
ridge1.fit(X_train, y_train)
y1_predict = ridge1.predict(X_test)
mean_squared_error(y_test, y1_predict)
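# The same helper makes it easy to compare regularization strengths; the alpha
# value below is just illustrative:
ridge2 = pipeRegression(50, 1.0)
ridge2.fit(X_train, y_train)
mean_squared_error(y_test, ridge2.predict(X_test))  # a larger alpha typically tames the degree-50 fit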
KNN Algorithm
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt
from collections import Counter
# Training data: two features per sample, two classes
X = [[3.4, 2.8],
     [3.1, 1.8],
     [1.5, 3.4],
     [3.6, 4.7],
     [2.3, 2.9],
     [7.4, 4.5],
     [5.7, 3.5],
     [9.2, 2.5],
     [7.9, 3.4]]
y = [0, 0, 0, 0, 0, 1, 1, 1, 1]
x_train = np.array(X)
y_train = np.array(y)
x = np.array([5.1, 3.4])  # a new sample to classify
from sklearn.neighbors import KNeighborsClassifier
cls = KNeighborsClassifier(n_neighbors=3)
cls.fit(x_train, y_train)
cls.predict(x.reshape(1, -1))
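# Beyond the hard class label, the classifier can also report the neighbour vote
# proportions and the neighbours themselves (standard KNeighborsClassifier methods):
cls.predict_proba(x.reshape(1, -1))  # per-class vote fractions among the 3 neighbours
cls.kneighbors(x.reshape(1, -1))     # (distances, indices) of the 3 nearest training samples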