1. 导入相关包
# 导入相关包
import numpy as np
import pandas as pd
# 导入数据集
from sklearn.datasets import load_iris
# 导入模型
from sklearn.tree import DecisionTreeClassifier
# 数据分割包
from sklearn.model_selection import train_test_split
# 评价包
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
# 规范化数据包
from sklearn import preprocessing
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
2. 导入数据集,进行数据探索
# Load the dataset from the CSV file in the current working directory.
csv_path = './bill_authentication.csv'
data = pd.read_csv(csv_path)
# Notebook-style expression: evaluates to the (rows, columns) tuple.
data.shape
- 1
- 2
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes).
data.info()
- 1
数据没有缺失值。
# Notebook-style expression: compute descriptive statistics for the DataFrame.
data.describe()
- 1
3. 特征提取、数据进行规范化
# Features are the first four columns; the label is the fifth column.
x = data.iloc[:, :4]
y = data.iloc[:, 4]
# Hold out 40% of the rows as the test split.
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.4)
# Standardize the features. The scaler's mean/std are learned from the
# training split only and then reused on the test split; refitting on the
# test data would scale the two splits with different statistics and leak
# information about the test set into preprocessing.
ss = preprocessing.StandardScaler()
ss_train_x = ss.fit_transform(train_x)
ss_test_x = ss.transform(test_x)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
4. 创建模型,进行训练,预测,评分
4.1 分类决策树模型
# Train a decision tree on the standardized training features, then predict
# labels for the standardized test features.
dtc = DecisionTreeClassifier().fit(ss_train_x, train_y)
predict_y = dtc.predict(ss_test_x)
# Report both metrics against the true test labels.
print("mean_squared_error:", mean_squared_error(y_true=test_y, y_pred=predict_y))
print('accuracy_score:', accuracy_score(y_true=test_y, y_pred=predict_y))
- 1
- 2
- 3
- 4
- 5
- 6
4.2 SVM分类模型
from sklearn.svm import SVC

# Support-vector classifier with default hyperparameters: fit on the
# standardized training split, predict on the standardized test split.
svc = SVC().fit(ss_train_x, train_y)
predict_y = svc.predict(ss_test_x)
# Score the predictions against the true test labels.
print("mean_squared_error:", mean_squared_error(y_true=test_y, y_pred=predict_y))
print('accuracy_score:', accuracy_score(y_true=test_y, y_pred=predict_y))
- 1
- 2
- 3
- 4
- 5
- 6
- 7
4.3 KNN分类模型
from sklearn.neighbors import KNeighborsClassifier

# K-nearest-neighbours classifier with default hyperparameters: fit on the
# standardized training split, predict on the standardized test split.
knn = KNeighborsClassifier().fit(ss_train_x, train_y)
predict_y = knn.predict(ss_test_x)
# Score the predictions against the true test labels.
print("mean_squared_error:", mean_squared_error(y_true=test_y, y_pred=predict_y))
print('accuracy_score:', accuracy_score(y_true=test_y, y_pred=predict_y))
- 1
- 2
- 3
- 4
- 5
- 6
- 7
4.4 AdaBoost分类模型
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost classifier with default hyperparameters: fit on the standardized
# training split, predict on the standardized test split.
ada = AdaBoostClassifier().fit(ss_train_x, train_y)
predict_y = ada.predict(ss_test_x)
# Score the predictions against the true test labels.
print("mean_squared_error:", mean_squared_error(y_true=test_y, y_pred=predict_y))
print('accuracy_score:', accuracy_score(y_true=test_y, y_pred=predict_y))
- 1
- 2
- 3
- 4
- 5
- 6
- 7
总结:KNN分类的评分最高;原文"其次是KNN分类"与前半句重复,第二名应为另一种模型(需根据实际运行结果确认)。由于数据划分未固定随机种子,每次运行的评分会略有不同。