《机器学习》习题2-2 数据集包括100个样本,其中正例反例各一半,假定学习算法所产生的模型是将新样本预测为训练样本数较多的类别,给出用10折交叉验证法对错误率进行评估的结果。 分析: 采用分层抽样时,每一折所抽取的训练集中正例反例各占一半,两类训练样本数相同,模型只能在两类之间随机预测;而每一折测试集中正反例的比例也一致,因此错误率的期望是0.5。 #由于本人是非计算机专业本科生,代码粗糙,运行起来也可能会有问题,仅供参考,代码有误还望指出~
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
%matplotlib inline
# Generate a toy two-class dataset: 100 samples with 2 features each, drawn
# around 2 cluster centers. random_state is fixed so reruns give the same data.
X, Y = make_blobs(
    n_samples=100,
    n_features=2,
    centers=2,
    random_state=0,
)
# Show the array shapes first, then the raw feature matrix and label vector.
for summary in (X.shape, Y.shape, X, Y):
    print(summary)
from sklearn.model_selection import KFold
# Partition the samples into 10 folds; each iteration holds one fold out as
# the test set and uses the remaining nine folds for training.
KF = KFold(n_splits=10)
for train_index, test_index in KF.split(X):
    print("TRAIN", train_index, "TEST", test_index)
    # Features and labels are indexed with the same index arrays so rows stay
    # aligned. The test features must come from X (the original code took
    # them from Y, which yielded labels instead of features).
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]
    print("对于数据X训练数据:", X_train)
    print("对于数据X测试数据:", X_test)
    print("对于数据Y训练数据:", Y_train)
    print("对于数据Y测试数据:", Y_test)
# Report how many splitting iterations the cross-validator performs.
print(KF.get_n_splits(X))
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn import datasets, svm
# Evaluate a default-parameter support vector classifier on (X, Y) with
# 10-fold cross-validation, scoring every fold by accuracy.
SVC = svm.SVC()
scores = cross_val_score(
    estimator=SVC,
    X=X,
    y=Y,
    scoring='accuracy',
    cv=10,
)
# Print the per-fold accuracies, then their mean.
for report in (scores, scores.mean()):
    print(report)
|