参考了han同学的答案,数据集也可在han同学的github上下载。
3.4 选择两个 UCI 数据集,比较 10 折交叉验证法和留一法所估计出的对率回归的错误率.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
# Exercise 3.4: estimate logistic-regression accuracy on a UCI dataset with
# 10-fold cross-validation and with leave-one-out, and print both mean scores.
data_path = r'Transfusion.txt'
# Comma-separated numeric file; every value is cast to int after loading.
data = np.loadtxt(data_path, delimiter=',').astype(int)
X = data[:, :4]  # first four columns are used as the features
y = data[:, 4]   # fifth column is used as the label
m, n = X.shape   # row and column counts of X

# Standardize each column with its own mean and standard deviation.
# The mean needs axis 0 just like the std: a bare X.mean() is a single scalar
# over the whole matrix and would leave the individual columns uncentered.
X = (X - X.mean(0)) / X.std(0)

# The same estimator configuration is evaluated under both splitting schemes.
lr = LogisticRegression(C=2)

# 10-fold cross-validation: cross_val_score yields one score per fold.
kfold = KFold(n_splits=10)
score = cross_val_score(lr, X, y, cv=kfold)
print('acc of k-10: {}'.format(score.mean()))

# Leave-one-out: each sample is held out once as the test set.
loocv = LeaveOneOut()
score = cross_val_score(lr, X, y, cv=loocv)
print('acc of loo: {}'.format(score.mean()))