最近用 scikit-learn 试了一下半监督学习(自训练,SelfTrainingClassifier),这里分享一下我写的代码。我的数据集是按"类别目录/文件名.npy"组织的特征文件,部分文件路径示例如下:
['./FeatureLearningRotNet/feat/train/disgust/Training_73540712.jpg.npy', './FeatureLearningRotNet/feat/train/disgust/Training_82025016.jpg.npy', './FeatureLearningRotNet/feat/train/disgust/Training_98173595.jpg.npy', './FeatureLearningRotNet/feat/train/disgust/Training_96698508.jpg.npy', './FeatureLearningRotNet/feat/train/disgust/Training_76134387.jpg.npy']
训练
import glob
import os

import numpy as np
from sklearn import datasets
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.svm import SVC
from tqdm import tqdm
# Glob pattern for the training features: one sub-directory per class,
# one .npy file per sample.
data_dir = './FeatureLearningRotNet/feat/train/*/*.npy'

# glob.glob returns matches in arbitrary (filesystem-dependent) order.
# Sorting makes the sample order -- and therefore the seeded "unlabeled"
# mask applied during training -- reproducible across machines.
file_names = sorted(glob.glob(data_dir))

# Class names; the position in this list is the integer label id, so the
# order must not change.
classes = ['happy', 'fear', 'disgust', 'angry', 'neutral', 'sad', 'surprise']
def load_feat(file_name: str) -> np.ndarray:
    """Load a saved feature array and reduce it to a 1-D feature vector.

    The array stored in ``file_name`` is averaged over its axes 1 and 2 and
    the result is flattened.

    Args:
        file_name: Path to a ``.npy`` file readable by ``np.load``.

    Returns:
        A 1-D array holding the per-entry mean over axes 1 and 2.

    Raises:
        numpy.exceptions.AxisError (``numpy.AxisError`` on older NumPy): if
        the stored array has fewer than three dimensions.
    """
    data = np.load(file_name)
    # NOTE(review): presumably a (channels, height, width) feature map, so
    # this is a global average pool per channel -- confirm against the
    # feature extractor that wrote the files.
    d1 = np.mean(data, axis=(1, 2))
    return d1.flatten()
# --- Training: self-training (semi-supervised) SVM on the saved features ---

if not file_names:
    # Fail early with a clear message instead of an obscure error from
    # NumPy / scikit-learn when the glob pattern matched nothing.
    raise FileNotFoundError('No feature files matched: {}'.format(data_dir))

# Reading the .npy files is the slow step, so the progress bar wraps it.
feats = [load_feat(item) for item in tqdm(file_names)]

# The class name is the parent directory of each file. os.path handles the
# platform's path separator (a plain split('/') fails on Windows paths).
# A directory name that is not in `classes` raises ValueError.
labels = np.array([
    classes.index(os.path.basename(os.path.dirname(item)))
    for item in file_names
])
train_data = np.array(feats)

# Randomly mark about 30% of the samples as unlabeled; scikit-learn's
# semi-supervised estimators use -1 as the "unlabeled" marker.
rng = np.random.RandomState(42)
random_unlabeled_points = rng.rand(len(feats)) < 0.3
labels[random_unlabeled_points] = -1

# SelfTrainingClassifier requires a base estimator with predict_proba,
# hence probability=True on the SVC.
svc = SVC(probability=True, gamma="auto")
self_training_model = SelfTrainingClassifier(svc)
self_training_model.fit(train_data, labels)
测试
# --- Evaluation: accuracy of the trained model on the test features ---

test_data_dir = './FeatureLearningRotNet/feat/test/*/*.npy'
test_file_names = glob.glob(test_data_dir)
if not test_file_names:
    # Without this guard an empty test set ends in ZeroDivisionError below.
    raise FileNotFoundError(
        'No feature files matched: {}'.format(test_data_dir))

test_feats = [load_feat(item) for item in test_file_names]

# The ground-truth class is the parent directory name; os.path handles the
# platform's path separator (a plain split('/') fails on Windows paths).
test_labels = [
    classes.index(os.path.basename(os.path.dirname(item)))
    for item in test_file_names
]

res = self_training_model.predict(test_feats)

# Count the correct predictions with one vectorized comparison.
count = int(np.sum(res == np.asarray(test_labels)))
accuracy = count / len(test_labels)
print('Accuracy: {}%'.format(round(accuracy * 100, 2)))
参考文献
sklearn.semi_supervised.SelfTrainingClassifier
|