简介
对 Intel Image Classification 的场景图像进行多分类。
数据集是世界各地自然景观的图像,包含约 25000 张大小为 150x150 的图像,共 6 个类别。训练集(seg_train)、测试集(seg_test)、预测集(seg_pred)分别约有 14000、3000、7000 张图像。
{'mountain': 0, 'street': 1, 'glacier': 2, 'buildings': 3, 'sea': 4, 'forest': 5}
| 类名 | 标签 | 翻译 |
|---|---|---|
| mountain | 0 | 山脉 |
| street | 1 | 街道 |
| glacier | 2 | 冰川 |
| buildings | 3 | 建筑 |
| sea | 4 | 海洋 |
| forest | 5 | 森林 |
本文代码下载
安装
pip install tensorflow-gpu==2.3.0
pip install scikit-learn
pip install seaborn
登录后下载 Intel Image Classification 数据集并解压,已上传百度网盘(pkdk)
导包
import os
import json
import time
import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
from sklearn import decomposition
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.python.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten
加载数据
# Dataset configuration: images are loaded at this (height, width).
target_size = (150, 150)
datasets = ['seg_train/seg_train', 'seg_test/seg_test']

# Every sub-directory of the training split is one class. Plain files (for
# example a stray ``.DS_Store``) are ignored so they cannot turn into bogus
# classes. The os.listdir() order is kept unchanged; it defines the label ids.
class_names = [
    entry for entry in os.listdir(datasets[0])
    if os.path.isdir(os.path.join(datasets[0], entry))
]

# Persist the class order so label ids can be mapped back to names later.
# The context manager guarantees the file is flushed and closed.
with open('class_names.json', mode='w') as class_names_file:
    json.dump(class_names, class_names_file)

class_names_label = {class_name: i for i, class_name in enumerate(class_names)}
nb_classes = len(class_names)
print(class_names_label)
def load_data():
    """Load every split listed in the module-level ``datasets``.

    Each split directory is expected to contain one sub-directory per class,
    named like a key of ``class_names_label``. Entries that are not
    directories are skipped instead of aborting the whole load.

    Returns:
        A list with one ``(images, labels)`` tuple per entry of ``datasets``,
        in the same order. ``images`` is a float32 array built from the
        resized images and ``labels`` is the matching int32 array of class ids.

    Raises:
        KeyError: If a class directory has no entry in ``class_names_label``.
    """
    output = []
    for dataset in datasets:
        images = []
        labels = []
        for folder in os.listdir(dataset):
            folder_path = os.path.join(dataset, folder)
            # Stray files next to the class folders are not classes.
            if not os.path.isdir(folder_path):
                continue
            label = class_names_label[folder]
            print('加载{}'.format(folder_path))
            for file in os.listdir(folder_path):
                path = os.path.join(folder_path, file)
                image = load_img(path, target_size=target_size)
                image = img_to_array(image)
                images.append(image)
                labels.append(label)
        images = np.array(images, dtype='float32')
        labels = np.array(labels, dtype='int32')
        output.append((images, labels))
    return output
# load_data() returns one (images, labels) pair per entry of `datasets`,
# i.e. the training split first and the test split second.
(train_images, train_labels), (test_images, test_labels) = load_data()
# Shuffle images and labels together so they stay aligned; the fixed
# random_state makes the resulting order repeatable between runs.
train_images, train_labels = shuffle(train_images, train_labels, random_state=25)
print('加载完毕')
浏览数据
# Number of samples in each split.
n_train = train_labels.shape[0]
n_test = test_labels.shape[0]
print('训练集样本数 {}'.format(n_train))
print('测试集样本数 {}'.format(n_test))

# Images per class id. bincount with minlength keeps exactly one slot per
# class even when a class has no image in a split, so the counts always line
# up with class_names.
train_counts = np.bincount(train_labels, minlength=nb_classes)
test_counts = np.bincount(test_labels, minlength=nb_classes)

# Side-by-side bar chart of the class distribution in both splits.
pd.DataFrame({'train': train_counts, 'test': test_counts}, index=class_names).plot.bar()
plt.show()

# Share of each class in the training split. No wedge is exploded, so the
# explode argument is omitted rather than hard-coding one zero per class.
plt.pie(train_counts, labels=class_names, autopct='%1.1f%%')
plt.axis('equal')
plt.title('Proportion of each observed category')
plt.show()
数据预处理,将像素值缩放到 [0, 1]
# Divide by 255 in place: the arrays are already float32, so the values are
# the same as with `images / 255.0`, but no second full-size copy of each
# image array is allocated.
train_images /= 255.0
test_images /= 255.0
可视化数据,随机显示
def display_random_image(class_names, images, labels):
    """Show one randomly picked image with its index and class name as title.

    Args:
        class_names: Sequence that maps a label id to its class name.
        images: Array of images, indexed along the first axis.
        labels: Label ids aligned with ``images``.
    """
    picked = np.random.randint(images.shape[0])

    plt.figure()
    plt.imshow(images[picked])
    # Ticks and grid carry no information for a picture, so hide them.
    plt.grid(False)
    plt.yticks([])
    plt.xticks([])
    caption = 'Image {} : '.format(picked) + class_names[labels[picked]]
    plt.title(caption)
    plt.show()


display_random_image(class_names, train_images, train_labels)
可视化数据,批量显示
def display_examples(class_names, images, labels, title='Some examples of images of the dataset'):
    """Show up to the first 25 images in a 5x5 grid, each with its class name.

    Args:
        class_names: Sequence that maps a label id to its class name.
        images: Array (or sequence) of images.
        labels: Label ids aligned with ``images``.
        title: Heading drawn above the grid.
    """
    # Never index past the data: callers may pass fewer than 25 images
    # (for example a short list of misclassified samples).
    n_shown = min(25, len(images), len(labels))

    fig = plt.figure(figsize=(10, 10))
    fig.suptitle(title, fontsize=16)
    for i in range(n_shown):
        plt.subplot(5, 5, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(images[i])
        plt.xlabel(class_names[labels[i]])
    plt.show()


display_examples(class_names, train_images, train_labels)
创建简单模型
步骤:
- 构建模型
- 编译模型
- 训练模型
- 在测试集上进行评估
- 误差分析
激活函数:
- relu:返回 max(x, 0)
- softmax:返回每个类的概率值
优化器:
- adam:结合了 RMSProp 与 Momentum 两种思想
- RMSProp:用过去梯度平方的指数加权平均值自适应地调整每个参数的学习率
- Momentum:用过去梯度的指数加权平均值来平滑当前的更新方向
损失函数:
- sparse_categorical_crossentropy:稀疏分类交叉熵,用于多分类任务
# The network takes three-channel images of the configured size.
input_shape = target_size + (3,)

# Two convolution/pooling stages followed by a small dense classifier.
model = tf.keras.Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D(pool_size=2, strides=2))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=2, strides=2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(nb_classes, activation='softmax'))

# Integer class ids are used as targets, hence the sparse loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# The last 20% of the training data is held out for validation.
history = model.fit(
    train_images,
    train_labels,
    epochs=20,
    batch_size=128,
    validation_split=0.2,
)
def plot_accuracy_loss(history):
    """Plot the accuracy and loss learning curves of a training run.

    Args:
        history: Object whose ``history`` dict holds the per-epoch series
            'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
    """
    curves = history.history
    panels = (
        ('accuracy', 'val_accuracy', 'model accuracy'),
        ('loss', 'val_loss', 'model loss'),
    )
    for position, (train_key, val_key, heading) in enumerate(panels):
        # The first panel is drawn on the current figure; every later panel
        # opens a figure of its own.
        if position:
            plt.figure()
        plt.plot(curves[train_key])
        plt.plot(curves[val_key])
        plt.title(heading)
        plt.ylabel(train_key)
        plt.xlabel('epoch')
        plt.legend(['train', 'validation'], loc='upper left')
    plt.show()


plot_accuracy_loss(history)
在测试集上的准确率仅有 77%,有一点欠拟合
# Score the simple CNN on the held-out test split.
test_loss, test_accuracy = model.evaluate(x=test_images, y=test_labels)
accuracy_percent = test_accuracy * 100
print('Test accuracy: {:.2f}% loss: {:.2f}'.format(accuracy_percent, test_loss))
随机显示预测结果
# One row of class probabilities per test image.
predictions = model.predict(test_images)
# The predicted class is the column with the highest probability.
pred_labels = predictions.argmax(axis=1)
# The title shows the PREDICTED label, not the ground truth.
display_random_image(class_names, test_images, pred_labels)
误差分析
def print_mislabeled_images(class_names, test_images, test_labels, pred_labels):
    """Show misclassified test images, captioned with the predicted class.

    Args:
        class_names: Sequence that maps a label id to its class name.
        test_images: Array of test images.
        test_labels: Ground-truth label ids aligned with ``test_images``.
        pred_labels: Predicted label ids aligned with ``test_images``.
    """
    # Boolean mask that is True wherever the prediction disagrees with the truth.
    is_wrong = test_labels != pred_labels
    display_examples(
        class_names,
        test_images[is_wrong],
        pred_labels[is_wrong],
        'Some examples of mislabeled images by the classifier',
    )


print_mislabeled_images(class_names, test_images, test_labels, pred_labels)
混淆矩阵
# Rows are true classes, columns are predicted classes. Passing `labels`
# pins the matrix to one row/column per known class, so it always matches
# the tick labels even if a class never occurs in the test data.
data = confusion_matrix(test_labels, pred_labels, labels=np.arange(len(class_names)))
fig, ax = plt.subplots(figsize=(10, 6))
sn.set(font_scale=1.4)
sn.heatmap(
    data,
    annot=True,
    # Integer format: the default '.2g' prints counts >= 100 as e.g. 4.4e+02.
    fmt='d',
    annot_kws={'size': 10},
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title('Confusion matrix')
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
plt.show()
# Undo the seaborn styling so later matplotlib figures keep their defaults.
sn.reset_orig()
从混淆矩阵可以看出:森林(forest)最容易区分;冰川(glacier)和山脉(mountain)很相似,容易互相混淆;建筑(buildings)和街道(street)也很容易混淆。
使用预训练模型进行特征提取
VGG16 预训练模型
# ImageNet-pretrained VGG16 without its classifier top, used as a fixed
# feature extractor.
model = VGG16(weights='imagenet', include_top=False)
# Run both splits through the network, training split first.
train_features, test_features = (
    model.predict(split) for split in (train_images, test_images)
)
使用 sklearn.decomposition.PCA() 进行主成分分析
# Flatten every feature map to one vector per image before running PCA.
n_train, x, y, z = train_features.shape
pca = decomposition.PCA(n_components=2)
X = train_features.reshape((n_train, x * y * z))
# fit() returns the estimator itself, so the projection can be chained.
C = pca.fit(X).transform(X)

plt.figure(figsize=(10, 10))
for i, class_name in enumerate(class_names):
    # At most the first 1000 projected points of each class are drawn.
    points = C[train_labels == i][:1000]
    plt.scatter(points[:, 0], points[:, 1], label=class_name, alpha=0.4)
plt.legend()
plt.title('PCA Projection')
plt.show()
从 PCA 投影图可以看出:森林(forest)最容易区分;冰川(glacier)和山脉(mountain)的特征很接近;建筑(buildings)和街道(street)的特征大量重叠,难以区分。
用预训练模型进行特征提取,再训练分类器
# Small dense classifier trained on the pre-computed VGG16 features.
# (x, y, z) is the per-image feature shape unpacked from train_features.shape.
model2 = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(x, y, z)),
    tf.keras.layers.Dense(50, activation='relu'),
    # One output unit per class, taken from the data instead of a literal 6.
    tf.keras.layers.Dense(nb_classes, activation='softmax'),
])
model2.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
history2 = model2.fit(train_features, train_labels, batch_size=128, epochs=15, validation_split=0.2)
plot_accuracy_loss(history2)
在测试集上的准确率达到 88%,有了很大的提升
# Score the dense head on the VGG16 features of the test split.
test_loss, test_accuracy = model2.evaluate(x=test_features, y=test_labels)
accuracy_percent = test_accuracy * 100
print('Test accuracy: {:.2f}% loss: {:.2f}'.format(accuracy_percent, test_loss))
微调预训练模型
通过训练预训练模型的最高级的几层来学习高级特征
# Front part of VGG16, cut at the fifth layer from the end. It is only used
# to pre-compute features and is never trained.
model = VGG16(weights='imagenet', include_top=False)
model = Model(inputs=model.inputs, outputs=model.layers[-5].output)
train_features = model.predict(train_images)
test_features = model.predict(test_images)

# A second VGG16 instance supplies the remaining last four layers; they are
# re-applied below on a new input and trained together with the new head.
model2 = VGG16(weights='imagenet', include_top=False)
input_shape = model2.layers[-4].get_input_shape_at(0)
print(input_shape)

# Take the input shape from the features actually produced above instead of
# hard-coding (9, 9, 512), so a different target_size keeps working.
layer_input = Input(shape=train_features.shape[1:])
x = layer_input
for layer in model2.layers[-4:]:
    x = layer(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(100, activation='relu')(x)
# One output unit per class, taken from the data instead of a literal 6.
x = Dense(nb_classes, activation='softmax')(x)

new_model = Model(layer_input, x)
new_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
new_model.summary()
history = new_model.fit(train_features, train_labels, batch_size=128, epochs=10, validation_split=0.2)
plot_accuracy_loss(history)
在测试集上的准确率达到 89%,与特征提取方法差不多
# Score the fine-tuned model on the pre-computed test features.
test_loss, test_accuracy = new_model.evaluate(x=test_features, y=test_labels)
accuracy_percent = test_accuracy * 100
print('Test accuracy: {:.2f}% loss: {:.2f}'.format(accuracy_percent, test_loss))
保存模型
# Persist new_model to 'model.h5' so it can be reloaded later with
# tf.keras.models.load_model.
new_model.save('model.h5')
加载模型并预测
import os
import json
import time
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.python.keras.preprocessing.image import load_img, img_to_array
target_size = (150, 150)
# Class names in label-id order, as written by the training code.
with open('class_names.json') as class_names_file:
    class_names = json.load(class_names_file)

# Same truncated VGG16 that produced the features the saved model was trained on.
model = VGG16(weights='imagenet', include_top=False)
model = Model(inputs=model.inputs, outputs=model.layers[-5].output)

# time.clock() was removed in Python 3.8; perf_counter() is the replacement
# for measuring elapsed time.
start = time.perf_counter()
trained_model = tf.keras.models.load_model('model.h5')
print('Warming up took {:.2f}s'.format(time.perf_counter() - start))

dataset = 'seg_pred/seg_pred'
files = os.listdir(dataset)
while True:
    # Pick a random image from the prediction folder and show it.
    index = np.random.randint(len(files))
    file = files[index]
    path = os.path.join(dataset, file)
    x = load_img(path=path, target_size=target_size)
    plt.imshow(x)
    plt.show()

    x = img_to_array(x)
    # Add the batch axis and apply the same 1/255 scaling that was applied to
    # the training images; without it the input does not match the training data.
    x = np.expand_dims(x, axis=0) / 255.0

    start = time.perf_counter()
    features = model.predict(x)
    y = trained_model.predict(features)
    print(x.shape, features.shape, y.shape)
    print('Prediction took {:.2f}s'.format(time.perf_counter() - start))

    # Print the classes from most to least probable.
    for i in np.argsort(y[0])[::-1]:
        print('{}: {:.2f}%'.format(class_names[i], y[0][i] * 100), end=' ')
    print()

    q = input('回车继续,q退出')
    if q == 'q':
        break
汇总
1. 训练模型并保存
import os
import json
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.python.keras.preprocessing.image import load_img, img_to_array
def load_data(datasets):
    """Load every split directory listed in ``datasets``.

    Each split directory is expected to contain one sub-directory per class,
    named like a key of the module-level ``class_names_label``. Entries that
    are not directories are skipped instead of aborting the whole load.

    Args:
        datasets: Iterable of split directory paths.

    Returns:
        A list with one ``(images, labels)`` tuple per entry of ``datasets``,
        in the same order. ``images`` is a float32 array built from the
        resized images and ``labels`` is the matching int32 array of class ids.

    Raises:
        KeyError: If a class directory has no entry in ``class_names_label``.
    """
    output = []
    for dataset in datasets:
        images = []
        labels = []
        for folder in os.listdir(dataset):
            folder_path = os.path.join(dataset, folder)
            # Stray files next to the class folders are not classes.
            if not os.path.isdir(folder_path):
                continue
            label = class_names_label[folder]
            for file in os.listdir(folder_path):
                path = os.path.join(folder_path, file)
                image = load_img(path, target_size=target_size)
                image = img_to_array(image)
                images.append(image)
                labels.append(label)
        images = np.array(images, dtype='float32')
        labels = np.array(labels, dtype='int32')
        output.append((images, labels))
    return output
def plot_accuracy_loss(history):
    """Plot the accuracy and loss learning curves of a training run.

    Args:
        history: Object whose ``history`` dict holds the per-epoch series
            'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
    """
    curves = history.history
    panels = (
        ('accuracy', 'val_accuracy', 'model accuracy'),
        ('loss', 'val_loss', 'model loss'),
    )
    for position, (train_key, val_key, heading) in enumerate(panels):
        # The first panel is drawn on the current figure; every later panel
        # opens a figure of its own.
        if position:
            plt.figure()
        plt.plot(curves[train_key])
        plt.plot(curves[val_key])
        plt.title(heading)
        plt.ylabel(train_key)
        plt.xlabel('epoch')
        plt.legend(['train', 'validation'], loc='upper left')
    plt.show()
target_size = (150, 150)
datasets = ['seg_train/seg_train', 'seg_test/seg_test']

# Every sub-directory of the training split is one class; plain files are
# ignored so they cannot turn into bogus classes. The os.listdir() order is
# kept unchanged because it defines the label ids.
class_names = [
    entry for entry in os.listdir(datasets[0])
    if os.path.isdir(os.path.join(datasets[0], entry))
]
# Persist the class order for the prediction script; the context manager
# guarantees the file is flushed and closed.
with open('class_names.json', mode='w') as class_names_file:
    json.dump(class_names, class_names_file)
class_names_label = {class_name: i for i, class_name in enumerate(class_names)}
nb_classes = len(class_names)
print(class_names_label)

print('加载图像中')
(train_images, train_labels), (test_images, test_labels) = load_data(datasets)
# Fixed seed keeps the shuffled order repeatable between runs.
train_images, train_labels = shuffle(train_images, train_labels, random_state=25)
train_images = train_images / 255.0
test_images = test_images / 255.0

# ImageNet-pretrained VGG16 without its classifier top, used as a fixed
# feature extractor.
model = VGG16(include_top=False, weights='imagenet')
train_features = model.predict(train_images)
test_features = model.predict(test_images)
n_train, x, y, z = train_features.shape

# Small dense classifier trained on the extracted features.
model2 = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(x, y, z)),
    tf.keras.layers.Dense(50, activation='relu'),
    # One output unit per class, taken from the data instead of a literal 6.
    tf.keras.layers.Dense(nb_classes, activation='softmax'),
])
model2.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
history2 = model2.fit(train_features, train_labels, batch_size=128, epochs=15, validation_split=0.2)

test_loss, test_accuracy = model2.evaluate(test_features, test_labels)
print('Test accuracy: {:.2f}% loss: {:.2f}'.format(test_accuracy * 100, test_loss))
plot_accuracy_loss(history2)

model2.save('model.h5')
2. 加载模型并预测
import os
import json
import time
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.python.keras.preprocessing.image import load_img, img_to_array
target_size = (150, 150)
# Class names in label-id order, as written by the training script.
with open('class_names.json') as class_names_file:
    class_names = json.load(class_names_file)

# Same headless VGG16 that the training script used to extract features.
model = VGG16(weights='imagenet', include_top=False)

# time.clock() was removed in Python 3.8; perf_counter() is the replacement
# for measuring elapsed time.
start = time.perf_counter()
trained_model = tf.keras.models.load_model('model.h5')
print('Warming up took {:.2f}s'.format(time.perf_counter() - start))

dataset = 'seg_pred/seg_pred'
files = os.listdir(dataset)
while True:
    # Pick a random image from the prediction folder and show it.
    index = np.random.randint(len(files))
    file = files[index]
    path = os.path.join(dataset, file)
    x = load_img(path=path, target_size=target_size)
    plt.imshow(x)
    plt.show()

    x = img_to_array(x)
    # Add the batch axis and apply the same 1/255 scaling that the training
    # script applied; without it the input does not match the training data.
    x = np.expand_dims(x, axis=0) / 255.0

    start = time.perf_counter()
    features = model.predict(x)
    y = trained_model.predict(features)
    print('Prediction took {:.2f}s'.format(time.perf_counter() - start))

    # Print the classes from most to least probable.
    for i in np.argsort(y[0])[::-1]:
        print('{}: {:.2f}%'.format(class_names[i], y[0][i] * 100), end=' ')
    print()

    q = input('回车继续,q退出')
    if q == 'q':
        break
官方文档
- NumPy Documentation
- pandas Documentation
- seaborn Documentation
- Matplotlib Documentation
- scikit-learn Documentation
- TensorFlow Documentation
参考文献
- Intel Image Classification (CNN - Keras) | Kaggle
- sklearn分类任务性能度量——以MNIST为例的精度、查准率、查全率、F1、ROC、AUC
- Python进行图片t-SNE降维可视化
- Keras可视化中间激活
- Seaborn configuration hides default matplotlib
- TensorFlow深入了解损失函数Categorical Cross-Entropy Loss、Binary Cross-Entropy Loss等
|