通俗易懂举栗子–怎么理解支持向量机(SVM)?
1. 讲解SVM
"""
@目的:展示svm
"""
import numpy as np
from sklearn import svm
import matplotlib.pyplot as plt
# Seed the global RNG so the toy dataset is the same on every run.
np.random.seed(2)

# Two clouds of 20 standard-normal points each, shifted to (-2, -2) and (2, 2).
negative_cloud = np.random.randn(20, 2) - [2, 2]
positive_cloud = np.random.randn(20, 2) + [2, 2]
x = np.vstack((negative_cloud, positive_cloud))
y = [label for label in (-1, 1) for _ in range(20)]
print(x)
print(y)

# Fit a linear-kernel SVM on the two clouds.
model = svm.SVC(kernel='linear', C=1.0)
model.fit(x, y)

# Print the support vectors, their indices in x, and the per-class counts.
support_vectors = model.support_vectors_
for fitted_attribute in (support_vectors, model.support_, model.n_support_):
    print(fitted_attribute)

# Classify three hand-picked query points.
x_ = np.array([[0, 1], [3, 4], [-1, -1]])
y_pred = model.predict(x_)
print(y_pred)

# The lines drawn below solve w0 * x1 + w1 * x2 + bias = offset for x2.
w0, w1 = model.coef_[0]
bias = model.intercept_[0]

# Support vectors go first (larger markers) so the data points drawn on top
# leave a visible ring around them.
plt.scatter(support_vectors[:, 0], support_vectors[:, 1], s=100, edgecolors='k')
plt.scatter(x[:, 0], x[:, 1], c=y, cmap=plt.cm.coolwarm, marker='o', s=50)

# Solid line: decision boundary (offset 0); dashed lines: the two margins.
x1 = np.linspace(-5, 4, 100)
for offset, line_style in ((0, 'k'), (-1, 'k--'), (1, 'k--')):
    plt.plot(x1, (-w0 * x1 - bias + offset) / w1, line_style)

# Query points are drawn as triangles coloured by their predicted label.
plt.scatter(x_[:, 0], x_[:, 1], c=y_pred, marker='^', s=80)
plt.axis('tight')
plt.show()
2. 使用svm做鸢尾花分类
"""
@目的:用svm做鸢尾花分类
"""
from sklearn import datasets, svm
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import colors
import numpy as np
# The plots below use Chinese labels: select the SimHei font for them and turn
# off the Unicode minus sign (presumably because that font lacks the glyph).
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Load the iris dataset once and expose its parts under module-level names.
iris = datasets.load_iris()
iris_feature, iris_target, iris_target_name = iris.data, iris.target, iris.target_names
def show():
    """Scatter-plot every iris sample by its first two features, one colour per class.

    Reads the module-level ``iris_feature``, ``iris_target`` and
    ``iris_target_name`` arrays and opens a matplotlib window. Takes no
    arguments and returns nothing.

    Legend labels are built from ``iris_target_name`` so they always match the
    dataset's own class order (the previous hard-coded labels were assigned to
    the wrong classes). The y-axis is labelled as sepal width because feature
    column 1 of the iris data is sepal width, not petal length.
    """
    for class_index, color in enumerate(('r', 'g', 'b')):
        # Boolean mask selecting the samples that belong to this class.
        members = iris_target == class_index
        plt.scatter(
            x=iris_feature[members, 0],
            y=iris_feature[members, 1],
            color=color,
            label='Iris-' + str(iris_target_name[class_index]),
        )
    plt.xlabel("花萼长度", fontsize=20)
    plt.ylabel("花萼宽度", fontsize=20)
    plt.title("数据集展示", fontsize=20)
    plt.legend(fontsize=20)
    plt.show()
if __name__ == '__main__':
    # Hold out 33% of the samples for testing; the fixed seed keeps the split stable.
    split = train_test_split(iris_feature, iris_target, test_size=0.33, random_state=10)
    feature_train, feature_test, target_train, target_test = split

    # RBF-kernel SVM with a one-vs-rest shaped decision function.
    model = svm.SVC(C=1.0, kernel='rbf', decision_function_shape='ovr', gamma=0.01)
    model.fit(feature_train, target_train)

    # Mean accuracy on the training and test sets.
    print("训练集:", model.score(feature_train, target_train))
    print("测试集:", model.score(feature_test, target_test))

    # Print (true label, predicted label) pairs for the test set.
    target_test_predict = model.predict(feature_test)
    print(list(zip(target_test, target_test_predict)))

    # Left panel: true labels; right panel: predictions. Both panels plot the
    # test samples by their first two feature columns.
    plt.figure()
    for panel, panel_labels in ((121, target_test), (122, target_test_predict)):
        plt.subplot(panel)
        plt.scatter(
            feature_test[:, 0],
            feature_test[:, 1],
            c=panel_labels.reshape(-1),
            edgecolors='k',
            s=50,
        )
    plt.show()
有时间再将三个文件内容进行完善。