[人工智能] SOM自组织（竞争型）神经网络（Python实现）

开发: C++知识库 Java知识库 JavaScript Python PHP知识库人工智能区块链大数据移动开发嵌入式开发工具数据结构与算法开发测试游戏开发网络协议系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑笔记本显卡显示器固态硬盘硬盘耳机手机 iphone vivo oppo 小米华为单反装机图拉丁

-> 人工智能 -> SOM自组织（竞争型）神经网络（Python实现） -> 正文阅读

[人工智能]SOM自组织（竞争型）神经网络（Python实现）

SOM是神经网络的一种，它可以将相互关系复杂且非线性的高维数据，映射到具有简单几何结构及相互关系的低维空间中进行展示。（低维映射能够反映高维特征之间的拓扑结构）

可以实现数据的可视化；聚类；分类；特征抽取等任务。（主要做数据可视化）

网络结构

模型训练过程

具体细节

代码实现

seeds_dataset数据集

链接: https://pan.baidu.com/s/1QYU70IGu8XEtHGDmm29KlA?pwd=yame 提取码: yame 复制这段内容后打开百度网盘手机App，操作更方便哦

训练模型

import numpy as np
import random
import tqdm
import matplotlib.pyplot as plt
import pandas as pd


# 利用高斯距离法计算临近点的权重
# X,Y 模板大小，c 中心点的位置， sigma 影响半径
def gaussion_neighborhood(X, Y, c, sigma):
    xx, yy = np.meshgrid(np.arange(X), np.arange(Y))
    d = 2 * sigma * sigma
    ax = np.exp(-np.power(xx - xx.T[c], 2) / d)
    ay = np.exp(-np.power(yy - yy.T[c], 2) / d)
    return (ax * ay).T


# 利用bubble距离法计算临近点的权重
# X,Y 模板大小，c 中心点的位置， sigma 影响半径
def bubble_neighborhood(X, Y, c, sigma):
    neigx = np.arange(X)
    neigY = np.arange(Y)

    ax = np.logical_and(neigx > c[0] - sigma,
                        neigx < c[0] + sigma)
    ay = np.logical_and(neigy > c[1] - sigma,
                        neigy < c[1] + sigma)
    return np.outer(ax, ay) * 1.


# 计算学习率
def get_learning_rate(lr, t, max_steps):
    return lr / (1 + t / (max_steps / 2))


# 计算欧式距离
def euclidean_distance(x, w):
    dis = np.expand_dims(x, axis=(0, 1)) - w
    return np.linalg.norm(dis, axis=-1)


# 特征标准化 (x-mu)/std
def feature_normalization(data):
    mu = np.mean(data, axis=0, keepdims=True)
    sigma = np.std(data, axis=0, keepdims=True)
    return (data - mu) / sigma


# 获取激活节点的位置
def get_winner_index(x, w, dis_fun=euclidean_distance):
    # 计算输入样本和各个节点的距离
    dis = dis_fun(x, w)

    # 找到距离最小的位置
    index = np.where(dis == np.min(dis))
    return (index[0][0], index[1][0])


def weights_PCA(X, Y, data):
    N, D = np.shape(data)
    weights = np.zeros([X, Y, D])

    pc_length, pc = np.linalg.eig(np.cov(np.transpose(data)))
    pc_order = np.argsort(-pc_length)
    for i, c1 in enumerate(np.linspace(-1, 1, X)):
        for j, c2 in enumerate(np.linspace(-1, 1, Y)):
            weights[i, j] = c1 * pc[pc_order[0]] + c2 * pc[pc_order[1]]
    return weights


# 计算量化误差
def get_quantization_error(datas, weights):
    w_x, w_y = zip(*[get_winner_index(d, weights) for d in datas])
    error = datas - weights[w_x, w_y]
    error = np.linalg.norm(error, axis=-1)
    return np.mean(error)


def train_SOM(X,
              Y,
              N_epoch,
              datas,
              init_lr=0.5,
              sigma=0.5,
              dis_fun=euclidean_distance,
              neighborhood_fun=gaussion_neighborhood,
              init_weight_fun=None,
              seed=20):
    # 获取输入特征的维度
    N, D = np.shape(datas)

    # 训练的步数
    N_steps = N_epoch * N

    # 对权重进行初始化
    rng = np.random.RandomState(seed)
    if init_weight_fun is None:
        weights = rng.rand(X, Y, D) * 2 - 1
        weights /= np.linalg.norm(weights, axis=-1, keepdims=True)
    else:
        weights = init_weight_fun(X, Y, datas)

    for n_epoch in range(N_epoch):
        print("Epoch %d" % (n_epoch + 1))
        # 打乱次序
        index = rng.permutation(np.arange(N))
        for n_step, _id in enumerate(index):
            # 取一个样本
            x = datas[_id]

            # 计算learning rate(eta)
            t = N * n_epoch + n_step
            eta = get_learning_rate(init_lr, t, N_steps)

            # 计算样本距离每个顶点的距离,并获得激活点的位置
            winner = get_winner_index(x, weights, dis_fun)

            # 根据激活点的位置计算临近点的权重
            new_sigma = get_learning_rate(sigma, t, N_steps)
            g = neighborhood_fun(X, Y, winner, new_sigma)
            g = g * eta

            # 进行权重的更新
            weights = weights + np.expand_dims(g, -1) * (x - weights)

        # 打印量化误差
        print("quantization_error= %.4f" % (get_quantization_error(datas, weights)))

    return weights


def get_U_Matrix(weights):
    X, Y, D = np.shape(weights)
    um = np.nan * np.zeros((X, Y, 8))  # 8邻域

    ii = [0, -1, -1, -1, 0, 1, 1, 1]
    jj = [-1, -1, 0, 1, 1, 1, 0, -1]

    for x in range(X):
        for y in range(Y):
            w_2 = weights[x, y]

            for k, (i, j) in enumerate(zip(ii, jj)):
                if (x + i >= 0 and x + i < X and y + j >= 0 and y + j < Y):
                    w_1 = weights[x + i, y + j]
                    um[x, y, k] = np.linalg.norm(w_1 - w_2)

    um = np.nansum(um, axis=2)
    return um / um.max()


if __name__ == "__main__":
    # seed 数据展示
    columns = ['area', 'perimeter', 'compactness', 'length_kernel', 'width_kernel',
               'asymmetry_coefficient', 'length_kernel_groove', 'target']
    data = pd.read_csv('seeds_dataset.txt',
                       names=columns,
                       sep='\t+', engine='python')
    labs = data['target'].values
    label_names = {1: 'Kama', 2: 'Rosa', 3: 'Canadian'}
    datas = data[data.columns[:-1]].values
    N, D = np.shape(datas)
    print(N, D)

    # 对训练数据进行正则化处理
    datas = feature_normalization(datas)

    # SOM的训练
    weights = train_SOM(X=9, Y=9, N_epoch=4, datas=datas, sigma=1.5, init_weight_fun=weights_PCA)

    # 获取UMAP
    UM = get_U_Matrix(weights)

    plt.figure(figsize=(9, 9))
    plt.pcolor(UM.T, cmap='bone_r')  # plotting the distance map as background
    plt.colorbar()

    markers = ['o', 's', 'D']
    colors = ['C0', 'C1', 'C2']

    for i in range(N):
        x = datas[i]
        w = get_winner_index(x, weights)
        i_lab = labs[i] - 1

        plt.plot(w[0] + .5, w[1] + .5, markers[i_lab], markerfacecolor='None',
                 markeredgecolor=colors[i_lab], markersize=12, markeredgewidth=2)

    plt.show()

结果图

SOM数据展示

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from SOM import train_SOM, feature_normalization, get_U_Matrix, get_winner_index, weights_PCA
from collections import defaultdict, Counter
import matplotlib.gridspec as gridspec

if __name__ == "__main__":

    # seed 数据展示
    columns = ['area', 'perimeter', 'compactness', 'length_kernel', 'width_kernel',
               'asymmetry_coefficient', 'length_kernel_groove', 'target']
    data = pd.read_csv('seeds_dataset.txt',
                       names=columns,
                       sep='\t+', engine='python')
    labs = data['target'].values
    label_names = {1: 'Kama', 2: 'Rosa', 3: 'Canadian'}
    datas = data[data.columns[:-1]].values
    N, D = np.shape(datas)
    print(N, D)

    # 对训练数据进行正则化处理
    datas = feature_normalization(datas)

    # SOM的训练
    X = 9
    Y = 9
    weights = train_SOM(X=X, Y=Y, N_epoch=4, datas=datas, sigma=1.5, init_weight_fun=weights_PCA)

    # 获取UMAP
    UM = get_U_Matrix(weights)

    print(UM)
    # '''画散点图'''

    # 显示UMAP
    plt.figure(1, figsize=(9, 9))
    plt.pcolor(UM.T, cmap='bone_r')  # plotting the distance map as background
    plt.colorbar()

    markers = ['o', 's', 'D']
    colors = ['C0', 'C1', 'C2']

    # 计算每个样本点投射后的坐标
    w_x, w_y = zip(*[get_winner_index(d, weights) for d in datas])
    w_x = np.array(w_x)
    w_y = np.array(w_y)

    # 分别把每一类的散点在响应的方格内进行打印（+随机位置偏移）
    for c in np.unique(labs):
        idx_target = (labs == c)
        plt.scatter(w_x[idx_target] + .5 + (np.random.rand(np.sum(idx_target)) - .5) * .8,
                    w_y[idx_target] + .5 + (np.random.rand(np.sum(idx_target)) - .5) * .8,
                    s=50, c=colors[c - 1], label=label_names[c])
    plt.legend(loc='upper right')
    plt.grid()
    # plt.show()

    ''' 画饼图'''
    # 计算输出层的每个节点上映射了哪些数据
    win_map = defaultdict(list)
    for x, lab in zip(datas, labs):
        win_map[get_winner_index(x, weights)].append(lab)

    # 统计每个输出节点上，映射了各类数据、各多少个
    for pos in win_map:
        win_map[pos] = Counter(win_map[pos])

    fig = plt.figure(2, figsize=(9, 9))
    # 按照 X,Y对画面进行分格
    the_grid = gridspec.GridSpec(Y, X, fig)
    print(the_grid)

    # 在每个格子里面画饼图
    for pos in win_map.keys():
        label_fracs = [win_map[pos][l] for l in label_names.keys()]

        plt.subplot(the_grid[Y - 1 - pos[1],
                             pos[0]], aspect=1)
        patches, texts = plt.pie(label_fracs)

    plt.legend(labels=label_names.values(), loc='upper left', bbox_to_anchor=(-6, 10))
    # plt.savefig('resulting_images/som_seed_pies.png')
    plt.show()