[人工智能] 人工智能帮你告别脸盲，王力宏，李治廷，蒲巴甲你们往哪里逃？基于MTCNN+arcface

开发: C++知识库 Java知识库 JavaScript Python PHP知识库人工智能区块链大数据移动开发嵌入式开发工具数据结构与算法开发测试游戏开发网络协议系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑笔记本显卡显示器固态硬盘硬盘耳机手机 iphone vivo oppo 小米华为单反装机图拉丁

-> 人工智能 -> 人工智能帮你告别脸盲，王力宏，李治廷，蒲巴甲你们往哪里逃？基于MTCNN+arcface -> 正文阅读

[人工智能]人工智能帮你告别脸盲，王力宏，李治廷，蒲巴甲你们往哪里逃？基于MTCNN+arcface

有些时候，你真的为一些人感到脸盲。比如下面三位。
请添加图片描述

他们是谁？？？？？？
不过我们可以借助神经网络来实现人脸识别。
人脸识别主要分为两个步骤：人脸检测（face detection）和人脸识别（face recognition）。前一步告诉你怎么在一张大图中找到人脸，第二步用找到的人脸和数据库去对比，确定是谁。
本文采用pytorch版本的MTCNN进行人脸检测，arcface进行人脸识别。本程序在较为严苛的阈值要求且在复杂环境下，仍能保证一定的鲁棒性。
下面是三人的识别效果图。
在这里插入图片描述

废话不说直接上代码。
代码头没啥好讲的

import argparse
from utils.utils import generate_bbox, py_nms, convert_to_square
from utils.utils import pad, calibrate_box, processed_image
from arc_face import *
from torch.nn import DataParallel
import os
import cv2
import numpy as np
# Command-line options: the only setting is the folder that holds the three
# MTCNN weight files (PNet.pth, RNet.pth, ONet.pth).
parser = argparse.ArgumentParser()
parser.add_argument(
    '--model_path',
    type=str,
    default='infer_models_weights',
    help='PNet、RNet、ONet三个模型文件存在的文件夹路径',
)
args = parser.parse_args()
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# module-level ``.to(device)`` calls do not fail on CPU-only machines. This is
# the same selection rule the ``__main__`` section of this script applies.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

获取 P、R、O 三个模型（PNet、RNet、ONet）

def _load_jit_model(file_name):
    """Load a TorchScript model from ``args.model_path`` onto ``device`` in eval mode."""
    # map_location remaps the stored tensors to ``device`` while loading, so
    # weights saved on a GPU can still be loaded when ``device`` is the CPU.
    model = torch.jit.load(os.path.join(args.model_path, file_name), map_location=device)
    model.to(device)
    model.eval()
    return model


# Load the P model; its class scores are soft-maxed over dim 0.
pnet = _load_jit_model('PNet.pth')
softmax_p = torch.nn.Softmax(dim=0)

# Load the R model; its class scores are soft-maxed over the last dim.
rnet = _load_jit_model('RNet.pth')
softmax_r = torch.nn.Softmax(dim=-1)

# Load the O model; its class scores are soft-maxed over the last dim.
onet = _load_jit_model('ONet.pth')
softmax_o = torch.nn.Softmax(dim=-1)

依次使用 PNet、RNet、ONet 进行预测并获取人脸检测结果，boxes_c 为人脸框坐标

# Run the PNet model on one image
def predict_pnet(infer_data):
    """Run PNet on a single image and return its outputs as NumPy arrays.

    Args:
        infer_data: array-like image data without a batch axis; a batch axis
            of size 1 is added here before the forward pass.

    Returns:
        ``(cls_prob, bbox_pred)``: the class scores after a softmax over
        dim 0, and the box regression output, both with the batch axis
        removed.
    """
    infer_data = torch.tensor(infer_data, dtype=torch.float32, device=device)
    infer_data = torch.unsqueeze(infer_data, dim=0)
    # Inference only: no_grad avoids building an autograd graph for every
    # pyramid level.
    with torch.no_grad():
        cls_prob, bbox_pred, _ = pnet(infer_data)
    # Remove only the batch axis added above. A bare squeeze() would also
    # remove any spatial axis of size 1, which breaks callers that index the
    # result as [channel, row, col].
    # NOTE(review): assumes pnet returns tensors with the batch axis first
    # (e.g. 1 x C x H x W) - confirm against the exported model.
    cls_prob = softmax_p(torch.squeeze(cls_prob, dim=0))
    bbox_pred = torch.squeeze(bbox_pred, dim=0)
    return cls_prob.detach().cpu().numpy(), bbox_pred.detach().cpu().numpy()
# Run the RNet model on a batch of crops
def predict_rnet(infer_data):
    """Run RNet on a batch of crops and return its outputs as NumPy arrays.

    Args:
        infer_data: array-like batch passed to the network unchanged apart
            from the conversion to a float32 tensor on ``device``.

    Returns:
        ``(cls_prob, bbox_pred)``: the class scores after a softmax over the
        last dim, and the box regression output.
    """
    infer_data = torch.tensor(infer_data, dtype=torch.float32, device=device)
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        cls_prob, bbox_pred, _ = rnet(infer_data)
        cls_prob = softmax_r(cls_prob)
    return cls_prob.detach().cpu().numpy(), bbox_pred.detach().cpu().numpy()
# Run the ONet model on a batch of crops
def predict_onet(infer_data):
    """Run ONet on a batch of crops and return its outputs as NumPy arrays.

    Args:
        infer_data: array-like batch passed to the network unchanged apart
            from the conversion to a float32 tensor on ``device``.

    Returns:
        ``(cls_prob, bbox_pred, landmark_pred)``: the class scores after a
        softmax over the last dim, the box regression output and the landmark
        output.
    """
    infer_data = torch.tensor(infer_data, dtype=torch.float32, device=device)
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        cls_prob, bbox_pred, landmark_pred = onet(infer_data)
        cls_prob = softmax_o(cls_prob)
    return (cls_prob.detach().cpu().numpy(),
            bbox_pred.detach().cpu().numpy(),
            landmark_pred.detach().cpu().numpy())
# Collect PNet candidate boxes over an image pyramid
def detect_pnet(im, min_face_size, scale_factor, thresh):
    """Run PNet over progressively rescaled copies of ``im`` and merge the hits.

    Args:
        im: input image handed to ``processed_image``; presumably an
            H x W x 3 array - TODO confirm against ``processed_image``.
        min_face_size: sets the first pyramid scale as ``12 / min_face_size``.
        scale_factor: the scale is multiplied by this after every pass.
        thresh: score threshold forwarded to ``generate_bbox``.

    Returns:
        ``None`` when no pass produced a box; otherwise an array with one row
        per box holding the four offset-corrected corner coordinates followed
        by the score.
    """
    net_size = 12
    # Ratio between the network input size and the smallest face to look for.
    scale = float(net_size) / min_face_size
    scaled = processed_image(im, scale)
    _, cur_h, cur_w = scaled.shape
    per_scale = []
    # Image pyramid: keep going while the rescaled image exceeds the net input.
    while min(cur_h, cur_w) > net_size:
        cls_map, reg = predict_pnet(scaled)
        candidates = generate_bbox(cls_map[1, :, :], reg, scale, thresh)
        # Prepare the next pyramid level before deciding whether to keep boxes.
        scale *= scale_factor
        scaled = processed_image(im, scale)
        _, cur_h, cur_w = scaled.shape
        if candidates.size != 0:
            # Per-level non-maximum suppression on the first five columns.
            kept = py_nms(candidates[:, :5], 0.5, mode='Union')
            per_scale.append(candidates[kept])
    if not per_scale:
        return None
    merged = np.vstack(per_scale)
    # Second non-maximum suppression across all pyramid levels.
    merged = merged[py_nms(merged[:, 0:5], 0.7, mode='Union')]
    # Box width and height.
    box_w = merged[:, 2] - merged[:, 0] + 1
    box_h = merged[:, 3] - merged[:, 1] + 1
    # Shift each corner by its offset (columns 5-8) scaled by the box size;
    # column 4 (the score) is carried over unchanged.
    left = merged[:, 0] + merged[:, 5] * box_w
    top = merged[:, 1] + merged[:, 6] * box_h
    right = merged[:, 2] + merged[:, 7] * box_w
    bottom = merged[:, 3] + merged[:, 8] * box_h
    score = merged[:, 4]
    return np.vstack([left, top, right, bottom, score]).T
# Filter PNet candidates with RNet
def detect_rnet(im, dets, thresh):
    """Score the PNet candidates with RNet and return the calibrated survivors.

    Args:
        im: image array whose ``shape`` unpacks into (height, width, channels).
        dets: candidate boxes, as returned by ``detect_pnet``.
        thresh: minimum RNet score (column 1 of the class output) to keep a box.

    Returns:
        ``None`` when no candidate can be cropped or none passes ``thresh``;
        otherwise the result of ``calibrate_box`` on the boxes left after
        non-maximum suppression.
    """
    h, w, _ = im.shape
    # Turn every PNet box into the square that contains it.
    dets = convert_to_square(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])
    # Clip boxes that stick out of the image.
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(dets, w, h)

    min_side = 20
    crops = []
    valid_inds = []
    # Visit every candidate and remember which ones yield a crop. The earlier
    # version looped over only the first ``count of large boxes`` indices, so
    # whenever a small box was skipped, trailing candidates were never scored
    # and the skipped ones were scored as all-black crops.
    for i in range(dets.shape[0]):
        if tmph[i] < min_side or tmpw[i] < min_side:
            continue
        # Crop relative to the original image; out-of-image area stays 0.
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        try:
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
            img = cv2.resize(tmp, (24, 24), interpolation=cv2.INTER_LINEAR)
        except (ValueError, cv2.error):
            # Mismatched slice shapes or a degenerate crop: drop this box.
            continue
        img = img.transpose((2, 0, 1))
        crops.append((img - 127.5) / 128)
        valid_inds.append(i)

    if not crops:
        return None
    cropped_ims = np.stack(crops).astype(np.float32)
    # Keep ``dets`` row-aligned with ``cropped_ims``.
    dets = dets[valid_inds]

    cls_scores, reg = predict_rnet(cropped_ims)
    cls_scores = cls_scores[:, 1]
    keep_inds = np.where(cls_scores > thresh)[0]
    if len(keep_inds) == 0:
        return None
    boxes = dets[keep_inds]
    boxes[:, 4] = cls_scores[keep_inds]
    reg = reg[keep_inds]

    keep = py_nms(boxes, 0.6, mode='Union')
    boxes = boxes[keep]
    # Apply the RNet regression output to the kept boxes.
    boxes_c = calibrate_box(boxes, reg[keep])
    return boxes_c
# Filter RNet boxes with ONet and fetch landmarks
def detect_onet(im, dets, thresh):
    """Score the RNet boxes with ONet; like ``detect_rnet`` but also returns landmarks.

    Args:
        im: image array whose ``shape`` unpacks into (height, width, channels).
        dets: candidate boxes, as returned by ``detect_rnet``.
        thresh: minimum ONet score (column 1 of the class output) to keep a box.

    Returns:
        ``(None, None)`` when no box passes ``thresh``; otherwise
        ``(boxes_c, landmark)`` after calibration and non-maximum suppression.
    """
    img_h, img_w, _ = im.shape
    dets = convert_to_square(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])
    dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(dets, img_w, img_h)

    count = dets.shape[0]
    cropped_ims = np.zeros((count, 3, 48, 48), dtype=np.float32)
    for idx in range(count):
        # Crop relative to the original image; out-of-image area stays 0.
        patch = np.zeros((tmph[idx], tmpw[idx], 3), dtype=np.uint8)
        patch[dy[idx]:edy[idx] + 1, dx[idx]:edx[idx] + 1, :] = \
            im[y[idx]:ey[idx] + 1, x[idx]:ex[idx] + 1, :]
        resized = cv2.resize(patch, (48, 48), interpolation=cv2.INTER_LINEAR)
        cropped_ims[idx, :, :, :] = (resized.transpose((2, 0, 1)) - 127.5) / 128

    cls_scores, reg, landmark = predict_onet(cropped_ims)
    face_scores = cls_scores[:, 1]
    keep_inds = np.where(face_scores > thresh)[0]
    if len(keep_inds) == 0:
        return None, None

    boxes = dets[keep_inds]
    boxes[:, 4] = face_scores[keep_inds]
    reg = reg[keep_inds]
    landmark = landmark[keep_inds]

    # Scale the landmark values by each box's size and shift them by the
    # box's top-left corner: even columns use the width, odd ones the height.
    box_w = boxes[:, 2] - boxes[:, 0] + 1
    box_h = boxes[:, 3] - boxes[:, 1] + 1
    landmark[:, 0::2] = box_w[:, np.newaxis] * landmark[:, 0::2] + boxes[:, 0][:, np.newaxis] - 1
    landmark[:, 1::2] = box_h[:, np.newaxis] * landmark[:, 1::2] + boxes[:, 1][:, np.newaxis] - 1

    boxes_c = calibrate_box(boxes, reg)
    keep = py_nms(boxes_c, 0.6, mode='Minimum')
    return boxes_c[keep], landmark[keep]

人脸检测调用程序

# Face detection
def face_detective(im):
    """Run the PNet -> RNet -> ONet cascade on ``im``.

    Returns:
        ``(boxes_c, landmark)`` from ``detect_onet``, or ``(None, None)`` as
        soon as any stage reports no boxes.
    """
    no_face = (None, None)

    # Stage 1: PNet (min face size 20, pyramid factor 0.79, threshold 0.9).
    proposals = detect_pnet(im, 20, 0.79, 0.9)
    if proposals is None:
        return no_face

    # Stage 2: RNet with threshold 0.6.
    refined = detect_rnet(im, proposals, 0.6)
    if refined is None:
        return no_face

    # Stage 3: ONet with threshold 0.7.
    boxes_c, landmark = detect_onet(im, refined, 0.7)
    return no_face if boxes_c is None else (boxes_c, landmark)

人脸特征提取程序

def load_image(img_path):
    """Read an image file and turn it into a two-sample network input.

    The image is read with OpenCV flag 0, resized to 128 x 128 and paired with
    its left-right mirror; values are mapped as ``(v - 127.5) / 127.5``.

    Returns:
        A float32 array of shape (2, 1, 128, 128) holding the image and its
        mirror, or ``None`` when OpenCV cannot read the file.
    """
    gray = cv2.imread(img_path, 0)
    if gray is None:
        return None
    gray = cv2.resize(gray, (128, 128))
    # Index 0 is the image itself, index 1 its horizontal flip.
    pair = np.stack((gray, np.fliplr(gray)), axis=0)
    batch = pair[:, np.newaxis, :, :].astype(np.float32)
    return (batch - 127.5) / 127.5
def get_featuresdict(model, dir):
    """Compute one feature vector per image file found in ``dir``.

    Args:
        model: feature extractor called on the output of ``load_image``.
        dir: folder with the reference images. (The name shadows the builtin
            but is kept so keyword callers keep working.)

    Returns:
        Dict mapping each readable file name to the concatenation of the
        model outputs for the image and for its mirrored copy. Entries that
        ``load_image`` cannot read are skipped.
    """
    # Feed the data to whatever device the model lives on instead of assuming
    # CUDA, so the function also works on CPU-only machines.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    person_dict = {}
    for file_name in os.listdir(dir):
        # Build the path from ``dir``; it used to be hard-coded to "pic/".
        image = load_image(os.path.join(dir, file_name))
        if image is None:
            # Not a readable image (sub-folder, text file, ...).
            continue
        data = torch.from_numpy(image).to(target_device)
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            output = model(data)  # extract features
        output = output.cpu().numpy()
        # Row 0 is the image, row 1 its mirror; join both into one vector.
        person_dict[file_name] = np.hstack((output[0], output[1]))
    return person_dict

计算两个特征向量的余弦相似度，
以及
画人脸框并标注人名

def cosin_metric(x1, x2):
    """Return the cosine similarity of ``x1`` and ``x2``.

    Computed as the dot product divided by the product of the two norms.
    """
    norm_product = np.linalg.norm(x1) * np.linalg.norm(x2)
    return np.dot(x1, x2) / norm_product
def draw_face(img, boxes_c,label):
    """Draw a face box on ``img`` in place and write ``label`` just above it.

    Args:
        img: image to draw on (modified in place).
        boxes_c: sequence whose first four entries are the box corners
            (left, top, right, bottom); they are truncated to ints.
        label: text written at the box's top-left corner.
    """
    left, top, right, bottom = (int(boxes_c[k]) for k in range(4))
    # Face box.
    cv2.rectangle(img, (left, top), (right, bottom), (255, 0, 0), 2)
    # Recognised name, two pixels above the box.
    cv2.putText(img, label, (left, top - 2),
                cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 2)

人脸识别程序，返回人名

# Face recognition
def face_recognition(img):
    """Detect every face in ``img``, label it and draw the result on ``img``.

    Each detected face is cropped, converted to the same two-sample input that
    ``load_image`` builds, embedded with ``arcface_model`` and compared with
    every entry of the module-level ``features`` dict by cosine similarity.

    Args:
        img: BGR image array; boxes and labels are drawn onto it in place.

    Returns:
        ``(img, label)``, where ``label`` is the name of the last face whose
        best similarity exceeded 0.45, or ``"none"`` when there is none.
    """
    img0 = img
    boxes_c, _landmarks = face_detective(img)
    label2 = "none"
    if boxes_c is None:
        return (img0, label2)

    # Crop from an untouched copy so that boxes and text drawn for earlier
    # faces never end up inside a later face crop.
    clean = img.copy()
    for det in boxes_c:
        det[det < 0] = 0  # coordinates can be negative; clamp them to 0
        x1, y1, x2, y2 = (int(v) for v in det[:4])
        face_img = clean[y1:y2, x1:x2]
        if face_img.size == 0:
            # Degenerate box: cv2.resize would raise on an empty crop.
            continue
        face_img = cv2.resize(face_img, (128, 128))
        face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY)
        # Pair the crop with its mirror, as load_image does for the gallery.
        face_img = np.dstack((face_img, np.fliplr(face_img)))
        face_img = face_img.transpose((2, 0, 1))
        face_img = face_img[:, np.newaxis, :, :]
        face_img = face_img.astype(np.float32, copy=False)
        face_img -= 127.5
        face_img /= 127.5
        face_data = torch.from_numpy(face_img).to(device)
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            _output = arcface_model(face_data)  # extract features
        _output = _output.cpu().numpy()
        _feature = np.hstack((_output[0], _output[1]))

        # Find the best match among the pre-computed features. Iterating the
        # dict avoids a directory listing per face and cannot hit a file that
        # has no stored feature.
        label = "none"
        max_f = 0
        max_n = None
        for name, known in features.items():
            t = cosin_metric(known, _feature)
            if t > max_f:
                max_f = t
                max_n = name
        if max_n is not None and max_f > 0.45:  # acceptance threshold
            # Strip the extension whatever its length (".jpg", ".jpeg", ...).
            label = os.path.splitext(max_n)[0]
        print('可信度:'+str(max_f))
        draw_face(img0, det, label)
        if label != "none":
            label2 = label
    return (img0, label2)

图像旋转程序，用来矫正人脸歪斜

# Rotate by `angle` degrees; uncovered background is filled with white (255, 255, 255)
def rotate_bound_white_bg(image, angle):
    """Rotate ``image`` about its centre on a canvas enlarged to fit the result.

    The angle is negated before it is handed to ``cv2.getRotationMatrix2D``.
    The output size is derived from the absolute cosine and sine terms of the
    rotation matrix, and the translation is adjusted to re-centre the image.
    """
    height, width = image.shape[:2]
    center_x, center_y = width // 2, height // 2
    matrix = cv2.getRotationMatrix2D((center_x, center_y), -angle, 1.0)
    abs_cos = np.abs(matrix[0, 0])
    abs_sin = np.abs(matrix[0, 1])
    # Size of the bounding canvas for the rotated image.
    new_w = int((height * abs_sin) + (width * abs_cos))
    new_h = int((height * abs_cos) + (width * abs_sin))
    # Move the old centre to the centre of the new canvas.
    matrix[0, 2] += (new_w / 2) - center_x
    matrix[1, 2] += (new_h / 2) - center_y
    return cv2.warpAffine(image, matrix, (new_w, new_h), borderValue=(255, 255, 255))

主程序，支持摄像头识别和文件夹内的图片识别。使用时，
source 为空时启动摄像头识别；填入文件夹名时，启动图片识别，并把结果输出到 output 文件夹内。

if __name__ == '__main__':
    # Entry point. With an empty ``source`` frames are read from camera 0;
    # otherwise every image in the folder ``source`` is recognised, shown,
    # saved to ``out`` and counted as correct when the label equals ``source``.
    save_img = False
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    dir = "pic"  # gallery of reference photos (name kept: face_recognition reads it)
    arcface_model = resnet_face18(False)
    out = 'output'  # folder for annotated results
    arcface_model = DataParallel(arcface_model)
    # map_location lets GPU-saved weights load on a CPU-only machine.
    arcface_model.load_state_dict(
        torch.load(r'infer_models_weights/resnet18_110.pth', map_location=device),
        strict=False)
    arcface_model.to(device).eval()
    features = get_featuresdict(arcface_model, dir)
    vid_path, vid_writer = None, None
    source = 'pubajia'  # empty string = camera capture; otherwise a folder name
    if source == '':
        cap = cv2.VideoCapture(0)
        try:
            while True:
                ret, img = cap.read()
                if ret:
                    img0, label = face_recognition(img)  # recognise faces
                    cv2.imshow('face rec', img0)
                # Poll the keyboard on every pass, including failed reads, so
                # the loop never spins without a delay; 'q' ends the capture.
                if cv2.waitKey(100) & 0xFF == ord('q'):
                    break
        finally:
            # Always give the camera back and close the preview window.
            cap.release()
            cv2.destroyAllWindows()
    else:
        # cv2.imwrite fails silently when the target folder is missing.
        os.makedirs(out, exist_ok=True)
        n = 0      # frames labelled as ``source``
        m = 0      # frames processed
        error = 0  # frames labelled as somebody else
        for filename in os.listdir(source):
            img = cv2.imread(source + '/' + filename)
            if img is None:
                # Not a readable image; skip it without counting it.
                continue
            print('当前帧:' + str(m + 1))
            # Work on a copy so the retries below start from a clean frame
            # rather than one that already has boxes drawn on it.
            img0, label = face_recognition(img.copy())
            if label == "none":
                print('当前空标帧:' + str(n))
                # Retry on the frame rotated by 15 degrees.
                img0, label = face_recognition(rotate_bound_white_bg(img, 15))
                if label == "none":
                    print('第二次空标帧:' + str(n))
                    # Retry on the frame rotated the other way. The rotated
                    # frame used to be discarded and the unrotated one was
                    # recognised a second time.
                    img0, label = face_recognition(rotate_bound_white_bg(img, -15))
            if label == source:
                n = n + 1
                print('当前有效帧' + str(n))
            elif label != "none":
                error = error + 1
                print('当前错误帧' + str(error))
            cv2.imshow('face rec', img0)
            cv2.waitKey(100)
            m = m + 1
            cv2.imwrite(out + '/' + str(m) + '.jpg', img0)
        if m:
            print('识别成功率' + str(n / m))
            print('张冠李戴率' + str(error / m))
        else:
            # Avoid a ZeroDivisionError when the folder holds no readable image.
            print('未找到可读取的图片: ' + source)