[游戏开发] 麦克风阵列研究3 定向录音

开发: C++知识库 Java知识库 JavaScript Python PHP知识库人工智能区块链大数据移动开发嵌入式开发工具数据结构与算法开发测试游戏开发网络协议系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑笔记本显卡显示器固态硬盘硬盘耳机手机 iphone vivo oppo 小米华为单反装机图拉丁

-> 游戏开发 -> 麦克风阵列研究3 定向录音 -> 正文阅读

[游戏开发]麦克风阵列研究3 定向录音

上一篇文章实现了测向，也尝试了定向录音的效果。虽然定向录音是有效果的，但是好像目标方向不太稳定。

后来我找到如下文章，它说要在 sst 里把 add 由 dynamic 改为 static，并且用 target 指定好方向坐标：《Can I record the sound only in fixed direction? · Issue #158 · introlab/odas · GitHub》（https://github.com/introlab/odas/issues/158）

于是，我就相应改了cfg文件，方向对应麦克风圆心正上方（即 target 的 x = 0、y = 0、z = 1）。

# Configuration file for ReSpeaker USB 4 Mic Array (ReSpeaker USB Mic Array v2.0)

version = "2.1";

# Raw

raw: 
{

    fS = 16000;
    hopSize = 128;
    nBits = 16;
    nChannels = 6; 

    # Input with raw signal from microphones
    interface: {
        type = "soundcard";
        card = 2;
        device = 0;
    }

}

# Mapping

mapping:
{

    map: (2, 3, 4, 5);

}

# General

general:
{
    
    epsilon = 1E-20;

    size: 
    {
        hopSize = 128;
        frameSize = 256;
    };
    
    samplerate:
    {
        mu = 16000;
        sigma2 = 0.01;
    };

    speedofsound:
    {
        mu = 343.0;
        sigma2 = 25.0;
    };

    mics = (

        # Microphone 2
        { 
            mu = ( -0.032, +0.000, +0.000 ); 
            sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
            direction = ( +0.000, +0.000, +1.000 );
            angle = ( 80.0, 100.0 );
        },

        # Microphone 3
        { 
            mu = ( +0.000, -0.032, +0.000 ); 
            sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
            direction = ( +0.000, +0.000, +1.000 );
            angle = ( 80.0, 100.0 );
        },

        # Microphone 4
        { 
            mu = ( +0.032, +0.000, +0.000 ); 
            sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
            direction = ( +0.000, +0.000, +1.000 );
            angle = ( 80.0, 100.0 );
        },

        # Microphone 5
        { 
            mu = ( +0.000, +0.032, +0.000 ); 
            sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
            direction = ( +0.000, +0.000, +1.000 );
            angle = ( 80.0, 100.0 );        
        }
        
    );

    # Spatial filter to include only a range of direction if required
    # (may be useful to remove false detections from the floor)
    spatialfilters = (

        {
            direction = ( +0.000, +0.000, +1.000 );
            angle = (80.0, 100.0);

        }    

    );

    nThetas = 181;
    gainMin = 0.25;

};

# Stationary noise estimation

sne:
{
    
    b = 3;
    alphaS = 0.1;
    L = 150;
    delta = 3.0;
    alphaD = 0.1;

}

# Sound Source Localization

ssl:
{

    nPots = 4;
    nMatches = 10;
    probMin = 0.5;
    nRefinedLevels = 1;
    interpRate = 4;

    # Number of scans: level is the resolution of the sphere
    # and delta is the size of the maximum sliding window
    # (delta = -1 means the size is automatically computed)
    scans = (
        { level = 2; delta = -1; },
        { level = 4; delta = -1; }
    );

    # Output to export potential sources
    potential: {

        # format = "undefined";
        format = "json";

        interface: {
            #type = "blackhole";
            type = "socket"; ip = "127.0.0.1"; port = 9000;
            #type = "terminal";
        };

    };

};

# Sound Source Tracking

sst:
{  

    # Mode is either "kalman" or "particle"

    mode = "kalman";

    # Add is either "static" or "dynamic"

    add = "static";    

    # Parameters used by both the Kalman and particle filter

    active = (
        { weight = 1.0; mu = 0.4; sigma2 = 0.0025 }
    );

    inactive = (
        { weight = 1.0; mu = 0.25; sigma2 = 0.0025 }
    );

    sigmaR2_prob = 0.0025;
    sigmaR2_active = 0.0225;
    sigmaR2_target = 0.0025;
    Pfalse = 0.1;
    Pnew = 0.1;
    Ptrack = 0.8;

    theta_new = 0.9;
    N_prob = 5;
    theta_prob = 0.8;
    N_inactive = ( 250, 250, 250, 250 );
    theta_inactive = 0.9;

    # Parameters used by the Kalman filter only

    kalman: {

        sigmaQ = 0.001;
        
    };
   
    # Parameters used by the particle filter only

    particle: {

        nParticles = 1000;
        st_alpha = 2.0;
        st_beta = 0.04;
        st_ratio = 0.5;
        ve_alpha = 0.05;
        ve_beta = 0.2;
        ve_ratio = 0.3;
        ac_alpha = 0.5;
        ac_beta = 0.2;
        ac_ratio = 0.2;
        Nmin = 0.7;

    };

    target: 
    (
        { tag = "myTarget"; x = 0.0; y = 0.0; z = 1.0 }
    );

    # Output to export tracked sources
    tracked: {

        format = "json";

        interface: {
            #type = "file"; path = "tracks.txt";
            type = "socket"; ip = "127.0.0.1"; port = 9001;
            #type = "terminal";
        };

    };

}

sss:
{
    
    # Mode is either "dds", "dgss" or "dmvdr"

    mode_sep = "dds";
    mode_pf = "ms";

    gain_sep = 1.0;
    gain_pf = 10.0;

    dds: {

    };

    dgss: {

        mu = 0.01;
        lambda = 0.5;

    };

    dmvdr: {

    };

    ms: {

        alphaPmin = 0.07;
        eta = 0.5;
        alphaZ = 0.8;        
        thetaWin = 0.3;
        alphaWin = 0.3;
        maxAbsenceProb = 0.9;
        Gmin = 0.01;
        winSizeLocal = 3;
        winSizeGlobal = 23;
        winSizeFrame = 256;

    };

    ss: {

        Gmin = 0.01;
        Gmid = 0.9;
        Gslope = 10.0;

    };

    separated: {

        fS = 16000;
        hopSize = 128;
        nBits = 16;        

        interface: {
            type = "file";
            path = "separated.raw";
        };        

    };

    postfiltered: {

        fS = 16000;
        hopSize = 128;
        nBits = 16;        
        gain = 10.0;

        interface: {
            type = "file";
            path = "postfiltered.raw";
        };        

    };

};

classify:
{
    
    frameSize = 4096;
    winSize = 3;
    tauMin = 88;
    tauMax = 551;
    deltaTauMax = 20;
    alpha = 0.3;
    gamma = 0.05;
    phiMin = 0.5;
    r0 = 0.2;    

    category: {

        format = "undefined";

        interface: {
            type = "blackhole";
        }

    }

}

为了更准确地知道声源是不是位于定向录音的位置，我改了界面，在正中间增加白色点，表示录音的方向。并且把圆点半径都改小了，这样更准确。

#!/usr/bin/env python
import socket
import sys
import threading
import random
import os
import time
import struct
import cv2
import signal
import json
import ast
import numpy as np

# Not referenced by the code shown here; kept so the module namespace is
# unchanged.
stop = False

# Listening endpoints.  Port 9000 is the one the accompanying ODAS
# configuration sends "potential" (ssl) sources to, and port 9001 the one it
# sends "tracked" (sst) sources to.
HOST = "0.0.0.0"
PORT, PORT2 = 9000, 9001
SOCK_ADDR, SOCK_ADDR2 = (HOST, PORT), (HOST, PORT2)


def stop_handler(signum, frame):
    """Signal handler that clears the module-level ``running`` flag.

    ``signum`` and ``frame`` are the two arguments the ``signal`` module
    passes to every handler; neither is used here.
    """
    # Same effect as ``global running; running = False``.
    globals()["running"] = False

# Route SIGINT (Ctrl+C) to stop_handler so the flag is cleared instead of the
# default handler running.
signal.signal(signal.SIGINT, stop_handler) 

def _build_spectrum_lut():
    """Return the 256-row colour table as a list of 3-element integer lists.

    The table consists of three linear ramps that move in steps of 3:

    * rows   0..85 : ``[0, 0, 0]``   up to ``[0, 0, 255]``
      (third component rises);
    * rows  86..170: ``[0, 3, 252]`` up to ``[0, 255, 0]``
      (second component rises while the third falls);
    * rows 171..255: ``[3, 252, 0]`` up to ``[255, 0, 0]``
      (first component rises while the second falls).
    """
    step = 3
    full = 255
    rising = [step * k for k in range(1, 86)]  # 3, 6, ..., 255

    table = [[0, 0, step * k] for k in range(86)]
    table.extend([0, up, full - up] for up in rising)
    table.extend([up, full - up, 0] for up in rising)
    return table


# Indexed with an integer in 0..255; each row is used as a colour triple.
spectrum_rgb3_lut = _build_spectrum_lut()


class SocketClientObject(object):
    """Bundle an accepted connection with the address of its peer."""

    def __init__(self, socket, address):
        # ``socket`` is the connection object here; the parameter shadows the
        # ``socket`` module only inside this method.
        self.socket, self.address = socket, address

class ClientThread(threading.Thread):
    """Draw the first reported source of each received message in a window.

    Each chunk read from the client socket is expected to contain a JSON
    message with a ``[...]`` array of objects carrying ``"x"``, ``"y"`` and
    ``"E"``.  The first object is drawn as a coloured dot on an 800x800 black
    canvas when its energy exceeds 80/255; a white dot at the centre
    (400, 400) is always drawn as the reference for the fixed recording
    direction.

    The thread ends when the module-level ``running`` flag is cleared, when
    the peer closes the connection, or when ``q`` is pressed in the window.
    """

    def __init__(self, client_object):
        threading.Thread.__init__(self)
        self.client_object = client_object

    @staticmethod
    def _parse_first_source(text):
        """Return ``(x, y, energy)`` for the first object of the array.

        ``x``/``y`` are pixel coordinates on the 800x800 canvas (a coordinate
        of 0.0 maps to 400; the y axis is flipped).  ``energy`` is ``E``
        scaled to 0..255 and clamped to that range so it can always be used
        as a colour-table index.

        Raises IndexError, KeyError, TypeError, ValueError or OverflowError
        when the text does not contain a usable object.
        """
        flat = text.replace("\n", "")
        # Take what lies between the first '[' and the following ']', then
        # the first element.  Elements are separated by this exact
        # comma-plus-spaces string once newlines have been removed.
        src = flat.split('[')[1].split(']')[0]
        first = src.split(",        ")[0]
        target = json.loads(first)
        x = int(float(target["x"]) * 400) + 400
        y = int(-float(target["y"]) * 400) + 400
        energy = int(float(target["E"]) * 255)
        return x, y, max(0, min(255, energy))

    def run(self):
        global running
        sock = self.client_object.socket
        try:
            while running:
                data = sock.recv(1024)
                if not data:
                    # An empty read means the peer closed the connection;
                    # without this check the loop would spin forever.
                    break
                try:
                    x, y, energy = self._parse_first_source(
                        data.decode("utf-8"))
                    img = np.zeros((800, 800, 3), np.uint8)
                    if energy > 80:
                        colour = tuple(spectrum_rgb3_lut[255 - energy])
                        cv2.circle(img, (x, y), 10, colour, -1)
                    # Reference marker for the fixed recording direction.
                    cv2.circle(img, (400, 400), 10, (255, 255, 255), -1)
                    cv2.imshow('pu', img)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                except (IndexError, KeyError, TypeError, ValueError,
                        OverflowError, cv2.error):
                    # recv() gives arbitrary stream chunks, so partial or
                    # malformed messages are expected; report and carry on.
                    print("problem1")
        finally:
            # Always release the window and the connection, even if an
            # unexpected error ends the loop.
            cv2.destroyAllWindows()
            sock.close()


class VideoThread(threading.Thread):
    """Print every chunk received on the destination socket to the terminal.

    Each chunk is printed raw and then parsed as a JSON message with a
    ``[...]`` array of objects carrying ``"x"``, ``"y"`` and ``"activity"``.
    The parsed values are not displayed; parsing only serves to report
    malformed chunks with ``problem2``.

    The thread ends when the module-level ``running`` flag is cleared or when
    the peer closes the connection.
    """

    def __init__(self, dest_object):
        threading.Thread.__init__(self)
        self.dest_object = dest_object

    @staticmethod
    def _parse_tracked(text):
        """Return a list of ``(x, y, activity)`` tuples, one per array object.

        ``x``/``y`` are pixel coordinates on an 800x800 canvas (a coordinate
        of 0.0 maps to 400; the y axis is flipped) and ``activity`` is scaled
        by 255.

        Raises IndexError, KeyError, TypeError, ValueError or OverflowError
        when the text does not contain a usable array.
        """
        flat = text.replace("\n", "")
        # Elements are separated by this exact comma-plus-spaces string once
        # newlines have been removed.
        src = flat.split('[')[1].split(']')[0]
        tracked = []
        for item in src.split(",        "):
            target = json.loads(item)
            x = int(float(target["x"]) * 400) + 400
            y = int(-float(target["y"]) * 400) + 400
            activity = int(float(target["activity"]) * 255)
            tracked.append((x, y, activity))
        return tracked

    def run(self):
        global running
        sock = self.dest_object.socket
        try:
            while running:
                data = sock.recv(1024)
                if not data:
                    # An empty read means the peer closed the connection;
                    # without this check the loop would spin forever.
                    break
                print(data)
                try:
                    # Result intentionally unused: drawing of tracked sources
                    # is disabled, only the validity check remains.
                    self._parse_tracked(data.decode("utf-8"))
                except (IndexError, KeyError, TypeError, ValueError,
                        OverflowError):
                    print("problem2")
        finally:
            sock.close()


def main():
    """Listen on both ports and start one worker thread per connection.

    Sets the module-level ``running`` flag, binds ``SOCK_ADDR`` and
    ``SOCK_ADDR2``, then repeatedly accepts one connection on the first
    listener (handled by ``ClientThread``) followed by one on the second
    (handled by ``VideoThread``).  Any error is reported on stdout; both
    listening sockets are always closed on the way out.
    """
    global running
    running = True

    # Pre-bind the names so cleanup is safe even if creating or binding a
    # socket fails part-way through.
    sock1 = None
    sock2 = None
    try:
        sock1 = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock1.bind(SOCK_ADDR)
        sock1.listen(5)

        sock2 = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock2.bind(SOCK_ADDR2)
        sock2.listen(2)

        while running:
            (clientsocket, address) = sock1.accept()
            print(" Accept client:  %s" % (address,))
            ct = ClientThread(SocketClientObject(clientsocket, address))
            ct.start()

            (dst, dst_addr) = sock2.accept()
            print("Destination Connected by %s" % (dst_addr,))
            vt = VideoThread(SocketClientObject(dst, dst_addr))
            vt.start()

    except Exception:
        # Top-level boundary: report whatever went wrong and fall through to
        # the cleanup below.
        print("#! EXC:  %s" % (sys.exc_info(),))
        print("THE END! Goodbye!")
    finally:
        for listener in (sock1, sock2):
            if listener is not None:
                listener.close()

# Start the listeners only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

最后我用audacity播放了postfiltered.raw文件。设置里要选为signed 16bit pcm, 32000 sample rate，并且要选为立体声。

试下来好像有点效果，但又不是非常好。

我推测原因是：

1. 4 通道阵列还是比较小，哪怕程序没问题，效果也要比以前做的 16 通道差不少。

2. 定向录音用了sst模块，而测向显示用了ssl模块。我记得sst如果要显示测向结果也行，但是与实际有偏差，与ssl也会有偏差。因此当我在界面上看到声源方向和定向录音方向重合时，可能sst模块并没有认为重合，导致我认为应该达到定向录音方向时还没达到（应该录到好音质时实际还录不了的情况）。反正就是有一点错位。

感兴趣的朋友自己也可以试试看。

----------------------------------------------------------------------

后来我改了一下界面代码，在界面程序对应terminal里把固定的sst结果打印了出来，你可以看到第一个target位置固定，但是随着实际音源位置变化，activity会在0～1之间变化。

你可以尝试把实际声源对准屏幕中间白点，然后可能稍微偏一点，使得terminal里第一个target activity保持在1，然后看看postfiltered.raw文件里的声音是不是录下来效果最好。