1 模型
本文采用能够反映人对语音感知特性的Mel频率倒谱系数(MFCC)作为特征参数,并为避免时间规整问题采用矢量量化(VQ)技术,开发了一个说话人识别系统。MFCC主要模拟人耳的听觉过程,相对于其它参数,它对语音波形的变化不敏感,更加稳定。系统取得了很好的识别结果,实验表明系统训练和识别的计算量和存储量都比较低。
2 部分代码
function varargout = Main(varargin)
% MAIN M-file for Main.fig
%      MAIN, by itself, creates a new MAIN or raises the existing
%      singleton*.
%
%      H = MAIN returns the handle to a new MAIN or the handle to
%      the existing singleton*.
%
%      MAIN('CALLBACK',hObject,eventData,handles,...) calls the local
%      function named CALLBACK in MAIN.M with the given input arguments.
%
%      MAIN('Property','Value',...) creates a new MAIN or raises the
%      existing singleton*.  Starting from the left, property value pairs are
%      applied to the GUI before Main_OpeningFcn gets called.  An
%      unrecognized property name or invalid value makes property application
%      stop.  All inputs are passed to Main_OpeningFcn via varargin.
%
%      *See GUI Options on GUIDE's Tools menu.  Choose "GUI allows only one
%      instance to run (singleton)".
%
% See also: GUIDE, GUIDATA, GUIHANDLES

% Edit the above text to modify the response to help Main

% Last Modified by GUIDE v2.5 11-Aug-2016 00:35:18

% Begin initialization code - DO NOT EDIT
gui_Singleton = 1;
gui_State = struct('gui_Name',       mfilename, ...
                   'gui_Singleton',  gui_Singleton, ...
                   'gui_OpeningFcn', @Main_OpeningFcn, ...
                   'gui_OutputFcn',  @Main_OutputFcn, ...
                   'gui_LayoutFcn',  [] , ...
                   'gui_Callback',   []);
% A leading character argument is treated as the name of a local callback.
if nargin && ischar(varargin{1})
    gui_State.gui_Callback = str2func(varargin{1});
end

% Forward outputs only when the caller asked for them.
if nargout
    [varargout{1:nargout}] = gui_mainfcn(gui_State, varargin{:});
else
    gui_mainfcn(gui_State, varargin{:});
end
% End initialization code - DO NOT EDIT
% --- Executes just before Main is made visible.
function Main_OpeningFcn(hObject, eventdata, handles, varargin)
% MAIN_OPENINGFCN Initialise the GUI and show a summary of the stored training data.
% This function has no output args, see OutputFcn.
% hObject    handle to figure
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
% varargin   command line arguments to Main (see VARARGIN)
%
% Reads the variables TrainingSet and TrainingLable from the MAT-files of the
% same names, writes the label tabulation to handles.totalrecords and the
% number of rows of TrainingSet to handles.resultText.

% Choose default command line output for Main
handles.output = hObject;

% Update handles structure
guidata(hObject, handles);

% UIWAIT makes Main wait for user response (see UIRESUME)
% uiwait(handles.figure1);

% Load into structs instead of "poofing" variables into the workspace, and
% fall back to an empty training set so that a missing or unreadable
% MAT-file does not prevent the GUI from opening.
try
    setData       = load('TrainingSet');
    labelData     = load('TrainingLable');
    TrainingSet   = setData.TrainingSet;
    TrainingLable = labelData.TrainingLable;
catch
    warning('Main:noTrainingData', ...
            'Could not load TrainingSet/TrainingLable; starting with no training data.');
    TrainingSet   = [];
    TrainingLable = [];
end

totalSampl = size(TrainingSet, 1);

% TABULATE is not defined in this file (it ships with the Statistics
% Toolbox); skip it when there are no labels to summarise.
if isempty(TrainingLable)
    str = '';
else
    str = num2str(tabulate(TrainingLable));
end
set(handles.totalrecords, 'String', str);

% Use bracket concatenation: STRCAT strips the trailing blank of a char
% argument, which glued the count directly onto the colon.
set(handles.resultText, 'String', ['Total Samples: ', num2str(totalSampl)]);
% --- Outputs from this function are returned to the command line.
function varargout = Main_OutputFcn(hObject, eventdata, handles)
% MAIN_OUTPUTFCN Return the value stored in handles.output as the first output.
% varargout  cell array for returning output args (see VARARGOUT);
% hObject    handle to figure
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)

% Get default command line output from handles structure
varargout{1} = handles.output;
% --- Executes on button press in trainBtn.
function trainBtn_Callback(hObject, eventdata, handles)
% TRAINBTN_CALLBACK Record a 2 s utterance, pre-emphasise it and save it to RAW.wav.
% hObject    handle to trainBtn (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
%
% Steps: record 2 s of mono audio at 8 kHz, apply a first-order pre-emphasis
% filter, write the result as 16-bit PCM to RAW.wav, play it back, then wait
% for the user to confirm at the command window.  Progress is reported
% through handles.statusText.
%
% WAVRECORD, WAVWRITE and WAVPLAY have been removed from MATLAB, so the
% AUDIORECORDER / AUDIOWRITE / AUDIOPLAYER equivalents are used.  The frame
% blocking and Hamming windowing that used to follow the playback were
% dropped: their results were never read by this callback.
clc;

Fs       = 8000;   % sampling frequency (Hz)
Duration = 2;      % recording length (s)
Prem     = 0.97;   % pre-emphasis coefficient

set(handles.statusText, 'String', 'Start Speaking...');
pause(0.001);      % let the GUI repaint before the blocking recording starts

% Record Duration seconds of 16-bit mono audio; GETAUDIODATA returns the
% samples as a double-precision array.
recorder = audiorecorder(Fs, 16, 1);
recordblocking(recorder, Duration);
myRecording = getaudiodata(recorder);

set(handles.statusText, 'String', 'Saving....');
pause(0.001);

% Pre-emphasis (first-order high-pass): y(n) = x(n) - Prem * x(n-1)
Filtered_output = filter([1, -Prem], 1, myRecording);

% WAVWRITE appended ".wav" to the bare name 'RAW'; AUDIOWRITE needs the
% extension spelled out to select the file format.
audiowrite('RAW.wav', Filtered_output, Fs, 'BitsPerSample', 16);

% Blocking playback, matching the default synchronous behaviour of WAVPLAY.
player = audioplayer(Filtered_output, Fs);
playblocking(player);

set(handles.statusText, 'String', 'Input Saved in .wav file format');
pause(0.001);

% Block until the user presses Enter in the command window; the typed text
% itself is not used.
input('Press any key ', 's');

disp('Select saved input through "Train with Audio" for Feature Extraction');
% --- Executes on button press in testBtn.
function testBtn_Callback(hObject, eventdata, handles)
% TESTBTN_CALLBACK Record an utterance, extract its features and classify it.
% hObject    handle to testBtn (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
%
% Steps: record 2 s of mono audio at 8 kHz, pre-emphasise, split into
% overlapping Hamming-windowed frames, compute liftered cepstral
% coefficients, average them into one feature row and classify that row
% against the stored TrainingSet / TrainingLable with MULTISVM.  The four
% intermediate signals are plotted on handles.axes1..axes4 and the decision
% is written to handles.outputText (1 -> English, 2 -> Marathi, 3 -> Hindi).
%
% TRIFBANK and MULTISVM are not defined in this file and must be on the
% MATLAB path; HAMMING is likewise an external function.
clc;

Fs       = 8000;   % sampling frequency (Hz)
Duration = 2;      % recording length (s)
Prem     = 0.97;   % pre-emphasis coefficient

set(handles.statusText, 'String', 'Start Speaking...');
drawnow;           % show the prompt before the blocking recording starts

% AUDIORECORDER only constructs a recorder object; the samples have to be
% captured with RECORDBLOCKING and fetched with GETAUDIODATA.
recorder = audiorecorder(Fs, 16, 1);
recordblocking(recorder, Duration);
myRecording = getaudiodata(recorder);
pause(0.01);

set(handles.outputText, 'String', '--');
set(handles.statusText, 'String', 'Stop Speaking');
pause(0.001);

% Raw waveform.
axes(handles.axes1);
plot(myRecording);
grid on;
xlabel('Samples');
ylabel('Magnitude(db)');
set(handles.statusText, 'String', 'Done with Recording...');
pause(0.001);

%% (1) Pre-emphasis: y(n) = x(n) - Prem * x(n-1)
Filtered_output = filter([1, -Prem], 1, myRecording);
sound(Filtered_output, Fs);   % pass Fs explicitly; SOUND otherwise assumes 8192 Hz

axes(handles.axes2);
plot(Filtered_output);
grid on;
xlabel('Samples');
ylabel('Magnitude(db)');

%% Frame blocking
len           = length(Filtered_output);
Frame_size    = Fs*32/1000;                  % 256 samples (32 ms)
Frame_overlap = Fs*16/1000;                  % 128 samples (16 ms)
Frame_step    = Frame_size - Frame_overlap;  % 128 samples
numFrames     = floor(len/Frame_step);       % floor guards against a non-integer count

% Zero-padded copy of the signal, preallocated instead of grown in a loop.
paddedSignal = zeros(numFrames*Frame_size, 1);
paddedSignal(1:len) = Filtered_output(:);

% Row i holds samples i*Frame_step+1 .. i*Frame_step+Frame_size.
% NOTE(review): this offset skips the first Frame_step samples; it is kept
% unchanged so the features stay comparable with the stored training data.
% Confirm against the training-side feature extraction before changing it.
frameIdx = bsxfun(@plus, (1:numFrames).' * Frame_step, 1:Frame_size);
fdata    = paddedSignal(frameIdx);

%% (2) Windowing
[nbFrames, nbSamples] = size(fdata);
w           = hamming(nbSamples);            % column vector
afterWindow = bsxfun(@times, fdata, w.');    % one windowed frame per row

axes(handles.axes3);
plot(afterWindow);
grid on;
xlabel('Samples');
ylabel('Magnitude(db)');

%% (3) Cepstral analysis
R = [ 300 3700 ];   % frequency range to consider (Hz)
M = 20;             % number of filterbank channels
N = 13;             % number of cepstral coefficients
L = 22;             % cepstral sine lifter parameter

% NOTE(review): the FFT length is derived from the number of frames and the
% transform below runs along dimension 1 of a frames-by-samples matrix,
% i.e. across frames rather than within each frame.  This fixes the length
% of the feature row (one value per column of afterWindow), which has to
% match the column count of TrainingSet, so it is left as is.  Presumably
% the training data was produced the same way - TODO confirm before fixing.
nfft = 2^nextpow2( nbFrames );   % length of FFT analysis
K    = nfft/2 + 1;               % length of the unique part of the FFT

% Forward and backward mel frequency warping (natural log, as in HTK).
hz2mel = @( hz )( 1127*log(1+hz/700) );      % Hertz to mel warping function
mel2hz = @( mel )( 700*exp(mel/1127)-700 );  % mel to Hertz warping function

% Type III DCT matrix routine
dctm = @( N, M )( sqrt(2.0/M) * cos( repmat((0:N-1).',1,M) .* repmat(pi*((1:M)-0.5)/M,N,1) ) );

% Cepstral lifter routine
ceplifter = @( N, L )( 1+0.5*L*sin(pi*(0:N-1)/L) );

MAG = abs( fft(afterWindow, nfft, 1) );

% Triangular filterbank with uniformly spaced filters on the mel scale.
H = trifbank( M, K, R, Fs, hz2mel, mel2hz );  % size of H is M x K

% Filterbank application to the unique part of the magnitude spectrum.
FBE = H * MAG(1:K,:);

% Conversion of log filterbank energies to cepstral coefficients via DCT,
% followed by cepstral liftering.
CC = dctm( N, M ) * log( FBE );
CC = diag( ceplifter( N, L ) ) * CC;

% Column-wise mean of CC: a row vector with one entry per column of CC.
meanMFCC = mean(CC);

axes(handles.axes4);
plot(meanMFCC);
grid on;
xlabel('Samples');
ylabel('Magnitude(db)');
set(handles.statusText, 'String', 'Done');

%% (4) Classification
clc;
testData = meanMFCC;

% Use the exact file name 'TrainingLable' (the same spelling the opening
% function loads) so the call also works on case-sensitive file systems.
labelData     = load('TrainingLable');
setData       = load('TrainingSet');
TrainingLable = labelData.TrainingLable;
TrainingSet   = setData.TrainingSet;

classes = multisvm(TrainingSet, TrainingLable', testData);

% Map the numeric class to its language name; anything else leaves '--'.
languages = {'English', 'Marathi', 'Hindi'};
resultLabel = '--';
if isscalar(classes) && any(classes == 1:numel(languages))
    resultLabel = languages{classes};
end
set(handles.outputText, 'String', resultLabel);
function edit1_Callback(hObject, eventdata, handles)
% EDIT1_CALLBACK GUIDE-generated callback stub for edit1; the body is empty.
% hObject    handle to edit1 (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)

% Hints: get(hObject,'String') returns contents of edit1 as text
%        str2double(get(hObject,'String')) returns contents of edit1 as a double
% --- Executes during object creation, after setting all properties.
function edit1_CreateFcn(hObject, eventdata, handles)
% EDIT1_CREATEFCN Give edit1 a white background on Windows when it still has the default colour.
% hObject    handle to edit1 (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    empty - handles not created until after all CreateFcns called

% Hint: edit controls usually have a white background on Windows.
%       See ISPC and COMPUTER.
if ispc && isequal(get(hObject,'BackgroundColor'), get(0,'defaultUicontrolBackgroundColor'))
    set(hObject,'BackgroundColor','white');
end
% --------------------------------------------------------------------
function uipanel1_ButtonDownFcn(hObject, eventdata, handles)
% UIPANEL1_BUTTONDOWNFCN GUIDE-generated callback stub for uipanel1; the body is empty.
% hObject    handle to uipanel1 (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
% --- Executes on mouse press over figure background.
function figure1_ButtonDownFcn(hObject, eventdata, handles)
% FIGURE1_BUTTONDOWNFCN GUIDE-generated callback stub for figure1; the body is empty.
% hObject    handle to figure1 (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
% --- If Enable == 'on', executes on mouse press in 5 pixel border.
% --- Otherwise, executes on mouse press in 5 pixel border or over trainWithFilebtn.
function trainWithFilebtn_ButtonDownFcn(hObject, eventdata, handles)
% TRAINWITHFILEBTN_BUTTONDOWNFCN GUIDE-generated callback stub for trainWithFilebtn; the body is empty.
% hObject    handle to trainWithFilebtn (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
% --- Executes when figure1 is resized.
function figure1_ResizeFcn(hObject, eventdata, handles)
% FIGURE1_RESIZEFCN GUIDE-generated callback stub for figure1; the body is empty.
% hObject    handle to figure1 (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
% --- Executes on key press with focus on testWithAudioBtn and none of its controls.
function testWithAudioBtn_KeyPressFcn(hObject, eventdata, handles)
% TESTWITHAUDIOBTN_KEYPRESSFCN GUIDE-generated callback stub for testWithAudioBtn; the body is empty.
% hObject    handle to testWithAudioBtn (see GCBO)
% eventdata  structure with the following fields (see UICONTROL)
%   Key: name of the key that was pressed, in lower case
%   Character: character interpretation of the key(s) that was pressed
%   Modifier: name(s) of the modifier key(s) (i.e., control, shift) pressed
% handles    structure with handles and user data (see GUIDATA)
3 仿真结果
4 参考文献
[1]王伟, and 邓辉文. "基于MFCC参数和VQ的说话人识别系统." 第四届全国信息获取与处理学术会议 0.
|