# Dependency: python-speech-features 0.6
import python_speech_features
import numpy as np
import scipy.io.wavfile
from matplotlib import pyplot as plt
import os
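'''
signal - the audio signal to compute features from; should be an N*1 array
samplerate - the sample rate of the signal we are working with
winlen - length of the analysis window in seconds, default 0.025 s (25 ms)
winstep - step between successive windows in seconds, default 0.01 s (10 ms)
numcep - number of cepstral coefficients to return, default 13
nfilt - number of filters in the filterbank, default 26
nfft - FFT size, default 512
lowfreq - lowest band edge of the mel filters in Hz, default 0
highfreq - highest band edge of the mel filters in Hz, default samplerate/2
preemph - coefficient of the pre-emphasis filter; 0 means no filter, default 0.97
ceplifter - lifter applied to the final cepstral coefficients; 0 means no lifter, default 22
appendEnergy - if true, the zeroth cepstral coefficient is replaced with the log of the total frame energy
'''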
def get_mfcc(data, fs, winSize, winStep):
    """Compute MFCCs and stack them with two sets of delta features.

    Args:
        data: Audio samples, passed unchanged to ``python_speech_features.mfcc``.
        fs: Sample rate of ``data`` in Hz.
        winSize: Analysis window length in seconds.
        winStep: Step between successive windows in seconds.

    Returns:
        The 13 MFCCs per frame, horizontally stacked with the two delta
        blocks computed from them (13 + 13 + 13 columns per frame).
    """
    # One analysis frame spans winSize * fs samples. Keep the historical FFT
    # size of 512 whenever the frame fits, and grow it by powers of two only
    # for longer frames, which would otherwise be truncated before the FFT.
    frame_len = int(np.floor(winSize * fs + 0.5))
    nfft = 512
    while nfft < frame_len:
        nfft *= 2

    wav_feature = python_speech_features.mfcc(
        data,
        fs,
        numcep=13,
        winlen=winSize,
        winstep=winStep,
        nfilt=26,
        nfft=nfft,
        lowfreq=0,
        highfreq=None,
        preemph=0.97,
    )

    # NOTE(review): both calls below differentiate the static MFCCs; the
    # second argument of delta() is the number of neighbouring frames used on
    # each side, not the derivative order. If delta-delta (acceleration)
    # coefficients were intended, the second call should take d_mfcc_feat as
    # its input - confirm before changing, since that alters saved features.
    d_mfcc_feat = python_speech_features.delta(wav_feature, 1)
    d_mfcc_feat2 = python_speech_features.delta(wav_feature, 2)

    feature = np.hstack((wav_feature, d_mfcc_feat, d_mfcc_feat2))
    return feature
def readWavToMFCCnpy(path, destination, win=0.025, step=0.025):
    """Extract MFCC features from every WAV file in a directory and save them.

    Each WAV file found directly inside ``path`` is read, converted with
    ``get_mfcc`` and stored in ``destination`` as
    ``win{win*1000}step{step*1000}-{stem}.npy``, where ``stem`` is the input
    file name without its extension.

    Args:
        path: Directory containing the input ``.wav`` files.
        destination: Directory receiving the ``.npy`` files; created if it
            does not exist yet.
        win: Analysis window length in seconds.
        step: Step between successive windows in seconds.
    """
    os.makedirs(destination, exist_ok=True)

    # Skip anything that is not a WAV file (sub-directories, notes, protocol
    # files, ...) instead of letting the WAV reader fail on it. Sorting makes
    # the processing order reproducible across runs and platforms.
    wav_names = sorted(
        name
        for name in os.listdir(path)
        if name.lower().endswith(".wav")
        and os.path.isfile(os.path.join(path, name))
    )

    for idx, wav in enumerate(wav_names, start=1):
        # Progress marker on the 1st, 101st, 201st, ... file.
        if idx % 100 == 1:
            print("======= ",idx,"MFCC npy done =======")
        sample_rate, signal = scipy.io.wavfile.read(os.path.join(path, wav))
        mfcck = get_mfcc(signal, sample_rate, win, step)
        # splitext strips only the final extension, so names that contain
        # ".wav" elsewhere are not truncated.
        stem = os.path.splitext(wav)[0]
        out_name = "win{}step{}-{}.npy".format(win * 1000, step * 1000, stem)
        np.save(os.path.join(destination, out_name), mfcck)
    print("----------------------finish get_mfcc----------------------")
if __name__ == "__main__":
    # Script entry point: convert the WAV files under the hard-coded source
    # directory into MFCC .npy files, using 25 ms windows and a 10 ms step.
    source_path = "D:/ASVsproof/2017v2/ASVspoof2017_V2_train"
    des_path = "D:/ASVsproof/2017v2npy/train"
    readWavToMFCCnpy(
        path=source_path,
        destination=des_path,
        win=0.025,
        step=0.01,
    )
|