55from python_speech_features import sigproc
66from scipy .fftpack import dct
77
def calculate_nfft(samplerate, winlen):
    """Calculate the FFT size as a power of two greater than or equal to
    the number of samples in a single window length.

    Having an FFT less than the window length loses precision by dropping
    many of the samples; a longer FFT than the window allows zero-padding
    of the FFT buffer which is neutral in terms of frequency domain conversion.

    :param samplerate: The sample rate of the signal we are working with, in Hz.
    :param winlen: The length of the analysis window in seconds.
    :returns: The smallest power of two (at least 1) that is not less than
        winlen * samplerate.
    """
    # The window length in samples may be fractional (e.g. 44100 * 0.025);
    # the comparison below effectively rounds it up to the next power of two.
    window_length_samples = winlen * samplerate
    nfft = 1
    while nfft < window_length_samples:
        nfft *= 2
    return nfft
24+
825def mfcc (signal ,samplerate = 16000 ,winlen = 0.025 ,winstep = 0.01 ,numcep = 13 ,
9- nfilt = 26 ,nfft = 512 ,lowfreq = 0 ,highfreq = None ,preemph = 0.97 ,ceplifter = 22 ,appendEnergy = True ,
26+ nfilt = 26 ,nfft = None ,lowfreq = 0 ,highfreq = None ,preemph = 0.97 ,ceplifter = 22 ,appendEnergy = True ,
1027 winfunc = lambda x :numpy .ones ((x ,))):
1128 """Compute MFCC features from an audio signal.
1229
1330 :param signal: the audio signal from which to compute features. Should be an N*1 array
14- :param samplerate: the samplerate of the signal we are working with.
31+ :param samplerate: the sample rate of the signal we are working with, in Hz.
1532 :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
1633 :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
1734 :param numcep: the number of cepstrum to return, default 13
1835 :param nfilt: the number of filters in the filterbank, default 26.
19- :param nfft: the FFT size. Default is 512.
36+ :param nfft: the FFT size. Default is None, which uses the calculate_nfft function to choose the smallest size that does not drop sample data.
2037 :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
2138 :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
2239 :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
@@ -25,6 +42,7 @@ def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
2542 :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
2643 :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
2744 """
45+ nfft = nfft or calculate_nfft (samplerate , winlen )
2846 feat ,energy = fbank (signal ,samplerate ,winlen ,winstep ,nfilt ,nfft ,lowfreq ,highfreq ,preemph ,winfunc )
2947 feat = numpy .log (feat )
3048 feat = dct (feat , type = 2 , axis = 1 , norm = 'ortho' )[:,:numcep ]
@@ -38,7 +56,7 @@ def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
3856 """Compute Mel-filterbank energy features from an audio signal.
3957
4058 :param signal: the audio signal from which to compute features. Should be an N*1 array
41- :param samplerate: the samplerate of the signal we are working with.
59+ :param samplerate: the sample rate of the signal we are working with, in Hz.
4260 :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
4361 :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
4462 :param nfilt: the number of filters in the filterbank, default 26.
@@ -69,7 +87,7 @@ def logfbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
6987 """Compute log Mel-filterbank energy features from an audio signal.
7088
7189 :param signal: the audio signal from which to compute features. Should be an N*1 array
72- :param samplerate: the samplerate of the signal we are working with.
90+ :param samplerate: the sample rate of the signal we are working with, in Hz.
7391 :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
7492 :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
7593 :param nfilt: the number of filters in the filterbank, default 26.
@@ -89,7 +107,7 @@ def ssc(signal,samplerate=16000,winlen=0.025,winstep=0.01,
89107 """Compute Spectral Subband Centroid features from an audio signal.
90108
91109 :param signal: the audio signal from which to compute features. Should be an N*1 array
92- :param samplerate: the samplerate of the signal we are working with.
110+ :param samplerate: the sample rate of the signal we are working with, in Hz.
93111 :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
94112 :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
95113 :param nfilt: the number of filters in the filterbank, default 26.
@@ -134,7 +152,7 @@ def get_filterbanks(nfilt=20,nfft=512,samplerate=16000,lowfreq=0,highfreq=None):
134152
135153 :param nfilt: the number of filters in the filterbank, default 20.
136154 :param nfft: the FFT size. Default is 512.
137- :param samplerate: the samplerate of the signal we are working with. Affects mel spacing.
155+ :param samplerate: the sample rate of the signal we are working with, in Hz. Affects mel spacing.
138156 :param lowfreq: lowest band edge of mel filters, default 0 Hz
139157 :param highfreq: highest band edge of mel filters, default samplerate/2
140158 :returns: A numpy array of size nfilt * (nfft/2 + 1) containing filterbank. Each row holds 1 filter.
0 commit comments