Skip to content

Commit 39f60ed

Browse files
Laurent El ShafeyLaurent El Shafey
Laurent El Shafey
authored and
Laurent El Shafey
committed
Cepstral features: improve documentation
1 parent 0ceee57 commit 39f60ed

File tree

4 files changed

+79
-73
lines changed

4 files changed

+79
-73
lines changed

include/bob/ap/Ceps.h

Lines changed: 37 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,6 @@
2929
#include <vector>
3030
#include "bob/sp/FFT1D.h"
3131

32-
const double ENERGY_FLOOR = 1.0;
33-
const double FBANK_OUT_FLOOR = 1.0;
34-
3532
namespace bob {
3633
/**
3734
* \ingroup libap_api
@@ -55,21 +52,21 @@ class Ceps
5552
{
5653
public:
5754
/**
58-
* @brief Constructor: Initialize working arrays
55+
* @brief Constructor. Initializes working arrays
5956
*/
6057
Ceps(double sampling_frequency, double win_length_ms=20., double win_shift_ms=10.,
6158
size_t n_filters=24, size_t n_ceps=19, double f_min=0.,
6259
double f_max=4000., size_t delta_win=2, double pre_emphasis_coef=0.95,
6360
bool mel_scale=true, bool dct_norm=false);
6461

6562
/**
66-
* @brief Get the Cepstral Shape
63+
* @brief Gets the Cepstral features shape for a given input/input length
6764
*/
6865
blitz::TinyVector<int,2> getCepsShape(const size_t input_length) const;
6966
blitz::TinyVector<int,2> getCepsShape(const blitz::Array<double,1>& input) const;
7067

7168
/**
72-
* @brief Compute Cepstral features
69+
* @brief Computes Cepstral features
7370
*/
7471
void operator()(const blitz::Array<double,1>& input, blitz::Array<double,2>& output);
7572

@@ -245,7 +242,7 @@ class Ceps
245242
* @brief Computes the first order derivative from the given input.
246243
* This method is used to compute both the deltas and the double deltas.
247244
*/
248-
void addDerivative(const blitz::Array<double,2>& input, blitz::Array<double,2>& output);
245+
void addDerivative(const blitz::Array<double,2>& input, blitz::Array<double,2>& output) const;
249246
/**
250247
* @brief Converts a frequency in Hertz to the corresponding one in Mel
251248
*/
@@ -258,27 +255,32 @@ class Ceps
258255
* @brief Pre-emphasises the signal by applying the first order equation
259256
* \f$data_{n} := data_{n} - a*data_{n-1}\f$
260257
*/
261-
void pre_emphasis(blitz::Array<double,1> &data);
258+
void pre_emphasis(blitz::Array<double,1> &data) const;
262259
/**
263260
* @brief Applies the Hamming window to the signal
264261
*/
265-
void hammingWindow(blitz::Array<double,1> &data);
262+
void hammingWindow(blitz::Array<double,1> &data) const;
266263

264+
/**
265+
* @brief Computes the power-spectrum of the FFT of the input frame and
266+
* applies the triangular filter bank
267+
*/
267268
void logFilterBank(blitz::Array<double,1>& x);
268269
/**
269-
* @brief Apply triangular filter bank to the input array and return the log
270-
* of the energy in each band.
270+
* @brief Applies the triangular filter bank to the input array and
271+
* returns the logarithm of the energy in each band.
271272
*/
272-
void logTriangularFBank(blitz::Array<double,1>& data);
273-
double logEnergy(blitz::Array<double,1> &data);
273+
void logTriangularFilterBank(blitz::Array<double,1>& data) const;
274274
/**
275-
* @brief Apply a p order DCT to vector v1.
276-
* Results are returned through v2.
277-
* If {m[1],...,m[N]} are the output of the filters, then
278-
* c[i]=sqrt(2/N)*sum for j=1 to N of(m[j]cos(M_PI*i*(j-0.5)/N) i=1,...,p
279-
* This is what is implemented here with arrays indexed from 0 to N-1.
275+
* @brief Computes the logarithm of the energy
280276
*/
281-
void transformDCT(blitz::Array<double,1>& ceps_row);
277+
double logEnergy(blitz::Array<double,1> &data) const;
278+
/**
279+
* @brief Applies the DCT to the cepstral features:
280+
* \f$out[i]=\sqrt{2/N} \sum_{j=1}^{N} in[j] \cos(\pi i (j-0.5)/N)\f$
281+
*/
282+
void applyDct(blitz::Array<double,1>& ceps_row) const;
283+
282284
void initWinSize();
283285
void initWinLength();
284286
void initWinShift();
@@ -287,8 +289,8 @@ class Ceps
287289
void initCacheDctKernel();
288290
void initCacheFilterBank();
289291
/**
290-
* @brief Initialize the table m_p_index, which contains the indices of the
291-
* cut-off frequencies. It looks like something like this:
292+
* @brief Initialize the table m_p_index, which contains the indices of
293+
* the cut-off frequencies of the triangular filters. It looks like:
292294
*
293295
* filter 2
294296
* <------------->
@@ -298,8 +300,8 @@ class Ceps
298300
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 ..........
299301
* ^ ^ ^ ^ ^
300302
* | | | | |
301-
* p_in[0] | p_in[2] | p_in[4]
302-
* p_in[1] p_in[3]
303+
* t[0] | t[2] | t[4]
304+
* t[1] t[3]
303305
*
304306
*/
305307
void initCachePIndex();
@@ -322,16 +324,21 @@ class Ceps
322324
bool m_with_energy;
323325
bool m_with_delta;
324326
bool m_with_delta_delta;
327+
double m_energy_floor;
328+
double m_fb_out_floor;
329+
double m_log_energy_floor;
330+
double m_log_fb_out_floor;
331+
325332
blitz::Array<double,2> m_dct_kernel;
326333
blitz::Array<double,1> m_hamming_kernel;
327-
blitz::Array<double,1> m_filters;
328334
blitz::Array<int,1> m_p_index;
329335
std::vector<blitz::Array<double,1> > m_filter_bank;
330336
bob::sp::FFT1D m_fft;
331337

332-
mutable blitz::Array<double,1> m_cache_frame;
333-
mutable blitz::Array<std::complex<double>,1> m_cache_complex1;
334-
mutable blitz::Array<std::complex<double>,1> m_cache_complex2;
338+
mutable blitz::Array<double,1> m_cache_frame_d;
339+
mutable blitz::Array<std::complex<double>,1> m_cache_frame_c1;
340+
mutable blitz::Array<std::complex<double>,1> m_cache_frame_c2;
341+
mutable blitz::Array<double,1> m_cache_filters;
335342

336343
friend class TestCeps;
337344
};
@@ -349,16 +356,16 @@ class TestCeps
349356
{ return m_ceps.getCepsShape(input_length); }
350357
blitz::TinyVector<int,2> getCepsShape(const blitz::Array<double,1>& input) const
351358
{ return m_ceps.getCepsShape(input); }
352-
blitz::Array<double,1> getFilter(void) { return m_ceps.m_filters; }
359+
blitz::Array<double,1> getFilterOutput() { return m_ceps.m_cache_filters; }
353360

354361
void operator()(const blitz::Array<double,1>& input, blitz::Array<double,2>& ceps_2D)
355362
{ m_ceps(input, ceps_2D);}
356363
void hammingWindow(blitz::Array<double,1>& data){ m_ceps.hammingWindow(data); }
357364
void pre_emphasis(blitz::Array<double,1>& data){ m_ceps.pre_emphasis(data); }
358365
void logFilterBank(blitz::Array<double,1>& x){ m_ceps.logFilterBank(x); }
359-
void logTriangularFBank(blitz::Array<double,1>& data){ m_ceps.logTriangularFBank(data); }
366+
void logTriangularFilterBank(blitz::Array<double,1>& data){ m_ceps.logTriangularFilterBank(data); }
360367
double logEnergy(blitz::Array<double,1> &data){ return m_ceps.logEnergy(data); }
361-
void transformDCT(blitz::Array<double,1>& ceps_row) { m_ceps.transformDCT(ceps_row); }
368+
void applyDct(blitz::Array<double,1>& ceps_row) { m_ceps.applyDct(ceps_row); }
362369
};
363370

364371
}

python/bob/ap/test/test_ceps.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ def cepstral_features_extraction(obj, rate_wavsample, win_length_ms, win_shift_m
248248
obj.assertAlmostEqual(filters[kk], filt2[kk], 7, "Error in log Filtering")
249249

250250
ceps = dct_transform(filters, n_filters, dct_kernel, n_ceps, dct_norm)
251-
ceps2 = ct.dct_transform(n_ceps)
251+
ceps2 = ct.apply_dct(n_ceps)
252252

253253

254254
if(with_energy):
@@ -453,4 +453,4 @@ def test_cepstral(self):
453453
with_delta_delta = False
454454
cepstral_comparison_run(self,rate_wavsample, win_length_ms, win_shift_ms, n_filters, n_ceps, dct_norm, f_min, f_max, delta_win,
455455
pre_emphasis_coef, mel_scale, with_energy, with_delta, with_delta_delta)
456-
456+

src/ap/cxx/Ceps.cc

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,16 @@ bob::ap::Ceps::Ceps( double sampling_frequency, double win_length_ms, double win
3434
m_delta_win(delta_win), m_pre_emphasis_coeff(pre_emphasis_coeff),
3535
m_mel_scale(mel_scale), m_dct_norm(dct_norm),
3636
m_with_energy(true), m_with_delta(true), m_with_delta_delta(true),
37-
m_filter_bank(), m_fft(1)
37+
m_energy_floor(1.), m_fb_out_floor(1.), m_fft(1)
3838
{
3939
initWinLength();
4040
initWinShift();
4141

42-
m_filters.resize(m_n_filters);
42+
// Initializes logarithm of flooring values
43+
m_log_energy_floor = log(m_energy_floor);
44+
m_log_fb_out_floor = log(m_fb_out_floor);
4345

46+
m_cache_filters.resize(m_n_filters);
4447
initCacheDctKernel();
4548
}
4649

@@ -70,7 +73,7 @@ void bob::ap::Ceps::setWinShiftMs(double win_shift_ms)
7073
void bob::ap::Ceps::setNFilters(size_t n_filters)
7174
{
7275
m_n_filters = n_filters;
73-
m_filters.resize(m_n_filters);
76+
m_cache_filters.resize(m_n_filters);
7477
initCacheFilterBank();
7578
initCacheDctKernel();
7679
}
@@ -132,10 +135,10 @@ void bob::ap::Ceps::initWinShift()
132135
void bob::ap::Ceps::initWinSize()
133136
{
134137
m_win_size = (size_t)pow(2.0,ceil(log((double)m_win_length)/log(2)));
135-
m_cache_frame.resize(m_win_size);
138+
m_cache_frame_d.resize(m_win_size);
136139
m_fft.reset(m_win_size);
137-
m_cache_complex1.resize(m_win_size);
138-
m_cache_complex2.resize(m_win_size);
140+
m_cache_frame_c1.resize(m_win_size);
141+
m_cache_frame_c2.resize(m_win_size);
139142
}
140143

141144
void bob::ap::Ceps::initCacheHammingKernel()
@@ -262,26 +265,26 @@ void bob::ap::Ceps::operator()(const blitz::Array<double,1>& input,
262265
for(int i=0; i<n_frames; ++i)
263266
{
264267
// Set padded frame to zero
265-
m_cache_frame = 0.;
268+
m_cache_frame_d = 0.;
266269
// Extract frame input vector
267270
blitz::Range ri(i*(int)m_win_shift,i*(int)m_win_shift+(int)m_win_length-1);
268-
m_cache_frame(rf) = input(ri);
271+
m_cache_frame_d(rf) = input(ri);
269272
// Subtract mean value
270-
m_cache_frame -= blitz::mean(m_cache_frame);
273+
m_cache_frame_d -= blitz::mean(m_cache_frame_d);
271274

272275
// Update output with energy if required
273276
if(m_with_energy)
274-
ceps_matrix(i,(int)m_n_ceps) = logEnergy(m_cache_frame);
277+
ceps_matrix(i,(int)m_n_ceps) = logEnergy(m_cache_frame_d);
275278

276279
// Apply pre-emphasis
277-
pre_emphasis(m_cache_frame);
280+
pre_emphasis(m_cache_frame_d);
278281
// Apply the Hamming window
279-
hammingWindow(m_cache_frame);
282+
hammingWindow(m_cache_frame_d);
280283
// Filter with the triangular filter bank (either in linear or Mel domain)
281-
logFilterBank(m_cache_frame);
284+
logFilterBank(m_cache_frame_d);
282285
// Apply DCT kernel and update the output
283286
blitz::Array<double,1> ceps_matrix_row(ceps_matrix(i,r1));
284-
transformDCT(ceps_matrix_row);
287+
applyDct(ceps_matrix_row);
285288
}
286289

287290
blitz::Range rall = blitz::Range::all();
@@ -302,7 +305,7 @@ void bob::ap::Ceps::operator()(const blitz::Array<double,1>& input,
302305
}
303306
}
304307

305-
void bob::ap::Ceps::pre_emphasis(blitz::Array<double,1> &data)
308+
void bob::ap::Ceps::pre_emphasis(blitz::Array<double,1> &data) const
306309
{
307310
if(m_pre_emphasis_coeff!=0.)
308311
{
@@ -315,7 +318,7 @@ void bob::ap::Ceps::pre_emphasis(blitz::Array<double,1> &data)
315318
}
316319
}
317320

318-
void bob::ap::Ceps::hammingWindow(blitz::Array<double,1> &data)
321+
void bob::ap::Ceps::hammingWindow(blitz::Array<double,1> &data) const
319322
{
320323
blitz::Range r(0,(int)m_win_length-1);
321324
data(r) *= m_hamming_kernel;
@@ -324,46 +327,44 @@ void bob::ap::Ceps::hammingWindow(blitz::Array<double,1> &data)
324327
void bob::ap::Ceps::logFilterBank(blitz::Array<double,1>& x)
325328
{
326329
// Apply the FFT
327-
m_cache_complex1 = bob::core::cast<std::complex<double> >(x);
328-
m_fft(m_cache_complex1, m_cache_complex2);
330+
m_cache_frame_c1 = bob::core::cast<std::complex<double> >(x);
331+
m_fft(m_cache_frame_c1, m_cache_frame_c2);
329332

330333
// Take the power spectrum of the first part of the output of the FFT
331334
blitz::Range r(0,(int)m_win_size/2);
332335
blitz::Array<double,1> x_half(x(r));
333-
blitz::Array<std::complex<double>,1> complex_half(m_cache_complex2(r));
336+
blitz::Array<std::complex<double>,1> complex_half(m_cache_frame_c2(r));
334337
x_half = blitz::abs(complex_half);
335338

336339
// Apply the Triangular filter bank to this power spectrum
337-
logTriangularFBank(x);
340+
logTriangularFilterBank(x);
338341
}
339342

340-
void bob::ap::Ceps::logTriangularFBank(blitz::Array<double,1>& data)
343+
void bob::ap::Ceps::logTriangularFilterBank(blitz::Array<double,1>& data) const
341344
{
342345
for(int i=0; i<(int)m_n_filters; ++i)
343346
{
344347
blitz::Array<double,1> data_slice(data(blitz::Range(m_p_index(i),m_p_index(i+2))));
345348
double res = blitz::sum(data_slice * m_filter_bank[i]);
346-
m_filters(i)=(res < FBANK_OUT_FLOOR)?(double)log(FBANK_OUT_FLOOR):(double)log(res);
349+
m_cache_filters(i)= (res < m_fb_out_floor ? m_log_fb_out_floor : log(res));
347350
}
348351
}
349352

350-
double bob::ap::Ceps::logEnergy(blitz::Array<double,1> &data)
353+
double bob::ap::Ceps::logEnergy(blitz::Array<double,1> &data) const
351354
{
352355
blitz::Array<double,1> data_p(data(blitz::Range(0,(int)m_win_length-1)));
353356
double gain = blitz::sum(blitz::pow2(data_p));
354-
gain = gain < ENERGY_FLOOR ?
355-
(double)(log(ENERGY_FLOOR)) : (double)(log(gain));
356-
return (gain);
357+
return (gain < m_energy_floor ? m_log_energy_floor : log(gain));
357358
}
358359

359-
void bob::ap::Ceps::transformDCT(blitz::Array<double,1>& ceps_row)
360+
void bob::ap::Ceps::applyDct(blitz::Array<double,1>& ceps_row) const
360361
{
361362
blitz::firstIndex i;
362363
blitz::secondIndex j;
363-
ceps_row = blitz::sum(m_filters(j) * m_dct_kernel(i,j), j);
364+
ceps_row = blitz::sum(m_cache_filters(j) * m_dct_kernel(i,j), j);
364365
}
365366

366-
void bob::ap::Ceps::addDerivative(const blitz::Array<double,2>& input, blitz::Array<double,2>& output)
367+
void bob::ap::Ceps::addDerivative(const blitz::Array<double,2>& input, blitz::Array<double,2>& output) const
367368
{
368369
// Initialize output to zero
369370
output = 0.;

src/ap/python/ceps.cc

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ using namespace boost::python;
3131
static const char* CEPS_DOC = "Objects of this class, after configuration, can extract Cepstral Features from a 1D array/signal.";
3232
static const char* TESTCEPS_DOC = "Objects of this class, after configuration, can be used to test the private methods of bob.ap.Ceps.";
3333

34-
static object py_ceps_analysis(bob::ap::Ceps& ceps, bob::python::const_ndarray input)
34+
static object py_forward(bob::ap::Ceps& ceps, bob::python::const_ndarray input)
3535
{
3636
// Gets the shape of the feature
3737
const blitz::Array<double,1> input_ = input.bz<double,1>();
@@ -64,8 +64,6 @@ static boost::python::tuple py_get_ceps_shape(bob::ap::Ceps& ceps, object input_
6464
static double py_logEnergy(bob::ap::TestCeps& ceps, bob::python::ndarray data)
6565
{
6666
blitz::Array<double,1> data_ = data.bz<double,1>();
67-
68-
// Get the logEnergy
6967
return ceps.logEnergy(data_);
7068
}
7169

@@ -88,17 +86,17 @@ static object py_logFilterBank(bob::ap::TestCeps& ceps, bob::python::ndarray dat
8886
ceps.logFilterBank(data_);
8987
bob::python::ndarray filter(bob::core::array::t_float64, n_filters);
9088
blitz::Array<double,1> filter_ = filter.bz<double,1>();
91-
// Gets the filter Bank
92-
filter_ = ceps.getFilter();
89+
// Gets the filter bank output
90+
filter_ = ceps.getFilterOutput();
9391
return filter.self();
9492
}
9593

96-
static object py_transformDCT(bob::ap::TestCeps& ceps, int n_ceps)
94+
static object py_applyDct(bob::ap::TestCeps& ceps, int n_ceps)
9795
{
9896
bob::python::ndarray ceps_row(bob::core::array::t_float64, n_ceps);
9997
blitz::Array<double,1> ceps_row_ = ceps_row.bz<double,1>();
100-
// Get the Cepstral features
101-
ceps.transformDCT(ceps_row_);
98+
// Gets the Cepstral features
99+
ceps.applyDct(ceps_row_);
102100
return ceps_row.self();
103101
}
104102

@@ -107,7 +105,7 @@ void bind_ap_ceps()
107105
{
108106
class_<bob::ap::Ceps, boost::shared_ptr<bob::ap::Ceps> >("Ceps", CEPS_DOC, init<double, optional<double, double, size_t, size_t, double, double, int, double, bool, bool> >
109107
((arg("sampling_frequency"), arg("win_length_ms")=20., arg("win_shift_ms")=10., arg("n_filters")=24, arg("n_ceps")=19, arg("f_min")=0., arg("f_max")=4000., arg("delta_win"), arg("pre_emphasis_coeff")=0.95, arg("mel_scale")=true, arg("dct_norm")=false)))
110-
.add_property("sampling_frequency", &bob::ap::Ceps::getSamplingFrequency, &bob::ap::Ceps::setSamplingFrequency, "The sample frequency of the input data")
108+
.add_property("sampling_frequency", &bob::ap::Ceps::getSamplingFrequency, &bob::ap::Ceps::setSamplingFrequency, "The sampling frequency of the input data")
111109
.add_property("win_length_ms", &bob::ap::Ceps::getWinLengthMs, &bob::ap::Ceps::setWinLengthMs, "The window length of the cepstral analysis in milliseconds")
112110
.add_property("win_length", &bob::ap::Ceps::getWinLength, "The normalized window length wrt. to the sample frequency")
113111
.add_property("win_shift_ms", &bob::ap::Ceps::getWinShiftMs, &bob::ap::Ceps::setWinShiftMs, "The window shift of the cepstral analysis in milliseconds")
@@ -123,8 +121,8 @@ void bind_ap_ceps()
123121
.add_property("with_energy", &bob::ap::Ceps::getWithEnergy, &bob::ap::Ceps::setWithEnergy, "Tells if we add the energy to the output feature")
124122
.add_property("with_delta", &bob::ap::Ceps::getWithDelta, &bob::ap::Ceps::setWithDelta, "Tells if we add the first derivatives to the output feature")
125123
.add_property("with_delta_delta", &bob::ap::Ceps::getWithDeltaDelta, &bob::ap::Ceps::setWithDeltaDelta, "Tells if we add the second derivatives to the output feature")
126-
.def("__call__", &py_ceps_analysis, (arg("input")), "Compute the features")
127-
.def("get_ceps_shape", &py_get_ceps_shape, (arg("n_size"), arg("input_data")), "Compute the shape of the output features")
124+
.def("__call__", &py_forward, (arg("input")), "Computes the cepstral features")
125+
.def("get_ceps_shape", &py_get_ceps_shape, (arg("n_size"), arg("input_data")), "Computes the shape of the output features")
128126
;
129127

130128
class_<bob::ap::TestCeps, boost::shared_ptr<bob::ap::TestCeps> >("TestCeps", TESTCEPS_DOC, init<bob::ap::Ceps&>((arg("ceps"))))
@@ -134,7 +132,7 @@ void bind_ap_ceps()
134132
.def("pre_emphasis", &py_emphasis, (arg("data")), "compute pre-emphasis")
135133
.def("hamming_window", &py_hammingWindow, (arg("data")), "compute the wraped signal on a hamming Window")
136134
.def("log_filter_bank", &py_logFilterBank, (arg("data"), arg("m_win_size"), arg("n_filters")), "compute log Filter Bank")
137-
.def("dct_transform", &py_transformDCT, (arg("n_ceps")), "DCT Transform")
135+
.def("apply_dct", &py_applyDct, (arg("n_ceps")), "DCT Transform")
138136
;
139137
}
140138

0 commit comments

Comments
 (0)