53 #include "sphinxbase/byteorder.h"
54 #include "sphinxbase/fixpoint.h"
60 #include "fe_internal.h"
/* Argument definition table for the front end: the entries produced by
 * waveform_to_cepstral_command_line_macro(), followed by an all-NULL/zero
 * terminating entry.  (The closing brace is not part of this listing.) */
63 static const arg_t fe_args[] = {
64 waveform_to_cepstral_command_line_macro(),
65 { NULL, 0, NULL, NULL }
/* Body fragment that fills the general front-end settings in `fe` from the
 * command-line object `config`.  The enclosing signature is not present in
 * this listing; presumably this is fe_parse_general_params(), which is
 * called later in the file -- confirm against the full source. */
/* Sampling rate (float) and frame rate (int) come straight from the config. */
74 fe->sampling_rate = cmd_ln_float32_r(config,
"-samprate");
75 frate = cmd_ln_int32_r(config,
"-frate");
/* Reject frame rates above MAX_INT16, above the sampling rate, or below 1.
 * NOTE(review): the message text only describes the "bigger than sample
 * rate" case, although all three conditions reach it. */
76 if (frate > MAX_INT16 || frate > fe->sampling_rate || frate < 1) {
78 (
"Frame rate %d can not be bigger than sample rate %.02f\n",
79 frate, fe->sampling_rate);
/* Safe to narrow: frate was range-checked against MAX_INT16 above. */
83 fe->frame_rate = (int16)frate;
86 fe->seed = cmd_ln_int32_r(config,
"-seed");
/* fe->swap is 0 when "-input_endian" equals the tested string and 1
 * otherwise.  "big" is tested when WORDS_BIGENDIAN is defined; the "little"
 * test is presumably the #else branch (that directive is not shown). */
88 #ifdef WORDS_BIGENDIAN
89 fe->swap = strcmp(
"big",
cmd_ln_str_r(config,
"-input_endian")) == 0 ? 0 : 1;
91 fe->swap = strcmp(
"little",
cmd_ln_str_r(config,
"-input_endian")) == 0 ? 0 : 1;
93 fe->window_length = cmd_ln_float32_r(config,
"-wlen");
94 fe->pre_emphasis_alpha = cmd_ln_float32_r(config,
"-alpha");
/* The integer options are narrowed without a range check at this point. */
96 fe->num_cepstra = (uint8)cmd_ln_int32_r(config,
"-ncep");
97 fe->fft_size = (int16)cmd_ln_int32_r(config,
"-nfft");
/* Halve j from fft_size down to 1, counting the halvings in fft_order.  An
 * odd intermediate value means fft_size is not a power of two.
 * NOTE(review): the loop only runs while j > 1, so the `fft_size <= 0` test
 * inside it cannot fire for a non-positive fft_size. */
100 for (j = fe->fft_size, fe->fft_order = 0; j > 1; j >>= 1, fe->fft_order++) {
101 if (((j % 2) != 0) || (fe->fft_size <= 0)) {
102 E_ERROR(
"fft: number of points must be a power of 2 (is %d)\n",
/* The FFT must hold at least one analysis window, measured in samples as
 * window_length * sampling_rate truncated to int. */
108 if (fe->fft_size < (
int)(fe->window_length * fe->sampling_rate)) {
109 E_ERROR(
"FFT: Number of points must be greater or equal to frame size (%d samples)\n",
110 (
int)(fe->window_length * fe->sampling_rate));
/* Map the "-transform" string onto the transform selector; the error below
 * is presumably the final else branch (the `else` line is not shown). */
116 if (0 == strcmp(
cmd_ln_str_r(config,
"-transform"),
"dct"))
117 fe->transform = DCT_II;
118 else if (0 == strcmp(
cmd_ln_str_r(config,
"-transform"),
"legacy"))
119 fe->transform = LEGACY_DCT;
120 else if (0 == strcmp(
cmd_ln_str_r(config,
"-transform"),
"htk"))
121 fe->transform = DCT_HTK;
123 E_ERROR(
"Invalid transform type (values are 'dct', 'legacy', 'htk')\n");
/* log_spec is set to one of two modes; the conditions selecting each
 * assignment are not visible in this listing. */
128 fe->log_spec = RAW_LOG_SPEC;
130 fe->log_spec = SMOOTH_LOG_SPEC;
/* Body fragment that fills the mel filterbank description `mel` from `fe`
 * and `config`.  The signature is not present in this listing; presumably
 * this is fe_parse_melfb_params(), which the initializer calls with
 * (config, fe, fe->mel_fb) -- confirm against the full source. */
/* Values copied from the already-parsed front-end settings. */
138 mel->sampling_rate = fe->sampling_rate;
139 mel->fft_size = fe->fft_size;
140 mel->num_cepstra = fe->num_cepstra;
141 mel->num_filters = cmd_ln_int32_r(config,
"-nfilt");
/* The output dimension is either the filter count or the cepstra count; the
 * condition choosing between the two assignments is not shown. */
144 fe->feature_dimension = mel->num_filters;
146 fe->feature_dimension = fe->num_cepstra;
148 mel->upper_filt_freq = cmd_ln_float32_r(config,
"-upperf");
149 mel->lower_filt_freq = cmd_ln_float32_r(config,
"-lowerf");
/* warp_params holds the pointer returned by cmd_ln_str_r(); no copy is made
 * here. */
154 mel->warp_params =
cmd_ln_str_r(config,
"-warp_params");
155 mel->lifter_val = cmd_ln_int32_r(config,
"-lifter");
/* Select the warping function named by mel->warp_type (assigned on a line
 * not shown), then hand it the parameter string and sampling rate. */
160 if (fe_warp_set(mel, mel->warp_type) != FE_SUCCESS) {
161 E_ERROR(
"Failed to initialize the warping function.\n");
164 fe_warp_set_parameters(mel, mel->warp_params, mel->sampling_rate);
/* Log the current front-end configuration through E_INFO.  Takes the
 * front-end object by const pointer and reads fields of `fe` and of
 * `fe->mel_fb`, so the filterbank must already be attached. */
169 fe_print_current(
fe_t const *fe)
171 E_INFO(
"Current FE Parameters:\n");
172 E_INFO(
"\tSampling Rate: %f\n", fe->sampling_rate);
173 E_INFO(
"\tFrame Size: %d\n", fe->frame_size);
174 E_INFO(
"\tFrame Shift: %d\n", fe->frame_shift);
175 E_INFO(
"\tFFT Size: %d\n", fe->fft_size);
176 E_INFO(
"\tLower Frequency: %g\n",
177 fe->mel_fb->lower_filt_freq);
178 E_INFO(
"\tUpper Frequency: %g\n",
179 fe->mel_fb->upper_filt_freq);
180 E_INFO(
"\tNumber of filters: %d\n", fe->mel_fb->num_filters);
181 E_INFO(
"\tNumber of Overflow Samps: %d\n", fe->num_overflow_samps);
182 E_INFO(
"\tStart Utt Status: %d\n", fe->start_flag);
/* Boolean options are reported by splicing "" or "not " into the message. */
183 E_INFO(
"Will %sremove DC offset at frame level\n",
184 fe->remove_dc ?
"" :
"not ");
/* Dither reporting: the condition choosing between the "will add" messages
 * and the "will not add" message is not shown in this listing. */
186 E_INFO(
"Will add dither to audio\n");
187 E_INFO(
"Dither seeded with %d\n", fe->seed);
190 E_INFO(
"Will not add dither to audio\n");
/* The liftering line is printed only for a non-zero lifter value. */
192 if (fe->mel_fb->lifter_val) {
193 E_INFO(
"Will apply sine-curve liftering, period %d\n",
194 fe->mel_fb->lifter_val);
196 E_INFO(
"Will %snormalize filters to unit area\n",
197 fe->mel_fb->unit_area ?
"" :
"not ");
198 E_INFO(
"Will %sround filter frequencies to DFT points\n",
199 fe->mel_fb->round_filters ?
"" :
"not ");
200 E_INFO(
"Will %suse double bandwidth in mel filter\n",
201 fe->mel_fb->doublewide ?
"" :
"not ");
/* Body fragment of the front-end initializer (signature not present in this
 * listing).  It parses the general parameters, derives the frame geometry,
 * allocates the working buffers and the mel filterbank, and logs the result. */
/* cmd_ln_retain(config) is what gets passed to the parser; a negative return
 * is treated as failure (the failure handling itself is not shown). */
219 if (fe_parse_general_params(
cmd_ln_retain(config), fe) < 0) {
/* Samples per frame step and per analysis window; adding 0.5 before the
 * truncating cast rounds to the nearest integer.
 * NOTE(review): the parameter parser compares fft_size with the truncated
 * window size, whereas frame_size here is rounded; the check a few lines
 * below compares fft_size with this rounded value. */
228 fe->frame_shift = (int32) (fe->sampling_rate / fe->frame_rate + 0.5);
229 fe->frame_size = (int32) (fe->window_length * fe->sampling_rate + 0.5);
231 fe->frame_counter = 0;
/* Debug-only guard: assert() is compiled out when NDEBUG is defined. */
233 assert (fe->frame_shift > 1);
235 if (fe->frame_size > (fe->fft_size)) {
237 (
"Number of FFT points has to be a power of 2 higher than %d\n",
244 fe_init_dither(fe->seed);
/* overflow_samps holds up to one full frame of int16 samples. */
247 fe->overflow_samps =
ckd_calloc(fe->frame_size,
sizeof(int16));
/* NOTE(review): only frame_size/2 window entries are allocated although
 * fe_create_hamming() is given frame_size -- presumably just one half of a
 * symmetric window is stored; confirm against fe_create_hamming(). */
248 fe->hamming_window =
ckd_calloc(fe->frame_size/2,
sizeof(window_t));
251 fe_create_hamming(fe->hamming_window, fe->frame_size);
/* Allocate one mel filterbank descriptor, then fill and build it. */
254 fe->mel_fb =
ckd_calloc(1,
sizeof(*fe->mel_fb));
257 fe_parse_melfb_params(config, fe, fe->mel_fb);
258 fe_build_melfilters(fe->mel_fb);
259 fe_compute_melcosine(fe->mel_fb);
/* Per-frame working buffers: spch has frame_size entries, frame and spec
 * have fft_size entries, mfspec has one entry per mel filter. */
263 fe->spch =
ckd_calloc(fe->frame_size,
sizeof(*fe->spch));
264 fe->frame =
ckd_calloc(fe->fft_size,
sizeof(*fe->frame));
265 fe->spec =
ckd_calloc(fe->fft_size,
sizeof(*fe->spec));
266 fe->mfspec =
ckd_calloc(fe->mel_fb->num_filters,
sizeof(*fe->mfspec));
/* ccc and sss each get fft_size/4 entries before fe_create_twiddle(fe) is
 * called; presumably these are the FFT twiddle-factor tables. */
269 fe->ccc =
ckd_calloc(fe->fft_size / 4,
sizeof(*fe->ccc));
270 fe->sss =
ckd_calloc(fe->fft_size / 4,
sizeof(*fe->sss));
271 fe_create_twiddle(fe);
274 fe_print_current(fe);
/* Takes the front-end object; the return type and body are not present in
 * this listing. */
290 fe_get_config(
fe_t *fe)
/* Takes the dither seed and logs whether an internally generated seed or the
 * supplied `seed` value is in use.  The condition choosing between the two
 * messages and the seeding calls themselves are not shown in this listing. */
296 fe_init_dither(int32 seed)
299 E_INFO(
"You are using the internal mechanism to generate the seed.\n");
307 E_INFO(
"You are using %d as the seed.\n", seed);
/* Reset the overflow state of `fe`: the buffered-sample count is set to zero
 * and the first frame_size int16 entries of the overflow buffer are cleared.
 * Any further resets and the return value are not shown in this listing. */
313 fe_start_utt(
fe_t * fe)
315 fe->num_overflow_samps = 0;
316 memset(fe->overflow_samps, 0, fe->frame_size *
sizeof(int16));
/* Return the front end's feature_dimension, converted to int. */
323 fe_get_output_size(
fe_t *fe)
325 return (
int)fe->feature_dimension;
/* Report the input framing of `fe` through output parameters:
 * *out_frame_shift receives frame_shift and *out_frame_size receives
 * frame_size.  The declaration of out_frame_size and any NULL guards around
 * the two stores are not shown in this listing. */
329 fe_get_input_size(
fe_t *fe,
int *out_frame_shift,
333 *out_frame_shift = fe->frame_shift;
335 *out_frame_size = fe->frame_size;
/* Process a single frame: load `nsamps` samples from `spch` with
 * fe_read_frame(), then return whatever fe_write_frame() returns after
 * writing the result into `fr_cep`. */
339 fe_process_frame(
fe_t * fe, int16
const *spch, int32 nsamps, mfcc_t * fr_cep)
341 fe_read_frame(fe, spch, nsamps);
342 return fe_write_frame(fe, fr_cep);
/* Process as many whole frames as possible from a block of samples.
 *
 * *inout_spch and *inout_nsamps are advanced/decremented as input is
 * consumed, and *inout_nframes is overwritten with the number of frames
 * produced.  Samples that do not yet make up a frame are kept in
 * fe->overflow_samps for the next call.  Several lines (the buf_cep
 * parameter declaration, the frame_count computation, the outidx updates
 * and the `else` keywords) are missing from this listing, so the notes below
 * cover only what is visible. */
346 fe_process_frames(
fe_t *fe,
347 int16
const **inout_spch,
348 size_t *inout_nsamps,
350 int32 *inout_nframes)
353 int outidx, i, n, n_overflow, orig_n_overflow;
354 int16
const *orig_spch;
/* With no output buffer the visible code returns *inout_nframes from inside
 * this block; the assignment that computes it is only partly shown. */
358 if (buf_cep == NULL) {
359 if (*inout_nsamps + fe->num_overflow_samps < (
size_t)fe->frame_size)
363 + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size)
365 return *inout_nframes;
/* Input plus buffered overflow is shorter than one frame: append whatever
 * input there is to the overflow buffer and step the caller's pointer past
 * it. */
369 if (*inout_nsamps + fe->num_overflow_samps < (
size_t)fe->frame_size) {
370 if (*inout_nsamps > 0) {
372 memcpy(fe->overflow_samps + fe->num_overflow_samps,
373 *inout_spch, *inout_nsamps * (
sizeof(int16)));
374 fe->num_overflow_samps += *inout_nsamps;
376 *inout_spch += *inout_nsamps;
/* Caller asked for fewer than one frame; the handling is not shown. */
385 if (*inout_nframes < 1) {
/* Remember where the input started and how much overflow was pending; both
 * values are needed by the overflow bookkeeping at the end. */
391 orig_spch = *inout_spch;
392 orig_n_overflow = fe->num_overflow_samps;
395 + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size)
/* Never produce more frames than the caller allowed for. */
398 if (frame_count > *inout_nframes)
399 frame_count = *inout_nframes;
/* First frame with pending overflow: top the overflow buffer up to a full
 * frame using the first `offset` input samples and process it from there. */
404 if (fe->num_overflow_samps) {
405 int offset = fe->frame_size - fe->num_overflow_samps;
408 memcpy(fe->overflow_samps + fe->num_overflow_samps,
409 *inout_spch, offset *
sizeof(**inout_spch));
410 fe_read_frame(fe, fe->overflow_samps, fe->frame_size);
411 assert(outidx < frame_count);
412 if ((n = fe_write_frame(fe, buf_cep[outidx])) < 0)
416 *inout_spch += offset;
417 *inout_nsamps -= offset;
418 fe->num_overflow_samps -= fe->frame_shift;
/* Presumably the else branch (the `else` line is not shown): the first frame
 * is read directly from the input. */
421 fe_read_frame(fe, *inout_spch, fe->frame_size);
422 assert(outidx < frame_count);
423 if ((n = fe_write_frame(fe, buf_cep[outidx])) < 0)
427 *inout_spch += fe->frame_size;
428 *inout_nsamps -= fe->frame_size;
/* Every further frame consumes frame_shift new input samples via
 * fe_shift_frame(). */
432 for (i = 1; i < frame_count; ++i) {
433 assert(*inout_nsamps >= (
size_t)fe->frame_shift);
435 fe_shift_frame(fe, *inout_spch, fe->frame_shift);
436 assert(outidx < frame_count);
437 if ((n = fe_write_frame(fe, buf_cep[outidx])) < 0)
441 *inout_spch += fe->frame_shift;
442 *inout_nsamps -= fe->frame_shift;
/* While old overflow samples are still being worked through, each frame
 * uses up frame_shift of them. */
444 if (fe->num_overflow_samps > 0)
445 fe->num_overflow_samps -= fe->frame_shift;
/* No previously buffered samples remain in play: the new overflow is taken
 * entirely from the current input.  It keeps up to
 * frame_size - frame_shift already-consumed samples (capped by how far the
 * input pointer has advanced) plus at most frame_shift unconsumed ones. */
449 if (fe->num_overflow_samps <= 0) {
451 n_overflow = *inout_nsamps;
452 if (n_overflow > fe->frame_shift)
453 n_overflow = fe->frame_shift;
454 fe->num_overflow_samps = fe->frame_size - fe->frame_shift;
456 if (fe->num_overflow_samps > *inout_spch - orig_spch)
457 fe->num_overflow_samps = *inout_spch - orig_spch;
458 fe->num_overflow_samps += n_overflow;
459 if (fe->num_overflow_samps > 0) {
460 memcpy(fe->overflow_samps,
461 *inout_spch - (fe->frame_size - fe->frame_shift),
462 fe->num_overflow_samps *
sizeof(**inout_spch));
464 *inout_spch += n_overflow;
465 *inout_nsamps -= n_overflow;
/* Presumably the alternative branch (the `else` line is not shown): some
 * previously buffered samples are still needed.  memmove slides them to the
 * front of the buffer (source and destination overlap), then input starting
 * at orig_spch is appended up to the buffer's capacity. */
471 memmove(fe->overflow_samps,
472 fe->overflow_samps + orig_n_overflow - fe->num_overflow_samps,
473 fe->num_overflow_samps *
sizeof(*fe->overflow_samps));
475 n_overflow = *inout_spch - orig_spch + *inout_nsamps;
476 if (n_overflow > fe->frame_size - fe->num_overflow_samps)
477 n_overflow = fe->frame_size - fe->num_overflow_samps;
478 memcpy(fe->overflow_samps + fe->num_overflow_samps,
479 orig_spch, n_overflow *
sizeof(*orig_spch));
480 fe->num_overflow_samps += n_overflow;
/* If the appended span reaches past what has already been consumed, advance
 * the caller's pointer over the extra samples. */
482 if (n_overflow > *inout_spch - orig_spch) {
483 n_overflow -= (*inout_spch - orig_spch);
484 *inout_spch += n_overflow;
485 *inout_nsamps -= n_overflow;
/* Hand the value of outidx back to the caller as the frame count. */
490 *inout_nframes = outidx;
/* Process a whole block of samples, allocating the output frames here.
 *
 * fe_process_frames() is first called with NULL in place of the sample
 * pointer and output buffer -- presumably its no-output-buffer path, leaving
 * a frame count in *nframes.  A 2-D array with feature_dimension columns is
 * then allocated and filled by a second call.  How `cep` reaches
 * *cep_block, and the return statement, are not shown in this listing. */
495 fe_process_utt(
fe_t * fe, int16
const * spch,
size_t nsamps,
496 mfcc_t *** cep_block, int32 * nframes)
502 fe_process_frames(fe, NULL, &nsamps, NULL, nframes);
/* Two allocations are visible: *nframes rows, or a single row.  The
 * condition choosing between them is not shown. */
505 cep = (mfcc_t **)
ckd_calloc_2d(*nframes, fe->feature_dimension,
sizeof(**cep));
507 cep = (mfcc_t **)
ckd_calloc_2d(1, fe->feature_dimension,
sizeof(**cep));
509 rv = fe_process_frames(fe, &spch, &nsamps, cep, nframes);
/* Flush the samples still held in the overflow buffer.
 *
 * If any remain, they are loaded as a frame of num_overflow_samps samples,
 * written to `cepvector`, and the return value of fe_write_frame() is
 * stored in *nframes.  The overflow count is then reset to zero.  The branch
 * taken when nothing is buffered, and the return value, are not shown in
 * this listing. */
517 fe_end_utt(
fe_t * fe, mfcc_t * cepvector, int32 * nframes)
520 if (fe->num_overflow_samps > 0) {
521 fe_read_frame(fe, fe->overflow_samps, fe->num_overflow_samps);
522 *nframes = fe_write_frame(fe, cepvector);
529 fe->num_overflow_samps = 0;
/* Fragment of the release routine (signature not present in this listing).
 * The reference count is decremented first; what happens while it is still
 * positive is not shown.  The mel cosine table is released through
 * fe_free_2d() only when the pointer is non-NULL. */
547 if (--fe->refcount > 0)
552 if (fe->mel_fb->mel_cosine)
553 fe_free_2d((
void *) fe->mel_fb->mel_cosine);
/* Convert nframes * feature_dimension values from mfcc_t to float32 using
 * MFCC2FLOAT.
 *
 * When `input` and `output` are the same pointer no conversion is done and
 * the element count is returned directly.  All elements are addressed
 * through row 0 (input[0][i], output[0][i]), so this assumes the rows are
 * stored contiguously starting at row 0 -- TODO confirm against the
 * allocator used by callers.  The return after the loop is not shown. */
579 fe_mfcc_to_float(
fe_t * fe,
580 mfcc_t ** input, float32 ** output, int32 nframes)
585 if ((
void *) input == (
void *) output)
586 return nframes * fe->feature_dimension;
588 for (i = 0; i < nframes * fe->feature_dimension; ++i)
589 output[0][i] = MFCC2FLOAT(input[0][i]);
/* Convert nframes * feature_dimension values from float32 to mfcc_t using
 * FLOAT2MFCC.
 *
 * When `input` and `output` are the same pointer no conversion is done and
 * the element count is returned directly.  All elements are addressed
 * through row 0 (input[0][i], output[0][i]), so this assumes the rows are
 * stored contiguously starting at row 0 -- TODO confirm against the
 * allocator used by callers.  The return after the loop is not shown. */
598 fe_float_to_mfcc(
fe_t * fe,
599 float32 ** input, mfcc_t ** output, int32 nframes)
604 if ((
void *) input == (
void *) output)
605 return nframes * fe->feature_dimension;
607 for (i = 0; i < nframes * fe->feature_dimension; ++i)
608 output[0][i] = FLOAT2MFCC(input[0][i]);
/* Run fe_spec2cep() on a spectrum supplied as mfcc_t values, writing the
 * result to `fr_cep`.
 *
 * Two variants are visible: one passes `fr_spec` straight through, the other
 * first copies num_filters values into a temporary powspec_t array.  The
 * preprocessor condition selecting between them is not shown.
 * NOTE(review): the release of `powspec` is not visible in this listing --
 * confirm the temporary is freed after use. */
614 fe_logspec_to_mfcc(
fe_t * fe,
const mfcc_t * fr_spec, mfcc_t * fr_cep)
617 fe_spec2cep(fe, fr_spec, fr_cep);
622 powspec =
ckd_malloc(fe->mel_fb->num_filters *
sizeof(powspec_t));
623 for (i = 0; i < fe->mel_fb->num_filters; ++i)
624 powspec[i] = (powspec_t) fr_spec[i];
625 fe_spec2cep(fe, powspec, fr_cep);
/* Run fe_dct2() (last argument 0) on a spectrum supplied as mfcc_t values,
 * writing the result to `fr_cep`.
 *
 * Two variants are visible: one passes `fr_spec` straight through, the other
 * first copies num_filters values into a temporary powspec_t array.  The
 * preprocessor condition selecting between them is not shown.
 * NOTE(review): the release of `powspec` is not visible in this listing --
 * confirm the temporary is freed after use. */
632 fe_logspec_dct2(
fe_t * fe,
const mfcc_t * fr_spec, mfcc_t * fr_cep)
635 fe_dct2(fe, fr_spec, fr_cep, 0);
640 powspec =
ckd_malloc(fe->mel_fb->num_filters *
sizeof(powspec_t));
641 for (i = 0; i < fe->mel_fb->num_filters; ++i)
642 powspec[i] = (powspec_t) fr_spec[i];
643 fe_dct2(fe, powspec, fr_cep, 0);
/* Run fe_dct3() on the coefficients in `fr_cep`, producing num_filters
 * values in `fr_spec`.
 *
 * Two variants are visible: one writes directly into `fr_spec`, the other
 * writes into a temporary powspec_t array and then casts each value to
 * mfcc_t.  The preprocessor condition selecting between them is not shown.
 * NOTE(review): the release of `powspec` is not visible in this listing --
 * confirm the temporary is freed after use. */
650 fe_mfcc_dct3(
fe_t * fe,
const mfcc_t * fr_cep, mfcc_t * fr_spec)
653 fe_dct3(fe, fr_cep, fr_spec);
658 powspec =
ckd_malloc(fe->mel_fb->num_filters *
sizeof(powspec_t));
659 fe_dct3(fe, fr_cep, powspec);
660 for (i = 0; i < fe->mel_fb->num_filters; ++i)
661 fr_spec[i] = (mfcc_t) powspec[i];
Command-line and other configuration parsing and handling.
SPHINXBASE_EXPORT cmd_ln_t * cmd_ln_retain(cmd_ln_t *cmdln)
Retain ownership of a command-line argument set.
#define ckd_calloc_2d(d1, d2, sz)
Macro for ckd_calloc_2d
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Base Struct to hold all structure for MFCC computation.
#define E_INFO
Print logging information to standard error stream.
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT int cmd_ln_free_r(cmd_ln_t *cmdln)
Release a command-line argument set and all associated strings.
Basic type definitions used in Sphinx.
SPHINXBASE_EXPORT char const * cmd_ln_str_r(cmd_ln_t *cmdln, char const *name)
Retrieve a string from a command-line object.
#define s3_rand_seed(s)
Macros to simplify calling of random generator function.
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
SPHINXBASE_EXPORT cmd_ln_t * cmd_ln_get(void)
Retrieve the global cmd_ln_t object used by non-re-entrant functions.
#define E_WARN
Print warning information to standard error stream.
Implementation of logging routines.
Argument definition structure.
High performance portable random generator created by Takuji Nishimura and Makoto Matsumoto...
Opaque structure used to hold the results of command-line parsing.
#define ckd_malloc(sz)
Macro for ckd_malloc
#define E_ERROR
Print error message to standard error stream.
#define cmd_ln_boolean_r(c, n)
Retrieve a boolean value from a command-line object.
Structure for the front-end computation.