/*
 * When WIN32 is defined and GNUWINCE is not, map the srand48()/lrand48()
 * names onto the standard C srand()/rand() functions.
 * NOTE(review): rand() and lrand48() do not share the same value range;
 * confirm callers do not depend on the lrand48() range.
 */
51 #if defined(WIN32) && !defined(GNUWINCE)
52 #define srand48(x) srand(x)
53 #define lrand48() rand()
60 #include <sphinxbase/sphinxbase_export.h>
63 #include <sphinxbase/fixpoint.h>
/*
 * NATIVE_ENDIAN is a string naming a byte order: "big" when WORDS_BIGENDIAN
 * is defined.
 * NOTE(review): the "little" definition presumably sits in an #else branch
 * whose #else/#endif lines are not visible in this excerpt -- confirm.
 */
73 #ifdef WORDS_BIGENDIAN
74 #define NATIVE_ENDIAN "big"
76 #define NATIVE_ENDIAN "little"
/*
 * Default values for the front-end parameters. Several of them are
 * stringified (ARG_STRINGIFY) as defaults in the command-line macro that
 * follows these definitions.
 */
/* Default sampling rate (presumably in Hz -- confirm). */
80 #define DEFAULT_SAMPLING_RATE 16000
/* Default frame rate (presumably frames per second -- confirm). */
82 #define DEFAULT_FRAME_RATE 100
/* Default frame shift; 160 equals DEFAULT_SAMPLING_RATE / DEFAULT_FRAME_RATE
 * (presumably a sample count -- confirm). */
85 #define DEFAULT_FRAME_SHIFT 160
/* Default Hamming window length (presumably in seconds -- confirm). */
87 #define DEFAULT_WINDOW_LENGTH 0.025625
/* Default FFT size. */
89 #define DEFAULT_FFT_SIZE 512
/* Default number of cep coefficients. */
91 #define DEFAULT_NUM_CEPSTRA 13
/* Default number of filter banks. */
93 #define DEFAULT_NUM_FILTERS 40
/* Default lower edge of the filters. */
95 #define DEFAULT_LOWER_FILT_FREQ 133.33334
/* Default upper edge of the filters. */
97 #define DEFAULT_UPPER_FILT_FREQ 6855.4976
/* Default preemphasis parameter. */
99 #define DEFAULT_PRE_EMPHASIS_ALPHA 0.97
/* Default warp type name (presumably the default warping function type --
 * confirm against the option that uses it). */
101 #define DEFAULT_WARP_TYPE "inverse_linear"
105 #define waveform_to_cepstral_command_line_macro() \
109 "Write out logspectral files instead of cepstra" }, \
114 "Write out cepstral-smoothed logspectral files" }, \
119 "Which type of transform to use to calculate cepstra (legacy, dct, or htk)" }, \
123 ARG_STRINGIFY(DEFAULT_PRE_EMPHASIS_ALPHA), \
124 "Preemphasis parameter" }, \
128 ARG_STRINGIFY(DEFAULT_SAMPLING_RATE), \
133 ARG_STRINGIFY(DEFAULT_FRAME_RATE), \
138 ARG_STRINGIFY(DEFAULT_WINDOW_LENGTH), \
139 "Hamming window length" }, \
143 ARG_STRINGIFY(DEFAULT_FFT_SIZE), \
148 ARG_STRINGIFY(DEFAULT_NUM_FILTERS), \
149 "Number of filter banks" }, \
153 ARG_STRINGIFY(DEFAULT_LOWER_FILT_FREQ), \
154 "Lower edge of filters" }, \
158 ARG_STRINGIFY(DEFAULT_UPPER_FILT_FREQ), \
159 "Upper edge of filters" }, \
164 "Normalize mel filters to unit area" }, \
166 { "-round_filters", \
169 "Round mel filter frequencies to DFT points" }, \
173 ARG_STRINGIFY(DEFAULT_NUM_CEPSTRA), \
174 "Number of cep coefficients" }, \
179 "Use double bandwidth filters (same center freq)" }, \
184 "Length of sin-curve for liftering, or 0 for no liftering." }, \
189 "Endianness of input data, big or little, ignored if NIST or MS Wav" }, \
194 "Warping function type (or shape)" }, \
199 "Parameters defining the warping function" }, \
204 "Add 1/2-bit noise" }, \
208 ARG_STRINGIFY(SEED), \
209 "Seed for random number generator; if less than zero, pick our own" }, \
214 "Remove DC offset from each frame" }, \
219 "Show input filenames" } \
/*
 * Fixed-point variant of mfcc_t: the type is fixed32 and the helper macros
 * forward to the fixed-point macros FLOAT2FIX, FIX2FLOAT, FIXMUL and
 * FIXLN_ANY.
 * NOTE(review): the preprocessor condition selecting this variant is not
 * visible in this excerpt.
 */
224 typedef fixed32 mfcc_t;
/* Convert a floating-point value to mfcc_t via FLOAT2FIX. */
227 #define FLOAT2MFCC(x) FLOAT2FIX(x)
/* Convert an mfcc_t value to floating point via FIX2FLOAT. */
229 #define MFCC2FLOAT(x) FIX2FLOAT(x)
/* Multiply two mfcc_t values via FIXMUL. */
231 #define MFCCMUL(a,b) FIXMUL(a,b)
/* Logarithm via FIXLN_ANY; in/out are passed through unchanged to it. */
232 #define MFCCLN(x,in,out) FIXLN_ANY(x,in,out)
/*
 * Floating-point variant of mfcc_t: the type is float32, the conversion
 * macros are identities, and arithmetic uses plain C operators.
 * NOTE(review): the preprocessor condition selecting this variant is not
 * visible in this excerpt.
 */
236 typedef float32 mfcc_t;
/* Identity: the value is already floating point. */
238 #define FLOAT2MFCC(x) (x)
/* Identity: the value is already floating point. */
240 #define MFCC2FLOAT(x) (x)
/* Plain multiplication of the two (parenthesized) arguments. */
242 #define MFCCMUL(a,b) ((a)*(b))
/* Calls log(x); the in and out arguments are ignored in this variant. */
243 #define MFCCLN(x,in,out) log(x)
256 FE_OUTPUT_FILE_SUCCESS = 0,
257 FE_CONTROL_FILE_ERROR = -1,
259 FE_UNKNOWN_SINGLE_OR_BATCH = -3,
260 FE_INPUT_FILE_OPEN_ERROR = -4,
261 FE_INPUT_FILE_READ_ERROR = -5,
262 FE_MEM_ALLOC_ERROR = -6,
263 FE_OUTPUT_FILE_WRITE_ERROR = -7,
264 FE_OUTPUT_FILE_OPEN_ERROR = -8,
265 FE_ZERO_ENERGY_ERROR = -9,
266 FE_INVALID_PARAM_ERROR = -10
/*
 * Takes no arguments and returns a pointer to an fe_t.
 * NOTE(review): presumably creates a front-end object from some default or
 * global configuration; the source of that configuration and the failure
 * return value are not visible here -- confirm.
 */
277 fe_t* fe_init_auto(
void);
/*
 * Takes no arguments and returns a pointer to const arg_t.
 * NOTE(review): presumably the front-end's argument definition table; the
 * caller cannot modify it through this pointer -- confirm ownership.
 */
287 arg_t const *fe_get_args(
void);
/*
 * Operates on the front-end object fe and returns an int.
 * NOTE(review): presumably begins processing of a new utterance and returns
 * a status code -- confirm the meaning of the return value.
 */
317 int fe_start_utt(
fe_t *fe);
/*
 * Returns an int derived from the front-end object fe.
 * NOTE(review): presumably the number of values in one output vector --
 * confirm against the implementation.
 */
332 int fe_get_output_size(
fe_t *fe);
/*
 * Takes the front-end object fe and two int pointers; returns nothing.
 * NOTE(review): out_frame_shift and out_frame_size are presumably output
 * parameters receiving the frame shift and frame size; whether NULL is
 * accepted for either is not visible here -- confirm.
 */
347 void fe_get_input_size(
fe_t *fe,
int *out_frame_shift,
348 int *out_frame_size);
/*
 * Takes the front-end object fe, an mfcc_t buffer out_cepvector and an
 * int32 pointer out_nframes; returns an int.
 * NOTE(review): presumably finishes the current utterance, writing any
 * remaining output to out_cepvector and the frame count to out_nframes --
 * confirm buffer size requirements and the return-value convention.
 */
365 int fe_end_utt(
fe_t *fe, mfcc_t *out_cepvector, int32 *out_nframes);
/*
 * Takes the front-end object fe and returns an int.
 * NOTE(review): presumably releases fe; the meaning of the int return value
 * is not visible here -- confirm.
 */
383 int fe_free(
fe_t *fe);
/*
 * Takes the front-end object fe, a pointer spch to const int16 data (not
 * modified through this pointer), a count nsamps, and an mfcc_t output
 * buffer out_cep; returns an int.
 * NOTE(review): presumably processes one frame of nsamps samples into
 * out_cep -- confirm the required size of out_cep and the return-value
 * convention.
 */
394 int fe_process_frame(
fe_t *fe, int16
const *spch,
395 int32 nsamps, mfcc_t *out_cep);
/*
 * Takes the front-end object fe plus three in/out pointers: inout_spch
 * (pointer to a pointer to const int16 data), inout_nsamps (size_t) and
 * inout_nframes (int32); returns an int.
 * NOTE(review): the "inout_" names suggest each value is read on entry and
 * updated on return -- confirm. The embedded listing numbers jump from 447
 * to 449, so a parameter line may be missing from this excerpt; check the
 * full declaration before relying on this signature.
 */
445 int fe_process_frames(
fe_t *fe,
446 int16
const **inout_spch,
447 size_t *inout_nsamps,
449 int32 *inout_nframes);
467 int fe_process_utt(
fe_t *fe,
/*
 * Takes an untyped pointer arr and returns nothing.
 * NOTE(review): presumably frees a two-dimensional array allocated by this
 * module; which allocator arr must come from is not visible here -- confirm.
 */
478 void fe_free_2d(
void *arr);
484 int fe_mfcc_to_float(
fe_t *fe,
493 int fe_float_to_mfcc(
fe_t *fe,
522 int fe_logspec_to_mfcc(
fe_t *fe,
523 const mfcc_t *fr_spec,
536 int fe_logspec_dct2(
fe_t *fe,
537 const mfcc_t *fr_spec,
550 int fe_mfcc_dct3(
fe_t *fe,
551 const mfcc_t *fr_cep,
Command-line and other configuration parsing and handling.
Argument definition structure.
Opaque structure used to hold the results of command-line parsing.
Structure for the front-end computation.