SphinxBase  0.6
fe.h
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1996-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /*
39  * fe.h
40  *
41  * $Log: fe.h,v $
42  * Revision 1.11 2005/02/05 02:15:02 egouvea
43  * Removed fe_process(), never used
44  *
45  * Revision 1.10 2004/12/10 16:48:55 rkm
46  * Added continuous density acoustic model handling
47  *
48  *
49  */
50 
/*
 * Compatibility shim: when WIN32 is defined and GNUWINCE is not, the names
 * srand48() and lrand48() are mapped onto srand() and rand().
 * NOTE(review): these defines appear before the _NEW_FE_H_ include guard, so
 * they are evaluated on every inclusion of this header, not just the first.
 */
51 #if defined(WIN32) && !defined(GNUWINCE)
52 #define srand48(x) srand(x)
53 #define lrand48() rand()
54 #endif
55 
56 #ifndef _NEW_FE_H_
57 #define _NEW_FE_H_
58 
59 /* Win32/WinCE DLL gunk */
60 #include <sphinxbase/sphinxbase_export.h>
61 
62 #include <sphinxbase/cmd_ln.h>
63 #include <sphinxbase/fixpoint.h>
64 
65 #ifdef __cplusplus
66 extern "C" {
67 #endif
68 #if 0
69 /* Fool Emacs. */
70 }
71 #endif
72 
/*
 * NATIVE_ENDIAN is a string literal chosen at compile time: "big" when
 * WORDS_BIGENDIAN is defined, "little" otherwise. It is used in this header
 * as the default value of the "-input_endian" argument.
 */
73 #ifdef WORDS_BIGENDIAN
74 #define NATIVE_ENDIAN "big"
75 #else
76 #define NATIVE_ENDIAN "little"
77 #endif
78 
/*
 * Default values for the front-end arguments declared in
 * waveform_to_cepstral_command_line_macro(). The numeric ones are passed
 * through ARG_STRINGIFY there, so their exact spelling here is what ends up
 * in the argument table.
 */
/* Default for "-samprate" ("Sampling rate"). */
80 #define DEFAULT_SAMPLING_RATE 16000
81 
/* Default for "-frate" ("Frame rate"). */
82 #define DEFAULT_FRAME_RATE 100
83 
/*
 * Not referenced anywhere else in the visible part of this header.
 * NOTE(review): 160 == DEFAULT_SAMPLING_RATE / DEFAULT_FRAME_RATE, so this is
 * presumably the frame shift in samples -- confirm against the implementation.
 */
85 #define DEFAULT_FRAME_SHIFT 160
86 
/* Default for "-wlen" ("Hamming window length"). */
87 #define DEFAULT_WINDOW_LENGTH 0.025625
88 
/* Default for "-nfft" ("Size of FFT"). */
89 #define DEFAULT_FFT_SIZE 512
90 
/* Default for "-ncep" ("Number of cep coefficients"). */
91 #define DEFAULT_NUM_CEPSTRA 13
92 
/* Default for "-nfilt" ("Number of filter banks"). */
93 #define DEFAULT_NUM_FILTERS 40
94 
/* Default for "-lowerf" ("Lower edge of filters"). */
95 #define DEFAULT_LOWER_FILT_FREQ 133.33334
96 
/* Default for "-upperf" ("Upper edge of filters"). */
97 #define DEFAULT_UPPER_FILT_FREQ 6855.4976
98 
/* Default for "-alpha" ("Preemphasis parameter"). */
99 #define DEFAULT_PRE_EMPHASIS_ALPHA 0.97
100 
/* Default for "-warp_type"; used directly as a string, not stringified. */
101 #define DEFAULT_WARP_TYPE "inverse_linear"
102 
/*
 * Default for "-seed"; the argument help text says a value less than zero
 * means the library picks its own seed.
 * NOTE(review): the value is unparenthesized and is passed to ARG_STRINGIFY;
 * adding parentheses would presumably change the resulting default string --
 * confirm before "fixing" it.
 */
103 #define SEED -1
104 
/*
 * Expands to a comma-separated list of brace-enclosed argument entries, each
 * of the form { name, type, default, help string }. The last entry has no
 * trailing comma, so the expansion can be followed by further entries or a
 * terminator at the use site.
 * NOTE(review): the entries are presumably initializers for arg_t (the
 * argument definition structure from cmd_ln.h) -- confirm the field order
 * against that header.
 * Defaults are either literal strings ("no", "yes", "legacy", "0"), the
 * DEFAULT_* / SEED macros passed through ARG_STRINGIFY, the string macros
 * NATIVE_ENDIAN and DEFAULT_WARP_TYPE used as-is, or NULL ("-warp_params").
 */
105 #define waveform_to_cepstral_command_line_macro() \
106  { "-logspec", \
107  ARG_BOOLEAN, \
108  "no", \
109  "Write out logspectral files instead of cepstra" }, \
110  \
111  { "-smoothspec", \
112  ARG_BOOLEAN, \
113  "no", \
114  "Write out cepstral-smoothed logspectral files" }, \
115  \
116  { "-transform", \
117  ARG_STRING, \
118  "legacy", \
119  "Which type of transform to use to calculate cepstra (legacy, dct, or htk)" }, \
120  \
121  { "-alpha", \
122  ARG_FLOAT32, \
123  ARG_STRINGIFY(DEFAULT_PRE_EMPHASIS_ALPHA), \
124  "Preemphasis parameter" }, \
125  \
126  { "-samprate", \
127  ARG_FLOAT32, \
128  ARG_STRINGIFY(DEFAULT_SAMPLING_RATE), \
129  "Sampling rate" }, \
130  \
131  { "-frate", \
132  ARG_INT32, \
133  ARG_STRINGIFY(DEFAULT_FRAME_RATE), \
134  "Frame rate" }, \
135  \
136  { "-wlen", \
137  ARG_FLOAT32, \
138  ARG_STRINGIFY(DEFAULT_WINDOW_LENGTH), \
139  "Hamming window length" }, \
140  \
141  { "-nfft", \
142  ARG_INT32, \
143  ARG_STRINGIFY(DEFAULT_FFT_SIZE), \
144  "Size of FFT" }, \
145  \
146  { "-nfilt", \
147  ARG_INT32, \
148  ARG_STRINGIFY(DEFAULT_NUM_FILTERS), \
149  "Number of filter banks" }, \
150  \
151  { "-lowerf", \
152  ARG_FLOAT32, \
153  ARG_STRINGIFY(DEFAULT_LOWER_FILT_FREQ), \
154  "Lower edge of filters" }, \
155  \
156  { "-upperf", \
157  ARG_FLOAT32, \
158  ARG_STRINGIFY(DEFAULT_UPPER_FILT_FREQ), \
159  "Upper edge of filters" }, \
160  \
161  { "-unit_area", \
162  ARG_BOOLEAN, \
163  "yes", \
164  "Normalize mel filters to unit area" }, \
165  \
166  { "-round_filters", \
167  ARG_BOOLEAN, \
168  "yes", \
169  "Round mel filter frequencies to DFT points" }, \
170  \
171  { "-ncep", \
172  ARG_INT32, \
173  ARG_STRINGIFY(DEFAULT_NUM_CEPSTRA), \
174  "Number of cep coefficients" }, \
175  \
176  { "-doublebw", \
177  ARG_BOOLEAN, \
178  "no", \
179  "Use double bandwidth filters (same center freq)" }, \
180  \
181  { "-lifter", \
182  ARG_INT32, \
183  "0", \
184  "Length of sin-curve for liftering, or 0 for no liftering." }, \
185  \
186  { "-input_endian", \
187  ARG_STRING, \
188  NATIVE_ENDIAN, \
189  "Endianness of input data, big or little, ignored if NIST or MS Wav" }, \
190  \
191  { "-warp_type", \
192  ARG_STRING, \
193  DEFAULT_WARP_TYPE, \
194  "Warping function type (or shape)" }, \
195  \
196  { "-warp_params", \
197  ARG_STRING, \
198  NULL, \
199  "Parameters defining the warping function" }, \
200  \
201  { "-dither", \
202  ARG_BOOLEAN, \
203  "no", \
204  "Add 1/2-bit noise" }, \
205  \
206  { "-seed", \
207  ARG_INT32, \
208  ARG_STRINGIFY(SEED), \
209  "Seed for random number generator; if less than zero, pick our own" }, \
210  \
211  { "-remove_dc", \
212  ARG_BOOLEAN, \
213  "no", \
214  "Remove DC offset from each frame" }, \
215  \
216  { "-verbose", \
217  ARG_BOOLEAN, \
218  "no", \
219  "Show input filenames" } \
220 
221 
/*
 * Selects the representation of feature values (mfcc_t) and the helper
 * macros that operate on it, depending on whether FIXED_POINT is defined.
 * Both branches define the same five names, so client code can be written
 * once against mfcc_t / FLOAT2MFCC / MFCC2FLOAT / MFCCMUL / MFCCLN.
 */
222 #ifdef FIXED_POINT
223 
/* Fixed-point build: mfcc_t is fixed32. */
224 typedef fixed32 mfcc_t;
225 
/* Convert a floating-point value to mfcc_t (delegates to FLOAT2FIX). */
227 #define FLOAT2MFCC(x) FLOAT2FIX(x)
228 
/* Convert an mfcc_t value to floating point (delegates to FIX2FLOAT). */
229 #define MFCC2FLOAT(x) FIX2FLOAT(x)
230 
/* Multiply two mfcc_t values (delegates to FIXMUL). */
231 #define MFCCMUL(a,b) FIXMUL(a,b)
/* Logarithm of x; in/out are forwarded to FIXLN_ANY. */
232 #define MFCCLN(x,in,out) FIXLN_ANY(x,in,out)
233 #else /* !FIXED_POINT */
234 
/* Floating-point build: mfcc_t is float32. */
236 typedef float32 mfcc_t;
/* Identity: values are already floating point. */
238 #define FLOAT2MFCC(x) (x)
239 
/* Identity: values are already floating point. */
240 #define MFCC2FLOAT(x) (x)
241 
/* Plain multiplication with both operands parenthesized. */
242 #define MFCCMUL(a,b) ((a)*(b))
/*
 * Expands to log(x); the in and out arguments are ignored in this branch.
 * NOTE(review): no <math.h> include is visible in this header, so users of
 * MFCCLN presumably need a declaration of log() from elsewhere -- confirm.
 */
243 #define MFCCLN(x,in,out) log(x)
244 #endif /* !FIXED_POINT */
245 
/*
 * Front-end object type. struct fe_s is only forward-declared here, so fe_t
 * is an incomplete (opaque) type for users of this header and can only be
 * handled through pointers.
 */
249 typedef struct fe_s fe_t;
250 
/*
 * Status codes for the front end. Zero means success (FE_SUCCESS and
 * FE_OUTPUT_FILE_SUCCESS are two names for the same value); every error code
 * is a distinct negative value from -1 to -10.
 * NOTE(review): which functions return which codes is not visible in this
 * header -- the int-returning fe_* functions presumably use these values;
 * confirm against the implementation.
 */
254 enum fe_error_e {
255  FE_SUCCESS = 0,
256  FE_OUTPUT_FILE_SUCCESS = 0,
257  FE_CONTROL_FILE_ERROR = -1,
258  FE_START_ERROR = -2,
259  FE_UNKNOWN_SINGLE_OR_BATCH = -3,
260  FE_INPUT_FILE_OPEN_ERROR = -4,
261  FE_INPUT_FILE_READ_ERROR = -5,
262  FE_MEM_ALLOC_ERROR = -6,
263  FE_OUTPUT_FILE_WRITE_ERROR = -7,
264  FE_OUTPUT_FILE_OPEN_ERROR = -8,
265  FE_ZERO_ENERGY_ERROR = -9,
266  FE_INVALID_PARAM_ERROR = -10
267 };
268 
/*
 * Obtain a front-end object without passing any configuration argument.
 * NOTE(review): presumably initializes from a global command-line
 * configuration -- confirm against the implementation.
 * Returns a pointer to fe_t; the failure convention (e.g. NULL) is not
 * visible in this header.
 */
276 SPHINXBASE_EXPORT
277 fe_t* fe_init_auto(void);
278 
/*
 * Return a pointer to read-only arg_t data.
 * NOTE(review): presumably the table of front-end argument definitions (cf.
 * waveform_to_cepstral_command_line_macro()) -- confirm; how the table is
 * terminated is not visible here.
 */
286 SPHINXBASE_EXPORT
287 arg_t const *fe_get_args(void);
288 
/*
 * Obtain a front-end object from an explicit configuration object.
 * config: command-line/configuration object; passed as non-const.
 * NOTE(review): whether the returned fe_t retains or takes ownership of
 * config, and what is returned on failure, is not visible here -- confirm.
 */
299 SPHINXBASE_EXPORT
300 fe_t *fe_init_auto_r(cmd_ln_t *config);
301 
/*
 * Return a read-only pointer to a cmd_ln_t associated with fe.
 * NOTE(review): presumably the configuration the front end was initialized
 * with; lifetime of the returned pointer is not visible here -- confirm.
 */
309 SPHINXBASE_EXPORT
310 const cmd_ln_t *fe_get_config(fe_t *fe);
311 
/*
 * Begin processing of an utterance on fe.
 * NOTE(review): the int result is presumably an fe_error_e code (0 on
 * success, negative on error) -- confirm against the implementation.
 */
316 SPHINXBASE_EXPORT
317 int fe_start_utt(fe_t *fe);
318 
/*
 * Query the output size of fe as an int.
 * NOTE(review): presumably the number of mfcc_t values produced per frame,
 * i.e. the length callers need for out_cep-style buffers -- confirm.
 */
331 SPHINXBASE_EXPORT
332 int fe_get_output_size(fe_t *fe);
333 
/*
 * Report the input framing parameters of fe through two output pointers.
 * out_frame_shift: receives the frame shift.
 * out_frame_size:  receives the frame size.
 * NOTE(review): units are presumably samples, and whether either pointer may
 * be NULL is not visible here -- confirm.
 */
346 SPHINXBASE_EXPORT
347 void fe_get_input_size(fe_t *fe, int *out_frame_shift,
348  int *out_frame_size);
349 
/*
 * Finish processing of the current utterance on fe.
 * out_cepvector: caller-provided buffer for output feature values.
 * out_nframes:   receives a frame count.
 * NOTE(review): presumably flushes any buffered samples into at most one
 * final frame, and the int result is presumably an fe_error_e code -- confirm.
 */
364 SPHINXBASE_EXPORT
365 int fe_end_utt(fe_t *fe, mfcc_t *out_cepvector, int32 *out_nframes);
366 
/*
 * Retain fe and return a pointer to a front-end object.
 * NOTE(review): presumably increments a reference count and returns the same
 * pointer, to be balanced by fe_free() -- confirm.
 */
372 SPHINXBASE_EXPORT
373 fe_t *fe_retain(fe_t *fe);
374 
/*
 * Release fe.
 * NOTE(review): given the fe_retain() counterpart, this presumably drops one
 * reference and the int result is presumably the remaining count; whether a
 * NULL fe is accepted is not visible here -- confirm.
 */
382 SPHINXBASE_EXPORT
383 int fe_free(fe_t *fe);
384 
/*
 * Process one frame of audio.
 * spch:    input samples (read-only, 16-bit).
 * nsamps:  number of samples available at spch.
 * out_cep: caller-provided buffer receiving the output feature values.
 * NOTE(review): the required nsamps and the size of out_cep are presumably
 * given by fe_get_input_size() / fe_get_output_size(); the int result is
 * presumably an fe_error_e code -- confirm.
 */
393 SPHINXBASE_EXPORT
394 int fe_process_frame(fe_t *fe, int16 const *spch,
395  int32 nsamps, mfcc_t *out_cep);
396 
/*
 * Process a block of audio into multiple frames.
 * inout_spch:    in/out pointer to the read-only input samples.
 * inout_nsamps:  in/out sample count.
 * buf_cep:       array of per-frame output buffers.
 * inout_nframes: in/out frame count.
 * NOTE(review): the "inout" parameters are presumably advanced/decremented
 * by the amount consumed, with *inout_nframes going in as the capacity of
 * buf_cep and coming out as the number of frames written -- confirm, along
 * with the meaning of the int result.
 */
444 SPHINXBASE_EXPORT
445 int fe_process_frames(fe_t *fe,
446  int16 const **inout_spch,
447  size_t *inout_nsamps,
448  mfcc_t **buf_cep,
449  int32 *inout_nframes);
450 
/*
 * Process a block of audio, returning the feature frames through cep_block.
 * spch:      input samples (read-only, 16-bit).
 * nsamps:    number of samples at spch.
 * cep_block: receives a pointer to a 2-D array of mfcc_t.
 * nframes:   receives a frame count.
 * NOTE(review): *cep_block is presumably allocated by this call and released
 * by the caller with fe_free_2d(); the int result is presumably an
 * fe_error_e code -- confirm.
 */
466 SPHINXBASE_EXPORT
467 int fe_process_utt(fe_t *fe,
468  int16 const *spch,
469  size_t nsamps,
470  mfcc_t ***cep_block,
471  int32 *nframes
472  );
473 
/*
 * Free a 2-D array passed as an untyped pointer.
 * NOTE(review): presumably intended for the array returned through
 * fe_process_utt()'s cep_block; whether NULL is accepted is not visible
 * here -- confirm.
 */
477 SPHINXBASE_EXPORT
478 void fe_free_2d(void *arr);
479 
/*
 * Convert nframes frames of mfcc_t values (input) to float32 (output).
 * NOTE(review): the per-frame length is presumably fe_get_output_size(fe),
 * and whether input and output may alias (e.g. when mfcc_t is float32) is
 * not visible here -- confirm, along with the meaning of the int result.
 */
483 SPHINXBASE_EXPORT
484 int fe_mfcc_to_float(fe_t *fe,
485  mfcc_t **input,
486  float32 **output,
487  int32 nframes);
488 
/*
 * Convert nframes frames of float32 values (input) to mfcc_t (output).
 * NOTE(review): the per-frame length is presumably fe_get_output_size(fe),
 * and whether input and output may alias is not visible here -- confirm,
 * along with the meaning of the int result.
 */
492 SPHINXBASE_EXPORT
493 int fe_float_to_mfcc(fe_t *fe,
494  float32 **input,
495  mfcc_t **output,
496  int32 nframes);
497 
/*
 * Convert one frame of log-spectrum values to cepstral values.
 * fr_spec: input frame (read-only).
 * fr_cep:  output frame.
 * NOTE(review): presumably applies the transform selected by the
 * "-transform" argument; the buffer lengths and the meaning of the int
 * result are not visible here -- confirm.
 */
521 SPHINXBASE_EXPORT
522 int fe_logspec_to_mfcc(fe_t *fe,
523  const mfcc_t *fr_spec,
524  mfcc_t *fr_cep
525  );
526 
/*
 * Convert one frame of log-spectrum values to cepstral values.
 * fr_spec: input frame (read-only).
 * fr_cep:  output frame.
 * NOTE(review): by its name this presumably applies a type-II DCT; the
 * scaling, the buffer lengths and the meaning of the int result are not
 * visible here -- confirm.
 */
535 SPHINXBASE_EXPORT
536 int fe_logspec_dct2(fe_t *fe,
537  const mfcc_t *fr_spec,
538  mfcc_t *fr_cep
539  );
540 
/*
 * Convert one frame of cepstral values back to log-spectrum values.
 * fr_cep:  input frame (read-only).
 * fr_spec: output frame.
 * NOTE(review): by its name this presumably applies a type-III DCT, i.e. the
 * inverse of fe_logspec_dct2(); the scaling, the buffer lengths and the
 * meaning of the int result are not visible here -- confirm.
 */
549 SPHINXBASE_EXPORT
550 int fe_mfcc_dct3(fe_t *fe,
551  const mfcc_t *fr_cep,
552  mfcc_t *fr_spec
553  );
554 
555 #ifdef __cplusplus
556 }
557 #endif
558 
559 
560 #endif
Command-line and other configuration parsing and handling.
Argument definition structure.
Opaque structure used to hold the results of command-line parsing.
Structure for the front-end computation.
Definition: fe_internal.h:124