SphinxBase  0.6
cont_ad.h
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2001 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*
38  * cont_ad.h -- Continuous A/D listening and silence filtering module.
39  *
40  * **********************************************
41  * CMU ARPA Speech Project
42  *
43  * Copyright (c) 1996 Carnegie Mellon University.
44  * ALL RIGHTS RESERVED.
45  * **********************************************
46  *
47  * HISTORY
48  *
49  * 13-Jul-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
50  * Added spf and adbufsize to cont_ad_t in order to support variable
51  * frame sizes depending on audio sampling rate.
52  *
53  * 30-Jun-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
54  * Added FILE* argument to cont_ad_powhist_dump().
55  *
56  * 16-Jan-98 Paul Placeway (pwp@cs.cmu.edu) at Carnegie Mellon University
57  * Changed to use dB instead of the weird power measure.
58  * Added most system parameters to cont_ad_t instead of hardwiring
59  * them in cont_ad.c.
60  * Added cont_ad_set_params() and cont_ad_get_params().
61  *
62  * 28-Jul-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
63  * Added cont_ad_t.siglvl.
64  *
65  * 27-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
66  * Added the option for cont_ad_read to return -1 on EOF.
67  *
68  * 21-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
69  * Added cont_ad_set_thresh().
70  *
71  * 20-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
72  * Separated thresholds for speech and silence.
73  *
74  * 17-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
75  * Created, based loosely on Steve Reed's original implementation.
76  */
77 
78 
79 #ifndef _CONT_AD_H_
80 #define _CONT_AD_H_
81 
82 /* Win32/WinCE DLL gunk */
83 #include <sphinxbase/sphinxbase_export.h>
84 #include <sphinxbase/prim_type.h>
85 #include <sphinxbase/ad.h>
86 
114 #include <stdio.h>
115 
116 
117 #ifdef __cplusplus
118 extern "C" {
119 #endif
120 #if 0
121 /* Fool Emacs. */
122 }
123 #endif
124 
125 /* States of continuous listening module */
126 #define CONT_AD_STATE_SIL 0
127 #define CONT_AD_STATE_SPEECH 1
128 
129 
135 typedef struct spseg_s { /* (For internal use) Node in a linked list of speech (non-silence) segments not yet consumed. */
136  int32 startfrm; /* Frame-id in adbuf of the start of this segment. */
137  int32 nfrm; /* Number of frames in segment (may wrap around adbuf). */
138  struct spseg_s *next; /* Next speech segment (with some intervening silence). */
139 } spseg_t;
140 
141 
151 typedef struct { /* Continuous listening module or object. */
152  /* Function to be called for obtaining A/D data (see prototype for ad_read in ad.h) */
153  int32 (*adfunc)(ad_rec_t *ad, int16 *buf, int32 max);
154  ad_rec_t *ad; /* A/D device argument for adfunc. */
156  int32 rawmode; /* Pass all input data through, without filtering silence. */
158  int16 *adbuf; /* Circular buffer for maintaining A/D data read until consumed. */
160  /* **************************************************************************
161  * state, read_ts, and siglvl are provided for READ-ONLY use by client
162  * applications, and are updated by calls to cont_ad_read() (see below). All
163  * other variables should be left alone.
164  */
165  int32 state; /* State of data returned by most recent cont_ad_read call; CONT_AD_STATE_SIL or CONT_AD_STATE_SPEECH. */
167  int32 read_ts; /* Absolute timestamp; updated by cont_ad_read(). NOTE(review): full description is truncated in the generated docs -- confirm units. */
171  int32 seglen; /* NOTE(review): generated description is truncated ("Total no."); presumably a per-segment length -- confirm. */
175  int32 siglvl; /* Max signal level for the data consumed by the most recent cont_ad_read call (dB range: 0-99). */
178  /* ************************************************************************ */
179 
180  int32 sps; /* Samples/sec; moved from ad->sps to break dependence on ad. */
183  int32 eof; /* Whether the source ad device has encountered EOF. */
185  int32 spf; /* Samples/frame; audio level is analyzed within frames. */
186  int32 adbufsize; /* Buffer size (number of samples). */
187  int32 prev_sample; /* For pre-emphasis filter. */
188  int32 headfrm; /* Frame number in adbuf with unconsumed A/D data. */
189  int32 n_frm; /* Number of complete frames of unconsumed A/D data in adbuf. */
190  int32 n_sample; /* Number of samples of unconsumed data in adbuf. */
191  int32 tot_frm; /* Total number of frames of A/D data read, including consumed ones. */
192  int32 noise_level; /* PWP: what we claim as the "current" noise level. */
194  int32 *pow_hist; /* Histogram of frame power, moving window, decayed. */
195  char *frm_pow; /* Frame power. */
197  int32 auto_thresh; /* Do automatic threshold adjustment or not. */
198  int32 delta_sil; /* Max silence power/frame ABOVE noise level. */
199  int32 delta_speech; /* Min speech power/frame ABOVE noise level. */
200  int32 min_noise; /* Noise lower than this we ignore. */
201  int32 max_noise; /* Noise higher than this signals an error. */
202  int32 winsize; /* How many frames to look at for speech detection. */
203  int32 speech_onset; /* Start speech on >= these many frames out of winsize, of >= delta_speech. */
204  int32 sil_onset; /* End speech on >= these many frames out of winsize, of <= delta_sil. */
205  int32 leader; /* Pad beginning of speech with this many extra frames. */
206  int32 trailer; /* Pad end of speech with this many extra frames. */
208  int32 thresh_speech; /* Frame considered to be speech if power >= thresh_speech (for transitioning from SILENCE to SPEECH state). */
210  int32 thresh_sil; /* Frame considered to be silence if power <= thresh_sil (for transitioning from SPEECH to SILENCE state). */
212  int32 thresh_update; /* Number of frames before next update to pow_hist/thresholds. */
213  float32 adapt_rate; /* Linear interpolation constant for rate at which noise level is adapted to each estimate; range: 0-1. */
217  int32 tail_state; /* State at the end of its internal buffer (internal use): CONT_AD_STATE_SIL or CONT_AD_STATE_SPEECH. */
220  int32 win_startfrm; /* Where next analysis window begins. */
221  int32 win_validfrm; /* Number of frames currently available from win_startfrm for analysis. */
222  int32 n_other; /* If in SILENCE state, number of frames in analysis window considered to be speech; otherwise presumably the number considered silence (description truncated in generated docs -- confirm). */
224  spseg_t *spseg_head; /* First of unconsumed speech segments. */
225  spseg_t *spseg_tail; /* Last of unconsumed speech segments. */
227  FILE *rawfp; /* If non-NULL, raw audio input data processed by cont_ad is dumped to this file. */
231  FILE *logfp; /* If non-NULL, write detailed logs of this object's progress to the file. */
236  int32 n_calib_frame; /* Number of frames of calibration data seen so far. */
237 } cont_ad_t;
238 
239 
255 SPHINXBASE_EXPORT /* Initialize a continuous listening/silence filtering object; adfunc is the function called to obtain A/D data from ad. */
256 cont_ad_t *cont_ad_init (ad_rec_t *ad,
257  int32 (*adfunc)(ad_rec_t *ad, int16 *buf, int32 max)
261  );
262 
269 SPHINXBASE_EXPORT /* Initialize a continuous listening object which simply passes data through (no silence filtering). */
270 cont_ad_t *cont_ad_init_rawmode (ad_rec_t *ad,
271  int32 (*adfunc)(ad_rec_t *ad, int16 *buf, int32 max));
272 
273 
302 SPHINXBASE_EXPORT /* Read raw audio data into the silence filter. */
303 int32 cont_ad_read (cont_ad_t *r,
304  int16 *buf,
307  int32 max
310  );
311 
315 SPHINXBASE_EXPORT /* Get the maximum number of samples which can be passed into cont_ad_read(). */
316 int32 cont_ad_buffer_space(cont_ad_t *r);
317 
330 SPHINXBASE_EXPORT /* Calibrate the silence filter. */
331 int32 cont_ad_calib (cont_ad_t *cont
332  );
333 
345 SPHINXBASE_EXPORT /* Calibrate the silence filter without an audio device. */
346 int32 cont_ad_calib_loop (cont_ad_t *r, int16 *buf, int32 max);
347 
359 SPHINXBASE_EXPORT /* Get the number of samples required to calibrate the silence filter. */
360 int32 cont_ad_calib_size(cont_ad_t *r);
361 
374 SPHINXBASE_EXPORT /* Set silence and speech threshold parameters. */
375 int32 cont_ad_set_thresh (cont_ad_t *cont,
376  int32 sil,
377  int32 sp
378  );
379 
380 
388 SPHINXBASE_EXPORT /* Set the changeable parameters; the arguments correspond to the same-named cont_ad_t members. */
389 int32 cont_ad_set_params (cont_ad_t *r, int32 delta_sil, int32 delta_speech,
390  int32 min_noise, int32 max_noise,
391  int32 winsize, int32 speech_onset, int32 sil_onset,
392  int32 leader, int32 trailer,
393  float32 adapt_rate);
394 
402 SPHINXBASE_EXPORT /* PWP 1/14/98: get the changeable parameters through the supplied output pointers. */
403 int32 cont_ad_get_params (cont_ad_t *r, int32 *delta_sil, int32 *delta_speech,
404  int32 *min_noise, int32 *max_noise,
405  int32 *winsize, int32 *speech_onset, int32 *sil_onset,
406  int32 *leader, int32 *trailer,
407  float32 *adapt_rate);
408 
413 SPHINXBASE_EXPORT /* Reset, discarding any accumulated speech segments. */
414 int32 cont_ad_reset (cont_ad_t *cont); /* In: Object pointer from cont_ad_init */
415 
416 
420 SPHINXBASE_EXPORT /* Close the continuous listening object. */
421 int32 cont_ad_close (cont_ad_t *cont); /* In: Object pointer from cont_ad_init */
422 
423 
427 SPHINXBASE_EXPORT /* Dump the power histogram. */
428 void cont_ad_powhist_dump (FILE *fp, cont_ad_t *cont);
429 
430 
435 SPHINXBASE_EXPORT /* Detach the given continuous listening module from the associated audio device. */
436 int32 cont_ad_detach (cont_ad_t *c);
437 
438 
444 SPHINXBASE_EXPORT /* Attach the continuous listening module to the given audio device/function. */
445 int32 cont_ad_attach (cont_ad_t *c, ad_rec_t *a, int32 (*func)(ad_rec_t *, int16 *, int32));
446 
447 
459 SPHINXBASE_EXPORT /* Set a file for dumping raw audio input. */
460 int32 cont_ad_set_rawfp (cont_ad_t *c, /* The cont_ad object being addressed */
461  FILE *fp); /* File to which raw audio data is to
462  be dumped; NULL to stop dumping. */
463 
471 SPHINXBASE_EXPORT /* Set the file to which cont_ad logs its progress. */
472 int32 cont_ad_set_logfp (cont_ad_t *c, /* The cont_ad object being addressed */
473  FILE *fp); /* File to which logs are written;
474  NULL to stop logging. */
475 
484 SPHINXBASE_EXPORT /* Set the silence and speech thresholds. */
485 int32 cont_set_thresh(cont_ad_t *r, int32 silence, int32 speech);
486 
487 #ifdef __cplusplus
488 }
489 #endif
490 
491 
492 #endif
SPHINXBASE_EXPORT cont_ad_t * cont_ad_init_rawmode(ad_rec_t *ad, int32(*adfunc)(ad_rec_t *ad, int16 *buf, int32 max))
Initializes a continuous listening object which simply passes data through (!)
spseg_t * spseg_tail
Last of unconsumed speech segments.
Definition: cont_ad.h:225
int32 tot_frm
Total number of frames of A/D data read, including consumed ones.
Definition: cont_ad.h:191
int32 max_noise
noise higher than this signals an error
Definition: cont_ad.h:201
int32 sps
Samples/sec; moved from ad->sps to break dependence on ad by N.
Definition: cont_ad.h:180
int32 startfrm
Frame-id in adbuf (see below) of start of this segment.
Definition: cont_ad.h:136
FILE * rawfp
If non-NULL, raw audio input data processed by cont_ad is dumped to this file.
Definition: cont_ad.h:227
int32 seglen
Total no.
Definition: cont_ad.h:171
SPHINXBASE_EXPORT int32 cont_ad_reset(cont_ad_t *cont)
Reset, discarding any accumulated speech segments.
Definition: ad.h:255
int16 * adbuf
Circular buffer for maintaining A/D data read until consumed.
Definition: cont_ad.h:158
SPHINXBASE_EXPORT int32 cont_ad_read(cont_ad_t *r, int16 *buf, int32 max)
Read raw audio data into the silence filter.
Definition: cont_ad_base.c:863
int32 state
State of data returned by most recent cont_ad_read call; CONT_AD_STATE_SIL or CONT_AD_STATE_SPEECH.
Definition: cont_ad.h:165
SPHINXBASE_EXPORT int32 cont_ad_set_logfp(cont_ad_t *c, FILE *fp)
Set the file to which cont_ad logs its progress.
int32 * pow_hist
Histogram of frame power, moving window, decayed.
Definition: cont_ad.h:194
int32 eof
Whether the source ad device has encountered EOF.
Definition: cont_ad.h:183
int32 leader
pad beginning of speech with this many extra frms
Definition: cont_ad.h:205
(FOR INTERNAL USE) Data structure for maintaining speech (non-silence) segments not yet consumed by ...
FILE * logfp
If non-NULL, write detailed logs of this object's progress to the file.
Definition: cont_ad.h:231
Basic type definitions used in Sphinx.
char * frm_pow
Frame power.
Definition: cont_ad.h:195
int32 speech_onset
start speech on >= these many frames out of winsize, of >= delta_speech
Definition: cont_ad.h:203
int32 headfrm
Frame number in adbuf with unconsumed A/D data.
Definition: cont_ad.h:188
SPHINXBASE_EXPORT int32 cont_ad_close(cont_ad_t *cont)
Close the continuous listening object.
int32 auto_thresh
Do automatic threshold adjustment or not.
Definition: cont_ad.h:197
spseg_t * spseg_head
First of unconsumed speech segments.
Definition: cont_ad.h:224
int32 n_calib_frame
Number of frames of calibration data seen so far.
Definition: cont_ad.h:236
int32 win_startfrm
Where next analysis window begins.
Definition: cont_ad.h:220
int32 n_sample
Number of samples of unconsumed data in adbuf.
Definition: cont_ad.h:190
SPHINXBASE_EXPORT cont_ad_t * cont_ad_init(ad_rec_t *ad, int32(*adfunc)(ad_rec_t *ad, int16 *buf, int32 max))
Initialize a continuous listening/silence filtering object.
int32 win_validfrm
Number of frames currently available from win_startfrm for analysis.
Definition: cont_ad.h:221
int32 n_other
If in SILENCE state, number of frames in analysis window considered to be speech; otherwise number of...
Definition: cont_ad.h:222
SPHINXBASE_EXPORT void cont_ad_powhist_dump(FILE *fp, cont_ad_t *cont)
Dump the power histogram.
Definition: cont_ad_base.c:231
SPHINXBASE_EXPORT int32 cont_set_thresh(cont_ad_t *r, int32 silence, int32 speech)
Set the silence and speech thresholds.
int32 nfrm
Number of frames in segment (may wrap around adbuf)
Definition: cont_ad.h:137
int32 delta_sil
Max silence power/frame ABOVE noise level.
Definition: cont_ad.h:198
SPHINXBASE_EXPORT int32 cont_ad_calib(cont_ad_t *cont)
Calibrate the silence filter.
SPHINXBASE_EXPORT int32 cont_ad_detach(cont_ad_t *c)
Detach the given continuous listening module from the associated audio device.
SPHINXBASE_EXPORT int32 cont_ad_calib_size(cont_ad_t *r)
Get the number of samples required to calibrate the silence filter.
generic live audio interface for recording and playback
int32 tail_state
State at the end of its internal buffer (internal use): CONT_AD_STATE_SIL or CONT_AD_STATE_SPEECH.
Definition: cont_ad.h:217
int32 min_noise
noise lower than this we ignore
Definition: cont_ad.h:200
SPHINXBASE_EXPORT int32 cont_ad_attach(cont_ad_t *c, ad_rec_t *a, int32(*func)(ad_rec_t *, int16 *, int32))
Attach the continuous listening module to the given audio device/function.
SPHINXBASE_EXPORT int32 cont_ad_set_rawfp(cont_ad_t *c, FILE *fp)
Set a file for dumping raw audio input.
SPHINXBASE_EXPORT int32 cont_ad_get_params(cont_ad_t *r, int32 *delta_sil, int32 *delta_speech, int32 *min_noise, int32 *max_noise, int32 *winsize, int32 *speech_onset, int32 *sil_onset, int32 *leader, int32 *trailer, float32 *adapt_rate)
PWP 1/14/98 – get the changeable params.
int32 noise_level
PWP: what we claim as the "current" noise level.
Definition: cont_ad.h:192
Continuous listening module or object.
Definition: cont_ad.h:151
int32 n_frm
Number of complete frames of unconsumed A/D data in adbuf.
Definition: cont_ad.h:189
SPHINXBASE_EXPORT int32 cont_ad_buffer_space(cont_ad_t *r)
Get the maximum number of samples which can be passed into cont_ad_read().
Definition: cont_ad_base.c:707
float32 adapt_rate
Linear interpolation constant for rate at which noise level adapted to each estimate; range: 0-1; 0=>...
Definition: cont_ad.h:213
int32 delta_speech
Min speech power/frame ABOVE noise level.
Definition: cont_ad.h:199
SPHINXBASE_EXPORT int32 cont_ad_calib_loop(cont_ad_t *r, int16 *buf, int32 max)
Calibrate the silence filter without an audio device.
int32 prev_sample
For pre-emphasis filter.
Definition: cont_ad.h:187
int32 spf
Samples/frame; audio level is analyzed within frames.
Definition: cont_ad.h:185
int32 read_ts
Absolute timestamp (total no.
Definition: cont_ad.h:167
struct spseg_s * next
Next speech segment (with some intervening silence)
Definition: cont_ad.h:138
ad_rec_t * ad
A/D device argument for adfunc.
Definition: cont_ad.h:154
int32 rawmode
Pass all input data through, without filtering silence.
Definition: cont_ad.h:156
int32 siglvl
Max signal level for the data consumed by the most recent cont_ad_read call (dB range: 0-99)...
Definition: cont_ad.h:175
int32 winsize
how many frames to look at for speech det
Definition: cont_ad.h:202
int32 thresh_update
Number of frames before next update to pow_hist/thresholds.
Definition: cont_ad.h:212
int32 sil_onset
end speech on >= these many frames out of winsize, of <= delta_sil
Definition: cont_ad.h:204
SPHINXBASE_EXPORT int32 cont_ad_set_thresh(cont_ad_t *cont, int32 sil, int32 sp)
Set silence and speech threshold parameters.
int32 trailer
pad end of speech with this many extra frms
Definition: cont_ad.h:206
int32 adbufsize
Buffer size (Number of samples)
Definition: cont_ad.h:186
int32 thresh_speech
Frame considered to be speech if power >= thresh_speech (for transitioning from SILENCE to SPEECH sta...
Definition: cont_ad.h:208
int32 thresh_sil
Frame considered to be silence if power <= thresh_sil (for transitioning from SPEECH to SILENCE state...
Definition: cont_ad.h:210
SPHINXBASE_EXPORT int32 cont_ad_set_params(cont_ad_t *r, int32 delta_sil, int32 delta_speech, int32 min_noise, int32 max_noise, int32 winsize, int32 speech_onset, int32 sil_onset, int32 leader, int32 trailer, float32 adapt_rate)
Set the changeable parameters.