83 #include <sphinxbase/sphinxbase_export.h>
126 #define CONT_AD_STATE_SIL 0
127 #define CONT_AD_STATE_SPEECH 1
153 int32 (*adfunc)(
ad_rec_t *ad, int16 *buf, int32 max);
257 int32 (*adfunc)(
ad_rec_t *ad, int16 *buf, int32 max)
271 int32 (*adfunc)(
ad_rec_t *ad, int16 *buf, int32 max));
390 int32 min_noise, int32 max_noise,
391 int32 winsize, int32 speech_onset, int32 sil_onset,
392 int32 leader, int32 trailer,
404 int32 *min_noise, int32 *max_noise,
405 int32 *winsize, int32 *speech_onset, int32 *sil_onset,
406 int32 *leader, int32 *trailer,
407 float32 *adapt_rate);
SPHINXBASE_EXPORT cont_ad_t * cont_ad_init_rawmode(ad_rec_t *ad, int32(*adfunc)(ad_rec_t *ad, int16 *buf, int32 max))
Initializes a continuous listening object which simply passes data through (!)
spseg_t * spseg_tail
Last of unconsumed speech segments.
int32 tot_frm
Total number of frames of A/D data read, including consumed ones.
int32 max_noise
noise higher than this signals an error
int32 sps
Samples/sec; moved from ad->sps to break dependence on ad by N.
int32 startfrm
Frame-id in adbuf (see below) of start of this segment.
FILE * rawfp
If non-NULL, raw audio input data processed by cont_ad is dumped to this file.
SPHINXBASE_EXPORT int32 cont_ad_reset(cont_ad_t *cont)
Reset, discarding any accumulated speech segments.
int16 * adbuf
Circular buffer for maintaining A/D data read until consumed.
SPHINXBASE_EXPORT int32 cont_ad_read(cont_ad_t *r, int16 *buf, int32 max)
Read raw audio data into the silence filter.
int32 state
State of data returned by most recent cont_ad_read call; CONT_AD_STATE_SIL or CONT_AD_STATE_SPEECH.
SPHINXBASE_EXPORT int32 cont_ad_set_logfp(cont_ad_t *c, FILE *fp)
Set the file to which cont_ad logs its progress.
int32 * pow_hist
Histogram of frame power, moving window, decayed.
int32 eof
Whether the source ad device has encountered EOF.
int32 leader
Pad beginning of speech with this many extra frames.
(FOR INTERNAL USE) Data structure for maintaining speech (non-silence) segments not yet consumed by ...
FILE * logfp
If non-NULL, write detailed logs of this object's progress to the file.
Basic type definitions used in Sphinx.
char * frm_pow
Frame power.
int32 speech_onset
start speech on >= these many frames out of winsize, of >= delta_speech
int32 headfrm
Frame number in adbuf with unconsumed A/D data.
SPHINXBASE_EXPORT int32 cont_ad_close(cont_ad_t *cont)
Close the continuous listening object.
int32 auto_thresh
Do automatic threshold adjustment or not.
spseg_t * spseg_head
First of unconsumed speech segments.
int32 n_calib_frame
Number of frames of calibration data seen so far.
int32 win_startfrm
Where next analysis window begins.
int32 n_sample
Number of samples of unconsumed data in adbuf.
SPHINXBASE_EXPORT cont_ad_t * cont_ad_init(ad_rec_t *ad, int32(*adfunc)(ad_rec_t *ad, int16 *buf, int32 max))
Initialize a continuous listening/silence filtering object.
int32 win_validfrm
Number of frames currently available from win_startfrm for analysis.
int32 n_other
If in SILENCE state, number of frames in analysis window considered to be speech; otherwise number of...
SPHINXBASE_EXPORT void cont_ad_powhist_dump(FILE *fp, cont_ad_t *cont)
Dump the power histogram.
SPHINXBASE_EXPORT int32 cont_set_thresh(cont_ad_t *r, int32 silence, int32 speech)
Set the silence and speech thresholds.
int32 nfrm
Number of frames in segment (may wrap around adbuf)
int32 delta_sil
Max silence power/frame ABOVE noise level.
SPHINXBASE_EXPORT int32 cont_ad_calib(cont_ad_t *cont)
Calibrate the silence filter.
SPHINXBASE_EXPORT int32 cont_ad_detach(cont_ad_t *c)
Detach the given continuous listening module from the associated audio device.
SPHINXBASE_EXPORT int32 cont_ad_calib_size(cont_ad_t *r)
Get the number of samples required to calibrate the silence filter.
generic live audio interface for recording and playback
int32 tail_state
State at the end of its internal buffer (internal use): CONT_AD_STATE_SIL or CONT_AD_STATE_SPEECH.
int32 min_noise
noise lower than this we ignore
SPHINXBASE_EXPORT int32 cont_ad_attach(cont_ad_t *c, ad_rec_t *a, int32(*func)(ad_rec_t *, int16 *, int32))
Attach the continuous listening module to the given audio device/function.
SPHINXBASE_EXPORT int32 cont_ad_set_rawfp(cont_ad_t *c, FILE *fp)
Set a file for dumping raw audio input.
SPHINXBASE_EXPORT int32 cont_ad_get_params(cont_ad_t *r, int32 *delta_sil, int32 *delta_speech, int32 *min_noise, int32 *max_noise, int32 *winsize, int32 *speech_onset, int32 *sil_onset, int32 *leader, int32 *trailer, float32 *adapt_rate)
PWP 1/14/98 – get the changeable params.
int32 noise_level
PWP: what we claim as the "current" noise level.
Continuous listening module or object.
int32 n_frm
Number of complete frames of unconsumed A/D data in adbuf.
SPHINXBASE_EXPORT int32 cont_ad_buffer_space(cont_ad_t *r)
Get the maximum number of samples which can be passed into cont_ad_read().
float32 adapt_rate
Linear interpolation constant for rate at which noise level adapted to each estimate; range: 0-1; 0=>...
int32 delta_speech
Min speech power/frame ABOVE noise level.
SPHINXBASE_EXPORT int32 cont_ad_calib_loop(cont_ad_t *r, int16 *buf, int32 max)
Calibrate the silence filter without an audio device.
int32 prev_sample
For pre-emphasis filter.
int32 spf
Samples/frame; audio level is analyzed within frames.
int32 read_ts
Absolute timestamp (total no.
struct spseg_s * next
Next speech segment (with some intervening silence)
ad_rec_t * ad
A/D device argument for adfunc.
int32 rawmode
Pass all input data through, without filtering silence.
int32 siglvl
Max signal level for the data consumed by the most recent cont_ad_read call (dB range: 0-99)...
int32 winsize
How many frames to look at for speech detection.
int32 thresh_update
Number of frames before next update to pow_hist/thresholds.
int32 sil_onset
end speech on >= these many frames out of winsize, of <= delta_sil
SPHINXBASE_EXPORT int32 cont_ad_set_thresh(cont_ad_t *cont, int32 sil, int32 sp)
Set silence and speech threshold parameters.
int32 trailer
Pad end of speech with this many extra frames.
int32 adbufsize
Buffer size (Number of samples)
int32 thresh_speech
Frame considered to be speech if power >= thresh_speech (for transitioning from SILENCE to SPEECH sta...
int32 thresh_sil
Frame considered to be silence if power <= thresh_sil (for transitioning from SPEECH to SILENCE state...
SPHINXBASE_EXPORT int32 cont_ad_set_params(cont_ad_t *r, int32 delta_sil, int32 delta_speech, int32 min_noise, int32 max_noise, int32 winsize, int32 speech_onset, int32 sil_onset, int32 leader, int32 trailer, float32 adapt_rate)
Set the changeable parameters.