110 static int32 max_ad_read_size;
112 #if defined(WIN32) && !defined(GNUWINCE)
113 #define NULL_DEVICE "NUL"
115 #define NULL_DEVICE "/dev/null"
125 file_ad_read(
ad_rec_t * r, int16 * buf, int32 max)
129 if (max > max_ad_read_size)
130 max = max_ad_read_size;
132 k = fread(buf,
sizeof(int16), max, infp);
134 for (i = 0; i < k; i++) {
135 buf[i] = ((buf[i] >> 8) & 0x00ff) | ((buf[i] << 8) & 0xff00);
139 return ((k > 0) ? k : -1);
146 E_INFO(
"Usage: %s \\\n", pgm);
149 E_INFOCONT(
"\t[-sps <sampling-rate> (16000)] \\\n");
152 (
"\t[{-s | -silsep} <length-silence-separator(sec) (0.5)]> \\\n");
154 E_INFOCONT(
"\t[-min-noise <min-noise>] \\\n");
155 E_INFOCONT(
"\t[-max-noise <max-noise>] \\\n");
156 E_INFOCONT(
"\t[-delta-sil <delta-sil>] \\\n");
157 E_INFOCONT(
"\t[-delta-speech <delta-speech>] \\\n");
158 E_INFOCONT(
"\t[-sil-onset <sil-onset>] \\\n");
159 E_INFOCONT(
"\t[-speech-onset <speech-onset>] \\\n");
160 E_INFOCONT(
"\t[-adapt-rate <adapt-rate>] \\\n");
161 E_INFOCONT(
"\t[-max-adreadsize <ad_read_blksize>] \\\n");
175 main(int32 argc,
char **argv)
178 int32 uttid, uttlen, starttime, siltime, sps, debug, writeseg, rawmode;
180 char *infile, *copyfile, segfile[1024];
185 int32 winsize, leader, trailer;
186 int32 orig_min_noise, orig_max_noise;
187 int32 orig_delta_sil, orig_delta_speech;
188 int32 orig_speech_onset, orig_sil_onset;
189 int32 min_noise, max_noise;
190 int32 delta_sil, delta_speech;
191 int32 sil_onset, speech_onset;
192 float32 orig_adapt_rate;
194 int32 total_speech_samples;
195 float32 total_speech_sec;
204 min_noise = max_noise = -1;
205 delta_sil = delta_speech = -1;
206 sil_onset = speech_onset = -1;
208 max_ad_read_size = (int32) 0x7ffffff0;
216 for (i = 1; i < argc; i++) {
217 if ((strcmp(argv[i],
"-help") == 0)
218 || (strcmp(argv[i],
"-h") == 0)
219 || (strcmp(argv[i],
"-?") == 0)) {
222 else if ((strcmp(argv[i],
"-debug") == 0)
223 || (strcmp(argv[i],
"-d") == 0)) {
226 else if (strcmp(argv[i],
"-sps") == 0) {
229 || (sscanf(argv[i],
"%d", &sps) != 1)
231 E_ERROR(
"Invalid -sps argument\n");
235 else if ((strcmp(argv[i],
"-byteswap") == 0)
236 || (strcmp(argv[i],
"-b") == 0)) {
239 else if ((strcmp(argv[i],
"-silsep") == 0)
240 || (strcmp(argv[i],
"-s") == 0)) {
243 || (sscanf(argv[i],
"%f", &endsil) != 1)
244 || (endsil <= 0.0)) {
245 E_ERROR(
"Invalid -silsep argument\n");
249 else if ((strcmp(argv[i],
"-writeseg") == 0)
250 || (strcmp(argv[i],
"-w") == 0)) {
253 else if (strcmp(argv[i],
"-min-noise") == 0) {
256 (sscanf(argv[i],
"%d", &min_noise) != 1) ||
258 E_ERROR(
"Invalid -min-noise argument\n");
262 else if (strcmp(argv[i],
"-max-noise") == 0) {
265 (sscanf(argv[i],
"%d", &max_noise) != 1) ||
267 E_ERROR(
"Invalid -max-noise argument\n");
271 else if (strcmp(argv[i],
"-delta-sil") == 0) {
274 (sscanf(argv[i],
"%d", &delta_sil) != 1) ||
276 E_ERROR(
"Invalid -delta-sil argument\n");
280 else if (strcmp(argv[i],
"-delta-speech") == 0) {
283 (sscanf(argv[i],
"%d", &delta_speech) != 1) ||
284 (delta_speech < 0)) {
285 E_ERROR(
"Invalid -delta-speech argument\n");
289 else if (strcmp(argv[i],
"-sil-onset") == 0) {
292 (sscanf(argv[i],
"%d", &sil_onset) != 1) ||
294 E_ERROR(
"Invalid -sil-onset argument\n");
298 else if (strcmp(argv[i],
"-speech-onset") == 0) {
301 (sscanf(argv[i],
"%d", &speech_onset) != 1) ||
302 (speech_onset < 1)) {
303 E_ERROR(
"Invalid -speech-onset argument\n");
307 else if (strcmp(argv[i],
"-adapt-rate") == 0) {
310 (sscanf(argv[i],
"%f", &adapt_rate) != 1) ||
311 (adapt_rate < 0.0) || (adapt_rate > 1.0)) {
312 E_ERROR(
"Invalid -adapt-rate argument\n");
316 else if (strcmp(argv[i],
"-max-adreadsize") == 0) {
319 (sscanf(argv[i],
"%d", &max_ad_read_size) != 1) ||
320 (max_ad_read_size < 1)) {
321 E_ERROR(
"Invalid -max-adreadsize argument\n");
325 else if (strcmp(argv[i],
"-c") == 0) {
328 E_ERROR(
"Invalid -c argument\n");
333 else if ((strcmp(argv[i],
"-rawmode") == 0)
334 || (strcmp(argv[i],
"-r") == 0)) {
337 else if (strcmp(argv[i],
"-i") == 0) {
340 E_ERROR(
"Invalid -i argument\n");
350 if (infile == NULL) {
351 E_ERROR(
"No input file specified\n");
355 if ((infp = fopen(infile,
"rb")) == NULL)
364 ad.
bps =
sizeof(int16);
370 printf(
"Calibrating ...");
373 printf(
" failed; file too short?\n");
379 siltime = (int32) (endsil * sps);
383 if ((rawfp = fopen(copyfile,
"wb")) == NULL)
384 E_ERROR_SYSTEM(
"Failed to open raw output file '%s' for writing");
390 &orig_delta_sil, &orig_delta_speech,
391 &orig_min_noise, &orig_max_noise,
393 &orig_speech_onset, &orig_sil_onset,
394 &leader, &trailer, &orig_adapt_rate);
396 E_INFO(
"Default parameters:\n");
397 E_INFOCONT(
"\tmin-noise = %d, max-noise = %d\n",
398 orig_min_noise, orig_max_noise);
399 E_INFOCONT(
"\tdelta-sil = %d, delta-speech = %d\n",
400 orig_delta_sil, orig_delta_speech);
401 E_INFOCONT(
"\tsil-onset = %d, speech-onset = %d\n",
402 orig_sil_onset, orig_speech_onset);
403 E_INFOCONT(
"\tadapt_rate = %.3f\n", orig_adapt_rate);
406 min_noise = orig_min_noise;
408 max_noise = orig_max_noise;
410 delta_sil = orig_delta_sil;
411 if (delta_speech < 0)
412 delta_speech = orig_delta_speech;
414 sil_onset = orig_sil_onset;
415 if (speech_onset < 0)
416 speech_onset = orig_speech_onset;
417 if (adapt_rate < 0.0)
418 adapt_rate = orig_adapt_rate;
421 delta_sil, delta_speech,
422 min_noise, max_noise,
424 speech_onset, sil_onset,
425 leader, trailer, adapt_rate);
427 E_INFO(
"Current parameters:\n");
428 E_INFOCONT(
"\tmin-noise = %d, max-noise = %d\n", min_noise, max_noise);
429 E_INFOCONT(
"\tdelta-sil = %d, delta-speech = %d\n", delta_sil,
431 E_INFOCONT(
"\tsil-onset = %d, speech-onset = %d\n", sil_onset,
433 E_INFOCONT(
"\tadapt_rate = %.3f\n", adapt_rate);
435 E_INFO(
"Sampling rate: %d", sps);
436 E_INFOCONT(
"; Byteswap: %s", swap ?
"Yes" :
"No");
437 E_INFOCONT(
"; Max ad-read size: %d\n", max_ad_read_size);
442 total_speech_samples = 0;
443 total_speech_sec = 0.0;
461 (
"Utt %08d, st= %8.2fs, et= %8.2fs, seg= %7.2fs (#samp= %10d)\n",
462 uttid, (
double) starttime / (
double) sps,
463 (
double) (starttime + uttlen) / (
double) sps,
464 (
double) uttlen / (
double) sps, uttlen);
467 total_speech_samples += uttlen;
468 total_speech_sec += (double) uttlen / (
double) sps;
476 if (cont->
state == CONT_AD_STATE_SIL) {
478 if (cont->
seglen > siltime) {
483 (
"Utt %08d, st= %8.2fs, et= %8.2fs, seg= %7.2fs (#samp= %10d)\n",
484 uttid, (
double) starttime / (
double) sps,
485 (
double) (starttime + uttlen) / (
double) sps,
486 (
double) uttlen / (
double) sps, uttlen);
489 total_speech_samples += uttlen;
490 total_speech_sec += (double) uttlen / (
double) sps;
500 fwrite(buf,
sizeof(int16), k, fp);
507 assert(cont->
state == CONT_AD_STATE_SPEECH);
511 sprintf(segfile,
"%08d.raw", uttid);
513 strcpy(segfile, NULL_DEVICE);
514 if ((fp = fopen(segfile,
"wb")) == NULL)
515 E_FATAL_SYSTEM(
"Failed to open segmentation file '%s' for writing", segfile);
523 fwrite(buf,
sizeof(int16), k, fp);
532 E_INFO(
"Total raw input speech = %d frames, %d samples, %.2f sec\n",
535 E_INFO(
"Total speech detected = %d samples, %.2f sec\n",
536 total_speech_samples, total_speech_sec);
SPHINXBASE_EXPORT cont_ad_t * cont_ad_init_rawmode(ad_rec_t *ad, int32(*adfunc)(ad_rec_t *ad, int16 *buf, int32 max))
Initializes a continuous listening object which simply passes data through (!)
int32 tot_frm
Total number of frames of A/D data read, including consumed ones.
int32 sps
Samples/sec; moved from ad->sps to break dependence on ad by N.
#define E_INFO
Print logging information to standard error stream.
SPHINXBASE_EXPORT int32 cont_ad_read(cont_ad_t *r, int16 *buf, int32 max)
Read raw audio data into the silence filter.
int32 state
State of data returned by most recent cont_ad_read call; CONT_AD_STATE_SIL or CONT_AD_STATE_SPEECH.
SPHINXBASE_EXPORT int32 cont_ad_set_logfp(cont_ad_t *c, FILE *fp)
Set the file to which cont_ad logs its progress.
Continuous A/D listening and silence filtering module.
Basic type definitions used in Sphinx.
#define E_FATAL_SYSTEM
Print error text, call perror(""), then exit(errno).
SPHINXBASE_EXPORT int32 cont_ad_close(cont_ad_t *cont)
Close the continuous listening object.
SPHINXBASE_EXPORT cont_ad_t * cont_ad_init(ad_rec_t *ad, int32(*adfunc)(ad_rec_t *ad, int16 *buf, int32 max))
Initialize a continuous listening/silence filtering object.
#define E_INFOCONT
Print logging information without header, to standard error stream.
SPHINXBASE_EXPORT int32 cont_ad_calib(cont_ad_t *cont)
Calibrate the silence filter.
Implementation of logging routines.
generic live audio interface for recording and playback
SPHINXBASE_EXPORT int32 cont_ad_set_rawfp(cont_ad_t *c, FILE *fp)
Set a file for dumping raw audio input.
SPHINXBASE_EXPORT int32 cont_ad_get_params(cont_ad_t *r, int32 *delta_sil, int32 *delta_speech, int32 *min_noise, int32 *max_noise, int32 *winsize, int32 *speech_onset, int32 *sil_onset, int32 *leader, int32 *trailer, float32 *adapt_rate)
PWP 1/14/98 – get the changeable params.
Continuous listening module or object.
#define E_ERROR
Print error message to standard error stream.
int32 spf
Samples/frame; audio level is analyzed within frames.
int32 read_ts
Absolute timestamp (total no.
#define E_ERROR_SYSTEM
Print error text, then call perror("").
SPHINXBASE_EXPORT int32 cont_ad_set_params(cont_ad_t *r, int32 delta_sil, int32 delta_speech, int32 min_noise, int32 max_noise, int32 winsize, int32 speech_onset, int32 sil_onset, int32 leader, int32 trailer, float32 adapt_rate)
Set the changeable parameters.