SphinxBase  0.6
cont_adseg.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2001 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*
38  * cont_adseg.c -- Continuously listen and segment input speech into utterances.
39  *
40  * HISTORY
41  *
42  * 27-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
43  * Created.
44  */
45 
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <assert.h>
50 #include <math.h>
51 
52 #include <sphinxbase/prim_type.h>
53 #include <sphinxbase/ad.h>
54 #include <sphinxbase/cont_ad.h>
55 #include <sphinxbase/err.h>
56 
57 /*
58  * Segment raw A/D input data into utterances whenever silence region of given
59  * duration is encountered.
60  * Utterances are written to files named 0001.raw, 0002.raw, 0003.raw, etc.
61  */
62 int
63 main(int32 argc, char **argv)
64 {
65  ad_rec_t *ad;
66  cont_ad_t *cont;
67  int32 k, uttno, ts, uttlen, sps, endsilsamples;
68  float endsil;
69  int16 buf[4096];
70  FILE *fp;
71  char file[1024];
72 
73  if ((argc != 3) ||
74  (sscanf(argv[1], "%d", &sps) != 1) ||
75  (sscanf(argv[2], "%f", &endsil) != 1) || (endsil <= 0.0)) {
76  E_FATAL("Usage: %s <sampling-rate> <utt-end-sil(sec)>\n", argv[0]);
77  }
78 
79  /* Convert desired min. inter-utterance silence duration to #samples */
80  endsilsamples = (int32) (endsil * sps);
81 
82  /* Open raw A/D device */
83  if ((ad = ad_open_sps(sps)) == NULL)
84  E_FATAL("ad_open_sps(%d) failed\n", sps);
85 
86  /* Associate new continuous listening module with opened raw A/D device */
87  if ((cont = cont_ad_init(ad, ad_read)) == NULL)
88  E_FATAL("cont_ad_init failed\n");
89 
90  /* Calibrate continuous listening for background noise/silence level */
91  printf("Calibrating ...");
92  fflush(stdout);
93  ad_start_rec(ad);
94  if (cont_ad_calib(cont) < 0)
95  printf(" failed\n");
96  else
97  printf(" done\n");
98 
99  /* Forever listen for utterances */
100  printf("You may speak now\n");
101  fflush(stdout);
102  uttno = 0;
103  for (;;) {
104  /* Wait for beginning of next utterance; for non-silence data */
105  while ((k = cont_ad_read(cont, buf, 4096)) == 0);
106  if (k < 0)
107  E_FATAL("cont_ad_read failed\n");
108 
109  /* Non-silence data received; open and write to new logging file */
110  uttno++;
111  sprintf(file, "%04d.raw", uttno);
112  if ((fp = fopen(file, "wb")) == NULL)
113  E_FATAL_SYSTEM("Failed to open '%s' for reading", file);
114  fwrite(buf, sizeof(int16), k, fp);
115  uttlen = k;
116  printf("Utterance %04d, logging to %s\n", uttno, file);
117 
118  /* Note current timestamp */
119  ts = cont->read_ts;
120 
121  /* Read utterance data until a gap of at least 1 sec observed */
122  for (;;) {
123  if ((k = cont_ad_read(cont, buf, 4096)) < 0)
124  E_FATAL("cont_ad_read failed\n");
125  if (k == 0) {
126  /*
127  * No speech data available; check current timestamp. End of
128  * utterance if no non-silence data been read for at least 1 sec.
129  */
130  if ((cont->read_ts - ts) > endsilsamples)
131  break;
132  }
133  else {
134  /* Note timestamp at the end of most recently read speech data */
135  ts = cont->read_ts;
136  uttlen += k;
137  fwrite(buf, sizeof(int16), k, fp);
138  }
139  }
140  fclose(fp);
141 
142  printf("\tUtterance %04d = %d samples (%.1fsec)\n\n",
143  uttno, uttlen, (double) uttlen / (double) sps);
144  }
145 
146  ad_stop_rec(ad);
147  cont_ad_close(cont);
148  ad_close(ad);
149  return 0;
150 }
Definition: ad.h:255
SPHINXBASE_EXPORT int32 cont_ad_read(cont_ad_t *r, int16 *buf, int32 max)
Read raw audio data into the silence filter.
Definition: cont_ad_base.c:863
Continuous A/D listening and silence filtering module.
Basic type definitions used in Sphinx.
#define E_FATAL_SYSTEM
Print error text; Call perror(""); exit(errno);.
Definition: err.h:132
SPHINXBASE_EXPORT int32 cont_ad_close(cont_ad_t *cont)
Close the continuous listening object.
SPHINXBASE_EXPORT cont_ad_t * cont_ad_init(ad_rec_t *ad, int32(*adfunc)(ad_rec_t *ad, int16 *buf, int32 max))
Initialize a continuous listening/silence filtering object.
SPHINXBASE_EXPORT int32 cont_ad_calib(cont_ad_t *cont)
Calibrate the silence filter.
Implementation of logging routines.
generic live audio interface for recording and playback
Continuous listening module or object.
Definition: cont_ad.h:151
#define E_FATAL
Exit with non-zero status after error message.
Definition: err.h:127
int32 read_ts
Absolute timestamp (total no. of raw samples consumed up to the most recent cont_ad_read call).
Definition: cont_ad.h:167
SPHINXBASE_EXPORT ad_rec_t * ad_open_sps(int32 samples_per_sec)
Open the default audio device with a given sampling rate.
Definition: ad_alsa.c:290