SphinxBase  0.6
ad.h
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*
38  * ad.h -- generic live audio interface for recording and playback
39  *
40  * **********************************************
41  * CMU ARPA Speech Project
42  *
43  * Copyright (c) 1996 Carnegie Mellon University.
44  * ALL RIGHTS RESERVED.
45  * **********************************************
46  *
47  * HISTORY
48  *
49  * $Log: ad.h,v $
50  * Revision 1.8 2005/06/22 08:00:06 arthchan2003
51  * Completed all doxygen documentation on file description for libs3decoder/libutil/libs3audio and programs.
52  *
53  * Revision 1.7 2004/12/14 00:39:49 arthchan2003
54  * add <s3types.h> to the code, change some comments to doxygen style
55  *
56  * Revision 1.6 2004/12/06 11:17:55 arthchan2003
57  * Update the copyright information of ad.h, *sigh* start to feel tired of updating documentation system. Anyone who has time, please take up libs3audio. That is the last place which is undocumented
58  *
59  * Revision 1.5 2004/07/23 23:44:46 egouvea
60  * Changed the cygwin code to use the same audio files as the MS Visual code, removed unused variables from fe_interface.c
61  *
62  * Revision 1.4 2004/02/29 23:48:31 egouvea
63  * Updated configure.in to the recent automake/autoconf, fixed win32
64  * references in audio files.
65  *
66  * Revision 1.3 2002/11/10 19:27:38 egouvea
67  * Fixed references to sun's implementation of audio interface,
68  * referring to the correct .h file, and replacing sun4 with sunos.
69  *
70  * Revision 1.2 2001/12/11 04:40:55 lenzo
71  * License cleanup.
72  *
73  * Revision 1.1.1.1 2001/12/03 16:01:45 egouvea
74  * Initial import of sphinx3
75  *
76  * Revision 1.1.1.1 2001/01/17 05:17:14 ricky
77  * Initial Import of the s3.3 decoder, has working decodeaudiofile, s3.3_live
78  *
79  *
80  * 19-Jan-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
81  * Added AD_ return codes. Added ad_open_sps_bufsize(), and
82  * ad_rec_t.n_buf.
83  *
84  * 17-Apr-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
85  * Added ad_open_play_sps().
86  *
87  * 07-Mar-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
88  * Added ad_open_sps().
89  *
90  * 10-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
91  * Added ad_wbuf_t, ad_rec_t, and ad_play_t types, and augmented all
92  * recording functions with ad_rec_t, and playback functions with
93  * ad_play_t.
94  *
95  * 06-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
96  * Created.
97  */
98 
103 #ifndef _AD_H_
104 #define _AD_H_
105 
106 #include <sphinx_config.h>
107 
108 #if defined (__CYGWIN__)
109 #include <w32api/windows.h>
110 #include <w32api/mmsystem.h>
111 #elif (defined(WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE)
112 #include <windows.h>
113 #include <mmsystem.h>
114 #elif defined(AD_BACKEND_JACK)
115 #include <jack/jack.h>
116 #include <jack/ringbuffer.h>
117 #ifdef HAVE_SAMPLERATE_H
118 #include <samplerate.h>
119 #endif
120 #elif defined(AD_BACKEND_PULSEAUDIO)
121 #include <pulse/pulseaudio.h>
122 #include <pulse/simple.h>
123 #elif defined(AD_BACKEND_ALSA)
124 #include <alsa/asoundlib.h>
125 #endif
126 
127 /* Win32/WinCE DLL gunk */
128 #include <sphinxbase/sphinxbase_export.h>
129 
130 #include <sphinxbase/prim_type.h>
131 
132 #ifdef __cplusplus
133 extern "C" {
134 #endif
135 #if 0
136 /* Fool Emacs. */
137 }
138 #endif
139 
140 #define AD_SAMPLE_SIZE (sizeof(int16)) /* Size in bytes of one int16 audio sample */
141 #define DEFAULT_SAMPLES_PER_SEC 16000 /* Sampling-rate constant; presumably the rate used when none is given - confirm in the backends */
142 
143 /* Return codes: 0 for success, negative values for end-of-data and error conditions */
144 #define AD_OK 0
145 #define AD_EOF -1 /* NOTE(review): same value as AD_ERR_GEN, so the two cannot be told apart by value */
146 #define AD_ERR_GEN -1
147 #define AD_ERR_NOT_OPEN -2
148 #define AD_ERR_WAVE -3
149 
150 
151 #if (defined(WIN32) || defined(AD_BACKEND_WIN32)) && !defined(GNUWINCE)
152 typedef struct { /* One Win32 wave buffer: a wave header and a data buffer, each kept as a handle plus a pointer */
153  HGLOBAL h_whdr; /* Handle paired with p_whdr (presumably the allocation p_whdr points into - confirm) */
154  LPWAVEHDR p_whdr; /* Pointer to this buffer's WAVEHDR */
155  HGLOBAL h_buf; /* Handle paired with p_buf (presumably the allocation p_buf points into - confirm) */
156  LPSTR p_buf; /* Pointer to this buffer's data (presumably the audio samples - confirm) */
157 } ad_wbuf_t;
158 #endif
159 
160 
161 /* ------------ RECORDING -------------- */
162 
163 /*
164  * NOTE: ad_rec_t and ad_play_t are READ-ONLY structures for the user.
165  */
166 
167 #if (defined(WIN32) || defined(AD_BACKEND_WIN32)) && !defined(GNUWINCE)
168 
169 #define DEFAULT_DEVICE (char*)DEV_MAPPER /* Default device for the Win32 backend; DEV_MAPPER is defined elsewhere (not visible here) */
170 
174 typedef struct ad_rec_s { /* Recording state for the Win32 backend; read-only for the user */
175  HWAVEIN h_wavein; /* "HANDLE" to the audio input device */
176  ad_wbuf_t *wi_buf; /* Recording buffers provided to system */
177  int32 n_buf; /* #Recording buffers provided to system */
178  int32 opened; /* Flag; A/D opened for recording */
179  int32 recording; /* Presumably a flag set while recording is in progress - confirm in the backend */
180  int32 curbuf; /* Current buffer with data for application */
181  int32 curoff; /* Start of data for application in curbuf */
182  int32 curlen; /* #samples of data from curoff in curbuf */
183  int32 lastbuf; /* Last buffer containing data after recording stopped */
184  int32 sps; /* Samples/sec */
185  int32 bps; /* Bytes/sample */
186 } ad_rec_t;
187 
188 #elif defined(AD_BACKEND_OSS)
189 
190 #define DEFAULT_DEVICE "/dev/dsp" /* Default device path for the OSS backend */
191 
195 typedef struct { /* Recording state for the OSS backend; read-only for the user */
196  int32 dspFD; /* Audio device descriptor */
197  int32 recording; /* Presumably a flag set while recording is in progress - confirm in the backend */
198  int32 sps; /* Samples/sec */
199  int32 bps; /* Bytes/sample */
200 } ad_rec_t;
201 
202 #elif defined(AD_BACKEND_PULSEAUDIO)
203 
204 #define DEFAULT_DEVICE NULL /* The PulseAudio backend's default device is NULL (no device name) */
205 
206 typedef struct { /* Recording state for the PulseAudio backend; read-only for the user */
207  pa_simple* pa; /* PulseAudio simple-API object (type from <pulse/simple.h>) */
208  int32 recording; /* Presumably a flag set while recording is in progress - confirm in the backend */
209  int32 sps; /* Samples/sec */
210  int32 bps; /* Bytes/sample */
211 } ad_rec_t;
212 
213 #elif defined(AD_BACKEND_ALSA)
214 
215 #define DEFAULT_DEVICE "default" /* Default device name for the ALSA backend */
216 typedef struct { /* Recording state for the ALSA backend; read-only for the user */
217  snd_pcm_t *dspH; /* ALSA PCM handle (type from <alsa/asoundlib.h>) */
218  int32 recording; /* Presumably a flag set while recording is in progress - confirm in the backend */
219  int32 sps; /* Samples/sec */
220  int32 bps; /* Bytes/sample */
221 } ad_rec_t;
222 
223 #elif defined(AD_BACKEND_JACK)
224 
225 typedef struct { /* Recording state for the JACK backend. NOTE(review): this branch defines no DEFAULT_DEVICE, unlike most other backends - confirm intended */
226  jack_client_t *client; /* JACK client handle */
227  jack_port_t *input_port; /* JACK port; input side, per the name */
228  jack_port_t *output_port; /* JACK port; output side, per the name */
229  jack_ringbuffer_t* rbuffer; /* JACK ring buffer (presumably holds captured audio until it is read - confirm) */
230  jack_default_audio_sample_t* sample_buffer; /* Buffer of JACK-native samples (usage not visible here) */
231  int32 recording; /* Presumably a flag set while recording is in progress - confirm in the backend */
232  int32 sps; /* Samples/sec */
233  int32 bps; /* Bytes/sample */
234 #ifdef HAVE_SAMPLERATE_H
235  SRC_STATE *resample_state; /* libsamplerate converter state; only present when HAVE_SAMPLERATE_H is defined */
236  jack_default_audio_sample_t *resample_buffer; /* Buffer of JACK-native samples used with resample_state (presumed - confirm) */
237 #endif
238 } ad_rec_t;
239 
240 #elif defined(AD_BACKEND_S60)
241 
242 typedef struct ad_rec_s { /* Recording state for the S60 backend; read-only for the user */
243  void* recorder; /* Opaque recorder object; its concrete type is not visible in this header */
244  int32 recording; /* Presumably a flag set while recording is in progress - confirm in the backend */
245  int32 sps; /* Samples/sec */
246  int32 bps; /* Bytes/sample */
247 } ad_rec_t;
248 
249 SPHINXBASE_EXPORT /* S60 build also exposes ad_open_sps_bufsize; same prototype as the WIN32-guarded declaration later in this header */
250 ad_rec_t *ad_open_sps_bufsize (int32 samples_per_sec, int32 bufsize_msec);
251 
252 #else
253 
254 #define DEFAULT_DEVICE NULL /* Fallback when no backend branch above matched: default device is NULL */
255 typedef struct { /* Fallback recording handle: carries only the sample-format fields */
256  int32 sps; /* Samples/sec */
257  int32 bps; /* Bytes/sample */
258 } ad_rec_t;
259 
260 
261 #endif
262 
263 
273 SPHINXBASE_EXPORT /* Open a specific audio device for recording. */
274 ad_rec_t *ad_open_dev (
275  const char *dev, /* Device to open */
276  int32 samples_per_sec /* Sampling rate, in samples per second */
277  );
278 
282 SPHINXBASE_EXPORT /* Open the default audio device with a given sampling rate. */
283 ad_rec_t *ad_open_sps (
284  int32 samples_per_sec /* Sampling rate, in samples per second */
285  );
286 
287 
291 SPHINXBASE_EXPORT /* Open the default audio device. */
292 ad_rec_t *ad_open ( void );
293 
294 
295 #if defined(WIN32) && !defined(GNUWINCE) /* NOTE(review): unlike the ad_rec_t guard, this does not test AD_BACKEND_WIN32 - confirm intended */
296 /*
297  * Like ad_open_sps but specifies buffering required within driver. This function is
298  * useful if the default (5000 msec worth) is too small and results in loss of data.
299  */
300 SPHINXBASE_EXPORT
301 ad_rec_t *ad_open_sps_bufsize (int32 samples_per_sec, int32 bufsize_msec);
302 #endif
303 
304 
305 /* Start audio recording on the given recording handle. Return value: 0 if successful, <0 otherwise */
306 SPHINXBASE_EXPORT
307 int32 ad_start_rec (ad_rec_t *);
308 
309 
310 /* Stop audio recording on the given recording handle. Return value: 0 if successful, <0 otherwise */
311 SPHINXBASE_EXPORT
312 int32 ad_stop_rec (ad_rec_t *);
313 
314 
315 /* Close the recording device behind the given handle. Return value: 0 if successful, <0 otherwise */
316 SPHINXBASE_EXPORT
317 int32 ad_close (ad_rec_t *);
318 
319 
320 /*
321  * Read next block of audio samples while recording; read up to max samples into buf.
322  * Return value: # samples actually read (could be 0 since non-blocking); -1 if not
323  * recording and no more samples remaining to be read from most recent recording.
324  */
325 SPHINXBASE_EXPORT
326 int32 ad_read (ad_rec_t *, int16 *buf, int32 max); /* buf: caller-supplied output buffer; max: most samples to store in buf */
327 
328 
329 /* ------ PLAYBACK; SIMILAR TO RECORDING ------- */
330 
331 #if defined(WIN32) && !defined(GNUWINCE)
332 
333 typedef struct { /* Playback state for Win32. NOTE(review): the enclosing #if tests only WIN32/GNUWINCE, not AD_BACKEND_WIN32 as the ad_rec_t guard does - confirm */
334  HWAVEOUT h_waveout; /* "HANDLE" to the audio output device */
335  ad_wbuf_t *wo_buf; /* Playback buffers given to the system */
336  int32 opened; /* Flag; A/D opened for playback */
337  int32 playing; /* Presumably a flag set while playback is in progress - confirm in the backend */
338  char *busy; /* flags [N_WO_BUF] indicating whether given to system */
339  int32 nxtbuf; /* Next buffer [0..N_WO_BUF-1] to be used for playback data */
340  int32 sps; /* Samples/sec */
341  int32 bps; /* Bytes/sample */
342 } ad_play_t;
343 
344 #else
345 
346 typedef struct { /* Fallback playback handle: carries only the sample-format fields */
347  int32 sps; /* Samples/sec */
348  int32 bps; /* Bytes/sample */
349 } ad_play_t; /* Dummy definition for systems without A/D stuff */
350 
351 #endif
352 
353 
354 SPHINXBASE_EXPORT /* Playback-side open taking a sampling rate; presumably the counterpart of ad_open_sps - confirm in the backend */
355 ad_play_t *ad_open_play_sps (int32 samples_per_sec);
356 
357 SPHINXBASE_EXPORT /* Playback-side open with no arguments; presumably the counterpart of ad_open - confirm in the backend */
358 ad_play_t *ad_open_play ( void );
359 
360 SPHINXBASE_EXPORT /* Playback counterpart of ad_start_rec; return convention presumably the same (0 ok, <0 error) - confirm */
361 int32 ad_start_play (ad_play_t *);
362 
363 SPHINXBASE_EXPORT /* Playback counterpart of ad_stop_rec; return convention presumably the same (0 ok, <0 error) - confirm */
364 int32 ad_stop_play (ad_play_t *);
365 
366 SPHINXBASE_EXPORT /* Playback counterpart of ad_close; return convention presumably the same (0 ok, <0 error) - confirm */
367 int32 ad_close_play (ad_play_t *);
368 
369 
378 SPHINXBASE_EXPORT /* Queue a block of audio samples for playback. */
379 int32 ad_write (ad_play_t *, int16 *buf, int32 len); /* buf: samples to play; len: presumably the sample count in buf - confirm */
380 
381 
382 /* ------ MISCELLANEOUS ------- */
383 
387 SPHINXBASE_EXPORT /* Convert mu-law data to int16 linear PCM format. */
388 void ad_mu2li (int16 *outbuf, /* Out: PCM data placed here (allocated by user) */
389  unsigned char *inbuf, /* In: Input buffer with mulaw data */
390  int32 n_samp); /* In: #Samples in inbuf */
391 
392 #ifdef __cplusplus
393 }
394 #endif
395 
396 
397 #endif
SPHINXBASE_EXPORT void ad_mu2li(int16 *outbuf, unsigned char *inbuf, int32 n_samp)
Convert mu-law data to int16 linear PCM format.
Definition: ad.h:255
int32 sps
Samples/sec.
Definition: ad.h:256
Basic type definitions used in Sphinx.
int32 bps
Bytes/sample.
Definition: ad.h:257
SPHINXBASE_EXPORT ad_rec_t * ad_open(void)
Open the default audio device.
Definition: ad_alsa.c:296
Definition: ad.h:346
SPHINXBASE_EXPORT ad_rec_t * ad_open_dev(const char *dev, int32 samples_per_sec)
Open a specific audio device for recording.
Definition: ad_alsa.c:252
SPHINXBASE_EXPORT ad_rec_t * ad_open_sps(int32 samples_per_sec)
Open the default audio device with a given sampling rate.
Definition: ad_alsa.c:290
SPHINXBASE_EXPORT int32 ad_write(ad_play_t *, int16 *buf, int32 len)
Queue a block of audio samples for playback.
Definition: play_win32.c:373