51 #include <sphinxbase/fe.h>
58 #include <sphinxbase/byteorder.h>
61 #include "sphinx_wave2feat.h"
62 #include "cmd_ln_defn.h"
102 int32 RemainingLength;
124 if ((fh = fopen(wtf->
infile,
"rb")) == NULL) {
128 if (fread(&hdr,
sizeof(hdr), 1, fh) != 1) {
134 if (0 != memcmp(hdr.rifftag,
"RIFF", 4)) {
140 cmd_ln_set_int32_r(wtf->
config,
"-nchans", hdr.numchannels);
141 cmd_ln_set_float32_r(wtf->
config,
"-samprate", hdr.SamplingFreq);
148 open_nist_file(
sphinx_wave2feat_t *wtf,
char const *infile, FILE **out_fh,
int detect_endian)
154 if ((fh = fopen(infile,
"rb")) == NULL) {
158 if (fread(&nist, 1, 7, fh) != 7) {
164 if (0 != strncmp(nist,
"NIST_1A", 7)) {
169 fseek(fh, 0, SEEK_SET);
175 if (strlen(li->buf) == 0) {
184 if (0 == strcmp(words[0],
"sample_rate")) {
185 cmd_ln_set_float32_r(wtf->
config,
"-samprate",
atof_c(words[2]));
187 if (0 == strcmp(words[0],
"channel_count")) {
188 cmd_ln_set_int32_r(wtf->
config,
"-nchans", atoi(words[2]));
190 if (detect_endian && 0 == strcmp(words[0],
"sample_byte_format")) {
192 (0 == strcmp(words[2],
"10")) ?
"big" :
"little");
197 fseek(fh, 1024, SEEK_SET);
214 if ((rv = open_nist_file(wtf, wtf->
infile, NULL, FALSE)) != TRUE)
219 if ((fh = popen(cmdline,
"r")) == NULL) {
232 E_ERROR(
"popen() not available, cannot run sph2pipe\n");
248 if ((rv = open_nist_file(wtf, wtf->
infile, &fh, TRUE)) != TRUE)
267 if ((fh = fopen(wtf->
infile,
"rb")) == NULL) {
288 if ((fh = fopen(wtf->
infile,
"rb")) == NULL) {
292 if (fread(&len, 4, 1, fh) != 1) {
297 fseek(fh, 0, SEEK_END);
301 flen = (flen / 4) - 1;
307 E_ERROR(
"Mismatch in header/file lengths: 0x%08x vs 0x%08x\n",
314 ?
"little" :
"big"));
317 fseek(fh, 4, SEEK_SET);
328 E_ERROR(
"Sphinx MFCC file reading requested but -spec2cep/-cep2spec not given\n");
336 mixnpick_channels(int16 *buf, int32 nsamp, int32 nchans, int32 whichchan)
341 for (i = whichchan - 1; i < nsamp; i += nchans)
342 buf[i/nchans] = buf[i];
345 for (i = 0; i < nsamp; i += nchans) {
347 for (j = 0; j < nchans && i + j < nsamp; ++j) {
350 buf[i/nchans] = (int16)(tmp / nchans);
356 #ifdef HAVE_SNDFILE_H
368 memset(&sfinfo, 0,
sizeof(sfinfo));
371 if ((sf = sf_open(wtf->
infile, SFM_READ, &sfinfo)) == NULL) {
375 cmd_ln_set_int32_r(wtf->
config,
"-nchans", sfinfo.channels);
376 cmd_ln_set_float32_r(wtf->
config,
"-samprate", sfinfo.samplerate);
391 int32 nfr, nchans, whichchan;
394 nchans = cmd_ln_int32_r(wtf->
config,
"-nchans");
395 whichchan = cmd_ln_int32_r(wtf->
config,
"-whichchan");
396 fe_start_utt(wtf->
fe);
398 while ((nsamp = sf_read_short(wtf->insfh,
401 int16
const *inspeech;
406 nsamp = mixnpick_channels(wtf->
audio, nsamp, nchans, whichchan);
408 inspeech = wtf->
audio;
413 fe_process_frames(wtf->
fe, &inspeech, &nsamp, wtf->
feat, &nfr);
415 if ((n = (*wtf->
ot->output_frames)(wtf, wtf->
feat, nfr)) < 0)
420 inspeech = wtf->
audio;
423 fe_end_utt(wtf->
fe, wtf->
feat[0], &nfr);
425 if ((n = (*wtf->
ot->output_frames)(wtf, wtf->
feat, nfr)) < 0)
430 sf_close(wtf->insfh);
444 int32 nfr, nchans, whichchan;
447 nchans = cmd_ln_int32_r(wtf->
config,
"-nchans");
448 whichchan = cmd_ln_int32_r(wtf->
config,
"-whichchan");
449 fe_start_utt(wtf->
fe);
453 int16
const *inspeech;
457 for (n = 0; n < nsamp; ++n)
458 SWAP_INT16(wtf->
audio + n);
463 nsamp = mixnpick_channels(wtf->
audio, nsamp, nchans, whichchan);
465 inspeech = wtf->
audio;
470 fe_process_frames(wtf->
fe, &inspeech, &nsamp, wtf->
feat, &nfr);
472 if ((n = (*wtf->
ot->output_frames)(wtf, wtf->
feat, nfr)) < 0)
477 inspeech = wtf->
audio;
480 fe_end_utt(wtf->
fe, wtf->
feat[0], &nfr);
482 if ((n = (*wtf->
ot->output_frames)(wtf, wtf->
feat, nfr)) < 0)
487 if (fclose(wtf->
infh) == EOF)
508 while ((n = fread(wtf->
feat[0],
sizeof(**wtf->
feat),
512 E_ERROR(
"Size of file %d not a multiple of veclen %d\n",
518 for (i = 0; i < n; ++i)
519 SWAP_FLOAT32(wtf->
feat[0] + i);
521 fe_float_to_mfcc(wtf->
fe, (float32 **)wtf->
feat, wtf->
feat, nfr);
522 for (i = 0; i < nfr; ++i) {
525 fe_logspec_to_mfcc(wtf->
fe, wtf->
feat[i], wtf->
feat[i]);
527 fe_logspec_dct2(wtf->
fe, wtf->
feat[i], wtf->
feat[i]);
530 fe_mfcc_dct3(wtf->
fe, wtf->
feat[i], wtf->
feat[i]);
533 if ((n = (*wtf->
ot->output_frames)(wtf, wtf->
feat, nfr)) < 0)
538 if (fclose(wtf->
infh) == EOF)
545 #ifdef HAVE_SNDFILE_H
546 {
"-sndfile", &detect_sndfile, &decode_sndfile },
548 {
"-mswav", &detect_riff, &decode_pcm },
549 {
"-nist", &detect_nist, &decode_pcm },
550 {
"-raw", &detect_raw, &decode_pcm },
551 {
"-sph2pipe", &detect_sph2pipe, &decode_pcm }
553 static const int ntypes =
sizeof(types)/
sizeof(types[0]);
555 "sphinx_mfc", &detect_sphinx_mfc, &decode_sphinx_mfc
566 if (fwrite(&nfloat, 4, 1, wtf->
outfh) != 1) {
583 fe_mfcc_to_float(wtf->
fe, frames, (float32 **)frames, nfr);
584 for (i = 0; i < nfr; ++i) {
585 if (fwrite(frames[i],
sizeof(float32), wtf->
veclen, wtf->
outfh) != wtf->
veclen) {
595 typedef enum htk_feature_kind_e {
608 } htk_feature_kind_t;
610 typedef enum htk_feature_flag_e {
621 } htk_feature_flag_t;
638 if (swap) SWAP_INT32(&nfloat);
639 if (fwrite(&nfloat, 4, 1, wtf->
outfh) != 1)
642 samp_period = (int32)(1e+7 / cmd_ln_float32_r(wtf->
config,
"-frate"));
643 if (swap) SWAP_INT32(&samp_period);
644 if (fwrite(&samp_period, 4, 1, wtf->
outfh) != 1)
647 samp_size = wtf->
veclen * 4;
648 if (swap) SWAP_INT16(&samp_size);
649 if (fwrite(&samp_size, 2, 1, wtf->
outfh) != 1)
656 param_kind = MFCC | _O;
657 if (swap) SWAP_INT16(&param_kind);
658 if (fwrite(&param_kind, 2, 1, wtf->
outfh) != 1)
670 int i, j, swap, htk_reorder, nfloat = 0;
672 fe_mfcc_to_float(wtf->
fe, frames, (float32 **)frames, nfr);
675 htk_reorder = (0 == strcmp(
"htk", wtf->
ot->name)
678 for (i = 0; i < nfr; ++i) {
680 mfcc_t c0 = frames[i][0];
681 memmove(frames[i] + 1, frames[i], (wtf->
veclen - 1) * 4);
682 frames[i][wtf->
veclen - 1] = c0;
685 for (j = 0; j < wtf->
veclen; ++j)
686 SWAP_FLOAT32(frames[i] + j);
687 if (fwrite(frames[i],
sizeof(float32), wtf->
veclen, wtf->
outfh) != wtf->
veclen) {
703 int i, j, nfloat = 0;
705 fe_mfcc_to_float(wtf->
fe, frames, (float32 **)frames, nfr);
706 for (i = 0; i < nfr; ++i) {
707 for (j = 0; j < wtf->
veclen; ++j) {
708 fprintf(wtf->
outfh,
"%.5g", frames[i][j]);
710 fprintf(wtf->
outfh,
"\n");
712 fprintf(wtf->
outfh,
" ");
720 {
"sphinx", &output_header_sphinx, &output_frames_sphinx },
721 {
"htk", &output_header_htk, &output_frames_htk },
722 {
"text", NULL, &output_frames_text }
724 static const int nouttypes =
sizeof(outtypes)/
sizeof(outtypes[0]);
727 sphinx_wave2feat_init(
cmd_ln_t *config)
735 wtf->
fe = fe_init_auto_r(wtf->
config);
737 for (i = 0; i < nouttypes; ++i) {
739 if (0 == strcmp(
cmd_ln_str_r(config,
"-ofmt"), otype->name)) {
744 if (i == nouttypes) {
745 E_ERROR(
"Unknown output type: '%s'\n",
747 sphinx_wave2feat_free(wtf);
771 if (fclose(wtf->
infh) == EOF)
775 if (fclose(wtf->
outfh) == EOF)
801 int rv = mfcc_type.detect(wtf);
808 for (i = 0; i < ntypes; ++i) {
812 rv = (*atype->detect)(wtf);
821 for (i = 0; i < ntypes; ++i) {
824 rv = (*atype->detect)(wtf);
843 char const *infile,
char const *outfile)
845 int nchans, minfft, nfft, nfloat, veclen;
850 E_INFO(
"Converting %s to %s\n", infile, outfile);
855 if ((atype = detect_audio_type(wtf)) == NULL)
863 minfft = (int)(cmd_ln_float32_r(wtf->
config,
"-samprate")
864 * cmd_ln_float32_r(wtf->
config,
"-wlen") + 0.5);
865 for (nfft = 1; nfft < minfft; nfft <<= 1)
867 if (nfft > cmd_ln_int32_r(wtf->
config,
"-nfft")) {
868 E_WARN(
"Value of -nfft = %d is too small, increasing to %d\n",
869 cmd_ln_int32_r(wtf->
config,
"-nfft"), nfft);
870 cmd_ln_set_int32_r(wtf->
config,
"-nfft", nfft);
872 wtf->
fe = fe_init_auto_r(wtf->
config);
877 wtf->
veclen = fe_get_output_size(wtf->
fe);
880 fe_get_input_size(wtf->
fe, &fshift, &fsize);
884 nchans = cmd_ln_int32_r(wtf->
config,
"-nchans");
886 if (wtf->
blocksize < (fsize + fshift) * nchans) {
887 E_INFO(
"Block size of %d too small, increasing to %d\n",
889 (fsize + fshift) * nchans);
890 wtf->
blocksize = (fsize + fshift) * nchans;
902 if ((wtf->
outfh = fopen(outfile,
"wb")) == NULL) {
907 if (wtf->
ot->output_header &&
908 (*wtf->
ot->output_header)(wtf, 0) < 0) {
914 if ((nfloat = (*atype->decode)(wtf)) < 0) {
919 if (wtf->
ot->output_header) {
920 if (fseek(wtf->
outfh, 0, SEEK_SET) < 0) {
924 if ((*wtf->
ot->output_header)(wtf, nfloat) < 0) {
946 if (fclose(wtf->
outfh) == EOF)
969 if (fclose(wtf->
outfh) == EOF)
977 build_filenames(
cmd_ln_t *config,
char const *basename,
978 char **out_infile,
char **out_outfile)
980 char const *di, *do_, *ei, *eo;
1016 int nskip, runlen, npart, rv = 0;
1018 if ((ctlfh = fopen(ctlfile,
"r")) == NULL) {
1022 nskip = cmd_ln_int32_r(wtf->
config,
"-nskip");
1023 runlen = cmd_ln_int32_r(wtf->
config,
"-runlen");
1024 if ((npart = cmd_ln_int32_r(wtf->
config,
"-npart"))) {
1026 int partlen, part, nlines = 0;
1027 part = cmd_ln_int32_r(wtf->
config,
"-part");
1030 fseek(ctlfh, 0, SEEK_SET);
1031 partlen = nlines / npart;
1032 nskip = partlen * (part - 1);
1039 E_INFO(
"Processing %d utterances at position %d\n", runlen, nskip);
1043 E_INFO(
"Processing all remaining utterances at position %d\n", nskip);
1047 char *c, *infile, *outfile;
1059 if ((c = strchr(li->buf,
' ')) != NULL)
1061 if (strlen(li->buf) == 0) {
1062 E_WARN(
"Empty line %d in control file, skipping\n", li->lineno);
1065 build_filenames(wtf->
config, li->buf, &infile, &outfile);
1068 rv = sphinx_wave2feat_convert_file(wtf, infile, outfile);
1082 if (fclose(ctlfh) == EOF)
1088 main(
int argc,
char *argv[])
1095 if ((config =
cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL)
1102 if (config == NULL) {
1103 E_ERROR(
"Command line parsing failed\n");
1106 if ((wtf = sphinx_wave2feat_init(config)) == NULL) {
1107 E_ERROR(
"Failed to initialize wave2feat object\n");
1115 rv = run_control_file(wtf,
cmd_ln_str_r(config,
"-c"));
1117 rv = sphinx_wave2feat_convert_file(wtf,
cmd_ln_str_r(config,
"-i"),
1120 sphinx_wave2feat_free(wtf);
Command-line and other configuration parsing and handling.
SPHINXBASE_EXPORT cmd_ln_t * cmd_ln_retain(cmd_ln_t *cmdln)
Retain ownership of a command-line argument set.
Miscellaneous useful string functions.
hash_entry_t * ent
Current entry in that table.
SPHINXBASE_EXPORT int32 hash_table_lookup(hash_table_t *h, const char *key, void **val)
Look up a key in a hash table and optionally return the associated value.
int veclen
Length of each output vector.
#define ckd_calloc_2d(d1, d2, sz)
Macro for ckd_calloc_2d
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
#define E_INFO
Print logging information to standard error stream.
output_type_t const * ot
Output type object.
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT int cmd_ln_free_r(cmd_ln_t *cmdln)
Release a command-line argument set and all associated strings.
File name related operations.
SPHINXBASE_EXPORT cmd_ln_t * cmd_ln_parse_r(cmd_ln_t *inout_cmdln, arg_t const *defn, int32 argc, char *argv[], int32 strict)
Parse a list of strings into arguments.
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter(hash_table_t *h)
Start iterating over key-value pairs in a hash table.
#define ckd_salloc(ptr)
Macro for ckd_salloc
#define hash_entry_val(e)
Access macros.
SPHINXBASE_EXPORT char const * cmd_ln_str_r(cmd_ln_t *cmdln, char const *name)
Retrieve a string from a command-line object.
SPHINXBASE_EXPORT hash_table_t * hash_table_new(int32 size, int32 casearg)
Allocate a new hash table for a given expected size.
FILE * infh
Input file handle.
int refcount
Reference count.
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
#define E_WARN
Print warning information to standard error stream.
char * outfile
Path to output file.
SPHINXBASE_EXPORT void hash_table_free(hash_table_t *h)
Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...
SPHINXBASE_EXPORT int build_directory(const char *path)
Create a directory and all of its parent directories, as needed.
SPHINXBASE_EXPORT double atof_c(char const *str)
Locale independent version of atof().
int featsize
Size of feature buffer.
SPHINXBASE_EXPORT void lineiter_free(lineiter_t *li)
Stop reading lines from a file.
FILE * outfh
Output file handle.
SPHINXBASE_EXPORT lineiter_t * lineiter_next(lineiter_t *li)
Move to the next line in the file.
int byteswap
Whether byteswapping is necessary.
mfcc_t ** feat
Feature buffer.
SPHINXBASE_EXPORT lineiter_t * lineiter_start(FILE *fh)
Start reading lines from a file.
int in_veclen
Length of each input vector (for cep<->spec).
Implementation of logging routines.
SPHINXBASE_EXPORT void * hash_table_enter(hash_table_t *h, const char *key, void *val)
Try to add a new entry with given key and associated value to hash table h.
SPHINXBASE_EXPORT cmd_ln_t * cmd_ln_parse_file_r(cmd_ln_t *inout_cmdln, arg_t const *defn, char const *filename, int32 strict)
Parse an arguments file by delimiting on " \r\t\n" and putting each token into an argv[] for cmd_l...
short * audio
Audio buffer.
SPHINXBASE_EXPORT int32 str2words(char *line, char **wptr, int32 n_wptr)
Convert a line to an array of "words", based on whitespace separators.
Opaque structure used to hold the results of command-line parsing.
char * infile
Path to input file.
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter_next(hash_iter_t *itor)
Get the next key-value pair in iteration.
SPHINXBASE_EXPORT char * string_join(const char *base,...)
Concatenate a NULL-terminated argument list of strings, returning a newly allocated string...
#define E_ERROR
Print error message to standard error stream.
SPHINXBASE_EXPORT void ckd_free_2d(void *ptr)
Free a 2-D array (ptr) previously allocated by ckd_calloc_2d.
int blocksize
Size of audio buffer.
#define cmd_ln_boolean_r(c, n)
Retrieve a boolean value from a command-line object.
cmd_ln_t * config
Configuration parameters.
Hash table implementation.
Structure for the front-end computation.
SPHINXBASE_EXPORT void cmd_ln_set_str_r(cmd_ln_t *cmdln, char const *name, char const *str)
Set a string in a command-line object.
SPHINXBASE_EXPORT char * string_trim(char *string, enum string_edge_e which)
Remove whitespace from a string, modifying it in-place.
fe_t * fe
Front end object.
#define E_ERROR_SYSTEM
Print error text, then call perror("").
SPHINXBASE_EXPORT void path2dirname(const char *path, char *dir)
Strip off the filename from the given path and copy the directory name into dir. Caller must have allocate...
file IO related operations.