54 #include "sphinxbase/fsg_model.h"
/*
 * Keywords of the textual FSG file format.
 *
 * The reader compares the first word of each line against these with
 * strcmp(). For the state-count, start-state, final-state and transition
 * declarations it accepts either the short spelling (N, S, F, T) or the long
 * one (NUM_STATES, START_STATE, FINAL_STATE, TRANSITION). The writer emits
 * the FSG_BEGIN / FSG_END markers and the long spellings only.
 */
76 #define FSG_MODEL_BEGIN_DECL "FSG_BEGIN"
77 #define FSG_MODEL_END_DECL "FSG_END"
78 #define FSG_MODEL_N_DECL "N"
79 #define FSG_MODEL_NUM_STATES_DECL "NUM_STATES"
80 #define FSG_MODEL_S_DECL "S"
81 #define FSG_MODEL_START_STATE_DECL "START_STATE"
82 #define FSG_MODEL_F_DECL "F"
83 #define FSG_MODEL_FINAL_STATE_DECL "FINAL_STATE"
84 #define FSG_MODEL_T_DECL "T"
85 #define FSG_MODEL_TRANSITION_DECL "TRANSITION"
/*
 * The line reader tests the first character of each input line against this
 * value. NOTE(review): such lines are presumably skipped as comments; the
 * handling itself is not visible in this listing -- confirm.
 */
86 #define FSG_MODEL_COMMENT_CHAR '#'
90 nextline_str2words(FILE * fp, int32 * lineno,
91 char **lineptr,
char ***wordptr)
103 if ((*lineptr)[0] == FSG_MODEL_COMMENT_CHAR)
111 if (*wordptr == NULL)
114 *wordptr =
ckd_realloc(*wordptr, n *
sizeof(**wordptr));
121 int32 from, int32 to, int32 logp, int32 wid)
127 if (fsg->
trans[from].trans == NULL)
131 for (gn = gl = fsg_model_trans(fsg, from, to); gn; gn = gnode_next(gn)) {
133 if (link->
wid == wid) {
142 link->from_state = from;
150 (
char const *) &link->to_state,
151 sizeof(link->to_state), gl);
155 fsg_model_tag_trans_add(
fsg_model_t * fsg, int32 from, int32 to,
156 int32 logp, int32 wid)
162 E_FATAL(
"Null transition prob must be <= 1.0 (state %d -> %d)\n",
170 if (fsg->
trans[from].null_trans == NULL)
174 link = fsg_model_null_trans(fsg, from, to);
186 link->from_state = from;
193 (
char const *) &link->to_state,
194 sizeof(link->to_state), link);
195 assert(link == link2);
201 fsg_model_null_trans_add(
fsg_model_t * fsg, int32 from, int32 to,
204 return fsg_model_tag_trans_add(fsg, from, to, logp, -1);
215 E_INFO(
"Computing transitive closure for null transitions\n");
221 for (i = 0; i < fsg->
n_state; ++i) {
222 for (j = 0; j < fsg->
n_state; ++j) {
223 if ((null = fsg_model_null_trans(fsg, i, j)))
237 for (gn1 = nulls; gn1; gn1 = gnode_next(gn1)) {
241 assert(tl1->
wid < 0);
243 if (fsg->
trans[tl1->to_state].null_trans == NULL)
251 k = fsg_model_null_trans_add(fsg,
261 (fsg, tl1->from_state,
270 E_INFO(
"%d null transitions added\n", n);
276 fsg_model_trans(
fsg_model_t * fsg, int32 i, int32 j)
280 if (fsg->
trans[i].trans == NULL)
283 sizeof(j), &val) < 0)
289 fsg_model_null_trans(
fsg_model_t * fsg, int32 i, int32 j)
293 if (fsg->
trans[i].null_trans == NULL)
296 sizeof(j), &val) < 0)
306 if (fsg->
trans[i].trans == NULL && fsg->
trans[i].null_trans == NULL)
309 if (fsg->
trans[i].null_trans)
311 if (fsg->
trans[i].trans)
313 if (itor->itor != NULL)
324 else if (itor->null_itor)
335 itor->gn = gnode_next(itor->gn);
337 if (itor->gn == NULL) {
339 if (itor->itor != NULL)
341 else if (itor->null_itor == NULL)
346 if (itor->null_itor == NULL)
349 if (itor->null_itor == NULL)
354 fsg_arciter_free(itor);
370 fsg_model_word_id(
fsg_model_t * fsg,
char const *word)
375 for (wid = 0; wid < fsg->
n_word; ++wid) {
376 if (0 == strcmp(fsg->
vocab[wid], word))
386 fsg_model_word_add(
fsg_model_t * fsg,
char const *word)
391 wid = fsg_model_word_id(fsg, word);
400 sizeof(*fsg->
vocab));
415 fsg_model_add_silence(
fsg_model_t * fsg,
char const *silword,
416 int state, float32 silprob)
419 int n_trans, silwid, src;
421 E_INFO(
"Adding silence transitions for %s to FSG\n", silword);
423 silwid = fsg_model_word_add(fsg, silword);
431 for (src = 0; src < fsg->
n_state; src++) {
432 fsg_model_trans_add(fsg, src, src, logsilp, silwid);
437 fsg_model_trans_add(fsg, state, state, logsilp, silwid);
441 E_INFO(
"Added %d silence word transitions\n", n_trans);
446 fsg_model_add_alt(
fsg_model_t * fsg,
char const *baseword,
449 int i, basewid, altwid;
453 for (basewid = 0; basewid < fsg->
n_word; ++basewid)
454 if (0 == strcmp(fsg->
vocab[basewid], baseword))
456 if (basewid == fsg->
n_word) {
457 E_ERROR(
"Base word %s not present in FSG vocabulary!\n", baseword);
460 altwid = fsg_model_word_add(fsg, altword);
465 E_DEBUG(2, (
"Adding alternate word transitions (%s,%s) to FSG\n",
471 for (i = 0; i < fsg->
n_state; ++i) {
473 if (fsg->
trans[i].trans == NULL)
481 for (gn = trans; gn; gn = gnode_next(gn)) {
483 if (fl->
wid == basewid) {
488 link->from_state = fl->from_state;
489 link->to_state = fl->to_state;
501 E_DEBUG(2, (
"Added %d alternate word transitions\n", ntrans));
507 fsg_model_init(
char const *name,
logmath_t * lmath, float32 lw,
527 fsg_model_read(FILE * fp,
logmath_t * lmath, float32 lw)
538 int n_state, n_trans, n_null_trans;
552 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
554 E_ERROR(
"%s declaration missing\n", FSG_MODEL_BEGIN_DECL);
558 if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) {
560 E_ERROR(
"Line[%d]: malformed FSG_BEGIN declaration\n",
574 E_WARN(
"FSG name is missing\n");
579 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
581 || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0)
582 && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0))
583 || (sscanf(wordptr[1],
"%d", &n_state) != 1)
586 (
"Line[%d]: #states declaration line missing or malformed\n",
592 fsg = fsg_model_init(fsgname, lmath, lw, n_state);
597 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
599 || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0)
600 && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0))
601 || (sscanf(wordptr[1],
"%d", &(fsg->
start_state)) != 1)
605 (
"Line[%d]: start state declaration line missing or malformed\n",
611 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
613 || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0)
614 && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0))
615 || (sscanf(wordptr[1],
"%d", &(fsg->
final_state)) != 1)
619 (
"Line[%d]: final state declaration line missing or malformed\n",
626 n_trans = n_null_trans = 0;
630 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
632 E_ERROR(
"Line[%d]: transition or FSG_END statement expected\n",
637 if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) {
641 if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0)
642 || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) {
645 if (((n != 4) && (n != 5))
646 || (sscanf(wordptr[1],
"%d", &i) != 1)
647 || (sscanf(wordptr[2],
"%d", &j) != 1)
648 || (i < 0) || (i >= fsg->
n_state)
649 || (j < 0) || (j >= fsg->
n_state)) {
651 (
"Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
657 if ((p <= 0.0) || (p > 1.0)) {
659 (
"Line[%d]: transition spec malformed; Expecting float as transition probability\n",
665 E_ERROR(
"Line[%d]: transition or FSG_END statement expected\n",
680 fsg_model_trans_add(fsg, i, j, tprob, wid);
684 if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) {
692 E_INFO(
"FSG: %d states, %d unique words, %d transitions (%d null)\n",
693 fsg->
n_state, hash_table_inuse(vocab), n_trans, n_null_trans);
697 fsg->
n_word = hash_table_inuse(vocab);
702 char const *word = hash_entry_key(itor->
ent);
704 fsg->
vocab[wid] = (
char *) word;
709 nulls = fsg_model_null_trans_closure(fsg, nulls);
732 fsg_model_readfile(
const char *file,
logmath_t * lmath, float32 lw)
737 if ((fp = fopen(file,
"r")) == NULL) {
741 fsg = fsg_model_read(fp, lmath, lw);
761 if (fsg->
trans[i].trans) {
783 for (i = 0; i < fsg->
n_word; ++i)
785 for (i = 0; i < fsg->
n_state; ++i)
786 trans_list_free(fsg, i);
803 fprintf(fp,
"%s %s\n", FSG_MODEL_BEGIN_DECL,
805 fprintf(fp,
"%s %d\n", FSG_MODEL_NUM_STATES_DECL, fsg->
n_state);
806 fprintf(fp,
"%s %d\n", FSG_MODEL_START_STATE_DECL, fsg->
start_state);
807 fprintf(fp,
"%s %d\n", FSG_MODEL_FINAL_STATE_DECL, fsg->
final_state);
809 for (i = 0; i < fsg->
n_state; i++) {
812 for (itor = fsg_model_arcs(fsg, i); itor;
813 itor = fsg_arciter_next(itor)) {
816 fprintf(fp,
"%s %d %d %f %s\n", FSG_MODEL_TRANSITION_DECL,
817 tl->from_state, tl->to_state,
820 (tl->
wid < 0) ?
"" : fsg_model_word_str(fsg, tl->
wid));
824 fprintf(fp,
"%s\n", FSG_MODEL_END_DECL);
830 fsg_model_writefile(
fsg_model_t * fsg,
char const *file)
836 E_INFO(
"Writing FSG file '%s'\n", file);
838 if ((fp = fopen(file,
"w")) == NULL) {
843 fsg_model_write(fsg, fp);
849 fsg_model_write_fsm_trans(
fsg_model_t * fsg,
int i, FILE * fp)
853 for (itor = fsg_model_arcs(fsg, i); itor;
854 itor = fsg_arciter_next(itor)) {
856 fprintf(fp,
"%d %d %s %f\n",
857 tl->from_state, tl->to_state,
858 (tl->
wid < 0) ?
"<eps>" : fsg_model_word_str(fsg, tl->
wid),
869 fsg_model_write_fsm_trans(fsg, fsg_model_start_state(fsg), fp);
872 for (i = 0; i < fsg->
n_state; i++) {
873 if (i == fsg_model_start_state(fsg))
875 fsg_model_write_fsm_trans(fsg, i, fp);
879 fprintf(fp,
"%d 0\n", fsg_model_final_state(fsg));
885 fsg_model_writefile_fsm(
fsg_model_t * fsg,
char const *file)
891 E_INFO(
"Writing FSM file '%s'\n", file);
893 if ((fp = fopen(file,
"w")) == NULL) {
898 fsg_model_write_fsm(fsg, fp);
904 fsg_model_write_symtab(
fsg_model_t * fsg, FILE * file)
908 fprintf(file,
"<eps> 0\n");
909 for (i = 0; i < fsg_model_n_word(fsg); ++i) {
910 fprintf(file,
"%s %d\n", fsg_model_word_str(fsg, i), i + 1);
916 fsg_model_writefile_symtab(
fsg_model_t * fsg,
char const *file)
922 E_INFO(
"Writing FSM symbol table '%s'\n", file);
924 if ((fp = fopen(file,
"w")) == NULL) {
925 E_ERROR(
"Failed to open symbol table '%s' for writing", file);
929 fsg_model_write_symtab(fsg, fp);
SPHINXBASE_EXPORT int32 hash_table_lookup_int32(hash_table_t *h, const char *key, int32 *val)
Look up a 32-bit integer value in a hash table.
int32 start_state
Must be in the range [0..n_state-1].
SPHINXBASE_EXPORT void * hash_table_enter_bkey(hash_table_t *h, const char *key, size_t len, void *val)
Like hash_table_enter, but with an explicitly specified key length, instead of a NULL-terminated, C-style key string.
Miscellaneous useful string functions.
int refcount
Reference count.
hash_entry_t * ent
Current entry in that table.
int32 final_state
Must be in the range [0..n_state-1].
int32 n_word_alloc
Number of words allocated in vocab.
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
#define hash_table_enter_int32(h, k, v)
Add a 32-bit integer value to a hash table.
SPHINXBASE_EXPORT int32 hash_table_lookup_bkey(hash_table_t *h, const char *key, size_t len, void **val)
Like hash_table_lookup, but with an explicitly specified key length, instead of a NULL-terminated, C-style key string.
float32 lw
Language weight that's been applied to transition logprobs.
#define E_DEBUG(level, x)
Print debugging information to standard error stream.
#define E_INFO
Print logging information to standard error stream.
listelem_alloc_t * link_alloc
Allocator for FSG links.
#define listelem_malloc(le)
Allocate a list element and return pointer to it.
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT void hash_table_iter_free(hash_iter_t *itor)
Delete an unfinished iterator.
int32 n_word
Number of unique words in this FSG.
int32 logs2prob
log(transition probability)*lw
SPHINXBASE_EXPORT int logmath_log(logmath_t *lmath, float64 p)
Convert linear floating point number to integer log in base B.
A node in a generic list.
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter(hash_table_t *h)
Start iterating over key-value pairs in a hash table.
#define ckd_salloc(ptr)
Macro for ckd_salloc
#define hash_entry_val(e)
Access macros.
Basic type definitions used in Sphinx.
SPHINXBASE_EXPORT hash_table_t * hash_table_new(int32 size, int32 casearg)
Allocate a new hash table for a given expected size.
Adjacency list (opaque) for a state in an FSG.
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
SPHINXBASE_EXPORT glist_t glist_add_ptr(glist_t g, void *ptr)
Create and prepend a new list node, with the given user-defined data, at the HEAD of the given generic list.
#define E_WARN
Print warning information to standard error stream.
SPHINXBASE_EXPORT void hash_table_free(hash_table_t *h)
Free the specified hash table; the caller is responsible for freeing the key strings pointed to by the table entries.
SPHINXBASE_EXPORT float64 logmath_log_to_ln(logmath_t *lmath, int logb_p)
Convert integer log in base B to natural log (in floating point).
SPHINXBASE_EXPORT double atof_c(char const *str)
Locale independent version of atof().
SPHINXBASE_EXPORT char * fread_line(FILE *stream, size_t *out_len)
Read a line of arbitrary length from a file and return it as a newly allocated string.
Implementation of arc iterator.
SPHINXBASE_EXPORT void glist_free(glist_t g)
Free the given generic list; user-defined data contained within is not automatically freed.
#define gnode_ptr(g)
Head of a list of gnodes.
Implementation of logging routines.
int32 n_state
number of states in FSG
SPHINXBASE_EXPORT int32 str2words(char *line, char **wptr, int32 n_wptr)
Convert a line to an array of "words", based on whitespace separators.
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter_next(hash_iter_t *itor)
Get the next key-value pair in iteration.
#define E_FATAL
Exit with non-zero status after error message.
#define E_ERROR
Print error message to standard error stream.
SPHINXBASE_EXPORT listelem_alloc_t * listelem_alloc_init(size_t elemsize)
Initialize and return a list element allocator.
bitvec_t * altwords
Indicates which words are pronunciation alternates.
trans_list_t * trans
Transitions out of each state, if any.
SPHINXBASE_EXPORT bitvec_t * bitvec_realloc(bitvec_t *vec, size_t old_len, size_t new_len)
Resize a bit vector, clear the remaining bits.
int32 wid
Word-ID; <0 if epsilon or null transition.
Hash table implementation.
#define bitvec_set(v, b)
Set the b-th bit of bit vector v.
Word level FSG definition.
SPHINXBASE_EXPORT void * hash_table_replace_bkey(hash_table_t *h, const char *key, size_t len, void *val)
Like hash_table_replace, but with an explicitly specified key length, instead of a NULL-terminated, C-style key string.
#define E_ERROR_SYSTEM
Print error text, then call perror("").
bitvec_t * silwords
Indicates which words are silence/fillers.
SPHINXBASE_EXPORT float64 logmath_exp(logmath_t *lmath, int logb_p)
Convert integer log in base B to linear floating point.
#define ckd_realloc(ptr, sz)
Macro for ckd_realloc
logmath_t * lmath
Pointer to log math computation object.
#define bitvec_alloc(n)
Allocate a bit vector, all bits are clear.
file IO related operations.
#define bitvec_free(v)
Free a bit vector.
char * name
A unique string identifier for this FSG.
SPHINXBASE_EXPORT void listelem_alloc_free(listelem_alloc_t *le)
Finalize and release all memory associated with a list element allocator.
char ** vocab
Vocabulary for this FSG.