SphinxBase  0.6
fsg_model.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  *
19  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * ====================================================================
32  *
33  */
34 
35 /* System headers. */
36 #ifdef _WIN32_WCE
37 /*MC in a debug build it's implicitly included by assert.h
38  but you need this in a release build */
39 #include <windows.h>
40 #else
41 #include <time.h>
42 #endif /* _WIN32_WCE */
43 #include <stdio.h>
44 #include <string.h>
45 #include <assert.h>
46 
47 /* SphinxBase headers. */
48 #include "sphinxbase/err.h"
49 #include "sphinxbase/pio.h"
50 #include "sphinxbase/ckd_alloc.h"
51 #include "sphinxbase/prim_type.h"
52 #include "sphinxbase/strfuncs.h"
53 #include "sphinxbase/hash_table.h"
54 #include "sphinxbase/fsg_model.h"
55 
63 struct trans_list_s {
64  hash_table_t *null_trans; /* Null transitions keyed by state. */
65  hash_table_t *trans; /* Lists of non-null transitions keyed by state. */
66 };
67 
71 struct fsg_arciter_s {
72  hash_iter_t *itor, *null_itor;
73  gnode_t *gn;
74 };
75 
/*
 * Keywords of the textual FSG file format.  Where two spellings are
 * listed for the same declaration, the reader accepts either one.
 */

/* Grammar delimiters. */
#define FSG_MODEL_BEGIN_DECL            "FSG_BEGIN"
#define FSG_MODEL_END_DECL              "FSG_END"

/* Number of states (short / long form). */
#define FSG_MODEL_N_DECL                "N"
#define FSG_MODEL_NUM_STATES_DECL       "NUM_STATES"

/* Start state (short / long form). */
#define FSG_MODEL_S_DECL                "S"
#define FSG_MODEL_START_STATE_DECL      "START_STATE"

/* Final state (short / long form). */
#define FSG_MODEL_F_DECL                "F"
#define FSG_MODEL_FINAL_STATE_DECL      "FINAL_STATE"

/* Transition (short / long form). */
#define FSG_MODEL_T_DECL                "T"
#define FSG_MODEL_TRANSITION_DECL       "TRANSITION"

/* A line whose first character is this one is a comment. */
#define FSG_MODEL_COMMENT_CHAR          '#'
87 
88 
89 static int32
90 nextline_str2words(FILE * fp, int32 * lineno,
91  char **lineptr, char ***wordptr)
92 {
93  for (;;) {
94  size_t len;
95  int32 n;
96 
97  ckd_free(*lineptr);
98  if ((*lineptr = fread_line(fp, &len)) == NULL)
99  return -1;
100 
101  (*lineno)++;
102 
103  if ((*lineptr)[0] == FSG_MODEL_COMMENT_CHAR)
104  continue; /* Skip comment lines */
105 
106  n = str2words(*lineptr, NULL, 0);
107  if (n == 0)
108  continue; /* Skip blank lines */
109 
110  /* Abuse of realloc(), but this doesn't have to be fast. */
111  if (*wordptr == NULL)
112  *wordptr = ckd_calloc(n, sizeof(**wordptr));
113  else
114  *wordptr = ckd_realloc(*wordptr, n * sizeof(**wordptr));
115  return str2words(*lineptr, *wordptr, n);
116  }
117 }
118 
119 void
120 fsg_model_trans_add(fsg_model_t * fsg,
121  int32 from, int32 to, int32 logp, int32 wid)
122 {
123  fsg_link_t *link;
124  glist_t gl;
125  gnode_t *gn;
126 
127  if (fsg->trans[from].trans == NULL)
128  fsg->trans[from].trans = hash_table_new(5, HASH_CASE_YES);
129 
130  /* Check for duplicate link (i.e., link already exists with label=wid) */
131  for (gn = gl = fsg_model_trans(fsg, from, to); gn; gn = gnode_next(gn)) {
132  link = (fsg_link_t *) gnode_ptr(gn);
133  if (link->wid == wid) {
134  if (link->logs2prob < logp)
135  link->logs2prob = logp;
136  return;
137  }
138  }
139 
140  /* Create transition object */
141  link = listelem_malloc(fsg->link_alloc);
142  link->from_state = from;
143  link->to_state = to;
144  link->logs2prob = logp;
145  link->wid = wid;
146 
147  /* Add it to the list of transitions and update the hash table */
148  gl = glist_add_ptr(gl, (void *) link);
149  hash_table_replace_bkey(fsg->trans[from].trans,
150  (char const *) &link->to_state,
151  sizeof(link->to_state), gl);
152 }
153 
154 int32
155 fsg_model_tag_trans_add(fsg_model_t * fsg, int32 from, int32 to,
156  int32 logp, int32 wid)
157 {
158  fsg_link_t *link, *link2;
159 
160  /* Check for transition probability */
161  if (logp > 0) {
162  E_FATAL("Null transition prob must be <= 1.0 (state %d -> %d)\n",
163  from, to);
164  }
165 
166  /* Self-loop null transitions (with prob <= 1.0) are redundant */
167  if (from == to)
168  return -1;
169 
170  if (fsg->trans[from].null_trans == NULL)
171  fsg->trans[from].null_trans = hash_table_new(5, HASH_CASE_YES);
172 
173  /* Check for a duplicate link; if found, keep the higher prob */
174  link = fsg_model_null_trans(fsg, from, to);
175  if (link) {
176  if (link->logs2prob < logp) {
177  link->logs2prob = logp;
178  return 0;
179  }
180  else
181  return -1;
182  }
183 
184  /* Create null transition object */
185  link = listelem_malloc(fsg->link_alloc);
186  link->from_state = from;
187  link->to_state = to;
188  link->logs2prob = logp;
189  link->wid = -1;
190 
191  link2 = (fsg_link_t *)
192  hash_table_enter_bkey(fsg->trans[from].null_trans,
193  (char const *) &link->to_state,
194  sizeof(link->to_state), link);
195  assert(link == link2);
196 
197  return 1;
198 }
199 
200 int32
201 fsg_model_null_trans_add(fsg_model_t * fsg, int32 from, int32 to,
202  int32 logp)
203 {
204  return fsg_model_tag_trans_add(fsg, from, to, logp, -1);
205 }
206 
207 glist_t
208 fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls)
209 {
210  gnode_t *gn1;
211  int updated;
212  fsg_link_t *tl1, *tl2;
213  int32 k, n;
214 
215  E_INFO("Computing transitive closure for null transitions\n");
216 
217  if (nulls == NULL) {
218  fsg_link_t *null;
219  int i, j;
220 
221  for (i = 0; i < fsg->n_state; ++i) {
222  for (j = 0; j < fsg->n_state; ++j) {
223  if ((null = fsg_model_null_trans(fsg, i, j)))
224  nulls = glist_add_ptr(nulls, null);
225  }
226  }
227  }
228 
229  /*
230  * Probably not the most efficient closure implementation, in general, but
231  * probably reasonably efficient for a sparse null transition matrix.
232  */
233  n = 0;
234  do {
235  updated = FALSE;
236 
237  for (gn1 = nulls; gn1; gn1 = gnode_next(gn1)) {
238  hash_iter_t *itor;
239 
240  tl1 = (fsg_link_t *) gnode_ptr(gn1);
241  assert(tl1->wid < 0);
242 
243  if (fsg->trans[tl1->to_state].null_trans == NULL)
244  continue;
245 
246  for (itor = hash_table_iter(fsg->trans[tl1->to_state].null_trans);
247  itor; itor = hash_table_iter_next(itor)) {
248 
249  tl2 = (fsg_link_t *) hash_entry_val(itor->ent);
250 
251  k = fsg_model_null_trans_add(fsg,
252  tl1->from_state,
253  tl2->to_state,
254  tl1->logs2prob +
255  tl2->logs2prob);
256  if (k >= 0) {
257  updated = TRUE;
258  if (k > 0) {
259  nulls = glist_add_ptr(nulls, (void *)
260  fsg_model_null_trans
261  (fsg, tl1->from_state,
262  tl2->to_state));
263  n++;
264  }
265  }
266  }
267  }
268  } while (updated);
269 
270  E_INFO("%d null transitions added\n", n);
271 
272  return nulls;
273 }
274 
275 glist_t
276 fsg_model_trans(fsg_model_t * fsg, int32 i, int32 j)
277 {
278  void *val;
279 
280  if (fsg->trans[i].trans == NULL)
281  return NULL;
282  if (hash_table_lookup_bkey(fsg->trans[i].trans, (char const *) &j,
283  sizeof(j), &val) < 0)
284  return NULL;
285  return (glist_t) val;
286 }
287 
288 fsg_link_t *
289 fsg_model_null_trans(fsg_model_t * fsg, int32 i, int32 j)
290 {
291  void *val;
292 
293  if (fsg->trans[i].null_trans == NULL)
294  return NULL;
295  if (hash_table_lookup_bkey(fsg->trans[i].null_trans, (char const *) &j,
296  sizeof(j), &val) < 0)
297  return NULL;
298  return (fsg_link_t *) val;
299 }
300 
302 fsg_model_arcs(fsg_model_t * fsg, int32 i)
303 {
304  fsg_arciter_t *itor;
305 
306  if (fsg->trans[i].trans == NULL && fsg->trans[i].null_trans == NULL)
307  return NULL;
308  itor = ckd_calloc(1, sizeof(*itor));
309  if (fsg->trans[i].null_trans)
310  itor->null_itor = hash_table_iter(fsg->trans[i].null_trans);
311  if (fsg->trans[i].trans)
312  itor->itor = hash_table_iter(fsg->trans[i].trans);
313  if (itor->itor != NULL)
314  itor->gn = hash_entry_val(itor->itor->ent);
315  return itor;
316 }
317 
318 fsg_link_t *
319 fsg_arciter_get(fsg_arciter_t * itor)
320 {
321  /* Iterate over non-null arcs first. */
322  if (itor->gn)
323  return (fsg_link_t *) gnode_ptr(itor->gn);
324  else if (itor->null_itor)
325  return (fsg_link_t *) hash_entry_val(itor->null_itor->ent);
326  else
327  return NULL;
328 }
329 
331 fsg_arciter_next(fsg_arciter_t * itor)
332 {
333  /* Iterate over non-null arcs first. */
334  if (itor->gn) {
335  itor->gn = gnode_next(itor->gn);
336  /* Move to the next destination arc. */
337  if (itor->gn == NULL) {
338  itor->itor = hash_table_iter_next(itor->itor);
339  if (itor->itor != NULL)
340  itor->gn = hash_entry_val(itor->itor->ent);
341  else if (itor->null_itor == NULL)
342  goto stop_iteration;
343  }
344  }
345  else {
346  if (itor->null_itor == NULL)
347  goto stop_iteration;
348  itor->null_itor = hash_table_iter_next(itor->null_itor);
349  if (itor->null_itor == NULL)
350  goto stop_iteration;
351  }
352  return itor;
353  stop_iteration:
354  fsg_arciter_free(itor);
355  return NULL;
356 
357 }
358 
359 void
360 fsg_arciter_free(fsg_arciter_t * itor)
361 {
362  if (itor == NULL)
363  return;
364  hash_table_iter_free(itor->null_itor);
365  hash_table_iter_free(itor->itor);
366  ckd_free(itor);
367 }
368 
369 int
370 fsg_model_word_id(fsg_model_t * fsg, char const *word)
371 {
372  int wid;
373 
374  /* Search for an existing word matching this. */
375  for (wid = 0; wid < fsg->n_word; ++wid) {
376  if (0 == strcmp(fsg->vocab[wid], word))
377  break;
378  }
379  /* If not found, add this to the vocab. */
380  if (wid == fsg->n_word)
381  return -1;
382  return wid;
383 }
384 
385 int
386 fsg_model_word_add(fsg_model_t * fsg, char const *word)
387 {
388  int wid, old_size;
389 
390  /* Search for an existing word matching this. */
391  wid = fsg_model_word_id(fsg, word);
392  /* If not found, add this to the vocab. */
393  if (wid == -1) {
394  wid = fsg->n_word;
395  if (fsg->n_word == fsg->n_word_alloc) {
396  old_size = fsg->n_word_alloc;
397  fsg->n_word_alloc += 10;
398  fsg->vocab = ckd_realloc(fsg->vocab,
399  fsg->n_word_alloc *
400  sizeof(*fsg->vocab));
401  if (fsg->silwords)
402  fsg->silwords =
403  bitvec_realloc(fsg->silwords, old_size, fsg->n_word_alloc);
404  if (fsg->altwords)
405  fsg->altwords =
406  bitvec_realloc(fsg->altwords, old_size, fsg->n_word_alloc);
407  }
408  ++fsg->n_word;
409  fsg->vocab[wid] = ckd_salloc(word);
410  }
411  return wid;
412 }
413 
414 int
415 fsg_model_add_silence(fsg_model_t * fsg, char const *silword,
416  int state, float32 silprob)
417 {
418  int32 logsilp;
419  int n_trans, silwid, src;
420 
421  E_INFO("Adding silence transitions for %s to FSG\n", silword);
422 
423  silwid = fsg_model_word_add(fsg, silword);
424  logsilp = (int32) (logmath_log(fsg->lmath, silprob) * fsg->lw);
425  if (fsg->silwords == NULL)
426  fsg->silwords = bitvec_alloc(fsg->n_word_alloc);
427  bitvec_set(fsg->silwords, silwid);
428 
429  n_trans = 0;
430  if (state == -1) {
431  for (src = 0; src < fsg->n_state; src++) {
432  fsg_model_trans_add(fsg, src, src, logsilp, silwid);
433  ++n_trans;
434  }
435  }
436  else {
437  fsg_model_trans_add(fsg, state, state, logsilp, silwid);
438  ++n_trans;
439  }
440 
441  E_INFO("Added %d silence word transitions\n", n_trans);
442  return n_trans;
443 }
444 
445 int
446 fsg_model_add_alt(fsg_model_t * fsg, char const *baseword,
447  char const *altword)
448 {
449  int i, basewid, altwid;
450  int ntrans;
451 
452  /* FIXME: This will get slow, eventually... */
453  for (basewid = 0; basewid < fsg->n_word; ++basewid)
454  if (0 == strcmp(fsg->vocab[basewid], baseword))
455  break;
456  if (basewid == fsg->n_word) {
457  E_ERROR("Base word %s not present in FSG vocabulary!\n", baseword);
458  return -1;
459  }
460  altwid = fsg_model_word_add(fsg, altword);
461  if (fsg->altwords == NULL)
462  fsg->altwords = bitvec_alloc(fsg->n_word_alloc);
463  bitvec_set(fsg->altwords, altwid);
464 
465  E_DEBUG(2, ("Adding alternate word transitions (%s,%s) to FSG\n",
466  baseword, altword));
467 
468  /* Look for all transitions involving baseword and duplicate them. */
469  /* FIXME: This will also get slow, eventually... */
470  ntrans = 0;
471  for (i = 0; i < fsg->n_state; ++i) {
472  hash_iter_t *itor;
473  if (fsg->trans[i].trans == NULL)
474  continue;
475  for (itor = hash_table_iter(fsg->trans[i].trans); itor;
476  itor = hash_table_iter_next(itor)) {
477  glist_t trans;
478  gnode_t *gn;
479 
480  trans = hash_entry_val(itor->ent);
481  for (gn = trans; gn; gn = gnode_next(gn)) {
482  fsg_link_t *fl = gnode_ptr(gn);
483  if (fl->wid == basewid) {
484  fsg_link_t *link;
485 
486  /* Create transition object */
487  link = listelem_malloc(fsg->link_alloc);
488  link->from_state = fl->from_state;
489  link->to_state = fl->to_state;
490  link->logs2prob = fl->logs2prob; /* FIXME!!!??? */
491  link->wid = altwid;
492 
493  trans = glist_add_ptr(trans, (void *) link);
494  ++ntrans;
495  }
496  }
497  hash_entry_val(itor->ent) = trans;
498  }
499  }
500 
501  E_DEBUG(2, ("Added %d alternate word transitions\n", ntrans));
502  return ntrans;
503 }
504 
505 
506 fsg_model_t *
507 fsg_model_init(char const *name, logmath_t * lmath, float32 lw,
508  int32 n_state)
509 {
510  fsg_model_t *fsg;
511 
512  /* Allocate basic stuff. */
513  fsg = ckd_calloc(1, sizeof(*fsg));
514  fsg->refcount = 1;
515  fsg->link_alloc = listelem_alloc_init(sizeof(fsg_link_t));
516  fsg->lmath = lmath;
517  fsg->name = name ? ckd_salloc(name) : NULL;
518  fsg->n_state = n_state;
519  fsg->lw = lw;
520 
521  fsg->trans = ckd_calloc(fsg->n_state, sizeof(*fsg->trans));
522 
523  return fsg;
524 }
525 
526 fsg_model_t *
527 fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw)
528 {
529  fsg_model_t *fsg;
530  hash_table_t *vocab;
531  hash_iter_t *itor;
532  int32 lastwid;
533  char **wordptr;
534  char *lineptr;
535  char *fsgname;
536  int32 lineno;
537  int32 n, i, j;
538  int n_state, n_trans, n_null_trans;
539  glist_t nulls;
540  float32 p;
541 
542  lineno = 0;
543  vocab = hash_table_new(32, FALSE);
544  wordptr = NULL;
545  lineptr = NULL;
546  nulls = NULL;
547  fsgname = NULL;
548  fsg = NULL;
549 
550  /* Scan upto FSG_BEGIN header */
551  for (;;) {
552  n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
553  if (n < 0) {
554  E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL);
555  goto parse_error;
556  }
557 
558  if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) {
559  if (n > 2) {
560  E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n",
561  lineno);
562  goto parse_error;
563  }
564  break;
565  }
566  }
567  /* Save FSG name, or it will get clobbered below :(.
568  * If name is missing, try the default.
569  */
570  if (n == 2) {
571  fsgname = ckd_salloc(wordptr[1]);
572  }
573  else {
574  E_WARN("FSG name is missing\n");
575  fsgname = ckd_salloc("unknown");
576  }
577 
578  /* Read #states */
579  n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
580  if ((n != 2)
581  || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0)
582  && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0))
583  || (sscanf(wordptr[1], "%d", &n_state) != 1)
584  || (n_state <= 0)) {
585  E_ERROR
586  ("Line[%d]: #states declaration line missing or malformed\n",
587  lineno);
588  goto parse_error;
589  }
590 
591  /* Now create the FSG. */
592  fsg = fsg_model_init(fsgname, lmath, lw, n_state);
593  ckd_free(fsgname);
594  fsgname = NULL;
595 
596  /* Read start state */
597  n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
598  if ((n != 2)
599  || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0)
600  && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0))
601  || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1)
602  || (fsg->start_state < 0)
603  || (fsg->start_state >= fsg->n_state)) {
604  E_ERROR
605  ("Line[%d]: start state declaration line missing or malformed\n",
606  lineno);
607  goto parse_error;
608  }
609 
610  /* Read final state */
611  n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
612  if ((n != 2)
613  || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0)
614  && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0))
615  || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1)
616  || (fsg->final_state < 0)
617  || (fsg->final_state >= fsg->n_state)) {
618  E_ERROR
619  ("Line[%d]: final state declaration line missing or malformed\n",
620  lineno);
621  goto parse_error;
622  }
623 
624  /* Read transitions */
625  lastwid = 0;
626  n_trans = n_null_trans = 0;
627  for (;;) {
628  int32 wid, tprob;
629 
630  n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
631  if (n <= 0) {
632  E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
633  lineno);
634  goto parse_error;
635  }
636 
637  if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) {
638  break;
639  }
640 
641  if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0)
642  || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) {
643 
644 
645  if (((n != 4) && (n != 5))
646  || (sscanf(wordptr[1], "%d", &i) != 1)
647  || (sscanf(wordptr[2], "%d", &j) != 1)
648  || (i < 0) || (i >= fsg->n_state)
649  || (j < 0) || (j >= fsg->n_state)) {
650  E_ERROR
651  ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
652  lineno);
653  goto parse_error;
654  }
655 
656  p = atof_c(wordptr[3]);
657  if ((p <= 0.0) || (p > 1.0)) {
658  E_ERROR
659  ("Line[%d]: transition spec malformed; Expecting float as transition probability\n",
660  lineno);
661  goto parse_error;
662  }
663  }
664  else {
665  E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
666  lineno);
667  goto parse_error;
668  }
669 
670  tprob = (int32) (logmath_log(lmath, p) * fsg->lw);
671  /* Add word to "dictionary". */
672  if (n > 4) {
673  if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) {
674  (void) hash_table_enter_int32(vocab,
675  ckd_salloc(wordptr[4]),
676  lastwid);
677  wid = lastwid;
678  ++lastwid;
679  }
680  fsg_model_trans_add(fsg, i, j, tprob, wid);
681  ++n_trans;
682  }
683  else {
684  if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) {
685  ++n_null_trans;
686  nulls =
687  glist_add_ptr(nulls, fsg_model_null_trans(fsg, i, j));
688  }
689  }
690  }
691 
692  E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n",
693  fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans);
694 
695 
696  /* Now create a string table from the "dictionary" */
697  fsg->n_word = hash_table_inuse(vocab);
698  fsg->n_word_alloc = fsg->n_word + 10; /* Pad it a bit. */
699  fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab));
700  for (itor = hash_table_iter(vocab); itor;
701  itor = hash_table_iter_next(itor)) {
702  char const *word = hash_entry_key(itor->ent);
703  int32 wid = (int32) (long) hash_entry_val(itor->ent);
704  fsg->vocab[wid] = (char *) word;
705  }
706  hash_table_free(vocab);
707 
708  /* Do transitive closure on null transitions */
709  nulls = fsg_model_null_trans_closure(fsg, nulls);
710  glist_free(nulls);
711 
712  ckd_free(lineptr);
713  ckd_free(wordptr);
714 
715  return fsg;
716 
717  parse_error:
718  for (itor = hash_table_iter(vocab); itor;
719  itor = hash_table_iter_next(itor))
720  ckd_free((char *) hash_entry_key(itor->ent));
721  glist_free(nulls);
722  hash_table_free(vocab);
723  ckd_free(fsgname);
724  ckd_free(lineptr);
725  ckd_free(wordptr);
726  fsg_model_free(fsg);
727  return NULL;
728 }
729 
730 
731 fsg_model_t *
732 fsg_model_readfile(const char *file, logmath_t * lmath, float32 lw)
733 {
734  FILE *fp;
735  fsg_model_t *fsg;
736 
737  if ((fp = fopen(file, "r")) == NULL) {
738  E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file);
739  return NULL;
740  }
741  fsg = fsg_model_read(fp, lmath, lw);
742  fclose(fp);
743  return fsg;
744 }
745 
746 fsg_model_t *
747 fsg_model_retain(fsg_model_t * fsg)
748 {
749  ++fsg->refcount;
750  return fsg;
751 }
752 
753 static void
754 trans_list_free(fsg_model_t * fsg, int32 i)
755 {
756  hash_iter_t *itor;
757 
758  /* FIXME (maybe): FSG links will all get freed when we call
759  * listelem_alloc_free() so don't bother freeing them explicitly
760  * here. */
761  if (fsg->trans[i].trans) {
762  for (itor = hash_table_iter(fsg->trans[i].trans);
763  itor; itor = hash_table_iter_next(itor)) {
764  glist_t gl = (glist_t) hash_entry_val(itor->ent);
765  glist_free(gl);
766  }
767  }
768  hash_table_free(fsg->trans[i].trans);
769  hash_table_free(fsg->trans[i].null_trans);
770 }
771 
772 int
773 fsg_model_free(fsg_model_t * fsg)
774 {
775  int i;
776 
777  if (fsg == NULL)
778  return 0;
779 
780  if (--fsg->refcount > 0)
781  return fsg->refcount;
782 
783  for (i = 0; i < fsg->n_word; ++i)
784  ckd_free(fsg->vocab[i]);
785  for (i = 0; i < fsg->n_state; ++i)
786  trans_list_free(fsg, i);
787  ckd_free(fsg->trans);
788  ckd_free(fsg->vocab);
790  bitvec_free(fsg->silwords);
791  bitvec_free(fsg->altwords);
792  ckd_free(fsg->name);
793  ckd_free(fsg);
794  return 0;
795 }
796 
797 
798 void
799 fsg_model_write(fsg_model_t * fsg, FILE * fp)
800 {
801  int32 i;
802 
803  fprintf(fp, "%s %s\n", FSG_MODEL_BEGIN_DECL,
804  fsg->name ? fsg->name : "");
805  fprintf(fp, "%s %d\n", FSG_MODEL_NUM_STATES_DECL, fsg->n_state);
806  fprintf(fp, "%s %d\n", FSG_MODEL_START_STATE_DECL, fsg->start_state);
807  fprintf(fp, "%s %d\n", FSG_MODEL_FINAL_STATE_DECL, fsg->final_state);
808 
809  for (i = 0; i < fsg->n_state; i++) {
810  fsg_arciter_t *itor;
811 
812  for (itor = fsg_model_arcs(fsg, i); itor;
813  itor = fsg_arciter_next(itor)) {
814  fsg_link_t *tl = fsg_arciter_get(itor);
815 
816  fprintf(fp, "%s %d %d %f %s\n", FSG_MODEL_TRANSITION_DECL,
817  tl->from_state, tl->to_state,
818  logmath_exp(fsg->lmath,
819  (int32) (tl->logs2prob / fsg->lw)),
820  (tl->wid < 0) ? "" : fsg_model_word_str(fsg, tl->wid));
821  }
822  }
823 
824  fprintf(fp, "%s\n", FSG_MODEL_END_DECL);
825 
826  fflush(fp);
827 }
828 
829 void
830 fsg_model_writefile(fsg_model_t * fsg, char const *file)
831 {
832  FILE *fp;
833 
834  assert(fsg);
835 
836  E_INFO("Writing FSG file '%s'\n", file);
837 
838  if ((fp = fopen(file, "w")) == NULL) {
839  E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file);
840  return;
841  }
842 
843  fsg_model_write(fsg, fp);
844 
845  fclose(fp);
846 }
847 
848 static void
849 fsg_model_write_fsm_trans(fsg_model_t * fsg, int i, FILE * fp)
850 {
851  fsg_arciter_t *itor;
852 
853  for (itor = fsg_model_arcs(fsg, i); itor;
854  itor = fsg_arciter_next(itor)) {
855  fsg_link_t *tl = fsg_arciter_get(itor);
856  fprintf(fp, "%d %d %s %f\n",
857  tl->from_state, tl->to_state,
858  (tl->wid < 0) ? "<eps>" : fsg_model_word_str(fsg, tl->wid),
859  -logmath_log_to_ln(fsg->lmath, tl->logs2prob / fsg->lw));
860  }
861 }
862 
863 void
864 fsg_model_write_fsm(fsg_model_t * fsg, FILE * fp)
865 {
866  int i;
867 
868  /* Write transitions from initial state first. */
869  fsg_model_write_fsm_trans(fsg, fsg_model_start_state(fsg), fp);
870 
871  /* Other states. */
872  for (i = 0; i < fsg->n_state; i++) {
873  if (i == fsg_model_start_state(fsg))
874  continue;
875  fsg_model_write_fsm_trans(fsg, i, fp);
876  }
877 
878  /* Final state. */
879  fprintf(fp, "%d 0\n", fsg_model_final_state(fsg));
880 
881  fflush(fp);
882 }
883 
884 void
885 fsg_model_writefile_fsm(fsg_model_t * fsg, char const *file)
886 {
887  FILE *fp;
888 
889  assert(fsg);
890 
891  E_INFO("Writing FSM file '%s'\n", file);
892 
893  if ((fp = fopen(file, "w")) == NULL) {
894  E_ERROR_SYSTEM("Failed to open fsm file '%s' for writing", file);
895  return;
896  }
897 
898  fsg_model_write_fsm(fsg, fp);
899 
900  fclose(fp);
901 }
902 
903 void
904 fsg_model_write_symtab(fsg_model_t * fsg, FILE * file)
905 {
906  int i;
907 
908  fprintf(file, "<eps> 0\n");
909  for (i = 0; i < fsg_model_n_word(fsg); ++i) {
910  fprintf(file, "%s %d\n", fsg_model_word_str(fsg, i), i + 1);
911  }
912  fflush(file);
913 }
914 
915 void
916 fsg_model_writefile_symtab(fsg_model_t * fsg, char const *file)
917 {
918  FILE *fp;
919 
920  assert(fsg);
921 
922  E_INFO("Writing FSM symbol table '%s'\n", file);
923 
924  if ((fp = fopen(file, "w")) == NULL) {
925  E_ERROR("Failed to open symbol table '%s' for writing", file);
926  return;
927  }
928 
929  fsg_model_write_symtab(fsg, fp);
930 
931  fclose(fp);
932 }
SPHINXBASE_EXPORT int32 hash_table_lookup_int32(hash_table_t *h, const char *key, int32 *val)
Look up a 32-bit integer value in a hash table.
Definition: hash_table.c:329
int32 start_state
Must be in the range [0..n_state-1].
Definition: fsg_model.h:101
SPHINXBASE_EXPORT void * hash_table_enter_bkey(hash_table_t *h, const char *key, size_t len, void *val)
Like hash_table_enter, but with an explicitly specified key length, instead of a NULL-terminated, C-style key string.
Definition: hash_table.c:542
Miscellaneous useful string functions.
int refcount
Reference count.
Definition: fsg_model.h:92
hash_entry_t * ent
Current entry in that table.
Definition: hash_table.h:170
int32 final_state
Must be in the range [0..n_state-1].
Definition: fsg_model.h:102
int32 n_word_alloc
Number of words allocated in vocab.
Definition: fsg_model.h:95
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition: ckd_alloc.h:248
#define hash_table_enter_int32(h, k, v)
Add a 32-bit integer value to a hash table.
Definition: hash_table.h:228
SPHINXBASE_EXPORT int32 hash_table_lookup_bkey(hash_table_t *h, const char *key, size_t len, void **val)
Like hash_lookup, but with an explicitly specified key length, instead of a NULL-terminated, C-style key string.
Definition: hash_table.c:344
float32 lw
Language weight that's been applied to transition logprobs.
Definition: fsg_model.h:103
#define E_DEBUG(level, x)
Print debugging information to standard error stream.
Definition: err.h:212
#define E_INFO
Print logging information to standard error stream.
Definition: err.h:147
listelem_alloc_t * link_alloc
Allocator for FSG links.
Definition: fsg_model.h:106
#define listelem_malloc(le)
Allocate a list element and return pointer to it.
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT void hash_table_iter_free(hash_iter_t *itor)
Delete an unfinished iterator.
Definition: hash_table.c:689
int32 n_word
Number of unique words in this FSG.
Definition: fsg_model.h:94
SPHINXBASE_EXPORT int logmath_log(logmath_t *lmath, float64 p)
Convert linear floating point number to integer log in base B.
Definition: logmath.c:447
A node in a generic list.
Definition: glist.h:100
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter(hash_table_t *h)
Start iterating over key-value pairs in a hash table.
Definition: hash_table.c:653
#define ckd_salloc(ptr)
Macro for ckd_salloc
Definition: ckd_alloc.h:264
#define hash_entry_val(e)
Access macros.
Definition: hash_table.h:175
Basic type definitions used in Sphinx.
SPHINXBASE_EXPORT hash_table_t * hash_table_new(int32 size, int32 casearg)
Allocate a new hash table for a given expected size.
Definition: hash_table.c:158
Adjacency list (opaque) for a state in an FSG.
Definition: fsg_model.c:63
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition: ckd_alloc.c:241
SPHINXBASE_EXPORT glist_t glist_add_ptr(glist_t g, void *ptr)
Create and prepend a new list node, with the given user-defined data, at the HEAD of the given generi...
Definition: glist.c:74
#define E_WARN
Print warning information to standard error stream.
Definition: err.h:164
SPHINXBASE_EXPORT void hash_table_free(hash_table_t *h)
Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...
Definition: hash_table.c:695
SPHINXBASE_EXPORT float64 logmath_log_to_ln(logmath_t *lmath, int logb_p)
Convert integer log in base B to natural log (in floating point).
Definition: logmath.c:468
SPHINXBASE_EXPORT double atof_c(char const *str)
Locale independent version of atof().
Definition: strfuncs.c:56
SPHINXBASE_EXPORT char * fread_line(FILE *stream, size_t *out_len)
Read a line of arbitrary length from a file and return it as a newly allocated string.
Definition: pio.c:367
Implementation of arc iterator.
Definition: fsg_model.c:71
SPHINXBASE_EXPORT void glist_free(glist_t g)
Free the given generic list; user-defined data contained within is not automatically freed...
Definition: glist.c:133
#define gnode_ptr(g)
Head of a list of gnodes.
Definition: glist.h:109
Implementation of logging routines.
int32 n_state
number of states in FSG
Definition: fsg_model.h:100
SPHINXBASE_EXPORT int32 str2words(char *line, char **wptr, int32 n_wptr)
Convert a line to an array of "words", based on whitespace separators.
Definition: strfuncs.c:115
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter_next(hash_iter_t *itor)
Get the next key-value pair in iteration.
Definition: hash_table.c:663
#define E_FATAL
Exit with non-zero status after error message.
Definition: err.h:127
#define E_ERROR
Print error message to standard error stream.
Definition: err.h:169
SPHINXBASE_EXPORT listelem_alloc_t * listelem_alloc_init(size_t elemsize)
Initialize and return a list element allocator.
bitvec_t * altwords
Indicates which words are pronunciation alternates.
Definition: fsg_model.h:98
trans_list_t * trans
Transitions out of each state, if any.
Definition: fsg_model.h:105
SPHINXBASE_EXPORT bitvec_t * bitvec_realloc(bitvec_t *vec, size_t old_len, size_t new_len)
Resize a bit vector, clear the remaining bits.
Definition: bitvec.c:64
Hash table implementation.
#define bitvec_set(v, b)
Set the b-th bit of bit vector v.
Definition: bitvec.h:95
Word level FSG definition.
Definition: fsg_model.h:91
SPHINXBASE_EXPORT void * hash_table_replace_bkey(hash_table_t *h, const char *key, size_t len, void *val)
Like hash_table_replace, but with an explicitly specified key length, instead of a NULL-terminated...
Definition: hash_table.c:555
#define E_ERROR_SYSTEM
Print error text; Call perror("");.
Definition: err.h:142
bitvec_t * silwords
Indicates which words are silence/fillers.
Definition: fsg_model.h:97
SPHINXBASE_EXPORT float64 logmath_exp(logmath_t *lmath, int logb_p)
Convert integer log in base B to linear floating point.
Definition: logmath.c:456
#define ckd_realloc(ptr, sz)
Macro for ckd_realloc
Definition: ckd_alloc.h:258
logmath_t * lmath
Pointer to log math computation object.
Definition: fsg_model.h:99
#define bitvec_alloc(n)
Allocate a bit vector, all bits are clear.
Definition: bitvec.h:75
file IO related operations.
#define bitvec_free(v)
Free a bit vector.
Definition: bitvec.h:87
char * name
A unique string identifier for this FSG.
Definition: fsg_model.h:93
SPHINXBASE_EXPORT void listelem_alloc_free(listelem_alloc_t *le)
Finalize and release all memory associated with a list element allocator.
char ** vocab
Vocabulary for this FSG.
Definition: fsg_model.h:96