SphinxBase
0.6
|
Set of language models. More...
#include <string.h>
#include <stdlib.h>
#include "sphinxbase/err.h"
#include "sphinxbase/ckd_alloc.h"
#include "sphinxbase/strfuncs.h"
#include "sphinxbase/filename.h"
#include "ngram_model_set.h"
Go to the source code of this file.
Functions | |
ngram_model_t * | ngram_model_set_init (cmd_ln_t *config, ngram_model_t **models, char **names, const float32 *weights, int32 n_models) |
Create a set of language models sharing a common space of word IDs. More... | |
ngram_model_t * | ngram_model_set_read (cmd_ln_t *config, const char *lmctlfile, logmath_t *lmath) |
Read a set of language models from a control file. More... | |
int32 | ngram_model_set_count (ngram_model_t *base) |
Returns the number of language models in a set. | |
ngram_model_set_iter_t * | ngram_model_set_iter (ngram_model_t *base) |
Begin iterating over language models in a set. More... | |
ngram_model_set_iter_t * | ngram_model_set_iter_next (ngram_model_set_iter_t *itor) |
Move to the next language model in a set. More... | |
void | ngram_model_set_iter_free (ngram_model_set_iter_t *itor) |
Finish iteration over a language model set. | |
ngram_model_t * | ngram_model_set_iter_model (ngram_model_set_iter_t *itor, char const **lmname) |
Get language model and associated name from an iterator. More... | |
ngram_model_t * | ngram_model_set_lookup (ngram_model_t *base, const char *name) |
Look up a language model by name from a set. More... | |
ngram_model_t * | ngram_model_set_select (ngram_model_t *base, const char *name) |
Select a single language model from a set for scoring. More... | |
const char * | ngram_model_set_current (ngram_model_t *base) |
Get the current language model name, if any. | |
int32 | ngram_model_set_current_wid (ngram_model_t *base, int32 set_wid) |
Query the word-ID mapping for the current language model. More... | |
int32 | ngram_model_set_known_wid (ngram_model_t *base, int32 set_wid) |
Test whether a word ID corresponds to a known word in the current state of the language model set. More... | |
ngram_model_t * | ngram_model_set_interp (ngram_model_t *base, const char **names, const float32 *weights) |
Set interpolation weights for a set and enable interpolation. More... | |
ngram_model_t * | ngram_model_set_add (ngram_model_t *base, ngram_model_t *model, const char *name, float32 weight, int reuse_widmap) |
Add a language model to a set. More... | |
ngram_model_t * | ngram_model_set_remove (ngram_model_t *base, const char *name, int reuse_widmap) |
Remove a language model from a set. More... | |
void | ngram_model_set_map_words (ngram_model_t *base, const char **words, int32 n_words) |
Set the word-to-ID mapping for this model set. | |
Set of language models.
Definition in file ngram_model_set.c.
ngram_model_t* ngram_model_set_add | ( | ngram_model_t * | set, |
ngram_model_t * | model, | ||
const char * | name, | ||
float32 | weight, | ||
int | reuse_widmap | ||
) |
Add a language model to a set.
set | The language model set to add to. |
model | The language model to add. |
name | The name to associate with this model. |
weight | Interpolation weight for this model, relative to the uniform distribution. 1.0 is a safe value. |
reuse_widmap | Reuse the existing word-ID mapping in set. Any new words present in model will not be added to the word-ID mapping in this case. |
Definition at line 520 of file ngram_model_set.c.
References ckd_calloc_2d, ckd_free_2d(), ckd_realloc, ckd_salloc, ngram_model_s::lmath, ngram_model_set_s::lms, logmath_log(), ngram_model_set_s::lweights, ngram_model_set_s::maphist, ngram_model_s::n, ngram_model_set_s::n_models, ngram_model_s::n_words, ngram_model_set_s::names, ngram_wid(), ngram_model_set_s::widmap, and ngram_model_s::word_str.
int32 ngram_model_set_current_wid | ( | ngram_model_t * | set, |
int32 | set_wid | ||
) |
Query the word-ID mapping for the current language model.
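Returns the word ID in the current language model corresponding to set_wid, or NGRAM_INVALID_WID if set_wid is invalid or interpolation is enabled.
Definition at line 456 of file ngram_model_set.c.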
References ngram_model_set_s::cur, ngram_model_s::n_words, NGRAM_INVALID_WID, and ngram_model_set_s::widmap.
ngram_model_t* ngram_model_set_init | ( | cmd_ln_t * | config, |
ngram_model_t ** | models, | ||
char ** | names, | ||
const float32 * | weights, | ||
int32 | n_models | ||
) |
Create a set of language models sharing a common space of word IDs.
This function creates a meta-language model which groups together a set of language models, synchronizing word IDs between them. To use this language model, you can either select a submodel to use exclusively using ngram_model_set_select(), or interpolate between scores from all models. To do the latter, you can either pass a non-NULL value for the weights parameter, or re-activate interpolation later on by calling ngram_model_set_interp().
In order to make this efficient, there are some restrictions on the models that can be grouped together. The most important (and currently the only) one is that they must all share the same log-math parameters.
config | Any configuration parameters to be shared between models. |
models | Array of pointers to previously created language models. |
names | Array of strings to use as unique identifiers for LMs. |
weights | Array of weights to use in interpolating LMs, or NULL for no interpolation. |
n_models | Number of elements in the arrays passed to this function. |
Definition at line 121 of file ngram_model_set.c.
References ngram_model_set_s::base, ckd_calloc, ckd_salloc, ngram_model_set_s::cur, E_ERROR, ngram_model_s::lmath, ngram_model_set_s::lms, logmath_get_base(), logmath_get_shift(), logmath_log(), ngram_model_set_s::lweights, ngram_model_set_s::maphist, ngram_model_s::n, ngram_model_set_s::n_models, and ngram_model_set_s::names.
Referenced by ngram_model_set_read().
ngram_model_t* ngram_model_set_interp | ( | ngram_model_t * | set, |
const char ** | names, | ||
const float32 * | weights | ||
) |
Set interpolation weights for a set and enable interpolation.
If weights is NULL, any previously initialized set of weights will be used. If no weights were specified to ngram_model_set_init(), then a uniform distribution will be used.
Definition at line 489 of file ngram_model_set.c.
References ngram_model_set_s::cur, E_ERROR, ngram_model_s::lmath, logmath_log(), ngram_model_set_s::lweights, ngram_model_set_s::n_models, and ngram_model_set_s::names.
ngram_model_set_iter_t* ngram_model_set_iter | ( | ngram_model_t * | set | ) |
Begin iterating over language models in a set.
Definition at line 368 of file ngram_model_set.c.
References ckd_calloc, and ngram_model_set_s::n_models.
ngram_model_t* ngram_model_set_iter_model | ( | ngram_model_set_iter_t * | itor, |
char const ** | lmname | ||
) |
Get language model and associated name from an iterator.
itor | the iterator |
lmname | Output: string name associated with this language model. |
Definition at line 397 of file ngram_model_set.c.
References ngram_model_set_s::lms, and ngram_model_set_s::names.
ngram_model_set_iter_t* ngram_model_set_iter_next | ( | ngram_model_set_iter_t * | itor | ) |
Move to the next language model in a set.
Definition at line 381 of file ngram_model_set.c.
References ngram_model_set_s::n_models, and ngram_model_set_iter_free().
int32 ngram_model_set_known_wid | ( | ngram_model_t * | set, |
int32 | set_wid | ||
) |
Test whether a word ID corresponds to a known word in the current state of the language model set.
If there is a current language model, returns non-zero if set_wid corresponds to a known word in that language model. Otherwise, returns non-zero if set_wid corresponds to a known word in any language model.
Definition at line 468 of file ngram_model_set.c.
References ngram_model_set_s::cur, ngram_model_set_s::lms, ngram_model_set_s::n_models, ngram_model_s::n_words, ngram_unknown_wid(), and ngram_model_set_s::widmap.
ngram_model_t* ngram_model_set_lookup | ( | ngram_model_t * | set, |
const char * | name | ||
) |
Look up a language model by name from a set.
Returns the language model corresponding to name, or NULL if no language model by that name exists.
Definition at line 405 of file ngram_model_set.c.
References ngram_model_set_s::cur, ngram_model_set_s::lms, ngram_model_set_s::n_models, and ngram_model_set_s::names.
ngram_model_t* ngram_model_set_read | ( | cmd_ln_t * | config, |
const char * | lmctlfile, | ||
logmath_t * | lmath | ||
) |
Read a set of language models from a control file.
This function creates a language model set from a "control file" of the type used in Sphinx-II and Sphinx-III. File format (optional stuff is indicated by enclosing in []):
[{ LMClassFileName LMClassFileName ... }]
TrigramLMFileName LMName [{ LMClassName LMClassName ... }]
TrigramLMFileName LMName [{ LMClassName LMClassName ... }]
...
(There should be whitespace around the { and } delimiters.)
This is an extension of the older format that had only TrigramLMFileName and LMName pairs. The new format allows a set of LMClass files to be read in and referred to by the trigram LMs.
No "comments" allowed in this file.
config | Configuration parameters. |
lmctlfile | Path to the language model control file. |
lmath | Log-math parameters to use for probability calculations. Ownership of this object is assumed by the newly created ngram_model_t, and you should not attempt to free it manually. If you wish to reuse it elsewhere, you must retain it with logmath_retain(). |
Definition at line 182 of file ngram_model_set.c.
References ckd_calloc, ckd_free(), ckd_salloc, E_ERROR, E_ERROR_SYSTEM, E_INFO, glist_add_ptr(), glist_count(), glist_free(), glist_reverse(), gnode_ptr, hash_table_free(), hash_table_lookup(), hash_table_new(), hash_table_tolist(), NGRAM_AUTO, ngram_model_add_class(), ngram_model_free(), ngram_model_read(), ngram_model_set_init(), path_is_absolute(), string_join(), and hash_entry_s::val.
ngram_model_t* ngram_model_set_remove | ( | ngram_model_t * | set, |
const char * | name, | ||
int | reuse_widmap | ||
) |
Remove a language model from a set.
set | The language model set to remove from. |
name | The name associated with the model to remove. |
reuse_widmap | Reuse the existing word-ID mapping in set. |
Definition at line 580 of file ngram_model_set.c.
References ckd_free(), ngram_model_s::lmath, ngram_model_set_s::lms, ngram_model_s::log_zero, logmath_exp(), logmath_log(), ngram_model_set_s::lweights, ngram_model_s::n, ngram_model_set_s::n_models, ngram_model_s::n_words, ngram_model_set_s::names, and ngram_model_set_s::widmap.
ngram_model_t* ngram_model_set_select | ( | ngram_model_t * | set, |
const char * | name | ||
) |
Select a single language model from a set for scoring.
Definition at line 428 of file ngram_model_set.c.
References ngram_model_set_s::cur, ngram_model_set_s::lms, ngram_model_set_s::n_models, and ngram_model_set_s::names.