43 #ifndef __NGRAM_MODEL_INTERNAL_H__
44 #define __NGRAM_MODEL_INTERNAL_H__
/* Fixed hash size of 128 entries. NOTE(review): the table this sizes is not visible in this chunk - confirm at the use site. */
100 #define NGRAM_HASH_SIZE 128
/*
 * Class word ID layout, as encoded by the macros below:
 *   bit 31     - flag marking the ID as a class word ID
 *   bits 24-30 - class ID (7 bits)
 *   bits 0-23  - base word ID (24 bits)
 */
/* Extract the low 24 bits (the base word ID) of a word ID. */
102 #define NGRAM_BASEWID(wid) ((wid)&0xffffff)
/* Extract the 7-bit class ID held in bits 24-30 of a word ID. */
103 #define NGRAM_CLASSID(wid) (((wid)>>24) & 0x7f)
/* Build a class word ID: classid is shifted to bit 24, bit 31 is set, and wid is
   OR-ed in without masking, so wid is expected to fit in the low 24 bits. */
104 #define NGRAM_CLASSWID(wid,classid) (((classid)<<24) | 0x80000000 | (wid))
/* Nonzero when bit 31 (the class-word flag) is set; the result is the masked bit, not 0/1. */
105 #define NGRAM_IS_CLASSWID(wid) ((wid)&0x80000000)
/* NOTE(review): presumably the growth increment used when allocating room for new unigrams - confirm against the implementation. */
107 #define UG_ALLOC_STEP 10
151 int32 wid, int32 lweight);
216 int32 n, int32 n_unigram);
222 const char *file_name,
228 const char *file_name,
234 const char *file_name,
241 const char *file_name);
246 const char *file_name);
/*
 * Takes a hash table (classes) and the path of a class definition file.
 * NOTE(review): presumably parses classdef_file and stores the resulting class
 * definitions in classes; the implementation and the meaning of the int32
 * return value are not visible here - confirm against the .c file.
 */
251 int32 read_classdef_file(
hash_table_t *classes,
const char *classdef_file);
262 int32 start_wid,
glist_t classwords);
280 int m,
int successor);
struct ngram_funcs_s * funcs
Implementation-specific methods.
int32 next
Index of next bucket (or -1 for no collision)
int32 log_uniform
Log of uniform (0-gram) probability.
int32 * tmp_wids
Temporary array of word IDs for ngram_model_get_ngram()
hash_table_t * wid
Mapping of unigram names to word IDs.
char ** word_str
Unigram names.
int32 log_uniform_weight
Log of uniform weight (i.e. 1 - unigram weight).
int32 n_hash
Number of buckets in nword_hash (power of 2)
void(* free)(ngram_model_t *model)
Implementation-specific function for freeing an ngram_model_t.
int(* apply_weights)(ngram_model_t *model, float32 lw, float32 wip, float32 uw)
Implementation-specific function for applying language model weights.
A node in a generic list.
uint8 writable
Are word strings writable?
int32 * n_counts
Counts for 1, 2, 3, ... grams.
int32 n_words
Number of base words for this class.
int32 log_zero
Zero probability, cached here for quick lookup.
int refcount
Reference count.
int32 n_1g_alloc
Number of allocated word strings (for new word addition)
uint8 flags
Any other flags we might care about (FIXME: Merge this and writable)
int32(* raw_score)(ngram_model_t *model, int32 wid, int32 *history, int32 n_hist, int32 *n_used)
Implementation-specific function for querying raw language model probability.
Custom hash table for additional words.
int32 tag_wid
Base word ID for this class tag.
int32 n_hash_inuse
Number of words in nword_hash.
uint8 n
This is an n-gram model (1, 2, 3, ...).
logmath_t * lmath
Log-math object.
One class definition from a classdef file.
int32 log_uw
Log of unigram weight.
int32 start_wid
Starting base word ID for this class' words.
int32(* add_ug)(ngram_model_t *model, int32 wid, int32 lweight)
Implementation-specific function for adding unigrams.
uint8 n_classes
Number of classes (maximum 128)
Opaque structure used to hold the results of command-line parsing.
void(* iter_free)(ngram_iter_t *itor)
Implementation-specific function for freeing an N-gram iterator.
Implementation-specific functions for operating on ngram_model_t objects.
float32 lw
Language model scaling factor.
Base iterator structure for N-grams.
Implementation of ngram_class_t.
Hash table implementation.
int32 prob1
Probability for this word.
int32 * wids
Scratch space for word IDs.
void(* flush)(ngram_model_t *model)
Implementation-specific function for purging N-Gram cache.
Common implementation of ngram_model_t.
int32 wid
Word ID of this bucket.
int32(* score)(ngram_model_t *model, int32 wid, int32 *history, int32 n_hist, int32 *n_used)
Implementation-specific function for querying language model score.
struct ngram_class_s ** classes
Word class definitions.
int16 successor
Is this a successor iterator?
int32 * prob1
Probability table for base words.
int32 log_wip
Log of word insertion penalty.
int32 n_words
Number of actual word strings (NOT the same as the number of unigrams, due to class words).