SphinxBase
0.6
Main Page
Related Pages
Data Structures
Files
File List
Globals
lm3g_model.h
1
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
/* ====================================================================
3
* Copyright (c) 1999-2007 Carnegie Mellon University. All rights
4
* reserved.
5
*
6
* Redistribution and use in source and binary forms, with or without
7
* modification, are permitted provided that the following conditions
8
* are met:
9
*
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
*
13
* 2. Redistributions in binary form must reproduce the above copyright
14
* notice, this list of conditions and the following disclaimer in
15
* the documentation and/or other materials provided with the
16
* distribution.
17
*
18
* This work was supported in part by funding from the Defense Advanced
19
* Research Projects Agency and the National Science Foundation of the
20
* United States of America, and the CMU Sphinx Speech Consortium.
21
*
22
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
*
34
* ====================================================================
35
*
36
*/
37
/*
38
* \file lm3g_model.h Core Sphinx 3-gram code used in
39
* DMP/DMP32/ARPA (for now) model code.
40
*
41
* Author: A cast of thousands, probably.
42
*/
43
44
#ifndef __NGRAM_MODEL_LM3G_H__
45
#define __NGRAM_MODEL_LM3G_H__
46
47
#include "
sphinxbase/listelem_alloc.h
"
48
49
#include "ngram_model_internal.h"
50
54
typedef
union
{
55
float32 f;
56
int32 l;
57
}
lmprob_t
;
58
68
typedef
struct
sorted_entry_s
{
69
lmprob_t
val
;
70
uint32
lower
;
73
uint32
higher
;
76
}
sorted_entry_t
;
77
82
typedef
struct
{
83
sorted_entry_t
*list;
84
int32
free
;
85
int32 size;
86
}
sorted_list_t
;
87
91
typedef
struct
unigram_s
{
92
lmprob_t
prob1
;
93
lmprob_t
bo_wt1
;
94
int32
bigrams
;
95
}
unigram_t
;
96
100
/**
 * Bigram structure.
 *
 * Only declared here; the layout of struct bigram_s is supplied elsewhere
 * (the cross-reference points to ngram_model_arpa.h).
 */
typedef struct bigram_s bigram_t;
104
/**
 * Trigram structure.
 *
 * Only declared here; the layout of struct trigram_s is supplied elsewhere
 * (the cross-reference points to ngram_model_arpa.h).
 */
typedef struct trigram_s trigram_t;
105
106
107
/*
 * To conserve space, bigram info is kept in many tables.  Since the number
 * of distinct values << #bigrams, these table indices can be 16-bit values.
 * prob2 and bo_wt2 are such indices, but keeping the trigram index is less
 * easy.  It is supposed to be the index of the first trigram entry for each
 * bigram, but such an index cannot be represented in 16 bits, hence the
 * following segmentation scheme: partition bigrams into segments of
 * BG_SEG_SZ consecutive entries, such that #trigrams in each segment
 * <= 2**16 (the corresponding trigram segment).  The bigram_t.trigrams
 * value is then a 16-bit relative index within the trigram segment.  A
 * separate table -- lm_t.tseg_base (the tseg_base member of lm3g_model_t in
 * this header) -- has the index of the 1st trigram for each bigram segment.
 */
119
/* Log2 of the bigram segment size; bigram index i belongs to segment
 * (i >> LOG_BG_SEG_SZ). */
#define LOG_BG_SEG_SZ 9
/* Number of consecutive bigrams per segment (512), chosen so that
 * #trigram/segment <= 2**16.  Derived from LOG_BG_SEG_SZ so the two
 * constants cannot drift apart. */
#define BG_SEG_SZ (1 << LOG_BG_SEG_SZ)
121
129
typedef
struct
tginfo_s
{
130
int32
w1
;
132
int32
n_tg
;
133
int32
bowt
;
134
int32
used
;
135
trigram_t
*
tg
;
136
struct
tginfo_s
*
next
;
137
}
tginfo_t
;
138
142
typedef
struct
lm3g_model_s
{
143
unigram_t
*unigrams;
144
bigram_t
*bigrams;
145
trigram_t
*trigrams;
146
lmprob_t
*
prob2
;
147
int32
n_prob2
;
148
lmprob_t
*
bo_wt2
;
149
int32
n_bo_wt2
;
150
lmprob_t
*
prob3
;
151
int32
n_prob3
;
152
int32 *
tseg_base
;
154
tginfo_t
**
tginfo
;
156
listelem_alloc_t
*
le
;
157
}
lm3g_model_t
;
158
159
/**
 * NOTE(review): implementation is not in this header; judging by the name,
 * this releases the trigram information cache (lm3g->tginfo) -- confirm.
 *
 * @param base Generic n-gram model the lm3g data belongs to.
 * @param lm3g 3-gram specific model data.
 */
void lm3g_tginfo_free(ngram_model_t *base, lm3g_model_t *lm3g);
160
/**
 * NOTE(review): implementation is not in this header; judging by the name
 * and by tginfo_t.used ("whether used since last lm_reset"), this resets
 * the trigram information cache -- confirm.
 *
 * @param base Generic n-gram model the lm3g data belongs to.
 * @param lm3g 3-gram specific model data.
 */
void lm3g_tginfo_reset(ngram_model_t *base, lm3g_model_t *lm3g);
161
void
lm3g_apply_weights(
ngram_model_t
*base,
162
lm3g_model_t
*lm3g,
163
float32 lw, float32 wip, float32 uw);
164
int32 lm3g_add_ug(
ngram_model_t
*base,
165
lm3g_model_t
*lm3g, int32 wid, int32 lweight);
166
167
172
/**
 * NOTE(review): implementation is not in this header; judging by the name,
 * this prepares a sorted_list_t for use -- confirm.
 *
 * @param l The sorted list.
 */
void init_sorted_list(sorted_list_t *l);
173
/**
 * NOTE(review): implementation is not in this header; judging by the name,
 * this releases the storage held by a sorted_list_t -- confirm.
 *
 * @param l The sorted list.
 */
void free_sorted_list(sorted_list_t *l);
174
/**
 * NOTE(review): implementation is not in this header; judging by the name
 * and return type, this returns the values stored in the list as an
 * lmprob_t array -- confirm, including who owns the returned pointer.
 *
 * @param l The sorted list.
 * @return Pointer to lmprob_t values.
 */
lmprob_t *vals_in_sorted_list(sorted_list_t *l);
175
/**
 * NOTE(review): implementation is not in this header; judging by the name,
 * this returns the index of `*val` within the sorted list -- confirm,
 * including whether a missing value is inserted.
 *
 * @param l   The sorted list.
 * @param val Pointer to the value to look up.
 * @return An int32 identifier for the value.
 */
int32 sorted_id(sorted_list_t *l, int32 *val);
176
177
#endif
/* __NGRAM_MODEL_LM3G_H__ */
unigram_s::bo_wt1
lmprob_t bo_wt1
Unigram backoff weight.
Definition:
lm3g_model.h:93
lm3g_model_s::le
listelem_alloc_t * le
List element allocator for tginfo.
Definition:
lm3g_model.h:156
tginfo_s::w1
int32 w1
lw1 component of bigram lw1,lw2.
Definition:
lm3g_model.h:130
sorted_entry_s::val
lmprob_t val
value being kept in this node
Definition:
lm3g_model.h:69
lm3g_model_s::n_prob3
int32 n_prob3
prob3 size
Definition:
lm3g_model.h:151
lm3g_model_s::prob2
lmprob_t * prob2
Table of actual bigram probs.
Definition:
lm3g_model.h:146
bigram_s
Bigram structure.
Definition:
ngram_model_arpa.h:52
tginfo_s::used
int32 used
whether used since last lm_reset
Definition:
lm3g_model.h:134
unigram_s
Unigram structure (common among all lm3g implementations)
Definition:
lm3g_model.h:91
lm3g_model_s::n_bo_wt2
int32 n_bo_wt2
bo_wt2 size
Definition:
lm3g_model.h:149
unigram_s::bigrams
int32 bigrams
Index of 1st entry in lm_t.bigrams[].
Definition:
lm3g_model.h:94
lm3g_model_s::prob3
lmprob_t * prob3
Table of actual trigram probs.
Definition:
lm3g_model.h:150
tginfo_s::tg
trigram_t * tg
Trigrams for lw1,lw2.
Definition:
lm3g_model.h:135
tginfo_s
Trigram information cache.
Definition:
lm3g_model.h:129
listelem_alloc_s
Fast linked list allocator.
Definition:
listelem_alloc.c:65
trigram_s
Trigram structure.
Definition:
ngram_model_arpa.h:66
lm3g_model_s
Common internal structure for Sphinx 3-gram models.
Definition:
lm3g_model.h:142
listelem_alloc.h
Fast memory allocator for uniformly sized objects.
tginfo_s::n_tg
int32 n_tg
number tg for parent bigram lw1,lw2
Definition:
lm3g_model.h:132
sorted_list_t
The sorted list.
Definition:
lm3g_model.h:82
tginfo_s::next
struct tginfo_s * next
Next lw1 with same parent lw2; NULL if none.
Definition:
lm3g_model.h:136
sorted_entry_s::lower
uint32 lower
index of another entry.
Definition:
lm3g_model.h:70
unigram_s::prob1
lmprob_t prob1
Unigram probability.
Definition:
lm3g_model.h:92
lm3g_model_s::bo_wt2
lmprob_t * bo_wt2
Table of actual bigram backoff weights.
Definition:
lm3g_model.h:148
sorted_entry_s
Bigram probs and bo-wts, and trigram probs are kept in separate tables rather than within the bigram_...
Definition:
lm3g_model.h:68
ngram_model_s
Common implementation of ngram_model_t.
Definition:
ngram_model_internal.h:55
lmprob_t
Type used to store language model probabilities.
Definition:
lm3g_model.h:54
sorted_list_t::free
int32 free
first free element in list
Definition:
lm3g_model.h:84
tginfo_s::bowt
int32 bowt
tg bowt for lw1,lw2
Definition:
lm3g_model.h:133
lm3g_model_s::tginfo
tginfo_t ** tginfo
tginfo[lw2] is head of linked list of trigram information for some cached subset of bigrams (*...
Definition:
lm3g_model.h:154
lm3g_model_s::n_prob2
int32 n_prob2
prob2 size
Definition:
lm3g_model.h:147
lm3g_model_s::tseg_base
int32 * tseg_base
tseg_base[i>>LOG_BG_SEG_SZ] = index of 1st trigram for bigram segment (i>>LOG_BG_SEG_SZ) ...
Definition:
lm3g_model.h:152
sorted_entry_s::higher
uint32 higher
index of another entry.
Definition:
lm3g_model.h:73
src
libsphinxbase
lm
lm3g_model.h
Generated on Fri Aug 22 2014 02:04:05 for SphinxBase by
1.8.7