43 #include "sphinxbase/byteorder.h"
63 uint32 nbits, codeword;
85 huff_node_new_int(int32 val)
93 huff_node_new_str(
char const *val)
107 if (r->nbits > l->nbits)
108 hn->nbits = r->nbits + 1;
110 hn->nbits = l->nbits + 1;
118 huff_node_free_int(root->l);
119 huff_node_free_int(root->r.r);
128 huff_node_free_str(root->l, freestr);
129 huff_node_free_str(root->r.r, freestr);
139 huff_code_build_tree(
heap_t *q)
150 p = huff_node_new_parent(l, r);
164 hc->firstcode =
ckd_calloc(hc->maxbits+1,
sizeof(*hc->firstcode));
165 hc->syms =
ckd_calloc(hc->maxbits+1,
sizeof(*hc->syms));
166 hc->numl =
ckd_calloc(hc->maxbits+1,
sizeof(*nextcode));
167 nextcode =
ckd_calloc(hc->maxbits+1,
sizeof(*nextcode));
178 node->l->nbits = node->nbits + 1;
180 node->r.r->nbits = node->nbits + 1;
184 hc->numl[node->nbits]++;
189 hc->syms[hc->maxbits] =
ckd_calloc(hc->numl[hc->maxbits],
sizeof(**hc->syms));
190 for (i = hc->maxbits - 1; i > 0; --i) {
191 hc->firstcode[i] = (hc->firstcode[i+1] + hc->numl[i+1]) / 2;
192 hc->syms[i] =
ckd_calloc(hc->numl[i],
sizeof(**hc->syms));
194 memcpy(nextcode, hc->firstcode, (hc->maxbits + 1) *
sizeof(*nextcode));
208 uint32 codeword = nextcode[node->nbits] & ((1 << node->nbits) - 1);
209 cw = hc->syms[node->nbits] + (codeword - hc->firstcode[node->nbits]);
210 cw->nbits = node->nbits;
211 cw->r.sval = node->r.sval;
212 cw->codeword = codeword;
213 if (hc->type == HUFF_CODE_INT) {
215 (
char const *)&cw->r.ival,
222 ++nextcode[node->nbits];
238 hc->type = HUFF_CODE_INT;
242 for (i = 0; i < nvals; ++i) {
244 huff_node_new_int(values[i]),
249 root = huff_code_build_tree(q);
251 if (root == NULL || root->nbits > 32) {
252 E_ERROR(
"Huffman trees currently limited to 32 bits\n");
253 huff_node_free_int(root);
259 hc->maxbits = root->nbits;
260 huff_code_canonicalize(hc, root);
263 huff_node_free_int(root);
278 hc->type = HUFF_CODE_STR;
282 for (i = 0; i < nvals; ++i) {
284 huff_node_new_str(values[i]),
289 root = huff_code_build_tree(q);
291 if (root == NULL || root->nbits > 32) {
292 E_ERROR(
"Huffman trees currently limited to 32 bits\n");
293 huff_node_free_str(root, TRUE);
299 hc->maxbits = root->nbits;
300 huff_code_canonicalize(hc, root);
303 huff_node_free_str(root, FALSE);
317 hc->maxbits = fgetc(infh);
318 hc->type = fgetc(infh);
325 hc->firstcode =
ckd_calloc(hc->maxbits + 1,
sizeof(*hc->firstcode));
326 hc->numl =
ckd_calloc(hc->maxbits + 1,
sizeof(*hc->numl));
327 hc->syms =
ckd_calloc(hc->maxbits + 1,
sizeof(*hc->syms));
331 for (i = 1; i <= hc->maxbits; ++i) {
332 if (fread(&hc->firstcode[i], 4, 1, infh) != 1)
334 SWAP_BE_32(&hc->firstcode[i]);
335 if (fread(&hc->numl[i], 4, 1, infh) != 1)
337 SWAP_BE_32(&hc->numl[i]);
338 hc->syms[i] =
ckd_calloc(hc->numl[i],
sizeof(**hc->syms));
339 for (j = 0; j < hc->numl[i]; ++j) {
342 cw->codeword = hc->firstcode[i] + j;
343 if (hc->type == HUFF_CODE_INT) {
344 if (fread(&cw->r.ival, 4, 1, infh) != 1)
346 SWAP_BE_32(&cw->r.ival);
348 (
char const *)&cw->r.ival,
355 cw->r.sval[len-1] =
'\0';
373 fputc(hc->maxbits, outfh);
375 fputc(hc->type, outfh);
380 for (i = 1; i <= hc->maxbits; ++i) {
384 val = hc->firstcode[i];
387 fwrite(&val, 4, 1, outfh);
390 fwrite(&val, 4, 1, outfh);
393 for (j = 0; j < hc->numl[i]; ++j) {
394 if (hc->type == HUFF_CODE_INT) {
395 int32 val = hc->syms[i][j].r.ival;
397 fwrite(&val, 4, 1, outfh);
402 fprintf(outfh,
"%s\n", hc->syms[i][j].r.sval);
410 huff_code_dump_codebits(FILE *dumpfh, uint32 nbits, uint32 codeword)
414 for (i = 0; i < nbits; ++i)
415 fputc((codeword & (1<<(nbits-i-1))) ?
'1' :
'0', dumpfh);
425 fprintf(dumpfh,
"Maximum codeword length: %d\n", hc->maxbits);
426 fprintf(dumpfh,
"Symbols are %s\n", (hc->type == HUFF_CODE_STR) ?
"strings" :
"ints");
427 fprintf(dumpfh,
"Codewords:\n");
428 for (i = 1; i <= hc->maxbits; ++i) {
429 for (j = 0; j < hc->numl[i]; ++j) {
430 if (hc->type == HUFF_CODE_STR)
431 fprintf(dumpfh,
"%-30s", hc->syms[i][j].r.sval);
433 fprintf(dumpfh,
"%-30d", hc->syms[i][j].r.ival);
434 huff_code_dump_codebits(dumpfh, hc->syms[i][j].nbits,
435 hc->syms[i][j].codeword);
436 fprintf(dumpfh,
"\n");
456 if (--hc->refcount > 0)
458 for (i = 0; i <= hc->maxbits; ++i) {
460 for (j = 0; j < hc->numl[i]; ++j) {
461 if (hc->type == HUFF_CODE_STR)
488 FILE *oldfh = hc->fh;
511 if (outcw) *outcw = cw->codeword;
526 if (outcw) *outcw = cw->codeword;
531 huff_code_decode_data(
huff_code_t *hc,
char const **inout_data,
532 size_t *inout_data_len,
int *inout_offset)
534 char const *data = *inout_data;
535 char const *end = data + *inout_data_len;
536 int offset = *inout_offset;
544 cw = !!(byte & (1 << (7-offset++)));
547 while (cwlen <= hc->maxbits && cw < hc->firstcode[cwlen]) {
556 cw |= !!(byte & (1 << (7-offset++)));
559 if (cwlen > hc->maxbits)
569 *inout_data_len = end - data;
571 *inout_offset = offset;
572 return hc->syms[cwlen] + (cw - hc->firstcode[cwlen]);
582 if ((byte = fgetc(hc->fh)) == EOF)
584 cw = !!(byte & (1 << (7-hc->boff++)));
587 while (cwlen <= hc->maxbits && cw < hc->firstcode[cwlen]) {
591 if ((byte = fgetc(hc->fh)) == EOF)
595 cw |= !!(byte & (1 << (7-hc->boff++)));
598 if (cwlen > hc->maxbits)
603 ungetc(byte, hc->fh);
608 return hc->syms[cwlen] + (cw - hc->firstcode[cwlen]);
613 char const **inout_data,
614 size_t *inout_data_len,
int *inout_offset)
619 cw = huff_code_decode_data(hc, inout_data, inout_data_len, inout_offset);
621 cw = huff_code_decode_fh(hc);
628 *outval = cw->r.ival;
635 char const **inout_data,
636 size_t *inout_data_len,
int *inout_offset)
641 cw = huff_code_decode_data(hc, inout_data, inout_data_len, inout_offset);
643 cw = huff_code_decode_fh(hc);
SPHINXBASE_EXPORT void * hash_table_enter_bkey(hash_table_t *h, const char *key, size_t len, void *val)
Like hash_table_enter, but with an explicitly specified key length, instead of a NULL-terminated, C-style key string.
SPHINXBASE_EXPORT int32 hash_table_lookup(hash_table_t *h, const char *key, void **val)
Look up a key in a hash table and optionally return the associated value.
bit_encode_t * bit_encode_attach(FILE *outfh)
Attach bitstream encoder to a file.
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
SPHINXBASE_EXPORT int32 hash_table_lookup_bkey(hash_table_t *h, const char *key, size_t len, void **val)
Like hash_lookup, but with an explicitly specified key length, instead of a NULL-terminated, C-style key string.
SPHINXBASE_EXPORT int huff_code_free(huff_code_t *hc)
Release a pointer to a Huffman codec object.
SPHINXBASE_EXPORT int heap_destroy(heap_t *heap)
Destroy the given heap; free the heap nodes.
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT heap_t * heap_new(void)
Allocate a new heap and return handle to it.
SPHINXBASE_EXPORT int huff_code_dump(huff_code_t *hc, FILE *dumpfh)
Print a codebook to a file as text (for debugging)
SPHINXBASE_EXPORT int huff_code_encode_str(huff_code_t *hc, char const *sym, uint32 *outcw)
Encode a string, writing it to the file handle, if any.
SPHINXBASE_EXPORT char const * huff_code_decode_str(huff_code_t *hc, char const **inout_data, size_t *inout_data_len, int *inout_offset)
Decode a string, reading it from the file if no data given.
A node in a generic list.
#define ckd_salloc(ptr)
Macro for ckd_salloc
SPHINXBASE_EXPORT hash_table_t * hash_table_new(int32 size, int32 casearg)
Allocate a new hash table for a given expected size.
SPHINXBASE_EXPORT huff_code_t * huff_code_retain(huff_code_t *hc)
Retain a pointer to a Huffman codec object.
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
SPHINXBASE_EXPORT glist_t glist_add_ptr(glist_t g, void *ptr)
Create and prepend a new list node, with the given user-defined data, at the HEAD of the given generi...
SPHINXBASE_EXPORT int huff_code_write(huff_code_t *hc, FILE *outfh)
Write a codebook to a file.
SPHINXBASE_EXPORT int heap_insert(heap_t *heap, void *data, int32 val)
Insert a new item into the given heap.
SPHINXBASE_EXPORT void hash_table_free(hash_table_t *h)
Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...
SPHINXBASE_EXPORT int huff_code_encode_int(huff_code_t *hc, int32 sym, uint32 *outcw)
Encode an integer, writing it to the file handle, if any.
SPHINXBASE_EXPORT huff_code_t * huff_code_build_str(char *const *values, int32 const *frequencies, int nvals)
Create a codebook from string data.
SPHINXBASE_EXPORT int heap_pop(heap_t *heap, void **data, int32 *val)
Like heap_top but also pop the top item off the heap.
SPHINXBASE_EXPORT char * fread_line(FILE *stream, size_t *out_len)
Read a line of arbitrary length from a file and return it as a newly allocated string.
int bit_encode_write_cw(bit_encode_t *be, uint32 codeword, int nbits)
Write lowest-order bits of codeword to encoder.
SPHINXBASE_EXPORT gnode_t * gnode_free(gnode_t *gn, gnode_t *pred)
Free the given node, gn, of a glist, pred being its predecessor in the list.
#define gnode_ptr(g)
Head of a list of gnodes.
SPHINXBASE_EXPORT size_t heap_size(heap_t *heap)
Return the number of items in the heap.
Implementation of logging routines.
int bit_encode_free(bit_encode_t *be)
Release pointer to a bit encoder.
SPHINXBASE_EXPORT void * hash_table_enter(hash_table_t *h, const char *key, void *val)
Try to add a new entry with given key and associated value to hash table h.
SPHINXBASE_EXPORT huff_code_t * huff_code_read(FILE *infh)
Read a codebook from a file.
#define E_ERROR
Print error message to standard error stream.
SPHINXBASE_EXPORT huff_code_t * huff_code_build_int(int32 const *values, int32 const *frequencies, int nvals)
Create a codebook from 32-bit integer data.
SPHINXBASE_EXPORT int huff_code_decode_int(huff_code_t *hc, int *outval, char const **inout_data, size_t *inout_data_len, int *inout_offset)
Decode an integer, reading it from the file if no data given.
SPHINXBASE_EXPORT FILE * huff_code_detach(huff_code_t *hc)
Detach a Huffman codec from its file handle.
Hash table implementation.
int bit_encode_flush(bit_encode_t *be)
Flush any unwritten bits, zero-padding if necessary.
Huffman code and bitstream implementation.
Internal heap data structure.
SPHINXBASE_EXPORT FILE * huff_code_attach(huff_code_t *hc, FILE *fh, char const *mode)
Attach a Huffman codec to a file handle for input/output.
file IO related operations.