45 #include "EST_String.h" 46 #include "EST_TList.h" 48 #include "EST_THash.h" 57 static
EST_IList int_EST_IList_kv_def_EST_IList_s;
58 static
int int_EST_IList_kv_def_int_s;
61 template <>
int *
EST_TKVL<
int,
EST_IList >::default_key=&int_EST_IList_kv_def_int_s;
63 Declare_TList_N(KVI_int_EST_IList_t, 0)
66 #if defined(INSTANTIATE_TEMPLATES) 67 #include "../base_class/EST_TList.cc" 71 #include "../base_class/EST_TKVL.cc" 80 Instantiate_TStructIterator_T(KVL_int_EST_IList_t, KVL_int_EST_IList_t::IPointer, KVI_int_EST_IList_t, KVL_int_EST_IList_itt)
81 Instantiate_TIterator_T(KVL_int_EST_IList_t, KVL_int_EST_IList_t::IPointer, KVI_int_EST_IList_t, KVL_int_EST_IList_itt)
85 template class
EST_TList< TLIST_KVI_int_EST_IList_t_VAL >;
86 template class
EST_TItem< TLIST_KVI_int_EST_IList_t_VAL >;
87 template const
char *error_name(
EST_TList< KVI_int_EST_IList_t > val);
89 Instantiate_TIterator_T(
EST_TList<KVI_int_EST_IList_t>,
EST_TList<KVI_int_EST_IList_t>::IPointer, KVI_int_EST_IList_t, TList_KVI_int_EST_IList_t_itt);
96 static enum wfst_state_type intersect_state_type(
wfst_list &wl,
98 static int check_distinguished(
const EST_WFST &nmwfst,
103 void EST_WFST_MultiState::add(
int i)
108 if (p_type == wfst_ms_set)
109 for (p=head(); p != 0; p=p->next())
113 else if (i < (*
this)(p))
134 for (p=ms->head(); p != 0; p = p->next())
135 istring += itoString((*ms)(p)) +
" ";
137 ns = index.
val(istring,found);
154 pairs_done.
val(p,found);
169 Agenda multistate_agenda;
176 p_in_symbols.copy(ndwfst.p_in_symbols);
177 p_out_symbols.copy(ndwfst.p_out_symbols);
181 start_state->add(ndwfst.start_state());
184 p_start_state = add_state(ndwfst.
ms_type(start_state));
185 start_state->set_name(p_start_state);
187 multistate_agenda.append(start_state);
189 while (multistate_agenda.length() > 0)
192 current = multistate_agenda.
first();
193 multistate_agenda.remove(multistate_agenda.head());
195 cout <<
"Determinizing " << summary() <<
" Agenda " 196 << multistate_agenda.length() << endl;
199 for (sp=current->head(); sp != 0; sp=sp->next())
202 for (tp=s->transitions.head(); tp != 0; tp = tp->next())
204 i = s->transitions(tp)->in_symbol();
205 o = s->transitions(tp)->out_symbol();
207 if (pair_check(pairs_done,i,o,p_out_symbols.length()) == 1)
213 if ((i==o) && (i==0))
215 nms = apply_multistate(ndwfst,current,i,o);
216 if ((nms->length() == 0) ||
217 (ndwfst.
ms_type(nms) == wfst_error))
222 new_name = multistate_index(index,nms,p_num_states);
223 if (new_name == p_num_states)
225 ns = add_state(ndwfst.
ms_type(nms));
227 multistate_agenda.append(nms);
231 nms->set_name(new_name);
236 p_states[current->name()]
237 ->add_transition(nms->weight(),
252 int in,
int out)
const 260 for (p=ms->head(); p != 0; p=p->next())
276 enum wfst_state_type r = wfst_nonfinal;
278 for (p=ms->head(); p != 0; p = p->next())
279 if (p_states((*ms)(p))->type() == wfst_error)
281 else if (p_states((*ms)(p))->type() == wfst_licence)
284 else if ((p_states((*ms)(p))->type() == wfst_final) &&
288 if (r == wfst_licence)
289 return wfst_nonfinal;
303 for (i=s->transitions.head(); i != 0; i=i->next())
305 if ((in == s->transitions(i)->in_symbol()) &&
306 (out == s->transitions(i)->out_symbol()))
307 ms->add(s->transitions(i)->state());
311 static int is_a_member(
const EST_IList &ii,
int i)
313 for (
EST_Litem *p=ii.head(); p != 0; p=p->next())
325 int ie = p_in_symbols.name(get_c_string(epsilon_label()));
326 int oe = p_out_symbols.name(get_c_string(epsilon_label()));
328 for (p=ms->head(); p != 0; p=p->next())
331 for (p=ii.head(); p != 0; p=p->next())
336 for (i=s->transitions.head(); i != 0; i=i->next())
338 if ((ie == s->transitions(i)->in_symbol()) &&
339 (oe == s->transitions(i)->out_symbol()))
342 int nstate = s->transitions(i)->state();
343 if (!is_a_member(ii,nstate))
362 Agenda multistate_agenda;
364 int i,o, new_name, n;
370 p_in_symbols.copy(wl.
first().p_in_symbols);
371 p_out_symbols.copy(wl.
first().p_out_symbols);
375 for (p=wl.tail(); p != 0; p=p->prev())
377 if (!wl(p).deterministic())
379 cout <<
"...intersection deterministing" << endl;
381 wl(p).determinize(tt);
383 start_state->add(wl(p).start_state());
386 p_start_state = add_state(intersect_state_type(wl,start_state));
388 start_state->set_name(p_start_state);
390 multistate_agenda.append(start_state);
392 while (multistate_agenda.length() > 0)
394 current = multistate_agenda.
first();
395 multistate_agenda.remove(multistate_agenda.head());
397 cout <<
"Intersection " << summary() <<
" Agenda " 398 << multistate_agenda.length() << endl;
402 for (i=0; i < p_in_symbols.length(); i++)
404 for (o=0; o < p_out_symbols.length(); o++)
406 if ((i==o) && (i==0))
411 for (n=0,p=wl.head(),q=current->head();
412 (p != 0) && (q != 0);
413 p=p->next(),q=q->next(),n++)
414 nms->add(wl(p).transition((*current)(q),i,o));
415 if (intersect_state_type(wl,nms) == wfst_error)
420 new_name = multistate_index(index,nms,p_num_states);
421 if (new_name == p_num_states)
423 ns = add_state(intersect_state_type(wl,nms));
425 multistate_agenda.append(nms);
429 nms->set_name(new_name);
434 p_states[current->name()]
435 ->add_transition(nms->weight(),nms->name(),i,o);
444 static enum wfst_state_type intersect_state_type(
wfst_list &wl,
451 enum wfst_state_type r = wfst_final;
453 for (p=wl.head(),q=ms->head();
454 (p != 0) && (q != 0);
455 p=p->next(),q=q->next())
457 if ((*ms)(q) == WFST_ERROR_STATE)
460 enum wfst_state_type dd = wl(p).state((*ms)(q))->type();
462 if (dd == wfst_error)
464 else if (dd == wfst_nonfinal)
492 for (p=0; p < nmwfst.num_states()-1; p++)
493 for (q=p+1; q < nmwfst.num_states(); q++)
494 check_distinguished(nmwfst,p,q,marks,assumptions);
502 marks.find_state_map(state_map,num_new_states);
506 p_in_symbols.copy(nmwfst.p_in_symbols);
507 p_out_symbols.copy(nmwfst.p_out_symbols);
509 init(num_new_states);
510 p_start_state = state_map(nmwfst.p_start_state);
512 for (i=0; i < nmwfst.num_states(); i++)
514 if (p_states[state_map(i)] == 0)
515 p_states[state_map(i)] =
516 copy_and_map_states(state_map,nmwfst.
state(i),nmwfst);
521 static int check_distinguished(
const EST_WFST &nmwfst,
530 if (marks.distinguished(p,q))
532 else if (marks.undistinguished(p,q))
536 else if ((nmwfst.
state(p)->type() != nmwfst.
state(q)->type()) ||
537 (nmwfst.
state(p)->num_transitions() !=
538 nmwfst.
state(q)->num_transitions()))
541 marks.distinguish(p,q);
546 for (t=nmwfst.
state(p)->transitions.head(); t != 0; t=t->next())
548 int in = nmwfst.
state(p)->transitions(t)->in_symbol();
549 int out = nmwfst.
state(p)->transitions(t)->out_symbol();
550 int y = nmwfst.
state(p)->transitions(t)->state();
552 if ((z == WFST_ERROR_STATE) ||
553 (marks.distinguished(y,z)))
555 marks.distinguish(p,q);
558 else if (equivalent_to(y,z,assumptions))
570 if (assumptions.
length() == 0)
573 add_assumption(p,q,assumptions);
574 for (yp=from_p.head(),zp=from_q.head();
575 yp != 0; yp=yp->next(),zp=zp->next())
576 if (check_distinguished(nmwfst,from_p(yp),from_q(zp),
580 marks.distinguish(p,q);
589 mark_undistinguished(marks,assumptions);
596 void EST_WFST::extend_alphabets(
const EST_WFST &b)
604 for (i=0; i<p_in_symbols.length(); i++)
605 ivocab.
append(in_symbol(i));
606 for (i=0; i<b.p_in_symbols.
length(); i++)
607 if (!strlist_member(ivocab,b.
in_symbol(i)))
609 for (i=0; i<p_out_symbols.length(); i++)
610 ovocab.
append(out_symbol(i));
611 for (i=0; i<b.p_out_symbols.
length(); i++)
615 p_in_symbols.init(ivocab);
616 p_out_symbols.init(ovocab);
627 ns->set_type(s->type());
629 for (i=s->transitions.head(); i != 0; i=i->next())
631 int mapped_state= state_map(s->transitions(i)->state());
632 if (mapped_state != WFST_ERROR_STATE)
633 ns->add_transition(s->transitions(i)->weight(),
635 in_symbol(b.
in_symbol(s->transitions(i)->in_symbol())),
636 out_symbol(b.
out_symbol(s->transitions(i)->out_symbol())));
652 for (i=0; i < num_states(); i++)
654 if (p_states[i]->type() == wfst_final)
655 p_states[i]->set_type(wfst_nonfinal);
656 else if (p_states[i]->type() == wfst_nonfinal)
657 p_states[i]->set_type(wfst_final);
662 static int noloopstostart(
const EST_WFST &a)
669 for (i=0; i < a.num_states(); i++)
673 for (p=s->transitions.head(); p != 0; p=p->next())
675 if (s->transitions(p)->state() == a.start_state())
682 int EST_WFST::deterministiconstartstates(
const EST_WFST &a,
const EST_WFST &b)
const 695 tab(a.
state(a.start_state())->transitions(t)->in_symbol(),
696 a.
state(a.start_state())->transitions(t)->out_symbol()) = 1;
700 tt != 0; tt=tt->next())
708 else if (tab(in,out) == 1)
726 noloopstostart(a) && noloopstostart(b) &&
727 deterministiconstartstates(a,b))
731 smap.
resize(b.num_states());
732 smap[0] = start_state();
733 for (i=1; i < b.num_states(); ++i)
734 smap[i] = i+a.num_states()-1;
736 more_states(a.num_states()+b.num_states()-1);
737 p_num_states += b.num_states()-1;
738 for (i=1; i < b.num_states(); i++)
739 p_states[smap(i)] = copy_and_map_states(smap,b.
state(i),b);
743 for (p=s->transitions.head(); p != 0; p=p->next())
745 int mapped_state= smap(s->transitions(p)->state());
746 if (mapped_state != WFST_ERROR_STATE)
747 p_states[start_state()]->
748 add_transition(s->transitions(p)->weight(),
750 in_symbol(b.
in_symbol(s->transitions(p)->in_symbol())),
751 out_symbol(b.
out_symbol(s->transitions(p)->out_symbol())));
756 smap.
resize(b.num_states());
757 for (i=0; i < b.num_states(); ++i)
758 smap[i] = i+a.num_states();
760 more_states(a.num_states()+b.num_states());
761 p_num_states += b.num_states();
762 for (i=0; i < b.num_states(); i++)
763 p_states[smap(i)] = copy_and_map_states(smap,b.
state(i),b);
767 p_states[start_state()]->add_transition(0.0,smap[b.start_state()],
768 in_epsilon(), out_epsilon());
784 smap.
resize(b.num_states());
785 for (i=0; i < b.num_states(); ++i)
786 smap[i] = i+a.num_states();
788 more_states(a.num_states()+b.num_states());
792 for (i=0; i < num_states(); i++)
794 if (p_states[i]->type() == wfst_final)
796 p_states[i]->set_type(wfst_nonfinal);
797 p_states[i]->add_transition(0.0,smap[b.start_state()],
798 in_epsilon(), out_epsilon());
802 p_num_states += b.num_states();
803 for (i=0; i < b.num_states(); i++)
804 p_states[smap(i)] = copy_and_map_states(smap,b.
state(i),b);
816 Agenda multistate_agenda;
824 p_in_symbols.copy(a.p_in_symbols);
825 p_out_symbols.copy(b.p_out_symbols);
829 start_state->add(a.start_state());
831 start_state->add(b.start_state());
833 p_start_state = add_state(intersect_state_type(wl,start_state));
835 start_state->set_name(p_start_state);
837 multistate_agenda.append(start_state);
839 while (multistate_agenda.length() > 0)
841 current = multistate_agenda.
first();
842 multistate_agenda.remove(multistate_agenda.head());
845 for (i=0; i < p_in_symbols.length(); i++)
850 wl.
first().transduce(current->
first(),i,transa);
851 for (p=transa.head(); p != 0; p=p->next())
859 for (q = transb.head(); q != 0; q=q->next())
862 nms->add(transa(p)->state());
863 nms->add(transb(q)->state());
865 if (intersect_state_type(wl,nms) == wfst_error)
870 new_name = multistate_index(index,nms,p_num_states);
871 if (new_name == p_num_states)
873 int ns = add_state(intersect_state_type(wl,nms));
875 multistate_agenda.append(nms);
878 nms->set_name(new_name);
881 p_states[current->name()]
882 ->add_transition(nms->weight(),nms->name(),
883 i,transb(q)->out_symbol());
905 for (
int i=0; i < compb.num_states(); i++)
907 if (compb.p_states[i]->type() == wfst_final)
908 compb.p_states[i]->set_type(wfst_error);
926 ab.current_tag = ++traverse_tag;
927 for (
int i=0; i < ab.p_num_states; i++)
928 ab.can_reach_final(i);
934 int EST_WFST::can_reach_final(
int state)
938 if (p_states[state]->type() == wfst_final)
940 else if (p_states[state]->type() == wfst_error)
942 else if (p_states[state]->tag() == current_tag)
948 enum wfst_state_type current_val = p_states[state]->type();
949 enum wfst_state_type r = wfst_error;
951 p_states[state]->set_type(wfst_error);
953 for (i=p_states[state]->transitions.head(); i != 0; i=i->next())
956 if (can_reach_final(p_states[state]->transitions(i)->state()))
961 p_states[state]->set_type(r);
966 p_states[state]->set_tag(current_tag);
980 tab.
resize(p_in_symbols.length(),p_out_symbols.length());
982 for (
int i=0; i < p_num_states; i++)
985 for (
EST_Litem *t=state(i)->transitions.head(); t != 0; t=t->next())
987 if (tab(state(i)->transitions(t)->in_symbol(),
988 state(i)->transitions(t)->out_symbol()) == 1)
991 tab(state(i)->transitions(t)->in_symbol(),
992 state(i)->transitions(t)->out_symbol()) = 1;
void minimize(const EST_WFST &a)
Build minimized form of a.
enum wfst_state_type ms_type(EST_WFST_MultiState *ms) const
Given a multi-state return type (final, ok, error)
void complement(const EST_WFST &a)
Build complement of a.
EST_Litem * insert_before(EST_Litem *ptr, const int &item)
const EST_WFST_State * state(int i) const
Return internal state information.
int out_symbol(const EST_String &s) const
Map output symbol to output alphabet index.
void intersection(EST_TList< EST_WFST > &wl)
void determinize(const EST_WFST &a)
Build determinized form of a.
V & val(const K &key, int &found) const
EST_WFST_MultiState * apply_multistate(const EST_WFST &wfst, EST_WFST_MultiState *ms, int in, int out) const
Transduce a multi-state given n and out.
void fill(const T &v)
fill matrix with value v
void compose(const EST_WFST &a, const EST_WFST &b)
void concat(const EST_WFST &a, const EST_WFST &b)
void uunion(EST_TList< EST_WFST > &wl)
void transition_all(int state, int in, int out, EST_WFST_MultiState *ms) const
Find all possible transitions for given state/input/output.
int in_symbol(const EST_String &s) const
Map input symbol to input alphabet index.
const int length(void) const
The number of members in the discrete.
void append(const int &item)
add item onto end of list
int add_item(const K &key, const V &value, int no_search=0)
Add an entry to the table.
const int length() const
number of key value pairs in list
int transition(int state, int in, int out) const
Find (first) new state given in and out symbols.
int deterministic() const
True if WFST is deterministic.
void remove_error_states(const EST_WFST &a)
Remove error states from the WFST.
void resize(int rows, int cols, int set=1)
resize matrix
void difference(const EST_WFST &a, const EST_WFST &b)
void add_epsilon_reachable(EST_WFST_MultiState *ms) const
Extend multi-state with epsilon reachable states.
void copy(const EST_WFST &wfst)
Copy from existing wfst.
const T & first() const
return const reference to first item in list
void clear(void)
remove all items in list
void resize(int n, int set=1)
resize vector
const T & last() const
return const reference to last item in list