diff --git a/tools/Makefile b/tools/Makefile index 9facddaa7..19b6ff22a 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -88,7 +88,7 @@ irstlm/Makefile: irstlm/.patched irstlm/.patched: | irstlm -cd irstlm;\ - patch -N -p0 < ../interpolatedwrite-5.60.02.patch + patch --verbose -N -p0 < ../interpolatedwrite-5.60.02.patch touch $@ irstlm: diff --git a/tools/openfst.patch b/tools/openfst.patch index 8c066df0a..4a0ef0a6e 100644 --- a/tools/openfst.patch +++ b/tools/openfst.patch @@ -1,161 +1,6 @@ -*** partition.h +*** minimize.h *************** -*** 41,48 **** - friend class PartitionIterator; - - struct Element { -! Element() : value(0), next(0), prev(0) {} -! Element(T v) : value(v), next(0), prev(0) {} - - T value; - Element* next; ---- 59,66 ---- - friend class PartitionIterator; - - struct Element { -! Element() : value(0), next(0), prev(0) {} -! Element(T v) : value(v), next(0), prev(0) {} - - T value; - Element* next; -*************** -*** 50,58 **** - }; - - public: -! Partition() {} - -! Partition(T num_states) { - Initialize(num_states); - } - ---- 68,78 ---- - }; - - public: -! Partition(bool allow_repeated_split): -! allow_repeated_split_(allow_repeated_split) {} - -! Partition(bool allow_repeated_split, T num_states): -! allow_repeated_split_(allow_repeated_split) { - Initialize(num_states); - } - -*************** -*** 135,150 **** - if (class_size_[class_id] == 1) return; - - // first time class is split -! if (split_size_[class_id] == 0) - visited_classes_.push_back(class_id); -! - // increment size of split (set of element at head of chain) - split_size_[class_id]++; -! - // update split point -! if (class_split_[class_id] == 0) -! class_split_[class_id] = classes_[class_id]; -! if (class_split_[class_id] == elements_[element_id]) - class_split_[class_id] = elements_[element_id]->next; - - // move to head of chain in same class ---- 155,170 ---- - if (class_size_[class_id] == 1) return; - - // first time class is split -! if (split_size_[class_id] == 0) { - visited_classes_.push_back(class_id); -! class_split_[class_id] = classes_[class_id]; -! } - // increment size of split (set of element at head of chain) - split_size_[class_id]++; -! - // update split point -! if (class_split_[class_id] != 0 -! && class_split_[class_id] == elements_[element_id]) - class_split_[class_id] = elements_[element_id]->next; - - // move to head of chain in same class -*************** -*** 155,178 **** - // class indices of the newly created class. Returns the new_class id - // or -1 if no new class was created. - T SplitRefine(T class_id) { - // only split if necessary -! if (class_size_[class_id] == split_size_[class_id]) { -! class_split_[class_id] = 0; - split_size_[class_id] = 0; - return -1; - } else { -- - T new_class = AddClass(); - size_t remainder = class_size_[class_id] - split_size_[class_id]; - if (remainder < split_size_[class_id]) { // add smaller -- Element* split_el = class_split_[class_id]; - classes_[new_class] = split_el; -- class_size_[class_id] = split_size_[class_id]; -- class_size_[new_class] = remainder; - split_el->prev->next = 0; - split_el->prev = 0; - } else { -- Element* split_el = class_split_[class_id]; - classes_[new_class] = classes_[class_id]; - class_size_[class_id] = remainder; - class_size_[new_class] = split_size_[class_id]; ---- 175,205 ---- - // class indices of the newly created class. Returns the new_class id - // or -1 if no new class was created. - T SplitRefine(T class_id) { -+ -+ Element* split_el = class_split_[class_id]; - // only split if necessary -! //if (class_size_[class_id] == split_size_[class_id]) { -! if(split_el == NULL) { // we split on everything... - split_size_[class_id] = 0; - return -1; - } else { - T new_class = AddClass(); -+ -+ if(allow_repeated_split_) { // split_size_ is possibly -+ // inaccurate, so work it out exactly. -+ size_t split_count; Element *e; -+ for(split_count=0,e=classes_[class_id]; -+ e != split_el; split_count++, e=e->next); -+ split_size_[class_id] = split_count; -+ } - size_t remainder = class_size_[class_id] - split_size_[class_id]; - if (remainder < split_size_[class_id]) { // add smaller - classes_[new_class] = split_el; - split_el->prev->next = 0; - split_el->prev = 0; -+ class_size_[class_id] = split_size_[class_id]; -+ class_size_[new_class] = remainder; - } else { - classes_[new_class] = classes_[class_id]; - class_size_[class_id] = remainder; - class_size_[new_class] = split_size_[class_id]; -*************** -*** 243,252 **** ---- 270,285 ---- - vector class_size_; - - // size of split for each class -+ // in the nondeterministic case, split_size_ is actually an upper -+ // bound on the size of split for each class. - vector split_size_; - - // set of visited classes to be used in split refine - vector visited_classes_; -+ -+ // true if input fst was deterministic: we can make -+ // certain assumptions in this case that speed up the algorithm. -+ bool allow_repeated_split_; - }; - - -*** minimize.h -*************** -*** 133,139 **** +*** 134,140 **** typedef typename A::Weight Weight; typedef ReverseArc RevA; @@ -163,7 +8,7 @@ Initialize(fst); Compute(fst); } ---- 150,163 ---- +--- 134,147 ---- typedef typename A::Weight Weight; typedef ReverseArc RevA; @@ -179,7 +24,7 @@ Compute(fst); } *************** -*** 314,320 **** +*** 315,321 **** typedef typename A::StateId ClassId; typedef typename A::Weight Weight; @@ -187,7 +32,7 @@ Initialize(fst); Refine(fst); } ---- 338,350 ---- +--- 322,334 ---- typedef typename A::StateId ClassId; typedef typename A::Weight Weight; @@ -224,3 +69,158 @@ if (!(props & kAcceptor)) { // weighted transducer VectorFst< GallicArc > gfst; +*** partition.h +*************** +*** 43,50 **** + friend class PartitionIterator; + + struct Element { +! Element() : value(0), next(0), prev(0) {} +! Element(T v) : value(v), next(0), prev(0) {} + + T value; + Element* next; +--- 43,50 ---- + friend class PartitionIterator; + + struct Element { +! Element() : value(0), next(0), prev(0) {} +! Element(T v) : value(v), next(0), prev(0) {} + + T value; + Element* next; +*************** +*** 52,60 **** + }; + + public: +! Partition() {} + +! Partition(T num_states) { + Initialize(num_states); + } + +--- 52,62 ---- + }; + + public: +! Partition(bool allow_repeated_split): +! allow_repeated_split_(allow_repeated_split) {} + +! Partition(bool allow_repeated_split, T num_states): +! allow_repeated_split_(allow_repeated_split) { + Initialize(num_states); + } + +*************** +*** 137,152 **** + if (class_size_[class_id] == 1) return; + + // first time class is split +! if (split_size_[class_id] == 0) + visited_classes_.push_back(class_id); +! + // increment size of split (set of element at head of chain) + split_size_[class_id]++; +! + // update split point +! if (class_split_[class_id] == 0) +! class_split_[class_id] = classes_[class_id]; +! if (class_split_[class_id] == elements_[element_id]) + class_split_[class_id] = elements_[element_id]->next; + + // move to head of chain in same class +--- 139,154 ---- + if (class_size_[class_id] == 1) return; + + // first time class is split +! if (split_size_[class_id] == 0) { + visited_classes_.push_back(class_id); +! class_split_[class_id] = classes_[class_id]; +! } + // increment size of split (set of element at head of chain) + split_size_[class_id]++; +! + // update split point +! if (class_split_[class_id] != 0 +! && class_split_[class_id] == elements_[element_id]) + class_split_[class_id] = elements_[element_id]->next; + + // move to head of chain in same class +*************** +*** 157,180 **** + // class indices of the newly created class. Returns the new_class id + // or -1 if no new class was created. + T SplitRefine(T class_id) { + // only split if necessary +! if (class_size_[class_id] == split_size_[class_id]) { +! class_split_[class_id] = 0; + split_size_[class_id] = 0; + return -1; + } else { +- + T new_class = AddClass(); + size_t remainder = class_size_[class_id] - split_size_[class_id]; + if (remainder < split_size_[class_id]) { // add smaller +- Element* split_el = class_split_[class_id]; + classes_[new_class] = split_el; +- class_size_[class_id] = split_size_[class_id]; +- class_size_[new_class] = remainder; + split_el->prev->next = 0; + split_el->prev = 0; + } else { +- Element* split_el = class_split_[class_id]; + classes_[new_class] = classes_[class_id]; + class_size_[class_id] = remainder; + class_size_[new_class] = split_size_[class_id]; +--- 159,189 ---- + // class indices of the newly created class. Returns the new_class id + // or -1 if no new class was created. + T SplitRefine(T class_id) { ++ ++ Element* split_el = class_split_[class_id]; + // only split if necessary +! //if (class_size_[class_id] == split_size_[class_id]) { +! if(split_el == NULL) { // we split on everything... + split_size_[class_id] = 0; + return -1; + } else { + T new_class = AddClass(); ++ ++ if(allow_repeated_split_) { // split_size_ is possibly ++ // inaccurate, so work it out exactly. ++ size_t split_count; Element *e; ++ for(split_count=0,e=classes_[class_id]; ++ e != split_el; split_count++, e=e->next); ++ split_size_[class_id] = split_count; ++ } + size_t remainder = class_size_[class_id] - split_size_[class_id]; + if (remainder < split_size_[class_id]) { // add smaller + classes_[new_class] = split_el; + split_el->prev->next = 0; + split_el->prev = 0; ++ class_size_[class_id] = split_size_[class_id]; ++ class_size_[new_class] = remainder; + } else { + classes_[new_class] = classes_[class_id]; + class_size_[class_id] = remainder; + class_size_[new_class] = split_size_[class_id]; +*************** +*** 245,254 **** +--- 254,269 ---- + vector class_size_; + + // size of split for each class ++ // in the nondeterministic case, split_size_ is actually an upper ++ // bound on the size of split for each class. + vector split_size_; + + // set of visited classes to be used in split refine + vector visited_classes_; ++ ++ // true if input fst was deterministic: we can make ++ // certain assumptions in this case that speed up the algorithm. ++ bool allow_repeated_split_; + }; + +