Difference: HfstPerformanceTestingNotes (1 vs. 72)

Revision 72 (2011-01-19) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"
Changed:
<
<

HFST Performance Testing

>
>

HFST Performance Testing: Observations

 

Composition

Line: 364 to 364
  --> -- ErikAxelson - 2009-05-13 \ No newline at end of file
Added:
>
>
META TOPICMOVED by="eaxelson" date="1295443453" from="KitWiki.HfstPerformanceTesting" to="KitWiki.HfstPerformanceTestingNotes"

Revision 71 (2010-01-11) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 70 (2010-01-07) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 69 (2009-12-10) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 68 (2009-12-09) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 67 (2009-12-02) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 355 to 355
 hfst-fst2txt comphyphen.sfsta | perl -pe 's/\t/ /g;' | cut -f1,3,4 -d' ' | perl -pe "s/^([^ ]+) ([^ ]+) ([^ ]+)$/\1 \2\3/g;" | sort -n | uniq -c | sort -nr | less
Added:
>
>
for i in phonology inflection stemfill stubify find-gradation plurale-tantum omorfi_1 omorfi_2 exceptions compounds omorfi; do echo $i ":"; hfst-summarize ./hfst-files/$i.sfsta | egrep '(of states)|(of arcs)|(cyclic)'  ; done
 

Revision 66 (2009-12-01) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 351 to 351
 gv pic.ps;
Added:
>
>
hfst-fst2txt comphyphen.sfsta | perl -pe 's/\t/ /g;' | cut -f1,3,4 -d' ' | perl -pe "s/^([^ ]+) ([^ ]+) ([^ ]+)$/\1 \2\3/g;" | sort -n | uniq -c | sort -nr | less
 

Revision 65 (2009-11-30) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 64 (2009-11-19) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 63 (2009-11-18) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 62 (2009-11-16) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 61 (2009-11-13) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 335 to 335
 
for i in 0 1 2 3 4 5 6 7 8 9; do   cat make-omorfi-time | perl -pe 's/HFST/hwfst/g; s/LOG/hwfst.LOG/g;'   ; done;
Changed:
<
<
for i in 0 1 2 3 4 5 6 7 8 9; do cat make-omorfi-time | perl -pe 's/HFST/hffst/g; s/\-i/-O -i/g; s/LOG/hfst.m.LOG/g;' ; done;
>
>
for i in 0 1 2 3 4 5 6 7 8 9; do cat make-omorfi-time | perl -pe 's/HFST/hfst/g; s/\-i/-O -i/g; s/LOG/hfst.m.LOG/g;' ; done;
 for i in 0 1 2 3 4 5 6 7 8 9; do cat make-omorfi-time | perl -pe 's/HFST/hfst/g; s/LOG/hfst.LOG/g;' ; done;
Line: 343 to 343
 

Produce a non-minimal transducer from a list of words and draw it

Changed:
<
<
echo "<> - <>" | hfst-calculate > words.hfst; cat words.txt | while read line; do echo $line | hfst-calculate | hfst-disjunct words.hfst > tmp.hfst; mv tmp.hfst words.hfst; done;
>
>
echo "<>" | hwfst-calculate > words.hfst; cat words.txt | while read line; do echo $line | hwfst-calculate | hfst-disjunct -1 words.hfst > tmp.hfst; mv tmp.hfst words.hfst; done;
 cat words.hfst | hfst-remove-epsilons | hfst-fst2fst --format openfst | fstdraw > pic;
Changed:
<
<
cat pic | perl -pe 's/orientation = Landscape\;\n//g; s/\"([a-z0-9]+):[a-z0-9]+\"/\"\1\"/g;' > TMP; mv TMP pic;
>
>
cat pic | perl -pe 's/orientation = Landscape\;\n//g; s/\"([a-z0-9]+):[a-z0-9]+\"/\"\1\"/g; s/(0 \[label = \"0\", shape =) doublecircle/\1 circle/g;' > TMP; mv TMP pic;
 dot -Tps pic -o pic.ps; gv pic.ps;

Revision 60 (2009-11-11) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 339 to 339
 for i in 0 1 2 3 4 5 6 7 8 9; do cat make-omorfi-time | perl -pe 's/HFST/hfst/g; s/LOG/hfst.LOG/g;' ; done;
Added:
>
>

Produce a non-minimal transducer from a list of words and draw it

echo "<> - <>" | hfst-calculate > words.hfst;
cat words.txt | while read line; do   echo $line | hfst-calculate | hfst-disjunct words.hfst > tmp.hfst; mv tmp.hfst words.hfst;    done;
cat words.hfst | hfst-remove-epsilons | hfst-fst2fst --format openfst | fstdraw > pic;
cat pic | perl -pe 's/orientation = Landscape\;\n//g; s/\"([a-z0-9]+):[a-z0-9]+\"/\"\1\"/g;' > TMP; mv TMP pic;
dot -Tps pic -o pic.ps;
gv pic.ps;
 

Revision 59 (2009-11-10) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 318 to 318
 

calculate the average from a logfile

Changed:
<
<
cat FOO.log | grep 'real|user|sys' | perl -pe 's/.*([0-9]+m[0-9\.]+s).*/\1/g; s/m/*60 + /g; s/s//g;' | bc > TIMES; cat TIMES | perl -pe 's/\n/ +0/g;' > FOO; (cat FOO; echo -e "\n") | bc > SUM; cat SUM | perl -pe 's/^(.*)$/scale=2; \1\/5/g;' | bc;
>
>
# not in tabular format for i in *.LOG; do echo $i": " | perl -pe 's/\n//g;' >> AVERAGES; for j in real user sys; do cat $i | grep $j | perl -pe 's/[^0-9]*([0-9]+m[0-9\.]+s).*/\1/g; s/m/*60 + /g; s/s//g;' | bc > TIMES; cat TIMES | perl -pe 's/\n/ +0/g;' > FOO; (cat FOO; echo -e "\n") | bc > SUM; cat SUM | perl -pe 's/^(.*)$/scale=2; \1\/10/g;' | bc | perl -pe 's/\n/ /g;' >> AVERAGES; done; echo "" >> AVERAGES; done;

# in seconds for k in phonology inflection stemfill stubify find-gradation plurale-tantum omorfi_1 omorfi_2 exceptions compounds omorfi; do echo $k | perl -pe 's/\n/: /g;' >> AVERAGES; for i in $k.hfst.LOG $k.hfst.m.LOG $k.hwfst.LOG; do for j in real user sys; do cat $i | grep $j | perl -pe 's/[^0-9]*([0-9]+m[0-9\.]+s).*/\1/g; s/m/*60 + /g; s/s//g;' | bc > TIMES; cat TIMES | perl -pe 's/\n/ +0/g;' > FOO; (cat FOO; echo -e "\n") | bc > SUM; cat SUM | perl -pe 's/^(.*)$/scale=2; \1\/10/g;' | bc | perl -pe 's/\n/ /g;' >> AVERAGES; done; done; echo "" >> AVERAGES; done

# in minutes and seconds for k in phonology inflection stemfill stubify find-gradation plurale-tantum omorfi_1 omorfi_2 exceptions compounds omorfi; do echo $k | perl -pe 's/\n/: /g;' >> AVERAGES; for i in $k.hfst.LOG $k.hfst.m.LOG $k.hwfst.LOG; do for j in real user sys; do cat $i | grep $j | perl -pe 's/[^0-9]*([0-9]+m[0-9\.]+s).*/\1/g; s/m/*60 + /g; s/s//g;' | bc > TIMES; cat TIMES | perl -pe 's/\n/ +0/g;' > FOO; (cat FOO; echo -e "\n") | bc > SUM; cat SUM | perl -pe 's/^(.*)$/scale=2; \1\/10/g;' | bc > AV; cat AV | perl -pe 's/^(.*)$/\1 \/ 60/g;' | bc | perl -pe 's/\n/m/g;' >> AVERAGES; cat AV | perl -pe 's/^(.*)$/\1 \% 60/g;' | bc | perl -pe 's/\n/s /g;' >> AVERAGES; done; done; echo "" >> AVERAGES; done

cat AVERAGES_SEC | cut -d' ' -f 2 | perl -pe 's/\n/+0/g;' > TMP; echo "" >> TMP; cat TMP | bc

 

Revision 58 (2009-11-09) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 272 to 272
  (It seems that composition takes most of the time in unweighted hfst. Epsilons are not filtered. Arcs are not sorted.)
Added:
>
>

Miscellaneous scripts

make-omorfi

/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i phonology.sfst -o phonology.sfsta
/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i inflection.sfst -o inflection.sfsta
/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i stemfill.sfst -o stemfill.sfsta
/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i stubify.sfst -o stubify.sfsta
/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i find-gradation.sfst -o find-gradation.sfsta
/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i plurale-tantum.sfst -o plurale-tantum.sfsta
/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i omorfi_1.sfst -o omorfi_1.sfsta
/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i omorfi_2.sfst -o omorfi_2.sfsta
/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i exceptions.sfst -o exceptions.sfsta
/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i compounds.sfst -o compounds.sfsta
/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i omorfi.sfst -o omorfi.sfsta
mv *.sfsta ./HFST-files/

make-omorfi-time

time (/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i phonology.sfst -o phonology.sfsta) 2>> phonology.LOG 
time (/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i inflection.sfst -o inflection.sfsta) 2>> inflection.LOG 
time (/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i stemfill.sfst -o stemfill.sfsta) 2>> stemfill.LOG 
time (/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i stubify.sfst -o stubify.sfsta) 2>> stubify.LOG 
time (/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i find-gradation.sfst -o find-gradation.sfsta) 2>> find-gradation.LOG 
time (/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i plurale-tantum.sfst -o plurale-tantum.sfsta) 2>> plurale-tantum.LOG 
time (/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i omorfi_1.sfst -o omorfi_1.sfsta) 2>> omorfi_1.LOG 
time (/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i omorfi_2.sfst -o omorfi_2.sfsta) 2>> omorfi_2.LOG 
time (/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i exceptions.sfst -o exceptions.sfsta) 2>> exceptions.LOG 
time (/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i compounds.sfst -o compounds.sfsta) 2>> compounds.LOG 
time (/home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i omorfi.sfst -o omorfi.sfsta) 2>> omorfi.LOG 
mv *.sfsta ./HFST-files/

make-omorfi-time-total

time ( /home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i phonology.sfst -o phonology.sfsta; /home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i inflection.sfst -o inflection.sfsta; /home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i stemfill.sfst -o stemfill.sfsta; /home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i stubify.sfst -o stubify.sfsta; /home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i find-gradation.sfst -o find-gradation.sfsta; /home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i plurale-tantum.sfst -o plurale-tantum.sfsta; /home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i omorfi_1.sfst -o omorfi_1.sfsta; /home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i omorfi_2.sfst -o omorfi_2.sfsta; /home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i exceptions.sfst -o exceptions.sfsta; /home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i compounds.sfst -o compounds.sfsta; /home/eaxelson/hfst/my-trunk/hfst-2.0/hfst-tools/src/HFST-calculate -i omorfi.sfst -o omorfi.sfsta;) 2>> LOG; mv *.sfsta ./HFST-files/

calculate the average from a logfile

cat FOO.log | grep 'real|user|sys' | perl -pe 's/.*([0-9]+m[0-9\.]+s).*/\1/g; s/m/*60 + /g; s/s//g;' | bc > TIMES; cat TIMES | perl -pe 's/\n/ +0/g;' > FOO; (cat FOO; echo -e "\n") | bc > SUM; cat SUM | perl -pe 's/^(.*)$/scale=2; \1\/5/g;' | bc;

performance-tests

for i in 0 1 2 3 4 5 6 7 8 9; do   cat make-omorfi-time | perl -pe 's/HFST/hwfst/g; s/LOG/hwfst.LOG/g;'   ; done;
for i in 0 1 2 3 4 5 6 7 8 9; do   cat make-omorfi-time | perl -pe 's/HFST/hffst/g; s/\-i/-O -i/g; s/LOG/hfst.m.LOG/g;'   ; done;
for i in 0 1 2 3 4 5 6 7 8 9; do   cat make-omorfi-time | perl -pe 's/HFST/hfst/g; s/LOG/hfst.LOG/g;'   ; done;
 

Revision 57 (2009-11-09) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 56 (2009-11-03) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 55 (2009-11-02) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 111 to 111
 where labels with output epsilons (in the first transducer) come before labels with input epsilons (in the second transducer).
Changed:
<
<
TODO: Why does pushing labels not produce equivalent results?
>
>
Pushing labels does not produce equivalent results. It is not possible to push labels in a minimal transducer. (An example of why this is not possible is still needed.)
  Roark, Sproat: Computational Approaches to Morphology and Syntax, pages 14-15; Pereira, Riley: Finite-State Language Processing, pages 439-442
Line: 132 to 133
  This would clearly remove the redundant paths where input epsilons come before output epsilons. However, the filter does not know where these paths have originated; such paths that are present
Changed:
<
<
in one of the transducers before composition are also removed.

Changing the composition algorithm:

>
>
in one of the transducers before composition are also removed. What if single epsilons (epsilons that occur on only the input side or only the output side of a transition) in the argument transducers were replaced with an unused marker before composition and then changed back to epsilons after composition and filtering? How much time would the filtering take afterwards?
 
Added:
>
>
 

Revision 54 (2009-10-21) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 53 (2009-10-20) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 52 (2009-10-19) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 51 (2009-09-29) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 50 (2009-09-28) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 49 (2009-09-23) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 48 (2009-09-21) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 47 (2009-09-17) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 46 (2009-09-15) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 45 (2009-09-14) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 253 to 253
 
HFST::subtract complete alphabet, negation (arguments minimised) and intersection
HWFST::subtract arguments epsilons removed, encoded, arcs sorted, second argument determinised
Changed:
<
<
A transducer of 450,000 paths:
>
>
A transducer of 450,000 words:
 
program unweighted user time weighted user time
Changed:
<
<
hfst-compose 0m2.550s (0m2.350s) 0m1.190s (0m1.030s)
hfst-intersect 0m1.690s (0m1.870s) 0m2.520s (0m2.440s)
hfst-subtract 0m2.460s (0m2.010s) 0m3.510s (0m3.340s)
>
>
hfst-compose 0m2.350s 0m1.030s
hfst-intersect 0m1.870s 0m2.440s
hfst-subtract 0m2.010s 0m3.340s

A transducer of 450,000 wordpairs:

program unweighted user time weighted user time
hfst-compose 3m2.280s 2m52.990s
hfst-intersect 3m7.840s 3m15.530s
hfst-subtract 3m28.380s 3m23.390s
  (It seems that composition takes most of the time in unweighted hfst. Epsilons are not filtered. Arcs are not sorted.)

Revision 44 (2009-09-14) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 241 to 241
  91,393,080,369 (HFST::make_replace_in_context)
80,506,430,283 (HFST::operator!)
Changed:
<
<
37,666,701,957 (HWFST::make_replace_incontext)
>
>
37,666,701,957 (HWFST::make_replace_in_context)
 28,325,895,797 (HWFST::negate)
Changed:
<
<
function  
>
>
function argument processing
 
HFST::compose -
HWFST::compose arcs sorted
HFST::intersect arguments determinised
Changed:
<
<
HWFST::intersect arguments epsilons removed, arcs sorted, encoded, determinised(removed)
>
>
HWFST::intersect arguments epsilons removed, encoded, arcs sorted ( not determinised any more )
 
HFST::subtract complete alphabet, negation (arguments minimised) and intersection
Changed:
<
<
HWFST::subtract arguments epsilons removed, arcs sorted, encoded, second argument determinised
>
>
HWFST::subtract arguments epsilons removed, encoded, arcs sorted, second argument determinised

A transducer of 450,000 paths:

 
Added:
>
>
program unweighted user time weighted user time
hfst-compose 0m2.550s (0m2.350s) 0m1.190s (0m1.030s)
hfst-intersect 0m1.690s (0m1.870s) 0m2.520s (0m2.440s)
hfst-subtract 0m2.460s (0m2.010s) 0m3.510s (0m3.340s)
  (It seems that composition takes most of the time in unweighted hfst. Epsilons are not filtered. Arcs are not sorted.)

Revision 43 (2009-09-14) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 42 (2009-09-13) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 237 to 237
 valgrind --tool=callgrind hfst-calculate -i phonology.sfst -o phonology.sfsta >& LOG
Added:
>
>
First 10 replace rules in phonology.sfst:

91,393,080,369 (HFST::make_replace_in_context)
80,506,430,283 (HFST::operator!)
37,666,701,957 (HWFST::make_replace_incontext)
28,325,895,797 (HWFST::negate)

function  
HFST::compose -
HWFST::compose arcs sorted
HFST::intersect arguments determinised
HWFST::intersect arguments epsilons removed, arcs sorted, encoded, determinised(removed)
HFST::subtract complete alphabet, negation (arguments minimised) and intersection
HWFST::subtract arguments epsilons removed, arcs sorted, encoded, second argument determinised
  (It seems that composition takes most of the time in unweighted hfst. Epsilons are not filtered. Arcs are not sorted.)

Revision 41 (2009-09-12) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 136 to 136
  Changing the composition algorithm:
Changed:
<
<
1) If input epsilon labels are inserted, no output epsilon labels are allowed to be inserted right after them 2) If an epsilon label is inserted to a state that already exists, the state must be checked for output epsilon labels 3) If the state contains them, a new state is created where all labels except the output epsilon labels are copied
>
>
1) If input epsilon labels are inserted, no output epsilon labels are allowed to be inserted right after them
2) If an epsilon label is inserted to a state that already exists, the state must be checked for output epsilon labels
3) If the state contains them, a new state is created where all labels except the output epsilon labels are copied
 4) If the target state was the same as the originating state, an extra loop is added
Line: 231 to 231
 cat LOG | egrep 'TEST\:|definitely|possibly'
Added:
>
>
Testing where the resources go:

valgrind --tool=callgrind hfst-calculate -i phonology.sfst -o phonology.sfsta >& LOG
 

(It seems that composition takes most of the time in unweighted hfst. Epsilons are not filtered. Arcs are not sorted.)

Revision 40 (2009-09-11) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 63 to 63
 

OMorFi

Changed:
<
<
Compiling omorfi on hfst computer:
>
>
Below are compilation times for OMorFi. fst-compiler-utf8 is the SFST version 1.3 compiler, hfst-calculate is the unweighted HFST compiler, and hwfst-calculate is the weighted HFST compiler.
 
time fst-compiler-utf8 hfst-calculate hwfst-calculate
real 25m10.831s 25m15.849s 7m54.459s
user 25m8.830s 24m55.530s 7m52.230s
sys 0m1.890s 0m20.140s 0m2.180s
Changed:
<
<
EARLIER:
>
>
Results from SFST and unweighted HFST are equivalent. Unweighted and weighted HFST do not yield equivalent results, but input and output projections of the results are equivalent. This suggests that the results are functionally the same, but differ in their alignment.
 
Changed:
<
<
omorfi.hfst and omorfi.hwfst are not equivalent, but their input and output projections are equivalent. In omorfi.hfst, the same input/output strings are included many times with different alignments. omorfi.hwfst accepts only one alignment. For example:
>
>
The equivalence of input and output projections does not necessarily mean that each input produces the same output in weighted and unweighted results. Because the results are cyclic, it is impossible to check all possible input/output relations.

Subtracting unweighted and weighted results shows that the weighted result is a subset of the unweighted one. It seems that the unweighted result accepts more alignments for some input/output relations than the weighted one.

We can test this by subtracting the weighted result from the unweighted one and taking the input projection. Then we can test the weighted and unweighted results with these strings. It seems that the weighted result accepts only one alignment for each input/output relation but the unweighted one accepts several alignments. For example, the input rht<53><34> produces only one output in the weighted result:

 
Changed:
<
<
rht:<>:<>:<> {<53>:<>:<><>:<~T>, <53>:<><>:<~T>:<>, <>:<~T><53>:<>:<>} <34>
>
>
r h t :<> :<> :<> <53>:<> :<> <>:<~T> <34>
 
Changed:
<
<
TODO: Where does this difference come from?
>
>
but in the unweighted one it produces the same output three times, with different alignments:
 
Changed:
<
<
NOW:

omorfi.output.hfst is hard to minimize. From omorfi_1.sfsta onwards, weighted and unweighted hfst transducers are not equivalent. From compounds.sfsta onwards, transducers are cyclic.

>
>
<wb>  r  h t :<> :<> <verb>:<> <53>:<> <f>:<> <>:<~T> <pcpneg><34><c>
<wb>  r  h t :<> :<> <verb>:<> <53>:<> <>:<~T> <f>:<> <pcpneg><34><c>
<wb>  r  h t :<> :<> <verb>:<> <>:<~T> <53>:<> <f>:<> <pcpneg><34><c>
 
Changed:
<
<
The omorfi_1.input.sfsta files are equivalent, but the omorfi_1.output.sfsta files are not. (The unweighted/weighted transformation in the comparison works in both directions, so the problem is probably not in the compare program. Two unweighted/weighted transforms in a pipe work for both weighted and unweighted transducers, so the problem is probably not in the transform programs either.)
>
>
The reason for this is that implementations of composition differ in SFST and OpenFst. SFST implements a naive composition algorithm where labels with output epsilons (in the first transducer) and labels with input epsilons (in the second transducer) can appear in any order. In unweighted transducers this produces redundant paths but the result is still correct. However, in weighted transducers a naive composition will produce wrong results. That is why OpenFst's composition algorithm allows only one alignment. By default this is the alignment where labels with output epsilons (in the first transducer) come before labels with input epsilons (in the second transducer).
 
Changed:
<
<
Result: the unweighted transducer has somewhat more states and arcs than the weighted one. The weighted transducer is a subset of the unweighted one. The unweighted transducer has 684257 extra output words and 1019429 extra input/output word pairs.
>
>
TODO: Why does pushing labels not produce equivalent results?
 
Changed:
<
<
input kukintaaika<9> seems to generate outputs that have nothing or small a with a '^' between the words in unweighted transducer.
>
>
Roark, Sproat: Computational Approaches to Morphology and Syntax, pages 14-15; Pereira, Riley: Finite-State Language Processing, pages 439-442; Mohri, Pereira, Riley: Speech Recognition with Weighted Finite-State Transducers, pages 13-14
 
Changed:
<
<
In omorfi_1.sfst, $wbtohyphen$ are not equivalent:
>
>
Could OpenFst's algorithm be implemented in SFST, at least for testing purposes? How about filtering afterwards? Suppose the naive composition is intersected with the following filter:
 
Changed:
<
<
$wbtohyphen$ = {}:{} ^->? ([#WordChars#]__[#WordChars#])
>
>
0
0 0 x x
0 0 x e
0 1 e x
1
1 1 e x
1 0 x x
 
Changed:
<
<
FIXED: There was an error in hwfst's optional make_replace. The pi machine was not closured.
>
>
This would clearly remove the redundant paths where input epsilons come before output epsilons. However, the filter does not know where these paths have originated; such paths that are present in one of the transducers before composition are also removed.

Changing the composition algorithm:

1) If input epsilon labels are inserted, no output epsilon labels are allowed to be inserted right after them 2) If an epsilon label is inserted to a state that already exists, the state must be checked for output epsilon labels 3) If the state contains them, a new state is created where all labels except the output epsilon labels are copied 4) If the target state was the same as the originating state, an extra loop is added

 
Deleted:
<
<
Now omorfi_1.sfstas are equivalent, but omorfi_2.sfstas are not.
 
Deleted:
<
<
Reason: no filtering on SFSTs compose. Overgeneration of paths.
 

SMOR

phon.fst

Deleted:
<
<
eaxelson@hfst:~/morphisto/SFST/data/SMOR$
 SMOR files are encoded with ISO-8859. Because HFST does not support this encoding, files are converted to UTF-8. Original ISO files are moved to their own directory:

cat original-iso-8859-files/FOO.fst | iconv -f ISO-8859-1 -t UTF-8 > FOO.fst
Changed:
<
<
First, phon.fst is compiled:

../../src/fst-compiler original-iso-8859-files/phon.fst phon.iso.sfst
../../src/fst-compiler-utf8 phon.fst phon.sfst
hfst-calculate -i phon.fst -o phon.hfst
hwfst-calculate -i phon.fst -o phon.hwfst

phon.sfst and phon.hfst are equivalent, but phon.hwfst is not. Compiling of phon.fst takes about 3 seconds with all compilers.

(There was a problem if UTF-8 was used in sfst or hfst. On line 239 of file phon.fst, minimizing the transducer variable (first determinization) took all resources. Changing iconv -f ISO-8859-16 to ISO-8859-1 solved the problem.)

>
>
First, phon.fst is compiled. phon.sfst and phon.hfst are equivalent, but phon.hwfst is not. Compiling of phon.fst takes about 3 seconds with all compilers.
  Let's find out where hwfst-calculate goes wrong. Let's compare intermediate results from hfst-calculate and hwfst-calculate. All intermediate results are equivalent. The problem was in hfst-weighted2unweighted: in OpenFst the initial state number is not
Line: 164 to 181
 
user   192m8.728s 187m6.549s 29m44.068s
sys   0m6.558s 0m6.485s 0m3.305s
Changed:
<
<
smor.sfst and smor.hfst are equal, smor.hwfst is not. Let's check the equality of input and output projections.
>
>
smor.sfst and smor.hfst are equivalent, smor.hwfst is not. Let's check the equality of input and output projections.
 Number of epsilon transitions increases: determinization takes 300 GB of memory and more than a day to complete.
Line: 196 to 213
 cat smor.max20.[input|output].hfst | hfst-unweighted2weighted | hfst-compare smor.max20.[input|output].hwfst
Changed:
<
<
Size of smor.max20.hfst is 320 MB. (output equivalence test started 14:56 Wed on hfst)
>
>
Size of smor.max20.hfst is 320 MB.
 
Changed:
<
<
A corpus of 700 words gives equal results.
>
>
A corpus of 700 words gives equivalent results.
  NOTE: -g options are now removed from Makefiles of ofst/fst/lib and hfst-tools and -O3 options are added to Makefiles of ofst (subdirectories already had -O3 options) and hfst-tools.

Revision 39 (2009-09-08) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 210 to 210
 g++ -o testi.exe testi.C -g -pg
 valgrind --tool=callgrind ./testi.exe
 callgrind_annotate --inclusive=yes callgrind.out.
Added:
>
>
make -n test | head -10 | perl -pe "s/^(.*)$/echo TEST\: \'\1\'\; valgrind \1/; s/2\>1//g;" | sh >& LOG
cat LOG | egrep 'TEST\:|definitely|possibly'
 

Revision 38 (2009-09-08) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Revision 37 (2009-09-07) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Composition

Changed:
<
<
for i in a b c d e; do for j in '' a b c d e; do for k in '' a b c d e; do for l in '' a b c d e; do for m in '' a b c d e; do echo $i $j $k $l $m; done; done; done; done; done > 6480_words=
>
>
for i in a b c d e; do for j in '' a b c d e; do for k in '' a b c d e; do for l in '' a b c d e; do for m in '' a b c d e; do echo $i $j $k $l $m; done; done; done; done; done > 6480_words

for i in a b c d e f g h i j k l m n o p q r s t u v w x y z; do  \
for j in a b c d e f g h i j k l m n o p q r s t u v w x y z; do  \
for k in a b c d e f g h i j k l m n o p q r s t u v w x y z; do  \
for l in a b c d e f g h i j k l m n o p q r s t u v w x y z; do  \
for m in a b c d e f g h i j k l m n o p q r s t u v w x y z; do  \
echo $i $j $k $l $m;\
done; done; done; done; done \
> N_words
 

time cat 6480_words | ../hfst-strings2fst -e '<>' -p -S -j -o 6480_words.hfst

Line: 40 to 57
 
user 0m0.012s 0m0.052s
sys 0m0.004s 0m0.008s
Deleted:
<
<
TODO: Why is OpenFst faster?
 
Changed:
<
<
Inputs and results are in both cases non-minimal. Reading and writing could take more time in unweighted case?
>
>
Inputs and results are in both cases non-minimal.
 

OMorFi

Line: 195 to 212
 callgrind_annotate --inclusive=yes callgrind.out.
Changed:
<
<
It seems that composition takes most of the time in unweighted hfst. Epsilons are not filtered. Arcs are not sorted.
>
>

(It seems that composition takes most of the time in unweighted hfst. Epsilons are not filtered. Arcs are not sorted.)

 

Revision 36 (2009-09-07) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 195 to 195
 callgrind_annotate --inclusive=yes callgrind.out.
Added:
>
>
It seems that composition takes most of the time in unweighted hfst. Epsilons are not filtered. Arcs are not sorted.
 

Revision 35 (2009-08-31) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 49 to 49
 Compiling omorfi on hfst computer:

time fst-compiler-utf8 hfst-calculate hwfst-calculate
Changed:
<
<
real 54m13.369s 28m13.361s 25m34.886s
user 54m9.439s 27m52.777s 25m33.472s
sys 0m4.380s 0m22.241s 0m3.076s

Compiling omorfi on hippu:

time fst-compiler-utf8 hfst-calculate hwfst-calculate
real      
user      
sys      
>
>
real 25m10.831s 25m15.849s 7m54.459s
user 25m8.830s 24m55.530s 7m52.230s
sys 0m1.890s 0m20.140s 0m2.180s
  EARLIER:
Line: 99 to 92
  Now omorfi_1.sfstas are equivalent, but omorfi_2.sfstas are not.
Added:
>
>
Reason: no filtering on SFSTs compose. Overgeneration of paths.
 

SMOR

phon.fst

Revision 34 (2009-08-31) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 89 to 89
 input kukintaaika<9> seems to generate outputs that have nothing or small a with a '^' between the words in unweighted transducer.
Added:
>
>
In omorfi_1.sfst, $wbtohyphen$ are not equivalent:

$wbtohyphen$ = {<wb>}:{} ^->? ([#WordChars#]__[#WordChars#])

FIXED: There was an error in hwfst's optional make_replace. The pi machine was not closured.

Now omorfi_1.sfstas are equivalent, but omorfi_2.sfstas are not.

 

SMOR

Revision 33 (2009-08-28) - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 60 to 60
 
user      
sys      
Added:
>
>
EARLIER:
 omorfi.hfst and omorfi.hwfst are not equivalent, but their input and output projections are equivalent. In omorfi.hfst, the same input/output strings are included many times with different alignments. omorfi.hwfst accepts only one alignment. For example:
Line: 72 to 74
  TODO: Where does this difference come from?
Added:
>
>
NOW:

omorfi.output.hfst is hard to minimize. From omorfi_1.sfsta onwards, weighted and unweighted hfst transducers are not equivalent. From compounds.sfsta onwards, transducers are cyclic.

The omorfi_1.input.sfsta files are equivalent, but the omorfi_1.output.sfsta files are not. (The unweighted/weighted transformation in the comparison works in both directions, so the problem is probably not in the compare program. Two unweighted/weighted transforms in a pipe work for both weighted and unweighted transducers, so the problem is probably not in the transform programs either.)

Result: the unweighted transducer has somewhat more states and arcs than the weighted one. The weighted transducer is a subset of the unweighted one. The unweighted transducer has 684257 extra output words and 1019429 extra input/output word pairs.

input kukintaaika<9> seems to generate outputs that have nothing or small a with a '^' between the words in unweighted transducer.

 

SMOR

phon.fst

Revision 322009-08-27 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 40 to 40
 
user 0m0.012s 0m0.052s
sys 0m0.004s 0m0.008s
Changed:
<
<
TODO: Are both results minimised/not-minimised?
>
>
TODO: Why is OpenFst faster?

In both cases the inputs and results are non-minimal. Could reading and writing take more time in the unweighted case?

 

OMorFi

Revision 312009-08-27 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 40 to 40
 
user 0m0.012s 0m0.052s
sys 0m0.004s 0m0.008s
Added:
>
>
TODO: Are both results minimised/not-minimised?
 

OMorFi

Revision 302009-08-26 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 99 to 99
 All intermediate results are equivalent. The problem was in hfst-weighted2unweighted: in OpenFst the initial state number is not always zero as in SFST.
Changed:
<
<
TODO: hfst-fst2txt should swap numbers of initial state and state number zero if initial state is not number zero.
>
>
FIXED: hfst-fst2txt swaps numbers of initial state and state number zero if initial state is not number zero.
 

smor.fst

Revision 292009-08-26 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 137 to 137
 # of output epsilons 390780
Added:
>
>
Let's check the difference of smor.hfst and smor.hwfst: It seems that smor.hwfst is a subset of smor.hfst.

cat smor.hfst | hfst-unweighted2weighted > smor.hfst.hwfst
hfst-minus smor.hfst.hwfst smor.hwfst  # 862084 states, 2139005 arcs
hfst-minus smor.hwfst smor.hfst.hwfst  # empty

Let's try to limit the number of paths in smor.h(w)fst by intersecting it with a transducer accepting only paths not longer than 20 transitions.

hfst-intersect smor.h(w)fst max20.h(w)fst > smor.max20.h(w)fst
cat smor.max20.h(w)fst | hfst-project -p [input|output] > smor.max20.[input|output].h(w)fst 
cat smor.max20.[input|output].hfst | hfst-unweighted2weighted | hfst-compare smor.max20.[input|output].hwfst

Size of smor.max20.hfst is 320 MB. (output equivalence test started 14:56 Wed on hfst)

  A corpus of 700 words gives equal results.

Revision 282009-08-26 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Composition

Changed:
<
<
for i in a b c d e; do  for j in '' a b c d e; do  for k in '' a b c d e; do  for l in '' a b c d e; do  for m in '' a b c d e; do   echo $i $j $k $l $m;   done; done; done; done; done  > 6480_words
>
>
for i in a b c d e; do  for j in '' a b c d e; do  for k in '' a b c d e; do  for l in '' a b c d e; do  for m in '' a b c d e; do \
echo $i $j $k $l $m;   done; done; done; done; done  > 6480_words
  time cat 6480_words | ../hfst-strings2fst -e '<>' -p -S -j -o 6480_words.hfst
Line: 40 to 43
 

OMorFi

Changed:
<
<
Compiling omorfi on hfst computer (option -g?):
>
>
Compiling omorfi on hfst computer:
 
time fst-compiler-utf8 hfst-calculate hwfst-calculate
real 54m13.369s 28m13.361s 25m34.886s
user 54m9.439s 27m52.777s 25m33.472s
sys 0m4.380s 0m22.241s 0m3.076s
Changed:
<
<
Compiling omorfi on hippu (option -g?):
>
>
Compiling omorfi on hippu:
 
time fst-compiler-utf8 hfst-calculate hwfst-calculate
real      
user      
sys      
Changed:
<
<
Omorfi compiled with hfst and hwfst differ, but input and output projections are equal. The same input-output strings are included many times in hfst version with different alignments. The hwfst version accepts only one alignment. For example:
>
>
omorfi.hfst and omorfi.hwfst are not equivalent, but their input and output projections are equivalent. In omorfi.hfst, the same input/output strings are included many times with different alignments. omorfi.hwfst accepts only one alignment. For example:
 
<wb>rht:<>:<><verb>:<> 
Line: 62 to 67
 <34>
Added:
>
>
TODO: Where does this difference come from?
 

SMOR

phon.fst

Line: 85 to 92
  phon.sfst and phon.hfst are equivalent, but phon.hwfst is not. Compiling of phon.fst takes about 3 seconds with all compilers.
Changed:
<
<
(There was a problem if UTF-8 was used in sfst or hfst. On line 239 of file phon.fst, minimizing the transducer variable (first determinization) took all resources. Changing iconv -f ISO-8859-16 to ISO-8859-1 solved the problem.)
>
>
(There was a problem if UTF-8 was used in sfst or hfst. On line 239 of file phon.fst, minimizing the transducer variable (first determinization) took all resources. Changing iconv -f ISO-8859-16 to ISO-8859-1 solved the problem.)
  Let's find out where hwfst-calculate goes wrong. Let's compare intermediate results from hfst-calculate and hwfst-calculate.
Added:
>
>
All intermediate results are equivalent. The problem was in hfst-weighted2unweighted: in OpenFst the initial state number is not always zero as in SFST.
 
Changed:
<
<
 
hfst-calculate -i phon.fst -o phon.hfst
ls R* | egrep '[0-9]+$' | perl -pe 's/^(.*)$/mv \1 \1\.hfst/g;' | sh
hwfst-calculate -i phon.fst -o phon.hwfst
ls R* | egrep '[0-9]+$' | perl -pe 's/^(.*)$/mv \1 \1\.hwfst/g;' | sh

for i in 0 1 2 3 4 5 6 7 8 9; do hfst-weighted2unweighted R$i.hwfst | hfst-compare R$i.hfst; done;
for i in 11 12 13 14 15 16 17 18 19 20 21; do hfst-weighted2unweighted R$i.hwfst | hfst-compare R$i.hfst; done;
for i in 1 2 3 4 5 6; do hfst-weighted2unweighted T$i.hwfst | hfst-compare T$i.hfst; done;
for i in X1 X2; do hfst-weighted2unweighted $i.hwfst | hfst-compare $i.hfst; done;

All intermediate results are equivalent.

Test equivalence for transducers like [a:<> <>:b], [<>:b a:<>] and [a:b]. None of these are equivalent when testing with weighted or unweighted transducers. Test intersection, difference and composition. Intersections are empty, differences are equal. Compositions are equal, but not minimized in hwfst.

hfst-compose "a:<>_<>:b.hfst" "<>:b_a:<>.hfst"
>
>
TODO: hfst-fst2txt should swap numbers of initial state and state number zero if initial state is not number zero.
 
Deleted:
<
<
0 1 a <> 0 3 <> b 1 2 <> b 3 2 a <>

hfst-compose "a:<>_<>:b.hfst" "a:b.hfst"

0 1 a <>

hfst-compose "a:b.hfst" "<>:b_a:<>.hfst"

0 1 <> b

All weighted compositions are empty.

hfst-weighted2unweighted phon.hwfst | hfst-unweighted2weighted | hfst-compare phon.hwfst --> different! Reason: does weighted2unweighted always make state number zero the start state? In HWFST, must the initial state be indicated in hfst-fst2txt?

 

smor.fst

Next, smor.fst is compiled:

Deleted:
<
<
time ../../src/fst-compiler-utf8 smor.fst smor.sfst
smor.fst: 9
reading transducer from phon.sfst...finished
smor.fst: 11
  map.fst: 277
smor.fst: 15
reading words from lexicon...
10000 words
finished
smor.fst: 29
  NUM.fst: 104
smor.fst: 125
  deko.fst: 353
smor.fst: 126
  flexion.fst: 1429
smor.fst: 127
  defaults.fst: 142
smor.fst: 168
  FIX.fst: 24
smor.fst: 169
  PRO.fst: 1039
smor.fst: 214

real    107m57.402s
user    107m51.076s
sys     0m4.712s

Let's also try using the original iso-encoded files:

time ../../src/fst-compiler original-iso-8859-files/smor.fst smor.iso.sfst
original-iso-8859-files/smor.fst: 9
reading transducer from phon.a...finished
original-iso-8859-files/smor.fst: 11
  map.fst: 277
original-iso-8859-files/smor.fst: 15
reading words from lexicon...
10000 words
finished
original-iso-8859-files/smor.fst: 29
  NUM.fst: 104
original-iso-8859-files/smor.fst: 125
  deko.fst: 353
original-iso-8859-files/smor.fst: 126
  flexion.fst: 1429
original-iso-8859-files/smor.fst: 127
  defaults.fst: 63
defaults.fst:63: warning: assignment of empty transducer to: $R1$
  defaults.fst: 69
defaults.fst:69: warning: assignment of empty transducer to: $Uml$
  defaults.fst: 74
defaults.fst:74: warning: assignment of empty transducer to: $DefDerivNN$
  defaults.fst: 114
defaults.fst:114: warning: assignment of empty transducer to: $DefDerivV$
  defaults.fst: 124
defaults.fst:124: warning: assignment of empty transducer to: $DefDerivV$
  defaults.fst: 142
original-iso-8859-files/smor.fst: 168
  FIX.fst: 24
original-iso-8859-files/smor.fst: 169
  PRO.fst: 1039
original-iso-8859-files/smor.fst: 214

real    29m33.761s
user    29m32.127s
sys     0m1.608s

There seem to be problems with hwfst:

time hwfst-calculate -i smor.fst -o smor.hwfst
smor.fst: 11
  map.fst: 277
smor.fst: 15
reading words from lexicon...
smor.fst: 29
  NUM.fst: 104
smor.fst: 125
  deko.fst: 353
smor.fst: 126
  flexion.fst: 1429
smor.fst: 127
  defaults.fst: 25
defaults.fst: 25: warning: assignment of empty transducer to!: $TMP$
  defaults.fst: 36
defaults.fst: 36: warning: assignment of empty transducer to!: $DefKomposNN$
  defaults.fst: 43
defaults.fst: 43: warning: assignment of empty transducer to!: $T$
  defaults.fst: 46
defaults.fst: 46: warning: assignment of empty transducer to!: $DefKomposNN$
  defaults.fst: 74
defaults.fst: 74: warning: assignment of empty transducer to!: $DefDerivNN$
  defaults.fst: 80
defaults.fst: 80: warning: assignment of empty transducer to!: $DefDerivNE$
  defaults.fst: 85
defaults.fst: 85: warning: assignment of empty transducer to!: $DefKomposNE$
  defaults.fst: 94
defaults.fst: 94: warning: assignment of empty transducer to!: $DefBaseADJ$
  defaults.fst: 101
defaults.fst: 101: warning: assignment of empty transducer to!: $DefKomposADJ$
  defaults.fst: 103
defaults.fst: 103: warning: assignment of empty transducer to!: $DefDerivADJ$
  defaults.fst: 109
defaults.fst: 109: warning: assignment of empty transducer to!: $DefKomposV$
  defaults.fst: 114
defaults.fst: 114: warning: assignment of empty transducer to!: $DefDerivV$
  defaults.fst: 124
defaults.fst: 124: warning: assignment of empty transducer to!: $DefDerivV$
  defaults.fst: 142
smor.fst: 168
  FIX.fst: 24
smor.fst: 169
  PRO.fst: 1039
smor.fst: 181
smor.fst: 181: warning: assignment of empty transducer to!: $BASE$
smor.fst: 190
smor.fst: 190: warning: assignment of empty transducer to!: $UC$
smor.fst: 198
smor.fst: 198: warning: assignment of empty transducer to!: $CAP$
smor.fst: 213

real    1m14.223s
user    1m13.925s
sys     0m0.192s

There are differences in warnings that come from empty transducer variables:

 
Changed:
<
<
  SFST (ISO) SFST (UTF-8) HFST HWFST
defaults.fst: 25: $TMP$   - x x
defaults.fst: 36: $DefKomposNN$   - x x
defaults.fst: 43: $T$   - x x
defaults.fst: 46: $DefKomposNN$   - x x
defaults.fst: 63: $R1$ x -    
defaults.fst: 69: $Uml$ x -    
defaults.fst: 74: $DefDerivNN$ x - x x
defaults.fst: 80: $DefDerivNE$   - x x
defaults.fst: 85: $DefKomposNE$   - x x
defaults.fst: 94: $DefBaseADJ$   - x x
defaults.fst: 101: $DefKomposADJ$   - x x
defaults.fst: 103: $DefDerivADJ$   - x x
defaults.fst: 109: $DefKomposV$   - x x
defaults.fst: 114: $DefDerivV$ x - x x
defaults.fst: 124: $DefDerivV$ x - x x
smor.fst: 181: $BASE$   - x x
smor.fst: 190: $UC$   - x x
smor.fst: 198: $CAP$   - x x

smor.sfst is not equivalent to smor.hwfst, but their difference is empty?

eaxelson@hfst:~/morphisto/SFST/data/SMOR$ hfst-summarize smor.iso.sfst
weighted                        n
has HFST symbol table           y
# of states                     1180629
# of arcs                       2779164
# of final states               7
# of input/output epsilons      0
# of input epsilons             106390
# of output epsilons            203199
# of accessible states          1180629
# of coaccessible states        1180629
# of connected states           1180629
acceptor                        n
input deterministic             n
output deterministic            n
input/output epsilons           n
input epsilons                  y
output epsilons                 y
cyclic                          y
cyclic at initial state         n
accessible                      y
coaccessible                    y
deterministic                   y
minimised                       n
eaxelson@hfst:~/morphisto/SFST/data/SMOR$ hfst-summarize smor.sfst
weighted                        n
has HFST symbol table           y
# of states                     1883419
# of arcs                       4645142
# of final states               7
# of input/output epsilons      0
# of input epsilons             174646
# of output epsilons            390780
# of accessible states          1883419
# of coaccessible states        1883419
# of connected states           1883419
acceptor                        n
input deterministic             n
output deterministic            n
input/output epsilons           n
input epsilons                  y
output epsilons                 y
cyclic                          y
cyclic at initial state         n
accessible                      y
coaccessible                    y
deterministic                   y
minimised                       n

Compiling smor (phon.fst not included) on hfst computer (option -g?):

>
>
Compile times of smor (phon.fst not included) on hfst computer:
 
time fst-compiler (ISO-8859) fst-compiler-utf8 (UTF-8) hfst-calculate (UTF-8) hwfst-calculate (UTF-8)
real 107m33.037s 107m28.501s 108m9.322s 15m23.387s
Line: 351 to 115
 
sys 0m4.430s 0m4.940s 0m5.540s 0m1.770s
Changed:
<
<
Compiling smor (phon.fst not included) on hippu (option -g?):
>
>
Compile times of smor (phon.fst not included) on hippu:
 
time fst-compiler (ISO-8859) fst-compiler-utf8 (UTF-8) hfst-calculate (UTF-8) hwfst-calculate (UTF-8)
real   192m20.061s 187m17.151s 29m51.434s
user   192m8.728s 187m6.549s 29m44.068s
sys   0m6.558s 0m6.485s 0m3.305s
Changed:
<
<
hippu: sfst and hfst are equal, hwfst is not. Input projections are equal, output projections:
>
>
smor.sfst and smor.hfst are equal, smor.hwfst is not. Let's check the equality of input and output projections. Number of epsilon transitions increases: determinization takes 300 GB of memory and more than a day to complete.

hfst-summarize smor.hfst
# of input/output epsilons      0
# of input epsilons             174646
# of output epsilons            390780

hfst-summarize smor.hwfst
# of input/output epsilons      0
# of input epsilons             173920
# of output epsilons            390780
  A corpus of 700 words gives equal results.
Changed:
<
<
NOTE -g options are now removed from Makefiles of ofst/fst/lib and hfst-tools and
>
>
NOTE: -g options are now removed from Makefiles of ofst/fst/lib and hfst-tools and
 -O3 options are added to Makefiles of ofst (subdirectories already had -O3 options) and hfst-tools.
Added:
>
>
This can have some effect on the compile times.
 

Callgrind

Revision 272009-08-25 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 359 to 359
 
sys   0m6.558s 0m6.485s 0m3.305s

hippu: sfst and hfst are equal, hwfst is not. Input projections are equal, output projections:

Changed:
<
<
on hfst: epsilons were removed and transducers are compared.
>
>
 A corpus of 700 words gives equal results.

NOTE -g options are now removed from Makefiles of ofst/fst/lib and hfst-tools and

Revision 262009-08-25 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 359 to 359
 
sys   0m6.558s 0m6.485s 0m3.305s

hippu: sfst and hfst are equal, hwfst is not. Input projections are equal, output projections:

Changed:
<
<
result not ready (300 GB of memory, one day, compare of OpenFst, hippu).
>
>
on hfst: epsilons were removed and transducers are compared.
 A corpus of 700 words gives equal results.

NOTE -g options are now removed from Makefiles of ofst/fst/lib and hfst-tools and

Revision 252009-08-25 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 358 to 358
 
user   192m8.728s 187m6.549s 29m44.068s
sys   0m6.558s 0m6.485s 0m3.305s
Changed:
<
<
hippu: sfst and hfst are equal, hwfst is not. Input projections are equal, output projections: result not ready (275 GB of memory, one day).
>
>
hippu: sfst and hfst are equal, hwfst is not. Input projections are equal, output projections: result not ready (300 GB of memory, one day, compare of OpenFst, hippu). A corpus of 700 words gives equal results.
  NOTE -g options are now removed from Makefiles of ofst/fst/lib and hfst-tools and -O3 options are added to Makefiles of ofst (subdirectories already had -O3 options) and hfst-tools.

Revision 242009-08-25 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 346 to 346
 Compiling smor (phon.fst not included) on hfst computer (option -g?):

time fst-compiler (ISO-8859) fst-compiler-utf8 (UTF-8) hfst-calculate (UTF-8) hwfst-calculate (UTF-8)
Changed:
<
<
real 29m32.751s 107m28.501s 108m9.322s 15m23.387s
user 29m31.351s 107m23.030s 108m2.310s 15m21.480s
sys 0m1.472s 0m4.940s 0m5.540s 0m1.770s

smor.iso.sfst ready on tuesday

>
>
real 107m33.037s 107m28.501s 108m9.322s 15m23.387s
user 107m28.220s 107m23.030s 108m2.310s 15m21.480s
sys 0m4.430s 0m4.940s 0m5.540s 0m1.770s
 

Compiling smor (phon.fst not included) on hippu (option -g?):

Line: 360 to 358
 
user   192m8.728s 187m6.549s 29m44.068s
sys   0m6.558s 0m6.485s 0m3.305s
Changed:
<
<
hippu: sfst and hfst are equal, hwfst is not. Input projections are equal, output projections (ready on tuesday)
>
>
hippu: sfst and hfst are equal, hwfst is not. Input projections are equal, output projections: result not ready (275 GB of memory, one day).

NOTE -g options are now removed from Makefiles of ofst/fst/lib and hfst-tools and -O3 options are added to Makefiles of ofst (subdirectories already had -O3 options) and hfst-tools.

 

Callgrind

Revision 232009-08-24 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 346 to 346
 Compiling smor (phon.fst not included) on hfst computer (option -g?):

time fst-compiler (ISO-8859) fst-compiler-utf8 (UTF-8) hfst-calculate (UTF-8) hwfst-calculate (UTF-8)
Changed:
<
<
real 29m32.751s 107m28.501s    
user 29m31.351s 107m23.030s    
sys 0m1.472s 0m4.940s    
>
>
real 29m32.751s 107m28.501s 108m9.322s 15m23.387s
user 29m31.351s 107m23.030s 108m2.310s 15m21.480s
sys 0m1.472s 0m4.940s 0m5.540s 0m1.770s

smor.iso.sfst ready on tuesday

 

Compiling smor (phon.fst not included) on hippu (option -g?):

Line: 358 to 360
 
user   192m8.728s 187m6.549s 29m44.068s
sys   0m6.558s 0m6.485s 0m3.305s
Changed:
<
<
sfst and hfst are equal, hwfst is not. Input projections are equal, output projections
>
>
hippu: sfst and hfst are equal, hwfst is not. Input projections are equal, output projections (ready on tuesday)
 

Callgrind

Revision 222009-08-24 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 103 to 103
 for i in X1 X2; do hfst-weighted2unweighted $i.hwfst | hfst-compare $i.hfst; done;
Changed:
<
<
All intermediate results are equivalent. If X1.hwfst and X2.hwfst are composed on the command line, the result is equivalent to phon.hfst but not to phon.hwfst? A further test:

$RESULT$ = $X1$ || $X2$
$RESULT$ >> "RESULT"
$RESULT$

  RESULT.hfst phon.hwfst
phon.hfst Eq Eq/Eq
RESULT.hwfst Eq/Not Eq

hfst-weighted2unweighted RESULT.hwfst | hfst-compare RESULT.hfst --> not equivalent hfst-unweighted2weighted RESULT.hfst | hfst-compare RESULT.hwfst --> equivalent

$RESULT$ = $X1$ || $X2$
$RESULT$

hfst-weighted2unweighted phon.hwfst | hfst-compare phon.hfst --> equivalent hfst-unweighted2weighted phon.hfst | hfst-compare phon.hwfst --> equivalent

$X1$ || $X2$

hfst-weighted2unweighted phon.hwfst | hfst-compare phon.hfst --> not equivalent hfst-unweighted2weighted phon.hfst | hfst-compare phon.hwfst --> equivalent

>
>
All intermediate results are equivalent.
  Test equivalence for transducers like [a:<> <>:b], [<>:b a:<>] and [a:b]. None of these are equivalent when testing with weighted or unweighted transducers. Test intersection, difference and composition. Intersections
Changed:
<
<
are empty, differences are equal. Compositions are not equal.
>
>
are empty, differences are equal. Compositions are equal, but not minimized in hwfst.
 
hfst-compose "a:<>_<>:b.hfst" "<>:b_a:<>.hfst"
Line: 156 to 128
  All weighted compositions are empty.
Deleted:
<
<
If phon.hfst is read and written to a file PHON.hfst, there are 207 symbols in the alphabet of phon.hfst but only one symbol (<> 0) in the alphabet of PHON.hfst. read_transducer_and_harmonize must be fixed (DONE).
  hfst-weighted2unweighted phon.hwfst | hfst-unweighted2weighted | hfst-compare phon.hwfst different! reason: weighted2unweighted always makes state number zero the start state? In HWFST in hfst-fst2txt the initial state must
Line: 376 to 346
 Compiling smor (phon.fst not included) on hfst computer (option -g?):

time fst-compiler (ISO-8859) fst-compiler-utf8 (UTF-8) hfst-calculate (UTF-8) hwfst-calculate (UTF-8)
Changed:
<
<
real 29m32.751s      
user 29m31.351s      
sys 0m1.472s      
>
>
real 29m32.751s 107m28.501s    
user 29m31.351s 107m23.030s    
sys 0m1.472s 0m4.940s    
 

Compiling smor (phon.fst not included) on hippu (option -g?):

time fst-compiler (ISO-8859) fst-compiler-utf8 (UTF-8) hfst-calculate (UTF-8) hwfst-calculate (UTF-8)
Changed:
<
<
real        
user        
sys        
>
>
real   192m20.061s 187m17.151s 29m51.434s
user   192m8.728s 187m6.549s 29m44.068s
sys   0m6.558s 0m6.485s 0m3.305s
 
Added:
>
>
sfst and hfst are equal, hwfst is not. Input projections are equal, output projections
 

Callgrind

Revision 212009-08-20 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 381 to 381
 
sys 0m1.472s      
Added:
>
>
Compiling smor (phon.fst not included) on hippu (option -g?):

time fst-compiler (ISO-8859) fst-compiler-utf8 (UTF-8) hfst-calculate (UTF-8) hwfst-calculate (UTF-8)
real        
user        
sys        
 

Callgrind

Revision 202009-08-20 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 47 to 47
 
user 54m9.439s 27m52.777s 25m33.472s
sys 0m4.380s 0m22.241s 0m3.076s
Added:
>
>
Compiling omorfi on hippu (option -g?):

time fst-compiler-utf8 hfst-calculate hwfst-calculate
real      
user      
sys      
 Omorfi compiled with hfst and hwfst differ, but input and output projections are equal. The same input-output strings are included many times in hfst version with different alignments. The hwfst version accepts only one alignment. For example:

Revision 192009-08-13 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 313 to 313
 
smor.fst: 198: $CAP$   - x x
Added:
>
>
smor.sfst is not equivalent to smor.hwfst, but their difference is empty?
 
eaxelson@hfst:~/morphisto/SFST/data/SMOR$ hfst-summarize smor.iso.sfst
weighted                        n

Revision 182009-08-11 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 150 to 150
 All weighted compositions are empty.

If phon.hfst is read and written to a file PHON.hfst, there are 207 symbols in the alphabet of phon.hfst but only one symbol (<> 0) in the alphabet of PHON.hfst.

Changed:
<
<
read_transducer_and_harmonize must be fixed!
>
>
read_transducer_and_harmonize must be fixed (DONE).

hfst-weighted2unweighted phon.hwfst | hfst-unweighted2weighted | hfst-compare phon.hwfst --> different! Reason: does weighted2unweighted always make state number zero the start state? In HWFST, must the initial state be indicated in hfst-fst2txt?

 

smor.fst

Revision 172009-08-10 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 149 to 149
  All weighted compositions are empty.
Added:
>
>
If phon.hfst is read and written to a file PHON.hfst, there are 207 symbols in the alphabet of phon.hfst but only one symbol (<> 0) in the alphabet of PHON.hfst. read_transducer_and_harmonize must be fixed!
 

smor.fst

Next, smor.fst is compiled:

Revision 162009-08-10 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 38 to 38
 
sys 0m0.004s 0m0.008s
Changed:
<
<

OMorFi

>
>

OMorFi

  Compiling omorfi on hfst computer (option -g?):
Line: 57 to 57
 

SMOR

Added:
>
>

phon.fst

 eaxelson@hfst:~/morphisto/SFST/data/SMOR$

SMOR files are encoded with ISO-8859. Because HFST does not support this encoding, files are converted to UTF-8. Original ISO files are moved to their own directory:

Line: 124 to 126
 hfst-weighted2unweighted phon.hwfst | hfst-compare phon.hfst --> not equivalent hfst-unweighted2weighted phon.hfst | hfst-compare phon.hwfst --> equivalent
Changed:
<
<
Test equivalence for transducers like [a:<> <>:b], [<>:b a:<>] and [a:b].
>
>
Test equivalence for transducers like [a:<> <>:b], [<>:b a:<>] and [a:b]. None of these are equivalent when testing with weighted or unweighted transducers. Test intersection, difference and composition. Intersections are empty, differences are equal. Compositions are not equal.

hfst-compose "a:<>_<>:b.hfst" "<>:b_a:<>.hfst"

0       1       a       <>
0       3       <>      b
1       2       <>      b
3       2       a       <>

hfst-compose "a:<>_<>:b.hfst" "a:b.hfst"

0       1       a       <>

hfst-compose "a:b.hfst" "<>:b_a:<>.hfst"

0       1       <>      b

All weighted compositions are empty.

smor.fst

  Next, smor.fst is compiled:
Line: 199 to 224
 sys 0m1.608s
Deleted:
<
<
eaxelson@hfst:~/morphisto/SFST/data/SMOR$ hfst-summarize smor.iso.sfst
weighted                        n
has HFST symbol table           y
# of states                     1180629
# of arcs                       2779164
# of final states               7
# of input/output epsilons      0
# of input epsilons             106390
# of output epsilons            203199
# of accessible states          1180629
# of coaccessible states        1180629
# of connected states           1180629
acceptor                        n
input deterministic             n
output deterministic            n
input/output epsilons           n
input epsilons                  y
output epsilons                 y
cyclic                          y
cyclic at initial state         n
accessible                      y
coaccessible                    y
deterministic                   y
minimised                       n
eaxelson@hfst:~/morphisto/SFST/data/SMOR$ hfst-summarize smor.sfst
weighted                        n
has HFST symbol table           y
# of states                     1883419
# of arcs                       4645142
# of final states               7
# of input/output epsilons      0
# of input epsilons             174646
# of output epsilons            390780
# of accessible states          1883419
# of coaccessible states        1883419
# of connected states           1883419
acceptor                        n
input deterministic             n
output deterministic            n
input/output epsilons           n
input epsilons                  y
output epsilons                 y
cyclic                          y
cyclic at initial state         n
accessible                      y
coaccessible                    y
deterministic                   y
minimised                       n
 There seem to be problems with hwfst:
Line: 309 to 283
 sys 0m0.192s
Added:
>
>
There are differences in warnings that come from empty transducer variables:

  SFST (ISO) SFST (UTF-8) HFST HWFST
defaults.fst: 25: $TMP$   - x x
defaults.fst: 36: $DefKomposNN$   - x x
defaults.fst: 43: $T$   - x x
defaults.fst: 46: $DefKomposNN$   - x x
defaults.fst: 63: $R1$ x -    
defaults.fst: 69: $Uml$ x -    
defaults.fst: 74: $DefDerivNN$ x - x x
defaults.fst: 80: $DefDerivNE$   - x x
defaults.fst: 85: $DefKomposNE$   - x x
defaults.fst: 94: $DefBaseADJ$   - x x
defaults.fst: 101: $DefKomposADJ$   - x x
defaults.fst: 103: $DefDerivADJ$   - x x
defaults.fst: 109: $DefKomposV$   - x x
defaults.fst: 114: $DefDerivV$ x - x x
defaults.fst: 124: $DefDerivV$ x - x x
smor.fst: 181: $BASE$   - x x
smor.fst: 190: $UC$   - x x
smor.fst: 198: $CAP$   - x x

eaxelson@hfst:~/morphisto/SFST/data/SMOR$ hfst-summarize smor.iso.sfst
weighted                        n
has HFST symbol table           y
# of states                     1180629
# of arcs                       2779164
# of final states               7
# of input/output epsilons      0
# of input epsilons             106390
# of output epsilons            203199
# of accessible states          1180629
# of coaccessible states        1180629
# of connected states           1180629
acceptor                        n
input deterministic             n
output deterministic            n
input/output epsilons           n
input epsilons                  y
output epsilons                 y
cyclic                          y
cyclic at initial state         n
accessible                      y
coaccessible                    y
deterministic                   y
minimised                       n
eaxelson@hfst:~/morphisto/SFST/data/SMOR$ hfst-summarize smor.sfst
weighted                        n
has HFST symbol table           y
# of states                     1883419
# of arcs                       4645142
# of final states               7
# of input/output epsilons      0
# of input epsilons             174646
# of output epsilons            390780
# of accessible states          1883419
# of coaccessible states        1883419
# of connected states           1883419
acceptor                        n
input deterministic             n
output deterministic            n
input/output epsilons           n
input epsilons                  y
output epsilons                 y
cyclic                          y
cyclic at initial state         n
accessible                      y
coaccessible                    y
deterministic                   y
minimised                       n
 Compiling smor (phon.fst not included) on hfst computer (option -g?):

time fst-compiler (ISO-8859) fst-compiler-utf8 (UTF-8) hfst-calculate (UTF-8) hwfst-calculate (UTF-8)

Revision 152009-08-07 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 124 to 124
 hfst-weighted2unweighted phon.hwfst | hfst-compare phon.hfst --> not equivalent hfst-unweighted2weighted phon.hfst | hfst-compare phon.hwfst --> equivalent
Added:
>
>
Test equivalence for transducers like [a:<> <>:b], [<>:b a:<>] and [a:b].
 Next, smor.fst is compiled:

Revision 142009-08-07 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 57 to 57
 

SMOR

Changed:
<
<
SMOR files are encoded with ISO-8859. Because HFST does not support this encoding, files are converted to UTF-8:
>
>
eaxelson@hfst:~/morphisto/SFST/data/SMOR$

SMOR files are encoded with ISO-8859. Because HFST does not support this encoding, files are converted to UTF-8. Original ISO files are moved to their own directory:

 
cat original-iso-8859-files/FOO.fst | iconv -f ISO-8859-1 -t UTF-8 > FOO.fst
Line: 66 to 68
 First, phon.fst is compiled:
Added:
>
>
../../src/fst-compiler original-iso-8859-files/phon.fst phon.iso.sfst
 ../../src/fst-compiler-utf8 phon.fst phon.sfst hfst-calculate -i phon.fst -o phon.hfst hwfst-calculate -i phon.fst -o phon.hwfst
Added:
>
>
phon.sfst and phon.hfst are equivalent, but phon.hwfst is not. Compiling of phon.fst takes about 3 seconds with all compilers.
 (There was a problem if UTF-8 was used in sfst or hfst. On line 239 of file phon.fst, minimizing the transducer variable (first determinization) took all resources. Changing iconv -f ISO-8859-16 to ISO-8859-1 solved the problem.)
Changed:
<
<
Let's also try using the original iso-encoded files:
>
>
Let's find out where hwfst-calculate goes wrong. Let's compare intermediate results from hfst-calculate and hwfst-calculate.

 
hfst-calculate -i phon.fst -o phon.hfst
ls R* | egrep '[0-9]+$' | perl -pe 's/^(.*)$/mv \1 \1\.hfst/g;' | sh
hwfst-calculate -i phon.fst -o phon.hwfst
ls R* | egrep '[0-9]+$' | perl -pe 's/^(.*)$/mv \1 \1\.hwfst/g;' | sh

for i in 0 1 2 3 4 5 6 7 8 9; do hfst-weighted2unweighted R$i.hwfst | hfst-compare R$i.hfst; done;
for i in 11 12 13 14 15 16 17 18 19 20 21; do hfst-weighted2unweighted R$i.hwfst | hfst-compare R$i.hfst; done;
for i in 1 2 3 4 5 6; do hfst-weighted2unweighted T$i.hwfst | hfst-compare T$i.hfst; done;
for i in X1 X2; do hfst-weighted2unweighted $i.hwfst | hfst-compare $i.hfst; done;

All intermediate results are equivalent. If X1.hwfst and X2.hwfst are composed on the command line, the result is equivalent to phon.hfst but not to phon.hwfst? A further test:

$RESULT$ = $X1$ || $X2$
$RESULT$ >> "RESULT"
$RESULT$

  RESULT.hfst phon.hwfst
phon.hfst Eq Eq/Eq
RESULT.hwfst Eq/Not Eq

hfst-weighted2unweighted RESULT.hwfst | hfst-compare RESULT.hfst --> not equivalent hfst-unweighted2weighted RESULT.hfst | hfst-compare RESULT.hwfst --> equivalent

$RESULT$ = $X1$ || $X2$
$RESULT$

hfst-weighted2unweighted phon.hwfst | hfst-compare phon.hfst --> equivalent hfst-unweighted2weighted phon.hfst | hfst-compare phon.hwfst --> equivalent

$X1$ || $X2$

hfst-weighted2unweighted phon.hwfst | hfst-compare phon.hfst --> not equivalent hfst-unweighted2weighted phon.hfst | hfst-compare phon.hwfst --> equivalent

Next, smor.fst is compiled:

 
time ../../src/fst-compiler-utf8 smor.fst smor.sfst
Line: 104 to 155
 sys 0m4.712s
Added:
>
>
Let's also try using the original iso-encoded files:
 
time ../../src/fst-compiler original-iso-8859-files/smor.fst smor.iso.sfst
original-iso-8859-files/smor.fst: 9

Revision 132009-08-06 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 76 to 76
 Let's also try using the original iso-encoded files:
Changed:
<
<
time ../../src/fst-compiler original-iso-8859-files/smor.fst smor.sfst
>
>
time ../../src/fst-compiler-utf8 smor.fst smor.sfst smor.fst: 9 reading transducer from phon.sfst...finished smor.fst: 11 map.fst: 277 smor.fst: 15 reading words from lexicon... 10000 words finished smor.fst: 29 NUM.fst: 104 smor.fst: 125 deko.fst: 353 smor.fst: 126 flexion.fst: 1429 smor.fst: 127 defaults.fst: 142 smor.fst: 168 FIX.fst: 24 smor.fst: 169 PRO.fst: 1039 smor.fst: 214

real 107m57.402s user 107m51.076s sys 0m4.712s

time ../../src/fst-compiler original-iso-8859-files/smor.fst smor.iso.sfst
 original-iso-8859-files/smor.fst: 9 reading transducer from phon.a...finished original-iso-8859-files/smor.fst: 11
Line: 114 to 143
 sys 0m1.608s
Added:
>
>
eaxelson@hfst:~/morphisto/SFST/data/SMOR$ hfst-summarize smor.iso.sfst
weighted                        n
has HFST symbol table           y
# of states                     1180629
# of arcs                       2779164
# of final states               7
# of input/output epsilons      0
# of input epsilons             106390
# of output epsilons            203199
# of accessible states          1180629
# of coaccessible states        1180629
# of connected states           1180629
acceptor                        n
input deterministic             n
output deterministic            n
input/output epsilons           n
input epsilons                  y
output epsilons                 y
cyclic                          y
cyclic at initial state         n
accessible                      y
coaccessible                    y
deterministic                   y
minimised                       n
eaxelson@hfst:~/morphisto/SFST/data/SMOR$ hfst-summarize smor.sfst
weighted                        n
has HFST symbol table           y
# of states                     1883419
# of arcs                       4645142
# of final states               7
# of input/output epsilons      0
# of input epsilons             174646
# of output epsilons            390780
# of accessible states          1883419
# of coaccessible states        1883419
# of connected states           1883419
acceptor                        n
input deterministic             n
output deterministic            n
input/output epsilons           n
input epsilons                  y
output epsilons                 y
cyclic                          y
cyclic at initial state         n
accessible                      y
coaccessible                    y
deterministic                   y
minimised                       n
 There seem to be problems with hwfst:

Revision 122009-08-06 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 60 to 60
 SMOR files are encoded with ISO-8859. Because HFST does not support this encoding, files are converted to UTF-8:
Changed:
<
<
cat original-iso-8859-files/FOO.fst | iconv -f ISO-8859-16 -t UTF-8 > FOO.fst
>
>
cat original-iso-8859-files/FOO.fst | iconv -f ISO-8859-1 -t UTF-8 > FOO.fst
 

First, phon.fst is compiled:

Changed:
<
<
../../src/fst-compiler phon.fst phon.sfst // this does the same as next sometimes?? hfst-calculate -i phon.fst -o phon.hfst // this stucks to line 239 and takes all CPU resources??
>
>
../../src/fst-compiler-utf8 phon.fst phon.sfst hfst-calculate -i phon.fst -o phon.hfst
 hwfst-calculate -i phon.fst -o phon.hwfst
Added:
>
>
(There was a problem if UTF-8 was used with sfst or hfst. On line 239 of phon.fst, minimizing the transducer variable (first determinization) took all resources. Changing the iconv source encoding from ISO-8859-16 to ISO-8859-1 solved the problem.)

Let's also try using the original iso-encoded files:

time ../../src/fst-compiler original-iso-8859-files/smor.fst smor.sfst
original-iso-8859-files/smor.fst: 9
reading transducer from phon.a...finished
original-iso-8859-files/smor.fst: 11
  map.fst: 277
original-iso-8859-files/smor.fst: 15
reading words from lexicon...
10000 words
finished
original-iso-8859-files/smor.fst: 29
  NUM.fst: 104
original-iso-8859-files/smor.fst: 125
  deko.fst: 353
original-iso-8859-files/smor.fst: 126
  flexion.fst: 1429
original-iso-8859-files/smor.fst: 127
  defaults.fst: 63
defaults.fst:63: warning: assignment of empty transducer to: $R1$
  defaults.fst: 69
defaults.fst:69: warning: assignment of empty transducer to: $Uml$
  defaults.fst: 74
defaults.fst:74: warning: assignment of empty transducer to: $DefDerivNN$
  defaults.fst: 114
defaults.fst:114: warning: assignment of empty transducer to: $DefDerivV$
  defaults.fst: 124
defaults.fst:124: warning: assignment of empty transducer to: $DefDerivV$
  defaults.fst: 142
original-iso-8859-files/smor.fst: 168
  FIX.fst: 24
original-iso-8859-files/smor.fst: 169
  PRO.fst: 1039
original-iso-8859-files/smor.fst: 214

real    29m33.761s
user    29m32.127s
sys     0m1.608s

There seem to be problems with hwfst:

time hwfst-calculate -i smor.fst -o smor.hwfst
smor.fst: 11
  map.fst: 277
smor.fst: 15
reading words from lexicon...
smor.fst: 29
  NUM.fst: 104
smor.fst: 125
  deko.fst: 353
smor.fst: 126
  flexion.fst: 1429
smor.fst: 127
  defaults.fst: 25
defaults.fst: 25: warning: assignment of empty transducer to!: $TMP$
  defaults.fst: 36
defaults.fst: 36: warning: assignment of empty transducer to!: $DefKomposNN$
  defaults.fst: 43
defaults.fst: 43: warning: assignment of empty transducer to!: $T$
  defaults.fst: 46
defaults.fst: 46: warning: assignment of empty transducer to!: $DefKomposNN$
  defaults.fst: 74
defaults.fst: 74: warning: assignment of empty transducer to!: $DefDerivNN$
  defaults.fst: 80
defaults.fst: 80: warning: assignment of empty transducer to!: $DefDerivNE$
  defaults.fst: 85
defaults.fst: 85: warning: assignment of empty transducer to!: $DefKomposNE$
  defaults.fst: 94
defaults.fst: 94: warning: assignment of empty transducer to!: $DefBaseADJ$
  defaults.fst: 101
defaults.fst: 101: warning: assignment of empty transducer to!: $DefKomposADJ$
  defaults.fst: 103
defaults.fst: 103: warning: assignment of empty transducer to!: $DefDerivADJ$
  defaults.fst: 109
defaults.fst: 109: warning: assignment of empty transducer to!: $DefKomposV$
  defaults.fst: 114
defaults.fst: 114: warning: assignment of empty transducer to!: $DefDerivV$
  defaults.fst: 124
defaults.fst: 124: warning: assignment of empty transducer to!: $DefDerivV$
  defaults.fst: 142
smor.fst: 168
  FIX.fst: 24
smor.fst: 169
  PRO.fst: 1039
smor.fst: 181
smor.fst: 181: warning: assignment of empty transducer to!: $BASE$
smor.fst: 190
smor.fst: 190: warning: assignment of empty transducer to!: $UC$
smor.fst: 198
smor.fst: 198: warning: assignment of empty transducer to!: $CAP$
smor.fst: 213

real    1m14.223s
user    1m13.925s
sys     0m0.192s
 Compiling smor (phon.fst not included) on hfst computer (option -g?):
Changed:
<
<
time fst-compiler (ISO-8859) hfst-calculate (UTF-8) hwfst-calculate (UTF-8)
real 29m32.751s    
user 29m31.351s    
sys 0m1.472s    
>
>
time fst-compiler (ISO-8859) fst-compiler-utf8 (UTF-8) hfst-calculate (UTF-8) hwfst-calculate (UTF-8)
real 29m32.751s      
user 29m31.351s      
sys 0m1.472s      
 

Callgrind

Revision 112009-08-05 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Added:
>
>

Composition

 for i in a b c d e; do  for j in '' a b c d e; do  for k in '' a b c d e; do  for l in '' a b c d e; do  for m in '' a b c d e; do   echo $i $j $k $l $m;   done; done; done; done; done  > 6480_words

time cat 6480_words | ../hfst-strings2fst -e '<>' -p -S -j -o 6480_words.hfst

Line: 39 to 38
 
sys 0m0.004s 0m0.008s
Added:
>
>

OMorFi

 Compiling omorfi on hfst computer (option -g?):

time fst-compiler-utf8 hfst-calculate hwfst-calculate
Line: 46 to 47
 
user 54m9.439s 27m52.777s 25m33.472s
sys 0m4.380s 0m22.241s 0m3.076s
Added:
>
>
The omorfi transducers compiled with hfst and hwfst differ, but their input and output projections are equal. The hfst version contains the same input-output strings many times with different alignments, whereas the hwfst version accepts only one alignment. For example:

<wb>rht:<>:<><verb>:<> 
{<53>:<><f>:<><>:<~T>, <53>:<><>:<~T><f>:<>, <>:<~T><53>:<><f>:<>} 
<pcpneg><34><c>

SMOR

SMOR files are encoded with ISO-8859. Because HFST does not support this encoding, files are converted to UTF-8:

cat original-iso-8859-files/FOO.fst | iconv -f ISO-8859-16 -t UTF-8 > FOO.fst

First, phon.fst is compiled:

../../src/fst-compiler phon.fst phon.sfst // this does the same as next sometimes??
hfst-calculate -i phon.fst -o phon.hfst // this gets stuck at line 239 and takes all CPU resources??
hwfst-calculate -i phon.fst -o phon.hwfst
 Compiling smor (phon.fst not included) on hfst computer (option -g?):

time fst-compiler (ISO-8859) hfst-calculate (UTF-8) hwfst-calculate (UTF-8)
Line: 53 to 78
 
user 29m31.351s    
sys 0m1.472s    
Added:
>
>

Callgrind

 
g++ -o testi.exe testi.C -g -pg
valgrind --tool=callgrind ./testi.exe

Revision 102009-08-05 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 46 to 46
 
user 54m9.439s 27m52.777s 25m33.472s
sys 0m4.380s 0m22.241s 0m3.076s
Added:
>
>
Compiling smor (phon.fst not included) on hfst computer (option -g?):

time fst-compiler (ISO-8859) hfst-calculate (UTF-8) hwfst-calculate (UTF-8)
real 29m32.751s    
user 29m31.351s    
sys 0m1.472s    
 
g++ -o testi.exe testi.C -g -pg

Revision 92009-07-27 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 50 to 50
 
g++ -o testi.exe testi.C -g -pg
valgrind --tool=callgrind ./testi.exe
Changed:
<
<
callgrind_annotate callgrind.out.
>
>
callgrind_annotate --inclusive=yes callgrind.out.
 


Revision 82009-06-29 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 39 to 39
 
sys 0m0.004s 0m0.008s
Changed:
<
<
Compiling omorfi on hfst computer:
>
>
Compiling omorfi on hfst computer (option -g?):
 
time fst-compiler-utf8 hfst-calculate hwfst-calculate
real 54m13.369s 28m13.361s 25m34.886s

Revision 72009-06-29 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 46 to 46
 
user 54m9.439s 27m52.777s 25m33.472s
sys 0m4.380s 0m22.241s 0m3.076s
Deleted:
<
<
Problems with hwfst-calculate:
 
Changed:
<
<
eaxelson@hfst:~/kokeilu/omorfi/src$ cat plurale-tantum.sfsta | hfst-weighted2unweighted | hfst-compare ../../omorfi-hfst-tulokset/plurale-tantum.sfsta Equivalent eaxelson@hfst:~/kokeilu/omorfi/src$ cat plurale-tantum.sfsta | hfst-weighted2unweighted | hfst-compare -m ../../omorfi-hfst-tulokset/plurale-tantum.sfsta Equivalent eaxelson@hfst:~/kokeilu/omorfi/src$ cat 3.sfsta | hfst-weighted2unweighted | hfst-compare -m ../../omorfi-hfst-tulokset/3.sfsta Equivalent eaxelson@hfst:~/kokeilu/omorfi/src$ cat 3.sfsta | hfst-weighted2unweighted | hfst-compare ../../omorfi-hfst-tulokset/3.sfsta Equivalent eaxelson@hfst:~/kokeilu/omorfi/src$ hfst-compose 3.sfsta plurale-tantum.sfsta > weighted_composition eaxelson@hfst:~/kokeilu/omorfi/src$ hfst-compose ../../omorfi-hfst-tulokset/3.sfsta ../../omorfi-hfst-tulokset/plurale-tantum.sfsta > unweighted_composition eaxelson@hfst:~/kokeilu/omorfi/src$ cat plurale-tantum.sfsta | hfst-weighted2unweighted | hfst-compare -m ../../omorfi-hfst-tulokset/plurale-tantum.sfsta Equivalent eaxelson@hfst:~/kokeilu/omorfi/src$ cat weighted_composition | hfst-weighted2unweighted | hfst-compare unweighted_composition Equivalent eaxelson@hfst:~/kokeilu/omorfi/src$ cat weighted_composition | hfst-weighted2unweighted | hfst-compare -m unweighted_composition Not equivalent eaxelson@hfst:~/kokeilu/omorfi/src$ cat weighted_composition | hfst-minimize > weighted_composition.min eaxelson@hfst:~/kokeilu/omorfi/src$ cat unweighted_composition | hfst-minimize > unweighted_composition.min eaxelson@hfst:~/kokeilu/omorfi/src$ cat weighted_composition | hfst-weighted2unweighted | hfst-compare -m unweighted_composition Not equivalent eaxelson@hfst:~/kokeilu/omorfi/src$ cat weighted_composition.min | hfst-weighted2unweighted | hfst-compare -m unweighted_composition.min Equivalent eaxelson@hfst:~/kokeilu/omorfi/src$ cat unweighted_composition.min | hfst-fst2strings | sort > unweighted_composition.min.words eaxelson@hfst:~/kokeilu/omorfi/src$ cat 
weighted_composition.min | hfst-fst2strings | sort > weighted_composition.min.words eaxelson@hfst:~/kokeilu/omorfi/src$ wc -l weighted_composition.min.words 93154 weighted_composition.min.words eaxelson@hfst:~/kokeilu/omorfi/src$ wc -l unweighted_composition.min.words 93154 unweighted_composition.min.words eaxelson@hfst:~/kokeilu/omorfi/src$ diff weighted_composition.min.words unweighted_composition.min.words > differences eaxelson@hfst:~/kokeilu/omorfi/src$ wc -l differences 0 differences

WRONG IN WEIGHTED: vihki¤iset<38>:vihki¤iset<38> virkaanastujaiset<38>:virkaanastujaiset<38> virkaanasettajaiset<38>:virkaanasettajaiset<38> viinakset<39>:viinak<39> nokoset<38>:nokoset<38> n¤k¤r¤iset<38>:n¤k¤r¤iset<38> naamiaiset<38>:naamiaiset<38> naapurukset<39>:naapuruk<39> nimi¤iset<38>:nimi¤iset<38> nivuset<38>:nivuset<38>

RIGHT IN UNWEIGHTED: vihki¤iset<38>:vihki¤inen<38> virkaanastujaiset<38>:virkaanastujainen<38> virkaanasettajaiset<38>:virkaanasettajainen<38> viinakset<39>:viinas<39> nokoset<38>:nokonen<38> n¤k¤r¤iset<38>:n¤k¤r¤inen<38> naamiaiset<38>:naamiainen<38> naapurukset<39>:naapurus<39> nimi¤iset<38>:nimi¤inen<38> nivuset<38>:nivunen<38>

>
>
g++ -o testi.exe testi.C -g -pg valgrind --tool=callgrind ./testi.exe callgrind_annotate callgrind.out.
 


Revision 62009-06-25 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 42 to 42
 Compiling omorfi on hfst computer:

time fst-compiler-utf8 hfst-calculate hwfst-calculate
Changed:
<
<
real 54m13.369s 28m13.361s  
user 54m9.439s 27m52.777s  
sys 0m4.380s 0m22.241s  
>
>
real 54m13.369s 28m13.361s 25m34.886s
user 54m9.439s 27m52.777s 25m33.472s
sys 0m4.380s 0m22.241s 0m3.076s
  Problems with hwfst-calculate:

Revision 52009-06-02 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 79 to 79
 eaxelson@hfst:~/kokeilu/omorfi/src$ diff weighted_composition.min.words unweighted_composition.min.words > differences eaxelson@hfst:~/kokeilu/omorfi/src$ wc -l differences 0 differences
Added:
>
>
WRONG IN WEIGHTED: vihki¤iset<38>:vihki¤iset<38> virkaanastujaiset<38>:virkaanastujaiset<38> virkaanasettajaiset<38>:virkaanasettajaiset<38> viinakset<39>:viinak<39> nokoset<38>:nokoset<38> n¤k¤r¤iset<38>:n¤k¤r¤iset<38> naamiaiset<38>:naamiaiset<38> naapurukset<39>:naapuruk<39> nimi¤iset<38>:nimi¤iset<38> nivuset<38>:nivuset<38>

RIGHT IN UNWEIGHTED: vihki¤iset<38>:vihki¤inen<38> virkaanastujaiset<38>:virkaanastujainen<38> virkaanasettajaiset<38>:virkaanasettajainen<38> viinakset<39>:viinas<39> nokoset<38>:nokonen<38> n¤k¤r¤iset<38>:n¤k¤r¤inen<38> naamiaiset<38>:naamiainen<38> naapurukset<39>:naapurus<39> nimi¤iset<38>:nimi¤inen<38> nivuset<38>:nivunen<38>

 


Revision 42009-06-02 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 46 to 46
 
user 54m9.439s 27m52.777s  
sys 0m4.380s 0m22.241s  
Added:
>
>
Problems with hwfst-calculate:

eaxelson@hfst:~/kokeilu/omorfi/src$ cat plurale-tantum.sfsta | hfst-weighted2unweighted | hfst-compare ../../omorfi-hfst-tulokset/plurale-tantum.sfsta
Equivalent
eaxelson@hfst:~/kokeilu/omorfi/src$ cat plurale-tantum.sfsta | hfst-weighted2unweighted | hfst-compare -m ../../omorfi-hfst-tulokset/plurale-tantum.sfsta
Equivalent
eaxelson@hfst:~/kokeilu/omorfi/src$ cat 3.sfsta | hfst-weighted2unweighted | hfst-compare -m ../../omorfi-hfst-tulokset/3.sfsta
Equivalent
eaxelson@hfst:~/kokeilu/omorfi/src$ cat 3.sfsta | hfst-weighted2unweighted | hfst-compare ../../omorfi-hfst-tulokset/3.sfsta  Equivalent
eaxelson@hfst:~/kokeilu/omorfi/src$ hfst-compose 3.sfsta plurale-tantum.sfsta > weighted_composition
eaxelson@hfst:~/kokeilu/omorfi/src$ hfst-compose ../../omorfi-hfst-tulokset/3.sfsta ../../omorfi-hfst-tulokset/plurale-tantum.sfsta > unweighted_composition
eaxelson@hfst:~/kokeilu/omorfi/src$ cat plurale-tantum.sfsta | hfst-weighted2unweighted | hfst-compare -m ../../omorfi-hfst-tulokset/plurale-tantum.sfsta
Equivalent
eaxelson@hfst:~/kokeilu/omorfi/src$ cat weighted_composition | hfst-weighted2unweighted | hfst-compare unweighted_composition 
Equivalent
eaxelson@hfst:~/kokeilu/omorfi/src$ cat weighted_composition | hfst-weighted2unweighted | hfst-compare -m unweighted_composition
Not equivalent
eaxelson@hfst:~/kokeilu/omorfi/src$ cat weighted_composition | hfst-minimize > weighted_composition.min
eaxelson@hfst:~/kokeilu/omorfi/src$ cat unweighted_composition | hfst-minimize > unweighted_composition.min
eaxelson@hfst:~/kokeilu/omorfi/src$ cat weighted_composition | hfst-weighted2unweighted | hfst-compare -m unweighted_composition
Not equivalent
eaxelson@hfst:~/kokeilu/omorfi/src$ cat weighted_composition.min | hfst-weighted2unweighted | hfst-compare -m unweighted_composition.min
Equivalent
eaxelson@hfst:~/kokeilu/omorfi/src$ cat unweighted_composition.min | hfst-fst2strings | sort > unweighted_composition.min.words
eaxelson@hfst:~/kokeilu/omorfi/src$ cat weighted_composition.min | hfst-fst2strings | sort > weighted_composition.min.words  
eaxelson@hfst:~/kokeilu/omorfi/src$ wc -l weighted_composition.min.words
93154 weighted_composition.min.words
eaxelson@hfst:~/kokeilu/omorfi/src$ wc -l unweighted_composition.min.words
93154 unweighted_composition.min.words
eaxelson@hfst:~/kokeilu/omorfi/src$ diff weighted_composition.min.words unweighted_composition.min.words > differences
eaxelson@hfst:~/kokeilu/omorfi/src$ wc -l differences
0 differences
 

Revision 32009-06-01 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 21 to 21
 sys 0m0.004s
Deleted:
<
<
Compiling omorfi with fst-compiler-utf8

real    54m13.369s
user    54m9.439s
sys     0m4.380s
 

time ../hfst-compose 6480_words.hfst 6480_words.hfst -o composed.hfst

Line: 47 to 39
 
sys 0m0.004s 0m0.008s
Added:
>
>
Compiling omorfi on hfst computer:

time fst-compiler-utf8 hfst-calculate hwfst-calculate
real 54m13.369s 28m13.361s  
user 54m9.439s 27m52.777s  
sys 0m4.380s 0m22.241s  
 

Revision 22009-05-20 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstHome"

HFST Performance Testing

Line: 21 to 21
 sys 0m0.004s
Added:
>
>
Compiling omorfi with fst-compiler-utf8

real    54m13.369s
user    54m9.439s
sys     0m4.380s
 

time ../hfst-compose 6480_words.hfst 6480_words.hfst -o composed.hfst

Revision 12009-05-13 - ErikAxelson

Line: 1 to 1
Added:
>
>
META TOPICPARENT name="HfstHome"

HFST Performance Testing

for i in a b c d e; do  for j in '' a b c d e; do  for k in '' a b c d e; do  for l in '' a b c d e; do  for m in '' a b c d e; do   echo $i $j $k $l $m;   done; done; done; done; done  > 6480_words

time cat 6480_words | ../hfst-strings2fst -e '<>' -p -S -j -o 6480_words.hfst

real    0m0.057s
user    0m0.056s
sys     0m0.008s

time cat 6480_words | ../hfst-strings2fst -w -e '<>' -p -S -j -o 6480_words.hwfst

real    0m0.050s
user    0m0.048s
sys     0m0.004s

time ../hfst-compose 6480_words.hfst 6480_words.hfst -o composed.hfst

time no epsilons epsilons
real 0m0.097s 0m0.243s
user 0m0.036s 0m0.112s
sys 0m0.060s 0m0.132s

time ../hfst-compose 6480_words.hwfst 6480_words.hwfst -o composed.hwfst

time no epsilons epsilons
real 0m0.016s 0m0.056s
user 0m0.012s 0m0.052s
sys 0m0.004s 0m0.008s


<--  
-->
-- ErikAxelson - 2009-05-13
 
This site is powered by the TWiki collaboration platform Powered by PerlCopyright © 2008-2019 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding TWiki? Send feedback