HFST: Date Parser Script

See HfstDateParser for more information.

NOTE: This script should be updated because some parts of it are implemented differently from the solution given in HfstDateParser.

# Every hfst-strings2fst call in this script passes FORMAT to -f. Stop early
# with a clear message when it is missing; an empty unquoted expansion would
# otherwise vanish and let -f consume the next option as its argument.
: "${FORMAT:?FORMAT must be set to the HFST transducer format passed to -f}"

# Numbers from one to nine.
printf '%s\n' 1 2 3 4 5 6 7 8 9 |
  hfst-strings2fst -f "$FORMAT" -j > OneToNine.hfst

# Numbers from zero to nine: the single string "0" joined with OneToNine.
printf '%s\n' 0 |
  hfst-strings2fst -f "$FORMAT" |
  hfst-disjunct OneToNine.hfst > ZeroToNine.hfst

# Even numbers.
printf '%s\n' 0 2 4 6 8 |
  hfst-strings2fst -f "$FORMAT" -j > Even.hfst

# Odd numbers.
printf '%s\n' 1 3 5 7 9 |
  hfst-strings2fst -f "$FORMAT" -j > Odd.hfst

# Even and odd numbers, i.e. any single digit.
hfst-disjunct Even.hfst Odd.hfst > N.hfst

# Days of the week.
printf '%s\n' Monday Tuesday Wednesday Thursday Friday Saturday Sunday |
  hfst-strings2fst -f "$FORMAT" -j > Day.hfst

# A special month that usually has only 28 days.
printf '%s\n' February |
  hfst-strings2fst -f "$FORMAT" > Month28.hfst

# Months that have 30 days.
printf '%s\n' April June September November |
  hfst-strings2fst -f "$FORMAT" -j > Month30.hfst

# Months that have 31 days.
printf '%s\n' January March May July August October December |
  hfst-strings2fst -f "$FORMAT" -j > Month31.hfst

# All months: the union of the three groups above.
hfst-disjunct Month28.hfst Month30.hfst |
  hfst-disjunct Month31.hfst > Month.hfst

# Numbers from 1 to 31: the single digits 1-9, the numbers 10-29 built as a
# leading "1" or "2" followed by any digit, and the literals 30 and 31.
# The piped transducer is the left operand of the concatenation; -2 names the
# right operand explicitly instead of relying on how the tool treats a lone
# positional argument.
printf '%s\n' 1 2 |
  hfst-strings2fst -f "$FORMAT" -j |
  hfst-concatenate -2 ZeroToNine.hfst |
  hfst-disjunct OneToNine.hfst > TMP.hfst
printf '%s\n' 30 31 |
  hfst-strings2fst -f "$FORMAT" -j |
  hfst-disjunct TMP.hfst > Date.hfst

# Numbers from 1 to 9999: a non-zero leading digit (left operand, -1)
# followed by at most three further digits.
hfst-repeat -t 3 ZeroToNine.hfst |
  hfst-concatenate -1 OneToNine.hfst > Year.hfst

# Day or [Month and Date] with optional Day and Year, excluding leap dates.
# First the small building blocks: the ", " separator, a single space, and
# the empty string.
printf '%s\n' ", " | hfst-strings2fst -f "$FORMAT" > CommaSpace.hfst
printf '%s\n' " " | hfst-strings2fst -f "$FORMAT" > Space.hfst
printf '%s\n' "" | hfst-strings2fst -f "$FORMAT" > Epsilon.hfst

# Day followed by a comma and a space, e.g. "Thursday, ". The disjunction
# with Epsilon makes the whole prefix optional.
hfst-concatenate Day.hfst CommaSpace.hfst |
  hfst-disjunct Epsilon.hfst > OptionalDay.hfst

# Month and a date separated by a single space, e.g. "January 14".
# The result still pairs every month with every date from 1 to 31.
hfst-concatenate Month.hfst Space.hfst |
  hfst-concatenate -2 Date.hfst > MonthDate_.hfst

# Constraints on dates 29, 30 and 31.
printf '%s\n' 29 | hfst-strings2fst -f "$FORMAT" > 29.hfst
printf '%s\n' 30 | hfst-strings2fst -f "$FORMAT" > 30.hfst
printf '%s\n' 31 | hfst-strings2fst -f "$FORMAT" > 31.hfst

# Forbidden for 30-day months: "<month> 31".
# -2 names the right operand; the piped "<month> " is the left one.
hfst-concatenate Month30.hfst Space.hfst |
  hfst-concatenate -2 31.hfst > Constraint30.hfst

# Forbidden for February: "February 29", "February 30" and "February 31".
hfst-disjunct 30.hfst 31.hfst |
  hfst-disjunct 29.hfst > TMP.hfst
hfst-concatenate Month28.hfst Space.hfst |
  hfst-concatenate -2 TMP.hfst > Constraint28.hfst

# Remove both forbidden sets from the unconstrained month/date pairs.
hfst-subtract MonthDate_.hfst Constraint30.hfst |
  hfst-subtract -2 Constraint28.hfst > MonthDate.hfst

# An optional year, e.g. ", 1995": the ", " separator followed by a year,
# or nothing at all.
hfst-concatenate -1 CommaSpace.hfst -2 Year.hfst |
  hfst-disjunct Epsilon.hfst > OptionalYear.hfst

# Get all valid dates, except leap dates:
# [optional day] month-and-date [optional year], or a bare day name.
hfst-concatenate -1 OptionalDay.hfst -2 MonthDate.hfst |
  hfst-concatenate -2 OptionalYear.hfst |
  hfst-disjunct Day.hfst > ValidDates.hfst

# Get numbers divisible by 4.
# Of single digit numbers, 4 and 8 are divisible by 4.
# In larger numbers divisible by 4, if the penultimate digit
# is even, the last digit is 0, 4, or 8. If the penultimate digit
# is odd, the last digit is 2 or 6. This time we resort to
# the SFST programming language parser.
printf '%s\n' "4 | 8 | (0|1|2|3|4|5|6|7|8|9)* ( (0|2|4|6|8)(0|4|8) | (1|3|5|7|9)(2|6) )" |
  hfst-sfstpl2fst -f "$FORMAT" > Div4.hfst

# Leap years are divisible by 4 but we have to subtract centuries that are not divisible by 400.
# Centuries that are not divisible by 400 are of format "a number that is not divisible by 4 followed
# by two zeros", e.g. 1500 or 2100.
printf '%s\n' 00 | hfst-strings2fst -f "$FORMAT" > 00.hfst
hfst-repeat -f 1 N.hfst |        # all digit strings of length one or more
  hfst-subtract -2 Div4.hfst |   # those not divisible by 4
  hfst-concatenate -2 00.hfst |  # all centuries not divisible by 400
  hfst-subtract -1 Div4.hfst |   # Div4 minus those centuries: all leap years
  hfst-conjunct Year.hfst > LeapYear.hfst  # keep only years in the range 1 to 9999

# An optional leap year, e.g. ", 1916".
hfst-concatenate CommaSpace.hfst LeapYear.hfst |
  hfst-disjunct Epsilon.hfst > OptionalLeapYear.hfst

# Construct leap dates: [optional day] "February 29" [optional leap year].
# Each step names which side the file operand goes on (-1 left, -2 right);
# the piped transducer takes the other side.
hfst-concatenate Month28.hfst Space.hfst |
  hfst-concatenate -2 29.hfst |
  hfst-concatenate -1 OptionalDay.hfst |
  hfst-concatenate -2 OptionalLeapYear.hfst > LeapDates.hfst

# Get all possible dates.
hfst-disjunct ValidDates.hfst LeapDates.hfst > Dates.hfst

# We can now use the files false_dates:
printf '%s\n' \
  "February 29, 1900" \
  "Monday, February 29, 1700" \
  "Wednesday, December 32, 2003" \
  "June 31" |
  hfst-strings2fst -f "$FORMAT" -j > false_dates.hfst

# and correct_dates:
printf '%s\n' \
  "February 29, 1916" \
  "Saturday, February 29, 1708" \
  "Thursday, December 31, 2005" \
  "July 31" |
  hfst-strings2fst -f "$FORMAT" -j > correct_dates.hfst

# to test if the transducer Dates.hfst accepts all correct
# dates and rejects all false dates.

# First test: no false date may survive intersection with Dates.hfst, so the
# intersection must equal empty.hfst (built by feeding a blank line to
# hfst-txt2fst).
hfst-conjunct false_dates.hfst Dates.hfst > TMP
printf '%s\n' "" | hfst-txt2fst -f "$FORMAT" > empty.hfst
if hfst-compare -s TMP empty.hfst; then
  echo "OK: first test passed"
else
  echo "FAIL: first test failed"
fi

# Second test: every correct date must survive, so intersecting with
# Dates.hfst must leave correct_dates.hfst unchanged.
hfst-conjunct correct_dates.hfst Dates.hfst > TMP
if hfst-compare -s TMP correct_dates.hfst; then
  echo "OK: second test passed"
else
  echo "FAIL: second test failed"
fi


-- ErikAxelson - 2011-09-01