Difference: HfstLingala (1 vs. 3)

Revision 3 - 2016-05-18 - KristerLinden

Line: 1 to 1
 
META TOPICPARENT name="HfstAllPages"

HFST: Lingala

Line: 184 to 184
 
<--  
-->
-- ErikAxelson - 2011-09-26
Added:
>
>
META PREFERENCE name="VIEW_TEMPLATE" title="VIEW_TEMPLATE" type="Set" value="FinCLARIN.ViewFinClarinWideEngTemplate"

Revision 2 - 2011-09-26 - ErikAxelson

Line: 1 to 1
 
META TOPICPARENT name="HfstAllPages"
Changed:
<
<

HFST: (name of this topic page)

>
>

HFST: Lingala

 
Changed:
<
<
(contents of this page)
>
>
We exemplify the use of HFST command line tools with an example taken from Beesley & Karttunen that generates a lexical analyzer for Lingala, a Bantu language spoken along the Zaire river. $FORMAT is the implementation type of the transducer. The solution given on this page can also be executed with a single script.

# Stems: the union of all verb stems used in this example, each given
# in {...} notation. The regular expression is fed to hfst-regexp2fst on
# standard input and the result is stored in the file Stems.
hfst-regexp2fst -f $FORMAT > Stems <<'END_OF_REGEXP'
[ {bet} | {béb} | {bomb} | {bóndel} | {bóngol} | {bót} | {búk} |
 {fung} | {kabol} | {kang} | {kom} | {kund} | {kóm} | {lakis} |
 {lí}  | {lob} | {luk} | {ndim} | {palangan} | {pangwis} | {sál} |
 {sepel} | {sómb} | {tál} | {támbol} | {tambwis} | {tataban} |
 {tún} | {yébis}
]
END_OF_REGEXP

# L: the set of letters (a-z plus the accented vowels). The Cleanup
# rule further down deletes every symbol that is not in this set.
hfst-regexp2fst -f $FORMAT > L <<'END_OF_REGEXP'
[a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|
       á|é|í|ó|ú]
END_OF_REGEXP

# Person features: one file per value, Person1 .. Person3.
for person in 1 2 3; do
  printf '[Per ":" %s]\n' "$person" \
    | hfst-regexp2fst -f $FORMAT > "Person$person"
done

# Number feature: Sg or Pl.
printf '%s\n' '[Num ":" [Sg|Pl] ]' \
  | hfst-regexp2fst -f $FORMAT > Number

# Gender feature, stored as Gender3. The values are listed explicitly;
# the commented-out xfst definition below is kept as found
# (presumably an earlier, more compact formulation -- TODO confirm).
# define Gender [Gen ":" [[1["0"|1]|2|3|4|5|6|7|8|9] "." [1[4|5]|[1|9|10]a]]];
hfst-regexp2fst -f $FORMAT > Gender3 <<'END_OF_REGEXP'
[Gen ":" [1 "." 2 | 1a "." 2 | 3 "." 4 | 5 "." 6 |
                7 "." 8 | 9a "." 10a | 10 | 11 "." 6 |
                14 "." 6 | 15]]
END_OF_REGEXP
 
# define Reflexive [No | Yes];

# Morphemes noted for reference:
#   Passive:     -am-
#   Causative:   -is-
#   Reciprocal:  -an-
#   Applicative: -el-
#   Reversive:   -ol-
#   Iterative:   reduplication of the root, or of the root plus another
#                morpheme; the two copies are separated by -a-, and the
#                second copy of the root has low tone on the first syllable.
# That is, the order of the morphemes is: REV, CAUS, PASS, APPL, RECIP
#
# Examples:
#   alobaloba 'he/she speak SP ITER'
#   alíalia   'he eat SP ITER'

# One file per tense group, each pairing the group label with its values.
printf '%s\n' '[Past ":" [Rec|Hist|MoreRem|MostRem]]' \
  | hfst-regexp2fst -f $FORMAT > PastTense
printf '%s\n' '[Pres ":" [Cont|Hab1|Hab2]]' \
  | hfst-regexp2fst -f $FORMAT > PresTense
printf '%s\n' '[Fut ":" [Immed|MostRem]]' \
  | hfst-regexp2fst -f $FORMAT > FutTense

# define Reflexive [Refl ":" [No|Yes]];

# Tense: the Tns label followed by any one of the three tense groups.
printf '%s\n' '[Tns ":" [PastTense|PresTense|FutTense]]' \
  | hfst-regexp2fst -f $FORMAT > Tense

# define Polarity [Pol ":" [Pos|Neg]];

# Agreement: first and second person carry person and number only;
# third person additionally carries a gender. Like every other
# definition on this page it is passed to hfst-regexp2fst and stored in
# a file named after the definition (a bare xfst "define" statement is
# not a shell command and would leave Agreement undefined for the
# SubjAgr, ObjAgr and Agr steps that follow).
echo '[[[Person1 | Person2] " " Number] |
                  [Person3 " " Number " " Gender3]]' | hfst-regexp2fst -f $FORMAT > Agreement

# For Gender 15 we have only a singular subject marker, no plural
# and no object markers. Missing info?

# SubjAgr: subject agreement, minus everything containing both Pl and 15.
printf '%s\n' '[Sub ":" Agreement]  - [$Pl & $15]' \
  | hfst-regexp2fst -f $FORMAT > SubjAgr
# ObjAgr: object agreement, minus everything containing 15.
printf '%s\n' ' [Obj ":" Agreement] - $15 ' \
  | hfst-regexp2fst -f $FORMAT > ObjAgr
# Agr: agreement under the label Func; the rules of referral further
# down substitute Sub or Obj for Func.
printf '%s\n' '  [Func ":" Agreement]' \
  | hfst-regexp2fst -f $FORMAT > Agr

#define Features [SubjAgr " " ObjAgr " "  Tense " " Polarity];

# Features: subject agreement, object agreement and tense, separated
# by single spaces.
printf '%s\n' ' [SubjAgr " " ObjAgr " " Tense]' \
  | hfst-regexp2fst -f $FORMAT > Features

# VerbLex: "<" stem "," features ">".
printf '%s\n' '"<" Stems "," Features ">" ' \
  | hfst-regexp2fst -f $FORMAT > VerbLex

# Agreement markers shared by subjects and objects. Every rule inserts
# its marker directly after "<" when the material that follows contains
# an Agr specification with the listed feature values.

# Singular markers (third person).
printf '%s\n' '[[. .] -> {mo} || "<" _ [$[Agr & $Person3 & $Sg & $4]]]' \
  | hfst-regexp2fst -f $FORMAT > RAgr1
printf '%s\n' '[[. .] -> {li} || "<" _ [$[Agr & $Person3 & $Sg & $5]]]' \
  | hfst-regexp2fst -f $FORMAT > RAgr2
printf '%s\n' '[[. .] -> e  || "<" _ [$[Agr & $Person3 & $Sg & $[9a"."10a]]]]' \
  | hfst-regexp2fst -f $FORMAT > RAgr3
printf '%s\n' '[[. .] -> {lo} || "<" _ [$[Agr & $Person3 & $Sg & $[10|11]]]]' \
  | hfst-regexp2fst -f $FORMAT > RAgr4
printf '%s\n' '[[. .] -> {bo} || "<" _ [$[Agr & $Person3 & $Sg & $14]]]' \
  | hfst-regexp2fst -f $FORMAT > RAgr5

# Plural markers (second person, then third person).
printf '%s\n' '[[. .] -> {bo} || "<" _ [$[Agr & $Person2 & $Pl]]] ' \
  | hfst-regexp2fst -f $FORMAT > RAgr6
printf '%s\n' '[[. .] -> {ba} || "<" _ [$[Agr & $Person3 & $Pl & $2]]]' \
  | hfst-regexp2fst -f $FORMAT > RAgr7
printf '%s\n' '[[. .] -> {mi} || "<" _ [$[Agr & $Person3 & $Pl & $4]]]' \
  | hfst-regexp2fst -f $FORMAT > RAgr8
printf '%s\n' '[[. .] -> {ma} || "<" _ [$[Agr & $Person3 & $Pl & $[5|6]]]]' \
  | hfst-regexp2fst -f $FORMAT > RAgr9
printf '%s\n' '[[. .] -> {bi}  || "<" _ [$[Agr & $Person3 & $Pl & $7]]]' \
  | hfst-regexp2fst -f $FORMAT > RAgr10
printf '%s\n' '[[. .] -> i  || "<" _ [$[Agr & $Person3 & $Pl & $[9a|10]]]]' \
  | hfst-regexp2fst -f $FORMAT > RAgr11

# Rule Block 1
#
# Subject markers that are specific to the singular. Each rule inserts
# its marker directly after "<" when the following material contains a
# SubjAgr specification with the listed feature values.

printf '%s\n' '[[. .] -> {na} || "<" _ [$[SubjAgr & $Person1 & $Sg]]] ' \
  | hfst-regexp2fst -f $FORMAT > R101
printf '%s\n' '[[. .] -> o || "<" _ [$[SubjAgr & $Person2 & $Sg]]] ' \
  | hfst-regexp2fst -f $FORMAT > R102
printf '%s\n' '[[. .] -> a || "<" _ [$[SubjAgr & $Person3 & $Sg & $2]]]' \
  | hfst-regexp2fst -f $FORMAT > R103
printf '%s\n' '[[. .] -> e  || "<" _ [$[SubjAgr & $Person3 & $Sg & $7]]]' \
  | hfst-regexp2fst -f $FORMAT > R104
printf '%s\n' '[[. .] -> {ei} || "<" _[$[SubjAgr & $Person3 & $Sg & $15]]]' \
  | hfst-regexp2fst -f $FORMAT > R105

# Rules of referral for singular subject markers: R106..R110 are the
# common rules RAgr1..RAgr5 with the symbol Func substituted by Sub.
rule_no=106
for agr_no in 1 2 3 4 5; do
  printf '`[RAgr%s, Func, Sub]\n' "$agr_no" \
    | hfst-regexp2fst -f $FORMAT > "R$rule_no"
  rule_no=$((rule_no + 1))
done

# Subject marker specific to the plural (first person).
printf '%s\n' '[[. .] -> {to} || "<" _ [$[SubjAgr & $Person1 & $Pl]]] ' \
  | hfst-regexp2fst -f $FORMAT > R111

# Rules of referral for plural subject markers: R112..R117 are the
# common rules RAgr6..RAgr11 with the symbol Func substituted by Sub.
rule_no=112
for agr_no in 6 7 8 9 10 11; do
  printf '`[RAgr%s, Func, Sub]\n' "$agr_no" \
    | hfst-regexp2fst -f $FORMAT > "R$rule_no"
  rule_no=$((rule_no + 1))
done

# R201: insert "ko" directly after "<" when what follows contains Fut":"Immed.
printf '%s\n' '[[. .] -> {ko} || "<" _ [$[Fut":"Immed]]] ' \
  | hfst-regexp2fst -f $FORMAT > R201

# Object markers that are specific to the singular. Each rule inserts
# its marker directly after "<" when the following material contains an
# ObjAgr specification with the listed feature values.

printf '%s\n' '[[. .] -> n || "<" _ [$[ObjAgr & $Person1 & $Sg]]] ' \
  | hfst-regexp2fst -f $FORMAT > R301
printf '%s\n' '[[. .] -> {ko} || "<" _ [$[ObjAgr & $Person2 & $Sg]]] ' \
  | hfst-regexp2fst -f $FORMAT > R302
printf '%s\n' '[[. .] -> {mo} || "<" _ [$[ObjAgr & $Person3 & $Sg & $2]]]' \
  | hfst-regexp2fst -f $FORMAT > R303
printf '%s\n' '[[. .] -> {ei}  || "<" _ [$[ObjAgr & $Person3 & $Sg & $7]]]' \
  | hfst-regexp2fst -f $FORMAT > R304

# Rules of referral for singular object markers: R305..R309 are the
# common rules RAgr1..RAgr5 with the symbol Func substituted by Obj.
rule_no=305
for agr_no in 1 2 3 4 5; do
  printf '`[RAgr%s, Func, Obj]\n' "$agr_no" \
    | hfst-regexp2fst -f $FORMAT > "R$rule_no"
  rule_no=$((rule_no + 1))
done

# Object marker specific to the plural (first person).
printf '%s\n' '[[. .] -> {lo} || "<" _ [$[ObjAgr & $Person1 & $Pl]]] ' \
  | hfst-regexp2fst -f $FORMAT > R310

# Rules of referral for plural object markers: R311..R316 are the
# common rules RAgr6..RAgr11 with the symbol Func substituted by Obj.
rule_no=311
for agr_no in 6 7 8 9 10 11; do
  printf '`[RAgr%s, Func, Obj]\n' "$agr_no" \
    | hfst-regexp2fst -f $FORMAT > "R$rule_no"
  rule_no=$((rule_no + 1))
done

# Tense rules. These insert material directly before the "," that
# separates the stem from the features in VerbLex.

# R401: "ak" for Pres Hab1/Hab2 and Past Hist/MostRem.
hfst-regexp2fst -f $FORMAT > R401 <<'END_OF_REGEXP'
[[. .] -> {ak} || _ "," [$[Pres":"[Hab1|Hab2]|
                                 Past":"[Hist|MostRem]]]]
END_OF_REGEXP

# R402: "a" for Pres Cont and Fut Immed.
printf '%s\n' '[[. .] -> a    || _ "," [$[Pres":"Cont|Fut":"Immed]]]' \
  | hfst-regexp2fst -f $FORMAT > R402

# R501: "i" for Fut MostRem and Past Rec/Hist.
printf '%s\n' '[[. .] -> i || _ "," [$[Fut":"MostRem|Past":"[Rec|Hist]]]]' \
  | hfst-regexp2fst -f $FORMAT > R501

# Eliminate Features and auxiliary symbols from the lower side:
# every symbol that is not in L is mapped to the empty string.
printf '%s\n' '\L -> 0' \
  | hfst-regexp2fst -f $FORMAT > Cleanup

# Build the Lingala Verb Transducer
#
# The lexicon VerbLex is composed (.o.) with the rules in this order:
#   1. object markers          R301..R316
#   2. the "ko" prefix rule    R201
#   3. subject markers         R101..R117
#   4. tense rules             R401, R402, R501
#   5. Cleanup, which removes everything that is not a letter in L
# Every prefix rule inserts directly after "<", so a prefix inserted by a
# later rule ends up in front of those inserted by earlier rules.
#
# The input is a regular expression and the wanted result is a
# transducer, so it is compiled with hfst-regexp2fst like every other
# step on this page; the result is written to the file Lingala.

echo 'VerbLex
        .o.
   R301 .o. R302 .o. R303 .o. R304 .o. R305 .o.
   R306 .o. R307 .o. R308 .o. R309
        .o.
   R310 .o. R311 .o. R312 .o. R313 .o. R314 .o.
   R315 .o. R316
        .o.
        R201
        .o.
   R101 .o. R102 .o. R103 .o. R104 .o. R105 .o.
   R106 .o. R107 .o. R108 .o. R109 .o. R110
        .o.
   R111 .o. R112 .o. R113 .o. R114 .o. R115 .o.
   R116 .o. R117
        .o.
   R401 .o. R402 .o. R501
        .o.
       Cleanup' | hfst-regexp2fst -f $FORMAT > Lingala
 

Revision 1 - 2011-09-26 - ErikAxelson

Line: 1 to 1
Added:
>
>
META TOPICPARENT name="HfstAllPages"

HFST: (name of this topic page)

(contents of this page)


<--  
-->
-- ErikAxelson - 2011-09-26
 
This site is powered by the TWiki collaboration platform. Powered by Perl. Copyright © 2008-2019 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding TWiki? Send feedback