%% This OTP, part of the Makor2 package for typesetting Hebrew with
%% Omega, is responsible for the minimal contextual analysis that
%% Hebrew demands.  That is, where appropriate, consonants at the ends
%% of words are converted to final forms.

%% This OTP handles the special case of lamed or lameddagesh followed
%% by the holam dot.

%% Also, this OTP converts the pattern `_a<guttural>' at a word end to
%% `<guttural>a'.

%% Width, in octets, of one character on the input stream.  Every
%% character code used in this file (the largest is 254) fits in one octet.
input: 
        1;
%% Width, in octets, of one character on the output stream.
output: 
        1;
aliases:
        %% Named character classes for the patterns in the `expressions:'
        %% section.  All numbers are decimal character codes.

        %% Here is a subset of the gutturals...
        GUTTERAL        = (103 | 114) % het and ayin
                        ;
        USCORE          = 95 % underscore character
                        ;
        CIRCUM          = 94 % circumflex char
                        ;
        %% Letters that have a distinct word-final form.  The rules below
        %% produce the final form as #(\1 - 1), i.e. the code one below
        %% the letter matched here.
        FINAL           = (107|110|112|116|118|% khaf, mem, nun, phe, tsadi
                           155|158|160|164|166) % dagesh forms
                        ;
        PATAH           = 71 % also covered by the VOWEL range below
                        ;
        VOWEL           = (65-73|75-77|182-187) % vwls EXCEPT sheva
                        ;
        SHEVA           = 74 % the one code skipped between 73 and 75 in VOWEL
                        ;
        HOLAM           = 85 % not referenced by any of the rules shown here
                        ;
        ALLVWL          = ({SHEVA}|{VOWEL}) % every vowel, sheva included
                        ;
        TROPE   = (209-225|227-239) % cantorial trope
                ;
        LAMED   = 108
                ;
        LAMEDDOT        = 156 % NOTE(review): presumably lamed with dagesh -- confirm
                ;
        BLAMED  = 140 % bent lamed
                ;
        BLAMEDDOT       = 172 % NOTE(review): presumably bent lamed with dagesh -- confirm
                ;
        HOLAMDOT        = 73 % also the last code of the first VOWEL range
                ;
        TSADI   = 118 % also a member of FINAL
                ;
        RQUOTE  = 39 % unused: the tsadi rule writes the literal 39 instead
                ;
%% Here we identify the characters which are valid parts of words.
%% INVALID is the complement (^) of exactly the same list, so the two
%% lists must be edited together.  Neither alias is referenced by any of
%% the rules shown here.
        VALID   =  (85|96-122|128-135|140|144-173|176-181|189|192-199)
                ;
        INVALID = ^(85|96-122|128-135|140|144-173|176-181|189|192-199)
                ;
        SPACE   = 32 % not referenced by any of the rules shown here
                ;
        DOTORCOLON      = (46|58) % period or colon
                        ;
expressions:
%% Each rule reads  <pattern> => <replacement> ;  and an empty
%% replacement deletes the matched characters.
%% NOTE(review): rules appear to be tried in the order written, so do not
%% reorder them without checking the Omega OTP documentation.

%% Hebrew has no word initial forms, so we can get rid of the start-of-word
%% markers right away...
%% (The start-of-word marker is the two-character sequence 254 `('.)
          254 `('         =>
                          ;
%% Let's get the LAMED stuff out of the way...
%% Each lamed variant followed by the holam dot is replaced by a single
%% character code.
        {LAMED} {HOLAMDOT}      => 180
                ;
        {LAMEDDOT} {HOLAMDOT}   => 196
                ;
        {BLAMED} {HOLAMDOT}     => 173
                ;
        {BLAMEDDOT} {HOLAMDOT}  => 189
                ;

%% Here is the convention for guttural+patah at the word end...
%% The underscore is dropped and the other two characters are emitted in
%% swapped order: the guttural (\3) first, then the patah (\2).  Note that
%% the pattern itself does not test for a word end; the underscore alone
%% triggers the swap.
        {USCORE}{PATAH}{GUTTERAL}
                        =>\3 \2
                        ;
%% There are two `kinds' of vowels---regular vowels, and trope.  The trope
%% may precede or follow the vowels.  Usually, there's one trope (at most)
%% per syllable, but it's possible for there to be two.
%%
%% Notation used below:
%%   #(\1 - 1)      the character whose code is one less than that of the
%%                  first matched character, i.e. its final form;
%%   <0,1>          the preceding item is optional (zero or one occurrence);
%%   \(* + n - m)   the matched text minus its first n and last m characters.

%% First, we handle the final-suppressing machinery...
%% A letter followed by an underscore is emitted unchanged and the
%% underscore is dropped.
        {FINAL}{USCORE} => \1
                        ;
%% Now for the enforced finals...
%% A letter followed by a circumflex is emitted in its final form and the
%% circumflex is dropped.
        {FINAL}{CIRCUM} => #(\1 - 1)
                        ;
%% Now, the finals.  But first, another special case:
%% tsadi at the end of a word followed by a
%% single right quote.
%% NOTE(review): the pattern matches SHEVA (74), not the quote (39);
%% presumably an earlier stage has already turned the quote into a sheva
%% -- confirm.  The output is final tsadi followed by a literal 39, and the
%% end-of-word marker is consumed.  This rule must stay ahead of the
%% general end-of-word rule below, whose pattern also matches this input.
        {TSADI}{SHEVA} `)' 254   => #(\1 - 1) 39
                        ;
%% Because of some bug or other in Omega1.15, it's not possible for
%% m2contest.otp to be as general as it should be.  Consequently,
%% we need the following rule, which can hopefully be removed when
%% Omega is fixed...
%% A letter, then optional trope / vowel / trope, then a period or colon:
%% the letter takes its final form and everything after it, punctuation
%% included, is copied through.
        {FINAL}{TROPE}<0,1>{ALLVWL}<0,1>{TROPE}<0,1>{DOTORCOLON}     
                                => #(\1 - 1) \(*  + 1 )
                                ;
%% Words can be ended by invalids, spaces, or the end
%% of input..
%% Same shape as the previous rule, but terminated by the end-of-word
%% marker `)' 254; the two marker characters are dropped from the output.
        {FINAL}{TROPE}<0,1>{ALLVWL}<0,1>{TROPE}<0,1>`)' 254
                        => #(\1 - 1 ) \(*  + 1 - 2)
                        ;
%% Finally, let's strip off any remaining end-of-word markers...
        `)' 254         =>
                        ;
