diff options
| author | Benjamin J. Culkin <bjculkin@mix.wvu.edu> | 2018-06-07 16:26:46 -0300 |
|---|---|---|
| committer | Benjamin J. Culkin <bjculkin@mix.wvu.edu> | 2018-06-07 16:26:46 -0300 |
| commit | 7f16ae0286ab7492eee9f4019d976bc5ca95d556 (patch) | |
| tree | 811d0f5b4713260b5a9ffb9ab64d676c3746dbc3 /indefinite.txt | |
| parent | 235208946ceb2bf0f422956a3ebc0ebb88ba28b6 (diff) | |
Indefinites
Diffstat (limited to 'indefinite.txt')
| -rw-r--r-- | indefinite.txt | 75 |
1 files changed, 75 insertions, 0 deletions
diff --git a/indefinite.txt b/indefinite.txt new file mode 100644 index 0000000..dcf2997 --- /dev/null +++ b/indefinite.txt @@ -0,0 +1,75 @@ +# Special cases of A/AN... +my $ORDINAL_AN = qr{\A [aefhilmnorsx] -?th \Z}ix; +my $ORDINAL_A = qr{\A [bcdgjkpqtuvwyz] -?th \Z}ix; +my $EXPLICIT_AN = qr{\A (?: euler | hour(?!i) | heir | honest | hono )}ix; +my $SINGLE_AN = qr{\A [aefhilmnorsx] \Z}ix; +my $SINGLE_A = qr{\A [bcdgjkpqtuvwyz] \Z}ix; + +# This pattern matches strings of capitals (i.e. abbreviations) that +# start with a "vowel-sound" consonant followed by another consonant, +# and which are not likely to be real words +# (oh, all right then, it's just magic!)... + +my $ABBREV_AN = qr{ + \A + (?! FJO | [HLMNS]Y. | RY[EO] | SQU + | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU] + ) + [FHLMNRSX][A-Z] +}xms; + +# This pattern codes the beginnings of all english words begining with a +# 'Y' followed by a consonant. Any other Y-consonant prefix therefore +# implies an abbreviation... + +my $INITIAL_Y_AN = qr{\A y (?: b[lor] | cl[ea] | fere | gg | p[ios] | rou | tt)}xi; + + + + + +sub select_indefinite_article { + my ($word) = @_; + + # Handle ordinal forms... + return "a" if $word =~ $ORDINAL_A; + return "an" if $word =~ $ORDINAL_AN; + + # Handle special cases... + return "an" if $word =~ $EXPLICIT_AN; + return "an" if $word =~ $SINGLE_AN; + return "a" if $word =~ $SINGLE_A; + + # Handle abbreviations... + return "an" if $word =~ $ABBREV_AN; + return "an" if $word =~ /\A [aefhilmnorsx][.-]/xi; + return "a" if $word =~ /\A [a-z][.-]/xi; + + # Handle consonants + + return "a" if $word =~ /\A [^aeiouy] /xi; + + # Handle special vowel-forms + + return "a" if $word =~ /\A e [uw] /xi; + return "a" if $word =~ /\A onc?e \b /xi; + return "a" if $word =~ /\A uni (?: [^nmd] | mo) /xi; + return "an" if $word =~ /\A ut[th] /xi; + return "a" if $word =~ /\A u [bcfhjkqrst] [aeiou] /xi; + + # Handle special capitals + + return "a" if $word =~ /\A U [NK] [AIEO]? /x; + + # Handle vowels + + return "an" if $word =~ /\A [aeiou]/xi; + + # Handle Y... (before certain consonants implies (unnaturalized) "I.." sound) + return "an" if $word =~ $INITIAL_Y_AN; + + # Otherwise, guess "A" + return "a"; +} + + |
