# Special cases of A/AN...

# Single letters used as ordinals ("nth", "x-th"). Letters in the first class
# take "an", letters in the second take "a". An optional hyphen may precede
# the "th" suffix. \Z deliberately tolerates one trailing newline.
my $ORDINAL_AN    = qr{\A [aefhilmnorsx]   -?th \Z}ix;
my $ORDINAL_A     = qr{\A [bcdgjkpqtuvwyz] -?th \Z}ix;

# Word prefixes that always take "an". This is a prefix match (no end anchor);
# the lookahead after "hour" excludes anything starting with "houri".
my $EXPLICIT_AN   = qr{\A (?: euler | hour(?!i) | heir | honest | hono )}ix;

# A single letter standing on its own, split into the same two classes as
# the ordinal patterns above.
my $SINGLE_AN     = qr{\A [aefhilmnorsx]   \Z}ix;
my $SINGLE_A      = qr{\A [bcdgjkpqtuvwyz] \Z}ix;

# This pattern matches strings of capitals (i.e. abbreviations) that
# start with a "vowel-sound" consonant followed by another consonant,
# and which are not likely to be real words
# (oh, all right then, it's just magic!)...
# Note that it is case-sensitive: only upper-case input can match.
my $ABBREV_AN = qr{
    \A
    (?! FJO | [HLMNS]Y. | RY[EO] | SQU
      | (?: F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(?:YL)? ) [AEIOU]
    )
    [FHLMNRSX][A-Z]
}xms;

# This pattern codes the beginnings of all English words beginning with a
# 'Y' followed by a consonant. Any other Y-consonant prefix therefore
# implies an abbreviation...
my $INITIAL_Y_AN
    = qr{\A y (?: b[lor] | cl[ea] | fere | gg | p[ios] | rou | tt)}xi;

# select_indefinite_article($word)
#
# Choose the indefinite article for $word using a fixed sequence of
# pattern-based rules. The rules are tried in order and the first one that
# matches decides the result, so the order of the statements below matters.
#
# Parameters:
#   $word - the text the article will precede. Only the start of the string
#           is inspected (the single-letter and ordinal rules match the whole
#           string). An undefined value is treated as the empty string.
#
# Returns:
#   The string "a" or the string "an". When no rule matches (including for
#   the empty string) the result is "a".
sub select_indefinite_article {
    my ($word) = @_;

    # Treat undef as empty so the matches below do not raise
    # "uninitialized value" warnings; the result is still the default "a".
    $word = q{} if !defined $word;

    # Handle ordinal forms...
    return "a"  if $word =~ $ORDINAL_A;
    return "an" if $word =~ $ORDINAL_AN;

    # Handle special cases...
    return "an" if $word =~ $EXPLICIT_AN;
    return "an" if $word =~ $SINGLE_AN;
    return "a"  if $word =~ $SINGLE_A;

    # Handle abbreviations...
    # (all-capitals forms first, then a single letter followed by "." or "-")
    return "an" if $word =~ $ABBREV_AN;
    return "an" if $word =~ /\A [aefhilmnorsx][.-]/xi;
    return "a"  if $word =~ /\A [a-z][.-]/xi;

    # Handle consonants
    # ("y" is excluded here because it gets its own rule further down)
    return "a"  if $word =~ /\A [^aeiouy] /xi;

    # Handle special vowel-forms
    return "a"  if $word =~ /\A e [uw] /xi;
    return "a"  if $word =~ /\A onc?e \b /xi;
    return "a"  if $word =~ /\A uni (?: [^nmd] | mo) /xi;
    return "an" if $word =~ /\A ut[th] /xi;
    return "a"  if $word =~ /\A u [bcfhjkqrst] [aeiou] /xi;

    # Handle special capitals
    # (case-sensitive: only an upper-case "UN"/"UK" prefix matches)
    return "a"  if $word =~ /\A U [NK] [AIEO]? /x;

    # Handle vowels
    return "an" if $word =~ /\A [aeiou]/xi;

    # Handle Y... (before certain consonants implies (unnaturalized) "I.." sound)
    return "an" if $word =~ $INITIAL_Y_AN;

    # Otherwise, guess "A"
    return "a";
}