summaryrefslogtreecommitdiff
path: root/indefinite.txt
diff options
context:
space:
mode:
Diffstat (limited to 'indefinite.txt')
-rw-r--r--indefinite.txt75
1 files changed, 75 insertions, 0 deletions
diff --git a/indefinite.txt b/indefinite.txt
new file mode 100644
index 0000000..dcf2997
--- /dev/null
+++ b/indefinite.txt
@@ -0,0 +1,75 @@
+# Special cases of A/AN (case-insensitive; the two letter sets partition a-z)...
+my $ORDINAL_AN = qr{\A [aefhilmnorsx] -?th \Z}ix;
+my $ORDINAL_A = qr{\A [bcdgjkpqtuvwyz] -?th \Z}ix;
+my $EXPLICIT_AN = qr{\A (?: euler | hour(?!i) | heir | honest | hono )}ix;
+my $SINGLE_AN = qr{\A [aefhilmnorsx] \Z}ix;
+my $SINGLE_A = qr{\A [bcdgjkpqtuvwyz] \Z}ix;
+
+# This pattern matches strings of capitals (i.e. abbreviations) that
+# start with a "vowel-sound" consonant followed by another consonant,
+# and which are not likely to be real words
+# (oh, all right then, it's just magic!)...
+# NOTE: no /i flag here, unlike the other patterns, so only capitals match.
+my $ABBREV_AN = qr{
+ \A
+ (?! FJO | [HLMNS]Y. | RY[EO] | SQU
+ | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU]
+ )
+ [FHLMNRSX][A-Z]
+}xms;
+
+# This pattern codes the beginnings of all English words beginning with a
+# 'Y' followed by a consonant. Any other Y-consonant prefix therefore
+# implies an abbreviation...
+# (select_indefinite_article returns "an" for a word that matches it.)
+my $INITIAL_Y_AN = qr{\A y (?: b[lor] | cl[ea] | fere | gg | p[ios] | rou | tt)}xi;
+
+
+
+
+
+sub select_indefinite_article {
+    my ($term) = @_;
+
+    # The tests form an ordered cascade: the first pattern that matches
+    # decides the article, so earlier (more specific) rows shadow later ones.
+    return
+        # Ordinal forms...
+          $term =~ $ORDINAL_A                           ? "a"
+        : $term =~ $ORDINAL_AN                          ? "an"
+
+        # Special cases...
+        : $term =~ $EXPLICIT_AN                         ? "an"
+        : $term =~ $SINGLE_AN                           ? "an"
+        : $term =~ $SINGLE_A                            ? "a"
+
+        # Abbreviations...
+        : $term =~ $ABBREV_AN                           ? "an"
+        : $term =~ /\A [aefhilmnorsx][.-]/xi            ? "an"
+        : $term =~ /\A [a-z][.-]/xi                     ? "a"
+
+        # Anything not starting with a vowel or Y
+        : $term =~ /\A [^aeiouy] /xi                    ? "a"
+
+        # Special vowel-forms
+        : $term =~ /\A e [uw] /xi                       ? "a"
+        : $term =~ /\A onc?e \b /xi                     ? "a"
+        : $term =~ /\A uni (?: [^nmd] | mo) /xi         ? "a"
+        : $term =~ /\A ut[th] /xi                       ? "an"
+        : $term =~ /\A u [bcfhjkqrst] [aeiou] /xi       ? "a"
+
+        # Special capitals (case-sensitive)
+        : $term =~ /\A U [NK] [AIEO]? /x                ? "a"
+
+        # Remaining vowels
+        : $term =~ /\A [aeiou]/xi                       ? "an"
+
+        # Y before certain consonants implies (unnaturalized) "I.." sound
+        : $term =~ $INITIAL_Y_AN                        ? "an"
+
+        # Otherwise, guess "A"
+        :                                                 "a"
+        ;
+}
+
+