blob: dcf2997ad2743a5cab6523d08a436469e2087e34 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
# Special cases of A/AN...
my $ORDINAL_AN = qr{\A [aefhilmnorsx] -?th \Z}ix;
my $ORDINAL_A = qr{\A [bcdgjkpqtuvwyz] -?th \Z}ix;
my $EXPLICIT_AN = qr{\A (?: euler | hour(?!i) | heir | honest | hono )}ix;
my $SINGLE_AN = qr{\A [aefhilmnorsx] \Z}ix;
my $SINGLE_A = qr{\A [bcdgjkpqtuvwyz] \Z}ix;
# This pattern matches strings of capitals (i.e. abbreviations) that
# start with a "vowel-sound" consonant followed by another consonant,
# and which are not likely to be real words
# (oh, all right then, it's just magic!)...
my $ABBREV_AN = qr{
\A
(?! FJO | [HLMNS]Y. | RY[EO] | SQU
| ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU]
)
[FHLMNRSX][A-Z]
}xms;
# This pattern codes the beginnings of all english words begining with a
# 'Y' followed by a consonant. Any other Y-consonant prefix therefore
# implies an abbreviation...
my $INITIAL_Y_AN = qr{\A y (?: b[lor] | cl[ea] | fere | gg | p[ios] | rou | tt)}xi;
sub select_indefinite_article {
my ($word) = @_;
# Handle ordinal forms...
return "a" if $word =~ $ORDINAL_A;
return "an" if $word =~ $ORDINAL_AN;
# Handle special cases...
return "an" if $word =~ $EXPLICIT_AN;
return "an" if $word =~ $SINGLE_AN;
return "a" if $word =~ $SINGLE_A;
# Handle abbreviations...
return "an" if $word =~ $ABBREV_AN;
return "an" if $word =~ /\A [aefhilmnorsx][.-]/xi;
return "a" if $word =~ /\A [a-z][.-]/xi;
# Handle consonants
return "a" if $word =~ /\A [^aeiouy] /xi;
# Handle special vowel-forms
return "a" if $word =~ /\A e [uw] /xi;
return "a" if $word =~ /\A onc?e \b /xi;
return "a" if $word =~ /\A uni (?: [^nmd] | mo) /xi;
return "an" if $word =~ /\A ut[th] /xi;
return "a" if $word =~ /\A u [bcfhjkqrst] [aeiou] /xi;
# Handle special capitals
return "a" if $word =~ /\A U [NK] [AIEO]? /x;
# Handle vowels
return "an" if $word =~ /\A [aeiou]/xi;
# Handle Y... (before certain consonants implies (unnaturalized) "I.." sound)
return "an" if $word =~ $INITIAL_Y_AN;
# Otherwise, guess "A"
return "a";
}
|