summaryrefslogtreecommitdiff
path: root/indefinite.txt
blob: dcf2997ad2743a5cab6523d08a436469e2087e34 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# Special cases of A/AN...
my $ORDINAL_AN  = qr{\A [aefhilmnorsx]   -?th \Z}ix;
my $ORDINAL_A   = qr{\A [bcdgjkpqtuvwyz] -?th \Z}ix;
my $EXPLICIT_AN = qr{\A (?: euler | hour(?!i) | heir | honest | hono )}ix;
my $SINGLE_AN   = qr{\A [aefhilmnorsx]   \Z}ix;
my $SINGLE_A    = qr{\A [bcdgjkpqtuvwyz] \Z}ix;

# This pattern matches strings of capitals (i.e. abbreviations) that
# start with a "vowel-sound" consonant followed by another consonant,
# and which are not likely to be real words
# (oh, all right then, it's just magic!)...

my $ABBREV_AN = qr{
    \A
    (?! FJO | [HLMNS]Y.  | RY[EO] | SQU
    |   ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU]
    )
    [FHLMNRSX][A-Z]
}xms;

# This pattern codes the beginnings of all english words begining with a
# 'Y' followed by a consonant. Any other Y-consonant prefix therefore
# implies an abbreviation...

my $INITIAL_Y_AN = qr{\A y (?: b[lor] | cl[ea] | fere | gg | p[ios] | rou | tt)}xi;





sub select_indefinite_article {
    my ($word) = @_;

    # Handle ordinal forms...
    return "a"   if $word =~ $ORDINAL_A;
    return "an"  if $word =~ $ORDINAL_AN;

    # Handle special cases...
    return "an"  if $word =~ $EXPLICIT_AN;
    return "an"  if $word =~ $SINGLE_AN;
    return "a"   if $word =~ $SINGLE_A;

    # Handle abbreviations...
    return "an"  if $word =~ $ABBREV_AN;
    return "an"  if $word =~ /\A [aefhilmnorsx][.-]/xi;
    return "a"   if $word =~ /\A [a-z][.-]/xi;

    # Handle consonants

    return "a"   if $word =~ /\A [^aeiouy] /xi;

    # Handle special vowel-forms

    return "a"   if $word =~ /\A e [uw] /xi;
    return "a"   if $word =~ /\A onc?e \b /xi;
    return "a"   if $word =~ /\A uni (?: [^nmd] | mo) /xi;
    return "an"  if $word =~ /\A ut[th] /xi;
    return "a"   if $word =~ /\A u [bcfhjkqrst] [aeiou] /xi;

    # Handle special capitals

    return "a"   if $word =~ /\A U [NK] [AIEO]? /x;

    # Handle vowels

    return "an"  if $word =~ /\A [aeiou]/xi;

    # Handle Y... (before certain consonants implies (unnaturalized) "I.." sound)
    return "an"  if $word =~ $INITIAL_Y_AN;

    # Otherwise, guess "A"
    return "a";
}