summaryrefslogtreecommitdiff
path: root/indefinite.txt
blob: 0e9460831c837569f36579c7b3f3c44e34185f9f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# Special cases of A/AN...
# Precompiled patterns consulted by select_indefinite_article().
# All are anchored at the start of the word with \A; those ending in \Z must
# match the whole word (\Z also tolerates a single trailing newline).

# A lone letter, optionally hyphenated, followed by "th" (e.g. "n-th", "xth").
# The letter sets are split by which article the pattern name assigns.
my $ORDINAL_AN  = qr{\A [aefhilmnorsx]   -?th \Z}ix;
my $ORDINAL_A   = qr{\A [bcdgjkpqtuvwyz] -?th \Z}ix;
# Word prefixes that take "an"; "hour" is excluded when followed by "i".
my $EXPLICIT_AN = qr{\A (?: euler | hour(?!i) | heir | honest | hono )}ix;
# Words consisting of exactly one letter, split the same way as the ordinals.
my $SINGLE_AN   = qr{\A [aefhilmnorsx]   \Z}ix;
my $SINGLE_A    = qr{\A [bcdgjkpqtuvwyz] \Z}ix;

# This pattern matches strings of capitals (i.e. abbreviations) that
# start with a "vowel-sound" consonant followed by another consonant,
# and which are not likely to be real words
# (oh, all right then, it's just magic!)...
# The negative lookahead lists capitalised prefixes that are rejected.
# Unlike the patterns above this one has no /i flag, so it is case-sensitive.

my $ABBREV_AN = qr{
    \A
    (?! FJO | [HLMNS]Y.  | RY[EO] | SQU
    |   ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU]
    )
    [FHLMNRSX][A-Z]
}xms;

# This pattern codes the beginnings of all English words beginning with a
# 'Y' followed by a consonant. Any other Y-consonant prefix therefore
# implies an abbreviation...

my $INITIAL_Y_AN = qr{\A y (?: b[lor] | cl[ea] | fere | gg | p[ios] | rou | tt)}xi;





# Pick the indefinite article ("a" or "an") for $word.
#
# The decision is driven by an ordered table of [pattern, article] pairs:
# the first pattern that matches wins, so specific rules are listed before
# the general ones they override. If nothing matches, "a" is returned.
sub select_indefinite_article {
    my ($word) = @_;

    my @rules = (
        # Ordinal forms
        [ $ORDINAL_A,                                  'a'  ],
        [ $ORDINAL_AN,                                 'an' ],

        # Explicit exceptions and single-letter words
        [ $EXPLICIT_AN,                                'an' ],
        [ $SINGLE_AN,                                  'an' ],
        [ $SINGLE_A,                                   'a'  ],

        # Abbreviations (capital runs, or a letter followed by "." or "-")
        [ $ABBREV_AN,                                  'an' ],
        [ qr/\A [aefhilmnorsx][.-]/xi,                 'an' ],
        [ qr/\A [a-z][.-]/xi,                          'a'  ],

        # Anything starting with a character other than a vowel or Y
        [ qr/\A [^aeiouy] /xi,                         'a'  ],

        # Vowel-initial words that need special treatment
        [ qr/\A e [uw] /xi,                            'a'  ],
        [ qr/\A onc?e \b /xi,                          'a'  ],
        [ qr/\A uni (?: [^nmd] | mo) /xi,              'a'  ],
        [ qr/\A ut[th] /xi,                            'an' ],
        [ qr/\A u [bcfhjkqrst] [aeiou] /xi,            'a'  ],

        # Capitalised "UN..." / "UK..." (case-sensitive on purpose)
        [ qr/\A U [NK] [AIEO]? /x,                     'a'  ],

        # Remaining vowel-initial words
        [ qr/\A [aeiou]/xi,                            'an' ],

        # Y before certain consonants implies an (unnaturalized) "I.." sound
        [ $INITIAL_Y_AN,                               'an' ],
    );

    for my $rule (@rules) {
        my ($pattern, $article) = @{$rule};
        return $article if $word =~ $pattern;
    }

    # No rule applied: guess "a"
    return 'a';
}

/**
 * Chooses the English indefinite article ("a" or "an") for a phrase.
 *
 * Only the leading run of word characters (the first word) is inspected.
 * The rules are tried in order and the first one that applies wins.
 *
 * @param phrase the text the article will precede; may be null or empty
 * @return "a" for a null or empty phrase; "an" when the phrase does not
 *         start with a word character; otherwise "a" or "an" as selected
 *         by the rules below
 */
public static String get(String phrase) {
	// Nothing to inspect: answer with the more common article.
	if (phrase == null || phrase.isEmpty()) {
		return "a";
	}

	// Extract the first word. lookingAt() anchors at the start only, so
	// whatever follows the word (including line breaks) is ignored.
	Matcher matcher = Pattern.compile("\\w+").matcher(phrase);
	if (!matcher.lookingAt()) {
		// Phrase does not begin with a word character; keep the historical answer.
		return "an";
	}
	String word = matcher.group();

	// Locale.ROOT keeps the case mapping independent of the default locale
	// (a Turkish locale would map "I" to a dotless i and miss the vowel test).
	String lowercaseWord = word.toLowerCase(java.util.Locale.ROOT);

	// Letters whose spoken names start with a vowel sound ("ef", "aitch", "em", ...).
	final String vowelSoundLetters = "aefhilmnorsx";

	// Specific starts of words that should be preceded by 'an'
	String[] anPrefixes = { "euler", "heir", "honest", "hono" };
	for (String prefix : anPrefixes) {
		if (lowercaseWord.startsWith(prefix)) {
			return "an";
		}
	}

	// "hour..." takes 'an', except for "houri..."
	if (lowercaseWord.startsWith("hour") && !lowercaseWord.startsWith("houri")) {
		return "an";
	}

	// Single-letter words are pronounced as the letter's name
	if (lowercaseWord.length() == 1) {
		return vowelSoundLetters.indexOf(lowercaseWord.charAt(0)) >= 0 ? "an" : "a";
	}

	// Capitalised abbreviations that should likely be preceded by 'an'.
	// String.matches() requires a full match, hence the trailing ".*" to
	// make this a test on the start of the word.
	if (word.matches("(?!FJO|[HLMNS]Y.|RY[EO]|SQU|(F[LR]?|[HL]|MN?|N|RH?|S[CHKLMNPTVW]?|X(YL)?)[AEIOU])[FHLMNRSX][A-Z].*")) {
		return "an";
	}

	// Special cases where a word that begins with a vowel should be preceded by 'a'
	String[] consonantSoundPrefixes = { "^e[uw]", "^onc?e\\b", "^uni([^nmd]|mo)", "^u[bcfhjkqrst][aeiou]" };
	for (String prefix : consonantSoundPrefixes) {
		if (lowercaseWord.matches(prefix + ".*")) {
			return "a";
		}
	}

	// Special capital words (UK, UN, UNESCO, ...); the vowel after the
	// second letter is optional, so a plain prefix test is sufficient.
	if (word.startsWith("UN") || word.startsWith("UK")) {
		return "a";
	}

	// Words with no lower-case letters are treated as spelled-out initials.
	// NOTE(review): this also yields "an" for pronounceable acronyms such as
	// "NASA" that the abbreviation pattern above deliberately skips - confirm intended.
	if (word.equals(word.toUpperCase(java.util.Locale.ROOT))) {
		return vowelSoundLetters.indexOf(lowercaseWord.charAt(0)) >= 0 ? "an" : "a";
	}

	// Basic rule: words that begin with a vowel are preceded by 'an'
	if ("aeiou".indexOf(lowercaseWord.charAt(0)) >= 0) {
		return "an";
	}

	// Instances where y followed by specific letters is preceded by 'an'
	if (lowercaseWord.matches("^y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt).*")) {
		return "an";
	}

	return "a";
}