# Special cases of A/AN...

# Single-letter ordinals such as "nth" or "x-th". The sub below answers "an"
# for the first set of letters and "a" for the second.
my $ORDINAL_AN = qr{\A [aefhilmnorsx] -?th \Z}ix;
my $ORDINAL_A  = qr{\A [bcdgjkpqtuvwyz] -?th \Z}ix;

# Word prefixes that always take "an" ("hour..." qualifies, "houri..." does not).
my $EXPLICIT_AN = qr{\A (?: euler | hour(?!i) | heir | honest | hono )}ix;

# A word consisting of exactly one letter.
my $SINGLE_AN = qr{\A [aefhilmnorsx] \Z}ix;
my $SINGLE_A  = qr{\A [bcdgjkpqtuvwyz] \Z}ix;

# This pattern matches strings of capitals (i.e. abbreviations) that
# start with a "vowel-sound" consonant followed by another consonant,
# and which are not likely to be real words
# (oh, all right then, it's just magic!)...
# Note: deliberately case-sensitive (no /i), so only capitals qualify.
my $ABBREV_AN = qr{
    \A
    (?! FJO
      | [HLMNS]Y.
      | RY[EO]
      | SQU
      | (?: F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(?:YL)? ) [AEIOU]
    )
    [FHLMNRSX][A-Z]
}xms;

# This pattern codes the beginnings of all English words beginning with a
# 'Y' followed by a consonant. Any other Y-consonant prefix therefore
# implies an abbreviation...
my $INITIAL_Y_AN = qr{\A y (?: b[lor] | cl[ea] | fere | gg | p[ios] | rou | tt)}xi;

# select_indefinite_article($word)
#
# Returns the string "a" or "an": the indefinite article chosen for $word.
# The rules are tried strictly in the order written and the first one that
# matches decides the answer, so the order of the statements is significant.
# When nothing matches (including the empty string) the answer is "a".
# An undefined $word also yields "a".
sub select_indefinite_article {
    my ($word) = @_;

    # Nothing to inspect: give the fallback answer without tripping
    # "uninitialized value" warnings in the matches below.
    return "a" if !defined $word;

    # Handle ordinal forms...
    return "a"  if $word =~ $ORDINAL_A;
    return "an" if $word =~ $ORDINAL_AN;

    # Handle special cases...
    return "an" if $word =~ $EXPLICIT_AN;
    return "an" if $word =~ $SINGLE_AN;
    return "a"  if $word =~ $SINGLE_A;

    # Handle abbreviations...
    # (a leading letter followed by "." or "-" is judged by that letter alone)
    return "an" if $word =~ $ABBREV_AN;
    return "an" if $word =~ /\A [aefhilmnorsx][.-]/xi;
    return "a"  if $word =~ /\A [a-z][.-]/xi;

    # Handle consonants
    # ('y' is excluded here so that it reaches the Y rule further down)
    return "a"  if $word =~ /\A [^aeiouy] /xi;

    # Handle special vowel-forms
    return "a"  if $word =~ /\A e [uw] /xi;
    return "a"  if $word =~ /\A onc?e \b /xi;
    return "a"  if $word =~ /\A uni (?: [^nmd] | mo) /xi;
    return "an" if $word =~ /\A ut[th] /xi;
    return "a"  if $word =~ /\A u [bcfhjkqrst] [aeiou] /xi;

    # Handle special capitals
    # (case-sensitive on purpose: only an upper-case "UN"/"UK" prefix counts)
    return "a"  if $word =~ /\A U [NK] [AIEO]? /x;

    # Handle vowels
    return "an" if $word =~ /\A [aeiou]/xi;

    # Handle Y... (before certain consonants implies (unnaturalized) "I.." sound)
    return "an" if $word =~ $INITIAL_Y_AN;

    # Otherwise, guess "A"
    return "a";
}

=begin comment

Java method applying the same a/an heuristics. It is kept here for reference
only; Perl skips this whole POD block. It needs java.util.regex.Pattern and
java.util.regex.Matcher.

Corrections made relative to the text this replaces, each checked against the
Perl rules above:

  * the vowel-sound letter set is "aefhilmnorsx" (it read "aedhilmnorsx",
    giving "an d" and "a f");
  * the all-capitals test compares strings with equals() rather than ==;
  * the abbreviation pattern is a prefix test (String.matches() must match
    the whole word, so a trailing ".*" is required);
  * the vowel after "UN"/"UK" is optional, as in the Perl rule.

    /**
     * Chooses the indefinite article for the first word of a phrase.
     *
     * @param phrase text whose first run of word characters is examined
     * @return "a" or "an"; "a" for an empty phrase, "an" when the phrase
     *         does not start with a word character
     */
    public static String get(String phrase) {
        Pattern pattern;
        Matcher matcher;
        String word, lowercaseWord;

        if (phrase.length() == 0) {
            return "a";
        }

        // Getting the first word
        pattern = Pattern.compile("(\\w+)\\s*.*");
        matcher = pattern.matcher(phrase);
        if (matcher.matches() == true) {
            word = matcher.group(1);
        } else {
            return "an";
        }

        lowercaseWord = word.toLowerCase();

        // Specific start of words that should be preceded by 'an'
        String [] altCases = { "euler", "heir", "honest", "hono" };
        for (String altCase : altCases) {
            if (lowercaseWord.startsWith(altCase) == true) {
                return "an";
            }
        }

        if (lowercaseWord.startsWith("hour") == true && lowercaseWord.startsWith("houri") == false) {
            return "an";
        }

        // Single letter word which should be preceded by 'an'
        if (lowercaseWord.length() == 1) {
            if ("aefhilmnorsx".indexOf(lowercaseWord) >= 0) {
                return "an";
            } else {
                return "a";
            }
        }

        // Capital words which should likely be preceded by 'an'
        // (trailing ".*" because matches() has to consume the whole word)
        if (word.matches("(?!FJO|[HLMNS]Y.|RY[EO]|SQU|(F[LR]?|[HL]|MN?|N|RH?|S[CHKLMNPTVW]?|X(YL)?)[AEIOU])[FHLMNRSX][A-Z].*")) {
            return "an";
        }

        // Special cases where a word that begins with a vowel should be preceded by 'a'
        String [] regexes = { "^e[uw]", "^onc?e\\b", "^uni([^nmd]|mo)", "^u[bcfhjkqrst][aeiou]" };
        for (String regex : regexes) {
            if (lowercaseWord.matches(regex+".*") == true) {
                return "a";
            }
        }

        // Special capital words (UK, UN)
        if (word.matches("^U[NK][AIEO]?.*") == true) {
            return "a";
        } else if (word.equals(word.toUpperCase())) {
            // All-capitals word: decide by the first letter alone
            if ("aefhilmnorsx".indexOf(lowercaseWord.substring(0, 1)) >= 0) {
                return "an";
            } else {
                return "a";
            }
        }

        // Basic method of words that begin with a vowel being preceded by 'an'
        if ("aeiou".indexOf(lowercaseWord.substring(0, 1)) >= 0) {
            return "an";
        }

        // Instances where y followed by specific letters is preceded by 'an'
        if (lowercaseWord.matches("^y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt).*")) {
            return "an";
        }

        return "a";
    }

=end comment

=cut