diff options
| author | Benjamin J. Culkin <bjculkin@mix.wvu.edu> | 2018-06-07 16:26:46 -0300 |
|---|---|---|
| committer | Benjamin J. Culkin <bjculkin@mix.wvu.edu> | 2018-06-07 16:26:46 -0300 |
| commit | 7f16ae0286ab7492eee9f4019d976bc5ca95d556 (patch) | |
| tree | 811d0f5b4713260b5a9ffb9ab64d676c3746dbc3 /src/main/java/bjc/inflexion/EnglishUtils.java | |
| parent | 235208946ceb2bf0f422956a3ebc0ebb88ba28b6 (diff) | |
Indefinites
Diffstat (limited to 'src/main/java/bjc/inflexion/EnglishUtils.java')
| -rw-r--r-- | src/main/java/bjc/inflexion/EnglishUtils.java | 70 |
1 files changed, 67 insertions, 3 deletions
/**
 * General utils for dealing with english.
 */
public class EnglishUtils {
	// NOTE: excerpt only. The members of this class that precede the
	// indefinite-article support are outside the visible diff context.

	/*
	 * Rule tables for pickIndefinite(String). All patterns are anchored at the
	 * start of the input (\A) and are applied with Matcher.find().
	 *
	 * NOTE(review): the rule set appears to follow the A/AN selection rules of
	 * Lingua::EN::Inflect - confirm against that module before changing the
	 * letter sets.
	 */

	/* Single-letter ordinals ("nth", "x-th") that take "an". */
	private static final Pattern AN_ORD = Pattern.compile("(?i)\\A[aefhilmnorsx]-?th\\Z");
	/* Single-letter ordinals ("kth", "b-th") that take "a". */
	private static final Pattern A_ORD = Pattern.compile("(?i)\\A[bcdgjkpqtuvwyz]-?th\\Z");

	/* Word prefixes that always take "an" despite a leading consonant or "eu". */
	private static final Pattern EXP_AN = Pattern.compile("(?i)\\A(?:euler|hour(?!i)|heir|honest|hono)");

	/* A lone letter that takes "an" (same letter set as AN_ORD and ABBREV_C2). */
	private static final Pattern SIN_AN = Pattern.compile("(?i)\\A[aefhilmnorsx]\\Z");
	/* A lone letter that takes "a". */
	private static final Pattern SIN_A = Pattern.compile("(?i)\\A[bcdgjkpqtuvwyz]\\Z");

	/*
	 * Case-sensitive: two leading capitals, the first one of F/H/L/M/N/R/S/X,
	 * unless the negative lookahead rejects the prefix.
	 *
	 * NOTE(review): "RY[EQ]" may have been intended as "RY[EO]" - confirm.
	 */
	private static final Pattern ABBREV_AN = Pattern.compile(
			"\\A(?!FJO|[HLMNS]Y|RY[EQ]|SQU|(F[LR]?|[HL]|MN?|N|RH?|S[CHKLMNPTVW]?|X(YL)?)[AEIOU])[FHLMNRSX][A-Z]");

	/* Leading "y" followed by one of these clusters takes "an". */
	private static final Pattern IN_Y_AN = Pattern.compile("(?i)\\Ay(?:b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)");

	/* A letter followed by '.' or '-': "an" for the AN letter set, "a" otherwise. */
	private static final Pattern ABBREV_C2 = Pattern.compile("(?i)\\A[aefhilmnorsx][.-]");
	private static final Pattern ABBREV_C3 = Pattern.compile("(?i)\\A[a-z][.-]");

	/* Anything that does not start with a, e, i, o, u or y. */
	private static final Pattern CONSONANT = Pattern.compile("(?i)\\A[^aeiouy]");

	/* Vowel-initial spellings that are special-cased before the generic vowel rule. */
	private static final Pattern SPECVOWEL_C1 = Pattern.compile("(?i)\\Ae[uw]");
	/* "\\b" is the regex word boundary; a bare "\b" would be a backspace character. */
	private static final Pattern SPECVOWEL_C2 = Pattern.compile("(?i)\\Aonc?e\\b");
	private static final Pattern SPECVOWEL_C3 = Pattern.compile("(?i)\\Auni(?:[^nmd]|mo)");
	private static final Pattern SPECVOWEL_C4 = Pattern.compile("(?i)\\Aut[th]");
	private static final Pattern SPECVOWEL_C5 = Pattern.compile("(?i)\\Au[bcfhjkqrst][aeiou]");

	/* Case-sensitive: capital "UN"/"UK" prefixes take "a". */
	private static final Pattern SPECCAP_C1 = Pattern.compile("\\AU[NK][AIEO]?");

	/* Any word starting with a vowel; deliberately not end-anchored. */
	private static final Pattern VOWEL = Pattern.compile("(?i)\\A[aeiou]");

	/**
	 * Pick the indefinite article for a word.
	 *
	 * The rules are tried in a fixed order and the first one that matches
	 * decides the result, so the special cases must stay ahead of the generic
	 * consonant and vowel rules.
	 *
	 * @param word
	 *             The word to pick an article for. Must not be null.
	 *
	 * @return Either "a" or "an"; "a" when no rule matches (including for the
	 *         empty string).
	 *
	 * @throws NullPointerException
	 *                              If word is null.
	 */
	public static String pickIndefinite(String word) {
		if (word == null) {
			throw new NullPointerException("word must not be null");
		}

		// Handle ordinal forms
		if (A_ORD.matcher(word).find()) {
			return "a";
		}
		if (AN_ORD.matcher(word).find()) {
			return "an";
		}

		// Handle special cases
		if (EXP_AN.matcher(word).find()) {
			return "an";
		}
		if (SIN_AN.matcher(word).find()) {
			return "an";
		}
		if (SIN_A.matcher(word).find()) {
			return "a";
		}

		// Handle abbreviations
		if (ABBREV_AN.matcher(word).find()) {
			return "an";
		}
		if (ABBREV_C2.matcher(word).find()) {
			return "an";
		}
		if (ABBREV_C3.matcher(word).find()) {
			return "a";
		}

		// Handle consonants
		if (CONSONANT.matcher(word).find()) {
			return "a";
		}

		// Handle special vowel forms
		if (SPECVOWEL_C1.matcher(word).find()) {
			return "a";
		}
		if (SPECVOWEL_C2.matcher(word).find()) {
			return "a";
		}
		if (SPECVOWEL_C3.matcher(word).find()) {
			return "a";
		}
		if (SPECVOWEL_C4.matcher(word).find()) {
			return "an";
		}
		if (SPECVOWEL_C5.matcher(word).find()) {
			return "a";
		}

		// Handle special capitals
		if (SPECCAP_C1.matcher(word).find()) {
			return "a";
		}

		// Handle vowels
		if (VOWEL.matcher(word).find()) {
			return "an";
		}

		// Handle Y (before certain consonants, it implies an
		// (unnaturalized) "I" sound)
		if (IN_Y_AN.matcher(word).find()) {
			return "an";
		}

		// Guess "a"
		return "a";
	}
}
