summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Benjamin J. Culkin <bjculkin@mix.wvu.edu> 2018-09-16 22:27:15 -0300
committer: Benjamin J. Culkin <bjculkin@mix.wvu.edu> 2018-09-16 22:27:15 -0300
commit: a883e7d100c54451fb9256cb3867c2571ee4fff1 (patch)
tree: 86c9ea712ace6cb33ce0787e6d3ad903c2e44db2
parent: fe80a37fbe5b8a996a949f6a2e9d24d0ee22d892 (diff)
Update
-rw-r--r-- pom.xml 8
-rw-r--r-- src/main/java/bjc/inflexion/EnglishUtils.java 156
-rw-r--r-- src/main/java/bjc/inflexion/nouns/CategoricalNounInflection.java 1
-rw-r--r-- src/main/java/bjc/inflexion/nouns/NounInflection.java 1
-rw-r--r-- src/test/java/bjc/inflexion/InflectionMLTest.java 15
5 files changed, 111 insertions, 70 deletions
diff --git a/pom.xml b/pom.xml
index dca51db..adfb4a4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -11,7 +11,7 @@
<description>Java based implementation of Damian Conway's Lingua::EN::Inflexion module for perl</description>
<properties>
- <main.class>bjc.inflexion.examples.IndefTester</main.class>
+ <main.class>bjc.inflexion.examples.InflexionTester</main.class>
</properties>
<licenses>
@@ -96,5 +96,11 @@
<artifactId>commons-compress</artifactId>
<version>1.13</version>
</dependency>
+
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.12</version>
+ </dependency>
</dependencies>
</project>
diff --git a/src/main/java/bjc/inflexion/EnglishUtils.java b/src/main/java/bjc/inflexion/EnglishUtils.java
index e233018..197b7cf 100644
--- a/src/main/java/bjc/inflexion/EnglishUtils.java
+++ b/src/main/java/bjc/inflexion/EnglishUtils.java
@@ -13,6 +13,7 @@
*/
package bjc.inflexion;
+import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
@@ -28,16 +29,16 @@ public class EnglishUtils {
private static String[] summaryNums = new String[] { "no", "one", "a couple of", "a few", "several" };
private static int[] summaryMap = new int[] {
- /* no */
- 0,
- /* one */
- 1,
- /* a couple of */
- 2,
- /* a few */
- 3, 3, 3,
- /* several */
- 4, 4, 4, 4
+ /* no */
+ 0,
+ /* one */
+ 1,
+ /* a couple of */
+ 2,
+ /* a few */
+ 3, 3, 3,
+ /* several */
+ 4, 4, 4, 4
};
/**
@@ -73,67 +74,84 @@ public class EnglishUtils {
return "many";
}
- private static Pattern AN_ORD = Pattern.compile("(?i)\\A[aefhilmnorsx]-?th\\Z");
- private static Pattern A_ORD = Pattern.compile("(?i)\\A[bcdgjkpqtuvwyz]-?th\\Z");
- private static Pattern EXP_AN = Pattern.compile("(?i)\\A(?:euler|hour(?!i)|heir|honest|hono)");
- private static Pattern SIN_AN = Pattern.compile("(?i)\\A[aefhilmnorst]\\Z");
- private static Pattern SIN_A = Pattern.compile("(?i)\\A[bcdgjkpqtuvwyz]\\Z");
- private static Pattern ABBREV_AN = Pattern.compile("\\A(?!FJO|[HLMNS]Y|RY[EQ]|SQU|(F[LR]?|[HL]|MN?|N|RH?|S[CHKLMNPTVW]?|X(YL)?)[AEIOU])[FHLMNRSX][A-Z]");
+ public static String pickIndefinite(String phrase) {
+ Pattern pattern;
+ Matcher matcher;
+ String word, lowercaseWord;
+
+ if (phrase.length() == 0) {
+ return "a";
+ }
+
+ // Getting the first word
+ pattern = Pattern.compile("(\\w+)\\s*.*");
+ matcher = pattern.matcher(phrase);
+ if(matcher.matches() == true) {
+ word = matcher.group(1);
+ } else {
+ return "an";
+ }
+
+ lowercaseWord = word.toLowerCase();
+
+ // Specific start of words that should be preceded by 'an'
+ String [] altCases = { "euler", "heir", "honest", "hono" };
+ for (String altCase : altCases) {
+ if (lowercaseWord.startsWith(altCase) == true) {
+ return "an";
+ }
+ }
+
+ if (lowercaseWord.startsWith("hour") == true && lowercaseWord.startsWith("houri") == false) {
+ return "an";
+ }
+
+
+ // Single letter word which should be preceded by 'an'
+ if (lowercaseWord.length() == 1) {
+ if ("aedhilmnorsx".indexOf(lowercaseWord) >= 0) {
+ return "an";
+ } else {
+ return "a";
+ }
+ }
+
+ // Capital words which should likely be preceded by 'an'
+ if (word.matches("(?!FJO|[HLMNS]Y.|RY[EO]|SQU|(F[LR]?|[HL]|MN?|N|RH?|S[CHKLMNPTVW]?|X(YL)?)[AEIOU])[FHLMNRSX][A-Z]")) {
+ return "an";
+ }
+
+ // Special cases where a word that begins with a vowel should be preceded by 'a'
+ String [] regexes = { "^e[uw]", "^onc?e\\b", "^uni([^nmd]|mo)", "^u[bcfhjkqrst][aeiou]" };
+
+ for (String regex : regexes) {
+ if (lowercaseWord.matches(regex+".*") == true) {
+ return "a";
+ }
+ }
+
+ // Special capital words (UK, UN)
+ if (word.matches("^U[NK][AIEO].*") == true) {
+ return "a";
+ } else if (word == word.toUpperCase()) {
+ if ("aedhilmnorsx".indexOf(lowercaseWord.substring(0, 1)) >= 0) {
+ return "an";
+ } else {
+ return "a";
+ }
+ }
+
+ // Basic method of words that begin with a vowel being preceded by 'an'
+ if ("aeiou".indexOf(lowercaseWord.substring(0, 1)) >= 0) {
+ return "an";
+ }
+
+ // Instances where y followed by specific letters is preceded by 'an'
+ if (lowercaseWord.matches("^y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt).*")) {
+ return "an";
+ }
- private static Pattern IN_Y_AN = Pattern.compile("(?i)\\Ay(?:b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)");
-
- private static Pattern ABBREV_C2 = Pattern.compile("(?i)\\A[aefhilmnorsx][.-]");
- private static Pattern ABBREV_C3 = Pattern.compile("(?i)\\A[a-z][.-]");
-
- private static Pattern CONSONANT = Pattern.compile("(?i)\\A[^aeiouy]");
-
- private static Pattern SPECVOWEL_C1 = Pattern.compile("(?i)\\Ae[uw]");
- private static Pattern SPECVOWEL_C2 = Pattern.compile("(?i)\\Aonc?e\b");
- private static Pattern SPECVOWEL_C3 = Pattern.compile("(?i)\\Auni(?:[^nmd]|mo)");
- private static Pattern SPECVOWEL_C4 = Pattern.compile("(?i)\\Aut[th]");
- private static Pattern SPECVOWEL_C5 = Pattern.compile("(?i)\\Au[bcfhjkqrst][aeiou]");
-
- private static Pattern SPECCAP_C1 = Pattern.compile("\\AU[NK][AIEO]?");
-
- private static Pattern VOWEL = Pattern.compile("(?i)\\A[aeiou]\\Z");
-
- public static String pickIndefinite(String word) {
- // Handle ordinal forms
- if(A_ORD.matcher(word).find()) return "a";
- if(AN_ORD.matcher(word).find()) return "an";
-
- // Handle special cases
- if(EXP_AN.matcher(word).find()) return "an";
- if(SIN_AN.matcher(word).find()) return "an";
- if(SIN_A.matcher(word).find()) return "a";
-
- // Handle abbreviations
- if(ABBREV_AN.matcher(word).find()) return "an";
- if(ABBREV_C2.matcher(word).find()) return "an";
- if(ABBREV_C3.matcher(word).find()) return "a";
-
- // Handle consonants
- if(CONSONANT.matcher(word).find()) return "a";
-
- // Handle special vowel forms
- if(SPECVOWEL_C1.matcher(word).find()) return "a";
- if(SPECVOWEL_C2.matcher(word).find()) return "a";
- if(SPECVOWEL_C3.matcher(word).find()) return "a";
- if(SPECVOWEL_C4.matcher(word).find()) return "an";
- if(SPECVOWEL_C5.matcher(word).find()) return "a";
-
- // Handle special capitals
- if(SPECCAP_C1.matcher(word).find()) return "a";
-
- // Handle vowels
- if(VOWEL.matcher(word).find()) return "an";
-
- // Handle Y (before certain consonants, it implies a
- // (unnaturalized) "I" sound)
- if(IN_Y_AN.matcher(word).find()) return "an";
-
- // Guess "A"
return "a";
}
}
diff --git a/src/main/java/bjc/inflexion/nouns/CategoricalNounInflection.java b/src/main/java/bjc/inflexion/nouns/CategoricalNounInflection.java
index fee0b33..9fafcff 100644
--- a/src/main/java/bjc/inflexion/nouns/CategoricalNounInflection.java
+++ b/src/main/java/bjc/inflexion/nouns/CategoricalNounInflection.java
@@ -1,4 +1,5 @@
/*
+ * (C) Copyright 2017 Benjamin Culkin.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/src/main/java/bjc/inflexion/nouns/NounInflection.java b/src/main/java/bjc/inflexion/nouns/NounInflection.java
index 94161c5..978efdb 100644
--- a/src/main/java/bjc/inflexion/nouns/NounInflection.java
+++ b/src/main/java/bjc/inflexion/nouns/NounInflection.java
@@ -1,4 +1,5 @@
/**
+ * (C) Copyright 2017 Benjamin Culkin.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/src/test/java/bjc/inflexion/InflectionMLTest.java b/src/test/java/bjc/inflexion/InflectionMLTest.java
new file mode 100644
index 0000000..8436e7f
--- /dev/null
+++ b/src/test/java/bjc/inflexion/InflectionMLTest.java
@@ -0,0 +1,15 @@
+package bjc.inflexion;
+
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+import static bjc.inflexion.InflectionML.inflect;
+
+public class InflectionMLTest {
+ @Test
+ public void testNumDirective() {
+ assertEquals("no results", inflect("<#n:0> <N:results>"));
+ assertEquals("7 results", inflect("<#n:7> <N:results>"));
+ }
+}