summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/examples/java/bjc/inflexion/examples/IndefTester.java23
-rw-r--r--src/main/java/bjc/inflexion/EnglishUtils.java70
-rw-r--r--src/main/java/bjc/inflexion/InflectionML.java62
-rw-r--r--src/main/java/bjc/inflexion/nouns/CategoricalNounInflection.java3
-rw-r--r--src/main/java/bjc/inflexion/nouns/CompoundNounInflection.java1
-rw-r--r--src/main/java/bjc/inflexion/nouns/DefaultNounInflection.java1
-rw-r--r--src/main/java/bjc/inflexion/nouns/InflectionAffix.java1
-rw-r--r--src/main/java/bjc/inflexion/nouns/InflectionAffixes.java1
-rw-r--r--src/main/java/bjc/inflexion/nouns/InflectionException.java1
-rw-r--r--src/main/java/bjc/inflexion/nouns/IrregularNounInflection.java1
-rw-r--r--src/main/java/bjc/inflexion/nouns/Noun.java1
-rw-r--r--src/main/java/bjc/inflexion/nouns/NounInflection.java1
-rw-r--r--src/main/java/bjc/inflexion/nouns/Nouns.java1
-rw-r--r--src/main/java/bjc/inflexion/nouns/Prepositions.java1
-rw-r--r--src/main/java/bjc/inflexion/nouns/SimpleInflectionAffix.java1
15 files changed, 136 insertions, 33 deletions
diff --git a/src/examples/java/bjc/inflexion/examples/IndefTester.java b/src/examples/java/bjc/inflexion/examples/IndefTester.java
new file mode 100644
index 0000000..fd1929c
--- /dev/null
+++ b/src/examples/java/bjc/inflexion/examples/IndefTester.java
@@ -0,0 +1,23 @@
+package bjc.inflexion.examples;
+
+import java.util.Scanner;
+
+import bjc.inflexion.EnglishUtils;
+
+/** Console demo: prints each entered word prefixed with its indefinite article. */
+public class IndefTester {
+ /** Prompts for words until a blank line or end of input; {@code args} is unused. */
+ public static void main(String[] args) {
+     // try-with-resources closes the scanner even if pickIndefinite throws.
+     try (Scanner scn = new Scanner(System.in)) {
+         System.out.print("Enter word: ");
+         // hasNextLine() avoids the NoSuchElementException nextLine() throws at EOF.
+         while (scn.hasNextLine()) {
+             String word = scn.nextLine().trim();
+             if (word.isEmpty()) break;
+             System.out.printf("\t%s %s\n", EnglishUtils.pickIndefinite(word), word);
+             System.out.print("Enter word: ");
+         }
+     }
+ }
+}
diff --git a/src/main/java/bjc/inflexion/EnglishUtils.java b/src/main/java/bjc/inflexion/EnglishUtils.java
index 28fc6c6..e233018 100644
--- a/src/main/java/bjc/inflexion/EnglishUtils.java
+++ b/src/main/java/bjc/inflexion/EnglishUtils.java
@@ -1,6 +1,4 @@
-/**
- * (C) Copyright 2017 Benjamin Culkin.
- *
+/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@@ -15,6 +13,8 @@
*/
package bjc.inflexion;
+import java.util.regex.Pattern;
+
/**
* General utils for dealing with english.
*
@@ -72,4 +72,68 @@ public class EnglishUtils {
return "many";
}
+
+ /* Patterns for pickIndefinite; all are anchored with \A, so find() only tests the start. */
+ private static final Pattern AN_ORD = Pattern.compile("(?i)\\A[aefhilmnorsx]-?th\\Z");
+ private static final Pattern A_ORD = Pattern.compile("(?i)\\A[bcdgjkpqtuvwyz]-?th\\Z");
+ private static final Pattern EXP_AN = Pattern.compile("(?i)\\A(?:euler|hour(?!i)|heir|honest|hono)");
+
+ /* Single letters: same "an" letter set as AN_ORD ("x", not "t"); "t" is covered by SIN_A. */
+ private static final Pattern SIN_AN = Pattern.compile("(?i)\\A[aefhilmnorsx]\\Z");
+ private static final Pattern SIN_A = Pattern.compile("(?i)\\A[bcdgjkpqtuvwyz]\\Z");
+
+ private static final Pattern ABBREV_AN = Pattern.compile("\\A(?!FJO|[HLMNS]Y|RY[EQ]|SQU|(F[LR]?|[HL]|MN?|N|RH?|S[CHKLMNPTVW]?|X(YL)?)[AEIOU])[FHLMNRSX][A-Z]");
+ private static final Pattern IN_Y_AN = Pattern.compile("(?i)\\Ay(?:b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)");
+ private static final Pattern ABBREV_C2 = Pattern.compile("(?i)\\A[aefhilmnorsx][.-]");
+ private static final Pattern ABBREV_C3 = Pattern.compile("(?i)\\A[a-z][.-]");
+
+ private static final Pattern CONSONANT = Pattern.compile("(?i)\\A[^aeiouy]");
+
+ private static final Pattern SPECVOWEL_C1 = Pattern.compile("(?i)\\Ae[uw]");
+ /* The boundary must be written "\\b": a plain "\b" in a Java literal is a backspace. */
+ private static final Pattern SPECVOWEL_C2 = Pattern.compile("(?i)\\Aonc?e\\b");
+ private static final Pattern SPECVOWEL_C3 = Pattern.compile("(?i)\\Auni(?:[^nmd]|mo)");
+ private static final Pattern SPECVOWEL_C4 = Pattern.compile("(?i)\\Aut[th]");
+ private static final Pattern SPECVOWEL_C5 = Pattern.compile("(?i)\\Au[bcfhjkqrst][aeiou]");
+ private static final Pattern SPECCAP_C1 = Pattern.compile("\\AU[NK][AIEO]?");
+
+ /* Any leading vowel; deliberately not end-anchored so whole words ("apple") match. */
+ private static final Pattern VOWEL = Pattern.compile("(?i)\\A[aeiou]");
+
+ /**
+  * Picks the indefinite article ("a" or "an") to put in front of a word.
+  *
+  * @param word The word the article will precede; must not be null.
+  * @return The article chosen by the first rule that matches, or "a" if none does.
+  */
+ public static String pickIndefinite(String word) {
+     // Handle ordinal forms
+     if(A_ORD.matcher(word).find()) return "a";
+     if(AN_ORD.matcher(word).find()) return "an";
+     // Handle special cases
+     if(EXP_AN.matcher(word).find()) return "an";
+     if(SIN_AN.matcher(word).find()) return "an";
+     if(SIN_A.matcher(word).find()) return "a";
+     // Handle abbreviations
+     if(ABBREV_AN.matcher(word).find()) return "an";
+     if(ABBREV_C2.matcher(word).find()) return "an";
+     if(ABBREV_C3.matcher(word).find()) return "a";
+     // Handle consonants
+     if(CONSONANT.matcher(word).find()) return "a";
+     // Handle special vowel forms; these must run before the general vowel rule
+     if(SPECVOWEL_C1.matcher(word).find()) return "a";
+     if(SPECVOWEL_C2.matcher(word).find()) return "a";
+     if(SPECVOWEL_C3.matcher(word).find()) return "a";
+     if(SPECVOWEL_C4.matcher(word).find()) return "an";
+     if(SPECVOWEL_C5.matcher(word).find()) return "a";
+     // Handle special capitals
+     if(SPECCAP_C1.matcher(word).find()) return "a";
+     // Handle vowels
+     if(VOWEL.matcher(word).find()) return "an";
+     // Handle Y (before certain consonants, it implies a
+     // (unnaturalized) "I" sound)
+     if(IN_Y_AN.matcher(word).find()) return "an";
+     // Guess "A"
+     return "a";
+ }
}
diff --git a/src/main/java/bjc/inflexion/InflectionML.java b/src/main/java/bjc/inflexion/InflectionML.java
index 939e96e..9ee175d 100644
--- a/src/main/java/bjc/inflexion/InflectionML.java
+++ b/src/main/java/bjc/inflexion/InflectionML.java
@@ -1,6 +1,4 @@
-/**
- * (C) Copyright 2017 Benjamin Culkin.
- *
+/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@@ -15,9 +13,11 @@
*/
package bjc.inflexion;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
+import java.util.Iterator;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -45,6 +45,8 @@ public class InflectionML {
private static Pattern FORM_MARKER =
Pattern.compile("<(?<command>[#N])(?<options>[^:]*):(?<text>[^>]*)>");
+ private static Pattern AN_MARKER = Pattern.compile("\\{an(\\d+)\\}");
+
/* The database of nouns. */
private static Nouns nounDB;
@@ -66,14 +68,19 @@ public class InflectionML {
* @return
* The inflected string.
*/
- public static String inflect(final String form) {
- final Matcher formMatcher = FORM_MARKER.matcher(form);
-
- final StringBuffer formBuffer = new StringBuffer();
+ public static String inflect(String form) {
+ Matcher formMatcher = FORM_MARKER.matcher(form);
+ StringBuffer formBuffer = new StringBuffer();
int curCount = 1;
+
boolean inflectSingular = true;
+ int anCount = 0;
+ List<String> anVals = new ArrayList<>();
+
+ boolean pendingAN = false;
+
while (formMatcher.find()) {
final String command = formMatcher.group("command");
final String options = formMatcher.group("options");
@@ -129,9 +136,12 @@ public class InflectionML {
}
if (optionSet.contains("a")) {
- /* :InflectionML
- * Implement a/an for nouns.
- */
+ if (curCount == 1) {
+ anCount += 1;
+ rep = "{an" + anCount + "}";
+
+ pendingAN = true;
+ }
}
/* Break out of switch. */
@@ -140,10 +150,7 @@ public class InflectionML {
break;
}
- final boolean shouldOverride =
- !(rep.equals("no") ||
- rep.equals("a") ||
- rep.equals("an") );
+ final boolean shouldOverride = !(rep.equals("no") || rep.matches("\\{an\\d+\\}"));
if (optionSet.contains("w") && shouldOverride) {
rep = EnglishUtils.smallIntToWord(curCount);
@@ -162,15 +169,25 @@ public class InflectionML {
case "N":
final Noun noun = nounDB.getNoun(text);
+ String nounVal;
+
if (optionSet.contains("p") || !inflectSingular) {
if (optionSet.contains("c")) {
- formMatcher.appendReplacement(formBuffer, noun.classicalPlural());
+ nounVal = noun.classicalPlural();
} else {
- formMatcher.appendReplacement(formBuffer, noun.modernPlural());
+ nounVal = noun.modernPlural();
}
} else {
- formMatcher.appendReplacement(formBuffer, noun.singular());
+ nounVal = noun.singular();
}
+
+ formMatcher.appendReplacement(formBuffer, nounVal);
+ if(pendingAN) {
+ anVals.add(EnglishUtils.pickIndefinite(nounVal));
+
+ pendingAN = false;
+ }
+
break;
default:
final String msg = String.format("Unknown command '%s'", command);
@@ -181,6 +198,17 @@ public class InflectionML {
formMatcher.appendTail(formBuffer);
+ String res = formBuffer.toString();
+ formBuffer = new StringBuffer();
+
+ Matcher anMat = AN_MARKER.matcher(res);
+
+ Iterator<String> anItr = anVals.iterator();
+ while(anMat.find()) {
+ anMat.appendReplacement(formBuffer, anItr.next());
+ }
+ anMat.appendTail(formBuffer);
+
return formBuffer.toString();
}
diff --git a/src/main/java/bjc/inflexion/nouns/CategoricalNounInflection.java b/src/main/java/bjc/inflexion/nouns/CategoricalNounInflection.java
index 1371ab3..fee0b33 100644
--- a/src/main/java/bjc/inflexion/nouns/CategoricalNounInflection.java
+++ b/src/main/java/bjc/inflexion/nouns/CategoricalNounInflection.java
@@ -1,5 +1,4 @@
-/**
- * (C) Copyright 2017 Benjamin Culkin.
+/*
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/src/main/java/bjc/inflexion/nouns/CompoundNounInflection.java b/src/main/java/bjc/inflexion/nouns/CompoundNounInflection.java
index 6edcb54..bd36202 100644
--- a/src/main/java/bjc/inflexion/nouns/CompoundNounInflection.java
+++ b/src/main/java/bjc/inflexion/nouns/CompoundNounInflection.java
@@ -1,5 +1,4 @@
/**
- * (C) Copyright 2017 Benjamin Culkin.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/src/main/java/bjc/inflexion/nouns/DefaultNounInflection.java b/src/main/java/bjc/inflexion/nouns/DefaultNounInflection.java
index e982bc9..570aa25 100644
--- a/src/main/java/bjc/inflexion/nouns/DefaultNounInflection.java
+++ b/src/main/java/bjc/inflexion/nouns/DefaultNounInflection.java
@@ -1,5 +1,4 @@
/**
- * (C) Copyright 2017 Benjamin Culkin.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/src/main/java/bjc/inflexion/nouns/InflectionAffix.java b/src/main/java/bjc/inflexion/nouns/InflectionAffix.java
index d224340..65c6500 100644
--- a/src/main/java/bjc/inflexion/nouns/InflectionAffix.java
+++ b/src/main/java/bjc/inflexion/nouns/InflectionAffix.java
@@ -1,5 +1,4 @@
/**
- * (C) Copyright 2017 Benjamin Culkin.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/src/main/java/bjc/inflexion/nouns/InflectionAffixes.java b/src/main/java/bjc/inflexion/nouns/InflectionAffixes.java
index facf9d0..645e73a 100644
--- a/src/main/java/bjc/inflexion/nouns/InflectionAffixes.java
+++ b/src/main/java/bjc/inflexion/nouns/InflectionAffixes.java
@@ -1,5 +1,4 @@
/**
- * (C) Copyright 2017 Benjamin Culkin.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/src/main/java/bjc/inflexion/nouns/InflectionException.java b/src/main/java/bjc/inflexion/nouns/InflectionException.java
index 56715ff..74a88b6 100644
--- a/src/main/java/bjc/inflexion/nouns/InflectionException.java
+++ b/src/main/java/bjc/inflexion/nouns/InflectionException.java
@@ -1,5 +1,4 @@
/**
- * (C) Copyright 2017 Benjamin Culkin.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/src/main/java/bjc/inflexion/nouns/IrregularNounInflection.java b/src/main/java/bjc/inflexion/nouns/IrregularNounInflection.java
index 471a99e..b336e85 100644
--- a/src/main/java/bjc/inflexion/nouns/IrregularNounInflection.java
+++ b/src/main/java/bjc/inflexion/nouns/IrregularNounInflection.java
@@ -1,5 +1,4 @@
/**
- * (C) Copyright 2017 Benjamin Culkin.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/src/main/java/bjc/inflexion/nouns/Noun.java b/src/main/java/bjc/inflexion/nouns/Noun.java
index f94e0bc..cd7c855 100644
--- a/src/main/java/bjc/inflexion/nouns/Noun.java
+++ b/src/main/java/bjc/inflexion/nouns/Noun.java
@@ -1,5 +1,4 @@
/**
- * (C) Copyright 2017 Benjamin Culkin.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/src/main/java/bjc/inflexion/nouns/NounInflection.java b/src/main/java/bjc/inflexion/nouns/NounInflection.java
index 978efdb..94161c5 100644
--- a/src/main/java/bjc/inflexion/nouns/NounInflection.java
+++ b/src/main/java/bjc/inflexion/nouns/NounInflection.java
@@ -1,5 +1,4 @@
/**
- * (C) Copyright 2017 Benjamin Culkin.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/src/main/java/bjc/inflexion/nouns/Nouns.java b/src/main/java/bjc/inflexion/nouns/Nouns.java
index aeb2f2f..6a36752 100644
--- a/src/main/java/bjc/inflexion/nouns/Nouns.java
+++ b/src/main/java/bjc/inflexion/nouns/Nouns.java
@@ -1,5 +1,4 @@
/**
- * (C) Copyright 2017 Benjamin Culkin.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/src/main/java/bjc/inflexion/nouns/Prepositions.java b/src/main/java/bjc/inflexion/nouns/Prepositions.java
index 0d36c7e..9564baf 100644
--- a/src/main/java/bjc/inflexion/nouns/Prepositions.java
+++ b/src/main/java/bjc/inflexion/nouns/Prepositions.java
@@ -1,5 +1,4 @@
/**
- * (C) Copyright 2017 Benjamin Culkin.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/src/main/java/bjc/inflexion/nouns/SimpleInflectionAffix.java b/src/main/java/bjc/inflexion/nouns/SimpleInflectionAffix.java
index 87991b5..93a22e6 100644
--- a/src/main/java/bjc/inflexion/nouns/SimpleInflectionAffix.java
+++ b/src/main/java/bjc/inflexion/nouns/SimpleInflectionAffix.java
@@ -1,5 +1,4 @@
/**
- * (C) Copyright 2017 Benjamin Culkin.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.