diff options
| author | Benjamin J. Culkin <bjculkin@mix.wvu.edu> | 2018-06-07 16:26:46 -0300 |
|---|---|---|
| committer | Benjamin J. Culkin <bjculkin@mix.wvu.edu> | 2018-06-07 16:26:46 -0300 |
| commit | 7f16ae0286ab7492eee9f4019d976bc5ca95d556 (patch) | |
| tree | 811d0f5b4713260b5a9ffb9ab64d676c3746dbc3 /src/main/java | |
| parent | 235208946ceb2bf0f422956a3ebc0ebb88ba28b6 (diff) | |
Indefinites
Diffstat (limited to 'src/main/java')
14 files changed, 113 insertions, 33 deletions
diff --git a/src/main/java/bjc/inflexion/EnglishUtils.java b/src/main/java/bjc/inflexion/EnglishUtils.java index 28fc6c6..e233018 100644 --- a/src/main/java/bjc/inflexion/EnglishUtils.java +++ b/src/main/java/bjc/inflexion/EnglishUtils.java @@ -1,6 +1,4 @@ -/** - * (C) Copyright 2017 Benjamin Culkin. - * +/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -15,6 +13,8 @@ */ package bjc.inflexion; +import java.util.regex.Pattern; + /** * General utils for dealing with english. * @@ -72,4 +72,68 @@ public class EnglishUtils { return "many"; } + + private static Pattern AN_ORD = Pattern.compile("(?i)\\A[aefhilmnorsx]-?th\\Z"); + private static Pattern A_ORD = Pattern.compile("(?i)\\A[bcdgjkpqtuvwyz]-?th\\Z"); + private static Pattern EXP_AN = Pattern.compile("(?i)\\A(?:euler|hour(?!i)|heir|honest|hono)"); + private static Pattern SIN_AN = Pattern.compile("(?i)\\A[aefhilmnorst]\\Z"); + private static Pattern SIN_A = Pattern.compile("(?i)\\A[bcdgjkpqtuvwyz]\\Z"); + + private static Pattern ABBREV_AN = Pattern.compile("\\A(?!FJO|[HLMNS]Y|RY[EQ]|SQU|(F[LR]?|[HL]|MN?|N|RH?|S[CHKLMNPTVW]?|X(YL)?)[AEIOU])[FHLMNRSX][A-Z]"); + + private static Pattern IN_Y_AN = Pattern.compile("(?i)\\Ay(?:b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)"); + + private static Pattern ABBREV_C2 = Pattern.compile("(?i)\\A[aefhilmnorsx][.-]"); + private static Pattern ABBREV_C3 = Pattern.compile("(?i)\\A[a-z][.-]"); + + private static Pattern CONSONANT = Pattern.compile("(?i)\\A[^aeiouy]"); + + private static Pattern SPECVOWEL_C1 = Pattern.compile("(?i)\\Ae[uw]"); + private static Pattern SPECVOWEL_C2 = Pattern.compile("(?i)\\Aonc?e\b"); + private static Pattern SPECVOWEL_C3 = Pattern.compile("(?i)\\Auni(?:[^nmd]|mo)"); + private static Pattern SPECVOWEL_C4 = Pattern.compile("(?i)\\Aut[th]"); + private static Pattern SPECVOWEL_C5 = Pattern.compile("(?i)\\Au[bcfhjkqrst][aeiou]"); + + private static Pattern SPECCAP_C1 = Pattern.compile("\\AU[NK][AIEO]?"); + + private static Pattern VOWEL = Pattern.compile("(?i)\\A[aeiou]\\Z"); + + public static String pickIndefinite(String word) { + // Handle ordinal forms + if(A_ORD.matcher(word).find()) return "a"; + if(AN_ORD.matcher(word).find()) return "an"; + + // Handle special cases + if(EXP_AN.matcher(word).find()) return "an"; + if(SIN_AN.matcher(word).find()) return "an"; + if(SIN_A.matcher(word).find()) return "a"; + + // Handle abbreviations + if(ABBREV_AN.matcher(word).find()) return "an"; + if(ABBREV_C2.matcher(word).find()) return "an"; + if(ABBREV_C3.matcher(word).find()) return "a"; + + // Handle consonants + if(CONSONANT.matcher(word).find()) return "a"; + + // Handle special vowel forms + if(SPECVOWEL_C1.matcher(word).find()) return "a"; + if(SPECVOWEL_C2.matcher(word).find()) return "a"; + if(SPECVOWEL_C3.matcher(word).find()) return "a"; + if(SPECVOWEL_C4.matcher(word).find()) return "an"; + if(SPECVOWEL_C5.matcher(word).find()) return "a"; + + // Handle special capitals + if(SPECCAP_C1.matcher(word).find()) return "a"; + + // Handle vowels + if(VOWEL.matcher(word).find()) return "an"; + + // Handle Y (before certain consonants, it implies a + // (unnaturalized) "I" sound) + if(IN_Y_AN.matcher(word).find()) return "an"; + + // Guess "A" + return "a"; + } } diff --git a/src/main/java/bjc/inflexion/InflectionML.java b/src/main/java/bjc/inflexion/InflectionML.java index 939e96e..9ee175d 100644 --- a/src/main/java/bjc/inflexion/InflectionML.java +++ b/src/main/java/bjc/inflexion/InflectionML.java @@ -1,6 +1,4 @@ -/** - * (C) Copyright 2017 Benjamin Culkin. - * +/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -15,9 +13,11 @@ */ package bjc.inflexion; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.List; +import java.util.Iterator; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -45,6 +45,8 @@ public class InflectionML { private static Pattern FORM_MARKER = Pattern.compile("<(?<command>[#N])(?<options>[^:]*):(?<text>[^>]*)>"); + private static Pattern AN_MARKER = Pattern.compile("\\{an(\\d+)\\}"); + /* The database of nouns. */ private static Nouns nounDB; @@ -66,14 +68,19 @@ public class InflectionML { * @return * The inflected string. */ - public static String inflect(final String form) { - final Matcher formMatcher = FORM_MARKER.matcher(form); - - final StringBuffer formBuffer = new StringBuffer(); + public static String inflect(String form) { + Matcher formMatcher = FORM_MARKER.matcher(form); + StringBuffer formBuffer = new StringBuffer(); int curCount = 1; + boolean inflectSingular = true; + int anCount = 0; + List<String> anVals = new ArrayList<>(); + + boolean pendingAN = false; + while (formMatcher.find()) { final String command = formMatcher.group("command"); final String options = formMatcher.group("options"); @@ -129,9 +136,12 @@ public class InflectionML { } if (optionSet.contains("a")) { - /* :InflectionML - * Implement a/an for nouns. - */ + if (curCount == 1) { + anCount += 1; + rep = "{an" + anCount + "}"; + + pendingAN = true; + } } /* Break out of switch. */ @@ -140,10 +150,7 @@ public class InflectionML { break; } - final boolean shouldOverride = - !(rep.equals("no") || - rep.equals("a") || - rep.equals("an") ); + final boolean shouldOverride = !(rep.equals("no") || rep.matches("\\{an\\d+\\}")); if (optionSet.contains("w") && shouldOverride) { rep = EnglishUtils.smallIntToWord(curCount); @@ -162,15 +169,25 @@ public class InflectionML { case "N": final Noun noun = nounDB.getNoun(text); + String nounVal; + if (optionSet.contains("p") || !inflectSingular) { if (optionSet.contains("c")) { - formMatcher.appendReplacement(formBuffer, noun.classicalPlural()); + nounVal = noun.classicalPlural(); } else { - formMatcher.appendReplacement(formBuffer, noun.modernPlural()); + nounVal = noun.modernPlural(); } } else { - formMatcher.appendReplacement(formBuffer, noun.singular()); + nounVal = noun.singular(); } + + formMatcher.appendReplacement(formBuffer, nounVal); + if(pendingAN) { + anVals.add(EnglishUtils.pickIndefinite(nounVal)); + + pendingAN = false; + } + break; default: final String msg = String.format("Unknown command '%s'", command); @@ -181,6 +198,17 @@ public class InflectionML { formMatcher.appendTail(formBuffer); + String res = formBuffer.toString(); + formBuffer = new StringBuffer(); + + Matcher anMat = AN_MARKER.matcher(res); + + Iterator<String> anItr = anVals.iterator(); + while(anMat.find()) { + anMat.appendReplacement(formBuffer, anItr.next()); + } + anMat.appendTail(formBuffer); + return formBuffer.toString(); } diff --git a/src/main/java/bjc/inflexion/nouns/CategoricalNounInflection.java b/src/main/java/bjc/inflexion/nouns/CategoricalNounInflection.java index 1371ab3..fee0b33 100644 --- a/src/main/java/bjc/inflexion/nouns/CategoricalNounInflection.java +++ b/src/main/java/bjc/inflexion/nouns/CategoricalNounInflection.java @@ -1,5 +1,4 @@ -/** - * (C) Copyright 2017 Benjamin Culkin. +/* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/bjc/inflexion/nouns/CompoundNounInflection.java b/src/main/java/bjc/inflexion/nouns/CompoundNounInflection.java index 6edcb54..bd36202 100644 --- a/src/main/java/bjc/inflexion/nouns/CompoundNounInflection.java +++ b/src/main/java/bjc/inflexion/nouns/CompoundNounInflection.java @@ -1,5 +1,4 @@ /** - * (C) Copyright 2017 Benjamin Culkin. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/bjc/inflexion/nouns/DefaultNounInflection.java b/src/main/java/bjc/inflexion/nouns/DefaultNounInflection.java index e982bc9..570aa25 100644 --- a/src/main/java/bjc/inflexion/nouns/DefaultNounInflection.java +++ b/src/main/java/bjc/inflexion/nouns/DefaultNounInflection.java @@ -1,5 +1,4 @@ /** - * (C) Copyright 2017 Benjamin Culkin. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/bjc/inflexion/nouns/InflectionAffix.java b/src/main/java/bjc/inflexion/nouns/InflectionAffix.java index d224340..65c6500 100644 --- a/src/main/java/bjc/inflexion/nouns/InflectionAffix.java +++ b/src/main/java/bjc/inflexion/nouns/InflectionAffix.java @@ -1,5 +1,4 @@ /** - * (C) Copyright 2017 Benjamin Culkin. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/bjc/inflexion/nouns/InflectionAffixes.java b/src/main/java/bjc/inflexion/nouns/InflectionAffixes.java index facf9d0..645e73a 100644 --- a/src/main/java/bjc/inflexion/nouns/InflectionAffixes.java +++ b/src/main/java/bjc/inflexion/nouns/InflectionAffixes.java @@ -1,5 +1,4 @@ /** - * (C) Copyright 2017 Benjamin Culkin. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/bjc/inflexion/nouns/InflectionException.java b/src/main/java/bjc/inflexion/nouns/InflectionException.java index 56715ff..74a88b6 100644 --- a/src/main/java/bjc/inflexion/nouns/InflectionException.java +++ b/src/main/java/bjc/inflexion/nouns/InflectionException.java @@ -1,5 +1,4 @@ /** - * (C) Copyright 2017 Benjamin Culkin. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/bjc/inflexion/nouns/IrregularNounInflection.java b/src/main/java/bjc/inflexion/nouns/IrregularNounInflection.java index 471a99e..b336e85 100644 --- a/src/main/java/bjc/inflexion/nouns/IrregularNounInflection.java +++ b/src/main/java/bjc/inflexion/nouns/IrregularNounInflection.java @@ -1,5 +1,4 @@ /** - * (C) Copyright 2017 Benjamin Culkin. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/bjc/inflexion/nouns/Noun.java b/src/main/java/bjc/inflexion/nouns/Noun.java index f94e0bc..cd7c855 100644 --- a/src/main/java/bjc/inflexion/nouns/Noun.java +++ b/src/main/java/bjc/inflexion/nouns/Noun.java @@ -1,5 +1,4 @@ /** - * (C) Copyright 2017 Benjamin Culkin. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/bjc/inflexion/nouns/NounInflection.java b/src/main/java/bjc/inflexion/nouns/NounInflection.java index 978efdb..94161c5 100644 --- a/src/main/java/bjc/inflexion/nouns/NounInflection.java +++ b/src/main/java/bjc/inflexion/nouns/NounInflection.java @@ -1,5 +1,4 @@ /** - * (C) Copyright 2017 Benjamin Culkin. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/bjc/inflexion/nouns/Nouns.java b/src/main/java/bjc/inflexion/nouns/Nouns.java index aeb2f2f..6a36752 100644 --- a/src/main/java/bjc/inflexion/nouns/Nouns.java +++ b/src/main/java/bjc/inflexion/nouns/Nouns.java @@ -1,5 +1,4 @@ /** - * (C) Copyright 2017 Benjamin Culkin. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/bjc/inflexion/nouns/Prepositions.java b/src/main/java/bjc/inflexion/nouns/Prepositions.java index 0d36c7e..9564baf 100644 --- a/src/main/java/bjc/inflexion/nouns/Prepositions.java +++ b/src/main/java/bjc/inflexion/nouns/Prepositions.java @@ -1,5 +1,4 @@ /** - * (C) Copyright 2017 Benjamin Culkin. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/main/java/bjc/inflexion/nouns/SimpleInflectionAffix.java b/src/main/java/bjc/inflexion/nouns/SimpleInflectionAffix.java index 87991b5..93a22e6 100644 --- a/src/main/java/bjc/inflexion/nouns/SimpleInflectionAffix.java +++ b/src/main/java/bjc/inflexion/nouns/SimpleInflectionAffix.java @@ -1,5 +1,4 @@ /** - * (C) Copyright 2017 Benjamin Culkin. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. |
