summaryrefslogtreecommitdiff
path: root/base/src/main/java/bjc/utils/esodata/AbbrevMap.java
diff options
context:
space:
mode:
Diffstat (limited to 'base/src/main/java/bjc/utils/esodata/AbbrevMap.java')
-rw-r--r--base/src/main/java/bjc/utils/esodata/AbbrevMap.java227
1 files changed, 227 insertions, 0 deletions
diff --git a/base/src/main/java/bjc/utils/esodata/AbbrevMap.java b/base/src/main/java/bjc/utils/esodata/AbbrevMap.java
new file mode 100644
index 0000000..0d54471
--- /dev/null
+++ b/base/src/main/java/bjc/utils/esodata/AbbrevMap.java
@@ -0,0 +1,227 @@
+package bjc.utils.esodata;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.SetMultimap;
+
+import bjc.utils.funcdata.FunctionalMap;
+import bjc.utils.funcdata.IMap;
+
+/**
+ * Represents a mapping from a set of strings to a mapping of all unambiguous
+ * prefixes of their respective strings.
+ *
+ * This works the same as Ruby's Abbrev.
+ *
+ * @author EVE
+ *
+ */
+public class AbbrevMap {
+ /*
+ * All of the words we have abbreviations for.
+ */
+ private final Set<String> wrds;
+
+ /*
+ * Maps abbreviations to their strings.
+ */
+ private IMap<String, String> abbrevMap;
+
+ /*
+ * Counts how many times we've seen a substring.
+ */
+ private Set<String> seen;
+
+ /*
+ * Maps ambiguous abbreviations to the strings they could be.
+ */
+ private SetMultimap<String, String> ambMap;
+
+ /**
+ * Create a new abbreviation map.
+ *
+ * @param words
+ * The initial set of words to put in the map.
+ */
+ public AbbrevMap(final String... words) {
+ wrds = new HashSet<>(Arrays.asList(words));
+
+ recalculate();
+ }
+
+ /**
+ * Recalculate all the abbreviations in this map.
+ */
+ public void recalculate() {
+ abbrevMap = new FunctionalMap<>();
+
+ ambMap = HashMultimap.create();
+
+ seen = new HashSet<>();
+
+ for (final String word : wrds) {
+ /*
+ * A word always abbreviates to itself.
+ */
+ abbrevMap.put(word, word);
+
+ intAddWord(word);
+ }
+ }
+
+ /**
+ * Adds words to the abbreviation map.
+ *
+ * @param words
+ * The words to add to the abbreviation map.
+ */
+ public void addWords(final String... words) {
+ wrds.addAll(Arrays.asList(words));
+
+ for (final String word : words) {
+ /*
+ * A word always abbreviates to itself.
+ */
+ abbrevMap.put(word, word);
+
+ intAddWord(word);
+ }
+ }
+
+ /*
+ * Actually add abbreviations of a word.
+ */
+ private void intAddWord(final String word) {
+ /*
+ * Skip blank words.
+ */
+ if (word.equals("")) return;
+
+ /*
+ * Handle each possible abbreviation.
+ */
+ for (int i = word.length(); i > 0; i--) {
+ final String subword = word.substring(0, i);
+
+ if (seen.contains(subword)) {
+ /*
+ * Remove a mapping if its ambiguous and not a
+ * whole word.
+ */
+ if (abbrevMap.containsKey(subword) && !wrds.contains(subword)) {
+ final String oldword = abbrevMap.remove(subword);
+
+ ambMap.put(subword, oldword);
+ ambMap.put(subword, word);
+ } else if (!wrds.contains(subword)) {
+ ambMap.put(subword, word);
+ }
+ } else {
+ seen.add(subword);
+
+ abbrevMap.put(subword, word);
+ }
+ }
+ }
+
+ /**
+ * Removes words from the abbreviation map.
+ *
+ * NOTE: There may be inconsistent behavior after removing a word from
+ * the map. Use {@link AbbrevMap#recalculate()} to fix it if it occurs.
+ *
+ * @param words
+ * The words to remove.
+ */
+ public void removeWords(final String... words) {
+ wrds.removeAll(Arrays.asList(words));
+
+ for (final String word : words) {
+ intRemoveWord(word);
+ }
+ }
+
+ /*
+ * Actually remove a word.
+ */
+ private void intRemoveWord(final String word) {
+ /*
+ * Skip blank words.
+ */
+ if (word.equals("")) return;
+
+ /*
+ * Handle each possible abbreviation.
+ */
+ for (int i = word.length(); i > 0; i--) {
+ final String subword = word.substring(0, i);
+
+ if (abbrevMap.containsKey(subword)) {
+ abbrevMap.remove(subword);
+ } else {
+ ambMap.remove(subword, word);
+
+ final Set<String> possWords = ambMap.get(subword);
+
+ if (possWords.size() == 0) {
+ seen.remove(subword);
+ } else if (possWords.size() == 1) {
+ final String newWord = possWords.iterator().next();
+
+ abbrevMap.put(subword, newWord);
+ ambMap.remove(subword, newWord);
+ }
+ }
+ }
+ }
+
+ /**
+ * Convert an abbreviation into all the strings it could abbreviate
+ * into.
+ *
+ * @param abbrev
+ * The abbreviation to convert.
+ *
+ * @return All the expansions for the provided abbreviation.
+ */
+ public String[] deabbrev(final String abbrev) {
+ if (abbrevMap.containsKey(abbrev))
+ return new String[] { abbrevMap.get(abbrev) };
+ else return ambMap.get(abbrev).toArray(new String[0]);
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+
+ int result = 1;
+ result = prime * result + (wrds == null ? 0 : wrds.hashCode());
+
+ return result;
+ }
+
+ @Override
+ public boolean equals(final Object obj) {
+ if (this == obj) return true;
+ if (obj == null) return false;
+ if (!(obj instanceof AbbrevMap)) return false;
+
+ final AbbrevMap other = (AbbrevMap) obj;
+
+ if (wrds == null) {
+ if (other.wrds != null) return false;
+ } else if (!wrds.equals(other.wrds)) return false;
+
+ return true;
+ }
+
+ @Override
+ public String toString() {
+ final String fmt = "AbbrevMap [wrds=%s, abbrevMap=%s, seen=%s, ambMap=%s]";
+
+ return String.format(fmt, wrds, abbrevMap, seen, ambMap);
+ }
+}