/*
 * Source: base/src/main/java/bjc/utils/esodata/AbbrevMap.java
 *
 * Introduced in commit c82e3b3b2de0633317ec8fc85925e91422820597
 * ("Start splitting into maven modules") by Benjamin J. Culkin,
 * Sun, 8 Oct 2017 22:39:59 -0300.
 */
package bjc.utils.esodata;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.SetMultimap;

import bjc.utils.funcdata.FunctionalMap;
import bjc.utils.funcdata.IMap;

/**
 * Represents a mapping from a set of strings to a mapping of all unambiguous
 * prefixes of their respective strings.
 *
 * This is modelled on Ruby's Abbrev. In addition, a prefix shared by several
 * words can still be expanded: {@link #deabbrev(String)} then returns the
 * candidate words recorded for it.
 *
 * A prefix that is itself one of the words always expands to exactly that
 * word, even when longer words start with it.
 *
 * @author EVE
 *
 */
public class AbbrevMap {
	/*
	 * All of the words we have abbreviations for.
	 */
	private final Set<String> wrds;

	/*
	 * Maps unambiguous abbreviations (and whole words) to their strings.
	 */
	private IMap<String, String> abbrevMap;

	/*
	 * Every prefix that has been indexed at least once. A prefix that is
	 * in here but not in abbrevMap has been found to be ambiguous.
	 */
	private Set<String> seen;

	/*
	 * Maps ambiguous abbreviations to the strings they could be.
	 */
	private SetMultimap<String, String> ambMap;

	/**
	 * Create a new abbreviation map.
	 *
	 * @param words
	 *        The initial set of words to put in the map. Duplicates are
	 *        collapsed.
	 */
	public AbbrevMap(final String... words) {
		wrds = new HashSet<>(Arrays.asList(words));

		/*
		 * Use the private helper rather than recalculate(), so that
		 * no overridable method runs during construction.
		 */
		rebuildIndexes();
	}

	/**
	 * Recalculate all the abbreviations in this map.
	 *
	 * Discards every derived index and rebuilds it from the current set
	 * of words.
	 */
	public void recalculate() {
		rebuildIndexes();
	}

	/*
	 * Throw away the derived state and index every known word again.
	 */
	private void rebuildIndexes() {
		abbrevMap = new FunctionalMap<>();
		ambMap = HashMultimap.create();
		seen = new HashSet<>();

		for (final String word : wrds) {
			intAddWord(word);
		}
	}

	/**
	 * Adds words to the abbreviation map.
	 *
	 * Words that are already in the map are ignored; indexing them a
	 * second time would wrongly mark their own unique prefixes as
	 * ambiguous.
	 *
	 * @param words
	 *        The words to add to the abbreviation map.
	 */
	public void addWords(final String... words) {
		/*
		 * Register the whole batch before indexing any of it, because
		 * the indexing step asks whether a prefix is itself a word and
		 * must see the other members of the batch. Set.add reports
		 * whether the word was actually new.
		 */
		final boolean[] isNew = new boolean[words.length];

		for (int i = 0; i < words.length; i++) {
			isNew[i] = wrds.add(words[i]);
		}

		for (int i = 0; i < words.length; i++) {
			if (isNew[i]) {
				intAddWord(words[i]);
			}
		}
	}

	/*
	 * Actually add a word and the abbreviations of it.
	 *
	 * The word must already be in wrds.
	 */
	private void intAddWord(final String word) {
		/*
		 * A word always abbreviates to itself.
		 */
		abbrevMap.put(word, word);

		/*
		 * Blank words have no prefixes to index.
		 */
		if (word.isEmpty()) {
			return;
		}

		/*
		 * Handle each possible abbreviation, longest first.
		 */
		for (int i = word.length(); i > 0; i--) {
			final String prefix = word.substring(0, i);

			if (seen.add(prefix)) {
				/*
				 * First sighting: the prefix is unique so far.
				 */
				abbrevMap.put(prefix, word);
				continue;
			}

			if (wrds.contains(prefix)) {
				/*
				 * The prefix is a whole word; it keeps mapping to
				 * itself and is never treated as ambiguous.
				 */
				continue;
			}

			/*
			 * The prefix is now shared. If it was unique until now,
			 * demote its previous owner to a candidate as well.
			 */
			if (abbrevMap.containsKey(prefix)) {
				ambMap.put(prefix, abbrevMap.remove(prefix));
			}

			ambMap.put(prefix, word);
		}
	}

	/**
	 * Removes words from the abbreviation map.
	 *
	 * If any of the given words was present, all abbreviations are
	 * rebuilt from the remaining words, so the map is consistent when
	 * this method returns. Words that are not in the map are ignored.
	 *
	 * @param words
	 *        The words to remove.
	 */
	public void removeWords(final String... words) {
		/*
		 * The indexes do not record which word owns a prefix once a
		 * whole word shadows it, so removing entries piecemeal can
		 * delete mappings that belong to other words. Rebuilding is
		 * the only way to get a correct result from this state.
		 */
		if (wrds.removeAll(Arrays.asList(words))) {
			rebuildIndexes();
		}
	}

	/**
	 * Convert an abbreviation into all the strings it could abbreviate
	 * into.
	 *
	 * @param abbrev
	 *        The abbreviation to convert.
	 *
	 * @return All the expansions for the provided abbreviation: a single
	 *         element if it is unambiguous or a whole word, the recorded
	 *         candidates if it is ambiguous, and an empty array if it
	 *         abbreviates nothing.
	 */
	public String[] deabbrev(final String abbrev) {
		if (abbrevMap.containsKey(abbrev)) {
			return new String[] { abbrevMap.get(abbrev) };
		}

		return ambMap.get(abbrev).toArray(new String[0]);
	}

	/*
	 * Only the word set takes part in hashCode/equals; the other fields
	 * are derived from it.
	 */
	@Override
	public int hashCode() {
		final int prime = 31;

		return prime + wrds.hashCode();
	}

	@Override
	public boolean equals(final Object obj) {
		if (this == obj) {
			return true;
		}

		/*
		 * instanceof is false for null, so no separate null check.
		 */
		if (!(obj instanceof AbbrevMap)) {
			return false;
		}

		final AbbrevMap other = (AbbrevMap) obj;

		return wrds.equals(other.wrds);
	}

	@Override
	public String toString() {
		final String fmt = "AbbrevMap [wrds=%s, abbrevMap=%s, seen=%s, ambMap=%s]";

		return String.format(fmt, wrds, abbrevMap, seen, ambMap);
	}
}