summary refs log tree commit diff
path: root/BJC-Utils2/src/main/java/bjc/utils/esodata/AbbrevMap.java
diff options
context:
space:
mode:
author Benjamin J. Culkin <bjculkin@mix.wvu.edu> 2017-10-08 22:39:59 -0300
committer Benjamin J. Culkin <bjculkin@mix.wvu.edu> 2017-10-08 22:39:59 -0300
commit c82e3b3b2de0633317ec8fc85925e91422820597 (patch)
tree 96567416ce23c5ce85601f9cedc3a94bb1c55cba /BJC-Utils2/src/main/java/bjc/utils/esodata/AbbrevMap.java
parent b3ac1c8690c3e14c879913e5dcc03a5f5e14876e (diff)
Start splitting into maven modules
Diffstat (limited to 'BJC-Utils2/src/main/java/bjc/utils/esodata/AbbrevMap.java')
-rw-r--r-- BJC-Utils2/src/main/java/bjc/utils/esodata/AbbrevMap.java 227
1 files changed, 0 insertions, 227 deletions
diff --git a/BJC-Utils2/src/main/java/bjc/utils/esodata/AbbrevMap.java b/BJC-Utils2/src/main/java/bjc/utils/esodata/AbbrevMap.java
deleted file mode 100644
index 0d54471..0000000
--- a/BJC-Utils2/src/main/java/bjc/utils/esodata/AbbrevMap.java
+++ /dev/null
@@ -1,227 +0,0 @@
-package bjc.utils.esodata;
-
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Set;
-
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.SetMultimap;
-
-import bjc.utils.funcdata.FunctionalMap;
-import bjc.utils.funcdata.IMap;
-
-/**
- * Represents a mapping from a set of strings to a mapping of all unambiguous
- * prefixes of their respective strings.
- *
- * This works the same as Ruby's Abbrev.
- *
- * @author EVE
- *
- */
-public class AbbrevMap {
-    /*
-     * All of the words we have abbreviations for.
-     */
-    private final Set<String> wrds;
-
-    /*
-     * Maps each unambiguous abbreviation, and each whole word, to the word
-     * it expands to.
-     */
-    private IMap<String, String> abbrevMap;
-
-    /*
-     * Every prefix encountered so far, ambiguous or not. Meeting a prefix
-     * that is already in here means it is shared by more than one word.
-     */
-    private Set<String> seen;
-
-    /*
-     * Maps ambiguous abbreviations to the strings they could be.
-     */
-    private SetMultimap<String, String> ambMap;
-
-    /**
-     * Create a new abbreviation map.
-     *
-     * @param words
-     *        The initial set of words to put in the map. Duplicates are
-     *        collapsed.
-     */
-    public AbbrevMap(final String... words) {
-        wrds = new HashSet<>(Arrays.asList(words));
-
-        /*
-         * Use the private worker, not recalculate(), so that no
-         * overridable method runs during construction.
-         */
-        rebuild();
-    }
-
-    /**
-     * Recalculate all the abbreviations in this map.
-     */
-    public void recalculate() {
-        rebuild();
-    }
-
-    /*
-     * Throw away every derived table and rebuild it from the word set.
-     */
-    private void rebuild() {
-        abbrevMap = new FunctionalMap<>();
-        ambMap = HashMultimap.create();
-        seen = new HashSet<>();
-
-        for (final String word : wrds) {
-            /*
-             * A word always abbreviates to itself.
-             */
-            abbrevMap.put(word, word);
-
-            intAddWord(word);
-        }
-    }
-
-    /**
-     * Adds words to the abbreviation map.
-     *
-     * Words that are already in the map are ignored.
-     *
-     * @param words
-     *        The words to add to the abbreviation map.
-     */
-    public void addWords(final String... words) {
-        for (final String word : words) {
-            /*
-             * Only process words that are actually new. Running an
-             * already-registered word through intAddWord again would
-             * find all of its own prefixes in 'seen' and wrongly
-             * demote them from unambiguous to ambiguous.
-             */
-            if (!wrds.add(word)) {
-                continue;
-            }
-
-            /*
-             * A word always abbreviates to itself.
-             */
-            abbrevMap.put(word, word);
-
-            intAddWord(word);
-        }
-    }
-
-    /*
-     * Actually add abbreviations of a word.
-     */
-    private void intAddWord(final String word) {
-        /*
-         * Skip blank words.
-         */
-        if (word.equals("")) {
-            return;
-        }
-
-        /*
-         * Handle each possible abbreviation, longest first.
-         */
-        for (int i = word.length(); i > 0; i--) {
-            final String subword = word.substring(0, i);
-
-            if (seen.add(subword)) {
-                /*
-                 * First time we meet this prefix, so for now it is
-                 * unambiguous.
-                 */
-                abbrevMap.put(subword, word);
-
-                continue;
-            }
-
-            /*
-             * A prefix that is itself a whole word keeps abbreviating
-             * to that word, no matter how many longer words share it.
-             */
-            if (wrds.contains(subword)) {
-                continue;
-            }
-
-            /*
-             * The prefix is shared. If it was unambiguous until now,
-             * move its old expansion over to the ambiguous side.
-             */
-            if (abbrevMap.containsKey(subword)) {
-                final String oldword = abbrevMap.remove(subword);
-
-                ambMap.put(subword, oldword);
-            }
-
-            ambMap.put(subword, word);
-        }
-    }
-
-    /**
-     * Removes words from the abbreviation map.
-     *
-     * The abbreviation tables are rebuilt from the remaining words, so the
-     * map is consistent afterwards: prefixes that were only ambiguous
-     * because of a removed word become unambiguous again, and mappings that
-     * belong to other words are left alone.
-     *
-     * @param words
-     *        The words to remove.
-     */
-    public void removeWords(final String... words) {
-        /*
-         * removeAll reports whether anything was actually removed; when
-         * nothing changed the existing tables are still valid.
-         */
-        if (wrds.removeAll(Arrays.asList(words))) {
-            rebuild();
-        }
-    }
-
-    /**
-     * Convert an abbreviation into all the strings it could abbreviate
-     * into.
-     *
-     * @param abbrev
-     *        The abbreviation to convert.
-     *
-     * @return All the expansions for the provided abbreviation: a single
-     *         element if it is unambiguous or a whole word, several if it
-     *         is ambiguous, and an empty array if nothing is recorded for
-     *         it.
-     */
-    public String[] deabbrev(final String abbrev) {
-        if (abbrevMap.containsKey(abbrev)) {
-            return new String[] { abbrevMap.get(abbrev) };
-        }
-
-        return ambMap.get(abbrev).toArray(new String[0]);
-    }
-
-    /*
-     * Equality and hashing look at the word set only; the other tables are
-     * derived from it.
-     */
-    @Override
-    public int hashCode() {
-        final int prime = 31;
-
-        return prime + wrds.hashCode();
-    }
-
-    @Override
-    public boolean equals(final Object obj) {
-        if (this == obj) {
-            return true;
-        }
-
-        /*
-         * instanceof is false for null, so no separate null check.
-         */
-        if (!(obj instanceof AbbrevMap)) {
-            return false;
-        }
-
-        final AbbrevMap other = (AbbrevMap) obj;
-
-        return wrds.equals(other.wrds);
-    }
-
-    @Override
-    public String toString() {
-        final String fmt = "AbbrevMap [wrds=%s, abbrevMap=%s, seen=%s, ambMap=%s]";
-
-        return String.format(fmt, wrds, abbrevMap, seen, ambMap);
-    }
-}