From ee87cdff7dfbe3782f75db3dc17be23f6edb54ec Mon Sep 17 00:00:00 2001 From: bjculkin Date: Fri, 17 Mar 2017 19:11:36 -0400 Subject: Add abbreviation map. An abbreviation map allows you to figure out which of the possible strings in a given set could be abbreviated by a particular string. --- .../java/bjc/utils/examples/AbbrevMapTest.java | 59 +++++ .../src/main/java/bjc/utils/esodata/AbbrevMap.java | 240 +++++++++++++++++++++ 2 files changed, 299 insertions(+) create mode 100644 BJC-Utils2/src/examples/java/bjc/utils/examples/AbbrevMapTest.java create mode 100644 BJC-Utils2/src/main/java/bjc/utils/esodata/AbbrevMap.java diff --git a/BJC-Utils2/src/examples/java/bjc/utils/examples/AbbrevMapTest.java b/BJC-Utils2/src/examples/java/bjc/utils/examples/AbbrevMapTest.java new file mode 100644 index 0000000..ddf8e0c --- /dev/null +++ b/BJC-Utils2/src/examples/java/bjc/utils/examples/AbbrevMapTest.java @@ -0,0 +1,59 @@ +package bjc.utils.examples; + +import bjc.utils.esodata.AbbrevMap; +import bjc.utils.funcutils.StringUtils; + +import java.util.Scanner; + +/** + * Test for abbreviation map. + * + * @author EVE + * + */ +public class AbbrevMapTest { + /** + * Main method. + * + * @param args + * Unused CLI args. + */ + public static void main(String[] args) { + Scanner scn = new Scanner(System.in); + + AbbrevMap map = new AbbrevMap(); + + System.out.print("Enter a command (blank line to quit): "); + String ln = scn.nextLine(); + + while(!ln.equals("")) { + String[] commParts = ln.split(" "); + + switch(commParts[0]) { + case "add": + map.addWords(commParts[1]); + break; + case "remove": + map.removeWords(commParts[1]); + break; + case "recalc": + map.recalculate(); + break; + case "check": + String list = StringUtils.toEnglishList(map.deabbrev(commParts[1]), false); + System.out.println(list); + break; + case "debug": + System.out.println(map.toString()); + break; + default: + System.out.println("Unknown command: " + ln); + } + + System.out.print("Enter a command (blank line to quit): "); + ln = scn.nextLine(); + } + + scn.close(); + } +} diff --git a/BJC-Utils2/src/main/java/bjc/utils/esodata/AbbrevMap.java b/BJC-Utils2/src/main/java/bjc/utils/esodata/AbbrevMap.java new file mode 100644 index 0000000..ebab5f0 --- /dev/null +++ b/BJC-Utils2/src/main/java/bjc/utils/esodata/AbbrevMap.java @@ -0,0 +1,240 @@ +package bjc.utils.esodata; + +import bjc.utils.funcdata.FunctionalMap; +import bjc.utils.funcdata.IMap; + +import com.google.common.collect.HashMultimap; +import com.google.common.collect.SetMultimap; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +/** + * Represents a mapping from a set of strings to a mapping of all unambiguous + * prefixes of their respective strings. + * + * This works the same as Ruby's Abbrev. + * + * @author EVE + * + */ +public class AbbrevMap { + /* + * All of the words we have abbreviations for. + */ + private Set wrds; + + /* + * Maps abbreviations to their strings. + */ + private IMap abbrevMap; + + /* + * Counts how many times we've seen a substring. + */ + private Set seen; + + /* + * Maps ambiguous abbreviations to the strings they could be. + */ + private SetMultimap ambMap; + + /** + * Create a new abbreviation map. + * + * @param words + * The initial set of words to put in the map. + */ + public AbbrevMap(String... words) { + wrds = new HashSet<>(Arrays.asList(words)); + + recalculate(); + } + + /** + * Recalculate all the abbreviations in this map. + */ + public void recalculate() { + abbrevMap = new FunctionalMap<>(); + + ambMap = HashMultimap.create(); + + seen = new HashSet<>(); + + for(String word : wrds) { + /* + * A word always abbreviates to itself. + */ + abbrevMap.put(word, word); + + intAddWord(word); + } + } + + /** + * Adds words to the abbreviation map. + * + * @param words + * The words to add to the abbreviation map. + */ + public void addWords(String... words) { + wrds.addAll(Arrays.asList(words)); + + for(String word : words) { + /* + * A word always abbreviates to itself. + */ + abbrevMap.put(word, word); + + intAddWord(word); + } + } + + /* + * Actually add abbreviations of a word. + */ + private void intAddWord(String word) { + /* + * Skip blank words. + */ + if(word.equals("")) return; + + /* + * Handle each possible abbreviation. + */ + for(int i = word.length(); i > 0; i--) { + String subword = word.substring(0, i); + + if(seen.contains(subword)) { + /* + * Remove a mapping if its ambiguous and not a + * whole word. + */ + if(abbrevMap.containsKey(subword) && !wrds.contains(subword)) { + String oldword = abbrevMap.remove(subword); + + ambMap.put(subword, oldword); + ambMap.put(subword, word); + } else if(!wrds.contains(subword)) { + ambMap.put(subword, word); + } + } else { + seen.add(subword); + + abbrevMap.put(subword, word); + } + } + } + + /** + * Removes words from the abbreviation map. + * + * NOTE: There may be inconsistent behavior. Use + * {@link AbbrevMap#recalculate()} to fix it if it occurs. + * + * @param words + * The words to remove. + */ + public void removeWords(String... words) { + wrds.removeAll(Arrays.asList(words)); + + for(String word : words) { + intRemoveWord(word); + } + } + + /* + * Actually remove a word. + */ + private void intRemoveWord(String word) { + /* + * Skip blank words. + */ + if(word.equals("")) return; + + /* + * Handle each possible abbreviation. + */ + for(int i = word.length(); i > 0; i--) { + String subword = word.substring(0, i); + + if(abbrevMap.containsKey(subword)) + abbrevMap.remove(subword); + else { + ambMap.remove(subword, word); + + Set possWords = ambMap.get(subword); + + if(possWords.size() == 0) { + seen.remove(subword); + } else if(possWords.size() == 1) { + String newWord = possWords.iterator().next(); + + abbrevMap.put(subword, newWord); + ambMap.remove(subword, newWord); + } + } + } + } + + /** + * Convert an abbreviation into all the strings it could abbreviate + * into. + * + * @param abbrev + * The abbreviation to convert. + * + * @return All the expansions for the provided abbreviation. + */ + public String[] deabbrev(String abbrev) { + if(abbrevMap.containsKey(abbrev)) + return new String[] { abbrevMap.get(abbrev) }; + else + return ambMap.get(abbrev).toArray(new String[0]); + } + + @Override + public int hashCode() { + final int prime = 31; + + int result = 1; + result = prime * result + ((wrds == null) ? 0 : wrds.hashCode()); + + return result; + } + + @Override + public boolean equals(Object obj) { + if(this == obj) return true; + if(obj == null) return false; + if(getClass() != obj.getClass()) return false; + + AbbrevMap other = (AbbrevMap) obj; + + if(wrds == null) { + if(other.wrds != null) return false; + } else if(!wrds.equals(other.wrds)) return false; + + return true; + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + + builder.append("AbbrevMap [wrds="); + builder.append(wrds); + builder.append("\n\t, abbrevMap="); + builder.append(abbrevMap); + builder.append("\n\t, seen="); + builder.append(seen); + builder.append("\n\t, ambMap="); + builder.append(ambMap); + builder.append("]"); + + return builder.toString(); + } + + +} -- cgit v1.2.3