From 9d89261fedf23c11b684eb66cefdd86a9378ad20 Mon Sep 17 00:00:00 2001 From: bjculkin Date: Fri, 17 Mar 2017 08:34:57 -0400 Subject: Move parsing utilities. Moved the parsing utilities SequenceDelimiter and TokenSplitter to the parserutils package, instead of the funcutils package. --- .../bjc/utils/funcutils/SequenceDelimiter.java | 524 --------------------- .../java/bjc/utils/funcutils/StringDelimiter.java | 31 -- .../java/bjc/utils/funcutils/TokenSplitter.java | 161 ------- 3 files changed, 716 deletions(-) delete mode 100644 BJC-Utils2/src/main/java/bjc/utils/funcutils/SequenceDelimiter.java delete mode 100644 BJC-Utils2/src/main/java/bjc/utils/funcutils/StringDelimiter.java delete mode 100644 BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java (limited to 'BJC-Utils2/src/main/java/bjc/utils/funcutils') diff --git a/BJC-Utils2/src/main/java/bjc/utils/funcutils/SequenceDelimiter.java b/BJC-Utils2/src/main/java/bjc/utils/funcutils/SequenceDelimiter.java deleted file mode 100644 index 625c9ec..0000000 --- a/BJC-Utils2/src/main/java/bjc/utils/funcutils/SequenceDelimiter.java +++ /dev/null @@ -1,524 +0,0 @@ -package bjc.utils.funcutils; - -import bjc.utils.data.ITree; -import bjc.utils.data.Tree; -import bjc.utils.esodata.PushdownMap; -import bjc.utils.esodata.SimpleStack; -import bjc.utils.esodata.Stack; -import bjc.utils.funcdata.IMap; - -import com.google.common.collect.HashMultiset; -import com.google.common.collect.Multiset; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -/** - * Convert linear sequences into trees that represent group structure. - * - * @author EVE - * - * @param - * The type of items in the sequence. - */ -public class SequenceDelimiter { - /** - * Represents a possible delimiter group to match. - * - * @author EVE - * - * @param - * The type of items in the sequence. 
- */ - public static class DelimiterGroup { - /** - * The name of this delimiter group. - */ - public final T2 groupName; - - /* - * The delimiters that close this group. - */ - private Set closingDelimiters; - - /* - * The groups that can't occur in the top level of this group. - */ - private Set topLevelExclusions; - - /* - * The groups that can't occur anywhere inside this group. - */ - private Set groupExclusions; - - /** - * Create a new empty delimiter group. - * - * @param name - * The name of the delimiter group - */ - public DelimiterGroup(T2 name) { - if(name == null) throw new NullPointerException("Group name must not be null"); - - groupName = name; - - closingDelimiters = new HashSet<>(); - topLevelExclusions = new HashSet<>(); - groupExclusions = new HashSet<>(); - } - - /** - * Check if the provided delimiter would close this group. - * - * @param del - * The string to check as a closing delimiter. - * - * @return Whether or not the provided delimiter closes this - * group. - */ - public boolean isClosing(T2 del) { - return closingDelimiters.contains(del); - } - - /** - * Adds one or more delimiters that close this group. - * - * @param closers - * Delimiters that close this group. - */ - @SafeVarargs - public final void addClosing(T2... closers) { - List closerList = Arrays.asList(closers); - - for(T2 closer : closerList) { - if(closer == null) { - throw new NullPointerException("Closing delimiter must not be null"); - } else if(closer.equals("")) { - /* - * We can do this because equals works - * on arbitrary objects, not just those - * of the same type. - */ - throw new IllegalArgumentException("Empty string is not a valid exclusion"); - } else { - closingDelimiters.add(closer); - } - } - } - - /** - * Adds one or more groups that cannot occur in the top level of - * this group. - * - * @param exclusions - * The groups forbidden in the top level of this - * group. - */ - @SafeVarargs - public final void addTopLevelForbid(T2... 
exclusions) { - for(T2 exclusion : exclusions) { - if(exclusion == null) { - throw new NullPointerException("Exclusion must not be null"); - } else if(exclusion.equals("")) { - /* - * We can do this because equals works - * on arbitrary objects, not just those - * of the same type. - */ - throw new IllegalArgumentException("Empty string is not a valid exclusion"); - } else { - topLevelExclusions.add(exclusion); - } - } - } - - /** - * Adds one or more groups that cannot occur at all in this - * group. - * - * @param exclusions - * The groups forbidden inside this group. - */ - @SafeVarargs - public final void addGroupForbid(T2... exclusions) { - for(T2 exclusion : exclusions) { - if(exclusion == null) { - throw new NullPointerException("Exclusion must not be null"); - } else if(exclusion.equals("")) { - /* - * We can do this because equals works - * on arbitrary objects, not just those - * of the same type. - */ - throw new IllegalArgumentException("Empty string is not a valid exclusion"); - } else { - groupExclusions.add(exclusion); - } - } - } - - @Override - public String toString() { - StringBuilder builder = new StringBuilder(); - - builder.append("("); - - builder.append("groupName=["); - builder.append(groupName); - builder.append("], "); - - builder.append("closingDelimiters=["); - for(T2 closer : closingDelimiters) { - builder.append(closer + ","); - } - builder.deleteCharAt(builder.length() - 1); - builder.append("]"); - - if(topLevelExclusions != null && !topLevelExclusions.isEmpty()) { - builder.append(", "); - builder.append("topLevelExclusions=["); - for(T2 exclusion : topLevelExclusions) { - builder.append(exclusion + ","); - } - builder.deleteCharAt(builder.length() - 1); - builder.append("]"); - } - - if(groupExclusions != null && !groupExclusions.isEmpty()) { - builder.append(", "); - builder.append("groupExclusions=["); - for(T2 exclusion : topLevelExclusions) { - builder.append(exclusion + ","); - } - builder.deleteCharAt(builder.length() - 1); 
- builder.append("]"); - } - - builder.append(" )"); - - return builder.toString(); - } - - } - - /** - * The superclass for exceptions thrown during sequence delimitation. - */ - public static class DelimiterException extends RuntimeException { - /** - * Create a new generic delimiter exception. - * - * @param res - * The reason for this exception. - */ - public DelimiterException(String res) { - super(res); - } - } - - /* - * Mapping from opening delimiters to the names of the groups they open - */ - private Map openDelimiters; - - /* - * Mapping from group names to actual groups. - */ - private Map> groups; - - /** - * Create a new sequence delimiter. - */ - public SequenceDelimiter() { - openDelimiters = new HashMap<>(); - - groups = new HashMap<>(); - } - - /** - * Convert a linear sequence into a tree that matches the delimiter - * structure. - * - * Essentially, creates a parse tree of the expression against the - * following grammar while obeying the defined group rules. - * - *
-	 * <tree>          -> (<data> | <group>)*
-	 * <group>         -> <open> <tree> <close>
-	 * <data>          -> STRING
-	 * <open>          -> STRING
-	 * <close>         -> STRING
-	 * 
- * - * @param seq - * The sequence to delimit. - * - * @param root - * The root of the returned tree. - * - * @param contents - * The item to use to mark the contents of a group - * - * @return The sequence as a tree that matches its group structure. Each - * node in the tree is either a data node or a group node. - * - * A data node is a leaf node whose data is the string it - * represents. - * - * A group node is a node with three children, and the name of - * the group as its label. The first child is the opening - * delimiter, the second is the group contents, and the third is - * the closing delimiter. The delimiters are leaf nodes labeled - * with their contents, while the group node contains a list of - * data and group nodes. - * - * @throws DelimiterException - * Thrown if something went wrong during sequence - * delimitation. - * - */ - public ITree delimitSequence(T root, T contents, @SuppressWarnings("unchecked") T... seq) throws DelimiterException { - /* - * The root node of the tree to give back. - */ - ITree res = new Tree<>(root); - - /* - * Handle the trivial case where there are no groups. - */ - if(openDelimiters.isEmpty()) { - for(T tok : seq) { - res.addChild(new Tree<>(tok)); - } - - return res; - } - - /* - * The stack of trees that represent the sequence. - */ - Stack> trees = new SimpleStack<>(); - trees.push(res); - - /* - * The stack of opened and not yet closed groups. - */ - Stack> groupStack = new SimpleStack<>(); - - /* - * Groups that aren't allowed to be opened at the moment. - */ - Multiset forbiddenDelimiters = HashMultiset.create(); - - /* - * Map of who forbid what for debugging purposes. - */ - IMap whoForbid = new PushdownMap<>(); - - for(int i = 0; i < seq.length; i++) { - T tok = seq[i]; - - /* - * If we have an opening delimiter, handle it. 
- */ - if(openDelimiters.containsKey(tok)) { - T groupName = openDelimiters.get(tok); - DelimiterGroup group = groups.get(groupName); - - /* - * Error on groups that can't open in this - * context. - * - * This means groups that can't occur at the - * top-level of this group, as well as nested - * exclusions from all enclosing groups. - */ - if(isForbidden(groupStack, forbiddenDelimiters, groupName)) { - StringBuilder msgBuilder = new StringBuilder(); - - T forbiddenBy; - - if(whoForbid.containsKey(tok)) { - forbiddenBy = whoForbid.get(tok); - } else { - forbiddenBy = groupStack.top().groupName; - } - - String ctxList = StringUtils.toEnglishList(groupStack.toArray(), "then"); - - msgBuilder.append("Group '"); - msgBuilder.append(group); - msgBuilder.append("' can't be opened in this context."); - msgBuilder.append(" (forbidden by '"); - msgBuilder.append(forbiddenBy); - msgBuilder.append("')\nContext stack: "); - msgBuilder.append(ctxList); - - throw new DelimiterException(msgBuilder.toString()); - } - - /* - * Add an open group. - */ - groupStack.push(group); - - /* - * The tree that represents the opened group. - */ - ITree groupTree = new Tree<>(groupName); - groupTree.addChild(new Tree<>(tok)); - - /* - * The tree that represents the contents of the - * opened group. - */ - ITree groupContents = new Tree<>(contents); - - /* - * Add the trees to the open trees. - */ - trees.push(groupTree); - trees.push(groupContents); - - /* - * Add the nested exclusions from this group - */ - for(T exclusion : group.groupExclusions) { - forbiddenDelimiters.add(exclusion); - - whoForbid.put(exclusion, groupName); - } - } else if(!groupStack.empty() && groupStack.top().isClosing(tok)) { - /* - * Close the group. - */ - DelimiterGroup closed = groupStack.pop(); - - /* - * Remove the contents of the group and the - * group itself from the stack. - */ - ITree contentTree = trees.pop(); - ITree groupTree = trees.pop(); - - /* - * Fill in the group node. 
- */ - groupTree.addChild(contentTree); - groupTree.addChild(new Tree<>(tok)); - - /* - * Add the group node to the group that - * contained it. - */ - trees.top().addChild(groupTree); - - /* - * Remove nested exclusions from this group. - */ - for(T excludedGroup : closed.groupExclusions) { - forbiddenDelimiters.remove(excludedGroup); - - whoForbid.remove(excludedGroup); - } - } else { - trees.top().addChild(new Tree<>(tok)); - } - } - - /* - * Error if not all groups were closed. - */ - if(!groupStack.empty()) { - DelimiterGroup group = groupStack.top(); - StringBuilder msgBuilder = new StringBuilder(); - - String closingDelims = StringUtils.toEnglishList(group.closingDelimiters.toArray(), false); - - String ctxList = StringUtils.toEnglishList(groupStack.toArray(), "then"); - - msgBuilder.append("Unclosed group '"); - msgBuilder.append(group.groupName); - msgBuilder.append("'. Expected one of "); - msgBuilder.append(closingDelims); - msgBuilder.append(" to close it\nOpen groups: "); - msgBuilder.append(ctxList); - - throw new DelimiterException(msgBuilder.toString()); - } - - return res; - } - - private boolean isForbidden(Stack> groupStack, Multiset forbiddenDelimiters, T groupName) { - boolean localForbid; - if(groupStack.empty()) - localForbid = false; - else - localForbid = groupStack.top().topLevelExclusions.contains(groupName); - - return localForbid || forbiddenDelimiters.contains(groupName); - } - - /** - * Add a open delimiter for the specified group. - * - * @param open - * The open delimiter. - * @param groupName - * The name of the group it opens. 
- */ - public void addOpener(T open, T groupName) { - if(open == null) { - throw new NullPointerException("Opener must not be null"); - } else if(open.equals("")) { - throw new IllegalArgumentException("Empty string is not a valid opening delimiter"); - } else if(groupName == null) { - throw new NullPointerException("Group name must not be null"); - } else if(!groups.containsKey(groupName)) { - throw new IllegalArgumentException("Group " + groupName + " doesn't exist."); - } - - openDelimiters.put(open, groupName); - } - - /** - * Add a delimiter group. - * - * @param group - * The delimiter group. - */ - public void addGroup(DelimiterGroup group) { - if(group == null) { - throw new NullPointerException("Group must not be null"); - } - - groups.put(group.groupName, group); - } - - /** - * Creates and adds a delimiter group using the provided settings. - * - * @param openers - * The tokens that open this group - * @param groupName - * The name of the group - * @param closers - * The tokens that close this group - */ - public void addGroup(T[] openers, T groupName, @SuppressWarnings("unchecked") T... closers) { - DelimiterGroup group = new DelimiterGroup<>(groupName); - - group.addClosing(closers); - - addGroup(group); - - for(T open : openers) { - addOpener(open, groupName); - } - } -} diff --git a/BJC-Utils2/src/main/java/bjc/utils/funcutils/StringDelimiter.java b/BJC-Utils2/src/main/java/bjc/utils/funcutils/StringDelimiter.java deleted file mode 100644 index e4303c7..0000000 --- a/BJC-Utils2/src/main/java/bjc/utils/funcutils/StringDelimiter.java +++ /dev/null @@ -1,31 +0,0 @@ -package bjc.utils.funcutils; - -import bjc.utils.data.ITree; - -/** - * A sequence delimiter specialized for strings. - * - * @author EVE - * - */ -public class StringDelimiter extends SequenceDelimiter { - - /** - * Override of - * {@link SequenceDelimiter#delimitSequence(Object, Object, Object...)} - * for ease of use for strings. - * - * @param seq - * The sequence to delimit. 
- * - * @return The sequence as a tree. - * - * @throws DelimiterException - * if something went wrong with delimiting the sequence. - * - * @see SequenceDelimiter - */ - public ITree delimitSequence(String... seq) throws DelimiterException { - return super.delimitSequence("root", "contents", seq); - } -} diff --git a/BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java deleted file mode 100644 index 206fbcd..0000000 --- a/BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java +++ /dev/null @@ -1,161 +0,0 @@ -package bjc.utils.funcutils; - -import java.util.regex.Pattern; - -/** - * Split a string and keep given delimiters. - * - * @author Ben Culkin - */ -public class TokenSplitter { - /* - * This string is a format template for the delimiter matching regex - * - * It does two things: - * - *
- * 1. Match to the left of the provided delimiter by positive lookahead
- * 2. Match to the right of the provided delimiter by positive lookbehind
- * - * Thus, it will only match in places where the delimiter is, but won't - * actually match the delimiter, leaving split to put it into the stream - */ - private static String WITH_DELIM = "(?:(?<=%1$s)|(?=%1$s))"; - - /* - * This string is a format template for the multi-delimiter matching - * regex. - * - * It does the same thing as the single delimiter regex, but has to have - * some negative lookahead/lookbehind assertions to avoid splitting a - * delimiter into pieces. - */ - private static String WITH_MULTI_DELIM = "(?:(?<=%1$s+)(?!%1$s)|(? - * The splitter must be compiled first. - *

- * - * @param inp - * The string to split. - * - * @return The split string, including delimiters. - * - * @throws IllegalStateException - * If the splitter isn't compiled. - */ - public String[] split(String inp) { - if(compPatt == null) throw new IllegalStateException("Token splitter has not been compiled yet"); - - /* - * Don't split something that we should exclude from being split. - */ - if(exclusionPatt.matcher(inp).matches()) return new String[] { inp }; - - return compPatt.split(inp); - } - - /** - * Adds one or more strings as matched delimiters to split on. - * - * Only works for fixed length delimiters. - * - * The provided strings are regex-escaped before being used. - * - * @param delims - * The delimiters to match on. - */ - public void addDelimiter(String... delims) { - for(String delim : delims) { - String quoteDelim = Pattern.quote(delim); - String delimPat = String.format(WITH_DELIM, quoteDelim); - - if(currPatt == null) { - currPatt = new StringBuilder(); - currExclusionPatt = new StringBuilder(); - - currPatt.append("(?:" + delimPat + ")"); - currExclusionPatt.append("(?:" + quoteDelim + ")"); - } else { - currPatt.append("|(?:" + delimPat + ")"); - currExclusionPatt.append("|(?:" + quoteDelim + ")"); - } - } - } - - /** - * Adds a character class as a matched delimiter to split on. - * - * The provided string should be a pattern to match one or more - * occurances of. - * - * @param delim - * The delimiter to split on. - */ - public void addMultiDelimiter(String delim) { - String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")"); - - if(currPatt == null) { - currPatt = new StringBuilder(); - currExclusionPatt = new StringBuilder(); - - currPatt.append("(?:" + delimPat + ")"); - currExclusionPatt.append("(?:(?:" + delim + ")+)"); - - } else { - currPatt.append("|(?:" + delimPat + ")"); - currExclusionPatt.append("|(?:(?:" + delim + ")+)"); - } - } - - /** - * Marks strings matching the pattern delim as non-splittable. 
- * - * @param delim - * The regex to not splitting matching strings. - */ - public void addNonMatcher(String delim) { - if(currPatt == null) { - currPatt = new StringBuilder(); - currExclusionPatt = new StringBuilder(); - - currExclusionPatt.append("(?:" + delim + ")"); - } else { - currExclusionPatt.append("|(?:" + delim + ")"); - } - } - /** - * Compiles the current set of delimiters to a pattern. - * - * Makes this splitter ready to use. - */ - public void compile() { - compPatt = Pattern.compile(currPatt.toString()); - exclusionPatt = Pattern.compile(currExclusionPatt.toString()); - } -} -- cgit v1.2.3