From 9d89261fedf23c11b684eb66cefdd86a9378ad20 Mon Sep 17 00:00:00 2001 From: bjculkin Date: Fri, 17 Mar 2017 08:34:57 -0400 Subject: Move parsing utilities. Moved the parsing utilities SequenceDelimiter and TokenSplitter to the parserutils package, instead of the funcutils package. --- .../java/bjc/utils/examples/DelimSplitterTest.java | 8 +- .../bjc/utils/funcutils/SequenceDelimiter.java | 524 -------------------- .../java/bjc/utils/funcutils/StringDelimiter.java | 31 -- .../java/bjc/utils/funcutils/TokenSplitter.java | 161 ------- .../bjc/utils/parserutils/SequenceDelimiter.java | 525 +++++++++++++++++++++ .../bjc/utils/parserutils/StringDelimiter.java | 31 ++ .../java/bjc/utils/parserutils/TokenSplitter.java | 161 +++++++ 7 files changed, 721 insertions(+), 720 deletions(-) delete mode 100644 BJC-Utils2/src/main/java/bjc/utils/funcutils/SequenceDelimiter.java delete mode 100644 BJC-Utils2/src/main/java/bjc/utils/funcutils/StringDelimiter.java delete mode 100644 BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java create mode 100644 BJC-Utils2/src/main/java/bjc/utils/parserutils/SequenceDelimiter.java create mode 100644 BJC-Utils2/src/main/java/bjc/utils/parserutils/StringDelimiter.java create mode 100644 BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java (limited to 'BJC-Utils2') diff --git a/BJC-Utils2/src/examples/java/bjc/utils/examples/DelimSplitterTest.java b/BJC-Utils2/src/examples/java/bjc/utils/examples/DelimSplitterTest.java index feb0631..521c521 100644 --- a/BJC-Utils2/src/examples/java/bjc/utils/examples/DelimSplitterTest.java +++ b/BJC-Utils2/src/examples/java/bjc/utils/examples/DelimSplitterTest.java @@ -1,10 +1,10 @@ package bjc.utils.examples; import bjc.utils.data.ITree; -import bjc.utils.funcutils.SequenceDelimiter; -import bjc.utils.funcutils.SequenceDelimiter.DelimiterException; -import bjc.utils.funcutils.StringDelimiter; -import bjc.utils.funcutils.TokenSplitter; +import bjc.utils.parserutils.SequenceDelimiter; +import bjc.utils.parserutils.StringDelimiter; +import bjc.utils.parserutils.TokenSplitter; +import bjc.utils.parserutils.SequenceDelimiter.DelimiterException; import java.util.Arrays; import java.util.Scanner; diff --git a/BJC-Utils2/src/main/java/bjc/utils/funcutils/SequenceDelimiter.java b/BJC-Utils2/src/main/java/bjc/utils/funcutils/SequenceDelimiter.java deleted file mode 100644 index 625c9ec..0000000 --- a/BJC-Utils2/src/main/java/bjc/utils/funcutils/SequenceDelimiter.java +++ /dev/null @@ -1,524 +0,0 @@ -package bjc.utils.funcutils; - -import bjc.utils.data.ITree; -import bjc.utils.data.Tree; -import bjc.utils.esodata.PushdownMap; -import bjc.utils.esodata.SimpleStack; -import bjc.utils.esodata.Stack; -import bjc.utils.funcdata.IMap; - -import com.google.common.collect.HashMultiset; -import com.google.common.collect.Multiset; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -/** - * Convert linear sequences into trees that represent group structure. - * - * @author EVE - * - * @param - * The type of items in the sequence. - */ -public class SequenceDelimiter { - /** - * Represents a possible delimiter group to match. - * - * @author EVE - * - * @param - * The type of items in the sequence. - */ - public static class DelimiterGroup { - /** - * The name of this delimiter group. - */ - public final T2 groupName; - - /* - * The delimiters that close this group. - */ - private Set closingDelimiters; - - /* - * The groups that can't occur in the top level of this group. - */ - private Set topLevelExclusions; - - /* - * The groups that can't occur anywhere inside this group. - */ - private Set groupExclusions; - - /** - * Create a new empty delimiter group. - * - * @param name - * The name of the delimiter group - */ - public DelimiterGroup(T2 name) { - if(name == null) throw new NullPointerException("Group name must not be null"); - - groupName = name; - - closingDelimiters = new HashSet<>(); - topLevelExclusions = new HashSet<>(); - groupExclusions = new HashSet<>(); - } - - /** - * Check if the provided delimiter would close this group. - * - * @param del - * The string to check as a closing delimiter. - * - * @return Whether or not the provided delimiter closes this - * group. - */ - public boolean isClosing(T2 del) { - return closingDelimiters.contains(del); - } - - /** - * Adds one or more delimiters that close this group. - * - * @param closers - * Delimiters that close this group. - */ - @SafeVarargs - public final void addClosing(T2... closers) { - List closerList = Arrays.asList(closers); - - for(T2 closer : closerList) { - if(closer == null) { - throw new NullPointerException("Closing delimiter must not be null"); - } else if(closer.equals("")) { - /* - * We can do this because equals works - * on arbitrary objects, not just those - * of the same type. - */ - throw new IllegalArgumentException("Empty string is not a valid exclusion"); - } else { - closingDelimiters.add(closer); - } - } - } - - /** - * Adds one or more groups that cannot occur in the top level of - * this group. - * - * @param exclusions - * The groups forbidden in the top level of this - * group. - */ - @SafeVarargs - public final void addTopLevelForbid(T2... exclusions) { - for(T2 exclusion : exclusions) { - if(exclusion == null) { - throw new NullPointerException("Exclusion must not be null"); - } else if(exclusion.equals("")) { - /* - * We can do this because equals works - * on arbitrary objects, not just those - * of the same type. - */ - throw new IllegalArgumentException("Empty string is not a valid exclusion"); - } else { - topLevelExclusions.add(exclusion); - } - } - } - - /** - * Adds one or more groups that cannot occur at all in this - * group. - * - * @param exclusions - * The groups forbidden inside this group. - */ - @SafeVarargs - public final void addGroupForbid(T2... exclusions) { - for(T2 exclusion : exclusions) { - if(exclusion == null) { - throw new NullPointerException("Exclusion must not be null"); - } else if(exclusion.equals("")) { - /* - * We can do this because equals works - * on arbitrary objects, not just those - * of the same type. - */ - throw new IllegalArgumentException("Empty string is not a valid exclusion"); - } else { - groupExclusions.add(exclusion); - } - } - } - - @Override - public String toString() { - StringBuilder builder = new StringBuilder(); - - builder.append("("); - - builder.append("groupName=["); - builder.append(groupName); - builder.append("], "); - - builder.append("closingDelimiters=["); - for(T2 closer : closingDelimiters) { - builder.append(closer + ","); - } - builder.deleteCharAt(builder.length() - 1); - builder.append("]"); - - if(topLevelExclusions != null && !topLevelExclusions.isEmpty()) { - builder.append(", "); - builder.append("topLevelExclusions=["); - for(T2 exclusion : topLevelExclusions) { - builder.append(exclusion + ","); - } - builder.deleteCharAt(builder.length() - 1); - builder.append("]"); - } - - if(groupExclusions != null && !groupExclusions.isEmpty()) { - builder.append(", "); - builder.append("groupExclusions=["); - for(T2 exclusion : topLevelExclusions) { - builder.append(exclusion + ","); - } - builder.deleteCharAt(builder.length() - 1); - builder.append("]"); - } - - builder.append(" )"); - - return builder.toString(); - } - - } - - /** - * The superclass for exceptions thrown during sequence delimitation. - */ - public static class DelimiterException extends RuntimeException { - /** - * Create a new generic delimiter exception. - * - * @param res - * The reason for this exception. - */ - public DelimiterException(String res) { - super(res); - } - } - - /* - * Mapping from opening delimiters to the names of the groups they open - */ - private Map openDelimiters; - - /* - * Mapping from group names to actual groups. - */ - private Map> groups; - - /** - * Create a new sequence delimiter. - */ - public SequenceDelimiter() { - openDelimiters = new HashMap<>(); - - groups = new HashMap<>(); - } - - /** - * Convert a linear sequence into a tree that matches the delimiter - * structure. - * - * Essentially, creates a parse tree of the expression against the - * following grammar while obeying the defined group rules. - * - *
-	 *           -> ( | )*
-	 *          ->   
-	 *           -> STRING
-	 *           -> STRING
-	 *          -> STRING
-	 * 
- * - * @param seq - * The sequence to delimit. - * - * @param root - * The root of the returned tree. - * - * @param contents - * The item to use to mark the contents of a group - * - * @return The sequence as a tree that matches its group structure. Each - * node in the tree is either a data node or a group node. - * - * A data node is a leaf node whose data is the string it - * represents. - * - * A group node is a node with three children, and the name of - * the group as its label. The first child is the opening - * delimiter, the second is the group contents, and the third is - * the closing delimiter. The delimiters are leaf nodes labeled - * with their contents, while the group node contains a list of - * data and group nodes. - * - * @throws DelimiterException - * Thrown if something went wrong during sequence - * delimitation. - * - */ - public ITree delimitSequence(T root, T contents, @SuppressWarnings("unchecked") T... seq) throws DelimiterException { - /* - * The root node of the tree to give back. - */ - ITree res = new Tree<>(root); - - /* - * Handle the trivial case where there are no groups. - */ - if(openDelimiters.isEmpty()) { - for(T tok : seq) { - res.addChild(new Tree<>(tok)); - } - - return res; - } - - /* - * The stack of trees that represent the sequence. - */ - Stack> trees = new SimpleStack<>(); - trees.push(res); - - /* - * The stack of opened and not yet closed groups. - */ - Stack> groupStack = new SimpleStack<>(); - - /* - * Groups that aren't allowed to be opened at the moment. - */ - Multiset forbiddenDelimiters = HashMultiset.create(); - - /* - * Map of who forbid what for debugging purposes. - */ - IMap whoForbid = new PushdownMap<>(); - - for(int i = 0; i < seq.length; i++) { - T tok = seq[i]; - - /* - * If we have an opening delimiter, handle it. - */ - if(openDelimiters.containsKey(tok)) { - T groupName = openDelimiters.get(tok); - DelimiterGroup group = groups.get(groupName); - - /* - * Error on groups that can't open in this - * context. - * - * This means groups that can't occur at the - * top-level of this group, as well as nested - * exclusions from all enclosing groups. - */ - if(isForbidden(groupStack, forbiddenDelimiters, groupName)) { - StringBuilder msgBuilder = new StringBuilder(); - - T forbiddenBy; - - if(whoForbid.containsKey(tok)) { - forbiddenBy = whoForbid.get(tok); - } else { - forbiddenBy = groupStack.top().groupName; - } - - String ctxList = StringUtils.toEnglishList(groupStack.toArray(), "then"); - - msgBuilder.append("Group '"); - msgBuilder.append(group); - msgBuilder.append("' can't be opened in this context."); - msgBuilder.append(" (forbidden by '"); - msgBuilder.append(forbiddenBy); - msgBuilder.append("')\nContext stack: "); - msgBuilder.append(ctxList); - - throw new DelimiterException(msgBuilder.toString()); - } - - /* - * Add an open group. - */ - groupStack.push(group); - - /* - * The tree that represents the opened group. - */ - ITree groupTree = new Tree<>(groupName); - groupTree.addChild(new Tree<>(tok)); - - /* - * The tree that represents the contents of the - * opened group. - */ - ITree groupContents = new Tree<>(contents); - - /* - * Add the trees to the open trees. - */ - trees.push(groupTree); - trees.push(groupContents); - - /* - * Add the nested exclusions from this group - */ - for(T exclusion : group.groupExclusions) { - forbiddenDelimiters.add(exclusion); - - whoForbid.put(exclusion, groupName); - } - } else if(!groupStack.empty() && groupStack.top().isClosing(tok)) { - /* - * Close the group. - */ - DelimiterGroup closed = groupStack.pop(); - - /* - * Remove the contents of the group and the - * group itself from the stack. - */ - ITree contentTree = trees.pop(); - ITree groupTree = trees.pop(); - - /* - * Fill in the group node. - */ - groupTree.addChild(contentTree); - groupTree.addChild(new Tree<>(tok)); - - /* - * Add the group node to the group that - * contained it. - */ - trees.top().addChild(groupTree); - - /* - * Remove nested exclusions from this group. - */ - for(T excludedGroup : closed.groupExclusions) { - forbiddenDelimiters.remove(excludedGroup); - - whoForbid.remove(excludedGroup); - } - } else { - trees.top().addChild(new Tree<>(tok)); - } - } - - /* - * Error if not all groups were closed. - */ - if(!groupStack.empty()) { - DelimiterGroup group = groupStack.top(); - StringBuilder msgBuilder = new StringBuilder(); - - String closingDelims = StringUtils.toEnglishList(group.closingDelimiters.toArray(), false); - - String ctxList = StringUtils.toEnglishList(groupStack.toArray(), "then"); - - msgBuilder.append("Unclosed group '"); - msgBuilder.append(group.groupName); - msgBuilder.append("'. Expected one of "); - msgBuilder.append(closingDelims); - msgBuilder.append(" to close it\nOpen groups: "); - msgBuilder.append(ctxList); - - throw new DelimiterException(msgBuilder.toString()); - } - - return res; - } - - private boolean isForbidden(Stack> groupStack, Multiset forbiddenDelimiters, T groupName) { - boolean localForbid; - if(groupStack.empty()) - localForbid = false; - else - localForbid = groupStack.top().topLevelExclusions.contains(groupName); - - return localForbid || forbiddenDelimiters.contains(groupName); - } - - /** - * Add a open delimiter for the specified group. - * - * @param open - * The open delimiter. - * @param groupName - * The name of the group it opens. - */ - public void addOpener(T open, T groupName) { - if(open == null) { - throw new NullPointerException("Opener must not be null"); - } else if(open.equals("")) { - throw new IllegalArgumentException("Empty string is not a valid opening delimiter"); - } else if(groupName == null) { - throw new NullPointerException("Group name must not be null"); - } else if(!groups.containsKey(groupName)) { - throw new IllegalArgumentException("Group " + groupName + " doesn't exist."); - } - - openDelimiters.put(open, groupName); - } - - /** - * Add a delimiter group. - * - * @param group - * The delimiter group. - */ - public void addGroup(DelimiterGroup group) { - if(group == null) { - throw new NullPointerException("Group must not be null"); - } - - groups.put(group.groupName, group); - } - - /** - * Creates and adds a delimiter group using the provided settings. - * - * @param openers - * The tokens that open this group - * @param groupName - * The name of the group - * @param closers - * The tokens that close this group - */ - public void addGroup(T[] openers, T groupName, @SuppressWarnings("unchecked") T... closers) { - DelimiterGroup group = new DelimiterGroup<>(groupName); - - group.addClosing(closers); - - addGroup(group); - - for(T open : openers) { - addOpener(open, groupName); - } - } -} diff --git a/BJC-Utils2/src/main/java/bjc/utils/funcutils/StringDelimiter.java b/BJC-Utils2/src/main/java/bjc/utils/funcutils/StringDelimiter.java deleted file mode 100644 index e4303c7..0000000 --- a/BJC-Utils2/src/main/java/bjc/utils/funcutils/StringDelimiter.java +++ /dev/null @@ -1,31 +0,0 @@ -package bjc.utils.funcutils; - -import bjc.utils.data.ITree; - -/** - * A sequence delimiter specialized for strings. - * - * @author EVE - * - */ -public class StringDelimiter extends SequenceDelimiter { - - /** - * Override of - * {@link SequenceDelimiter#delimitSequence(Object, Object, Object...)} - * for ease of use for strings. - * - * @param seq - * The sequence to delimit. - * - * @return The sequence as a tree. - * - * @throws DelimiterException - * if something went wrong with delimiting the sequence. - * - * @see SequenceDelimiter - */ - public ITree delimitSequence(String... seq) throws DelimiterException { - return super.delimitSequence("root", "contents", seq); - } -} diff --git a/BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java deleted file mode 100644 index 206fbcd..0000000 --- a/BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java +++ /dev/null @@ -1,161 +0,0 @@ -package bjc.utils.funcutils; - -import java.util.regex.Pattern; - -/** - * Split a string and keep given delimiters. - * - * @author Ben Culkin - */ -public class TokenSplitter { - /* - * This string is a format template for the delimiter matching regex - * - * It does two things: - * - *
    - *
  1. Match to the left of the provided delimiter by positive lookahead
  2. - *
  3. Match to the right of the provided delimiter by positive lookbehind
  4. - *
- * - * Thus, it will only match in places where the delimiter is, but won't - * actually match the delimiter, leaving split to put it into the stream - */ - private static String WITH_DELIM = "(?:(?<=%1$s)|(?=%1$s))"; - - /* - * This string is a format template for the multi-delimiter matching - * regex. - * - * It does the same thing as the single delimiter regex, but has to have - * some negative lookahead/lookbehind assertions to avoid splitting a - * delimiter into pieces. - */ - private static String WITH_MULTI_DELIM = "(?:(?<=%1$s+)(?!%1$s)|(? - * The splitter must be compiled first. - *

- * - * @param inp - * The string to split. - * - * @return The split string, including delimiters. - * - * @throws IllegalStateException - * If the splitter isn't compiled. - */ - public String[] split(String inp) { - if(compPatt == null) throw new IllegalStateException("Token splitter has not been compiled yet"); - - /* - * Don't split something that we should exclude from being split. - */ - if(exclusionPatt.matcher(inp).matches()) return new String[] { inp }; - - return compPatt.split(inp); - } - - /** - * Adds one or more strings as matched delimiters to split on. - * - * Only works for fixed length delimiters. - * - * The provided strings are regex-escaped before being used. - * - * @param delims - * The delimiters to match on. - */ - public void addDelimiter(String... delims) { - for(String delim : delims) { - String quoteDelim = Pattern.quote(delim); - String delimPat = String.format(WITH_DELIM, quoteDelim); - - if(currPatt == null) { - currPatt = new StringBuilder(); - currExclusionPatt = new StringBuilder(); - - currPatt.append("(?:" + delimPat + ")"); - currExclusionPatt.append("(?:" + quoteDelim + ")"); - } else { - currPatt.append("|(?:" + delimPat + ")"); - currExclusionPatt.append("|(?:" + quoteDelim + ")"); - } - } - } - - /** - * Adds a character class as a matched delimiter to split on. - * - * The provided string should be a pattern to match one or more - * occurances of. - * - * @param delim - * The delimiter to split on. - */ - public void addMultiDelimiter(String delim) { - String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")"); - - if(currPatt == null) { - currPatt = new StringBuilder(); - currExclusionPatt = new StringBuilder(); - - currPatt.append("(?:" + delimPat + ")"); - currExclusionPatt.append("(?:(?:" + delim + ")+)"); - - } else { - currPatt.append("|(?:" + delimPat + ")"); - currExclusionPatt.append("|(?:(?:" + delim + ")+)"); - } - } - - /** - * Marks strings matching the pattern delim as non-splittable. - * - * @param delim - * The regex to not splitting matching strings. - */ - public void addNonMatcher(String delim) { - if(currPatt == null) { - currPatt = new StringBuilder(); - currExclusionPatt = new StringBuilder(); - - currExclusionPatt.append("(?:" + delim + ")"); - } else { - currExclusionPatt.append("|(?:" + delim + ")"); - } - } - /** - * Compiles the current set of delimiters to a pattern. - * - * Makes this splitter ready to use. - */ - public void compile() { - compPatt = Pattern.compile(currPatt.toString()); - exclusionPatt = Pattern.compile(currExclusionPatt.toString()); - } -} diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/SequenceDelimiter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/SequenceDelimiter.java new file mode 100644 index 0000000..1180017 --- /dev/null +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/SequenceDelimiter.java @@ -0,0 +1,525 @@ +package bjc.utils.parserutils; + +import bjc.utils.data.ITree; +import bjc.utils.data.Tree; +import bjc.utils.esodata.PushdownMap; +import bjc.utils.esodata.SimpleStack; +import bjc.utils.esodata.Stack; +import bjc.utils.funcdata.IMap; +import bjc.utils.funcutils.StringUtils; + +import com.google.common.collect.HashMultiset; +import com.google.common.collect.Multiset; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Convert linear sequences into trees that represent group structure. + * + * @author EVE + * + * @param + * The type of items in the sequence. + */ +public class SequenceDelimiter { + /** + * Represents a possible delimiter group to match. + * + * @author EVE + * + * @param + * The type of items in the sequence. + */ + public static class DelimiterGroup { + /** + * The name of this delimiter group. + */ + public final T2 groupName; + + /* + * The delimiters that close this group. + */ + private Set closingDelimiters; + + /* + * The groups that can't occur in the top level of this group. + */ + private Set topLevelExclusions; + + /* + * The groups that can't occur anywhere inside this group. + */ + private Set groupExclusions; + + /** + * Create a new empty delimiter group. + * + * @param name + * The name of the delimiter group + */ + public DelimiterGroup(T2 name) { + if(name == null) throw new NullPointerException("Group name must not be null"); + + groupName = name; + + closingDelimiters = new HashSet<>(); + topLevelExclusions = new HashSet<>(); + groupExclusions = new HashSet<>(); + } + + /** + * Check if the provided delimiter would close this group. + * + * @param del + * The string to check as a closing delimiter. + * + * @return Whether or not the provided delimiter closes this + * group. + */ + public boolean isClosing(T2 del) { + return closingDelimiters.contains(del); + } + + /** + * Adds one or more delimiters that close this group. + * + * @param closers + * Delimiters that close this group. + */ + @SafeVarargs + public final void addClosing(T2... closers) { + List closerList = Arrays.asList(closers); + + for(T2 closer : closerList) { + if(closer == null) { + throw new NullPointerException("Closing delimiter must not be null"); + } else if(closer.equals("")) { + /* + * We can do this because equals works + * on arbitrary objects, not just those + * of the same type. + */ + throw new IllegalArgumentException("Empty string is not a valid exclusion"); + } else { + closingDelimiters.add(closer); + } + } + } + + /** + * Adds one or more groups that cannot occur in the top level of + * this group. + * + * @param exclusions + * The groups forbidden in the top level of this + * group. + */ + @SafeVarargs + public final void addTopLevelForbid(T2... exclusions) { + for(T2 exclusion : exclusions) { + if(exclusion == null) { + throw new NullPointerException("Exclusion must not be null"); + } else if(exclusion.equals("")) { + /* + * We can do this because equals works + * on arbitrary objects, not just those + * of the same type. + */ + throw new IllegalArgumentException("Empty string is not a valid exclusion"); + } else { + topLevelExclusions.add(exclusion); + } + } + } + + /** + * Adds one or more groups that cannot occur at all in this + * group. + * + * @param exclusions + * The groups forbidden inside this group. + */ + @SafeVarargs + public final void addGroupForbid(T2... exclusions) { + for(T2 exclusion : exclusions) { + if(exclusion == null) { + throw new NullPointerException("Exclusion must not be null"); + } else if(exclusion.equals("")) { + /* + * We can do this because equals works + * on arbitrary objects, not just those + * of the same type. + */ + throw new IllegalArgumentException("Empty string is not a valid exclusion"); + } else { + groupExclusions.add(exclusion); + } + } + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + + builder.append("("); + + builder.append("groupName=["); + builder.append(groupName); + builder.append("], "); + + builder.append("closingDelimiters=["); + for(T2 closer : closingDelimiters) { + builder.append(closer + ","); + } + builder.deleteCharAt(builder.length() - 1); + builder.append("]"); + + if(topLevelExclusions != null && !topLevelExclusions.isEmpty()) { + builder.append(", "); + builder.append("topLevelExclusions=["); + for(T2 exclusion : topLevelExclusions) { + builder.append(exclusion + ","); + } + builder.deleteCharAt(builder.length() - 1); + builder.append("]"); + } + + if(groupExclusions != null && !groupExclusions.isEmpty()) { + builder.append(", "); + builder.append("groupExclusions=["); + for(T2 exclusion : topLevelExclusions) { + builder.append(exclusion + ","); + } + builder.deleteCharAt(builder.length() - 1); + builder.append("]"); + } + + builder.append(" )"); + + return builder.toString(); + } + + } + + /** + * The superclass for exceptions thrown during sequence delimitation. + */ + public static class DelimiterException extends RuntimeException { + /** + * Create a new generic delimiter exception. + * + * @param res + * The reason for this exception. + */ + public DelimiterException(String res) { + super(res); + } + } + + /* + * Mapping from opening delimiters to the names of the groups they open + */ + private Map openDelimiters; + + /* + * Mapping from group names to actual groups. + */ + private Map> groups; + + /** + * Create a new sequence delimiter. + */ + public SequenceDelimiter() { + openDelimiters = new HashMap<>(); + + groups = new HashMap<>(); + } + + /** + * Convert a linear sequence into a tree that matches the delimiter + * structure. + * + * Essentially, creates a parse tree of the expression against the + * following grammar while obeying the defined group rules. + * + *
+	 *           -> ( | )*
+	 *          ->   
+	 *           -> STRING
+	 *           -> STRING
+	 *          -> STRING
+	 * 
+ * + * @param seq + * The sequence to delimit. + * + * @param root + * The root of the returned tree. + * + * @param contents + * The item to use to mark the contents of a group + * + * @return The sequence as a tree that matches its group structure. Each + * node in the tree is either a data node or a group node. + * + * A data node is a leaf node whose data is the string it + * represents. + * + * A group node is a node with three children, and the name of + * the group as its label. The first child is the opening + * delimiter, the second is the group contents, and the third is + * the closing delimiter. The delimiters are leaf nodes labeled + * with their contents, while the group node contains a list of + * data and group nodes. + * + * @throws DelimiterException + * Thrown if something went wrong during sequence + * delimitation. + * + */ + public ITree delimitSequence(T root, T contents, @SuppressWarnings("unchecked") T... seq) throws DelimiterException { + /* + * The root node of the tree to give back. + */ + ITree res = new Tree<>(root); + + /* + * Handle the trivial case where there are no groups. + */ + if(openDelimiters.isEmpty()) { + for(T tok : seq) { + res.addChild(new Tree<>(tok)); + } + + return res; + } + + /* + * The stack of trees that represent the sequence. + */ + Stack> trees = new SimpleStack<>(); + trees.push(res); + + /* + * The stack of opened and not yet closed groups. + */ + Stack> groupStack = new SimpleStack<>(); + + /* + * Groups that aren't allowed to be opened at the moment. + */ + Multiset forbiddenDelimiters = HashMultiset.create(); + + /* + * Map of who forbid what for debugging purposes. + */ + IMap whoForbid = new PushdownMap<>(); + + for(int i = 0; i < seq.length; i++) { + T tok = seq[i]; + + /* + * If we have an opening delimiter, handle it. + */ + if(openDelimiters.containsKey(tok)) { + T groupName = openDelimiters.get(tok); + DelimiterGroup group = groups.get(groupName); + + /* + * Error on groups that can't open in this + * context. + * + * This means groups that can't occur at the + * top-level of this group, as well as nested + * exclusions from all enclosing groups. + */ + if(isForbidden(groupStack, forbiddenDelimiters, groupName)) { + StringBuilder msgBuilder = new StringBuilder(); + + T forbiddenBy; + + if(whoForbid.containsKey(tok)) { + forbiddenBy = whoForbid.get(tok); + } else { + forbiddenBy = groupStack.top().groupName; + } + + String ctxList = StringUtils.toEnglishList(groupStack.toArray(), "then"); + + msgBuilder.append("Group '"); + msgBuilder.append(group); + msgBuilder.append("' can't be opened in this context."); + msgBuilder.append(" (forbidden by '"); + msgBuilder.append(forbiddenBy); + msgBuilder.append("')\nContext stack: "); + msgBuilder.append(ctxList); + + throw new DelimiterException(msgBuilder.toString()); + } + + /* + * Add an open group. + */ + groupStack.push(group); + + /* + * The tree that represents the opened group. + */ + ITree groupTree = new Tree<>(groupName); + groupTree.addChild(new Tree<>(tok)); + + /* + * The tree that represents the contents of the + * opened group. + */ + ITree groupContents = new Tree<>(contents); + + /* + * Add the trees to the open trees. + */ + trees.push(groupTree); + trees.push(groupContents); + + /* + * Add the nested exclusions from this group + */ + for(T exclusion : group.groupExclusions) { + forbiddenDelimiters.add(exclusion); + + whoForbid.put(exclusion, groupName); + } + } else if(!groupStack.empty() && groupStack.top().isClosing(tok)) { + /* + * Close the group. + */ + DelimiterGroup closed = groupStack.pop(); + + /* + * Remove the contents of the group and the + * group itself from the stack. + */ + ITree contentTree = trees.pop(); + ITree groupTree = trees.pop(); + + /* + * Fill in the group node. + */ + groupTree.addChild(contentTree); + groupTree.addChild(new Tree<>(tok)); + + /* + * Add the group node to the group that + * contained it. + */ + trees.top().addChild(groupTree); + + /* + * Remove nested exclusions from this group. + */ + for(T excludedGroup : closed.groupExclusions) { + forbiddenDelimiters.remove(excludedGroup); + + whoForbid.remove(excludedGroup); + } + } else { + trees.top().addChild(new Tree<>(tok)); + } + } + + /* + * Error if not all groups were closed. + */ + if(!groupStack.empty()) { + DelimiterGroup group = groupStack.top(); + StringBuilder msgBuilder = new StringBuilder(); + + String closingDelims = StringUtils.toEnglishList(group.closingDelimiters.toArray(), false); + + String ctxList = StringUtils.toEnglishList(groupStack.toArray(), "then"); + + msgBuilder.append("Unclosed group '"); + msgBuilder.append(group.groupName); + msgBuilder.append("'. Expected one of "); + msgBuilder.append(closingDelims); + msgBuilder.append(" to close it\nOpen groups: "); + msgBuilder.append(ctxList); + + throw new DelimiterException(msgBuilder.toString()); + } + + return res; + } + + private boolean isForbidden(Stack> groupStack, Multiset forbiddenDelimiters, T groupName) { + boolean localForbid; + if(groupStack.empty()) + localForbid = false; + else + localForbid = groupStack.top().topLevelExclusions.contains(groupName); + + return localForbid || forbiddenDelimiters.contains(groupName); + } + + /** + * Add a open delimiter for the specified group. + * + * @param open + * The open delimiter. + * @param groupName + * The name of the group it opens. + */ + public void addOpener(T open, T groupName) { + if(open == null) { + throw new NullPointerException("Opener must not be null"); + } else if(open.equals("")) { + throw new IllegalArgumentException("Empty string is not a valid opening delimiter"); + } else if(groupName == null) { + throw new NullPointerException("Group name must not be null"); + } else if(!groups.containsKey(groupName)) { + throw new IllegalArgumentException("Group " + groupName + " doesn't exist."); + } + + openDelimiters.put(open, groupName); + } + + /** + * Add a delimiter group. + * + * @param group + * The delimiter group. + */ + public void addGroup(DelimiterGroup group) { + if(group == null) { + throw new NullPointerException("Group must not be null"); + } + + groups.put(group.groupName, group); + } + + /** + * Creates and adds a delimiter group using the provided settings. + * + * @param openers + * The tokens that open this group + * @param groupName + * The name of the group + * @param closers + * The tokens that close this group + */ + public void addGroup(T[] openers, T groupName, @SuppressWarnings("unchecked") T... closers) { + DelimiterGroup group = new DelimiterGroup<>(groupName); + + group.addClosing(closers); + + addGroup(group); + + for(T open : openers) { + addOpener(open, groupName); + } + } +} diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/StringDelimiter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/StringDelimiter.java new file mode 100644 index 0000000..4c5b972 --- /dev/null +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/StringDelimiter.java @@ -0,0 +1,31 @@ +package bjc.utils.parserutils; + +import bjc.utils.data.ITree; + +/** + * A sequence delimiter specialized for strings. + * + * @author EVE + * + */ +public class StringDelimiter extends SequenceDelimiter { + + /** + * Override of + * {@link SequenceDelimiter#delimitSequence(Object, Object, Object...)} + * for ease of use for strings. + * + * @param seq + * The sequence to delimit. + * + * @return The sequence as a tree. + * + * @throws DelimiterException + * if something went wrong with delimiting the sequence. + * + * @see SequenceDelimiter + */ + public ITree delimitSequence(String... seq) throws DelimiterException { + return super.delimitSequence("root", "contents", seq); + } +} diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java new file mode 100644 index 0000000..e6191b9 --- /dev/null +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java @@ -0,0 +1,161 @@ +package bjc.utils.parserutils; + +import java.util.regex.Pattern; + +/** + * Split a string and keep given delimiters. + * + * @author Ben Culkin + */ +public class TokenSplitter { + /* + * This string is a format template for the delimiter matching regex + * + * It does two things: + * + *
    + *
  1. Match to the left of the provided delimiter by positive lookahead
  2. + *
  3. Match to the right of the provided delimiter by positive lookbehind
  4. + *
+ * + * Thus, it will only match in places where the delimiter is, but won't + * actually match the delimiter, leaving split to put it into the stream + */ + private static String WITH_DELIM = "(?:(?<=%1$s)|(?=%1$s))"; + + /* + * This string is a format template for the multi-delimiter matching + * regex. + * + * It does the same thing as the single delimiter regex, but has to have + * some negative lookahead/lookbehind assertions to avoid splitting a + * delimiter into pieces. + */ + private static String WITH_MULTI_DELIM = "(?:(?<=%1$s+)(?!%1$s)|(? + * The splitter must be compiled first. + *

+ * + * @param inp + * The string to split. + * + * @return The split string, including delimiters. + * + * @throws IllegalStateException + * If the splitter isn't compiled. + */ + public String[] split(String inp) { + if(compPatt == null) throw new IllegalStateException("Token splitter has not been compiled yet"); + + /* + * Don't split something that we should exclude from being split. + */ + if(exclusionPatt.matcher(inp).matches()) return new String[] { inp }; + + return compPatt.split(inp); + } + + /** + * Adds one or more strings as matched delimiters to split on. + * + * Only works for fixed length delimiters. + * + * The provided strings are regex-escaped before being used. + * + * @param delims + * The delimiters to match on. + */ + public void addDelimiter(String... delims) { + for(String delim : delims) { + String quoteDelim = Pattern.quote(delim); + String delimPat = String.format(WITH_DELIM, quoteDelim); + + if(currPatt == null) { + currPatt = new StringBuilder(); + currExclusionPatt = new StringBuilder(); + + currPatt.append("(?:" + delimPat + ")"); + currExclusionPatt.append("(?:" + quoteDelim + ")"); + } else { + currPatt.append("|(?:" + delimPat + ")"); + currExclusionPatt.append("|(?:" + quoteDelim + ")"); + } + } + } + + /** + * Adds a character class as a matched delimiter to split on. + * + * The provided string should be a pattern to match one or more + * occurances of. + * + * @param delim + * The delimiter to split on. + */ + public void addMultiDelimiter(String delim) { + String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")"); + + if(currPatt == null) { + currPatt = new StringBuilder(); + currExclusionPatt = new StringBuilder(); + + currPatt.append("(?:" + delimPat + ")"); + currExclusionPatt.append("(?:(?:" + delim + ")+)"); + + } else { + currPatt.append("|(?:" + delimPat + ")"); + currExclusionPatt.append("|(?:(?:" + delim + ")+)"); + } + } + + /** + * Marks strings matching the pattern delim as non-splittable. + * + * @param delim + * The regex to not splitting matching strings. + */ + public void addNonMatcher(String delim) { + if(currPatt == null) { + currPatt = new StringBuilder(); + currExclusionPatt = new StringBuilder(); + + currExclusionPatt.append("(?:" + delim + ")"); + } else { + currExclusionPatt.append("|(?:" + delim + ")"); + } + } + /** + * Compiles the current set of delimiters to a pattern. + * + * Makes this splitter ready to use. + */ + public void compile() { + compPatt = Pattern.compile(currPatt.toString()); + exclusionPatt = Pattern.compile(currExclusionPatt.toString()); + } +} -- cgit v1.2.3