From e78e3ac11870926332f47e6c3522ad819c3917b8 Mon Sep 17 00:00:00 2001 From: bjculkin Date: Mon, 20 Mar 2017 16:19:20 -0400 Subject: Move delimiter stuff to a new package --- .../parserutils/delims/DelimiterException.java | 16 + .../utils/parserutils/delims/DelimiterGroup.java | 485 +++++++++++++++++++++ .../delims/SequenceCharacteristics.java | 101 +++++ .../parserutils/delims/SequenceDelimiter.java | 304 +++++++++++++ .../utils/parserutils/delims/StringDelimiter.java | 31 ++ 5 files changed, 937 insertions(+) create mode 100644 BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/DelimiterException.java create mode 100644 BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/DelimiterGroup.java create mode 100644 BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/SequenceCharacteristics.java create mode 100644 BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/SequenceDelimiter.java create mode 100644 BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/StringDelimiter.java (limited to 'BJC-Utils2/src/main/java/bjc/utils/parserutils/delims') diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/DelimiterException.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/DelimiterException.java new file mode 100644 index 0000000..3aba434 --- /dev/null +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/DelimiterException.java @@ -0,0 +1,16 @@ +package bjc.utils.parserutils.delims; + +/** + * The superclass for exceptions thrown during sequence delimitation. + */ +public class DelimiterException extends RuntimeException { + /** + * Create a new generic delimiter exception. + * + * @param res + * The reason for this exception. 
+ */ + public DelimiterException(String res) { + super(res); + } +} \ No newline at end of file diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/DelimiterGroup.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/DelimiterGroup.java new file mode 100644 index 0000000..23a3b9f --- /dev/null +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/DelimiterGroup.java @@ -0,0 +1,485 @@ +package bjc.utils.parserutils.delims; + +import bjc.utils.data.IPair; +import bjc.utils.data.ITree; +import bjc.utils.data.Pair; +import bjc.utils.data.Tree; +import bjc.utils.funcdata.FunctionalList; +import bjc.utils.funcdata.IList; + +import java.util.Arrays; +import java.util.Deque; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.function.BiPredicate; +import java.util.function.Function; + +/** + * Represents a possible delimiter group to match. + * + * @author EVE + * + * @param + * The type of items in the sequence. + */ +public class DelimiterGroup { + /** + * Represents an instance of a delimiter group. + * + * @author EVE + * + */ + public class OpenGroup { + private Deque> contents; + + private IList> currentGroup; + + private T opener; + + private T[] params; + + /** + * Create a new instance of a delimiter group. + * + * @param open + * The item that opened this group. + * + * @param parms + * Any parameters from the opener. + */ + public OpenGroup(T open, T[] parms) { + opener = open; + params = parms; + + contents = new LinkedList<>(); + + currentGroup = new FunctionalList<>(); + } + + /** + * Add an item to this group instance. + * + * @param itm + * The item to add to this group instance. + */ + public void addItem(ITree itm) { + currentGroup.add(itm); + } + + /** + * Mark a subgroup. + * + * @param marker + * The item that indicated this subgroup. 
+ * + * @param chars + * The characteristics for building the tree. + */ + public void markSubgroup(T marker, SequenceCharacteristics chars) { + ITree subgroupContents = new Tree<>(chars.contents); + for(ITree itm : currentGroup) { + subgroupContents.addChild(itm); + } + + while(!contents.isEmpty()) { + ITree possibleSubordinate = contents.peek(); + + if(possibleSubordinate.getHead().equals(chars.subgroup)) { + T otherMarker = possibleSubordinate.getChild(1).getHead(); + + if(subgroups.get(marker) > subgroups.get(otherMarker)) { + subgroupContents.prependChild(contents.pop()); + } else { + break; + } + } else { + subgroupContents.prependChild(contents.pop()); + } + } + + Tree subgroup = new Tree<>(chars.subgroup, subgroupContents, new Tree<>(marker)); + + //System.out.println("\tTRACE: generated subgroup\n" + subgroup + "\n\n"); + contents.push(subgroup); + + currentGroup = new FunctionalList<>(); + } + + /** + * Convert this group into a tree. + * + * @param closer + * The item that closed this group. + * + * @param chars + * The characteristics for building the tree. + * + * @return This group as a tree. 
+ */ + public ITree toTree(T closer, SequenceCharacteristics chars) { + if(impliedSubgroups.containsKey(closer)) { + markSubgroup(impliedSubgroups.get(closer), chars); + } + + ITree res = new Tree<>(chars.contents); + + if(contents.isEmpty()) { + currentGroup.forEach(res::addChild); + } else { + while(!contents.isEmpty()) { + res.prependChild(contents.poll()); + } + + currentGroup.forEach(res::addChild); + } + + return new Tree<>(groupName, new Tree<>(opener), res, new Tree<>(closer)); + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + + builder.append("OpenGroup [contents="); + builder.append(contents); + builder.append(", currentGroup="); + builder.append(currentGroup); + builder.append(", opener="); + builder.append(opener); + builder.append("]"); + + return builder.toString(); + } + + /** + * Check if a group is excluded at the top level of this group. + * + * @param groupName + * The group to check. + * + * @return Whether or not the provided group is excluded. + */ + public boolean excludes(T groupName) { + return topLevelExclusions.contains(groupName); + } + + /** + * Check if the provided delimiter would close this group. + * + * @param del + * The string to check as a closing delimiter. + * + * @return Whether or not the provided delimiter closes this + * group. + */ + public boolean isClosing(T del) { + if(closingDelimiters.contains(del)) { + return true; + } + + for(BiPredicate pred : predClosers) { + if(pred.test(del, params)) { + return true; + } + } + + return closingDelimiters.contains(del); + } + + /** + * Get the name of the group this is an instance of. + * + * @return The name of the group this is an instance of. + */ + public T getName() { + return groupName; + } + + /** + * Get the groups that aren't allowed at all in this group. + * + * @return The groups that aren't allowed at all in this group. 
+ */ + public Set getNestingExclusions() { + return groupExclusions; + } + + /** + * Checks if a given token marks a subgroup. + * + * @param tok + * The token to check. + * + * @return Whether or not the token marks a subgroup. + */ + public boolean marksSubgroup(T tok) { + return subgroups.containsKey(tok); + } + + /** + * Checks if a given token opens a group. + * + * @param marker + * The token to check. + * + * @return The name of the group T opens, or null if it doesn't + * open one. + */ + public IPair doesOpen(T marker) { + if(openDelimiters.containsKey(marker)) { + return new Pair<>(openDelimiters.get(marker), null); + } + + for(Function> pred : predOpeners) { + IPair par = pred.apply(marker); + + if(par.getLeft() != null) { + return par; + } + } + + return new Pair<>(null, null); + } + } + + /** + * The name of this delimiter group. + */ + public final T groupName; + + /* + * The delimiters that open groups at the top level of this group. + */ + private Map openDelimiters; + + /* + * The delimiters that close this group. + */ + private Set closingDelimiters; + + /* + * The groups that can't occur in the top level of this group. + */ + private Set topLevelExclusions; + + /* + * The groups that can't occur anywhere inside this group. + */ + private Set groupExclusions; + + /* + * Mapping from sub-group delimiters, to any sub-groups enclosed in + * them. + */ + private Map subgroups; + + /* + * Subgroups implied by a particular closing delimiter + */ + private Map impliedSubgroups; + + /* + * Allows more complex openings + */ + private List>> predOpeners; + + /* + * Allow more complex closings + */ + private List> predClosers; + + /** + * Create a new empty delimiter group. 
+ * + * @param name + * The name of the delimiter group + */ + public DelimiterGroup(T name) { + if(name == null) throw new NullPointerException("Group name must not be null"); + + groupName = name; + + openDelimiters = new HashMap<>(); + closingDelimiters = new HashSet<>(); + + topLevelExclusions = new HashSet<>(); + groupExclusions = new HashSet<>(); + + subgroups = new HashMap<>(); + impliedSubgroups = new HashMap<>(); + + predOpeners = new LinkedList<>(); + predClosers = new LinkedList<>(); + } + + /** + * Adds one or more delimiters that close this group. + * + * @param closers + * Delimiters that close this group. + */ + @SafeVarargs + public final void addClosing(T... closers) { + List closerList = Arrays.asList(closers); + + for(T closer : closerList) { + if(closer == null) { + throw new NullPointerException("Closing delimiter must not be null"); + } else if(closer.equals("")) { + /* + * We can do this because equals works on + * arbitrary objects, not just those of the same + * type. + */ + throw new IllegalArgumentException("Empty string is not a valid exclusion"); + } else { + closingDelimiters.add(closer); + } + } + } + + /** + * Adds one or more groups that cannot occur in the top level of this + * group. + * + * @param exclusions + * The groups forbidden in the top level of this group. + */ + @SafeVarargs + public final void addTopLevelForbid(T... exclusions) { + for(T exclusion : exclusions) { + if(exclusion == null) { + throw new NullPointerException("Exclusion must not be null"); + } else if(exclusion.equals("")) { + /* + * We can do this because equals works on + * arbitrary objects, not just those of the same + * type. + */ + throw new IllegalArgumentException("Empty string is not a valid exclusion"); + } else { + topLevelExclusions.add(exclusion); + } + } + } + + /** + * Adds one or more groups that cannot occur at all in this group. + * + * @param exclusions + * The groups forbidden inside this group. 
+ */ + @SafeVarargs + public final void addGroupForbid(T... exclusions) { + for(T exclusion : exclusions) { + if(exclusion == null) { + throw new NullPointerException("Exclusion must not be null"); + } else if(exclusion.equals("")) { + /* + * We can do this because equals works on + * arbitrary objects, not just those of the same + * type. + */ + throw new IllegalArgumentException("Empty string is not a valid exclusion"); + } else { + groupExclusions.add(exclusion); + } + } + } + + /** + * Adds sub-group markers to this group. + * + * @param subgroup + * The token to mark a sub-group. + * + * @param priority + * The priority of this sub-group. + * + * @param contained + * Any sub-groups to enclose in this group. + */ + public void addSubgroup(T subgroup, int priority) { + if(subgroup == null) { + throw new NullPointerException("Subgroup marker must not be null"); + } + + subgroups.put(subgroup, priority); + } + + public void addOpener(T opener, T group) { + openDelimiters.put(opener, group); + } + + public void implySubgroup(T closer, T subgroup) { + impliedSubgroups.put(closer, subgroup); + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + + builder.append("("); + + builder.append("groupName=["); + builder.append(groupName); + builder.append("], "); + + builder.append("closingDelimiters=["); + for(T closer : closingDelimiters) { + builder.append(closer + ","); + } + builder.deleteCharAt(builder.length() - 1); + builder.append("]"); + + if(topLevelExclusions != null && !topLevelExclusions.isEmpty()) { + builder.append(", "); + builder.append("topLevelExclusions=["); + for(T exclusion : topLevelExclusions) { + builder.append(exclusion + ","); + } + builder.deleteCharAt(builder.length() - 1); + builder.append("]"); + } + + if(groupExclusions != null && !groupExclusions.isEmpty()) { + builder.append(", "); + builder.append("groupExclusions=["); + for(T exclusion : groupExclusions) { + builder.append(exclusion + ","); + } + 
builder.deleteCharAt(builder.length() - 1); + builder.append("]"); + } + + builder.append(" )"); + + return builder.toString(); + } + + /** + * Open an instance of this group. + * + * @param opener + * The item that opened this group. + * + * @return An opened instance of this group. + */ + public OpenGroup open(T opener, T[] parms) { + return new OpenGroup(opener, parms); + } + + public void addPredOpener(Function> pred) { + predOpeners.add(pred); + } + + public void addPredCloser(BiPredicate pred) { + predClosers.add(pred); + } + +} \ No newline at end of file diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/SequenceCharacteristics.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/SequenceCharacteristics.java new file mode 100644 index 0000000..f053ef6 --- /dev/null +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/SequenceCharacteristics.java @@ -0,0 +1,101 @@ +package bjc.utils.parserutils.delims; + +/** + * Marks the parameters for building a sequence tree. + * + * @author EVE + * + * @param + * The type of item in the tree. + */ +public class SequenceCharacteristics { + /** + * The item to mark the root of the tree. + */ + public final T root; + + /** + * The item to mark the contents of a group/subgroup. + */ + + public final T contents; + + /** + * The item to mark a subgroup. + */ + public final T subgroup; + + /** + * Create a new set of parameters for building a tree. + * + * @param root + * The root marker. + * @param contents + * The group/subgroup contents marker. + * @param subgroup + * The subgroup marker. + */ + public SequenceCharacteristics(T root, T contents, T subgroup) { + this.root = root; + this.contents = contents; + this.subgroup = subgroup; + } + + /* + * (non-Javadoc) + * + * @see java.lang.Object#hashCode() + */ + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((contents == null) ? 
0 : contents.hashCode()); + result = prime * result + ((root == null) ? 0 : root.hashCode()); + result = prime * result + ((subgroup == null) ? 0 : subgroup.hashCode()); + return result; + } + + /* + * (non-Javadoc) + * + * @see java.lang.Object#equals(java.lang.Object) + */ + @Override + public boolean equals(Object obj) { + if(this == obj) return true; + if(obj == null) return false; + if(!(obj instanceof SequenceCharacteristics)) return false; + + SequenceCharacteristics other = (SequenceCharacteristics) obj; + + if(contents == null) { + if(other.contents != null) return false; + } else if(!contents.equals(other.contents)) return false; + + if(root == null) { + if(other.root != null) return false; + } else if(!root.equals(other.root)) return false; + + if(subgroup == null) { + if(other.subgroup != null) return false; + } else if(!subgroup.equals(other.subgroup)) return false; + + return true; + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + + builder.append("SequenceCharacteristics [root="); + builder.append(root == null ? "(null)" : root); + builder.append(", contents="); + builder.append(contents == null ? "(null)" : contents); + builder.append(", subgroup="); + builder.append(subgroup == null ? 
"(null)" : subgroup); + builder.append("]"); + + return builder.toString(); + } +} \ No newline at end of file diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/SequenceDelimiter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/SequenceDelimiter.java new file mode 100644 index 0000000..dee5034 --- /dev/null +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/SequenceDelimiter.java @@ -0,0 +1,304 @@ +package bjc.utils.parserutils.delims; + +import bjc.utils.data.IPair; +import bjc.utils.data.ITree; +import bjc.utils.data.Tree; +import bjc.utils.esodata.PushdownMap; +import bjc.utils.esodata.SimpleStack; +import bjc.utils.esodata.Stack; +import bjc.utils.funcdata.IMap; +import bjc.utils.funcutils.StringUtils; + +import com.google.common.collect.HashMultiset; +import com.google.common.collect.Multiset; + +import java.util.HashMap; +import java.util.Map; + +/** + * Convert linear sequences into trees that represent group structure. + * + * @author EVE + * + * @param + * The type of items in the sequence. + */ +public class SequenceDelimiter { + /* + * Mapping from opening delimiters to the names of the groups they open + */ + + /* + * Mapping from group names to actual groups. + */ + private Map> groups; + + private DelimiterGroup initialGroup; + + /** + * Create a new sequence delimiter. + */ + public SequenceDelimiter() { + groups = new HashMap<>(); + } + + /** + * Convert a linear sequence into a tree that matches the delimiter + * structure. + * + * Essentially, creates a parse tree of the expression against the + * following grammar while obeying the defined group rules. + * + *
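+	 * <tree>             -> (<data> | <subgroup> | <group>)*
+	 * <subgroup>         -> <tree> <marker>
+	 * <group>            -> <open> <tree> <close>
+	 *         
+	 * <data>             -> STRING
+	 * <open>             -> STRING
+	 * <close>            -> STRING
+	 * <marker>           -> STRING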
+	 * 
+ * + * @param chars + * The parameters on how to mark certain portions of the + * tree. + * @param seq + * The sequence to delimit. + * + * @return The sequence as a tree that matches its group structure. Each + * node in the tree is either a data node, a subgroup node, or a + * group node. + * + * A data node is a leaf node whose data is the string it + * represents. + * + * A subgroup node is a node with two children, and the name of + * the sub-group as its label. The first child is the contents + * of the sub-group, and the second is the marker that started + * the subgroup. The marker is a leaf node labeled with its + * contents, and the contents contains a recursive tree. + * + * A group node is a node with three children, and the name of + * the group as its label. The first child is the opening + * delimiter, the second is the group contents, and the third is + * the closing delimiter. The delimiters are leaf nodes labeled + * with their contents, while the group node contains a + * recursive tree. + * + * @throws DelimiterException + * Thrown if something went wrong during sequence + * delimitation. + * + */ + public ITree delimitSequence(SequenceCharacteristics chars, @SuppressWarnings("unchecked") T... seq) + throws DelimiterException { + if(initialGroup == null) { + throw new NullPointerException("Initial group must be specified."); + } + + /* + * The stack of opened and not yet closed groups. + */ + Stack.OpenGroup> groupStack = new SimpleStack<>(); + + /* + * Open initial group. + */ + groupStack.push(initialGroup.open(chars.root, null)); + + /* + * Groups that aren't allowed to be opened at the moment. + */ + Multiset forbiddenDelimiters = HashMultiset.create(); + + /* + * Map of who forbid what for debugging purposes. 
+ */ + IMap whoForbid = new PushdownMap<>(); + + for(int i = 0; i < seq.length; i++) { + T tok = seq[i]; + + IPair possibleOpenPar = groupStack.top().doesOpen(tok); + T possibleOpen = possibleOpenPar.getLeft(); + + /* + * If we have an opening delimiter, handle it. + */ + if(possibleOpen != null) { + DelimiterGroup group = groups.get(possibleOpen); + + /* + * Error on groups that can't open in this + * context. + * + * This means groups that can't occur at the + * top-level of this group, as well as nested + * exclusions from all enclosing groups. + */ + if(isForbidden(groupStack, forbiddenDelimiters, possibleOpen)) { + StringBuilder msgBuilder = new StringBuilder(); + + T forbiddenBy; + + if(whoForbid.containsKey(tok)) { + forbiddenBy = whoForbid.get(tok); + } else { + forbiddenBy = groupStack.top().getName(); + } + + String ctxList = StringUtils.toEnglishList(groupStack.toArray(), "then"); + + msgBuilder.append("Group '"); + msgBuilder.append(group); + msgBuilder.append("' can't be opened in this context."); + msgBuilder.append(" (forbidden by '"); + msgBuilder.append(forbiddenBy); + msgBuilder.append("')\nContext stack: "); + msgBuilder.append(ctxList); + + throw new DelimiterException(msgBuilder.toString()); + } + + /* + * Add an open group. + */ + DelimiterGroup.OpenGroup open = group.open(tok, possibleOpenPar.getRight()); + groupStack.push(open); + + /* + * Add the nested exclusions from this group + */ + for(T exclusion : open.getNestingExclusions()) { + forbiddenDelimiters.add(exclusion); + + whoForbid.put(exclusion, possibleOpen); + } + } else if(!groupStack.empty() && groupStack.top().isClosing(tok)) { + /* + * Close the group. + */ + DelimiterGroup.OpenGroup closed = groupStack.pop(); + + groupStack.top().addItem(closed.toTree(tok, chars)); + + /* + * Remove nested exclusions from this group. 
+ */ + for(T excludedGroup : closed.getNestingExclusions()) { + forbiddenDelimiters.remove(excludedGroup); + + whoForbid.remove(excludedGroup); + } + } else if(!groupStack.empty() && groupStack.top().marksSubgroup(tok)) { + groupStack.top().markSubgroup(tok, chars); + } else { + groupStack.top().addItem(new Tree<>(tok)); + } + } + + /* + * Error if not all groups were closed. + */ + if(groupStack.size() > 1) { + DelimiterGroup.OpenGroup group = groupStack.top(); + + StringBuilder msgBuilder = new StringBuilder(); + + String closingDelims = StringUtils.toEnglishList(group.getNestingExclusions().toArray(), false); + + String ctxList = StringUtils.toEnglishList(groupStack.toArray(), "then"); + + msgBuilder.append("Unclosed group '"); + msgBuilder.append(group.getName()); + msgBuilder.append("'. Expected one of "); + msgBuilder.append(closingDelims); + msgBuilder.append(" to close it\nOpen groups: "); + msgBuilder.append(ctxList); + + throw new DelimiterException(msgBuilder.toString()); + } + + return groupStack.pop().toTree(chars.root, chars); + } + + private boolean isForbidden(Stack.OpenGroup> groupStack, Multiset forbiddenDelimiters, + T groupName) { + boolean localForbid; + if(groupStack.empty()) + localForbid = false; + else + localForbid = groupStack.top().excludes(groupName); + + return localForbid || forbiddenDelimiters.contains(groupName); + } + + /** + * Add a delimiter group. + * + * @param group + * The delimiter group. + */ + public void addGroup(DelimiterGroup group) { + if(group == null) { + throw new NullPointerException("Group must not be null"); + } + + groups.put(group.groupName, group); + } + + /** + * Creates and adds a delimiter group using the provided settings. + * + * @param openers + * The tokens that open this group + * @param groupName + * The name of the group + * @param closers + * The tokens that close this group + */ + public void addGroup(T[] openers, T groupName, @SuppressWarnings("unchecked") T... 
closers) { + DelimiterGroup group = new DelimiterGroup<>(groupName); + + group.addClosing(closers); + + addGroup(group); + + for(T open : openers) { + group.addOpener(open, groupName); + } + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + + builder.append("SequenceDelimiter ["); + + if(groups != null) { + builder.append("groups="); + builder.append(groups); + builder.append(","); + } + + if(initialGroup != null) { + builder.append("initialGroup="); + builder.append(initialGroup); + } + + builder.append("]"); + + return builder.toString(); + } + + /** + * Set the initial group of this delimiter. + * + * @param initialGroup + * The initial group of this delimiter. + */ + public void setInitialGroup(DelimiterGroup initialGroup) { + this.initialGroup = initialGroup; + } +} diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/StringDelimiter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/StringDelimiter.java new file mode 100644 index 0000000..9799ea9 --- /dev/null +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/delims/StringDelimiter.java @@ -0,0 +1,31 @@ +package bjc.utils.parserutils.delims; + +import bjc.utils.data.ITree; + +/** + * A sequence delimiter specialized for strings. + * + * @author EVE + * + */ +public class StringDelimiter extends SequenceDelimiter { + + /** + * Override of + * {@link SequenceDelimiter#delimitSequence(Object, Object, Object...)} + * for ease of use for strings. + * + * @param seq + * The sequence to delimit. + * + * @return The sequence as a tree. + * + * @throws DelimiterException + * if something went wrong with delimiting the sequence. + * + * @see SequenceDelimiter + */ + public ITree delimitSequence(String... seq) throws DelimiterException { + return super.delimitSequence(new SequenceCharacteristics("root", "contents", "subgroup"), seq); + } +} -- cgit v1.2.3