diff options
| author | bjculkin <bjculkin@mix.wvu.edu> | 2017-03-18 19:58:22 -0400 |
|---|---|---|
| committer | bjculkin <bjculkin@mix.wvu.edu> | 2017-03-18 19:58:22 -0400 |
| commit | 415f5689fe900a04bf64d41878cfa225905b6617 (patch) | |
| tree | 2b9cdff843f3eadb2c7eb282335ec9bac4776bb8 /BJC-Utils2/src/main | |
| parent | 527271d943c01a3e03e4e312a9961f3f64909a55 (diff) | |
Attempt to get subgroups working
Diffstat (limited to 'BJC-Utils2/src/main')
3 files changed, 166 insertions, 49 deletions
diff --git a/BJC-Utils2/src/main/java/bjc/utils/data/Tree.java b/BJC-Utils2/src/main/java/bjc/utils/data/Tree.java index 86ea884..34e074b 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/data/Tree.java +++ b/BJC-Utils2/src/main/java/bjc/utils/data/Tree.java @@ -104,7 +104,9 @@ public class Tree<ContainedType> implements ITree<ContainedType> { @Override public void doForChildren(Consumer<ITree<ContainedType>> action) { - children.forEach(action); + if(childCount > 0) { + children.forEach(action); + } } @Override diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/SequenceDelimiter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/SequenceDelimiter.java index 96a6c65..af6ba81 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/SequenceDelimiter.java +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/SequenceDelimiter.java @@ -76,6 +76,7 @@ public class SequenceDelimiter<T> { closingDelimiters = new HashSet<>(); topLevelExclusions = new HashSet<>(); groupExclusions = new HashSet<>(); + subgroups = new HashMap<>(); } /** @@ -473,60 +474,60 @@ public class SequenceDelimiter<T> { whoForbid.remove(excludedGroup); } - } else if(!groupStack.empty() && groupStack.top().subgroups.containsKey(tok)){ + } else if(!groupStack.empty() && groupStack.top().subgroups.containsKey(tok)) { /* * Parse a sub-group. */ - + /* * The set of enclosed groups. */ Set<T> enclosed = groupStack.top().subgroups.get(tok); - + /* * The current contents of this group. */ ITree<T> contentTree = trees.pop(); - + /* - * Find the first element to enclose in the subgroup. + * Find the first element to enclose in the + * subgroup. */ int ind = contentTree.revFind((chd) -> { - if(chd.getHead().equals(subgroup)) { - return !enclosed.contains(chd.getChild(1)); - } else { - return false; - } + return checkChild(subgroup, enclosed, chd); }); - + + if(ind == -1) ind = 0; + ITree<T> newContentTree = new Tree<>(contentTree.getHead()); ITree<T> subgroupContents = new Tree<>(contents); - + /* - * Split content tree into an untouched tree, and the subgroup. + * Split content tree into an untouched tree, + * and the subgroup. */ for(int j = 0; j < contentTree.getChildrenCount(); j++) { ITree<T> child = contentTree.getChild(j); - + if(j < ind) { newContentTree.addChild(child); } else { subgroupContents.addChild(child); } } - + /* * Construct the subgroup. */ ITree<T> subgroupTree = new Tree<>(subgroup); subgroupTree.addChild(subgroupContents); subgroupTree.addChild(new Tree<>(tok)); - + /* * Add the subgroup to the group. */ newContentTree.addChild(subgroupTree); - + /* * Add the group contents. */ @@ -560,6 +561,19 @@ public class SequenceDelimiter<T> { return res; } + private boolean checkChild(T subgroup, Set<T> enclosed, ITree<T> chd) { + System.out.println("Checking child '" + chd.getHead() + "' for subgroups."); + + if(chd.getHead().equals(subgroup)) { + System.out.println("Checking if '" + chd.getChild(1) + "' is a subordinate group."); + boolean contains = enclosed.contains(chd.getChild(1)); + System.out.println("It " + (contains ? "was" : "wasn't")); + return contains; + } else { + return false; + } + } + private boolean isForbidden(Stack<DelimiterGroup<T>> groupStack, Multiset<T> forbiddenDelimiters, T groupName) { boolean localForbid; if(groupStack.empty()) @@ -627,4 +641,26 @@ public class SequenceDelimiter<T> { addOpener(open, groupName); } } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + + builder.append("SequenceDelimiter ["); + + if(openDelimiters != null) { + builder.append("openDelimiters="); + builder.append(openDelimiters); + builder.append(", "); + } + + if(groups != null) { + builder.append("groups="); + builder.append(groups); + } + + builder.append("]"); + + return builder.toString(); + } } diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java index e6191b9..ec69ade 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java @@ -1,5 +1,7 @@ package bjc.utils.parserutils; +import java.util.HashSet; +import java.util.Set; import java.util.regex.Pattern; /** @@ -13,10 +15,9 @@ public class TokenSplitter { * * It does two things: * - * <ol> - * <li> Match to the left of the provided delimiter by positive lookahead </li> - * <li> Match to the right of the provided delimiter by positive lookbehind </li> - * </ol> + * <ol> <li> Match to the left of the provided delimiter by positive + * lookahead </li> <li> Match to the right of the provided delimiter by + * positive lookbehind </li> </ol> * * Thus, it will only match in places where the delimiter is, but won't * actually match the delimiter, leaving split to put it into the stream @@ -47,10 +48,20 @@ public class TokenSplitter { private Pattern compPatt; private Pattern exclusionPatt; + /* + * These represent info for debugging. + */ + private Set<String> delimSet; + private Set<String> multidelimSet; + private Set<String> exclusionSet; + /** * Create a new token splitter. */ public TokenSplitter() { + delimSet = new HashSet<>(); + multidelimSet = new HashSet<>(); + exclusionSet = new HashSet<>(); } /** @@ -73,7 +84,8 @@ public class TokenSplitter { if(compPatt == null) throw new IllegalStateException("Token splitter has not been compiled yet"); /* - * Don't split something that we should exclude from being split. + * Don't split something that we should exclude from being + * split. */ if(exclusionPatt.matcher(inp).matches()) return new String[] { inp }; @@ -93,18 +105,20 @@ public class TokenSplitter { public void addDelimiter(String... delims) { for(String delim : delims) { String quoteDelim = Pattern.quote(delim); - String delimPat = String.format(WITH_DELIM, quoteDelim); - + String delimPat = String.format(WITH_DELIM, quoteDelim); + if(currPatt == null) { - currPatt = new StringBuilder(); + currPatt = new StringBuilder(); currExclusionPatt = new StringBuilder(); - + currPatt.append("(?:" + delimPat + ")"); currExclusionPatt.append("(?:" + quoteDelim + ")"); } else { currPatt.append("|(?:" + delimPat + ")"); currExclusionPatt.append("|(?:" + quoteDelim + ")"); } + + delimSet.add(delim); } } @@ -114,48 +128,113 @@ public class TokenSplitter { * The provided string should be a pattern to match one or more * occurances of. * - * @param delim + * @param delims * The delimiter to split on. */ - public void addMultiDelimiter(String delim) { - String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")"); + public void addMultiDelimiter(String... delims) { + for(String delim : delims) { + String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")"); + + if(currPatt == null) { + currPatt = new StringBuilder(); + currExclusionPatt = new StringBuilder(); - if(currPatt == null) { - currPatt = new StringBuilder(); - currExclusionPatt = new StringBuilder(); + currPatt.append("(?:" + delimPat + ")"); + currExclusionPatt.append("(?:(?:" + delim + ")+)"); - currPatt.append("(?:" + delimPat + ")"); - currExclusionPatt.append("(?:(?:" + delim + ")+)"); + } else { + currPatt.append("|(?:" + delimPat + ")"); + currExclusionPatt.append("|(?:(?:" + delim + ")+)"); + } - } else { - currPatt.append("|(?:" + delimPat + ")"); - currExclusionPatt.append("|(?:(?:" + delim + ")+)"); + multidelimSet.add(delim); } } /** * Marks strings matching the pattern delim as non-splittable. * - * @param delim - * The regex to not splitting matching strings. + * @param delimSet + * The regex to not splitting matching strings. */ - public void addNonMatcher(String delim) { - if(currPatt == null) { - currPatt = new StringBuilder(); - currExclusionPatt = new StringBuilder(); - - currExclusionPatt.append("(?:" + delim + ")"); - } else { - currExclusionPatt.append("|(?:" + delim + ")"); + public void addNonMatcher(String... delims) { + for(String delim : delims) { + if(currPatt == null) { + currPatt = new StringBuilder(); + currExclusionPatt = new StringBuilder(); + + currExclusionPatt.append("(?:" + delim + ")"); + } else { + currExclusionPatt.append("|(?:" + delim + ")"); + } + + exclusionSet.add(delim); } } + /** * Compiles the current set of delimiters to a pattern. * * Makes this splitter ready to use. */ public void compile() { - compPatt = Pattern.compile(currPatt.toString()); + compPatt = Pattern.compile(currPatt.toString()); exclusionPatt = Pattern.compile(currExclusionPatt.toString()); } + + /* + * (non-Javadoc) + * + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + + builder.append("TokenSplitter ["); + + if(currPatt != null) { + builder.append("currPatt="); + builder.append(currPatt); + builder.append("\n\t, "); + } + + if(currExclusionPatt != null) { + builder.append("currExclusionPatt="); + builder.append(currExclusionPatt); + builder.append("\n\t, "); + } + + if(compPatt != null) { + builder.append("compPatt="); + builder.append(compPatt); + builder.append("\n\t, "); + } + + if(exclusionPatt != null) { + builder.append("exclusionPatt="); + builder.append(exclusionPatt); + builder.append("\n\t, "); + } + + if(delimSet != null) { + builder.append("delimSet="); + builder.append(delimSet); + builder.append("\n\t, "); + } + + if(multidelimSet != null) { + builder.append("multidelimSet="); + builder.append(multidelimSet); + builder.append("\n\t, "); + } + + if(exclusionSet != null) { + builder.append("exclusionSet="); + builder.append(exclusionSet); + } + + builder.append("]"); + return builder.toString(); + } } |
