From 415f5689fe900a04bf64d41878cfa225905b6617 Mon Sep 17 00:00:00 2001 From: bjculkin Date: Sat, 18 Mar 2017 19:58:22 -0400 Subject: Attempt to get subgroups working --- .../java/bjc/utils/parserutils/TokenSplitter.java | 141 ++++++++++++++++----- 1 file changed, 110 insertions(+), 31 deletions(-) (limited to 'BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java') diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java index e6191b9..ec69ade 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java @@ -1,5 +1,7 @@ package bjc.utils.parserutils; +import java.util.HashSet; +import java.util.Set; import java.util.regex.Pattern; /** @@ -13,10 +15,9 @@ public class TokenSplitter { * * It does two things: * - *
<ol>
- * <li>Match to the left of the provided delimiter by positive lookahead</li>
- * <li>Match to the right of the provided delimiter by positive lookbehind</li>
- * </ol>
+ * <ol><li>Match to the left of the provided delimiter by positive
+ * lookahead</li><li>Match to the right of the provided delimiter by
+ * positive lookbehind</li></ol>
* * Thus, it will only match in places where the delimiter is, but won't * actually match the delimiter, leaving split to put it into the stream @@ -47,10 +48,20 @@ public class TokenSplitter { private Pattern compPatt; private Pattern exclusionPatt; + /* + * These represent info for debugging. + */ + private Set delimSet; + private Set multidelimSet; + private Set exclusionSet; + /** * Create a new token splitter. */ public TokenSplitter() { + delimSet = new HashSet<>(); + multidelimSet = new HashSet<>(); + exclusionSet = new HashSet<>(); } /** @@ -73,7 +84,8 @@ public class TokenSplitter { if(compPatt == null) throw new IllegalStateException("Token splitter has not been compiled yet"); /* - * Don't split something that we should exclude from being split. + * Don't split something that we should exclude from being + * split. */ if(exclusionPatt.matcher(inp).matches()) return new String[] { inp }; @@ -93,18 +105,20 @@ public class TokenSplitter { public void addDelimiter(String... delims) { for(String delim : delims) { String quoteDelim = Pattern.quote(delim); - String delimPat = String.format(WITH_DELIM, quoteDelim); - + String delimPat = String.format(WITH_DELIM, quoteDelim); + if(currPatt == null) { - currPatt = new StringBuilder(); + currPatt = new StringBuilder(); currExclusionPatt = new StringBuilder(); - + currPatt.append("(?:" + delimPat + ")"); currExclusionPatt.append("(?:" + quoteDelim + ")"); } else { currPatt.append("|(?:" + delimPat + ")"); currExclusionPatt.append("|(?:" + quoteDelim + ")"); } + + delimSet.add(delim); } } @@ -114,48 +128,113 @@ public class TokenSplitter { * The provided string should be a pattern to match one or more * occurances of. * - * @param delim + * @param delims * The delimiter to split on. */ - public void addMultiDelimiter(String delim) { - String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")"); + public void addMultiDelimiter(String... 
delims) { + for(String delim : delims) { + String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")"); + + if(currPatt == null) { + currPatt = new StringBuilder(); + currExclusionPatt = new StringBuilder(); - if(currPatt == null) { - currPatt = new StringBuilder(); - currExclusionPatt = new StringBuilder(); + currPatt.append("(?:" + delimPat + ")"); + currExclusionPatt.append("(?:(?:" + delim + ")+)"); - currPatt.append("(?:" + delimPat + ")"); - currExclusionPatt.append("(?:(?:" + delim + ")+)"); + } else { + currPatt.append("|(?:" + delimPat + ")"); + currExclusionPatt.append("|(?:(?:" + delim + ")+)"); + } - } else { - currPatt.append("|(?:" + delimPat + ")"); - currExclusionPatt.append("|(?:(?:" + delim + ")+)"); + multidelimSet.add(delim); } } /** * Marks strings matching the pattern delim as non-splittable. * - * @param delim - * The regex to not splitting matching strings. + * @param delims + * The regexes whose matching strings should not be split. */ - public void addNonMatcher(String delim) { - if(currPatt == null) { - currPatt = new StringBuilder(); - currExclusionPatt = new StringBuilder(); - - currExclusionPatt.append("(?:" + delim + ")"); - } else { - currExclusionPatt.append("|(?:" + delim + ")"); + public void addNonMatcher(String... delims) { + for(String delim : delims) { + if(currPatt == null) { + currPatt = new StringBuilder(); + currExclusionPatt = new StringBuilder(); + + currExclusionPatt.append("(?:" + delim + ")"); + } else { + currExclusionPatt.append("|(?:" + delim + ")"); + } + + exclusionSet.add(delim); } } + /** * Compiles the current set of delimiters to a pattern. * * Makes this splitter ready to use. 
*/ public void compile() { - compPatt = Pattern.compile(currPatt.toString()); + compPatt = Pattern.compile(currPatt.toString()); exclusionPatt = Pattern.compile(currExclusionPatt.toString()); } + + /* + * (non-Javadoc) + * + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + + builder.append("TokenSplitter ["); + + if(currPatt != null) { + builder.append("currPatt="); + builder.append(currPatt); + builder.append("\n\t, "); + } + + if(currExclusionPatt != null) { + builder.append("currExclusionPatt="); + builder.append(currExclusionPatt); + builder.append("\n\t, "); + } + + if(compPatt != null) { + builder.append("compPatt="); + builder.append(compPatt); + builder.append("\n\t, "); + } + + if(exclusionPatt != null) { + builder.append("exclusionPatt="); + builder.append(exclusionPatt); + builder.append("\n\t, "); + } + + if(delimSet != null) { + builder.append("delimSet="); + builder.append(delimSet); + builder.append("\n\t, "); + } + + if(multidelimSet != null) { + builder.append("multidelimSet="); + builder.append(multidelimSet); + builder.append("\n\t, "); + } + + if(exclusionSet != null) { + builder.append("exclusionSet="); + builder.append(exclusionSet); + } + + builder.append("]"); + return builder.toString(); + } } -- cgit v1.2.3