diff options
| author | bjculkin <bjculkin@mix.wvu.edu> | 2017-03-18 19:58:22 -0400 |
|---|---|---|
| committer | bjculkin <bjculkin@mix.wvu.edu> | 2017-03-18 19:58:22 -0400 |
| commit | 415f5689fe900a04bf64d41878cfa225905b6617 (patch) | |
| tree | 2b9cdff843f3eadb2c7eb282335ec9bac4776bb8 /BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java | |
| parent | 527271d943c01a3e03e4e312a9961f3f64909a55 (diff) | |
Attempt to get subgroups working
Diffstat (limited to 'BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java')
| -rw-r--r-- | BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java | 141 |
1 files changed, 110 insertions, 31 deletions
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java index e6191b9..ec69ade 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java @@ -1,5 +1,7 @@ package bjc.utils.parserutils; +import java.util.HashSet; +import java.util.Set; import java.util.regex.Pattern; /** @@ -13,10 +15,9 @@ public class TokenSplitter { * * It does two things: * - * <ol> - * <li> Match to the left of the provided delimiter by positive lookahead </li> - * <li> Match to the right of the provided delimiter by positive lookbehind </li> - * </ol> + * <ol> <li> Match to the left of the provided delimiter by positive + * lookahead </li> <li> Match to the right of the provided delimiter by + * positive lookbehind </li> </ol> * * Thus, it will only match in places where the delimiter is, but won't * actually match the delimiter, leaving split to put it into the stream @@ -47,10 +48,20 @@ public class TokenSplitter { private Pattern compPatt; private Pattern exclusionPatt; + /* + * These represent info for debugging. + */ + private Set<String> delimSet; + private Set<String> multidelimSet; + private Set<String> exclusionSet; + /** * Create a new token splitter. */ public TokenSplitter() { + delimSet = new HashSet<>(); + multidelimSet = new HashSet<>(); + exclusionSet = new HashSet<>(); } /** @@ -73,7 +84,8 @@ public class TokenSplitter { if(compPatt == null) throw new IllegalStateException("Token splitter has not been compiled yet"); /* - * Don't split something that we should exclude from being split. + * Don't split something that we should exclude from being + * split. */ if(exclusionPatt.matcher(inp).matches()) return new String[] { inp }; @@ -93,18 +105,20 @@ public class TokenSplitter { public void addDelimiter(String... delims) { for(String delim : delims) { String quoteDelim = Pattern.quote(delim); - String delimPat = String.format(WITH_DELIM, quoteDelim); - + String delimPat = String.format(WITH_DELIM, quoteDelim); + if(currPatt == null) { - currPatt = new StringBuilder(); + currPatt = new StringBuilder(); currExclusionPatt = new StringBuilder(); - + currPatt.append("(?:" + delimPat + ")"); currExclusionPatt.append("(?:" + quoteDelim + ")"); } else { currPatt.append("|(?:" + delimPat + ")"); currExclusionPatt.append("|(?:" + quoteDelim + ")"); } + + delimSet.add(delim); } } @@ -114,48 +128,113 @@ public class TokenSplitter { * The provided string should be a pattern to match one or more * occurances of. * - * @param delim + * @param delims * The delimiter to split on. */ - public void addMultiDelimiter(String delim) { - String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")"); + public void addMultiDelimiter(String... delims) { + for(String delim : delims) { + String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")"); + + if(currPatt == null) { + currPatt = new StringBuilder(); + currExclusionPatt = new StringBuilder(); - if(currPatt == null) { - currPatt = new StringBuilder(); - currExclusionPatt = new StringBuilder(); + currPatt.append("(?:" + delimPat + ")"); + currExclusionPatt.append("(?:(?:" + delim + ")+)"); - currPatt.append("(?:" + delimPat + ")"); - currExclusionPatt.append("(?:(?:" + delim + ")+)"); + } else { + currPatt.append("|(?:" + delimPat + ")"); + currExclusionPatt.append("|(?:(?:" + delim + ")+)"); + } - } else { - currPatt.append("|(?:" + delimPat + ")"); - currExclusionPatt.append("|(?:(?:" + delim + ")+)"); + multidelimSet.add(delim); } } /** * Marks strings matching the pattern delim as non-splittable. * - * @param delim - * The regex to not splitting matching strings. + * @param delimSet + * The regex to not splitting matching strings. */ - public void addNonMatcher(String delim) { - if(currPatt == null) { - currPatt = new StringBuilder(); - currExclusionPatt = new StringBuilder(); - - currExclusionPatt.append("(?:" + delim + ")"); - } else { - currExclusionPatt.append("|(?:" + delim + ")"); + public void addNonMatcher(String... delims) { + for(String delim : delims) { + if(currPatt == null) { + currPatt = new StringBuilder(); + currExclusionPatt = new StringBuilder(); + + currExclusionPatt.append("(?:" + delim + ")"); + } else { + currExclusionPatt.append("|(?:" + delim + ")"); + } + + exclusionSet.add(delim); } } + /** * Compiles the current set of delimiters to a pattern. * * Makes this splitter ready to use. */ public void compile() { - compPatt = Pattern.compile(currPatt.toString()); + compPatt = Pattern.compile(currPatt.toString()); exclusionPatt = Pattern.compile(currExclusionPatt.toString()); } + + /* + * (non-Javadoc) + * + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + + builder.append("TokenSplitter ["); + + if(currPatt != null) { + builder.append("currPatt="); + builder.append(currPatt); + builder.append("\n\t, "); + } + + if(currExclusionPatt != null) { + builder.append("currExclusionPatt="); + builder.append(currExclusionPatt); + builder.append("\n\t, "); + } + + if(compPatt != null) { + builder.append("compPatt="); + builder.append(compPatt); + builder.append("\n\t, "); + } + + if(exclusionPatt != null) { + builder.append("exclusionPatt="); + builder.append(exclusionPatt); + builder.append("\n\t, "); + } + + if(delimSet != null) { + builder.append("delimSet="); + builder.append(delimSet); + builder.append("\n\t, "); + } + + if(multidelimSet != null) { + builder.append("multidelimSet="); + builder.append(multidelimSet); + builder.append("\n\t, "); + } + + if(exclusionSet != null) { + builder.append("exclusionSet="); + builder.append(exclusionSet); + } + + builder.append("]"); + return builder.toString(); + } } |
