/*
 * TokenSplitter, recovered from commit 9d89261fedf23c11b684eb66cefdd86a9378ad20
 * ("Move parsing utilities", bjculkin, 2017-03-17). That commit moved the
 * parsing utilities SequenceDelimiter and TokenSplitter to the parserutils
 * package, instead of the funcutils package.
 *
 * Path in that commit:
 * BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java
 * where the file is declared as "package bjc.utils.parserutils;". Re-add that
 * package declaration when placing this file back into the source tree.
 */
import java.util.regex.Pattern;

/**
 * Split a string and keep given delimiters.
 *
 * <p>
 * Typical use: register delimiters with {@link #addDelimiter(String...)} /
 * {@link #addMultiDelimiter(String)}, optionally register strings that must
 * never be split with {@link #addNonMatcher(String)}, call {@link #compile()},
 * and then call {@link #split(String)} as often as needed.
 * </p>
 *
 * <p>
 * Registrations made after {@link #compile()} only take effect after the next
 * call to {@link #compile()}.
 * </p>
 *
 * @author Ben Culkin
 */
public class TokenSplitter {
	/*
	 * This string is a format template for the delimiter matching regex.
	 *
	 * It does two things:
	 *
	 * 1. Match to the left of the provided delimiter by positive lookahead
	 * 2. Match to the right of the provided delimiter by positive lookbehind
	 *
	 * Thus, it will only match in places where the delimiter is, but won't
	 * actually match the delimiter, leaving split to put it into the stream.
	 */
	private static final String WITH_DELIM = "(?:(?<=%1$s)|(?=%1$s))";

	/*
	 * This string is a format template for the multi-delimiter matching
	 * regex.
	 *
	 * It does the same thing as the single delimiter regex, but has to have
	 * some negative lookahead/lookbehind assertions to avoid splitting a
	 * delimiter into pieces.
	 *
	 * NOTE(review): this puts an unbounded quantifier inside a lookbehind;
	 * whether Pattern.compile accepts that appears to depend on the JDK
	 * version and on the delimiter's width - confirm on the target JDK.
	 */
	private static final String WITH_MULTI_DELIM = "(?:(?<=%1$s+)(?!%1$s)|(?<!%1$s)(?=%1$s+))";

	/*
	 * An empty negative lookahead: it can never succeed, so a pattern built
	 * from it matches nowhere. Used when no alternatives were registered.
	 */
	private static final String NEVER_MATCH = "(?!)";

	/* Alternatives describing where to split; joined with '|'. */
	private final StringBuilder currPatt = new StringBuilder();

	/* Alternatives describing whole inputs that must not be split. */
	private final StringBuilder currExclusionPatt = new StringBuilder();

	/* Compiled form of currPatt; null until compile() is called. */
	private Pattern compPatt;

	/* Compiled form of currExclusionPatt; null until compile() is called. */
	private Pattern exclusionPatt;

	/**
	 * Split a provided string on the configured delimiters, keeping the
	 * delimiters as elements of the result.
	 *
	 * <p>
	 * The splitter must be compiled first.
	 * </p>
	 *
	 * @param inp
	 *                The string to split.
	 *
	 * @return The split string, including delimiters. If the entire input
	 *         matches an excluded pattern, or no delimiter occurs in it,
	 *         the result is a one-element array holding the input.
	 *
	 * @throws IllegalStateException
	 *                 If the splitter isn't compiled.
	 */
	public String[] split(String inp) {
		if (compPatt == null) {
			throw new IllegalStateException("Token splitter has not been compiled yet");
		}

		/*
		 * Don't split something that we should exclude from being split.
		 */
		if (exclusionPatt.matcher(inp).matches()) {
			return new String[] { inp };
		}

		return compPatt.split(inp);
	}

	/**
	 * Adds one or more strings as matched delimiters to split on.
	 *
	 * Only works for fixed length delimiters.
	 *
	 * The provided strings are regex-escaped before being used. An input
	 * consisting of exactly one of these delimiters is also treated as
	 * non-splittable.
	 *
	 * @param delims
	 *                The delimiters to match on.
	 */
	public void addDelimiter(String... delims) {
		for (String delim : delims) {
			String quoteDelim = Pattern.quote(delim);

			appendAlternative(currPatt, String.format(WITH_DELIM, quoteDelim));
			appendAlternative(currExclusionPatt, quoteDelim);
		}
	}

	/**
	 * Adds a character class as a matched delimiter to split on.
	 *
	 * The provided string should be a pattern to match one or more
	 * occurrences of. It is used as a regex, not escaped. An input
	 * consisting only of repetitions of it is also treated as
	 * non-splittable.
	 *
	 * @param delim
	 *                The delimiter to split on.
	 */
	public void addMultiDelimiter(String delim) {
		String grouped = "(?:" + delim + ")";

		appendAlternative(currPatt, String.format(WITH_MULTI_DELIM, grouped));
		appendAlternative(currExclusionPatt, grouped + "+");
	}

	/**
	 * Marks strings matching the pattern delim as non-splittable.
	 *
	 * The pattern is tested against the whole input of
	 * {@link #split(String)}, not against parts of it.
	 *
	 * @param delim
	 *                The regex whose matching strings should not be split.
	 */
	public void addNonMatcher(String delim) {
		/*
		 * Only the exclusion pattern changes here; the delimiter pattern
		 * must stay untouched so that later delimiters don't end up
		 * behind an empty alternative that matches everywhere.
		 */
		appendAlternative(currExclusionPatt, delim);
	}

	/**
	 * Compiles the current set of delimiters to a pattern.
	 *
	 * Makes this splitter ready to use. If no delimiters (or no exclusions)
	 * have been registered, the corresponding pattern matches nothing, so
	 * {@link #split(String)} returns its input unsplit.
	 */
	public void compile() {
		compPatt = compileAlternatives(currPatt);
		exclusionPatt = compileAlternatives(currExclusionPatt);
	}

	/*
	 * Append one alternative, wrapped in a non-capturing group, to a pattern
	 * under construction, inserting '|' only between alternatives.
	 */
	private static void appendAlternative(StringBuilder patt, String alternative) {
		if (patt.length() > 0) {
			patt.append('|');
		}

		patt.append("(?:").append(alternative).append(")");
	}

	/*
	 * Compile the collected alternatives, or a never-matching pattern if
	 * there are none.
	 */
	private static Pattern compileAlternatives(StringBuilder patt) {
		if (patt.length() == 0) {
			return Pattern.compile(NEVER_MATCH);
		}

		return Pattern.compile(patt.toString());
	}
}