summary | refs | log | tree | commit | diff
path: root/BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java
diff options
context:
space:
mode:
author: bjculkin <bjculkin@mix.wvu.edu> 2017-03-17 08:34:57 -0400
committer: bjculkin <bjculkin@mix.wvu.edu> 2017-03-17 08:34:57 -0400
commit: 9d89261fedf23c11b684eb66cefdd86a9378ad20 (patch)
tree: 5158fdaedcd2951fbd41d49b72f7e09200fa3192 /BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java
parent: a63c30f5fe9ee302e73bb30e35095d789adb1a80 (diff)
Move parsing utilities.
Moved the parsing utilities SequenceDelimiter and TokenSplitter to the parserutils package, instead of the funcutils package.
Diffstat (limited to 'BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java')
-rw-r--r-- BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java 161
1 files changed, 0 insertions, 161 deletions
diff --git a/BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java
deleted file mode 100644
index 206fbcd..0000000
--- a/BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java
+++ /dev/null
@@ -1,161 +0,0 @@
-package bjc.utils.funcutils;
-
-import java.util.regex.Pattern;
-
-/**
- * Split a string and keep given delimiters.
- *
- * @author Ben Culkin
- */
-public class TokenSplitter {
- /*
- * This string is a format template for the delimiter matching regex
- *
- * It does two things:
- *
- * <ol>
- * <li> Match to the left of the provided delimiter by positive lookahead </li>
- * <li> Match to the right of the provided delimiter by positive lookbehind </li>
- * </ol>
- *
- * Thus, it will only match in places where the delimiter is, but won't
- * actually match the delimiter, leaving split to put it into the stream
- */
- private static String WITH_DELIM = "(?:(?<=%1$s)|(?=%1$s))";
-
- /*
- * This string is a format template for the multi-delimiter matching
- * regex.
- *
- * It does the same thing as the single delimiter regex, but has to have
- * some negative lookahead/lookbehind assertions to avoid splitting a
- * delimiter into pieces.
- */
- private static String WITH_MULTI_DELIM = "(?:(?<=%1$s+)(?!%1$s)|(?<!%1$s)(?=%1$s+))";
-
- /*
- * These represent the internal state of the splitter.
- */
- private StringBuilder currPatt;
- private StringBuilder currExclusionPatt;
-
- /*
- * These represent the external state of the splitter.
- *
- * Compilation causes internal to become external.
- */
- private Pattern compPatt;
- private Pattern exclusionPatt;
-
- /**
- * Create a new token splitter.
- */
- public TokenSplitter() {
- }
-
- /**
- * Split a provided string using configured delimiters, and keeping the
- * delimiters.
- *
- * <p>
- * The splitter must be compiled first.
- * </p>
- *
- * @param inp
- * The string to split.
- *
- * @return The split string, including delimiters.
- *
- * @throws IllegalStateException
- * If the splitter isn't compiled.
- */
- public String[] split(String inp) {
- if(compPatt == null) throw new IllegalStateException("Token splitter has not been compiled yet");
-
- /*
- * Don't split something that we should exclude from being split.
- */
- if(exclusionPatt.matcher(inp).matches()) return new String[] { inp };
-
- return compPatt.split(inp);
- }
-
- /**
- * Adds one or more strings as matched delimiters to split on.
- *
- * Only works for fixed length delimiters.
- *
- * The provided strings are regex-escaped before being used.
- *
- * @param delims
- * The delimiters to match on.
- */
- public void addDelimiter(String... delims) {
- for(String delim : delims) {
- String quoteDelim = Pattern.quote(delim);
- String delimPat = String.format(WITH_DELIM, quoteDelim);
-
- if(currPatt == null) {
- currPatt = new StringBuilder();
- currExclusionPatt = new StringBuilder();
-
- currPatt.append("(?:" + delimPat + ")");
- currExclusionPatt.append("(?:" + quoteDelim + ")");
- } else {
- currPatt.append("|(?:" + delimPat + ")");
- currExclusionPatt.append("|(?:" + quoteDelim + ")");
- }
- }
- }
-
- /**
- * Adds a character class as a matched delimiter to split on.
- *
- * The provided string should be a pattern to match one or more
- * occurances of.
- *
- * @param delim
- * The delimiter to split on.
- */
- public void addMultiDelimiter(String delim) {
- String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")");
-
- if(currPatt == null) {
- currPatt = new StringBuilder();
- currExclusionPatt = new StringBuilder();
-
- currPatt.append("(?:" + delimPat + ")");
- currExclusionPatt.append("(?:(?:" + delim + ")+)");
-
- } else {
- currPatt.append("|(?:" + delimPat + ")");
- currExclusionPatt.append("|(?:(?:" + delim + ")+)");
- }
- }
-
- /**
- * Marks strings matching the pattern delim as non-splittable.
- *
- * @param delim
- * The regex to not splitting matching strings.
- */
- public void addNonMatcher(String delim) {
- if(currPatt == null) {
- currPatt = new StringBuilder();
- currExclusionPatt = new StringBuilder();
-
- currExclusionPatt.append("(?:" + delim + ")");
- } else {
- currExclusionPatt.append("|(?:" + delim + ")");
- }
- }
- /**
- * Compiles the current set of delimiters to a pattern.
- *
- * Makes this splitter ready to use.
- */
- public void compile() {
- compPatt = Pattern.compile(currPatt.toString());
- exclusionPatt = Pattern.compile(currExclusionPatt.toString());
- }
-}