summaryrefslogtreecommitdiff
path: root/BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java
diff options
context:
space:
mode:
authorEVE <EVE@EVE-PC>2017-03-15 19:06:48 -0400
committerEVE <EVE@EVE-PC>2017-03-15 19:06:48 -0400
commit72e8de605598f62efbd63c17897e80cec181ff2b (patch)
tree7925a4fba447f213bc69bb14a56848e047f21239 /BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java
parent9201148259345c3a48bf2b46cd9badddf44a77e9 (diff)
Remove old splitter code, and swap naming to match.
Diffstat (limited to 'BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java')
-rw-r--r--BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java168
1 files changed, 139 insertions, 29 deletions
diff --git a/BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java
index 84f5270..084bdae 100644
--- a/BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java
+++ b/BJC-Utils2/src/main/java/bjc/utils/funcutils/TokenSplitter.java
@@ -1,47 +1,157 @@
package bjc.utils.funcutils;
-import bjc.utils.funcdata.FunctionalList;
-import bjc.utils.funcdata.IList;
+import java.util.regex.Pattern;
-import java.util.Iterator;
-import java.util.function.BiFunction;
+/**
+ * Split a string and keep given delimiters.
+ *
+ * @author Ben Culkin
+ */
+public class TokenSplitter {
+ /*
+ * This string is a format template for the delimiter matching regex
+ *
+ * It does two things:
+ *
+ * <ol>
+ * <li> Match to the left of the provided delimiter by positive lookahead </li>
+ * <li> Match to the right of the provided delimiter by positive lookbehind </li>
+ * </ol>
+ *
+ * Thus, it will only match in places where the delimiter is, but won't
+ * actually match the delimiter, leaving split to put it into the stream
+ */
+ private static String WITH_DELIM = "((?<=%1$s)|(?=%1$s))";
-final class TokenSplitter implements BiFunction<String, String, IList<String>> {
- private String tokenToSplit;
+ /*
+ * This string is a format template for the multi-delimiter matching
+ * regex.
+ *
+ * It does the same thing as the single delimiter regex, but has to have
+ * some negative lookahead/lookbehind assertions to avoid splitting a
+ * delimiter into pieces.
+ */
+ private static String WITH_MULTI_DELIM = "((?<=%1$s+)(?!%1$s)|(?<!%1$s)(?=%1$s+))";
- public TokenSplitter(String tok) {
- this.tokenToSplit = tok;
+ /*
+ * These represent the internal state of the splitter.
+ */
+ private StringBuilder currPatt;
+ private StringBuilder currExclusionPatt;
+
+ /*
+ * These represent the external state of the splitter.
+ *
+ * Compiling turns the internal state into this external state.
+ */
+ private Pattern compPatt;
+ private Pattern exclusionPatt;
+
+ /**
+ * Create a new token splitter.
+ */
+ public TokenSplitter() {
}
- @Override
- public IList<String> apply(String operatorName, String operatorRegex) {
- if(operatorName == null)
- throw new NullPointerException("Operator name must not be null");
- else if(operatorRegex == null) throw new NullPointerException("Operator regex must not be null");
+ /**
+ * Split a provided string using configured delimiters, and keeping the
+ * delimiters.
+ *
+ * The splitter must be compiled first.
+ *
+ * @param inp
+ * The string to split.
+ *
+ * @return The split string, including delimiters.
+ *
+ * @throws IllegalStateException
+ * If the splitter isn't compiled.
+ */
+ public String[] split(String inp) {
+ if(compPatt == null) throw new IllegalStateException("Token splitter has not been compiled yet");
+
+ /*
+ * Don't split something that we should exclude from being split.
+ */
+ if(exclusionPatt.matcher(inp).matches()) return new String[] { inp };
- if(tokenToSplit.contains(operatorName)) {
- if(StringUtils.containsOnly(tokenToSplit, operatorRegex))
- return new FunctionalList<>(tokenToSplit);
+ return compPatt.split(inp);
+ }
- IList<String> splitTokens = new FunctionalList<>(tokenToSplit.split(operatorRegex));
- IList<String> result = new FunctionalList<>();
+ /**
+ * Adds a string as a matched delimiter to split on.
+ *
+ * Only works for fixed length delimiters.
+ *
+ * The provided string is regex-escaped before being used.
+ *
+ * @param delim
+ * The delimiter to match on.
+ */
+ public void addDelimiter(String delim) {
+ String quoteDelim = Pattern.quote(delim);
+ String delimPat = String.format(WITH_DELIM, quoteDelim);
- Iterator<String> itr = splitTokens.toIterable().iterator();
- int tokenExpansionSize = splitTokens.getSize();
+ if(currPatt == null) {
+ currPatt = new StringBuilder();
+ currExclusionPatt = new StringBuilder();
- String elm = itr.next();
+ currPatt.append("(?:" + delimPat + ")");
+ currExclusionPatt.append("(?:" + quoteDelim + ")");
+ } else {
+ currPatt.append("|(?:" + delimPat + ")");
+ currExclusionPatt.append("|(?:" + quoteDelim + ")");
+ }
+ }
- for(int i = 0; itr.hasNext(); elm = itr.next()) {
- result.add(elm);
+ /**
+ * Adds a character class as a matched delimiter to split on.
+ *
+ * The provided string should be a pattern to match one or more
+ * occurrences of.
+ *
+ * @param delim
+ * The delimiter to split on.
+ */
+ public void addMultiDelimiter(String delim) {
+ String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")");
- if(i != tokenExpansionSize) {
- result.add(operatorName);
- }
- }
+ if(currPatt == null) {
+ currPatt = new StringBuilder();
+ currExclusionPatt = new StringBuilder();
- return result;
+ currPatt.append("(?:" + delimPat + ")");
+ currExclusionPatt.append("(?:(?:" + delim + ")+)");
+
+ } else {
+ currPatt.append("|(?:" + delimPat + ")");
+ currExclusionPatt.append("|(?:(?:" + delim + ")+)");
}
+ }
+
+ /**
+ * Marks strings matching the pattern delim as non-splittable.
+ *
+ * @param delim
+ * The regex describing strings that must not be split.
+ */
+ public void addNonMatcher(String delim) {
+ if(currPatt == null) {
+ currPatt = new StringBuilder();
+ currExclusionPatt = new StringBuilder();
- return new FunctionalList<>(tokenToSplit);
+ currExclusionPatt.append("(?:" + delim + ")");
+ } else {
+ currExclusionPatt.append("|(?:" + delim + ")");
+ }
+ }
+ /**
+ * Compiles the current set of delimiters to a pattern.
+ *
+ * Makes this splitter ready to use; add a delimiter or non-matcher first.
+ */
+ public void compile() {
+ compPatt = Pattern.compile(currPatt.toString());
+ exclusionPatt = Pattern.compile(currExclusionPatt.toString());
}
}