summary | refs | log | tree | commit | diff
path: root/BJC-Utils2/src/main/java/bjc/utils/funcutils/NeoTokenSplitter.java
diff options
context:
space:
mode:
Diffstat (limited to 'BJC-Utils2/src/main/java/bjc/utils/funcutils/NeoTokenSplitter.java')
-rw-r--r-- BJC-Utils2/src/main/java/bjc/utils/funcutils/NeoTokenSplitter.java | 112
1 files changed, 112 insertions, 0 deletions
diff --git a/BJC-Utils2/src/main/java/bjc/utils/funcutils/NeoTokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/funcutils/NeoTokenSplitter.java
new file mode 100644
index 0000000..fd4b130
--- /dev/null
+++ b/BJC-Utils2/src/main/java/bjc/utils/funcutils/NeoTokenSplitter.java
@@ -0,0 +1,112 @@
+package bjc.utils.funcutils;
+
+import java.util.regex.Pattern;
+
+/**
+ * Splits strings on configured delimiters while keeping the delimiters.
+ * Usage: add delimiters, call compile(), then call split().
+ * @author Ben Culkin
+ */
+public class NeoTokenSplitter {
+ /*
+ * Format template for the single-delimiter regex; %1$s is the delimiter.
+ *
+ * The result is a zero-width alternation with two branches:
+ * 1. (?<=X) matches the position just after the delimiter (lookbehind)
+ * 2. (?=X) matches the position just before the delimiter (lookahead)
+ *
+ * Nothing is consumed, so splitting on it cuts on both sides of each
+ * delimiter and the delimiter text stays in the split output.
+ */
+ private static String WITH_DELIM = "((?<=%1$s)|(?=%1$s))";
+
+ /*
+ * Format template for the multi-delimiter regex; %1$s is the pattern.
+ * Like WITH_DELIM, both branches are zero-width.
+ *
+ * Branch 1: after one or more X and not followed by X (end of a run).
+ * Branch 2: not preceded by X and before one or more X (start of a run).
+ * The negative assertions keep a run of X from being cut into pieces.
+ */
+ private static String WITH_MULTI_DELIM = "((?<=%1$s+)(?!%1$s)|(?<!%1$s)(?=%1$s+))";
+ /* Alternation of delimiter sub-patterns built so far; null if none added. */
+ private StringBuilder currPatt;
+ /* Pattern produced by the last compile(); null until compile() is called. */
+ private Pattern compPatt;
+
+ /**
+ * Create a new token splitter with no delimiters configured.
+ */
+ public NeoTokenSplitter() {
+ }
+
+ /**
+ * Split a string on the compiled delimiter pattern, keeping the
+ * delimiters in the result.
+ *
+ * compile() must run first, and again to pick up delimiters added later.
+ *
+ * @param inp The string to split.
+ *
+ * @return The pieces produced by Pattern.split, delimiters included.
+ *
+ * @throws IllegalStateException If the splitter isn't compiled.
+ */
+ public String[] split(String inp) {
+ if(compPatt == null) {
+ throw new IllegalStateException("Token splitter has not been compiled yet");
+ }
+
+ return compPatt.split(inp);
+ }
+
+ /**
+ * Adds a literal string as a delimiter; call compile() to apply it.
+ *
+ * Only works for fixed length delimiters.
+ *
+ * The provided string is regex-escaped (Pattern.quote) before being used.
+ *
+ * @param delim The delimiter to match on.
+ */
+ public void addDelimiter(String delim) {
+ String delimPat = String.format(WITH_DELIM, Pattern.quote(delim));
+
+ if(currPatt == null) {
+ currPatt = new StringBuilder();
+
+ currPatt.append("(?:" + delimPat + ")");
+ } else {
+ currPatt.append("|(?:" + delimPat + ")");
+ }
+ }
+
+ /**
+ * Adds a regex fragment whose repeated runs act as one delimiter.
+ *
+ * The provided string should be a pattern to match one or more
+ * occurrences of. It is inserted unescaped; call compile() to apply it.
+ *
+ * @param delim The pattern to split on runs of, e.g. a character class.
+ */
+ public void addMultiDelimiter(String delim) {
+ String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")");
+
+ if(currPatt == null) {
+ currPatt = new StringBuilder();
+
+ currPatt.append("(?:" + delimPat + ")");
+ } else {
+ currPatt.append("|(?:" + delimPat + ")");
+ }
+ }
+
+ /**
+ * Compiles the current set of delimiters, making split usable.
+ * NOTE(review): currPatt stays null until a delimiter is added, so
+ * calling this on a fresh splitter throws NullPointerException.
+ */
+ public void compile() {
+ compPatt = Pattern.compile(currPatt.toString());
+ }
+}