summaryrefslogtreecommitdiff
path: root/BJC-Utils2/src/main/java/bjc/utils/funcutils/NeoTokenSplitter.java
blob: fd4b13053dfab522a4233b2ee16d2f6026427469 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
package bjc.utils.funcutils;

import java.util.regex.Pattern;

/**
 * Split a string on a configurable set of delimiters, keeping the
 * delimiters themselves as tokens in the result.
 *
 * Typical usage is to register delimiters with
 * {@link #addDelimiter(String)} and/or {@link #addMultiDelimiter(String)},
 * call {@link #compile()} once, and then call {@link #split(String)} as
 * many times as needed.
 *
 * Delimiters added after {@link #compile()} only take effect once
 * {@link #compile()} is called again.
 *
 * @author Ben Culkin
 */
public class NeoTokenSplitter {
	/*
	 * This string is a format template for the delimiter matching regex
	 *
	 * It does two things
	 * 1. Match the provided delimiter by positive lookbehind
	 * 2. Match the provided delimiter by positive lookahead
	 *
	 * Thus, it will only match in places where the delimiter is, but won't
	 * actually match the delimiter, leaving split to put it into the stream
	 */
	private static final String WITH_DELIM = "((?<=%1$s)|(?=%1$s))";

	/*
	 * This string is a format template for the multi-delimiter matching
	 * regex.
	 *
	 * It does the same thing as the single delimiter regex, but has to have
	 * some negative lookahead/lookbehind assertions to avoid splitting a
	 * run of delimiters into pieces.
	 *
	 * A run of one or more delimiters ends (or starts) at a position
	 * exactly when a single delimiter does, so the lookarounds only look at
	 * one occurrence. This avoids an unbounded quantifier inside a
	 * lookbehind, which not every version of the Java regex engine accepts.
	 */
	private static final String WITH_MULTI_DELIM = "((?<=%1$s)(?!%1$s)|(?<!%1$s)(?=%1$s))";

	/* The alternation of delimiter patterns built so far; null until the first delimiter is added. */
	private StringBuilder currPatt;

	/* The compiled form of currPatt; null until compile() has been called. */
	private Pattern compPatt;

	/**
	 * Create a new token splitter.
	 */
	public NeoTokenSplitter() {
	}

	/**
	 * Split a provided string using configured delimiters, and keeping the
	 * delimiters.
	 *
	 * The splitter must be compiled first.
	 *
	 * @param inp The string to split.
	 *
	 * @return The split string, including delimiters.
	 *
	 * @throws IllegalStateException If the splitter isn't compiled.
	 */
	public String[] split(String inp) {
		if(compPatt == null) {
			throw new IllegalStateException("Token splitter has not been compiled yet");
		}

		return compPatt.split(inp);
	}

	/**
	 * Adds a string as a matched delimiter to split on.
	 *
	 * Only works for fixed length delimiters.
	 *
	 * The provided string is regex-escaped before being used.
	 *
	 * @param delim The delimiter to match on.
	 *
	 * @throws NullPointerException If the delimiter is null.
	 */
	public void addDelimiter(String delim) {
		if(delim == null) {
			throw new NullPointerException("delim must not be null");
		}

		appendAlternative(String.format(WITH_DELIM, Pattern.quote(delim)));
	}

	/**
	 * Adds a character class as a matched delimiter to split on.
	 *
	 * The provided string should be a pattern to match one or more
	 * occurrences of. It is used as a regex as-is (it is NOT escaped), and
	 * a run of consecutive matches is kept together as a single token.
	 *
	 * @param delim The delimiter to split on.
	 *
	 * @throws NullPointerException If the delimiter is null.
	 */
	public void addMultiDelimiter(String delim) {
		if(delim == null) {
			/*
			 * Without this check, string concatenation would
			 * silently turn null into the literal text "null".
			 */
			throw new NullPointerException("delim must not be null");
		}

		appendAlternative(String.format(WITH_MULTI_DELIM, "(?:" + delim + ")"));
	}

	/**
	 * Compiles the current set of delimiters to a pattern.
	 *
	 * Makes this splitter ready to use.
	 *
	 * @throws IllegalStateException If no delimiters have been added yet.
	 *
	 * @throws java.util.regex.PatternSyntaxException If the accumulated
	 *         pattern is not a valid regex (for example, because of a
	 *         malformed multi-delimiter).
	 */
	public void compile() {
		if(currPatt == null) {
			throw new IllegalStateException("Token splitter has no delimiters to compile");
		}

		compPatt = Pattern.compile(currPatt.toString());
	}

	/*
	 * Append a delimiter pattern to the pattern being built, as one more
	 * alternative wrapped in its own non-capturing group.
	 */
	private void appendAlternative(String delimPat) {
		if(currPatt == null) {
			currPatt = new StringBuilder();
		} else {
			currPatt.append('|');
		}

		currPatt.append("(?:").append(delimPat).append(')');
	}
}