blob: fd4b13053dfab522a4233b2ee16d2f6026427469 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
|
package bjc.utils.funcutils;
import java.util.regex.Pattern;
/**
 * Split a string and keep given delimiters.
 *
 * Typical use: register one or more delimiters with
 * {@link #addDelimiter(String)} and/or {@link #addMultiDelimiter(String)},
 * call {@link #compile()}, then call {@link #split(String)} as often as
 * needed. Delimiters added after a call to {@link #compile()} only take
 * effect once {@link #compile()} is called again.
 *
 * @author Ben Culkin
 */
public class NeoTokenSplitter {
    /*
     * This string is a format template for the delimiter matching regex
     *
     * It does two things
     * 1. Match the provided delimiter by positive lookahead
     * 2. Match the provided delimiter by positive lookbehind
     *
     * Thus, it will only match in places where the delimiter is, but won't
     * actually match the delimiter, leaving split to put it into the stream
     */
    private static final String WITH_DELIM = "((?<=%1$s)|(?=%1$s))";

    /*
     * This string is a format template for the multi-delimiter matching
     * regex.
     *
     * It does the same thing as the single delimiter regex, but has to have
     * some negative lookahead/lookbehind assertions to avoid splitting a
     * run of delimiters into pieces:
     * 1. Split where a delimiter ends and no further delimiter follows
     * 2. Split where a delimiter starts and no delimiter precedes it
     *
     * No '+' quantifier is needed inside the assertions: "one or more
     * delimiters end/start here" is true exactly when "one delimiter
     * ends/starts here" is true. Leaving it out keeps the look-behind
     * bounded, which older Java versions require and which avoids
     * scanning backwards over the whole run.
     */
    private static final String WITH_MULTI_DELIM = "((?<=%1$s)(?!%1$s)|(?<!%1$s)(?=%1$s))";

    /* Alternation of every delimiter pattern added so far; empty until the first add. */
    private final StringBuilder currPatt = new StringBuilder();

    /* Result of the last compile() call; null until compile() has been called. */
    private Pattern compPatt;

    /**
     * Create a new token splitter.
     */
    public NeoTokenSplitter() {
    }

    /**
     * Split a provided string using configured delimiters, and keeping the
     * delimiters.
     *
     * The splitter must be compiled first.
     *
     * @param inp The string to split.
     *
     * @return The split string, including delimiters.
     *
     * @throws IllegalStateException If the splitter isn't compiled.
     */
    public String[] split(String inp) {
        if (compPatt == null) {
            throw new IllegalStateException("Token splitter has not been compiled yet");
        }

        return compPatt.split(inp);
    }

    /**
     * Adds a string as a matched delimiter to split on.
     *
     * Only works for fixed length delimiters.
     *
     * The provided string is regex-escaped before being used.
     *
     * @param delim The delimiter to match on.
     *
     * @throws NullPointerException If the delimiter is null.
     */
    public void addDelimiter(String delim) {
        if (delim == null) {
            throw new NullPointerException("delim");
        }

        appendAlternative(String.format(WITH_DELIM, Pattern.quote(delim)));
    }

    /**
     * Adds a character class as a matched delimiter to split on.
     *
     * The provided string should be a pattern to match one or more
     * occurrences of. It is inserted into the regex as-is (it is NOT
     * escaped), and a run of consecutive matches is kept together as a
     * single token.
     *
     * Because the pattern is used inside look-behind assertions, it should
     * have a bounded match length (for example a character class or a
     * literal); some Java versions reject look-behinds that have no
     * obvious maximum length.
     *
     * @param delim The delimiter to split on.
     *
     * @throws NullPointerException If the delimiter is null.
     */
    public void addMultiDelimiter(String delim) {
        if (delim == null) {
            // Without this check, string concatenation would silently
            // turn null into the literal delimiter "null".
            throw new NullPointerException("delim");
        }

        appendAlternative(String.format(WITH_MULTI_DELIM, "(?:" + delim + ")"));
    }

    /**
     * Compiles the current set of delimiters to a pattern.
     *
     * Makes this splitter ready to use.
     *
     * @throws IllegalStateException If no delimiters have been added yet.
     */
    public void compile() {
        if (currPatt.length() == 0) {
            throw new IllegalStateException("Token splitter has no delimiters to compile");
        }

        compPatt = Pattern.compile(currPatt.toString());
    }

    /*
     * Appends a delimiter pattern to the pattern being built, as a new
     * non-capturing alternative.
     */
    private void appendAlternative(String delimPat) {
        if (currPatt.length() > 0) {
            currPatt.append('|');
        }

        currPatt.append("(?:").append(delimPat).append(')');
    }
}
|