blob: 600742952a4d28befa0ae8330ac33b8f3ee178dc (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
package bjc.utils.funcutils;
import java.util.regex.Pattern;
/**
 * Split a string on a configurable set of delimiters while keeping the
 * delimiters themselves as separate tokens in the result.
 *
 * Usage: register delimiters with {@link #addDelimiter(String)} and/or
 * {@link #addMultiDelimiter(String)}, call {@link #compile()}, then call
 * {@link #split(String)}. Delimiters added after a call to
 * {@link #compile()} are not used until {@link #compile()} is called
 * again.
 *
 * @author Ben Culkin
 */
public class NeoTokenSplitter {
    /*
     * This string is a format template for the delimiter matching regex.
     *
     * It does two things: 1. Match the provided delimiter by positive
     * lookahead 2. Match the provided delimiter by positive lookbehind
     *
     * Thus, it will only match in places where the delimiter is, but won't
     * actually match the delimiter, leaving split to put it into the stream.
     */
    private static final String WITH_DELIM = "((?<=%1$s)|(?=%1$s))";

    /*
     * This string is a format template for the multi-delimiter matching
     * regex.
     *
     * It does the same thing as the single delimiter regex, but has to have
     * some negative lookahead/lookbehind assertions to avoid splitting a
     * run of delimiters into pieces: it matches only where a delimiter ends
     * and no further delimiter follows, or where a delimiter starts and no
     * delimiter precedes it.
     *
     * No quantifier is needed inside the lookarounds: a run of delimiters
     * ends (or starts) exactly where a single delimiter does. Keeping the
     * lookbehind free of an unbounded quantifier keeps it bounded, which
     * java.util.regex may otherwise reject for delimiters longer than one
     * character.
     */
    private static final String WITH_MULTI_DELIM = "((?<=%1$s)(?!%1$s)|(?<!%1$s)(?=%1$s))";

    /* Alternation of every split-point pattern registered so far. */
    private final StringBuilder currPatt = new StringBuilder();

    /* Alternation of every "input is just a delimiter" pattern so far. */
    private final StringBuilder currExclusionPatt = new StringBuilder();

    /* Compiled form of currPatt; null until compile() is called. */
    private Pattern compPatt;

    /* Compiled form of currExclusionPatt; null until compile() is called. */
    private Pattern exclusionPatt;

    /**
     * Create a new token splitter.
     */
    public NeoTokenSplitter() {
    }

    /**
     * Split a provided string using configured delimiters, and keeping the
     * delimiters.
     *
     * The splitter must be compiled first.
     *
     * @param inp
     *            The string to split.
     *
     * @return The split string, including delimiters. If the whole input
     *         matches a delimiter, it is returned as the single element.
     *
     * @throws IllegalStateException
     *             If the splitter isn't compiled.
     */
    public String[] split(String inp) {
        if (compPatt == null) {
            throw new IllegalStateException("Token splitter has not been compiled yet");
        }

        /*
         * Don't split something that matches only an operator
         */
        if (exclusionPatt.matcher(inp).matches()) {
            return new String[] { inp };
        }

        return compPatt.split(inp);
    }

    /**
     * Adds a string as a matched delimiter to split on.
     *
     * Only works for fixed length delimiters.
     *
     * The provided string is regex-escaped before being used.
     *
     * @param delim
     *            The delimiter to match on.
     */
    public void addDelimiter(String delim) {
        String quoteDelim = Pattern.quote(delim);
        String delimPat = String.format(WITH_DELIM, quoteDelim);

        appendAlternative(delimPat, quoteDelim);
    }

    /**
     * Adds a character class as a matched delimiter to split on.
     *
     * The provided string should be a pattern to match one or more
     * occurrences of. It is used as a regex as-is (it is not escaped), and
     * consecutive occurrences are kept together as a single token.
     *
     * @param delim
     *            The delimiter to split on.
     */
    public void addMultiDelimiter(String delim) {
        String groupedDelim = "(?:" + delim + ")";
        String delimPat = String.format(WITH_MULTI_DELIM, groupedDelim);

        appendAlternative(delimPat, groupedDelim + "+");
    }

    /**
     * Compiles the current set of delimiters to a pattern.
     *
     * Makes this splitter ready to use.
     *
     * @throws IllegalStateException
     *             If no delimiters have been added yet.
     */
    public void compile() {
        if (currPatt.length() == 0) {
            throw new IllegalStateException("Token splitter has no delimiters to compile");
        }

        compPatt = Pattern.compile(currPatt.toString());
        exclusionPatt = Pattern.compile(currExclusionPatt.toString());
    }

    /*
     * Append one more alternative to both the split pattern and the
     * exclusion pattern, inserting the '|' separator for all but the first.
     */
    private void appendAlternative(String splitPatt, String exclusion) {
        if (currPatt.length() > 0) {
            currPatt.append('|');
            currExclusionPatt.append('|');
        }

        currPatt.append("(?:").append(splitPatt).append(')');
        currExclusionPatt.append("(?:").append(exclusion).append(')');
    }
}
|