package bjc.utils.parserutils;
import java.util.regex.Pattern;
/**
* Split a string and keep given delimiters.
*
* @author Ben Culkin
*/
public class TokenSplitter {
/*
 * Format template for the regex that splits around a single delimiter.
 *
 * The template matches two kinds of zero-width positions:
 *
 * - The position just before the delimiter, found by positive lookahead
 * - The position just after the delimiter, found by positive lookbehind
 *
 * Because both alternatives are zero-width, the match sits next to the
 * delimiter without consuming it, so split leaves the delimiter in the
 * output as a token of its own.
 *
 * The single format argument (%1$s) is the delimiter regex.
 */
private static final String WITH_DELIM = "(?:(?<=%1$s)|(?=%1$s))";
/*
* This string is a format template for the multi-delimiter matching
* regex.
*
* It does the same thing as the single delimiter regex, but has to have
* some negative lookahead/lookbehind assertions to avoid splitting a
* delimiter into pieces.
*/
private static String WITH_MULTI_DELIM = "(?:(?<=%1$s+)(?!%1$s)|(?
* The splitter must be compiled first.
*
*
* @param inp
* The string to split.
*
* @return The split string, including delimiters.
*
* @throws IllegalStateException
* If the splitter isn't compiled.
*/
public String[] split(String inp) {
    // compile() is what assigns compPatt, so a null here means it was never run.
    if (compPatt == null) {
        throw new IllegalStateException("Token splitter has not been compiled yet");
    }

    // An input that matches the exclusion pattern in its entirety is handed
    // back as a single token; anything else is cut at the delimiter boundaries.
    boolean wholeInputExcluded = exclusionPatt.matcher(inp).matches();

    return wholeInputExcluded ? new String[] { inp } : compPatt.split(inp);
}
/**
 * Registers one or more literal strings as delimiters to split on.
 *
 * Each string is regex-quoted before use, so it is matched verbatim. This
 * only works for fixed length delimiters.
 *
 * Every delimiter is also added to the exclusion pattern, so an input that
 * consists of exactly one delimiter is returned unsplit.
 *
 * @param delims
 *        The delimiters to match on.
 */
public void addDelimiter(String... delims) {
    for (String rawDelim : delims) {
        String quoted = Pattern.quote(rawDelim);

        // Alternatives contributed to the split and exclusion patterns.
        String splitAlt = "(?:" + String.format(WITH_DELIM, quoted) + ")";
        String exclusionAlt = "(?:" + quoted + ")";

        if (currPatt != null) {
            // Patterns already exist: chain on as a further alternative.
            currPatt.append("|").append(splitAlt);
            currExclusionPatt.append("|").append(exclusionAlt);
            continue;
        }

        // First entry: start both patterns from scratch.
        currPatt = new StringBuilder(splitAlt);
        currExclusionPatt = new StringBuilder(exclusionAlt);
    }
}
/**
 * Registers a repeatable pattern as a delimiter to split on.
 *
 * The provided string should be a regex of which one or more consecutive
 * occurrences are to be treated as a delimiter. It is used as given, with
 * no escaping applied.
 *
 * A run of one or more occurrences is also added to the exclusion pattern,
 * so an input made up solely of such a run is returned unsplit.
 *
 * @param delim
 *        The delimiter to split on.
 */
public void addMultiDelimiter(String delim) {
    String grouped = "(?:" + delim + ")";

    // Alternatives contributed to the split and exclusion patterns.
    String splitAlt = "(?:" + String.format(WITH_MULTI_DELIM, grouped) + ")";
    String exclusionAlt = "(?:" + grouped + "+)";

    if (currPatt != null) {
        // Patterns already exist: chain on as a further alternative.
        currPatt.append("|").append(splitAlt);
        currExclusionPatt.append("|").append(exclusionAlt);
        return;
    }

    // First entry: start both patterns from scratch.
    currPatt = new StringBuilder(splitAlt);
    currExclusionPatt = new StringBuilder(exclusionAlt);
}
/**
 * Marks strings matching the given pattern as non-splittable.
 *
 * An input that matches the pattern in its entirety is returned by split
 * as a single token instead of being cut at delimiters.
 *
 * This may be called before or after any delimiters are added.
 *
 * @param delim
 *        The regex describing strings that must not be split. It is used
 *        as given, with no escaping applied.
 */
public void addNonMatcher(String delim) {
    String exclusionAlt = "(?:" + delim + ")";

    if (currPatt == null) {
        /*
         * Nothing has been registered yet. The delimiter-adding methods
         * append "|<alternative>" whenever the split pattern already
         * exists, so it must not be left empty: an empty first alternative
         * matches at every position and would split the input into single
         * characters. Seed it with "(?!)", which can never match, so later
         * alternatives chain on correctly and a splitter holding only
         * non-matchers does not split at all.
         */
        currPatt = new StringBuilder("(?!)");
        currExclusionPatt = new StringBuilder(exclusionAlt);
    } else {
        currExclusionPatt.append("|").append(exclusionAlt);
    }
}
/**
 * Compiles the current set of delimiters and exclusions into patterns.
 *
 * Makes this splitter ready to use. It may be called again after adding
 * further delimiters to pick up the additions.
 *
 * @throws IllegalStateException
 *         If nothing has been added to the splitter yet.
 *
 * @throws java.util.regex.PatternSyntaxException
 *         If the accumulated patterns are not valid regexes. In that case
 *         any previously compiled patterns are left untouched.
 */
public void compile() {
    if (currPatt == null || currExclusionPatt == null) {
        throw new IllegalStateException("Token splitter has no delimiters to compile");
    }

    // Compile both before assigning either, so a syntax error in one of them
    // cannot leave the splitter with only half of its patterns set.
    Pattern newSplitPatt = Pattern.compile(currPatt.toString());
    Pattern newExclusionPatt = Pattern.compile(currExclusionPatt.toString());

    compPatt = newSplitPatt;
    exclusionPatt = newExclusionPatt;
}
}