diff options
| author | bculkin2442 <bjculkin@mix.wvu.edu> | 2017-03-10 08:46:10 -0500 |
|---|---|---|
| committer | bculkin2442 <bjculkin@mix.wvu.edu> | 2017-03-10 08:46:10 -0500 |
| commit | 355b4d1dda5965ea9b58bd2c80e3703a55abce98 (patch) | |
| tree | 0cfad5bfea55a625223b040ad63609c1fcd6284b /BJC-Utils2/src/main/java/bjc/utils/funcutils/NeoTokenSplitter.java | |
| parent | 004b72b47d003135325a77d6d02160ae241eb1ed (diff) | |
String manipulation additions
More and better ways to manipulate strings
Diffstat (limited to 'BJC-Utils2/src/main/java/bjc/utils/funcutils/NeoTokenSplitter.java')
| -rw-r--r-- | BJC-Utils2/src/main/java/bjc/utils/funcutils/NeoTokenSplitter.java | 112 |
1 files changed, 112 insertions, 0 deletions
import java.util.regex.Pattern;

/**
 * Split a string on a configurable set of delimiters while keeping the
 * delimiters themselves as separate elements of the result.
 *
 * Usage is three steps: register delimiters with
 * {@link #addDelimiter(String)} and/or {@link #addMultiDelimiter(String)},
 * call {@link #compile()}, then call {@link #split(String)}.
 *
 * Delimiters added after {@link #compile()} do not take effect until
 * {@link #compile()} is called again.
 *
 * @author Ben Culkin
 */
public class NeoTokenSplitter {
	/*
	 * Format template for the single-delimiter matching regex.
	 *
	 * It matches the empty position
	 * 1. just after the delimiter (positive lookbehind), or
	 * 2. just before the delimiter (positive lookahead).
	 *
	 * The match is zero-width, so the delimiter itself is never consumed
	 * and split() leaves it in the output as its own element.
	 */
	private static final String WITH_DELIM = "((?<=%1$s)|(?=%1$s))";

	/*
	 * Format template for the multi-delimiter matching regex.
	 *
	 * Same idea as the single-delimiter template, but with additional
	 * negative lookahead/lookbehind assertions so that a run of
	 * consecutive delimiters is kept together instead of being split into
	 * pieces.
	 *
	 * NOTE(review): this uses an unbounded quantifier inside a lookbehind;
	 * how well that is supported may depend on the JDK version - confirm
	 * on the oldest runtime that has to be supported.
	 */
	private static final String WITH_MULTI_DELIM = "((?<=%1$s+)(?!%1$s)|(?<!%1$s)(?=%1$s+))";

	/* Alternation of every delimiter pattern added so far; null until the first add. */
	private StringBuilder currPatt;

	/* Pattern produced by the last compile(); null until compile() is called. */
	private Pattern compPatt;

	/**
	 * Create a new token splitter with no delimiters.
	 */
	public NeoTokenSplitter() {
	}

	/**
	 * Split a provided string using configured delimiters, and keeping the
	 * delimiters.
	 *
	 * The splitter must be compiled first.
	 *
	 * @param inp The string to split.
	 *
	 * @return The split string, including delimiters.
	 *
	 * @throws IllegalStateException If the splitter isn't compiled.
	 */
	public String[] split(String inp) {
		if (compPatt == null) {
			throw new IllegalStateException("Token splitter has not been compiled yet");
		}

		return compPatt.split(inp);
	}

	/**
	 * Adds a string as a matched delimiter to split on.
	 *
	 * Only works for fixed length delimiters.
	 *
	 * The provided string is regex-escaped before being used.
	 *
	 * @param delim The delimiter to match on.
	 */
	public void addDelimiter(String delim) {
		appendAlternative(String.format(WITH_DELIM, Pattern.quote(delim)));
	}

	/**
	 * Adds a character class as a matched delimiter to split on.
	 *
	 * The provided string should be a pattern to match one or more
	 * occurrences of. It is used as a regex as-is (it is not escaped).
	 *
	 * @param delim The delimiter to split on.
	 *
	 * @throws NullPointerException If the delimiter is null.
	 */
	public void addMultiDelimiter(String delim) {
		/*
		 * Without this check, string concatenation would turn null into
		 * the literal text "null" and silently split on that.
		 */
		if (delim == null) {
			throw new NullPointerException("delim");
		}

		appendAlternative(String.format(WITH_MULTI_DELIM, "(?:" + delim + ")"));
	}

	/*
	 * Append one delimiter pattern to the pending alternation, wrapped in a
	 * non-capturing group and separated from earlier entries with '|'.
	 */
	private void appendAlternative(String delimPat) {
		if (currPatt == null) {
			currPatt = new StringBuilder();
		} else {
			currPatt.append("|");
		}

		currPatt.append("(?:").append(delimPat).append(")");
	}

	/**
	 * Compiles the current set of delimiters to a pattern.
	 *
	 * Makes this splitter ready to use.
	 *
	 * @throws IllegalStateException If no delimiters have been added.
	 */
	public void compile() {
		if (currPatt == null) {
			throw new IllegalStateException("Token splitter has no delimiters to compile");
		}

		compPatt = Pattern.compile(currPatt.toString());
	}
}
