From 9f619b8de8f2c5da9dff170e2e351cfe57eaebc8 Mon Sep 17 00:00:00 2001 From: bculkin2442 Date: Tue, 11 Apr 2017 12:16:49 -0400 Subject: Remove old splitters --- .../parserutils/splitter/SimpleTokenSplitter.java | 239 ++------------------- 1 file changed, 23 insertions(+), 216 deletions(-) (limited to 'BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java') diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java index b30cec1..d483f7a 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java @@ -1,239 +1,46 @@ package bjc.utils.parserutils.splitter; -import java.util.HashSet; -import java.util.Set; import java.util.regex.Pattern; +import bjc.utils.funcdata.IList; +import bjc.utils.functypes.ID; +import bjc.utils.ioutils.RegexStringEditor; + /** - * Simple implementation of {@link TokenSplitter} + * Splits a string into pieces around a regular expression. * * @author EVE + * */ -@Deprecated public class SimpleTokenSplitter implements TokenSplitter { - /* - * This string is a format template for the delimiter matching regex - * - * It does two things: - * - *
  1. Match to the left of the provided delimiter by positive - * lookahead
  2. Match to the right of the provided delimiter by - * positive lookbehind
- * - * Thus, it will only match in places where the delimiter is, but won't - * actually match the delimiter, leaving split to put it into the stream - */ - private static String WITH_DELIM = "(?:(?<=%1$s)|(?=%1$s))"; - - /* - * This string is a format template for the multi-delimiter matching - * regex. - * - * It does the same thing as the single delimiter regex, but has to have - * some negative lookahead/lookbehind assertions to avoid splitting a - * delimiter into pieces. - */ - private static String WITH_MULTI_DELIM = "(?:(?<=%1$s+)(?!%1$s)|(? delimSet; - private final Set multidelimSet; - private final Set exclusionSet; + protected Pattern spliter; - /** - * Create a new token splitter. - */ - public SimpleTokenSplitter() { - delimSet = new HashSet<>(); - multidelimSet = new HashSet<>(); - exclusionSet = new HashSet<>(); - } - - @Override - public String[] split(final String inp) { - if (compPatt == null) throw new IllegalStateException("Token splitter has not been compiled yet"); - - /* - * Don't split something that we should exclude from being - * split. - */ - if (exclusionPatt.matcher(inp).matches()) return new String[] { inp }; - - return compPatt.split(inp); - } + private final boolean keepDelim; /** - * Adds one or more strings as matched delimiters to split on. + * Create a new simple token splitter. * - * Only works for fixed length delimiters. + * @param splitter + * The pattern to split around. * - * The provided strings are regex-escaped before being used. - * - * @param delims - * The delimiters to match on. + * @param keepDelims + * Whether or not delimiters should be kept. */ - public void addDelimiter(final String... delims) { - for (final String delim : delims) { - if (delim == null) throw new NullPointerException("Delim must not be null"); - - final String quoteDelim = Pattern.quote(delim); - final String delimPat = String.format(WITH_DELIM, quoteDelim); - - if (currPatt == null) { - currPatt = new StringBuilder(); - currExclusionPatt = new StringBuilder(); - - currPatt.append("(?:" + delimPat + ")"); - currExclusionPatt.append("(?:" + quoteDelim + ")"); - } else { - currPatt.append("|(?:" + delimPat + ")"); - currExclusionPatt.append("|(?:" + quoteDelim + ")"); - } + public SimpleTokenSplitter(final Pattern splitter, final boolean keepDelims) { + spliter = splitter; - delimSet.add(delim); - } + keepDelim = keepDelims; } - /** - * Adds a character class as a matched delimiter to split on. - * - * The provided string should be a pattern to match one or more - * occurances of. - * - * @param delims - * The delimiter to split on. - */ - public void addMultiDelimiter(final String... delims) { - for (final String delim : delims) { - if (delim == null) throw new NullPointerException("Delim must not be null"); - - final String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")"); - - if (currPatt == null) { - currPatt = new StringBuilder(); - currExclusionPatt = new StringBuilder(); - - currPatt.append("(?:" + delimPat + ")"); - currExclusionPatt.append("(?:(?:" + delim + ")+)"); - - } else { - currPatt.append("|(?:" + delimPat + ")"); - currExclusionPatt.append("|(?:(?:" + delim + ")+)"); - } - - multidelimSet.add(delim); - } - } - - /** - * Marks strings matching the pattern delim as non-splittable. - * - * @param delims - * The regex to not splitting matching strings. - */ - public void addNonMatcher(final String... delims) { - for (final String delim : delims) { - if (delim == null) throw new NullPointerException("Delim must not be null"); - - if (currPatt == null) { - currPatt = new StringBuilder(); - currExclusionPatt = new StringBuilder(); - - currExclusionPatt.append("(?:" + delim + ")"); - } else { - currExclusionPatt.append("|(?:" + delim + ")"); - } - - exclusionSet.add(delim); - } - } - - /** - * Compiles the current set of delimiters to a pattern. - * - * Makes this splitter ready to use. - */ - public void compile() { - if (currPatt == null) { - currPatt = new StringBuilder(); - } - if (currExclusionPatt == null) { - currExclusionPatt = new StringBuilder(); - } - - compPatt = Pattern.compile(currPatt.toString()); - exclusionPatt = Pattern.compile(currExclusionPatt.toString()); + @Override + public IList split(final String input) { + if (keepDelim) + return RegexStringEditor.mapOccurances(input, spliter, ID.id(), ID.id()); + else return RegexStringEditor.mapOccurances(input, spliter, ID.id(), strang -> ""); } - /* - * (non-Javadoc) - * - * @see java.lang.Object#toString() - */ @Override public String toString() { - final StringBuilder builder = new StringBuilder(); - - builder.append("SimpleTokenSplitter ["); - - if (currPatt != null) { - builder.append("currPatt="); - builder.append(currPatt); - builder.append("\n\t, "); - } - - if (currExclusionPatt != null) { - builder.append("currExclusionPatt="); - builder.append(currExclusionPatt); - builder.append("\n\t, "); - } - - if (compPatt != null) { - builder.append("compPatt="); - builder.append(compPatt); - builder.append("\n\t, "); - } - - if (exclusionPatt != null) { - builder.append("exclusionPatt="); - builder.append(exclusionPatt); - builder.append("\n\t, "); - } - - if (delimSet != null) { - builder.append("delimSet="); - builder.append(delimSet); - builder.append("\n\t, "); - } - - if (multidelimSet != null) { - builder.append("multidelimSet="); - builder.append(multidelimSet); - builder.append("\n\t, "); - } - - if (exclusionSet != null) { - builder.append("exclusionSet="); - builder.append(exclusionSet); - } - - builder.append("]"); - return builder.toString(); + return String.format("SimpleTokenSplitter [spliter=%s, keepDelim=%s]", spliter, keepDelim); } -} +} \ No newline at end of file -- cgit v1.2.3