summary | refs | log | tree | commit | diff
path: root/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter
diff options
context:
space:
mode:
author: bculkin2442 <bjculkin@mix.wvu.edu> 2017-04-11 12:16:49 -0400
committer: bculkin2442 <bjculkin@mix.wvu.edu> 2017-04-11 12:16:49 -0400
commit: 9f619b8de8f2c5da9dff170e2e351cfe57eaebc8 (patch)
tree: 796c698c23a30541701d3330530978362052b8d1 /BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter
parent: 2cc559513eda04aabbc140c2024ebf650631bccb (diff)
Remove old splitters
Diffstat (limited to 'BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter')
-rw-r--r-- BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/ChainTokenSplitter.java | 51
-rw-r--r-- BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/ConfigurableTokenSplitter.java | 122
-rw-r--r-- BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/ExcludingTokenSplitter.java | 71
-rw-r--r-- BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java | 239
-rw-r--r-- BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/TokenSplitter.java | 26
-rw-r--r-- BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/TwoLevelSplitter.java | 125
6 files changed, 277 insertions, 357 deletions
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/ChainTokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/ChainTokenSplitter.java
new file mode 100644
index 0000000..f431688
--- /dev/null
+++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/ChainTokenSplitter.java
@@ -0,0 +1,51 @@
+package bjc.utils.parserutils.splitter;
+
+import bjc.utils.funcdata.FunctionalList;
+import bjc.utils.funcdata.IList;
+import bjc.utils.functypes.ID;
+
+/**
+ * A token splitter that chains several other splitters together.
+ *
+ * Every splitter in the chain is applied to each piece produced so far, so the
+ * output of one splitter becomes the input of the next.
+ *
+ * @author EVE
+ *
+ */
+public class ChainTokenSplitter implements TokenSplitter {
+    /* The splitters that make up the chain. */
+    private final IList<TokenSplitter> chain;
+
+    /**
+     * Create a new chain token splitter with no splitters in its chain.
+     */
+    public ChainTokenSplitter() {
+        chain = new FunctionalList<>();
+    }
+
+    /**
+     * Append a series of splitters to the chain.
+     *
+     * @param splitters
+     *                The splitters to append to the chain.
+     */
+    public void appendSplitters(final TokenSplitter... splitters) {
+        chain.addAll(splitters);
+    }
+
+    /**
+     * Prepend a series of splitters to the chain.
+     *
+     * @param splitters
+     *                The splitters to prepend to the chain.
+     */
+    public void prependSplitters(final TokenSplitter... splitters) {
+        chain.prependAll(splitters);
+    }
+
+    /**
+     * Split a string by running it through the splitters in the chain.
+     *
+     * @param input
+     *                The string to split.
+     *
+     * @return The pieces left once the chained splitters have been applied.
+     */
+    @Override
+    public IList<String> split(final String input) {
+        /*
+         * Start from a list built from the unsplit input.
+         *
+         * NOTE(review): presumably this is a one-element list - confirm
+         * against the FunctionalList constructor.
+         */
+        final IList<String> initList = new FunctionalList<>(input);
+
+        /*
+         * Re-split every accumulated piece with each splitter, flattening
+         * the results back into a single list.
+         *
+         * NOTE(review): presumably reduceAux visits the splitters in list
+         * order - confirm against IList.
+         */
+        return chain.reduceAux(initList, (splitter, pieces) -> pieces.flatMap(splitter::split), ID.id());
+    }
+} \ No newline at end of file
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/ConfigurableTokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/ConfigurableTokenSplitter.java
new file mode 100644
index 0000000..48ddcb4
--- /dev/null
+++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/ConfigurableTokenSplitter.java
@@ -0,0 +1,122 @@
+package bjc.utils.parserutils.splitter;
+
+import static bjc.utils.PropertyDB.applyFormat;
+
+import java.util.LinkedHashSet;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import bjc.utils.funcdata.IList;
+
+/**
+ * Split a string into pieces around a regular expression, and offer an easy way
+ * to configure the regular expression.
+ *
+ * Delimiters are collected through the add methods; {@link #compile()} must then
+ * be called before {@link #split(String)} can be used.
+ *
+ * @author EVE
+ *
+ */
+public class ConfigurableTokenSplitter extends SimpleTokenSplitter {
+    private final Set<String> simpleDelimiters;
+    private final Set<String> multipleDelimiters;
+    private final Set<String> rRawDelimiters;
+
+    /**
+     * Create a new token splitter with blank configuration.
+     *
+     * The splitting pattern starts out unset, and stays unset until
+     * {@link #compile()} is called.
+     *
+     * @param keepDelims
+     *                Whether or not to keep delimiters.
+     */
+    public ConfigurableTokenSplitter(final boolean keepDelims) {
+        super(null, keepDelims);
+
+        /*
+         * Use linked hash-sets to keep items in insertion order.
+         */
+        simpleDelimiters = new LinkedHashSet<>();
+        multipleDelimiters = new LinkedHashSet<>();
+        rRawDelimiters = new LinkedHashSet<>();
+    }
+
+    /**
+     * Add a set of simple delimiters to this splitter.
+     *
+     * Simple delimiters match one occurrence of themselves as literals.
+     *
+     * The new delimiters take effect at the next call to {@link #compile()}.
+     *
+     * @param simpleDelims
+     *                The simple delimiters to add.
+     */
+    public void addSimpleDelimiters(final String... simpleDelims) {
+        for (final String simpleDelim : simpleDelims) {
+            simpleDelimiters.add(simpleDelim);
+        }
+    }
+
+    /**
+     * Add a set of multiple delimiters to this splitter.
+     *
+     * Multiple delimiters match one or more occurrences of themselves as
+     * literals.
+     *
+     * The new delimiters take effect at the next call to {@link #compile()}.
+     *
+     * @param multiDelims
+     *                The multiple delimiters to add.
+     */
+    public void addMultiDelimiters(final String... multiDelims) {
+        for (final String multiDelim : multiDelims) {
+            multipleDelimiters.add(multiDelim);
+        }
+    }
+
+    /**
+     * Add a set of raw delimiters to this splitter.
+     *
+     * Raw delimiters match one occurrence of themselves as regular
+     * expressions.
+     *
+     * The new delimiters take effect at the next call to {@link #compile()}.
+     *
+     * @param rRawDelims
+     *                The raw delimiters to add.
+     */
+    public void addRawDelimiters(final String... rRawDelims) {
+        for (final String rRawDelim : rRawDelims) {
+            rRawDelimiters.add(rRawDelim);
+        }
+    }
+
+    /**
+     * Take the configuration and compile it into a regular expression to
+     * use when splitting.
+     *
+     * Raw delimiters are emitted first, then multiple delimiters, then
+     * simple delimiters, each group in insertion order.
+     *
+     * @throws IllegalStateException
+     *                 If no delimiters have been configured, so there is
+     *                 nothing to compile.
+     */
+    public void compile() {
+        final StringBuilder rPattern = new StringBuilder();
+
+        for (final String rRawDelimiter : rRawDelimiters) {
+            rPattern.append(applyFormat("rawDelim", rRawDelimiter));
+        }
+
+        for (final String multipleDelimiter : multipleDelimiters) {
+            rPattern.append(applyFormat("multipleDelim", multipleDelimiter));
+        }
+
+        for (final String simpleDelimiter : simpleDelimiters) {
+            rPattern.append(applyFormat("simpleDelim", simpleDelimiter));
+        }
+
+        /*
+         * With nothing accumulated there is no pattern to compile, and
+         * trimming the trailing character below would index position -1
+         * and fail with a StringIndexOutOfBoundsException.
+         */
+        if (rPattern.length() == 0) {
+            throw new IllegalStateException("Cannot compile a splitter with no delimiters configured");
+        }
+
+        /*
+         * Drop the last character of the accumulated pattern.
+         *
+         * NOTE(review): presumably every format ends in a one-character
+         * separator such as '|' - confirm against the PropertyDB formats.
+         */
+        rPattern.deleteCharAt(rPattern.length() - 1);
+
+        spliter = Pattern.compile(rPattern.toString());
+    }
+
+    /**
+     * Split a string around the compiled delimiter pattern.
+     *
+     * @param input
+     *                The string to split.
+     *
+     * @return The pieces of the string, as produced by the superclass.
+     *
+     * @throws IllegalStateException
+     *                 If {@link #compile()} has not been called yet.
+     */
+    @Override
+    public IList<String> split(final String input) {
+        if (spliter == null) {
+            throw new IllegalStateException("Must compile splitter before use");
+        }
+
+        return super.split(input);
+    }
+
+    @Override
+    public String toString() {
+        final String fmt = "ConfigurableTokenSplitter [simpleDelimiters=%s, multipleDelimiters=%s,"
+                + " rRawDelimiters=%s, spliter=%s]";
+
+        return String.format(fmt, simpleDelimiters, multipleDelimiters, rRawDelimiters, spliter);
+    }
+}
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/ExcludingTokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/ExcludingTokenSplitter.java
new file mode 100644
index 0000000..369e7ae
--- /dev/null
+++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/ExcludingTokenSplitter.java
@@ -0,0 +1,71 @@
+package bjc.utils.parserutils.splitter;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.function.Predicate;
+
+import bjc.utils.funcdata.FunctionalList;
+import bjc.utils.funcdata.IList;
+
+/**
+ * A token splitter that leaves certain tokens unsplit.
+ *
+ * Strings that are excluded, either literally or by a predicate, are returned
+ * as they are; every other string is handed to the wrapped splitter.
+ *
+ * @author EVE
+ *
+ */
+public class ExcludingTokenSplitter implements TokenSplitter {
+    /* Strings that are never split. */
+    private final Set<String> literalExclusions;
+
+    /* Tests that mark a string as not to be split. */
+    private final IList<Predicate<String>> predExclusions;
+
+    /* The splitter used for everything that is not excluded. */
+    private final TokenSplitter delegate;
+
+    /**
+     * Create a new excluding token splitter with no exclusions.
+     *
+     * @param splitter
+     *                The splitter to apply to non-excluded strings.
+     */
+    public ExcludingTokenSplitter(final TokenSplitter splitter) {
+        delegate = splitter;
+
+        literalExclusions = new HashSet<>();
+
+        predExclusions = new FunctionalList<>();
+    }
+
+    /**
+     * Mark literal strings as excluded from splitting.
+     *
+     * @param exclusions
+     *                The strings to exclude from splitting.
+     */
+    public final void addLiteralExclusions(final String... exclusions) {
+        for (int idx = 0; idx < exclusions.length; idx++) {
+            literalExclusions.add(exclusions[idx]);
+        }
+    }
+
+    /**
+     * Mark every string accepted by any of the predicates as excluded from
+     * splitting.
+     *
+     * @param exclusions
+     *                The predicates to use for exclusions.
+     */
+    @SafeVarargs
+    public final void addPredicateExclusion(final Predicate<String>... exclusions) {
+        for (int idx = 0; idx < exclusions.length; idx++) {
+            predExclusions.add(exclusions[idx]);
+        }
+    }
+
+    /**
+     * Split a string, unless it is excluded.
+     *
+     * @param input
+     *                The string to split.
+     *
+     * @return A list built from the input alone if it is excluded, otherwise
+     *         whatever the wrapped splitter returns for it.
+     */
+    @Override
+    public IList<String> split(final String input) {
+        /*
+         * Literal exclusions are checked first; the predicates are only
+         * consulted when no literal matches.
+         */
+        final boolean excluded = literalExclusions.contains(input)
+                || predExclusions.anyMatch(check -> check.test(input));
+
+        if (excluded) {
+            return new FunctionalList<>(input);
+        }
+
+        return delegate.split(input);
+    }
+}
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java
index b30cec1..d483f7a 100644
--- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java
+++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java
@@ -1,239 +1,46 @@
package bjc.utils.parserutils.splitter;
-import java.util.HashSet;
-import java.util.Set;
import java.util.regex.Pattern;
+import bjc.utils.funcdata.IList;
+import bjc.utils.functypes.ID;
+import bjc.utils.ioutils.RegexStringEditor;
+
/**
- * Simple implementation of {@link TokenSplitter}
+ * Splits a string into pieces around a regular expression.
*
* @author EVE
+ *
*/
-@Deprecated
public class SimpleTokenSplitter implements TokenSplitter {
- /*
- * This string is a format template for the delimiter matching regex
- *
- * It does two things:
- *
- * <ol> <li> Match to the left of the provided delimiter by positive
- * lookahead </li> <li> Match to the right of the provided delimiter by
- * positive lookbehind </li> </ol>
- *
- * Thus, it will only match in places where the delimiter is, but won't
- * actually match the delimiter, leaving split to put it into the stream
- */
- private static String WITH_DELIM = "(?:(?<=%1$s)|(?=%1$s))";
-
- /*
- * This string is a format template for the multi-delimiter matching
- * regex.
- *
- * It does the same thing as the single delimiter regex, but has to have
- * some negative lookahead/lookbehind assertions to avoid splitting a
- * delimiter into pieces.
- */
- private static String WITH_MULTI_DELIM = "(?:(?<=%1$s+)(?!%1$s)|(?<!%1$s)(?=%1$s+))";
-
- /*
- * These represent the internal state of the splitter.
- */
- private StringBuilder currPatt;
- private StringBuilder currExclusionPatt;
-
- /*
- * These represent the external state of the splitter.
- *
- * Compilation causes internal to become external.
- */
- private Pattern compPatt;
- private Pattern exclusionPatt;
-
- /*
- * These represent info for debugging.
- */
- private final Set<String> delimSet;
- private final Set<String> multidelimSet;
- private final Set<String> exclusionSet;
+ protected Pattern spliter;
- /**
- * Create a new token splitter.
- */
- public SimpleTokenSplitter() {
- delimSet = new HashSet<>();
- multidelimSet = new HashSet<>();
- exclusionSet = new HashSet<>();
- }
-
- @Override
- public String[] split(final String inp) {
- if (compPatt == null) throw new IllegalStateException("Token splitter has not been compiled yet");
-
- /*
- * Don't split something that we should exclude from being
- * split.
- */
- if (exclusionPatt.matcher(inp).matches()) return new String[] { inp };
-
- return compPatt.split(inp);
- }
+ private final boolean keepDelim;
/**
- * Adds one or more strings as matched delimiters to split on.
+ * Create a new simple token splitter.
*
- * Only works for fixed length delimiters.
+ * @param splitter
+ * The pattern to split around.
*
- * The provided strings are regex-escaped before being used.
- *
- * @param delims
- * The delimiters to match on.
+ * @param keepDelims
+ * Whether or not delimiters should be kept.
*/
- public void addDelimiter(final String... delims) {
- for (final String delim : delims) {
- if (delim == null) throw new NullPointerException("Delim must not be null");
-
- final String quoteDelim = Pattern.quote(delim);
- final String delimPat = String.format(WITH_DELIM, quoteDelim);
-
- if (currPatt == null) {
- currPatt = new StringBuilder();
- currExclusionPatt = new StringBuilder();
-
- currPatt.append("(?:" + delimPat + ")");
- currExclusionPatt.append("(?:" + quoteDelim + ")");
- } else {
- currPatt.append("|(?:" + delimPat + ")");
- currExclusionPatt.append("|(?:" + quoteDelim + ")");
- }
+ public SimpleTokenSplitter(final Pattern splitter, final boolean keepDelims) {
+ spliter = splitter;
- delimSet.add(delim);
- }
+ keepDelim = keepDelims;
}
- /**
- * Adds a character class as a matched delimiter to split on.
- *
- * The provided string should be a pattern to match one or more
- * occurances of.
- *
- * @param delims
- * The delimiter to split on.
- */
- public void addMultiDelimiter(final String... delims) {
- for (final String delim : delims) {
- if (delim == null) throw new NullPointerException("Delim must not be null");
-
- final String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")");
-
- if (currPatt == null) {
- currPatt = new StringBuilder();
- currExclusionPatt = new StringBuilder();
-
- currPatt.append("(?:" + delimPat + ")");
- currExclusionPatt.append("(?:(?:" + delim + ")+)");
-
- } else {
- currPatt.append("|(?:" + delimPat + ")");
- currExclusionPatt.append("|(?:(?:" + delim + ")+)");
- }
-
- multidelimSet.add(delim);
- }
- }
-
- /**
- * Marks strings matching the pattern delim as non-splittable.
- *
- * @param delims
- * The regex to not splitting matching strings.
- */
- public void addNonMatcher(final String... delims) {
- for (final String delim : delims) {
- if (delim == null) throw new NullPointerException("Delim must not be null");
-
- if (currPatt == null) {
- currPatt = new StringBuilder();
- currExclusionPatt = new StringBuilder();
-
- currExclusionPatt.append("(?:" + delim + ")");
- } else {
- currExclusionPatt.append("|(?:" + delim + ")");
- }
-
- exclusionSet.add(delim);
- }
- }
-
- /**
- * Compiles the current set of delimiters to a pattern.
- *
- * Makes this splitter ready to use.
- */
- public void compile() {
- if (currPatt == null) {
- currPatt = new StringBuilder();
- }
- if (currExclusionPatt == null) {
- currExclusionPatt = new StringBuilder();
- }
-
- compPatt = Pattern.compile(currPatt.toString());
- exclusionPatt = Pattern.compile(currExclusionPatt.toString());
+ @Override
+ public IList<String> split(final String input) {
+ if (keepDelim)
+ return RegexStringEditor.mapOccurances(input, spliter, ID.id(), ID.id());
+ else return RegexStringEditor.mapOccurances(input, spliter, ID.id(), strang -> "");
}
- /*
- * (non-Javadoc)
- *
- * @see java.lang.Object#toString()
- */
@Override
public String toString() {
- final StringBuilder builder = new StringBuilder();
-
- builder.append("SimpleTokenSplitter [");
-
- if (currPatt != null) {
- builder.append("currPatt=");
- builder.append(currPatt);
- builder.append("\n\t, ");
- }
-
- if (currExclusionPatt != null) {
- builder.append("currExclusionPatt=");
- builder.append(currExclusionPatt);
- builder.append("\n\t, ");
- }
-
- if (compPatt != null) {
- builder.append("compPatt=");
- builder.append(compPatt);
- builder.append("\n\t, ");
- }
-
- if (exclusionPatt != null) {
- builder.append("exclusionPatt=");
- builder.append(exclusionPatt);
- builder.append("\n\t, ");
- }
-
- if (delimSet != null) {
- builder.append("delimSet=");
- builder.append(delimSet);
- builder.append("\n\t, ");
- }
-
- if (multidelimSet != null) {
- builder.append("multidelimSet=");
- builder.append(multidelimSet);
- builder.append("\n\t, ");
- }
-
- if (exclusionSet != null) {
- builder.append("exclusionSet=");
- builder.append(exclusionSet);
- }
-
- builder.append("]");
- return builder.toString();
+ return String.format("SimpleTokenSplitter [spliter=%s, keepDelim=%s]", spliter, keepDelim);
}
-}
+} \ No newline at end of file
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/TokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/TokenSplitter.java
index 6fd9f7b..ddb28a7 100644
--- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/TokenSplitter.java
+++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/TokenSplitter.java
@@ -1,27 +1,21 @@
package bjc.utils.parserutils.splitter;
+import bjc.utils.funcdata.IList;
+
/**
- * Split a string and keep given delimiters.
+ * Split a string into a list of pieces.
+ *
+ * @author EVE
*
- * @author Ben Culkin
*/
-@Deprecated
public interface TokenSplitter {
/**
- * Split a provided string using configured delimiters, and keeping the
- * delimiters.
+ * Split a string into a list of pieces.
*
- * <p>
- * The splitter must be compiled first.
- * </p>
- *
- * @param inp
+ * @param input
* The string to split.
*
- * @return The split string, including delimiters.
- *
- * @throws IllegalStateException
- * If the splitter isn't compiled.
+ * @return The pieces of the string.
*/
- String[] split(String inp);
-} \ No newline at end of file
+ public IList<String> split(String input);
+}
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/TwoLevelSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/TwoLevelSplitter.java
deleted file mode 100644
index 92b9de0..0000000
--- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/TwoLevelSplitter.java
+++ /dev/null
@@ -1,125 +0,0 @@
-package bjc.utils.parserutils.splitter;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Pattern;
-
-/**
- * Implementation of a splitter that runs in two passes.
- *
- * This is useful because {@link SimpleTokenSplitter} doesn't like handling both
- * <= and = without mangling them.
- *
- * The first pass splits on compound operators, which are built up from simple
- * operators.
- *
- * The second pass removes simple operators.
- *
- * @author EVE
- *
- */
-@Deprecated
-public class TwoLevelSplitter implements TokenSplitter {
- private final SimpleTokenSplitter high;
- private final SimpleTokenSplitter low;
-
- /**
- * Create a new two level splitter.
- */
- public TwoLevelSplitter() {
- high = new SimpleTokenSplitter();
- low = new SimpleTokenSplitter();
- }
-
- @Override
- public String[] split(final String inp) {
- final List<String> ret = new ArrayList<>();
-
- final String[] partials = high.split(inp);
-
- for (final String partial : partials) {
- final String[] finals = low.split(partial);
-
- for (final String fin : finals) {
- ret.add(fin);
- }
- }
-
- return ret.toArray(new String[ret.size()]);
- }
-
- /**
- * Adds compound operators to split on.
- *
- * @param delims
- * The compound operators to split on.
- */
- public void addCompoundDelim(final String... delims) {
- for (final String delim : delims) {
- high.addDelimiter(delim);
-
- low.addNonMatcher(Pattern.quote(delim));
- }
- }
-
- /**
- * Adds simple operators to split on.
- *
- * @param delims
- * The simple operators to split on.
- */
- public void addSimpleDelim(final String... delims) {
- for (final String delim : delims) {
- low.addDelimiter(delim);
- }
- }
-
- /**
- * Adds repeated compound operators to split on.
- *
- * @param delims
- * The repeated compound operators to split on.
- */
- public void addCompoundMulti(final String... delims) {
- for (final String delim : delims) {
- high.addMultiDelimiter(delim);
-
- low.addNonMatcher("(?:" + delim + ")+");
- }
- }
-
- /**
- * Adds simple compound operators to split on.
- *
- * @param delims
- * The repeated simple operators to split on.
- */
- public void addSimpleMulti(final String... delims) {
- for (final String delim : delims) {
- low.addMultiDelimiter(delim);
- }
- }
-
- /**
- * Exclude strings matching a regex from both splits.
- *
- * @param exclusions
- * The regexes to exclude matches for.
- */
- public void exclude(final String... exclusions) {
- for (final String exclusion : exclusions) {
- high.addNonMatcher(exclusion);
-
- low.addNonMatcher(exclusion);
- }
- }
-
- /**
- * Ready the splitter for use.
- */
- public void compile() {
- high.compile();
-
- low.compile();
- }
-}