summaryrefslogtreecommitdiff
path: root/base/src/main/java/bjc/utils/parserutils/splitter
diff options
context:
space:
mode:
Diffstat (limited to 'base/src/main/java/bjc/utils/parserutils/splitter')
-rw-r--r--base/src/main/java/bjc/utils/parserutils/splitter/ChainTokenSplitter.java50
-rw-r--r--base/src/main/java/bjc/utils/parserutils/splitter/ConfigurableTokenSplitter.java122
-rw-r--r--base/src/main/java/bjc/utils/parserutils/splitter/ExcludingTokenSplitter.java71
-rw-r--r--base/src/main/java/bjc/utils/parserutils/splitter/FilteredTokenSplitter.java37
-rw-r--r--base/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java46
-rw-r--r--base/src/main/java/bjc/utils/parserutils/splitter/TokenSplitter.java21
-rw-r--r--base/src/main/java/bjc/utils/parserutils/splitter/TransformTokenSplitter.java38
7 files changed, 385 insertions, 0 deletions
diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/ChainTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/ChainTokenSplitter.java
new file mode 100644
index 0000000..4736310
--- /dev/null
+++ b/base/src/main/java/bjc/utils/parserutils/splitter/ChainTokenSplitter.java
@@ -0,0 +1,50 @@
+package bjc.utils.parserutils.splitter;
+
+import bjc.utils.funcdata.FunctionalList;
+import bjc.utils.funcdata.IList;
+
+/**
+ * A token splitter that chains several other splitters together.
+ *
+ * @author EVE
+ *
+ */
+public class ChainTokenSplitter implements TokenSplitter {
+ private final IList<TokenSplitter> spliters;
+
+ /**
+ * Create a new chain token splitter backed by a newly constructed list.
+ */
+ public ChainTokenSplitter() {
+ spliters = new FunctionalList<>();
+ }
+
+ /**
+ * Append a series of splitters to the chain.
+ *
+ * @param splitters
+ * The splitters to append to the chain.
+ */
+ public void appendSplitters(final TokenSplitter... splitters) {
+ spliters.addAll(splitters);
+ }
+
+ /**
+ * Prepend a series of splitters to the chain.
+ *
+ * @param splitters
+ * The splitters to prepend to the chain.
+ */
+ public void prependSplitters(final TokenSplitter... splitters) {
+ spliters.prependAll(splitters);
+ }
+
+ @Override
+ public IList<String> split(final String input) {
+ final IList<String> initList = new FunctionalList<>(input);
+ /* Feed the strings produced so far through each splitter; order presumably follows the chain - confirm reduceAux. */
+ return spliters.reduceAux(initList, (splitter, strangs) -> {
+ return strangs.flatMap(splitter::split);
+ });
+ }
+} \ No newline at end of file
diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/ConfigurableTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/ConfigurableTokenSplitter.java
new file mode 100644
index 0000000..48ddcb4
--- /dev/null
+++ b/base/src/main/java/bjc/utils/parserutils/splitter/ConfigurableTokenSplitter.java
@@ -0,0 +1,122 @@
+package bjc.utils.parserutils.splitter;
+
+import static bjc.utils.PropertyDB.applyFormat;
+
+import java.util.LinkedHashSet;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import bjc.utils.funcdata.IList;
+
+/**
+ * Split a string into pieces around a regular expression, and offer an easy way
+ * to configure the regular expression.
+ *
+ * @author EVE
+ *
+ */
+public class ConfigurableTokenSplitter extends SimpleTokenSplitter {
+ private final Set<String> simpleDelimiters;
+ private final Set<String> multipleDelimiters;
+ private final Set<String> rRawDelimiters;
+
+ /**
+ * Create a new token splitter with blank configuration; call compile()
+ * after adding delimiters and before splitting.
+ * @param keepDelims
+ * Whether or not to keep delimiters.
+ */
+ public ConfigurableTokenSplitter(final boolean keepDelims) {
+ super(null, keepDelims);
+
+ /*
+ * Use linked hash-sets to keep items in insertion order.
+ */
+ simpleDelimiters = new LinkedHashSet<>();
+ multipleDelimiters = new LinkedHashSet<>();
+ rRawDelimiters = new LinkedHashSet<>();
+ }
+
+ /**
+ * Add a set of simple delimiters to this splitter.
+ *
+ * Simple delimiters match one occurrence of themselves as literals.
+ *
+ * @param simpleDelims
+ * The simple delimiters to add.
+ */
+ public void addSimpleDelimiters(final String... simpleDelims) {
+ for (final String simpleDelim : simpleDelims) {
+ simpleDelimiters.add(simpleDelim);
+ }
+ }
+
+ /**
+ * Add a set of multiple delimiters to this splitter.
+ *
+ * Multiple delimiters match one or more occurrences of themselves as
+ * literals.
+ *
+ * @param multiDelims
+ * The multiple delimiters to add.
+ */
+ public void addMultiDelimiters(final String... multiDelims) {
+ for (final String multiDelim : multiDelims) {
+ multipleDelimiters.add(multiDelim);
+ }
+ }
+
+ /**
+ * Add a set of raw delimiters to this splitter.
+ *
+ * Raw delimiters match one occurrence of themselves as regular
+ * expressions.
+ *
+ * @param rRawDelims
+ * The raw delimiters to add.
+ */
+ public void addRawDelimiters(final String... rRawDelims) {
+ for (final String rRawDelim : rRawDelims) {
+ rRawDelimiters.add(rRawDelim);
+ }
+ }
+
+ /**
+ * Compile the configured delimiters into the regular expression used for
+ * splitting. Throws IllegalStateException if no delimiters were added.
+ */
+ public void compile() {
+ final StringBuilder rPattern = new StringBuilder();
+
+ for (final String rRawDelimiter : rRawDelimiters) {
+ rPattern.append(applyFormat("rawDelim", rRawDelimiter));
+ }
+
+ for (final String multipleDelimiter : multipleDelimiters) {
+ rPattern.append(applyFormat("multipleDelim", multipleDelimiter));
+ }
+
+ for (final String simpleDelimiter : simpleDelimiters) {
+ rPattern.append(applyFormat("simpleDelim", simpleDelimiter));
+ }
+ /* Nothing configured leaves the buffer empty, where deleteCharAt(-1) would throw an opaque index error. */
+ if (rPattern.length() == 0) throw new IllegalStateException("Must add at least one delimiter before compiling");
+ rPattern.deleteCharAt(rPattern.length() - 1); // drop the last char; presumably a trailing separator from the format - confirm
+ spliter = Pattern.compile(rPattern.toString());
+ }
+
+ @Override
+ public IList<String> split(final String input) {
+ if (spliter == null) throw new IllegalStateException("Must compile splitter before use");
+
+ return super.split(input);
+ }
+
+ @Override
+ public String toString() {
+ final String fmt = "ConfigurableTokenSplitter [simpleDelimiters=%s, multipleDelimiters=%s,"
+ + " rRawDelimiters=%s, spliter=%s]";
+
+ return String.format(fmt, simpleDelimiters, multipleDelimiters, rRawDelimiters, spliter);
+ }
+}
diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/ExcludingTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/ExcludingTokenSplitter.java
new file mode 100644
index 0000000..369e7ae
--- /dev/null
+++ b/base/src/main/java/bjc/utils/parserutils/splitter/ExcludingTokenSplitter.java
@@ -0,0 +1,71 @@
+package bjc.utils.parserutils.splitter;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.function.Predicate;
+
+import bjc.utils.funcdata.FunctionalList;
+import bjc.utils.funcdata.IList;
+
+/**
+ * A token splitter that will not split certain tokens.
+ *
+ * @author EVE
+ *
+ */
+public class ExcludingTokenSplitter implements TokenSplitter {
+ private final Set<String> literalExclusions;
+
+ private final IList<Predicate<String>> predExclusions;
+
+ private final TokenSplitter spliter;
+
+ /**
+ * Create a new excluding token splitter that wraps the given splitter.
+ *
+ * @param splitter
+ * The splitter to apply to non-excluded strings.
+ */
+ public ExcludingTokenSplitter(final TokenSplitter splitter) {
+ spliter = splitter;
+
+ literalExclusions = new HashSet<>();
+
+ predExclusions = new FunctionalList<>();
+ }
+
+ /**
+ * Exclude literal strings from splitting; an input equal to one skips the wrapped splitter.
+ *
+ * @param exclusions
+ * The strings to exclude from splitting.
+ */
+ public final void addLiteralExclusions(final String... exclusions) {
+ for (final String exclusion : exclusions) {
+ literalExclusions.add(exclusion);
+ }
+ }
+
+ /**
+ * Exclude every string that satisfies at least one of the predicates from
+ * splitting.
+ *
+ * @param exclusions
+ * The predicates to use for exclusions.
+ */
+ @SafeVarargs
+ public final void addPredicateExclusion(final Predicate<String>... exclusions) {
+ for (final Predicate<String> exclusion : exclusions) {
+ predExclusions.add(exclusion);
+ }
+ }
+
+ @Override
+ public IList<String> split(final String input) {
+ if (literalExclusions.contains(input)) // exact literal match: the wrapped splitter is not called
+ return new FunctionalList<>(input);
+ else if (predExclusions.anyMatch(pred -> pred.test(input))) // input is tested against the registered predicates
+ return new FunctionalList<>(input);
+ else return spliter.split(input);
+ }
+}
diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/FilteredTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/FilteredTokenSplitter.java
new file mode 100644
index 0000000..5d954e0
--- /dev/null
+++ b/base/src/main/java/bjc/utils/parserutils/splitter/FilteredTokenSplitter.java
@@ -0,0 +1,37 @@
+package bjc.utils.parserutils.splitter;
+
+import java.util.function.Predicate;
+
+import bjc.utils.funcdata.IList;
+
+/**
+ * A token splitter that filters the tokens of another splitter by a predicate.
+ * NOTE(review): documented as removing matching tokens, but split() calls
+ * getMatching(filter), whose name suggests matches are kept - confirm.
+ * @author bjculkin
+ *
+ */
+public class FilteredTokenSplitter implements TokenSplitter {
+ private TokenSplitter source;
+
+ private Predicate<String> filter;
+
+ /**
+ * Create a new filtered token splitter over the given source.
+ *
+ * @param source
+ * The splitter to get tokens from.
+ *
+ * @param filter
+ * The filter to pass tokens through.
+ */
+ public FilteredTokenSplitter(TokenSplitter source, Predicate<String> filter) {
+ this.source = source;
+ this.filter = filter;
+ }
+
+ @Override
+ public IList<String> split(String input) {
+ return source.split(input).getMatching(filter); // split with the source, then hand the filter to getMatching
+ }
+}
diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java
new file mode 100644
index 0000000..c357886
--- /dev/null
+++ b/base/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java
@@ -0,0 +1,46 @@
+package bjc.utils.parserutils.splitter;
+
+import java.util.regex.Pattern;
+
+import bjc.utils.funcdata.IList;
+import bjc.utils.functypes.ID;
+import bjc.utils.ioutils.RegexStringEditor;
+
+/**
+ * Splits a string into pieces around a regular expression.
+ *
+ * @author EVE
+ *
+ */
+public class SimpleTokenSplitter implements TokenSplitter {
+ protected Pattern spliter; // the pattern to split around; protected and non-final, so subclasses can replace it
+
+ private final boolean keepDelim;
+
+ /**
+ * Create a new simple token splitter that splits around the given pattern.
+ *
+ * @param splitter
+ * The pattern to split around; stored as given, with no null check.
+ *
+ * @param keepDelims
+ * Whether or not delimiters should be kept.
+ */
+ public SimpleTokenSplitter(final Pattern splitter, final boolean keepDelims) {
+ spliter = splitter;
+
+ keepDelim = keepDelims;
+ }
+
+ @Override
+ public IList<String> split(final String input) {
+ if (keepDelim) // both mappers are ID.id(), so nothing is rewritten in this branch
+ return RegexStringEditor.mapOccurances(input, spliter, ID.id(), ID.id());
+ else return RegexStringEditor.mapOccurances(input, spliter, ID.id(), strang -> ""); // last mapper yields ""; presumably applied to each delimiter match - confirm
+ }
+
+ @Override
+ public String toString() {
+ return String.format("SimpleTokenSplitter [spliter=%s, keepDelim=%s]", spliter, keepDelim);
+ }
+}
diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/TokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/TokenSplitter.java
new file mode 100644
index 0000000..ddb28a7
--- /dev/null
+++ b/base/src/main/java/bjc/utils/parserutils/splitter/TokenSplitter.java
@@ -0,0 +1,21 @@
+package bjc.utils.parserutils.splitter;
+
+import bjc.utils.funcdata.IList;
+
+/**
+ * Split a string into a list of pieces.
+ *
+ * @author EVE
+ *
+ */
+public interface TokenSplitter {
+ /**
+ * Split the given string into a list of pieces; how it is divided is up to the implementation.
+ *
+ * @param input
+ * The string to split.
+ *
+ * @return The pieces of the string, as produced by the implementing splitter.
+ */
+ public IList<String> split(String input);
+}
diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/TransformTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/TransformTokenSplitter.java
new file mode 100644
index 0000000..80490f5
--- /dev/null
+++ b/base/src/main/java/bjc/utils/parserutils/splitter/TransformTokenSplitter.java
@@ -0,0 +1,38 @@
+package bjc.utils.parserutils.splitter;
+
+import java.util.function.UnaryOperator;
+
+import bjc.utils.funcdata.IList;
+
+/**
+ * A token splitter that performs a transform on the tokens from another
+ * splitter.
+ *
+ * @author bjculkin
+ *
+ */
+public class TransformTokenSplitter implements TokenSplitter {
+ private TokenSplitter source;
+
+ private UnaryOperator<String> transform;
+
+ /**
+ * Create a new transforming splitter over the given source.
+ *
+ * @param source
+ * The splitter to use as a source.
+ *
+ * @param transform
+ * The transform handed to map() on the source's tokens.
+ */
+ public TransformTokenSplitter(TokenSplitter source, UnaryOperator<String> transform) {
+ this.source = source;
+ this.transform = transform;
+ }
+
+ @Override
+ public IList<String> split(String input) {
+ return source.split(input).map(transform); // split with the source first, then map the transform over its result
+ }
+
+}