summary | refs | log | tree | commit | diff
path: root/BJC-Utils2/src
diff options
context:
space:
mode:
author: bjculkin <bjculkin@mix.wvu.edu> 2017-04-07 16:08:53 -0400
committer: bjculkin <bjculkin@mix.wvu.edu> 2017-04-07 16:08:53 -0400
commit: 1a5f1d4cf955e5e25b45a4495aa23935b947c4ca (patch)
tree: bbdd341f2b4037e22acde452ee0f1a6271cf1f43 /BJC-Utils2/src
parent: f4baa925b0b5590bc8b12ba5f32e0218384c8efc (diff)
New splitter implementation
Diffstat (limited to 'BJC-Utils2/src')
-rw-r--r-- BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ChainTokenSplitter.java | 51
-rw-r--r-- BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ConfigurableTokenSplitter.java | 124
-rw-r--r-- BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ExcludingTokenSplitter.java | 66
-rw-r--r-- BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/SimpleTokenSplitter.java | 48
-rw-r--r-- BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/TokenSplitter.java | 21
5 files changed, 310 insertions, 0 deletions
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ChainTokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ChainTokenSplitter.java
new file mode 100644
index 0000000..2ecadaf
--- /dev/null
+++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ChainTokenSplitter.java
@@ -0,0 +1,51 @@
+package bjc.utils.parserutils.splitterv2;
+
+import bjc.utils.funcdata.FunctionalList;
+import bjc.utils.funcdata.IList;
+import bjc.utils.functypes.ID;
+
+/**
+ * Token splitter that runs its input through a chain of other splitters.
+ *
+ * Each splitter in the chain is applied to every piece accumulated so far.
+ *
+ * @author EVE
+ *
+ */
+public class ChainTokenSplitter implements TokenSplitter {
+	private IList<TokenSplitter> chain;
+
+	/**
+	 * Construct a chain token splitter with no splitters in it.
+	 */
+	public ChainTokenSplitter() {
+		chain = new FunctionalList<>();
+	}
+
+	/**
+	 * Append a series of splitters to the chain.
+	 *
+	 * @param splitters
+	 *                The splitters to append to the chain.
+	 */
+	public void appendSplitters(TokenSplitter... splitters) {
+		chain.addAll(splitters);
+	}
+
+	/**
+	 * Prepend a series of splitters to the chain.
+	 *
+	 * @param splitters
+	 *                The splitters to prepend to the chain.
+	 */
+	public void prependSplitters(TokenSplitter... splitters) {
+		chain.prependAll(splitters);
+	}
+
+	@Override
+	public IList<String> split(String input) {
+		/*
+		 * Seed the reduction with a list holding only the input; each
+		 * step re-splits every accumulated piece with one splitter.
+		 */
+		IList<String> pieces = new FunctionalList<>(input);
+
+		return chain.reduceAux(pieces, (splitter, strangs) -> strangs.flatMap(splitter::split), ID.id());
+	}
+}
\ No newline at end of file
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ConfigurableTokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ConfigurableTokenSplitter.java
new file mode 100644
index 0000000..021821a
--- /dev/null
+++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ConfigurableTokenSplitter.java
@@ -0,0 +1,124 @@
+package bjc.utils.parserutils.splitterv2;
+
+import bjc.utils.funcdata.IList;
+
+import java.util.LinkedHashSet;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import static bjc.utils.PropertyDB.applyFormat;
+
+/**
+ * Split a string into pieces around a regular expression, and offer an easy way
+ * to configure the regular expression.
+ *
+ * Delimiters are registered with the various add methods, after which
+ * {@link #compile()} must be called before {@link #split(String)} is used.
+ *
+ * @author EVE
+ *
+ */
+public class ConfigurableTokenSplitter extends SimpleTokenSplitter {
+	private Set<String> simpleDelimiters;
+	private Set<String> multipleDelimiters;
+	private Set<String> rRawDelimiters;
+
+	/**
+	 * Create a new token splitter with blank configuration.
+	 *
+	 * The splitter has no pattern until {@link #compile()} is called.
+	 *
+	 * @param keepDelims
+	 *                Whether or not to keep delimiters.
+	 */
+	public ConfigurableTokenSplitter(boolean keepDelims) {
+		super(null, keepDelims);
+
+		/*
+		 * Use linked hash-sets to keep items in insertion order.
+		 */
+		simpleDelimiters = new LinkedHashSet<>();
+		multipleDelimiters = new LinkedHashSet<>();
+		rRawDelimiters = new LinkedHashSet<>();
+	}
+
+	/**
+	 * Add a set of simple delimiters to this splitter.
+	 *
+	 * Simple delimiters match one occurrence of themselves as literals.
+	 *
+	 * @param simpleDelims
+	 *                The simple delimiters to add.
+	 */
+	public void addSimpleDelimiters(String... simpleDelims) {
+		for(String simpleDelim : simpleDelims) {
+			simpleDelimiters.add(simpleDelim);
+		}
+	}
+
+	/**
+	 * Add a set of multiple delimiters to this splitter.
+	 *
+	 * Multiple delimiters match one or more occurrences of themselves as
+	 * literals.
+	 *
+	 * @param multiDelims
+	 *                The multiple delimiters to add.
+	 */
+	public void addMultiDelimiters(String... multiDelims) {
+		for(String multiDelim : multiDelims) {
+			multipleDelimiters.add(multiDelim);
+		}
+	}
+
+	/**
+	 * Add a set of raw delimiters to this splitter.
+	 *
+	 * Raw delimiters match one occurrence of themselves as regular
+	 * expressions.
+	 *
+	 * @param rRawDelims
+	 *                The raw delimiters to add.
+	 */
+	public void addRawDelimiters(String... rRawDelims) {
+		for(String rRawDelim : rRawDelims) {
+			rRawDelimiters.add(rRawDelim);
+		}
+	}
+
+	/**
+	 * Take the configuration and compile it into a regular expression to
+	 * use when splitting.
+	 *
+	 * Raw delimiters are emitted first, then multiple delimiters, then
+	 * simple delimiters, each group in insertion order.
+	 *
+	 * @throws IllegalStateException
+	 *                 If the configuration produces an empty pattern, which
+	 *                 is always the case when no delimiters were added.
+	 */
+	public void compile() {
+		StringBuilder rPattern = new StringBuilder();
+
+		for(String rRawDelimiter : rRawDelimiters) {
+			rPattern.append(applyFormat("rawDelim", rRawDelimiter));
+		}
+
+		for(String multipleDelimiter : multipleDelimiters) {
+			rPattern.append(applyFormat("multipleDelim", multipleDelimiter));
+		}
+
+		for(String simpleDelimiter : simpleDelimiters) {
+			rPattern.append(applyFormat("simpleDelim", simpleDelimiter));
+		}
+
+		/*
+		 * An empty pattern has no last character to trim; fail with a
+		 * clear message instead of an index error from the builder.
+		 */
+		if(rPattern.length() == 0) {
+			throw new IllegalStateException("Must add at least one delimiter before compiling");
+		}
+
+		/*
+		 * Drop the final character of the pattern. Presumably every
+		 * format ends in a separator such as '|' - confirm against the
+		 * PropertyDB format definitions.
+		 */
+		rPattern.deleteCharAt(rPattern.length() - 1);
+
+		spliter = Pattern.compile(rPattern.toString());
+	}
+
+	/**
+	 * Split a string using the compiled configuration.
+	 *
+	 * @throws IllegalStateException
+	 *                 If {@link #compile()} has not yet produced a pattern.
+	 */
+	@Override
+	public IList<String> split(String input) {
+		if(spliter == null) {
+			throw new IllegalStateException("Must compile splitter before use");
+		}
+
+		return super.split(input);
+	}
+
+	@Override
+	public String toString() {
+		String fmt = "ConfigurableTokenSplitter [simpleDelimiters=%s, multipleDelimiters=%s,"
+				+ " rRawDelimiters=%s, spliter=%s]";
+
+		return String.format(fmt, simpleDelimiters, multipleDelimiters, rRawDelimiters, spliter);
+	}
+}
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ExcludingTokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ExcludingTokenSplitter.java
new file mode 100644
index 0000000..0c93a25
--- /dev/null
+++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ExcludingTokenSplitter.java
@@ -0,0 +1,66 @@
+package bjc.utils.parserutils.splitterv2;
+
+import bjc.utils.funcdata.FunctionalList;
+import bjc.utils.funcdata.IList;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.function.Predicate;
+
+/**
+ * Token splitter that leaves certain tokens unsplit and hands everything else
+ * to another splitter.
+ *
+ * @author EVE
+ *
+ */
+public class ExcludingTokenSplitter implements TokenSplitter {
+	private TokenSplitter delegate;
+
+	private Set<String> literalExclusions;
+
+	private IList<Predicate<String>> predExclusions;
+
+	/**
+	 * Create a new excluding token splitter with no exclusions.
+	 *
+	 * @param splitter
+	 *                The splitter used for strings that are not excluded.
+	 */
+	public ExcludingTokenSplitter(TokenSplitter splitter) {
+		literalExclusions = new HashSet<>();
+		predExclusions = new FunctionalList<>();
+
+		delegate = splitter;
+	}
+
+	/**
+	 * Mark an exact string as one that must not be split.
+	 *
+	 * @param exclusion
+	 *                The string to exclude from splitting.
+	 */
+	public void addLiteralExclusion(String exclusion) {
+		literalExclusions.add(exclusion);
+	}
+
+	/**
+	 * Mark every string accepted by a predicate as one that must not be
+	 * split.
+	 *
+	 * @param exclusion
+	 *                The predicate to use for exclusions.
+	 */
+	public void addPredicateExclusion(Predicate<String> exclusion) {
+		predExclusions.add(exclusion);
+	}
+
+	@Override
+	public IList<String> split(String input) {
+		/*
+		 * Literal exclusions are checked first; predicates are only
+		 * consulted when the input is not a literal exclusion.
+		 */
+		boolean excluded = literalExclusions.contains(input)
+				|| predExclusions.anyMatch(pred -> pred.test(input));
+
+		if(excluded) {
+			return new FunctionalList<>(input);
+		}
+
+		return delegate.split(input);
+	}
+
+}
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/SimpleTokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/SimpleTokenSplitter.java
new file mode 100644
index 0000000..b111ca3
--- /dev/null
+++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/SimpleTokenSplitter.java
@@ -0,0 +1,48 @@
+package bjc.utils.parserutils.splitterv2;
+
+import bjc.utils.funcdata.IList;
+import bjc.utils.functypes.ID;
+import bjc.utils.ioutils.RegexStringEditor;
+
+import java.util.regex.Pattern;
+
+/**
+ * Token splitter that breaks a string apart around matches of a regular
+ * expression.
+ *
+ * @author EVE
+ *
+ */
+public class SimpleTokenSplitter implements TokenSplitter {
+	protected Pattern spliter;
+
+	private boolean keepDelim;
+
+	/**
+	 * Create a new simple token splitter.
+	 *
+	 * @param splitter
+	 *                The pattern to split around.
+	 *
+	 * @param keepDelims
+	 *                Whether or not delimiters should be kept.
+	 */
+	public SimpleTokenSplitter(Pattern splitter, boolean keepDelims) {
+		keepDelim = keepDelims;
+
+		spliter = splitter;
+	}
+
+	@Override
+	public IList<String> split(String input) {
+		if(!keepDelim) {
+			/*
+			 * Not keeping delimiters: the last mapper yields an
+			 * empty string instead of passing its argument through.
+			 */
+			return RegexStringEditor.mapOccurances(input, spliter, ID.id(), strang -> "");
+		}
+
+		return RegexStringEditor.mapOccurances(input, spliter, ID.id(), ID.id());
+	}
+
+	@Override
+	public String toString() {
+		String fmt = "SimpleTokenSplitter [spliter=%s, keepDelim=%s]";
+
+		return String.format(fmt, spliter, keepDelim);
+	}
+}
\ No newline at end of file
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/TokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/TokenSplitter.java
new file mode 100644
index 0000000..5d510c1
--- /dev/null
+++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/TokenSplitter.java
@@ -0,0 +1,21 @@
+package bjc.utils.parserutils.splitterv2;
+
+import bjc.utils.funcdata.IList;
+
+/**
+ * Something that can break a string up into a list of pieces.
+ *
+ * @author EVE
+ *
+ */
+public interface TokenSplitter {
+	/**
+	 * Break a string up into a list of pieces.
+	 *
+	 * @param input
+	 *                The string to split.
+	 *
+	 * @return The pieces the string was split into.
+	 */
+	IList<String> split(String input);
+}