From c82e3b3b2de0633317ec8fc85925e91422820597 Mon Sep 17 00:00:00 2001
From: "Benjamin J. Culkin"
Date: Sun, 8 Oct 2017 22:39:59 -0300
Subject: Start splitting into maven modules

---
 .../parserutils/splitter/ChainTokenSplitter.java   |  50 +++++++++
 .../splitter/ConfigurableTokenSplitter.java        | 131 +++++++++++++++++++++
 .../splitter/ExcludingTokenSplitter.java           |  71 ++++++++++++
 .../splitter/FilteredTokenSplitter.java            |  37 +++++++
 .../parserutils/splitter/SimpleTokenSplitter.java  |  46 ++++++++
 .../utils/parserutils/splitter/TokenSplitter.java  |  21 ++++
 .../splitter/TransformTokenSplitter.java           |  38 +++++++
 7 files changed, 394 insertions(+)
 create mode 100644 base/src/main/java/bjc/utils/parserutils/splitter/ChainTokenSplitter.java
 create mode 100644 base/src/main/java/bjc/utils/parserutils/splitter/ConfigurableTokenSplitter.java
 create mode 100644 base/src/main/java/bjc/utils/parserutils/splitter/ExcludingTokenSplitter.java
 create mode 100644 base/src/main/java/bjc/utils/parserutils/splitter/FilteredTokenSplitter.java
 create mode 100644 base/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java
 create mode 100644 base/src/main/java/bjc/utils/parserutils/splitter/TokenSplitter.java
 create mode 100644 base/src/main/java/bjc/utils/parserutils/splitter/TransformTokenSplitter.java

(limited to 'base/src/main/java/bjc/utils/parserutils/splitter')

diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/ChainTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/ChainTokenSplitter.java
new file mode 100644
index 0000000..4736310
--- /dev/null
+++ b/base/src/main/java/bjc/utils/parserutils/splitter/ChainTokenSplitter.java
@@ -0,0 +1,50 @@
+package bjc.utils.parserutils.splitter;
+
+import bjc.utils.funcdata.FunctionalList;
+import bjc.utils.funcdata.IList;
+
+/**
+ * A token splitter that chains several other splitters together.
+ *
+ * @author EVE
+ *
+ */
+public class ChainTokenSplitter implements TokenSplitter {
+	private final IList<TokenSplitter> spliters;
+
+	/**
+	 * Create a new chain token splitter.
+	 */
+	public ChainTokenSplitter() {
+		spliters = new FunctionalList<>();
+	}
+
+	/**
+	 * Append a series of splitters to the chain.
+	 *
+	 * @param splitters
+	 *            The splitters to append to the chain.
+	 */
+	public void appendSplitters(final TokenSplitter... splitters) {
+		spliters.addAll(splitters);
+	}
+
+	/**
+	 * Prepend a series of splitters to the chain.
+	 *
+	 * @param splitters
+	 *            The splitters to prepend to the chain.
+	 */
+	public void prependSplitters(final TokenSplitter... splitters) {
+		spliters.prependAll(splitters);
+	}
+
+	@Override
+	public IList<String> split(final String input) {
+		final IList<String> initList = new FunctionalList<>(input);
+
+		return spliters.reduceAux(initList, (splitter, strangs) -> {
+			return strangs.flatMap(splitter::split);
+		});
+	}
+}
\ No newline at end of file
diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/ConfigurableTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/ConfigurableTokenSplitter.java
new file mode 100644
index 0000000..48ddcb4
--- /dev/null
+++ b/base/src/main/java/bjc/utils/parserutils/splitter/ConfigurableTokenSplitter.java
@@ -0,0 +1,131 @@
+package bjc.utils.parserutils.splitter;
+
+import static bjc.utils.PropertyDB.applyFormat;
+
+import java.util.LinkedHashSet;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import bjc.utils.funcdata.IList;
+
+/**
+ * Split a string into pieces around a regular expression, and offer an easy way
+ * to configure the regular expression.
+ *
+ * @author EVE
+ *
+ */
+public class ConfigurableTokenSplitter extends SimpleTokenSplitter {
+	private final Set<String> simpleDelimiters;
+	private final Set<String> multipleDelimiters;
+	private final Set<String> rRawDelimiters;
+
+	/**
+	 * Create a new token splitter with blank configuration.
+	 *
+	 * @param keepDelims
+	 *            Whether or not to keep delimiters.
+	 */
+	public ConfigurableTokenSplitter(final boolean keepDelims) {
+		super(null, keepDelims);
+
+		/*
+		 * Use linked hash-sets to keep items in insertion order.
+		 */
+		simpleDelimiters = new LinkedHashSet<>();
+		multipleDelimiters = new LinkedHashSet<>();
+		rRawDelimiters = new LinkedHashSet<>();
+	}
+
+	/**
+	 * Add a set of simple delimiters to this splitter.
+	 *
+	 * Simple delimiters match one occurrence of themselves as literals.
+	 *
+	 * @param simpleDelims
+	 *            The simple delimiters to add.
+	 */
+	public void addSimpleDelimiters(final String... simpleDelims) {
+		for (final String simpleDelim : simpleDelims) {
+			simpleDelimiters.add(simpleDelim);
+		}
+	}
+
+	/**
+	 * Add a set of multiple delimiters to this splitter.
+	 *
+	 * Multiple delimiters match one or more occurrences of themselves as
+	 * literals.
+	 *
+	 * @param multiDelims
+	 *            The multiple delimiters to add.
+	 */
+	public void addMultiDelimiters(final String... multiDelims) {
+		for (final String multiDelim : multiDelims) {
+			multipleDelimiters.add(multiDelim);
+		}
+	}
+
+	/**
+	 * Add a set of raw delimiters to this splitter.
+	 *
+	 * Raw delimiters match one occurrence of themselves as regular
+	 * expressions.
+	 *
+	 * @param rRawDelims
+	 *            The raw delimiters to add.
+	 */
+	public void addRawDelimiters(final String... rRawDelims) {
+		for (final String rRawDelim : rRawDelims) {
+			rRawDelimiters.add(rRawDelim);
+		}
+	}
+
+	/**
+	 * Take the configuration and compile it into a regular expression to
+	 * use when splitting.
+	 *
+	 * @throws IllegalStateException
+	 *             If no pattern text was produced (e.g. no delimiters added).
+	 */
+	public void compile() {
+		final StringBuilder rPattern = new StringBuilder();
+
+		for (final String rRawDelimiter : rRawDelimiters) {
+			rPattern.append(applyFormat("rawDelim", rRawDelimiter));
+		}
+
+		for (final String multipleDelimiter : multipleDelimiters) {
+			rPattern.append(applyFormat("multipleDelim", multipleDelimiter));
+		}
+
+		for (final String simpleDelimiter : simpleDelimiters) {
+			rPattern.append(applyFormat("simpleDelim", simpleDelimiter));
+		}
+
+		if (rPattern.length() == 0) throw new IllegalStateException("No delimiters configured");
+
+		/*
+		 * NOTE(review): presumably strips a trailing separator left by the
+		 * last format - confirm against the PropertyDB formats.
+		 */
+		rPattern.deleteCharAt(rPattern.length() - 1);
+
+		spliter = Pattern.compile(rPattern.toString());
+	}
+
+	@Override
+	public IList<String> split(final String input) {
+		if (spliter == null) throw new IllegalStateException("Must compile splitter before use");
+
+		return super.split(input);
+	}
+
+	@Override
+	public String toString() {
+		final String fmt = "ConfigurableTokenSplitter [simpleDelimiters=%s, multipleDelimiters=%s,"
+				+ " rRawDelimiters=%s, spliter=%s]";
+
+		return String.format(fmt, simpleDelimiters, multipleDelimiters, rRawDelimiters, spliter);
+	}
+}
diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/ExcludingTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/ExcludingTokenSplitter.java
new file mode 100644
index 0000000..369e7ae
--- /dev/null
+++ b/base/src/main/java/bjc/utils/parserutils/splitter/ExcludingTokenSplitter.java
@@ -0,0 +1,71 @@
+package bjc.utils.parserutils.splitter;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.function.Predicate;
+
+import bjc.utils.funcdata.FunctionalList;
+import bjc.utils.funcdata.IList;
+
+/**
+ * A token splitter that will not split certain tokens.
+ *
+ * @author EVE
+ *
+ */
+public class ExcludingTokenSplitter implements TokenSplitter {
+	private final Set<String> literalExclusions;
+
+	private final IList<Predicate<String>> predExclusions;
+
+	private final TokenSplitter spliter;
+
+	/**
+	 * Create a new excluding token splitter.
+	 *
+	 * @param splitter
+	 *            The splitter to apply to non-excluded strings.
+	 */
+	public ExcludingTokenSplitter(final TokenSplitter splitter) {
+		spliter = splitter;
+
+		literalExclusions = new HashSet<>();
+
+		predExclusions = new FunctionalList<>();
+	}
+
+	/**
+	 * Exclude literal strings from splitting.
+	 *
+	 * @param exclusions
+	 *            The strings to exclude from splitting.
+	 */
+	public final void addLiteralExclusions(final String... exclusions) {
+		for (final String exclusion : exclusions) {
+			literalExclusions.add(exclusion);
+		}
+	}
+
+	/**
+	 * Exclude all of the strings matching any of the predicates from
+	 * splitting.
+	 *
+	 * @param exclusions
+	 *            The predicates to use for exclusions.
+	 */
+	@SafeVarargs
+	public final void addPredicateExclusion(final Predicate<String>... exclusions) {
+		for (final Predicate<String> exclusion : exclusions) {
+			predExclusions.add(exclusion);
+		}
+	}
+
+	@Override
+	public IList<String> split(final String input) {
+		if (literalExclusions.contains(input))
+			return new FunctionalList<>(input);
+		else if (predExclusions.anyMatch(pred -> pred.test(input)))
+			return new FunctionalList<>(input);
+		else return spliter.split(input);
+	}
+}
diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/FilteredTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/FilteredTokenSplitter.java
new file mode 100644
index 0000000..5d954e0
--- /dev/null
+++ b/base/src/main/java/bjc/utils/parserutils/splitter/FilteredTokenSplitter.java
@@ -0,0 +1,37 @@
+package bjc.utils.parserutils.splitter;
+
+import java.util.function.Predicate;
+
+import bjc.utils.funcdata.IList;
+
+/**
+ * A token splitter that removes tokens that match a predicate from the stream
+ * of tokens.
+ *
+ * @author bjculkin
+ *
+ */
+public class FilteredTokenSplitter implements TokenSplitter {
+	private final TokenSplitter source;
+
+	private final Predicate<String> filter;
+
+	/**
+	 * Create a new filtered token splitter.
+	 *
+	 * @param source
+	 *            The splitter to get tokens from.
+	 *
+	 * @param filter
+	 *            The filter to pass tokens through.
+	 */
+	public FilteredTokenSplitter(TokenSplitter source, Predicate<String> filter) {
+		this.source = source;
+		this.filter = filter;
+	}
+
+	@Override
+	public IList<String> split(String input) {
+		return source.split(input).getMatching(filter);
+	}
+}
diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java
new file mode 100644
index 0000000..c357886
--- /dev/null
+++ b/base/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java
@@ -0,0 +1,46 @@
+package bjc.utils.parserutils.splitter;
+
+import java.util.regex.Pattern;
+
+import bjc.utils.funcdata.IList;
+import bjc.utils.functypes.ID;
+import bjc.utils.ioutils.RegexStringEditor;
+
+/**
+ * Splits a string into pieces around a regular expression.
+ *
+ * @author EVE
+ *
+ */
+public class SimpleTokenSplitter implements TokenSplitter {
+	protected Pattern spliter;
+
+	private final boolean keepDelim;
+
+	/**
+	 * Create a new simple token splitter.
+	 *
+	 * @param splitter
+	 *            The pattern to split around.
+	 *
+	 * @param keepDelims
+	 *            Whether or not delimiters should be kept.
+	 */
+	public SimpleTokenSplitter(final Pattern splitter, final boolean keepDelims) {
+		spliter = splitter;
+
+		keepDelim = keepDelims;
+	}
+
+	@Override
+	public IList<String> split(final String input) {
+		if (keepDelim)
+			return RegexStringEditor.mapOccurances(input, spliter, ID.id(), ID.id());
+		else return RegexStringEditor.mapOccurances(input, spliter, ID.id(), strang -> "");
+	}
+
+	@Override
+	public String toString() {
+		return String.format("SimpleTokenSplitter [spliter=%s, keepDelim=%s]", spliter, keepDelim);
+	}
+}
diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/TokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/TokenSplitter.java
new file mode 100644
index 0000000..ddb28a7
--- /dev/null
+++ b/base/src/main/java/bjc/utils/parserutils/splitter/TokenSplitter.java
@@ -0,0 +1,21 @@
+package bjc.utils.parserutils.splitter;
+
+import bjc.utils.funcdata.IList;
+
+/**
+ * Split a string into a list of pieces.
+ *
+ * @author EVE
+ *
+ */
+public interface TokenSplitter {
+	/**
+	 * Split a string into a list of pieces.
+	 *
+	 * @param input
+	 *            The string to split.
+	 *
+	 * @return The pieces of the string.
+	 */
+	public IList<String> split(String input);
+}
diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/TransformTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/TransformTokenSplitter.java
new file mode 100644
index 0000000..80490f5
--- /dev/null
+++ b/base/src/main/java/bjc/utils/parserutils/splitter/TransformTokenSplitter.java
@@ -0,0 +1,38 @@
+package bjc.utils.parserutils.splitter;
+
+import java.util.function.UnaryOperator;
+
+import bjc.utils.funcdata.IList;
+
+/**
+ * A token splitter that performs a transform on the tokens from another
+ * splitter.
+ *
+ * @author bjculkin
+ *
+ */
+public class TransformTokenSplitter implements TokenSplitter {
+	private final TokenSplitter source;
+
+	private final UnaryOperator<String> transform;
+
+	/**
+	 * Create a new transforming splitter.
+	 *
+	 * @param source
+	 *            The splitter to use as a source.
+	 *
+	 * @param transform
+	 *            The transform to apply to tokens.
+	 */
+	public TransformTokenSplitter(TokenSplitter source, UnaryOperator<String> transform) {
+		this.source = source;
+		this.transform = transform;
+	}
+
+	@Override
+	public IList<String> split(String input) {
+		return source.split(input).map(transform);
+	}
+
+}
-- 
cgit v1.2.3