diff options
| author | Benjamin J. Culkin <bjculkin@mix.wvu.edu> | 2017-10-08 22:39:59 -0300 |
|---|---|---|
| committer | Benjamin J. Culkin <bjculkin@mix.wvu.edu> | 2017-10-08 22:39:59 -0300 |
| commit | c82e3b3b2de0633317ec8fc85925e91422820597 (patch) | |
| tree | 96567416ce23c5ce85601f9cedc3a94bb1c55cba /base/src/main/java/bjc/utils/parserutils/splitter | |
| parent | b3ac1c8690c3e14c879913e5dcc03a5f5e14876e (diff) | |
Start splitting into maven modules
Diffstat (limited to 'base/src/main/java/bjc/utils/parserutils/splitter')
7 files changed, 385 insertions, 0 deletions
diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/ChainTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/ChainTokenSplitter.java new file mode 100644 index 0000000..4736310 --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/splitter/ChainTokenSplitter.java @@ -0,0 +1,50 @@ +package bjc.utils.parserutils.splitter; + +import bjc.utils.funcdata.FunctionalList; +import bjc.utils.funcdata.IList; + +/** + * A token splitter that chains several other splitters together. + * + * @author EVE + * + */ +public class ChainTokenSplitter implements TokenSplitter { + private final IList<TokenSplitter> spliters; + + /** + * Create a new chain token splitter. + */ + public ChainTokenSplitter() { + spliters = new FunctionalList<>(); + } + + /** + * Append a series of splitters to the chain. + * + * @param splitters + * The splitters to append to the chain. + */ + public void appendSplitters(final TokenSplitter... splitters) { + spliters.addAll(splitters); + } + + /** + * Prepend a series of splitters to the chain. + * + * @param splitters + * The splitters to append to the chain. + */ + public void prependSplitters(final TokenSplitter... splitters) { + spliters.prependAll(splitters); + } + + @Override + public IList<String> split(final String input) { + final IList<String> initList = new FunctionalList<>(input); + + return spliters.reduceAux(initList, (splitter, strangs) -> { + return strangs.flatMap(splitter::split); + }); + } +}
\ No newline at end of file diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/ConfigurableTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/ConfigurableTokenSplitter.java new file mode 100644 index 0000000..48ddcb4 --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/splitter/ConfigurableTokenSplitter.java @@ -0,0 +1,122 @@ +package bjc.utils.parserutils.splitter; + +import static bjc.utils.PropertyDB.applyFormat; + +import java.util.LinkedHashSet; +import java.util.Set; +import java.util.regex.Pattern; + +import bjc.utils.funcdata.IList; + +/** + * Split a string into pieces around a regular expression, and offer an easy way + * to configure the regular expression. + * + * @author EVE + * + */ +public class ConfigurableTokenSplitter extends SimpleTokenSplitter { + private final Set<String> simpleDelimiters; + private final Set<String> multipleDelimiters; + private final Set<String> rRawDelimiters; + + /** + * Create a new token splitter with blank configuration. + * + * @param keepDelims + * Whether or not to keep delimiters. + */ + public ConfigurableTokenSplitter(final boolean keepDelims) { + super(null, keepDelims); + + /* + * Use linked hash-sets to keep items in insertion order. + */ + simpleDelimiters = new LinkedHashSet<>(); + multipleDelimiters = new LinkedHashSet<>(); + rRawDelimiters = new LinkedHashSet<>(); + } + + /** + * Add a set of simple delimiters to this splitter. + * + * Simple delimiters match one occurrence of themselves as literals. + * + * @param simpleDelims + * The simple delimiters to add. + */ + public void addSimpleDelimiters(final String... simpleDelims) { + for (final String simpleDelim : simpleDelims) { + simpleDelimiters.add(simpleDelim); + } + } + + /** + * Add a set of multiple delimiters to this splitter. + * + * Multiple delimiters match one or more occurrences of themselves as + * literals. + * + * @param multiDelims + * The multiple delimiters to add. + */ + public void addMultiDelimiters(final String... multiDelims) { + for (final String multiDelim : multiDelims) { + multipleDelimiters.add(multiDelim); + } + } + + /** + * Add a set of raw delimiters to this splitter. + * + * Raw delimiters match one occurrence of themselves as regular + * expressions. + * + * @param rRawDelims + * The raw delimiters to add. + */ + public void addRawDelimiters(final String... rRawDelims) { + for (final String rRawDelim : rRawDelims) { + rRawDelimiters.add(rRawDelim); + } + } + + /** + * Take the configuration and compile it into a regular expression to + * use when splitting. + */ + public void compile() { + final StringBuilder rPattern = new StringBuilder(); + + for (final String rRawDelimiter : rRawDelimiters) { + rPattern.append(applyFormat("rawDelim", rRawDelimiter)); + } + + for (final String multipleDelimiter : multipleDelimiters) { + rPattern.append(applyFormat("multipleDelim", multipleDelimiter)); + } + + for (final String simpleDelimiter : simpleDelimiters) { + rPattern.append(applyFormat("simpleDelim", simpleDelimiter)); + } + + rPattern.deleteCharAt(rPattern.length() - 1); + + spliter = Pattern.compile(rPattern.toString()); + } + + @Override + public IList<String> split(final String input) { + if (spliter == null) throw new IllegalStateException("Must compile splitter before use"); + + return super.split(input); + } + + @Override + public String toString() { + final String fmt = "ConfigurableTokenSplitter [simpleDelimiters=%s, multipleDelimiters=%s," + + " rRawDelimiters=%s, spliter=%s]"; + + return String.format(fmt, simpleDelimiters, multipleDelimiters, rRawDelimiters, spliter); + } +} diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/ExcludingTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/ExcludingTokenSplitter.java new file mode 100644 index 0000000..369e7ae --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/splitter/ExcludingTokenSplitter.java @@ -0,0 +1,71 @@ +package bjc.utils.parserutils.splitter; + +import java.util.HashSet; +import java.util.Set; +import java.util.function.Predicate; + +import bjc.utils.funcdata.FunctionalList; +import bjc.utils.funcdata.IList; + +/** + * A token splitter that will not split certain tokens. + * + * @author EVE + * + */ +public class ExcludingTokenSplitter implements TokenSplitter { + private final Set<String> literalExclusions; + + private final IList<Predicate<String>> predExclusions; + + private final TokenSplitter spliter; + + /** + * Create a new excluding token splitter. + * + * @param splitter + * The splitter to apply to non-excluded strings. + */ + public ExcludingTokenSplitter(final TokenSplitter splitter) { + spliter = splitter; + + literalExclusions = new HashSet<>(); + + predExclusions = new FunctionalList<>(); + } + + /** + * Exclude literal strings from splitting. + * + * @param exclusions + * The strings to exclude from splitting. + */ + public final void addLiteralExclusions(final String... exclusions) { + for (final String exclusion : exclusions) { + literalExclusions.add(exclusion); + } + } + + /** + * Exclude all of the strings matching any of the predicates from + * splitting. + * + * @param exclusions + * The predicates to use for exclusions. + */ + @SafeVarargs + public final void addPredicateExclusion(final Predicate<String>... exclusions) { + for (final Predicate<String> exclusion : exclusions) { + predExclusions.add(exclusion); + } + } + + @Override + public IList<String> split(final String input) { + if (literalExclusions.contains(input)) + return new FunctionalList<>(input); + else if (predExclusions.anyMatch(pred -> pred.test(input))) + return new FunctionalList<>(input); + else return spliter.split(input); + } +} diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/FilteredTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/FilteredTokenSplitter.java new file mode 100644 index 0000000..5d954e0 --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/splitter/FilteredTokenSplitter.java @@ -0,0 +1,37 @@ +package bjc.utils.parserutils.splitter; + +import java.util.function.Predicate; + +import bjc.utils.funcdata.IList; + +/** + * A token splitter that removes tokens that match a predicate from the stream + * of tokens. + * + * @author bjculkin + * + */ +public class FilteredTokenSplitter implements TokenSplitter { + private TokenSplitter source; + + private Predicate<String> filter; + + /** + * Create a new filtered token splitter. + * + * @param source + * The splitter to get tokens from. + * + * @param filter + * The filter to pass tokens through. + */ + public FilteredTokenSplitter(TokenSplitter source, Predicate<String> filter) { + this.source = source; + this.filter = filter; + } + + @Override + public IList<String> split(String input) { + return source.split(input).getMatching(filter); + } +} diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java new file mode 100644 index 0000000..c357886 --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java @@ -0,0 +1,46 @@ +package bjc.utils.parserutils.splitter; + +import java.util.regex.Pattern; + +import bjc.utils.funcdata.IList; +import bjc.utils.functypes.ID; +import bjc.utils.ioutils.RegexStringEditor; + +/** + * Splits a string into pieces around a regular expression. + * + * @author EVE + * + */ +public class SimpleTokenSplitter implements TokenSplitter { + protected Pattern spliter; + + private final boolean keepDelim; + + /** + * Create a new simple token splitter. + * + * @param splitter + * The pattern to split around. + * + * @param keepDelims + * Whether or not delimiters should be kept. + */ + public SimpleTokenSplitter(final Pattern splitter, final boolean keepDelims) { + spliter = splitter; + + keepDelim = keepDelims; + } + + @Override + public IList<String> split(final String input) { + if (keepDelim) + return RegexStringEditor.mapOccurances(input, spliter, ID.id(), ID.id()); + else return RegexStringEditor.mapOccurances(input, spliter, ID.id(), strang -> ""); + } + + @Override + public String toString() { + return String.format("SimpleTokenSplitter [spliter=%s, keepDelim=%s]", spliter, keepDelim); + } +} diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/TokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/TokenSplitter.java new file mode 100644 index 0000000..ddb28a7 --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/splitter/TokenSplitter.java @@ -0,0 +1,21 @@ +package bjc.utils.parserutils.splitter; + +import bjc.utils.funcdata.IList; + +/** + * Split a string into a list of pieces. + * + * @author EVE + * + */ +public interface TokenSplitter { + /** + * Split a string into a list of pieces. + * + * @param input + * The string to split. + * + * @return The pieces of the string. + */ + public IList<String> split(String input); +} diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/TransformTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/TransformTokenSplitter.java new file mode 100644 index 0000000..80490f5 --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/splitter/TransformTokenSplitter.java @@ -0,0 +1,38 @@ +package bjc.utils.parserutils.splitter; + +import java.util.function.UnaryOperator; + +import bjc.utils.funcdata.IList; + +/** + * A token splitter that performs a transform on the tokens from another + * splitter. + * + * @author bjculkin + * + */ +public class TransformTokenSplitter implements TokenSplitter { + private TokenSplitter source; + + private UnaryOperator<String> transform; + + /** + * Create a new transforming splitter. + * + * @param source + * The splitter to use as a source. + * + * @param transform + * The transform to apply to tokens. + */ + public TransformTokenSplitter(TokenSplitter source, UnaryOperator<String> transform) { + this.source = source; + this.transform = transform; + } + + @Override + public IList<String> split(String input) { + return source.split(input).map(transform); + } + +} |
