diff options
| author | bjculkin <bjculkin@mix.wvu.edu> | 2017-04-07 16:08:53 -0400 |
|---|---|---|
| committer | bjculkin <bjculkin@mix.wvu.edu> | 2017-04-07 16:08:53 -0400 |
| commit | 1a5f1d4cf955e5e25b45a4495aa23935b947c4ca (patch) | |
| tree | bbdd341f2b4037e22acde452ee0f1a6271cf1f43 /BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2 | |
| parent | f4baa925b0b5590bc8b12ba5f32e0218384c8efc (diff) | |
New splitter implementation
Diffstat (limited to 'BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2')
5 files changed, 310 insertions, 0 deletions
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ChainTokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ChainTokenSplitter.java new file mode 100644 index 0000000..2ecadaf --- /dev/null +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ChainTokenSplitter.java @@ -0,0 +1,51 @@ +package bjc.utils.parserutils.splitterv2; + +import bjc.utils.funcdata.FunctionalList; +import bjc.utils.funcdata.IList; +import bjc.utils.functypes.ID; + +/** + * A token splitter that chains several other splitters together. + * + * @author EVE + * + */ +public class ChainTokenSplitter implements TokenSplitter { + private IList<TokenSplitter> spliters; + + /** + * Create a new chain token splitter. + */ + public ChainTokenSplitter() { + spliters = new FunctionalList<>(); + } + + /** + * Append a series of splitters to the chain. + * + * @param splitters + * The splitters to append to the chain. + */ + public void appendSplitters(TokenSplitter... splitters) { + spliters.addAll(splitters); + } + + /** + * Prepend a series of splitters to the chain. + * + * @param splitters + * The splitters to append to the chain. + */ + public void prependSplitters(TokenSplitter... splitters) { + spliters.prependAll(splitters); + } + + @Override + public IList<String> split(String input) { + IList<String> initList = new FunctionalList<>(input); + + return spliters.reduceAux(initList, (splitter, strangs) -> { + return strangs.flatMap(splitter::split); + }, ID.id()); + } +}
// File: BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ConfigurableTokenSplitter.java
package bjc.utils.parserutils.splitterv2;

import bjc.utils.funcdata.IList;

import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.regex.Pattern;

import static bjc.utils.PropertyDB.applyFormat;

/**
 * Split a string into pieces around a regular expression, and offer an easy way
 * to configure the regular expression.
 *
 * Delimiters are collected through the add methods and only take effect once
 * {@link #compile()} has been called; {@link #split(String)} refuses to run
 * before that.
 *
 * @author EVE
 *
 */
public class ConfigurableTokenSplitter extends SimpleTokenSplitter {
    /*
     * Linked hash-sets keep the delimiters in insertion order, which is the
     * order they are written into the compiled pattern.
     */
    private final Set<String> simpleDelimiters = new LinkedHashSet<>();
    private final Set<String> multipleDelimiters = new LinkedHashSet<>();
    private final Set<String> rRawDelimiters = new LinkedHashSet<>();

    /**
     * Create a new token splitter with blank configuration.
     *
     * The pattern is left null until {@link #compile()} is called.
     *
     * @param keepDelims
     *            Whether or not to keep delimiters.
     */
    public ConfigurableTokenSplitter(boolean keepDelims) {
        super(null, keepDelims);
    }

    /**
     * Add a set of simple delimiters to this splitter.
     *
     * Simple delimiters match one occurrence of themselves as literals.
     *
     * @param simpleDelims
     *            The simple delimiters to add.
     */
    public void addSimpleDelimiters(String... simpleDelims) {
        Collections.addAll(simpleDelimiters, simpleDelims);
    }

    /**
     * Add a set of multiple delimiters to this splitter.
     *
     * Multiple delimiters match one or more occurrences of themselves as
     * literals.
     *
     * @param multiDelims
     *            The multiple delimiters to add.
     */
    public void addMultiDelimiters(String... multiDelims) {
        Collections.addAll(multipleDelimiters, multiDelims);
    }

    /**
     * Add a set of raw delimiters to this splitter.
     *
     * Raw delimiters match one occurrence of themselves as regular
     * expressions.
     *
     * @param rRawDelims
     *            The raw delimiters to add.
     */
    public void addRawDelimiters(String... rRawDelims) {
        Collections.addAll(rRawDelimiters, rRawDelims);
    }

    /**
     * Take the configuration and compile it into a regular expression to
     * use when splitting.
     *
     * Raw delimiters are written first, then multiple delimiters, then
     * simple delimiters.
     *
     * @throws IllegalStateException
     *             If the configuration produces an empty pattern, e.g.
     *             because no delimiters have been added yet. The previously
     *             compiled pattern, if any, is left untouched.
     */
    public void compile() {
        StringBuilder rPattern = new StringBuilder();

        for (String rRawDelimiter : rRawDelimiters) {
            rPattern.append(applyFormat("rawDelim", rRawDelimiter));
        }

        for (String multipleDelimiter : multipleDelimiters) {
            rPattern.append(applyFormat("multipleDelim", multipleDelimiter));
        }

        for (String simpleDelimiter : simpleDelimiters) {
            rPattern.append(applyFormat("simpleDelim", simpleDelimiter));
        }

        /*
         * Without this guard the trailing-character removal below would
         * fail with an opaque StringIndexOutOfBoundsException.
         */
        if (rPattern.length() == 0) {
            throw new IllegalStateException("Must add at least one delimiter before compiling");
        }

        /*
         * NOTE(review): presumably every format ends in a one-character
         * separator (such as '|') and this drops the dangling one - confirm
         * against the "rawDelim"/"multipleDelim"/"simpleDelim" formats.
         */
        rPattern.deleteCharAt(rPattern.length() - 1);

        spliter = Pattern.compile(rPattern.toString());
    }

    /**
     * Split a string around the compiled delimiters.
     *
     * @param input
     *            The string to split.
     *
     * @return The pieces of the string.
     *
     * @throws IllegalStateException
     *             If {@link #compile()} has not been called yet.
     */
    @Override
    public IList<String> split(String input) {
        if (spliter == null) {
            throw new IllegalStateException("Must compile splitter before use");
        }

        return super.split(input);
    }

    @Override
    public String toString() {
        String fmt = "ConfigurableTokenSplitter [simpleDelimiters=%s, multipleDelimiters=%s,"
                + " rRawDelimiters=%s, spliter=%s]";

        return String.format(fmt, simpleDelimiters, multipleDelimiters, rRawDelimiters, spliter);
    }
}

// File: BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ExcludingTokenSplitter.java
package bjc.utils.parserutils.splitterv2;

import bjc.utils.funcdata.FunctionalList;
import bjc.utils.funcdata.IList;

import java.util.HashSet;
import java.util.Set;
import java.util.function.Predicate;

/**
 * A token splitter that will not split certain tokens.
 *
 * A token is excluded when it equals a registered literal or satisfies a
 * registered predicate; everything else is handed to the wrapped splitter.
 *
 * @author EVE
 *
 */
public class ExcludingTokenSplitter implements TokenSplitter {
    private final Set<String> literalExclusions;

    private final IList<Predicate<String>> predExclusions;

    private final TokenSplitter spliter;

    /**
     * Create a new excluding token splitter.
     *
     * @param splitter
     *            The splitter to apply to non-excluded strings.
     */
    public ExcludingTokenSplitter(TokenSplitter splitter) {
        spliter = splitter;

        literalExclusions = new HashSet<>();

        predExclusions = new FunctionalList<>();
    }

    /**
     * Exclude a literal string from splitting.
     *
     * @param exclusion
     *            The string to exclude from splitting.
     */
    public void addLiteralExclusion(String exclusion) {
        literalExclusions.add(exclusion);
    }

    /**
     * Exclude all of the strings matching a predicate from splitting.
     *
     * @param exclusion
     *            The predicate to use for exclusions.
     */
    public void addPredicateExclusion(Predicate<String> exclusion) {
        predExclusions.add(exclusion);
    }

    /**
     * Split a string unless it is excluded.
     *
     * @param input
     *            The string to split.
     *
     * @return A list holding only the input if it is excluded, otherwise
     *         whatever the wrapped splitter returns for it.
     */
    @Override
    public IList<String> split(String input) {
        /*
         * Literal lookup comes first so predicates only run when it misses.
         */
        boolean excluded = literalExclusions.contains(input)
                || predExclusions.anyMatch(pred -> pred.test(input));

        if (excluded) {
            return new FunctionalList<>(input);
        }

        return spliter.split(input);
    }

}

// File: BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/SimpleTokenSplitter.java
package bjc.utils.parserutils.splitterv2;

import bjc.utils.funcdata.IList;
import bjc.utils.functypes.ID;
import bjc.utils.ioutils.RegexStringEditor;

import java.util.regex.Pattern;

/**
 * Splits a string into pieces around a regular expression.
 *
 * @author EVE
 *
 */
public class SimpleTokenSplitter implements TokenSplitter {
    /*
     * Deliberately neither private nor final: subclasses may install the
     * pattern after construction.
     */
    protected Pattern spliter;

    private final boolean keepDelim;

    /**
     * Create a new simple token splitter.
     *
     * @param splitter
     *            The pattern to split around.
     *
     * @param keepDelims
     *            Whether or not delimiters should be kept.
     */
    public SimpleTokenSplitter(Pattern splitter, boolean keepDelims) {
        spliter = splitter;

        keepDelim = keepDelims;
    }

    /**
     * Split a string around occurrences of the pattern.
     *
     * Text between occurrences is always passed through unchanged.
     * Occurrences themselves are passed through unchanged when delimiters
     * are kept, and are mapped to the empty string otherwise.
     *
     * @param input
     *            The string to split.
     *
     * @return The pieces of the string.
     */
    @Override
    public IList<String> split(String input) {
        if (keepDelim) {
            return RegexStringEditor.mapOccurances(input, spliter, ID.id(), ID.id());
        }

        return RegexStringEditor.mapOccurances(input, spliter, ID.id(), strang -> "");
    }

    @Override
    public String toString() {
        return String.format("SimpleTokenSplitter [spliter=%s, keepDelim=%s]", spliter, keepDelim);
    }
}
\ No newline at end of file diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/TokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/TokenSplitter.java new file mode 100644 index 0000000..5d510c1 --- /dev/null +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/TokenSplitter.java @@ -0,0 +1,21 @@ +package bjc.utils.parserutils.splitterv2; + +import bjc.utils.funcdata.IList; + +/** + * Split a string into a list of pieces. + * + * @author EVE + * + */ +public interface TokenSplitter { + /** + * Split a string into a list of pieces. + * + * @param input + * The string to split. + * + * @return The pieces of the string. + */ + public IList<String> split(String input); +} |
