diff options
| author | bjculkin <bjculkin@mix.wvu.edu> | 2017-04-07 16:08:53 -0400 |
|---|---|---|
| committer | bjculkin <bjculkin@mix.wvu.edu> | 2017-04-07 16:08:53 -0400 |
| commit | 1a5f1d4cf955e5e25b45a4495aa23935b947c4ca (patch) | |
| tree | bbdd341f2b4037e22acde452ee0f1a6271cf1f43 /BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ConfigurableTokenSplitter.java | |
| parent | f4baa925b0b5590bc8b12ba5f32e0218384c8efc (diff) | |
New splitter implementation
Diffstat (limited to 'BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ConfigurableTokenSplitter.java')
| -rw-r--r-- | BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ConfigurableTokenSplitter.java | 124 |
1 files changed, 124 insertions, 0 deletions
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ConfigurableTokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ConfigurableTokenSplitter.java new file mode 100644 index 0000000..021821a --- /dev/null +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitterv2/ConfigurableTokenSplitter.java @@ -0,0 +1,124 @@ +package bjc.utils.parserutils.splitterv2; + +import bjc.utils.funcdata.IList; + +import java.util.LinkedHashSet; +import java.util.Set; +import java.util.regex.Pattern; + +import static bjc.utils.PropertyDB.applyFormat; + +/** + * Split a string into pieces around a regular expression, and offer an easy way + * to configure the regular expression. + * + * @author EVE + * + */ +public class ConfigurableTokenSplitter extends SimpleTokenSplitter { + private Set<String> simpleDelimiters; + private Set<String> multipleDelimiters; + private Set<String> rRawDelimiters; + + /** + * Create a new token splitter with blank configuration. + * + * @param keepDelims + * Whether or not to keep delimiters. + */ + public ConfigurableTokenSplitter(boolean keepDelims) { + super(null, keepDelims); + + /* + * Use linked hash-sets to keep items in insertion order. + */ + simpleDelimiters = new LinkedHashSet<>(); + multipleDelimiters = new LinkedHashSet<>(); + rRawDelimiters = new LinkedHashSet<>(); + } + + /** + * Add a set of simple delimiters to this splitter. + * + * Simple delimiters match one occurrence of themselves as literals. + * + * @param simpleDelims + * The simple delimiters to add. + */ + public void addSimpleDelimiters(String... simpleDelims) { + for(String simpleDelim : simpleDelims) { + simpleDelimiters.add(simpleDelim); + } + } + + /** + * Add a set of multiple delimiters to this splitter. + * + * Multiple delimiters match one or more occurrences of themselves as + * literals. + * + * @param multiDelims + * The multiple delimiters to add. + */ + public void addMultiDelimiters(String... multiDelims) { + for(String multiDelim : multiDelims) { + multipleDelimiters.add(multiDelim); + } + } + + /** + * Add a set of raw delimiters to this splitter. + * + * Raw delimiters match one occurrence of themselves as regular + * expressions. + * + * @param rRawDelims + * The raw delimiters to add. + */ + public void addRawDelimiters(String... rRawDelims) { + for(String rRawDelim : rRawDelims) { + rRawDelimiters.add(rRawDelim); + } + } + + /** + * Take the configuration and compile it into a regular expression to + * use when splitting. + */ + public void compile() { + StringBuilder rPattern = new StringBuilder(); + + for(String rRawDelimiter : rRawDelimiters) { + rPattern.append(applyFormat("rawDelim", rRawDelimiter)); + } + + for(String multipleDelimiter : multipleDelimiters) { + rPattern.append(applyFormat("multipleDelim", multipleDelimiter)); + } + + for(String simpleDelimiter : simpleDelimiters) { + rPattern.append(applyFormat("simpleDelim", simpleDelimiter)); + } + + rPattern.deleteCharAt(rPattern.length() - 1); + + spliter = Pattern.compile(rPattern.toString()); + } + + @Override + public IList<String> split(String input) { + if(spliter == null) { + throw new IllegalStateException("Must compile splitter before use"); + } + + return super.split(input); + } + + @Override + public String toString() { + String fmt = "ConfigurableTokenSplitter [simpleDelimiters=%s, multipleDelimiters=%s," + + " rRawDelimiters=%s, spliter=%s]"; + + return String.format(fmt, simpleDelimiters, multipleDelimiters, rRawDelimiters, spliter); + } +} |
