summaryrefslogtreecommitdiff
path: root/base/src/main/java/bjc/utils/parserutils/splitter/ConfigurableTokenSplitter.java
blob: cc6922127ba49841e5465b9e5813c7ed241c73a1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
package bjc.utils.parserutils.splitter;

import static bjc.utils.PropertyDB.applyFormat;

import java.util.LinkedHashSet;
import java.util.Set;
import java.util.regex.Pattern;

import bjc.utils.funcdata.IList;

/**
 * Split a string into pieces around a regular expression, and offer an easy way
 * to configure the regular expression.
 *
 * Delimiters are collected through the {@code add*Delimiters} methods and are
 * turned into the actual splitting pattern by {@link #compile()}.
 * {@link #split(String)} refuses to run until a pattern has been compiled.
 *
 * @author EVE
 *
 */
public class ConfigurableTokenSplitter extends SimpleTokenSplitter {
	/* Delimiters formatted with the "simpleDelim" format. */
	private final Set<String> simpleDelimiters;
	/* Delimiters formatted with the "multipleDelim" format. */
	private final Set<String> multipleDelimiters;
	/* Delimiters formatted with the "rawDelim" format. */
	private final Set<String> rRawDelimiters;

	/**
	 * Create a new token splitter with blank configuration.
	 *
	 * The splitter starts without a pattern; {@link #compile()} must be
	 * called before {@link #split(String)} can be used.
	 *
	 * @param keepDelims
	 *        Whether or not to keep delimiters.
	 */
	public ConfigurableTokenSplitter(final boolean keepDelims) {
		/* No pattern yet; compile() installs one later. */
		super(null, keepDelims);

		/* Use linked hash-sets to keep items in insertion order. */
		simpleDelimiters = new LinkedHashSet<>();
		multipleDelimiters = new LinkedHashSet<>();
		rRawDelimiters = new LinkedHashSet<>();
	}

	/**
	 * Add a set of simple delimiters to this splitter.
	 *
	 * Simple delimiters match one occurrence of themselves as literals.
	 *
	 * The new delimiters only take effect after the next call to
	 * {@link #compile()}.
	 *
	 * @param simpleDelims
	 *        The simple delimiters to add.
	 */
	public void addSimpleDelimiters(final String... simpleDelims) {
		for(final String simpleDelim : simpleDelims) {
			simpleDelimiters.add(simpleDelim);
		}
	}

	/**
	 * Add a set of multiple delimiters to this splitter.
	 *
	 * Multiple delimiters match one or more occurrences of themselves as
	 * literals.
	 *
	 * The new delimiters only take effect after the next call to
	 * {@link #compile()}.
	 *
	 * @param multiDelims
	 *        The multiple delimiters to add.
	 */
	public void addMultiDelimiters(final String... multiDelims) {
		for(final String multiDelim : multiDelims) {
			multipleDelimiters.add(multiDelim);
		}
	}

	/**
	 * Add a set of raw delimiters to this splitter.
	 *
	 * Raw delimiters match one occurrence of themselves as regular
	 * expressions.
	 *
	 * The new delimiters only take effect after the next call to
	 * {@link #compile()}.
	 *
	 * @param rRawDelims
	 *        The raw delimiters to add.
	 */
	public void addRawDelimiters(final String... rRawDelims) {
		for(final String rRawDelim : rRawDelims) {
			rRawDelimiters.add(rRawDelim);
		}
	}

	/**
	 * Take the configuration and compile it into a regular expression to
	 * use when splitting.
	 *
	 * Raw delimiters are emitted first, then multiple delimiters, then
	 * simple delimiters, each group in insertion order.
	 *
	 * @throws IllegalStateException
	 *         If nothing has been configured, so that there is no
	 *         pattern text to compile.
	 */
	public void compile() {
		final StringBuilder rPattern = new StringBuilder();

		appendFormatted(rPattern, "rawDelim", rRawDelimiters);
		appendFormatted(rPattern, "multipleDelim", multipleDelimiters);
		appendFormatted(rPattern, "simpleDelim", simpleDelimiters);

		/*
		 * Without this guard, the trim below would index position -1 and
		 * fail with an unhelpful StringIndexOutOfBoundsException.
		 */
		if(rPattern.length() == 0) {
			throw new IllegalStateException("Must add at least one delimiter before compiling splitter");
		}

		/*
		 * Drop the final character of the assembled pattern.
		 *
		 * NOTE(review): presumably every delimiter format ends with a
		 * one-character separator (e.g. '|'); confirm against the format
		 * definitions used by applyFormat.
		 */
		rPattern.deleteCharAt(rPattern.length() - 1);

		spliter = Pattern.compile(rPattern.toString());
	}

	/* Append each delimiter to the pattern, run through the named format. */
	private static void appendFormatted(final StringBuilder rPattern, final String formatName,
			final Set<String> delimiters) {
		for(final String delimiter : delimiters) {
			rPattern.append(applyFormat(formatName, delimiter));
		}
	}

	/**
	 * Split the input using the compiled pattern.
	 *
	 * @param input
	 *        The string to split.
	 *
	 * @return The pieces produced by the superclass splitter.
	 *
	 * @throws IllegalStateException
	 *         If {@link #compile()} has not yet produced a pattern.
	 */
	@Override
	public IList<String> split(final String input) {
		if(spliter == null) {
			throw new IllegalStateException("Must compile splitter before use");
		}

		return super.split(input);
	}

	@Override
	public String toString() {
		final String fmt = "ConfigurableTokenSplitter [simpleDelimiters=%s, multipleDelimiters=%s,"
				+ " rRawDelimiters=%s, spliter=%s]";

		return String.format(fmt, simpleDelimiters, multipleDelimiters, rRawDelimiters, spliter);
	}
}