From 0f6565687e03968abd2e508fa8183f50f04f1cc7 Mon Sep 17 00:00:00 2001 From: bjculkin Date: Fri, 24 Mar 2017 16:21:07 -0400 Subject: Update Pratt Parser --- .../parserutils/splitter/SimpleTokenSplitter.java | 235 +++++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java (limited to 'BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java') diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java new file mode 100644 index 0000000..8b078a9 --- /dev/null +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java @@ -0,0 +1,235 @@ +package bjc.utils.parserutils.splitter; + +import java.util.HashSet; +import java.util.Set; +import java.util.regex.Pattern; + +/** + * Simple implementation of {@link TokenSplitter} + * + * @author EVE + * + */ +public class SimpleTokenSplitter implements TokenSplitter { + /* + * This string is a format template for the delimiter matching regex + * + * It does two things: + * + *
  1. Match to the left of the provided delimiter by positive + * lookahead
  2. Match to the right of the provided delimiter by + * positive lookbehind
+ * + * Thus, it will only match in places where the delimiter is, but won't + * actually match the delimiter, leaving split to put it into the stream + */ + private static String WITH_DELIM = "(?:(?<=%1$s)|(?=%1$s))"; + + /* + * This string is a format template for the multi-delimiter matching + * regex. + * + * It does the same thing as the single delimiter regex, but has to have + * some negative lookahead/lookbehind assertions to avoid splitting a + * delimiter into pieces. + */ + private static String WITH_MULTI_DELIM = "(?:(?<=%1$s+)(?!%1$s)|(? delimSet; + private Set multidelimSet; + private Set exclusionSet; + + /** + * Create a new token splitter. + */ + public SimpleTokenSplitter() { + delimSet = new HashSet<>(); + multidelimSet = new HashSet<>(); + exclusionSet = new HashSet<>(); + } + + @Override + public String[] split(String inp) { + if(compPatt == null) throw new IllegalStateException("Token splitter has not been compiled yet"); + + /* + * Don't split something that we should exclude from being + * split. + */ + if(exclusionPatt.matcher(inp).matches()) return new String[] { inp }; + + return compPatt.split(inp); + } + + /** + * Adds one or more strings as matched delimiters to split on. + * + * Only works for fixed length delimiters. + * + * The provided strings are regex-escaped before being used. + * + * @param delims + * The delimiters to match on. + */ + public void addDelimiter(String... delims) { + for(String delim : delims) { + if(delim == null) throw new NullPointerException("Delim must not be null"); + + String quoteDelim = Pattern.quote(delim); + String delimPat = String.format(WITH_DELIM, quoteDelim); + + if(currPatt == null) { + currPatt = new StringBuilder(); + currExclusionPatt = new StringBuilder(); + + currPatt.append("(?:" + delimPat + ")"); + currExclusionPatt.append("(?:" + quoteDelim + ")"); + } else { + currPatt.append("|(?:" + delimPat + ")"); + currExclusionPatt.append("|(?:" + quoteDelim + ")"); + } + + delimSet.add(delim); + } + } + + /** + * Adds a character class as a matched delimiter to split on. + * + * The provided string should be a pattern to match one or more + * occurances of. + * + * @param delims + * The delimiter to split on. + */ + public void addMultiDelimiter(String... delims) { + for(String delim : delims) { + if(delim == null) throw new NullPointerException("Delim must not be null"); + + String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")"); + + if(currPatt == null) { + currPatt = new StringBuilder(); + currExclusionPatt = new StringBuilder(); + + currPatt.append("(?:" + delimPat + ")"); + currExclusionPatt.append("(?:(?:" + delim + ")+)"); + + } else { + currPatt.append("|(?:" + delimPat + ")"); + currExclusionPatt.append("|(?:(?:" + delim + ")+)"); + } + + multidelimSet.add(delim); + } + } + + /** + * Marks strings matching the pattern delim as non-splittable. + * + * @param delims + * The regex to not splitting matching strings. + */ + public void addNonMatcher(String... delims) { + for(String delim : delims) { + if(delim == null) throw new NullPointerException("Delim must not be null"); + + if(currPatt == null) { + currPatt = new StringBuilder(); + currExclusionPatt = new StringBuilder(); + + currExclusionPatt.append("(?:" + delim + ")"); + } else { + currExclusionPatt.append("|(?:" + delim + ")"); + } + + exclusionSet.add(delim); + } + } + + /** + * Compiles the current set of delimiters to a pattern. + * + * Makes this splitter ready to use. + */ + public void compile() { + if(currPatt == null) currPatt = new StringBuilder(); + if(currExclusionPatt == null) currExclusionPatt = new StringBuilder(); + + compPatt = Pattern.compile(currPatt.toString()); + exclusionPatt = Pattern.compile(currExclusionPatt.toString()); + } + + /* + * (non-Javadoc) + * + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + + builder.append("SimpleTokenSplitter ["); + + if(currPatt != null) { + builder.append("currPatt="); + builder.append(currPatt); + builder.append("\n\t, "); + } + + if(currExclusionPatt != null) { + builder.append("currExclusionPatt="); + builder.append(currExclusionPatt); + builder.append("\n\t, "); + } + + if(compPatt != null) { + builder.append("compPatt="); + builder.append(compPatt); + builder.append("\n\t, "); + } + + if(exclusionPatt != null) { + builder.append("exclusionPatt="); + builder.append(exclusionPatt); + builder.append("\n\t, "); + } + + if(delimSet != null) { + builder.append("delimSet="); + builder.append(delimSet); + builder.append("\n\t, "); + } + + if(multidelimSet != null) { + builder.append("multidelimSet="); + builder.append(multidelimSet); + builder.append("\n\t, "); + } + + if(exclusionSet != null) { + builder.append("exclusionSet="); + builder.append(exclusionSet); + } + + builder.append("]"); + return builder.toString(); + } +} -- cgit v1.2.3