package bjc.utils.parserutils.splitter;

import bjc.utils.ioutils.RegexStringEditor;

import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Simple implementation of {@link TokenSplitter}.
 *
 * Delimiters and exclusions are accumulated through
 * {@link #addDelimiter(String...)}, {@link #addMultiDelimiter(String...)} and
 * {@link #addNonMatcher(String...)}. They only take effect once
 * {@link #compile()} has been called; {@link #split(String)} refuses to run
 * before that.
 *
 * @author EVE
 *
 *         TODO rewrite using {@link RegexStringEditor}
 */
public class SimpleTokenSplitter implements TokenSplitter {
    /*
     * This string is a format template for the delimiter matching regex.
     *
     * It does two things:
     *
     * 1. Match to the left of the provided delimiter by positive lookahead
     *
     * 2. Match to the right of the provided delimiter by positive lookbehind
     *
     * Thus, it will only match in places where the delimiter is, but won't
     * actually match the delimiter, leaving split to put it into the stream.
     */
    private static final String WITH_DELIM = "(?:(?<=%1$s)|(?=%1$s))";

    /*
     * This string is a format template for the multi-delimiter matching
     * regex.
     *
     * It does the same thing as the single delimiter regex, but has to have
     * some negative lookahead/lookbehind assertions to avoid splitting a
     * delimiter into pieces.
     *
     * NOTE(review): the second alternative of this template was reconstructed
     * from a damaged copy of the file - confirm it against version control.
     */
    private static final String WITH_MULTI_DELIM = "(?:(?<=%1$s+)(?!%1$s)|(?<!%1$s)(?=%1$s+))";

    /*
     * A regex that can never match (an empty negative lookahead always
     * fails). Compiled in place of an empty pattern, because an empty regex
     * matches at every position and would split input into single characters.
     */
    private static final String NEVER_MATCH = "(?!)";

    /* Alternation of split points built so far; null until first used. */
    private StringBuilder currPatt;
    /* Alternation of whole-string exclusions built so far; null until first used. */
    private StringBuilder currExclusionPatt;

    /* Compiled form of currPatt; null until compile() is called. */
    private Pattern compPatt;
    /* Compiled form of currExclusionPatt; null until compile() is called. */
    private Pattern exclusionPatt;

    private final Set<String> delimSet;
    private final Set<String> multidelimSet;
    private final Set<String> exclusionSet;

    /**
     * Create a new token splitter.
     */
    public SimpleTokenSplitter() {
        delimSet = new HashSet<>();
        multidelimSet = new HashSet<>();
        exclusionSet = new HashSet<>();
    }

    /**
     * Splits the input around the compiled delimiters, keeping the delimiters
     * as tokens of their own.
     *
     * Input that matches one of the exclusion patterns in its entirety is
     * returned unsplit as a single-element array.
     *
     * @param inp
     *            The string to split.
     *
     * @return The tokens of the input.
     *
     * @throws IllegalStateException
     *             If {@link #compile()} has not been called yet.
     */
    @Override
    public String[] split(String inp) {
        if (compPatt == null) {
            throw new IllegalStateException("Token splitter has not been compiled yet");
        }

        /*
         * Don't split something that we should exclude from being split.
         */
        if (exclusionPatt.matcher(inp).matches()) {
            return new String[] { inp };
        }

        return compPatt.split(inp);
    }

    /**
     * Adds one or more strings as matched delimiters to split on.
     *
     * Only works for fixed length delimiters.
     *
     * The provided strings are regex-escaped before being used. A string that
     * consists of exactly one delimiter is also excluded from splitting.
     *
     * @param delims
     *            The delimiters to match on.
     *
     * @throws NullPointerException
     *             If any of the delimiters is null.
     */
    public void addDelimiter(String... delims) {
        for (String delim : delims) {
            if (delim == null) {
                throw new NullPointerException("Delim must not be null");
            }

            String quoteDelim = Pattern.quote(delim);
            String delimPat = String.format(WITH_DELIM, quoteDelim);

            currPatt = appendAlternative(currPatt, delimPat);
            currExclusionPatt = appendAlternative(currExclusionPatt, quoteDelim);

            delimSet.add(delim);
        }
    }

    /**
     * Adds a character class as a matched delimiter to split on.
     *
     * The provided string should be a pattern to match one or more
     * occurrences of. It is inserted into the regex as-is, without escaping.
     *
     * @param delims
     *            The delimiter to split on.
     *
     * @throws NullPointerException
     *             If any of the delimiters is null.
     */
    public void addMultiDelimiter(String... delims) {
        for (String delim : delims) {
            if (delim == null) {
                throw new NullPointerException("Delim must not be null");
            }

            String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")");

            currPatt = appendAlternative(currPatt, delimPat);
            /* A string made up only of repetitions of the delimiter is left whole. */
            currExclusionPatt = appendAlternative(currExclusionPatt, "(?:" + delim + ")+");

            multidelimSet.add(delim);
        }
    }

    /**
     * Marks strings matching the given patterns as non-splittable.
     *
     * The patterns are used as regexes without escaping, and a string is only
     * protected when it matches one of them in its entirety.
     *
     * @param delims
     *            The regexes describing strings that must not be split.
     *
     * @throws NullPointerException
     *             If any of the patterns is null.
     */
    public void addNonMatcher(String... delims) {
        for (String delim : delims) {
            if (delim == null) {
                throw new NullPointerException("Delim must not be null");
            }

            /*
             * Only the exclusion pattern is touched here. Creating an empty
             * delimiter pattern as well would make the next delimiter be
             * appended as "|(?:...)", whose empty first alternative matches
             * everywhere.
             */
            currExclusionPatt = appendAlternative(currExclusionPatt, delim);

            exclusionSet.add(delim);
        }
    }

    /**
     * Compiles the current set of delimiters to a pattern.
     *
     * Makes this splitter ready to use. Delimiters or exclusions added after
     * this call are not picked up until it is called again.
     *
     * If no delimiters have been added, the splitter returns every input as a
     * single token.
     */
    public void compile() {
        if (currPatt == null) {
            currPatt = new StringBuilder();
        }

        if (currExclusionPatt == null) {
            currExclusionPatt = new StringBuilder();
        }

        compPatt = Pattern.compile(regexOrNeverMatch(currPatt));
        exclusionPatt = Pattern.compile(regexOrNeverMatch(currExclusionPatt));
    }

    /*
     * Appends "(?:alternative)" to the given alternation, creating the
     * builder if it does not exist yet and inserting a '|' only when there is
     * already something to separate from.
     */
    private static StringBuilder appendAlternative(StringBuilder patt, String alternative) {
        StringBuilder result = (patt == null) ? new StringBuilder() : patt;

        if (result.length() > 0) {
            result.append('|');
        }

        result.append("(?:").append(alternative).append(")");

        return result;
    }

    /*
     * Returns the regex text to compile for a builder, substituting the
     * never-matching regex for an empty one.
     */
    private static String regexOrNeverMatch(StringBuilder patt) {
        return patt.length() == 0 ? NEVER_MATCH : patt.toString();
    }

    /*
     * Appends "label=value" followed by the field separator, skipping fields
     * that are still null.
     */
    private static void appendField(StringBuilder out, String label, Object value) {
        if (value != null) {
            out.append(label).append('=').append(value).append("\n\t, ");
        }
    }

    /*
     * (non-Javadoc)
     *
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();

        builder.append("SimpleTokenSplitter [");

        appendField(builder, "currPatt", currPatt);
        appendField(builder, "currExclusionPatt", currExclusionPatt);
        appendField(builder, "compPatt", compPatt);
        appendField(builder, "exclusionPatt", exclusionPatt);
        appendField(builder, "delimSet", delimSet);
        appendField(builder, "multidelimSet", multidelimSet);

        /* The last field carries no trailing separator. */
        builder.append("exclusionSet=");
        builder.append(exclusionSet);

        builder.append("]");

        return builder.toString();
    }
}