package bjc.utils.parserutils;

import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Split a string and keep given delimiters.
 *
 * Delimiters are registered with {@link #addDelimiter(String...)},
 * {@link #addMultiDelimiter(String...)} and
 * {@link #addNonMatcher(String...)}. {@link #compile()} must be called
 * before {@link #split(String)} is used.
 *
 * @author Ben Culkin
 */
public class TokenSplitter {
	/*
	 * This string is a format template for the delimiter matching regex.
	 *
	 * NOTE(review): the original comment went on to list the two things the
	 * template does, but that list and the declarations that followed it
	 * are not present in the text under review. The methods below rely on:
	 *
	 * - WITH_DELIM and WITH_MULTI_DELIM, format templates handed to
	 *   String.format together with a single regex argument;
	 * - currPatt and currExclusionPatt, the StringBuilders holding the
	 *   uncompiled split / exclusion regexes;
	 * - compPatt and exclusionPatt, the compiled Patterns;
	 * - delimSet, multidelimSet and exclusionSet, which record every
	 *   string that was registered.
	 *
	 * Restore those declarations (and the initialisation of the three
	 * sets) from version control; the class does not compile without them.
	 */

	/**
	 * Split a string on the compiled delimiters.
	 *
	 * The splitter must be compiled first.
	 *
	 * An input that matches the exclusion pattern in its entirety is
	 * returned unsplit, as a single-element array.
	 *
	 * @param inp
	 *                The string to split.
	 *
	 * @return The split string, including delimiters.
	 *
	 * @throws IllegalStateException
	 *                 If the splitter isn't compiled.
	 */
	public String[] split(String inp) {
		if (compPatt == null) {
			throw new IllegalStateException("Token splitter has not been compiled yet");
		}

		/*
		 * Don't split something that we should exclude from being
		 * split.
		 */
		if (exclusionPatt.matcher(inp).matches()) {
			return new String[] { inp };
		}

		return compPatt.split(inp);
	}

	/**
	 * Adds one or more strings as matched delimiters to split on.
	 *
	 * Only works for fixed length delimiters.
	 *
	 * The provided strings are regex-escaped before being used. Each
	 * delimiter is also added to the exclusion pattern, so an input that
	 * consists of exactly one delimiter is returned unsplit.
	 *
	 * @param delims
	 *                The delimiters to match on.
	 *
	 * @throws NullPointerException
	 *                 If any delimiter is null.
	 */
	public void addDelimiter(String... delims) {
		for (String delim : delims) {
			if (delim == null) {
				throw new NullPointerException("Delim must not be null");
			}

			String quoteDelim = Pattern.quote(delim);
			String delimPat = String.format(WITH_DELIM, quoteDelim);

			addSplitAlternative("(?:" + delimPat + ")");
			addExclusionAlternative("(?:" + quoteDelim + ")");

			delimSet.add(delim);
		}
	}

	/**
	 * Adds a character class as a matched delimiter to split on.
	 *
	 * The provided string should be a pattern to match one or more
	 * occurrences of. It is inserted into the regex as-is, without being
	 * escaped. A run of one or more matches is also added to the exclusion
	 * pattern.
	 *
	 * @param delims
	 *                The delimiter to split on.
	 *
	 * @throws NullPointerException
	 *                 If any delimiter is null.
	 */
	public void addMultiDelimiter(String... delims) {
		for (String delim : delims) {
			if (delim == null) {
				throw new NullPointerException("Delim must not be null");
			}

			String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")");

			addSplitAlternative("(?:" + delimPat + ")");
			addExclusionAlternative("(?:(?:" + delim + ")+)");

			multidelimSet.add(delim);
		}
	}

	/**
	 * Marks strings matching the pattern delim as non-splittable.
	 *
	 * The pattern is inserted into the exclusion regex as-is, without
	 * being escaped. It never contributes to the split pattern.
	 *
	 * @param delims
	 *                The regexes describing strings that must not be
	 *                split.
	 *
	 * @throws NullPointerException
	 *                 If any pattern is null.
	 */
	public void addNonMatcher(String... delims) {
		for (String delim : delims) {
			if (delim == null) {
				throw new NullPointerException("Delim must not be null");
			}

			/*
			 * Only the exclusion pattern is touched here. Creating an
			 * empty split pattern at this point used to make the next
			 * delimiter get appended as "|(?:...)", i.e. with an empty
			 * first alternative that matches everywhere.
			 */
			addExclusionAlternative("(?:" + delim + ")");

			exclusionSet.add(delim);
		}
	}

	/**
	 * Compiles the current set of delimiters to a pattern.
	 *
	 * Makes this splitter ready to use. The patterns are rebuilt from
	 * everything registered so far, so this may be called again after
	 * more delimiters have been added.
	 */
	public void compile() {
		if (currPatt == null) {
			currPatt = new StringBuilder();
		}

		if (currExclusionPatt == null) {
			currExclusionPatt = new StringBuilder();
		}

		/*
		 * NOTE(review): when no delimiter has been registered the split
		 * pattern is the empty regex, which matches at every position of
		 * the input. Confirm whether that is the intended behaviour.
		 */
		compPatt = Pattern.compile(currPatt.toString());
		exclusionPatt = Pattern.compile(currExclusionPatt.toString());
	}

	/*
	 * Appends one alternative to the split pattern, creating the builder
	 * on first use.
	 */
	private void addSplitAlternative(String alternative) {
		if (currPatt == null) {
			currPatt = new StringBuilder();
		}

		appendAlternative(currPatt, alternative);
	}

	/*
	 * Appends one alternative to the exclusion pattern, creating the
	 * builder on first use.
	 */
	private void addExclusionAlternative(String alternative) {
		if (currExclusionPatt == null) {
			currExclusionPatt = new StringBuilder();
		}

		appendAlternative(currExclusionPatt, alternative);
	}

	/*
	 * Appends an alternative to a regex under construction, inserting the
	 * '|' separator only when the regex already has content. A builder can
	 * exist and still be empty (compile() creates empty ones), so the
	 * length is checked rather than the reference.
	 */
	private static void appendAlternative(StringBuilder patt, String alternative) {
		if (patt.length() > 0) {
			patt.append('|');
		}

		patt.append(alternative);
	}

	/*
	 * (non-Javadoc)
	 *
	 * @see java.lang.Object#toString()
	 */
	@Override
	public String toString() {
		StringBuilder builder = new StringBuilder();

		builder.append("TokenSplitter [");

		if (currPatt != null) {
			builder.append("currPatt=");
			builder.append(currPatt);
			builder.append("\n\t, ");
		}

		if (currExclusionPatt != null) {
			builder.append("currExclusionPatt=");
			builder.append(currExclusionPatt);
			builder.append("\n\t, ");
		}

		if (compPatt != null) {
			builder.append("compPatt=");
			builder.append(compPatt);
			builder.append("\n\t, ");
		}

		if (exclusionPatt != null) {
			builder.append("exclusionPatt=");
			builder.append(exclusionPatt);
			builder.append("\n\t, ");
		}

		if (delimSet != null) {
			builder.append("delimSet=");
			builder.append(delimSet);
			builder.append("\n\t, ");
		}

		if (multidelimSet != null) {
			builder.append("multidelimSet=");
			builder.append(multidelimSet);
			builder.append("\n\t, ");
		}

		if (exclusionSet != null) {
			builder.append("exclusionSet=");
			builder.append(exclusionSet);
		}

		builder.append("]");

		return builder.toString();
	}
}