diff options
Diffstat (limited to 'base/src/main/java/bjc/utils/parserutils')
24 files changed, 2837 insertions, 0 deletions
// ===== base/src/main/java/bjc/utils/parserutils/DoubleMatcher.java =====
package bjc.utils.parserutils;

import static bjc.utils.PropertyDB.applyFormat;
import static bjc.utils.PropertyDB.getRegex;

import java.util.regex.Pattern;

/*
 * Checks if a string would pass Double.parseDouble.
 *
 * Uses a regex from the javadoc for Double.valueOf(). The individual regex
 * fragments and the format strings that glue them together are looked up by
 * name through PropertyDB; this class only wires them together.
 */
class DoubleMatcher {
    /*
     * Unit pieces.
     */
    private static final String rDecDigits = getRegex("fpDigits");
    private static final String rHexDigits = getRegex("fpHexDigits");
    private static final String rExponent = applyFormat("fpExponent", getRegex("fpExponent"), rDecDigits);

    /*
     * Decimal floating point numbers.
     */
    private static final String rSimpleDec = applyFormat("fpDecimalDecimal", rDecDigits, rExponent);
    private static final String rSimpleIntDec = applyFormat("fpDecimalInteger", rDecDigits, rExponent);

    /*
     * Hex floating point numbers.
     */
    private static final String rHexInt = applyFormat("fpHexInteger", rHexDigits);
    private static final String rHexDec = applyFormat("fpHexDecimal", rHexDigits);
    private static final String rHexLead = applyFormat("fpHexLeader", rHexInt, rHexDec);
    private static final String rHexString = applyFormat("fpHexString", rHexLead, rDecDigits);

    /*
     * Floating point components.
     */
    private static final String rFPLeader = getRegex("fpLeader");
    private static final String rFPNum = applyFormat("fpNumber", rSimpleIntDec, rSimpleDec, rHexString);

    /*
     * Full double.
     */
    private static final String rDouble = applyFormat("fpDouble", rFPLeader, rFPNum);

    /*
     * The assembled pattern, anchored at both ends so that it only accepts
     * inputs that are a double literal in their entirety.
     */
    public static final Pattern doubleLiteral = Pattern.compile("\\A" + rDouble + "\\Z");
}

// ===== base/src/main/java/bjc/utils/parserutils/IPrecedent.java =====
package bjc.utils.parserutils;

/**
 * Represents something that has a set precedence
 *
 * @author ben
 *
 */
@FunctionalInterface
public interface IPrecedent {
    /**
     * Create a new object with set precedence
     *
     * @param precedence
     *                The precedence of the object to handle
     * @return A new object that always reports the given precedence
     */
    public static IPrecedent newSimplePrecedent(final int precedence) {
        return () -> precedence;
    }

    /**
     * Get the precedence of the attached object
     *
     * @return The precedence of the attached object
     */
    public int getPrecedence();
}

// ===== base/src/main/java/bjc/utils/parserutils/ParserException.java =====
package bjc.utils.parserutils;

/**
 * General superclass for exceptions thrown during parsing.
 *
 * @author EVE
 *
 */
public class ParserException extends Exception {
    private static final long serialVersionUID = 631298568113373233L;

    /**
     * Create a new exception with the provided message.
     *
     * @param msg
     *                The message for the exception.
     */
    public ParserException(final String msg) {
        super(msg);
    }

    /**
     * Create a new exception with the provided message and cause.
     *
     * @param msg
     *                The message for the exception.
     * @param cause
     *                The cause of the exception.
     */
    public ParserException(final String msg, final Exception cause) {
        super(msg, cause);
    }

    /**
     * Create a new exception with the provided message and an arbitrary
     * throwable as its cause.
     *
     * This overload exists so that causes which are not {@link Exception}s
     * (for instance an {@link Error}) can be chained without being wrapped
     * first.
     *
     * @param msg
     *                The message for the exception.
     * @param cause
     *                The cause of the exception.
     */
    public ParserException(final String msg, final Throwable cause) {
        super(msg, cause);
    }
}
\ No newline at end of file diff --git a/base/src/main/java/bjc/utils/parserutils/ShuntingYard.java b/base/src/main/java/bjc/utils/parserutils/ShuntingYard.java new file mode 100644 index 0000000..a1b5feb --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/ShuntingYard.java @@ -0,0 +1,274 @@ +package bjc.utils.parserutils; + +import java.util.Deque; +import java.util.LinkedList; +import java.util.function.Consumer; +import java.util.function.Function; + +import bjc.utils.funcdata.FunctionalList; +import bjc.utils.funcdata.FunctionalMap; +import bjc.utils.funcdata.IList; +import bjc.utils.funcdata.IMap; +import bjc.utils.funcutils.StringUtils; + +/** + * Utility to run the shunting yard algorithm on a bunch of tokens. + * + * @author ben + * + * @param <TokenType> + * The type of tokens being shunted. + */ +public class ShuntingYard<TokenType> { + /** + * A enum representing the fundamental operator types. + * + * @author ben + * + */ + public static enum Operator implements IPrecedent { + /** + * Represents addition. + */ + ADD(1), + /** + * Represents subtraction. + */ + SUBTRACT(2), + + /** + * Represents multiplication. + */ + MULTIPLY(3), + /** + * Represents division. + */ + DIVIDE(4); + + private final int precedence; + + private Operator(final int prec) { + precedence = prec; + } + + @Override + public int getPrecedence() { + return precedence; + } + } + + /* + * Function that shunts tokens. 
+ */ + private final class TokenShunter implements Consumer<String> { + private final IList<TokenType> output; + private final Deque<String> stack; + private final Function<String, TokenType> transformer; + + public TokenShunter(final IList<TokenType> outpt, final Deque<String> stack, + final Function<String, TokenType> transformer) { + this.output = outpt; + this.stack = stack; + this.transformer = transformer; + } + + @Override + public void accept(final String token) { + /* + * Handle operators + */ + if (operators.containsKey(token)) { + /* + * Pop operators while there isn't a higher precedence one + */ + while (!stack.isEmpty() && isHigherPrec(token, stack.peek())) { + output.add(transformer.apply(stack.pop())); + } + + /* + * Put this operator onto the stack + */ + stack.push(token); + } else if (StringUtils.containsOnly(token, "\\(")) { + /* + * Handle groups of parenthesis for multiple nesting levels + */ + stack.push(token); + } else if (StringUtils.containsOnly(token, "\\)")) { + /* + * Handle groups of parenthesis for multiple nesting levels + */ + final String swappedToken = token.replace(')', '('); + + /* + * Remove tokens up to a matching parenthesis + */ + while (!stack.peek().equals(swappedToken)) { + output.add(transformer.apply(stack.pop())); + } + + /* + * Remove the parenthesis + */ + stack.pop(); + } else { + /* + * Just add the transformed token + */ + output.add(transformer.apply(token)); + } + } + } + + /* + * Holds all the shuntable operations. + */ + private IMap<String, IPrecedent> operators; + + /** + * Create a new shunting yard with a default set of operators. + * + * @param configureBasics + * Whether or not basic math operators should be + * provided. 
+ */ + public ShuntingYard(final boolean configureBasics) { + operators = new FunctionalMap<>(); + + /* + * Add basic operators if we're configured to do so + */ + if (configureBasics) { + operators.put("+", Operator.ADD); + operators.put("-", Operator.SUBTRACT); + operators.put("*", Operator.MULTIPLY); + operators.put("/", Operator.DIVIDE); + } + } + + /** + * Add an operator to the list of shuntable operators. + * + * @param operator + * The token representing the operator. + * + * @param precedence + * The precedence of the operator to add. + */ + public void addOp(final String operator, final int precedence) { + /* + * Create the precedence marker + */ + final IPrecedent prec = IPrecedent.newSimplePrecedent(precedence); + + this.addOp(operator, prec); + } + + /** + * Add an operator to the list of shuntable operators. + * + * @param operator + * The token representing the operator. + * + * @param precedence + * The precedence of the operator. + */ + public void addOp(final String operator, final IPrecedent precedence) { + /* + * Complain about trying to add an incorrect operator + */ + if (operator == null) + throw new NullPointerException("Operator must not be null"); + else if (precedence == null) throw new NullPointerException("Precedence must not be null"); + + /* + * Add the operator to the ones we handle + */ + operators.put(operator, precedence); + } + + private boolean isHigherPrec(final String left, final String right) { + /* + * Check if the right operator exists + */ + final boolean exists = operators.containsKey(right); + + /* + * If it doesn't, the left is higher precedence. + */ + if (!exists) return false; + + /* + * Get the precedence of operators + */ + final int rightPrecedence = operators.get(right).getPrecedence(); + final int leftPrecedence = operators.get(left).getPrecedence(); + + /* + * Evaluate what we were asked + */ + return rightPrecedence >= leftPrecedence; + } + + /** + * Transform a string of tokens from infix notation to postfix. 
+ * + * @param input + * The string to transform. + * + * @param transformer + * The function to use to transform strings to tokens. + * + * @return A list of tokens in postfix notation. + */ + public IList<TokenType> postfix(final IList<String> input, final Function<String, TokenType> transformer) { + /* + * Check our input + */ + if (input == null) + throw new NullPointerException("Input must not be null"); + else if (transformer == null) throw new NullPointerException("Transformer must not be null"); + + /* + * Here's what we're handing back + */ + final IList<TokenType> output = new FunctionalList<>(); + + /* + * The stack to put operators on + */ + final Deque<String> stack = new LinkedList<>(); + + /* + * Shunt the tokens + */ + input.forEach(new TokenShunter(output, stack, transformer)); + + /* + * Transform any resulting tokens + */ + stack.forEach(token -> { + output.add(transformer.apply(token)); + }); + + return output; + } + + /** + * Remove an operator from the list of shuntable operators. + * + * @param operator + * The token representing the operator. If null, remove + * all operators. 
+ */ + public void removeOp(final String operator) { + /* + * Check if we want to remove all operators + */ + if (operator == null) { + operators = new FunctionalMap<>(); + } else { + operators.remove(operator); + } + } +} diff --git a/base/src/main/java/bjc/utils/parserutils/StringDescaper.java b/base/src/main/java/bjc/utils/parserutils/StringDescaper.java new file mode 100644 index 0000000..096656a --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/StringDescaper.java @@ -0,0 +1,242 @@ +package bjc.utils.parserutils; + +import java.util.HashMap; +import java.util.Map; +import java.util.function.UnaryOperator; +import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import static java.util.Map.Entry; + +import static bjc.utils.PropertyDB.applyFormat; +import static bjc.utils.PropertyDB.getCompiledRegex; +import static bjc.utils.PropertyDB.getRegex; + +public class StringDescaper { + private Logger LOGGER = Logger.getLogger(StringDescaper.class.getName()); + + /* + * Patterns and pattern parts. 
+ */ + private static String rPossibleEscapeString = getRegex("possibleStringEscape"); + private static Pattern possibleEscapePatt = Pattern.compile(rPossibleEscapeString); + + private static String rShortEscape = getRegex("shortFormStringEscape"); + private static String rOctalEscape = getRegex("octalStringEscape"); + private static String rUnicodeEscape = getRegex("unicodeStringEscape"); + + private String rEscapeString; + private Pattern escapePatt; + + private static String rDoubleQuoteString = applyFormat("doubleQuotes", getRegex("nonStringEscape"), rPossibleEscapeString); + private static Pattern doubleQuotePatt = Pattern.compile(rDoubleQuoteString); + + private static Pattern quotePatt = getCompiledRegex("unescapedQuote"); + + private Map<String, String> literalEscapes; + private Map<Pattern, UnaryOperator<String>> specialEscapes; + + public StringDescaper() { + literalEscapes = new HashMap<>(); + specialEscapes = new HashMap<>(); + + rEscapeString = String.format("\\\\(%1$s|%2$s|%3$s)"); + escapePatt = Pattern.compile(rEscapeString); + } + + public void addLiteralEscape(String escape, String val) { + if(literalEscapes.containsKey(escape)) { + LOGGER.warning(String.format("Shadowing literal escape '%s'\n", escape)); + } + + literalEscapes.put(escape, val); + } + + public void addSpecialEscape(String escape, UnaryOperator<String> val) { + if(specialEscapes.containsKey(escape)) { + LOGGER.warning(String.format("Shadowing special escape '%s'\n", escape)); + } + + /* + * Make sure this special escape is a valid regex. 
+ */ + + Pattern patt = null; + try { + patt = Pattern.compile(escape); + } catch (PatternSyntaxException psex) { + String msg = String.format("Invalid special escape '%s'", escape); + + IllegalArgumentException iaex = new IllegalArgumentException(msg); + iaex.initCause(psex); + + throw psex; + } + + specialEscapes.put(patt, val); + } + + public void compileEscapes() { + StringBuilder work = new StringBuilder(); + + for(String litEscape : literalEscapes.keySet()) { + work.append("|(?:"); + work.append(Pattern.quote(litEscape)); + work.append(")"); + } + + for(Pattern specEscape : specialEscapes.keySet()) { + work.append("|(?:"); + work.append(specEscape.toString()); + work.append(")"); + } + + /* + * Convert user-defined escapes to a regex for matching. + * We don't need a bar before %4 because the string has it. + */ + rEscapeString = String.format("\\(%1$s|%2$s|%3$s%4$s)", rShortEscape, rOctalEscape, rUnicodeEscape, work.toString()); + escapePatt = Pattern.compile(rEscapeString); + } + + /** + * Replace escape characters with their actual equivalents. + * + * @param inp + * The string to replace escape sequences in. + * + * @return The string with escape sequences replaced by their equivalent + * characters. + */ + public String descapeString(final String inp) { + if (inp == null) { + throw new NullPointerException("Input to descapeString must not be null"); + } + + /* + * Prepare the buffer and escape finder. + */ + final StringBuffer work = new StringBuffer(); + final Matcher possibleEscapeFinder = possibleEscapePatt.matcher(inp); + final Matcher escapeFinder = escapePatt.matcher(inp); + + while (possibleEscapeFinder.find()) { + if (!escapeFinder.find()) { + /* + * Found a possible escape that isn't actually an + * escape. 
+ */ + final String msg = String.format("Illegal escape sequence '%s' at position %d of string '%s'", + possibleEscapeFinder.group(), possibleEscapeFinder.start(), inp); + throw new IllegalArgumentException(msg); + } + + final String escapeSeq = escapeFinder.group(); + + /* + * Convert the escape to a string. + */ + String escapeRep = ""; + switch (escapeSeq) { + case "\\b": + escapeRep = "\b"; + break; + case "\\t": + escapeRep = "\t"; + break; + case "\\n": + escapeRep = "\n"; + break; + case "\\f": + escapeRep = "\f"; + break; + case "\\r": + escapeRep = "\r"; + break; + case "\\\"": + escapeRep = "\""; + break; + case "\\'": + escapeRep = "'"; + break; + case "\\\\": + /* + * Skip past the second slash. + */ + possibleEscapeFinder.find(); + escapeRep = "\\"; + break; + default: + if (escapeSeq.startsWith("u")) { + escapeRep = handleUnicodeEscape(escapeSeq.substring(1)); + } else if(escapeSeq.startsWith("O")) { + escapeRep = handleOctalEscape(escapeSeq.substring(1)); + } else if(literalEscapes.containsKey(escapeSeq)) { + escapeRep = literalEscapes.get(escapeSeq); + } else { + for(Entry<Pattern, UnaryOperator<String>> ent : specialEscapes.entrySet()) { + Pattern pat = ent.getKey(); + + Matcher mat = pat.matcher(escapeSeq); + if(mat.matches()) { + escapeRep = ent.getValue().apply(escapeSeq); + break; + } + } + } + } + + escapeFinder.appendReplacement(work, escapeRep); + } + + escapeFinder.appendTail(work); + + return work.toString(); + } + + /* + * Handle a unicode codepoint. + */ + private static String handleUnicodeEscape(final String seq) { + try { + final int codepoint = Integer.parseInt(seq, 16); + + return new String(Character.toChars(codepoint)); + } catch (final IllegalArgumentException iaex) { + final String msg = String.format("'%s' is not a valid Unicode escape sequence'", seq); + + final IllegalArgumentException reiaex = new IllegalArgumentException(msg); + + reiaex.initCause(iaex); + + throw reiaex; + } + } + + /* + * Handle a octal codepoint. 
+ */ + private static String handleOctalEscape(final String seq) { + try { + final int codepoint = Integer.parseInt(seq, 8); + + if (codepoint > 255) { + final String msg = String.format("'%d' is outside the range of octal escapes', codepoint"); + + throw new IllegalArgumentException(msg); + } + + return new String(Character.toChars(codepoint)); + } catch (final IllegalArgumentException iaex) { + final String msg = String.format("'%s' is not a valid octal escape sequence'", seq); + + final IllegalArgumentException reiaex = new IllegalArgumentException(msg); + + reiaex.initCause(iaex); + + throw reiaex; + } + } +} diff --git a/base/src/main/java/bjc/utils/parserutils/TokenTransformer.java b/base/src/main/java/bjc/utils/parserutils/TokenTransformer.java new file mode 100644 index 0000000..30ccc5a --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/TokenTransformer.java @@ -0,0 +1,131 @@ +package bjc.utils.parserutils; + +import java.util.Deque; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.function.UnaryOperator; + +import bjc.utils.data.IHolder; +import bjc.utils.data.ITree; +import bjc.utils.data.Pair; +import bjc.utils.data.Tree; +import bjc.utils.parserutils.TreeConstructor.ConstructorState; +import bjc.utils.parserutils.TreeConstructor.QueueFlattener; + +/* + * Handle creating ASTs from tokens. 
+ */ +final class TokenTransformer<TokenType> implements Consumer<TokenType> { + /* + * Handle operators + */ + private final class OperatorHandler implements UnaryOperator<ConstructorState<TokenType>> { + private final TokenType element; + + public OperatorHandler(final TokenType element) { + this.element = element; + } + + @Override + public ConstructorState<TokenType> apply(final ConstructorState<TokenType> pair) { + /* + * Replace the current AST with the result of handling an operator + */ + return new ConstructorState<>(pair.bindLeft(queuedASTs -> { + return handleOperator(queuedASTs); + })); + } + + private ConstructorState<TokenType> handleOperator(final Deque<ITree<TokenType>> queuedASTs) { + /* + * The AST we're going to hand back + */ + ITree<TokenType> newAST; + + /* + * Handle special operators + */ + if (isSpecialOperator.test(element)) { + newAST = handleSpecialOperator.apply(element).apply(queuedASTs); + } else { + /* + * Error if we don't have enough for a binary operator + */ + if (queuedASTs.size() < 2) { + final String msg = String.format( + "Attempted to parse binary operator without enough operands\n\tProblem operator is: %s\n\tPossible operand is: %s", + element.toString(), queuedASTs.peek().toString()); + + throw new IllegalStateException(msg); + } + + /* + * Grab the two operands + */ + final ITree<TokenType> right = queuedASTs.pop(); + final ITree<TokenType> left = queuedASTs.pop(); + + /* + * Create a new AST + */ + newAST = new Tree<>(element, left, right); + } + + /* + * Stick it onto the stack + */ + queuedASTs.push(newAST); + + /* + * Hand back the state + */ + return new ConstructorState<>(queuedASTs, newAST); + } + } + + private final IHolder<ConstructorState<TokenType>> initialState; + + private final Predicate<TokenType> operatorPredicate; + + private final Predicate<TokenType> isSpecialOperator; + private final Function<TokenType, QueueFlattener<TokenType>> handleSpecialOperator; + + /* + * Create a new transformer + */ + public 
TokenTransformer(final IHolder<ConstructorState<TokenType>> initialState, + final Predicate<TokenType> operatorPredicate, final Predicate<TokenType> isSpecialOperator, + final Function<TokenType, QueueFlattener<TokenType>> handleSpecialOperator) { + this.initialState = initialState; + this.operatorPredicate = operatorPredicate; + this.isSpecialOperator = isSpecialOperator; + this.handleSpecialOperator = handleSpecialOperator; + } + + @Override + public void accept(final TokenType element) { + /* + * Handle operators + */ + if (operatorPredicate.test(element)) { + initialState.transform(new OperatorHandler(element)); + } else { + final ITree<TokenType> newAST = new Tree<>(element); + + /* + * Insert the new tree into the AST + */ + initialState.transform(pair -> { + /* + * Transform the pair, ignoring the current AST in favor of the one consisting of the current element + */ + return new ConstructorState<>(pair.bindLeft(queue -> { + queue.push(newAST); + + return new Pair<>(queue, newAST); + })); + }); + } + } +} diff --git a/base/src/main/java/bjc/utils/parserutils/TokenUtils.java b/base/src/main/java/bjc/utils/parserutils/TokenUtils.java new file mode 100644 index 0000000..67c1e5a --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/TokenUtils.java @@ -0,0 +1,303 @@ +package bjc.utils.parserutils; + +import static bjc.utils.PropertyDB.applyFormat; +import static bjc.utils.PropertyDB.getCompiledRegex; +import static bjc.utils.PropertyDB.getRegex; + +import java.util.LinkedList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import bjc.utils.funcdata.FunctionalList; +import bjc.utils.funcdata.IList; +import bjc.utils.parserutils.splitter.TokenSplitter; + +/** + * Utilities useful for operating on PL tokens. + * + * @author EVE + * + */ +public class TokenUtils { + /** + * Simple implementation of TokenSplitter for removing double-quoted + * strings. 
+ * + * @author EVE + * + */ + public static class StringTokenSplitter implements TokenSplitter { + @Override + public IList<String> split(final String input) { + return new FunctionalList<>(TokenUtils.removeDQuotedStrings(input)); + } + } + + /* + * Patterns and pattern parts. + */ + private static String rPossibleEscapeString = getRegex("possibleStringEscape"); + + private static Pattern possibleEscapePatt = Pattern.compile(rPossibleEscapeString); + + private static String rShortEscape = getRegex("shortFormStringEscape"); + private static String rOctalEscape = getRegex("octalStringEscape"); + private static String rUnicodeEscape = getRegex("unicodeStringEscape"); + + private static String rEscapeString = applyFormat("stringEscape", rShortEscape, rOctalEscape, rUnicodeEscape); + + private static Pattern escapePatt = Pattern.compile(rEscapeString); + + private static String rDoubleQuoteString = applyFormat("doubleQuotes", getRegex("nonStringEscape"), + rPossibleEscapeString); + + private static Pattern doubleQuotePatt = Pattern.compile(rDoubleQuoteString); + + private static Pattern quotePatt = getCompiledRegex("unescapedQuote"); + + private static Pattern intLitPattern = getCompiledRegex("intLiteral"); + + /** + * Remove double quoted strings from a string. + * + * Splits a string around instances of java-style double-quoted strings. + * + * @param inp + * The string to split. + * + * @return An list containing alternating bits of the string and the + * embedded double-quoted strings that separated them. + */ + public static List<String> removeDQuotedStrings(final String inp) { + if (inp == null) throw new NullPointerException("inp must not be null"); + + /* + * What we need for piece-by-piece string building + */ + StringBuffer work = new StringBuffer(); + final List<String> res = new LinkedList<>(); + + /* + * Matcher for proper strings and single quotes. 
+ */ + final Matcher mt = doubleQuotePatt.matcher(inp); + final Matcher corr = quotePatt.matcher(inp); + + if (corr.find() && !corr.find()) { + /* + * There's a unmatched opening quote with no strings. + */ + final String msg = String.format( + "Unclosed string literal '%s'. Opening quote was at position %d", inp, + inp.indexOf("\"")); + + throw new IllegalArgumentException(msg); + } + + while (mt.find()) { + /* + * Remove the string until the quoted string. + */ + mt.appendReplacement(work, ""); + + /* + * Add the string preceding the double-quoted string and + * the double-quoted string to the list. + */ + res.add(work.toString()); + res.add(mt.group(1)); + + /* + * Renew the buffer. + */ + work = new StringBuffer(); + } + + /* + * Grab the remainder of the string. + */ + mt.appendTail(work); + final String tail = work.toString(); + + if (tail.contains("\"")) { + /* + * There's a unmatched opening quote with at least one + * string. + */ + final String msg = String.format( + "Unclosed string literal '%s'. Opening quote was at position %d", inp, + inp.lastIndexOf("\"")); + + throw new IllegalArgumentException(msg); + } + + /* + * Only add an empty tail if the string was empty. + */ + if (!tail.equals("") || res.isEmpty()) { + res.add(tail); + } + + return res; + } + + /** + * Replace escape characters with their actual equivalents. + * + * @param inp + * The string to replace escape sequences in. + * + * @return The string with escape sequences replaced by their equivalent + * characters. + */ + public static String descapeString(final String inp) { + if (inp == null) throw new NullPointerException("inp must not be null"); + + /* + * Prepare the buffer and escape finder. 
+ */ + final StringBuffer work = new StringBuffer(); + final Matcher possibleEscapeFinder = possibleEscapePatt.matcher(inp); + final Matcher escapeFinder = escapePatt.matcher(inp); + + while (possibleEscapeFinder.find()) { + if (!escapeFinder.find()) { + /* + * Found a possible escape that isn't actually an + * escape. + */ + final String msg = String.format("Illegal escape sequence '%s' at position %d", + possibleEscapeFinder.group(), possibleEscapeFinder.start()); + + throw new IllegalArgumentException(msg); + } + + final String escapeSeq = escapeFinder.group(); + + /* + * Convert the escape to a string. + */ + String escapeRep = ""; + switch (escapeSeq) { + case "\\b": + escapeRep = "\b"; + break; + case "\\t": + escapeRep = "\t"; + break; + case "\\n": + escapeRep = "\n"; + break; + case "\\f": + escapeRep = "\f"; + break; + case "\\r": + escapeRep = "\r"; + break; + case "\\\"": + escapeRep = "\""; + break; + case "\\'": + escapeRep = "'"; + break; + case "\\\\": + /* + * Skip past the second slash. + */ + possibleEscapeFinder.find(); + escapeRep = "\\"; + break; + default: + if (escapeSeq.startsWith("u")) { + escapeRep = handleUnicodeEscape(escapeSeq.substring(1)); + } else { + escapeRep = handleOctalEscape(escapeSeq); + } + } + + escapeFinder.appendReplacement(work, escapeRep); + } + + escapeFinder.appendTail(work); + + return work.toString(); + } + + /* + * Handle a unicode codepoint. + */ + private static String handleUnicodeEscape(final String seq) { + try { + final int codepoint = Integer.parseInt(seq, 16); + + return new String(Character.toChars(codepoint)); + } catch (final IllegalArgumentException iaex) { + final String msg = String.format("'%s' is not a valid Unicode escape sequence'", seq); + + final IllegalArgumentException reiaex = new IllegalArgumentException(msg); + + reiaex.initCause(iaex); + + throw reiaex; + } + } + + /* + * Handle a octal codepoint. 
+ */ + private static String handleOctalEscape(final String seq) { + try { + final int codepoint = Integer.parseInt(seq, 8); + + if (codepoint > 255) { + final String msg = String + .format("'%d' is outside the range of octal escapes', codepoint"); + + throw new IllegalArgumentException(msg); + } + + return new String(Character.toChars(codepoint)); + } catch (final IllegalArgumentException iaex) { + final String msg = String.format("'%s' is not a valid octal escape sequence'", seq); + + final IllegalArgumentException reiaex = new IllegalArgumentException(msg); + + reiaex.initCause(iaex); + + throw reiaex; + } + } + + /** + * Check if a given string would be successfully converted to a double + * by {@link Double#parseDouble(String)}. + * + * @param inp + * The string to check. + * @return Whether the string is a valid double or not. + */ + public static boolean isDouble(final String inp) { + return DoubleMatcher.doubleLiteral.matcher(inp).matches(); + } + + /** + * Check if a given string would be successfully converted to a integer + * by {@link Integer#parseInt(String)}. + * + * NOTE: This only checks syntax. Using values out of the range of + * integers will still cause errors. + * + * @param inp + * The input to check. + * @return Whether the string is a valid integer or not. 
+ */ + public static boolean isInt(final String inp) { + try { + Integer.parseInt(inp); + return true; + } catch (NumberFormatException nfex) { + return false; + } + } +} diff --git a/base/src/main/java/bjc/utils/parserutils/TreeConstructor.java b/base/src/main/java/bjc/utils/parserutils/TreeConstructor.java new file mode 100644 index 0000000..90141ef --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/TreeConstructor.java @@ -0,0 +1,125 @@ +package bjc.utils.parserutils; + +import java.util.Deque; +import java.util.LinkedList; +import java.util.function.Function; +import java.util.function.Predicate; + +import bjc.utils.data.IHolder; +import bjc.utils.data.IPair; +import bjc.utils.data.ITree; +import bjc.utils.data.Identity; +import bjc.utils.data.Pair; +import bjc.utils.funcdata.IList; + +/** + * Creates a parse tree from a postfix expression + * + * @author ben + * + */ +public class TreeConstructor { + /** + * Alias interface for special operator types. + * + * @param <TokenType> + * The token type of the tree. + */ + public interface QueueFlattener<TokenType> extends Function<Deque<ITree<TokenType>>, ITree<TokenType>> { + + } + + /* + * Alias for constructor state. + */ + static final class ConstructorState<TokenType> extends Pair<Deque<ITree<TokenType>>, ITree<TokenType>> { + public ConstructorState(final Deque<ITree<TokenType>> left, final ITree<TokenType> right) { + super(left, right); + } + + public ConstructorState(final IPair<Deque<ITree<TokenType>>, ITree<TokenType>> par) { + super(par.getLeft(), par.getRight()); + } + } + + /** + * Construct a tree from a list of tokens in postfix notation + * + * Only binary operators are accepted. 
+ * + * @param <TokenType> + * The elements of the parse tree + * @param tokens + * The list of tokens to build a tree from + * @param isOperator + * The predicate to use to determine if something is a + * operator + * @return A AST from the expression + */ + public static <TokenType> ITree<TokenType> constructTree(final IList<TokenType> tokens, + final Predicate<TokenType> isOperator) { + /* + * Construct a tree with no special operators + */ + return constructTree(tokens, isOperator, op -> false, null); + } + + /** + * Construct a tree from a list of tokens in postfix notation. + * + * Only binary operators are accepted by default. Use the last two + * parameters to handle non-binary operators. + * + * @param <TokenType> + * The elements of the parse tree. + * + * @param tokens + * The list of tokens to build a tree from. + * + * @param isOperator + * The predicate to use to determine if something is a + * operator. + * + * @param isSpecialOperator + * The predicate to use to determine if an operator needs + * special handling. + * + * @param handleSpecialOperator + * The function to use to handle special case operators. 
+ * + * @return A AST from the expression + * + */ + public static <TokenType> ITree<TokenType> constructTree(final IList<TokenType> tokens, + final Predicate<TokenType> isOperator, final Predicate<TokenType> isSpecialOperator, + final Function<TokenType, QueueFlattener<TokenType>> handleSpecialOperator) { + /* + * Make sure our parameters are valid + */ + if (tokens == null) + throw new NullPointerException("Tokens must not be null"); + else if (isOperator == null) + throw new NullPointerException("Operator predicate must not be null"); + else if (isSpecialOperator == null) + throw new NullPointerException("Special operator determiner must not be null"); + + /* + * Here is the state for the tree construction + */ + final IHolder<ConstructorState<TokenType>> initialState = new Identity<>( + new ConstructorState<>(new LinkedList<>(), null)); + + /* + * Transform each of the tokens + */ + tokens.forEach(new TokenTransformer<>(initialState, isOperator, isSpecialOperator, + handleSpecialOperator)); + + /* + * Grab the tree from the state + */ + return initialState.unwrap(pair -> { + return pair.getRight(); + }); + } +} diff --git a/base/src/main/java/bjc/utils/parserutils/defines/IteratedDefine.java b/base/src/main/java/bjc/utils/parserutils/defines/IteratedDefine.java new file mode 100644 index 0000000..552b471 --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/defines/IteratedDefine.java @@ -0,0 +1,48 @@ +package bjc.utils.parserutils.defines; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.function.UnaryOperator; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import bjc.utils.data.CircularIterator; + +public class IteratedDefine implements UnaryOperator<String> { + private Pattern patt; + + private Iterator<String> repls; + + /** + * Create a new iterated define. + * + * @param pattern + * The pattern to use for matching. 
/**
 * A define that rewrites a line by replacing every match of a pattern with
 * one fixed replacement string.
 */
public class SimpleDefine implements UnaryOperator<String> {
	/*
	 * The pattern whose matches are replaced.
	 */
	private Pattern matchPattern;

	/*
	 * The replacement handed to the matcher; it is interpreted with the
	 * usual regex replacement syntax (group references and escapes).
	 */
	private String replacement;

	/**
	 * Create a new simple define.
	 *
	 * @param pattern
	 *                The pattern to match against each line.
	 *
	 * @param replace
	 *                The replacement applied to every match.
	 */
	public SimpleDefine(Pattern pattern, String replace) {
		this.matchPattern = pattern;
		this.replacement = replace;
	}

	/**
	 * Replace every match of the pattern in the given line.
	 *
	 * @param line
	 *                The line to rewrite.
	 *
	 * @return The line with all matches replaced.
	 */
	@Override
	public String apply(String line) {
		return matchPattern.matcher(line).replaceAll(replacement);
	}
}
/**
 * Base class for the unchecked exceptions raised while delimiting a
 * sequence.
 */
public class DelimiterException extends RuntimeException {
	/*
	 * Version ID for serialization.
	 */
	private static final long serialVersionUID = 2079514406049040888L;

	/**
	 * Create a new generic delimiter exception.
	 *
	 * @param res
	 *                The reason for this exception; it becomes the
	 *                exception message.
	 */
	public DelimiterException(final String res) {
		super(res);
	}
}
\ No newline at end of file diff --git a/base/src/main/java/bjc/utils/parserutils/delims/DelimiterGroup.java b/base/src/main/java/bjc/utils/parserutils/delims/DelimiterGroup.java new file mode 100644 index 0000000..b1d8597 --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/delims/DelimiterGroup.java @@ -0,0 +1,593 @@ +package bjc.utils.parserutils.delims; + +import java.util.Arrays; +import java.util.Deque; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.BiPredicate; +import java.util.function.Function; + +import bjc.utils.data.IPair; +import bjc.utils.data.ITree; +import bjc.utils.data.Pair; +import bjc.utils.data.Tree; +import bjc.utils.funcdata.FunctionalList; +import bjc.utils.funcdata.IList; + +/** + * Represents a possible delimiter group to match. + * + * @author EVE + * + * @param <T> + * The type of items in the sequence. + */ +public class DelimiterGroup<T> { + /** + * Represents an instance of a delimiter group. + * + * @author EVE + * + */ + public class OpenGroup { + /* + * The contents of this group. + */ + private final Deque<ITree<T>> contents; + + /* + * The contents of the current subgroup. + */ + private IList<ITree<T>> currentGroup; + + /* + * The token that opened the group, and any opening parameters. + */ + private final T opener; + private final T[] params; + + /** + * Create a new instance of a delimiter group. + * + * @param open + * The item that opened this group. + * + * @param parms + * Any parameters from the opener. + */ + public OpenGroup(final T open, final T[] parms) { + opener = open; + params = parms; + + contents = new LinkedList<>(); + + currentGroup = new FunctionalList<>(); + } + + /** + * Add an item to this group instance. + * + * @param itm + * The item to add to this group instance. 
+ */ + public void addItem(final ITree<T> itm) { + currentGroup.add(itm); + } + + /** + * Mark a subgroup. + * + * @param marker + * The item that indicated this subgroup. + * + * @param chars + * The characteristics for building the tree. + */ + public void markSubgroup(final T marker, final SequenceCharacteristics<T> chars) { + /* + * Add all of the contents to the subgroup. + */ + final ITree<T> subgroupContents = new Tree<>(chars.contents); + for (final ITree<T> itm : currentGroup) { + subgroupContents.addChild(itm); + } + + /* + * Handle subordinate sub-groups. + */ + while (!contents.isEmpty()) { + final ITree<T> possibleSubordinate = contents.peek(); + + /* + * Subordinate lower priority subgroups. + */ + if (possibleSubordinate.getHead().equals(chars.subgroup)) { + final T otherMarker = possibleSubordinate.getChild(1).getHead(); + + if (subgroups.get(marker) > subgroups.get(otherMarker)) { + subgroupContents.prependChild(contents.pop()); + } else { + break; + } + } else { + subgroupContents.prependChild(contents.pop()); + } + } + + final Tree<T> subgroup = new Tree<>(chars.subgroup, subgroupContents, new Tree<>(marker)); + + contents.push(subgroup); + + currentGroup = new FunctionalList<>(); + } + + /** + * Convert this group into a tree. + * + * @param closer + * The item that closed this group. + * + * @param chars + * The characteristics for building the tree. + * + * @return This group as a tree. + */ + public ITree<T> toTree(final T closer, final SequenceCharacteristics<T> chars) { + /* + * Mark any implied subgroups. + */ + if (impliedSubgroups.containsKey(closer)) { + markSubgroup(impliedSubgroups.get(closer), chars); + } + + final ITree<T> res = new Tree<>(chars.contents); + + /* + * Add either the contents of the current group, + * or subgroups if they're their. 
+ */ + if (contents.isEmpty()) { + currentGroup.forEach(res::addChild); + } else { + while (!contents.isEmpty()) { + res.prependChild(contents.poll()); + } + + currentGroup.forEach(res::addChild); + } + + return new Tree<>(groupName, new Tree<>(opener), res, new Tree<>(closer)); + } + + @Override + public String toString() { + final StringBuilder builder = new StringBuilder(); + + builder.append("OpenGroup [contents="); + builder.append(contents); + builder.append(", currentGroup="); + builder.append(currentGroup); + builder.append(", opener="); + builder.append(opener); + builder.append("]"); + + return builder.toString(); + } + + /** + * Check if a group is excluded at the top level of this group. + * + * @param groupName + * The group to check. + * + * @return Whether or not the provided group is excluded. + */ + public boolean excludes(final T groupName) { + return topLevelExclusions.contains(groupName); + } + + /** + * Check if the provided delimiter would close this group. + * + * @param del + * The string to check as a closing delimiter. + * + * @return Whether or not the provided delimiter closes this + * group. + */ + public boolean isClosing(final T del) { + if (closingDelimiters.contains(del)) return true; + + for (final BiPredicate<T, T[]> pred : predClosers) { + if (pred.test(del, params)) return true; + } + + return closingDelimiters.contains(del); + } + + /** + * Get the name of the group this is an instance of. + * + * @return The name of the group this is an instance of. + */ + public T getName() { + return groupName; + } + + /** + * Get the groups that aren't allowed at all in this group. + * + * @return The groups that aren't allowed at all in this group. + */ + public Set<T> getNestingExclusions() { + return groupExclusions; + } + + /** + * Get the groups that are allowed to open anywhere inside this + * group. + * + * @return The groups allowed to open anywhere inside this + * group. 
+ */ + public Map<T, T> getNestingOpeners() { + return nestedOpenDelimiters; + } + + /** + * Checks if a given token marks a subgroup. + * + * @param tok + * The token to check. + * + * @return Whether or not the token marks a subgroup. + */ + public boolean marksSubgroup(final T tok) { + return subgroups.containsKey(tok); + } + + /** + * Checks if a given token opens a group. + * + * @param marker + * The token to check. + * + * @return The name of the group T opens, or null if it doesn't + * open one. + */ + public IPair<T, T[]> doesOpen(final T marker) { + if (openDelimiters.containsKey(marker)) return new Pair<>(openDelimiters.get(marker), null); + + for (final Function<T, IPair<T, T[]>> pred : predOpeners) { + final IPair<T, T[]> par = pred.apply(marker); + + if (par.getLeft() != null) return par; + } + + return new Pair<>(null, null); + } + + /** + * Check if this group starts a new nesting scope. + * + * @return Whether this group starts a new nesting scope. + */ + public boolean isForgetful() { + return forgetful; + } + } + + /** + * The name of this delimiter group. + */ + public final T groupName; + + /* + * The delimiters that open groups at the top level of this group. + */ + private final Map<T, T> openDelimiters; + + /* + * The delimiters that open groups inside of this group. + */ + private final Map<T, T> nestedOpenDelimiters; + + /* + * The delimiters that close this group. + */ + private final Set<T> closingDelimiters; + + /* + * The groups that can't occur in the top level of this group. + */ + private final Set<T> topLevelExclusions; + + /* + * The groups that can't occur anywhere inside this group. + */ + private final Set<T> groupExclusions; + + /* + * Mapping from sub-group delimiters, to any sub-groups enclosed in + * them. 
+ */ + private final Map<T, Integer> subgroups; + + /* + * Subgroups implied by a particular closing delimiter + */ + private final Map<T, T> impliedSubgroups; + + /* + * Allows more complex openings + */ + private final List<Function<T, IPair<T, T[]>>> predOpeners; + + /* + * Allow more complex closings + */ + private final List<BiPredicate<T, T[]>> predClosers; + + /* + * Whether or not this group starts a new nesting set. + */ + private boolean forgetful; + + /** + * Create a new empty delimiter group. + * + * @param name + * The name of the delimiter group + */ + public DelimiterGroup(final T name) { + if (name == null) throw new NullPointerException("Group name must not be null"); + + groupName = name; + + openDelimiters = new HashMap<>(); + nestedOpenDelimiters = new HashMap<>(); + + closingDelimiters = new HashSet<>(); + + topLevelExclusions = new HashSet<>(); + groupExclusions = new HashSet<>(); + + subgroups = new HashMap<>(); + impliedSubgroups = new HashMap<>(); + + predOpeners = new LinkedList<>(); + predClosers = new LinkedList<>(); + } + + /** + * Adds one or more delimiters that close this group. + * + * @param closers + * Delimiters that close this group. + */ + @SafeVarargs + public final void addClosing(final T... closers) { + final List<T> closerList = Arrays.asList(closers); + + for (final T closer : closerList) { + if (closer == null) + throw new NullPointerException("Closing delimiter must not be null"); + else if (closer.equals("")) + /* + * We can do this because equals works on + * arbitrary objects, not just those of the same + * type. + */ + throw new IllegalArgumentException("Empty string is not a valid exclusion"); + else { + closingDelimiters.add(closer); + } + } + } + + /** + * Adds one or more groups that cannot occur in the top level of this + * group. + * + * @param exclusions + * The groups forbidden in the top level of this group. + */ + @SafeVarargs + public final void addTopLevelForbid(final T... 
exclusions) { + for (final T exclusion : exclusions) { + if (exclusion == null) + throw new NullPointerException("Exclusion must not be null"); + else if (exclusion.equals("")) + /* + * We can do this because equals works on + * arbitrary objects, not just those of the same + * type. + */ + throw new IllegalArgumentException("Empty string is not a valid exclusion"); + else { + topLevelExclusions.add(exclusion); + } + } + } + + /** + * Adds one or more groups that cannot occur at all in this group. + * + * @param exclusions + * The groups forbidden inside this group. + */ + @SafeVarargs + public final void addGroupForbid(final T... exclusions) { + for (final T exclusion : exclusions) { + if (exclusion == null) + throw new NullPointerException("Exclusion must not be null"); + else if (exclusion.equals("")) + /* + * We can do this because equals works on + * arbitrary objects, not just those of the same + * type. + */ + throw new IllegalArgumentException("Empty string is not a valid exclusion"); + else { + groupExclusions.add(exclusion); + } + } + } + + /** + * Adds sub-group markers to this group. + * + * @param subgroup + * The token to mark a sub-group. + * + * @param priority + * The priority of this sub-group. + */ + public void addSubgroup(final T subgroup, final int priority) { + if (subgroup == null) throw new NullPointerException("Subgroup marker must not be null"); + + subgroups.put(subgroup, priority); + } + + /** + * Adds a marker that opens a group at the top level of this group. + * + * @param opener + * The marker that opens the group. + * + * @param group + * The group opened by the marker. + */ + public void addOpener(final T opener, final T group) { + if (opener == null) throw new NullPointerException("Opener must not be null"); + else if (group == null) throw new NullPointerException("Group to open must not be null"); + + openDelimiters.put(opener, group); + } + + /** + * Adds a marker that opens a group inside of this group. 
+ * + * @param opener + * The marker that opens the group. + * + * @param group + * The group opened by the marker. + */ + public void addNestedOpener(final T opener, final T group) { + if (opener == null) throw new NullPointerException("Opener must not be null"); + else if (group == null) throw new NullPointerException("Group to open must not be null"); + + nestedOpenDelimiters.put(opener, group); + } + + /** + * Mark a closing delimiter as implying a subgroup. + * + * @param closer + * The closing delimiter. + * + * @param subgroup + * The subgroup to imply. + */ + public void implySubgroup(final T closer, final T subgroup) { + if (closer == null) throw new NullPointerException("Closer must not be null"); + else if (subgroup == null) throw new NullPointerException("Subgroup must not be null"); + else if (!closingDelimiters.contains(closer)) throw new IllegalArgumentException(String.format("No closing delimiter '%s' defined", closer)); + else if (!subgroups.containsKey(subgroup)) throw new IllegalArgumentException(String.format("No subgroup '%s' defined", subgroup)); + + impliedSubgroups.put(closer, subgroup); + } + + @Override + public String toString() { + final StringBuilder builder = new StringBuilder(); + + builder.append("("); + + builder.append("groupName=["); + builder.append(groupName); + builder.append("], "); + + builder.append("closingDelimiters=["); + for (final T closer : closingDelimiters) { + builder.append(closer + ","); + } + builder.deleteCharAt(builder.length() - 1); + builder.append("]"); + + if (topLevelExclusions != null && !topLevelExclusions.isEmpty()) { + builder.append(", "); + builder.append("topLevelExclusions=["); + for (final T exclusion : topLevelExclusions) { + builder.append(exclusion + ","); + } + builder.deleteCharAt(builder.length() - 1); + builder.append("]"); + } + + if (groupExclusions != null && !groupExclusions.isEmpty()) { + builder.append(", "); + builder.append("groupExclusions=["); + for (final T exclusion : 
groupExclusions) { + builder.append(exclusion + ","); + } + builder.deleteCharAt(builder.length() - 1); + builder.append("]"); + } + + builder.append(" )"); + + return builder.toString(); + } + + /** + * Open an instance of this group. + * + * @param opener + * The item that opened this group. + * + * @param parms + * The parameters that opened this group + * + * @return An opened instance of this group. + */ + public OpenGroup open(final T opener, final T[] parms) { + return new OpenGroup(opener, parms); + } + + /** + * Adds a predicated opener to the top level of this group. + * + * @param pred + * The predicate that defines the opener and its + * parameters. + */ + public void addPredOpener(final Function<T, IPair<T, T[]>> pred) { + predOpeners.add(pred); + } + + /** + * Adds a predicated closer to the top level of this group. + * + * @param pred + * The predicate that defines the closer. + */ + public void addPredCloser(final BiPredicate<T, T[]> pred) { + predClosers.add(pred); + } + + /** + * Set whether or not this group starts a new nesting set. + * + * @param forgetful + * Whether this group starts a new nesting set. + */ + public void setForgetful(final boolean forgetful) { + this.forgetful = forgetful; + } +} diff --git a/base/src/main/java/bjc/utils/parserutils/delims/RegexCloser.java b/base/src/main/java/bjc/utils/parserutils/delims/RegexCloser.java new file mode 100644 index 0000000..4b29949 --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/delims/RegexCloser.java @@ -0,0 +1,33 @@ +package bjc.utils.parserutils.delims; + +import java.util.function.BiPredicate; + +/** + * A predicated closer for use with {@link RegexOpener}. + * + * @author bjculkin + * + */ +public class RegexCloser implements BiPredicate<String, String[]> { + private final String rep; + + /** + * Create a new regex closer. + * + * @param closer + * The format string to use for closing. 
/**
 * A predicated closer for use with {@link RegexOpener}.
 *
 * The closer is a format string; it is filled in with the parameters of the
 * opening and the result is compared with the candidate closing delimiter.
 *
 * @author bjculkin
 *
 */
public class RegexCloser implements BiPredicate<String, String[]> {
	/*
	 * The format string that produces the expected closing delimiter.
	 */
	private final String closerFormat;

	/**
	 * Create a new regex closer.
	 *
	 * @param closer
	 *                The format string to use for closing.
	 */
	public RegexCloser(final String closer) {
		closerFormat = closer;
	}

	/**
	 * Check whether a delimiter closes a group opened with the given
	 * parameters.
	 *
	 * @param closer
	 *                The candidate closing delimiter.
	 *
	 * @param params
	 *                The parameters of the opening, used as the format
	 *                arguments.
	 *
	 * @return Whether the formatted closer equals the candidate.
	 */
	@Override
	public boolean test(final String closer, final String[] params) {
		/*
		 * The cast makes the array supply the individual format
		 * arguments instead of being passed as one argument.
		 */
		final Object[] formatArgs = (Object[]) params;

		return String.format(closerFormat, formatArgs).equals(closer);
	}
}
\ No newline at end of file diff --git a/base/src/main/java/bjc/utils/parserutils/delims/RegexOpener.java b/base/src/main/java/bjc/utils/parserutils/delims/RegexOpener.java new file mode 100644 index 0000000..ee93b73 --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/delims/RegexOpener.java @@ -0,0 +1,54 @@ +package bjc.utils.parserutils.delims; + +import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import bjc.utils.data.IPair; +import bjc.utils.data.Pair; + +/** + * A predicated opener for use with {@link RegexCloser} + * + * @author bjculkin + * + */ +public class RegexOpener implements Function<String, IPair<String, String[]>> { + private final String name; + + private final Pattern patt; + + /** + * Create a new regex opener. + * + * @param groupName + * The name of the opened group. + * + * @param groupRegex + * The regex that matches the opener. + */ + public RegexOpener(final String groupName, final String groupRegex) { + name = groupName; + + patt = Pattern.compile(groupRegex); + } + + @Override + public IPair<String, String[]> apply(final String str) { + final Matcher m = patt.matcher(str); + + if (m.matches()) { + final int numGroups = m.groupCount(); + + final String[] parms = new String[numGroups + 1]; + + for (int i = 0; i <= numGroups; i++) { + parms[i] = m.group(i); + } + + return new Pair<>(name, parms); + } + + return new Pair<>(null, null); + } +} diff --git a/base/src/main/java/bjc/utils/parserutils/delims/SequenceCharacteristics.java b/base/src/main/java/bjc/utils/parserutils/delims/SequenceCharacteristics.java new file mode 100644 index 0000000..882b4c5 --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/delims/SequenceCharacteristics.java @@ -0,0 +1,93 @@ +package bjc.utils.parserutils.delims; + +/** + * Marks the parameters for building a sequence tree. + * + * @author EVE + * + * @param <T> + * The type of item in the tree. 
+ */ +public class SequenceCharacteristics<T> { + /** + * The item to mark the root of the tree. + */ + public final T root; + + /** + * The item to mark the contents of a group/subgroup. + */ + + public final T contents; + + /** + * The item to mark a subgroup. + */ + public final T subgroup; + + /** + * Create a new set of parameters for building a tree. + * + * @param root + * The root marker. + * @param contents + * The group/subgroup contents marker. + * @param subgroup + * The subgroup marker. + */ + public SequenceCharacteristics(final T root, final T contents, final T subgroup) { + this.root = root; + this.contents = contents; + this.subgroup = subgroup; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + + result = prime * result + (contents == null ? 0 : contents.hashCode()); + result = prime * result + (root == null ? 0 : root.hashCode()); + result = prime * result + (subgroup == null ? 0 : subgroup.hashCode()); + + return result; + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) return true; + if (obj == null) return false; + if (!(obj instanceof SequenceCharacteristics)) return false; + + final SequenceCharacteristics<?> other = (SequenceCharacteristics<?>) obj; + + if (contents == null) { + if (other.contents != null) return false; + } else if (!contents.equals(other.contents)) return false; + + if (root == null) { + if (other.root != null) return false; + } else if (!root.equals(other.root)) return false; + + if (subgroup == null) { + if (other.subgroup != null) return false; + } else if (!subgroup.equals(other.subgroup)) return false; + + return true; + } + + @Override + public String toString() { + final StringBuilder builder = new StringBuilder(); + + builder.append("SequenceCharacteristics [root="); + builder.append(root == null ? "(null)" : root); + builder.append(", contents="); + builder.append(contents == null ? 
"(null)" : contents); + builder.append(", subgroup="); + builder.append(subgroup == null ? "(null)" : subgroup); + builder.append("]"); + + return builder.toString(); + } +}
\ No newline at end of file diff --git a/base/src/main/java/bjc/utils/parserutils/delims/SequenceDelimiter.java b/base/src/main/java/bjc/utils/parserutils/delims/SequenceDelimiter.java new file mode 100644 index 0000000..ccfaffb --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/delims/SequenceDelimiter.java @@ -0,0 +1,371 @@ +package bjc.utils.parserutils.delims; + +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + +import com.google.common.collect.HashMultimap; +import com.google.common.collect.HashMultiset; +import com.google.common.collect.Multimap; +import com.google.common.collect.Multiset; + +import bjc.utils.data.IPair; +import bjc.utils.data.ITree; +import bjc.utils.data.Tree; +import bjc.utils.esodata.PushdownMap; +import bjc.utils.esodata.SimpleStack; +import bjc.utils.esodata.Stack; +import bjc.utils.funcdata.IMap; +import bjc.utils.funcutils.StringUtils; + +/** + * Convert linear sequences into trees that represent group structure. + * + * @author EVE + * + * @param <T> + * The type of items in the sequence. + */ +public class SequenceDelimiter<T> { + /* + * Mapping from group names to actual groups. + */ + private final Map<T, DelimiterGroup<T>> groups; + + /* + * The initial group to start with. + */ + private DelimiterGroup<T> initialGroup; + + /** + * Create a new sequence delimiter. + */ + public SequenceDelimiter() { + groups = new HashMap<>(); + } + + /** + * Convert a linear sequence into a tree that matches the delimiter + * structure. + * + * Essentially, creates a parse tree of the expression against the + * following grammar while obeying the defined grouping rules. + * + * <pre> + * <tree> → (<data> | <subgroup> | <group>)* + * <subgroup> → <tree> <marker> + * <group> → <open> <tree> <close> + * + * <data> → STRING + * <open> → STRING + * <close> → STRING + * <marker> → STRING + * </pre> + * + * @param chars + * The parameters on how to mark certain portions of the + * tree. 
+ * @param seq + * The sequence to delimit. + * + * @return The sequence as a tree that matches its group structure. Each + * node in the tree is either a data node, a subgroup node, or a + * group node. + * + * A data node is a leaf node whose data is the string it + * represents. + * + * A subgroup node is a node with two children, and the name of + * the sub-group as its label. The first child is the contents + * of the sub-group, and the second is the marker that started + * the subgroup. The marker is a leaf node labeled with its + * contents, and the contents contains a recursive tree. + * + * A group node is a node with three children, and the name of + * the group as its label. The first child is the opening + * delimiter, the second is the group contents, and the third is + * the closing delimiter. The delimiters are leaf nodes labeled + * with their contents, while the group node contains a + * recursive tree. + * + * @throws DelimiterException + * Thrown if something went wrong during sequence + * delimitation. + * + */ + public ITree<T> delimitSequence(final SequenceCharacteristics<T> chars, + @SuppressWarnings("unchecked") final T... seq) throws DelimiterException { + if (initialGroup == null) throw new NullPointerException("Initial group must be specified."); + else if (chars == null) throw new NullPointerException("Sequence characteristics must not be null"); + + /* + * The stack of opened and not yet closed groups. + */ + final Stack<DelimiterGroup<T>.OpenGroup> groupStack = new SimpleStack<>(); + + /* + * Open initial group. + */ + groupStack.push(initialGroup.open(chars.root, null)); + + /* + * Groups that aren't allowed to be opened at the moment. + */ + final Stack<Multiset<T>> forbiddenDelimiters = new SimpleStack<>(); + forbiddenDelimiters.push(HashMultiset.create()); + + /* + * Groups that are allowed to be opened at the moment. 
+ */ + final Stack<Multimap<T, T>> allowedDelimiters = new SimpleStack<>(); + allowedDelimiters.push(HashMultimap.create()); + + /* + * Map of who forbid what for debugging purposes. + */ + final IMap<T, T> whoForbid = new PushdownMap<>(); + + /* + * Process each member of the sequence. + */ + for (int i = 0; i < seq.length; i++) { + final T tok = seq[i]; + + /* + * Check if this token could open a group. + */ + final IPair<T, T[]> possibleOpenPar = groupStack.top().doesOpen(tok); + T possibleOpen = possibleOpenPar.getLeft(); + + if (possibleOpen == null) { + /* + * Handle nested openers. + * + * Local openers take priority over nested ones + * if they overlap. + */ + if (allowedDelimiters.top().containsKey(tok)) { + possibleOpen = allowedDelimiters.top().get(tok).iterator().next(); + } + } + + /* + * If we have an opening delimiter, handle it. + */ + if (possibleOpen != null) { + final DelimiterGroup<T> group = groups.get(possibleOpen); + + /* + * Error on groups that can't open in this + * context. + * + * This means groups that can't occur at the + * top-level of this group, as well as nested + * exclusions from all enclosing groups. + */ + if (isForbidden(groupStack, forbiddenDelimiters, possibleOpen)) { + T forbiddenBy; + + if (whoForbid.containsKey(tok)) { + forbiddenBy = whoForbid.get(tok); + } else { + forbiddenBy = groupStack.top().getName(); + } + + final String ctxList = StringUtils.toEnglishList(groupStack.toArray(), "then"); + + final String fmt = "Group '%s' can't be opened in this context. (forbidden by '%s')\nContext Stack: %s"; + + throw new DelimiterException(String.format(fmt, group, forbiddenBy, ctxList)); + } + + /* + * Add an open group. 
+ */ + final DelimiterGroup<T>.OpenGroup open = group.open(tok, possibleOpenPar.getRight()); + groupStack.push(open); + + /* + * Handle 'forgetful' groups that reset nesting + */ + if (open.isForgetful()) { + allowedDelimiters.push(HashMultimap.create()); + forbiddenDelimiters.push(HashMultiset.create()); + } + + /* + * Add the nested opens from this group. + */ + final Multimap<T, T> currentAllowed = allowedDelimiters.top(); + for (final Entry<T, T> opener : open.getNestingOpeners().entrySet()) { + currentAllowed.put(opener.getKey(), opener.getValue()); + } + + /* + * Add the nested exclusions from this group + */ + final Multiset<T> currentForbidden = forbiddenDelimiters.top(); + for (final T exclusion : open.getNestingExclusions()) { + currentForbidden.add(exclusion); + + whoForbid.put(exclusion, possibleOpen); + } + } else if (!groupStack.empty() && groupStack.top().isClosing(tok)) { + /* + * Close the group. + */ + final DelimiterGroup<T>.OpenGroup closed = groupStack.pop(); + + groupStack.top().addItem(closed.toTree(tok, chars)); + + /* + * Remove nested exclusions from this group. + */ + final Multiset<T> currentForbidden = forbiddenDelimiters.top(); + for (final T excludedGroup : closed.getNestingExclusions()) { + currentForbidden.remove(excludedGroup); + + whoForbid.remove(excludedGroup); + } + + /* + * Remove the nested opens from this group. + */ + final Multimap<T, T> currentAllowed = allowedDelimiters.top(); + for (final Entry<T, T> closer : closed.getNestingOpeners().entrySet()) { + currentAllowed.remove(closer.getKey(), closer.getValue()); + } + + /* + * Handle 'forgetful' groups that reset nesting. + */ + if (closed.isForgetful()) { + allowedDelimiters.drop(); + forbiddenDelimiters.drop(); + } + } else if (!groupStack.empty() && groupStack.top().marksSubgroup(tok)) { + /* + * Mark a subgroup. + */ + groupStack.top().markSubgroup(tok, chars); + } else { + /* + * Add an item to the group. 
+ */ + groupStack.top().addItem(new Tree<>(tok)); + } + } + + /* + * Error if not all groups were closed. + */ + if (groupStack.size() > 1) { + final DelimiterGroup<T>.OpenGroup group = groupStack.top(); + + final StringBuilder msgBuilder = new StringBuilder(); + + final String closingDelims = StringUtils.toEnglishList(group.getNestingExclusions().toArray(), + false); + + final String ctxList = StringUtils.toEnglishList(groupStack.toArray(), "then"); + + msgBuilder.append("Unclosed group '"); + msgBuilder.append(group.getName()); + msgBuilder.append("'. Expected one of "); + msgBuilder.append(closingDelims); + msgBuilder.append(" to close it\nOpen groups: "); + msgBuilder.append(ctxList); + + final String fmt = "Unclosed group '%s'. Expected one of %s to close it.\nOpen groups: %n"; + + throw new DelimiterException(String.format(fmt, group.getName(), closingDelims, ctxList)); + } + + return groupStack.pop().toTree(chars.root, chars); + } + + /* + * Check if a group is forbidden to open in a context. + */ + private boolean isForbidden(final Stack<DelimiterGroup<T>.OpenGroup> groupStack, + final Stack<Multiset<T>> forbiddenDelimiters, final T groupName) { + boolean localForbid; + + /* + * Check if a delimiter is locally forbidden. + */ + if (groupStack.empty()) { + localForbid = false; + } else { + localForbid = groupStack.top().excludes(groupName); + } + + return localForbid || forbiddenDelimiters.top().contains(groupName); + } + + /** + * Add a delimiter group. + * + * @param group + * The delimiter group. + */ + public void addGroup(final DelimiterGroup<T> group) { + if (group == null) throw new NullPointerException("Group must not be null"); + + groups.put(group.groupName, group); + } + + /** + * Creates and adds a delimiter group using the provided settings. 
+ * + * @param openers + * The tokens that open this group + * @param groupName + * The name of the group + * @param closers + * The tokens that close this group + */ + public void addGroup(final T[] openers, final T groupName, @SuppressWarnings("unchecked") final T... closers) { + final DelimiterGroup<T> group = new DelimiterGroup<>(groupName); + + group.addClosing(closers); + + addGroup(group); + + for (final T open : openers) { + group.addOpener(open, groupName); + } + } + + @Override + public String toString() { + final StringBuilder builder = new StringBuilder(); + + builder.append("SequenceDelimiter ["); + + if (groups != null) { + builder.append("groups="); + builder.append(groups); + builder.append(","); + } + + if (initialGroup != null) { + builder.append("initialGroup="); + builder.append(initialGroup); + } + + builder.append("]"); + + return builder.toString(); + } + + /** + * Set the initial group of this delimiter. + * + * @param initialGroup + * The initial group of this delimiter. + */ + public void setInitialGroup(final DelimiterGroup<T> initialGroup) { + this.initialGroup = initialGroup; + } +} diff --git a/base/src/main/java/bjc/utils/parserutils/delims/StringDelimiter.java b/base/src/main/java/bjc/utils/parserutils/delims/StringDelimiter.java new file mode 100644 index 0000000..e3eeea5 --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/delims/StringDelimiter.java @@ -0,0 +1,31 @@ +package bjc.utils.parserutils.delims; + +import bjc.utils.data.ITree; + +/** + * A sequence delimiter specialized for strings. + * + * @author EVE + * + */ +public class StringDelimiter extends SequenceDelimiter<String> { + + /** + * Override of + * {@link SequenceDelimiter#delimitSequence(SequenceCharacteristics, Object...)} + * for ease of use for strings. + * + * @param seq + * The sequence to delimit. + * + * @return The sequence as a tree. + * + * @throws DelimiterException + * if something went wrong with delimiting the sequence. 
+ * + * @see SequenceDelimiter + */ + public ITree<String> delimitSequence(final String... seq) throws DelimiterException { + return super.delimitSequence(new SequenceCharacteristics<>("root", "contents", "subgroup"), seq); + } +} diff --git a/base/src/main/java/bjc/utils/parserutils/splitter/ChainTokenSplitter.java b/base/src/main/java/bjc/utils/parserutils/splitter/ChainTokenSplitter.java new file mode 100644 index 0000000..4736310 --- /dev/null +++ b/base/src/main/java/bjc/utils/parserutils/splitter/ChainTokenSplitter.java @@ -0,0 +1,50 @@ +package bjc.utils.parserutils.splitter; + +import bjc.utils.funcdata.FunctionalList; +import bjc.utils.funcdata.IList; + +/** + * A token splitter that chains several other splitters together. + * + * @author EVE + * + */ +public class ChainTokenSplitter implements TokenSplitter { + private final IList<TokenSplitter> spliters; + + /** + * Create a new chain token splitter. + */ + public ChainTokenSplitter() { + spliters = new FunctionalList<>(); + } + + /** + * Append a series of splitters to the chain. + * + * @param splitters + * The splitters to append to the chain. + */ + public void appendSplitters(final TokenSplitter... splitters) { + spliters.addAll(splitters); + } + + /** + * Prepend a series of splitters to the chain. + * + * @param splitters + * The splitters to append to the chain. + */ + public void prependSplitters(final TokenSplitter... splitters) { + spliters.prependAll(splitters); + } + + @Override + public IList<String> split(final String input) { + final IList<String> initList = new FunctionalList<>(input); + + return spliters.reduceAux(initList, (splitter, strangs) -> { + return strangs.flatMap(splitter::split); + }); + } +}
// File: base/src/main/java/bjc/utils/parserutils/splitter/ConfigurableTokenSplitter.java
package bjc.utils.parserutils.splitter;

import static bjc.utils.PropertyDB.applyFormat;

import java.util.LinkedHashSet;
import java.util.Set;
import java.util.regex.Pattern;

import bjc.utils.funcdata.IList;

/**
 * Split a string into pieces around a regular expression, and offer an easy way
 * to configure the regular expression.
 *
 * Delimiters are collected with the {@code add*Delimiters} methods and turned
 * into the actual pattern by {@link #compile()}, which must be called before
 * {@link #split(String)}.
 *
 * @author EVE
 *
 */
public class ConfigurableTokenSplitter extends SimpleTokenSplitter {
	private final Set<String> simpleDelimiters;
	private final Set<String> multipleDelimiters;
	private final Set<String> rRawDelimiters;

	/**
	 * Create a new token splitter with blank configuration.
	 *
	 * The splitter starts with no pattern (the superclass is given
	 * {@code null}), so it cannot split until {@link #compile()} has run.
	 *
	 * @param keepDelims
	 *            Whether or not to keep delimiters.
	 */
	public ConfigurableTokenSplitter(final boolean keepDelims) {
		super(null, keepDelims);

		/*
		 * Use linked hash-sets to keep items in insertion order.
		 */
		simpleDelimiters = new LinkedHashSet<>();
		multipleDelimiters = new LinkedHashSet<>();
		rRawDelimiters = new LinkedHashSet<>();
	}

	/**
	 * Add a set of simple delimiters to this splitter.
	 *
	 * Simple delimiters match one occurrence of themselves as literals.
	 *
	 * @param simpleDelims
	 *            The simple delimiters to add.
	 */
	public void addSimpleDelimiters(final String... simpleDelims) {
		for (final String simpleDelim : simpleDelims) {
			simpleDelimiters.add(simpleDelim);
		}
	}

	/**
	 * Add a set of multiple delimiters to this splitter.
	 *
	 * Multiple delimiters match one or more occurrences of themselves as
	 * literals.
	 *
	 * @param multiDelims
	 *            The multiple delimiters to add.
	 */
	public void addMultiDelimiters(final String... multiDelims) {
		for (final String multiDelim : multiDelims) {
			multipleDelimiters.add(multiDelim);
		}
	}

	/**
	 * Add a set of raw delimiters to this splitter.
	 *
	 * Raw delimiters match one occurrence of themselves as regular
	 * expressions.
	 *
	 * @param rRawDelims
	 *            The raw delimiters to add.
	 */
	public void addRawDelimiters(final String... rRawDelims) {
		for (final String rRawDelim : rRawDelims) {
			rRawDelimiters.add(rRawDelim);
		}
	}

	/**
	 * Take the configuration and compile it into a regular expression to
	 * use when splitting.
	 *
	 * Raw delimiters are emitted first, then multiple delimiters, then
	 * simple delimiters, each group in insertion order.
	 *
	 * @throws IllegalStateException
	 *             if no delimiters have been added, since there is then
	 *             nothing to build a pattern from.
	 */
	public void compile() {
		final StringBuilder rPattern = new StringBuilder();

		for (final String rRawDelimiter : rRawDelimiters) {
			rPattern.append(applyFormat("rawDelim", rRawDelimiter));
		}

		for (final String multipleDelimiter : multipleDelimiters) {
			rPattern.append(applyFormat("multipleDelim", multipleDelimiter));
		}

		for (final String simpleDelimiter : simpleDelimiters) {
			rPattern.append(applyFormat("simpleDelim", simpleDelimiter));
		}

		/*
		 * With nothing configured the builder is empty, and trimming its
		 * last character would fail with an index error; report the
		 * actual problem instead.
		 */
		if (rPattern.length() == 0) {
			throw new IllegalStateException("Must add at least one delimiter before compiling");
		}

		/*
		 * Drop the final character of the assembled pattern.
		 *
		 * NOTE(review): presumably each delimiter format ends with a
		 * separator that must not trail the pattern - confirm against the
		 * format definitions in PropertyDB.
		 */
		rPattern.deleteCharAt(rPattern.length() - 1);

		spliter = Pattern.compile(rPattern.toString());
	}

	/**
	 * Split the input using the compiled pattern.
	 *
	 * @throws IllegalStateException
	 *             if {@link #compile()} has not produced a pattern yet.
	 */
	@Override
	public IList<String> split(final String input) {
		if (spliter == null) throw new IllegalStateException("Must compile splitter before use");

		return super.split(input);
	}

	@Override
	public String toString() {
		final String fmt = "ConfigurableTokenSplitter [simpleDelimiters=%s, multipleDelimiters=%s,"
				+ " rRawDelimiters=%s, spliter=%s]";

		return String.format(fmt, simpleDelimiters, multipleDelimiters, rRawDelimiters, spliter);
	}
}
// File: base/src/main/java/bjc/utils/parserutils/splitter/ExcludingTokenSplitter.java
package bjc.utils.parserutils.splitter;

import java.util.HashSet;
import java.util.Set;
import java.util.function.Predicate;

import bjc.utils.funcdata.FunctionalList;
import bjc.utils.funcdata.IList;

/**
 * A token splitter that leaves certain inputs unsplit and delegates every
 * other input to a wrapped splitter.
 *
 * @author EVE
 *
 */
public class ExcludingTokenSplitter implements TokenSplitter {
	/* Inputs that are returned unsplit when they match exactly. */
	private final Set<String> literalExclusions = new HashSet<>();

	/* Inputs accepted by any of these predicates are returned unsplit. */
	private final IList<Predicate<String>> predExclusions = new FunctionalList<>();

	/* The splitter used for everything that is not excluded. */
	private final TokenSplitter spliter;

	/**
	 * Create a new excluding token splitter with no exclusions.
	 *
	 * @param splitter
	 *            The splitter to apply to non-excluded strings.
	 */
	public ExcludingTokenSplitter(final TokenSplitter splitter) {
		spliter = splitter;
	}

	/**
	 * Exclude literal strings from splitting.
	 *
	 * @param exclusions
	 *            The strings to exclude from splitting.
	 */
	public final void addLiteralExclusions(final String... exclusions) {
		for (final String literal : exclusions) {
			literalExclusions.add(literal);
		}
	}

	/**
	 * Exclude all of the strings matching any of the predicates from
	 * splitting.
	 *
	 * @param exclusions
	 *            The predicates to use for exclusions.
	 */
	@SafeVarargs
	public final void addPredicateExclusion(final Predicate<String>... exclusions) {
		for (final Predicate<String> test : exclusions) {
			predExclusions.add(test);
		}
	}

	/*
	 * The literal set is consulted first; the predicates are only tried
	 * when the input is not a literal exclusion. An excluded input comes
	 * back as a one-element list.
	 */
	@Override
	public IList<String> split(final String input) {
		final boolean excluded = literalExclusions.contains(input)
				|| predExclusions.anyMatch(test -> test.test(input));

		if (!excluded) {
			return spliter.split(input);
		}

		return new FunctionalList<>(input);
	}
}

// File: base/src/main/java/bjc/utils/parserutils/splitter/FilteredTokenSplitter.java
package bjc.utils.parserutils.splitter;

import java.util.function.Predicate;

import bjc.utils.funcdata.IList;

/**
 * A token splitter that passes the tokens of another splitter through a
 * predicate-based filter.
 *
 * NOTE(review): the filtering is done by IList.getMatching - presumably it
 * keeps the tokens that satisfy the predicate; confirm against IList.
 *
 * @author bjculkin
 *
 */
public class FilteredTokenSplitter implements TokenSplitter {
	private final TokenSplitter source;

	private final Predicate<String> filter;

	/**
	 * Create a new filtered token splitter.
	 *
	 * @param source
	 *            The splitter to get tokens from.
	 *
	 * @param filter
	 *            The filter to pass tokens through.
	 */
	public FilteredTokenSplitter(TokenSplitter source, Predicate<String> filter) {
		this.filter = filter;
		this.source = source;
	}

	@Override
	public IList<String> split(String input) {
		final IList<String> unfiltered = source.split(input);

		return unfiltered.getMatching(filter);
	}
}

// File: base/src/main/java/bjc/utils/parserutils/splitter/SimpleTokenSplitter.java
package bjc.utils.parserutils.splitter;

import java.util.regex.Pattern;

import bjc.utils.funcdata.IList;
import bjc.utils.functypes.ID;
import bjc.utils.ioutils.RegexStringEditor;

/**
 * Splits a string into pieces around a regular expression.
 *
 * @author EVE
 *
 */
public class SimpleTokenSplitter implements TokenSplitter {
	/* The pattern to split around; assignable by subclasses. */
	protected Pattern spliter;

	private final boolean keepDelim;

	/**
	 * Create a new simple token splitter.
	 *
	 * @param splitter
	 *            The pattern to split around.
	 *
	 * @param keepDelims
	 *            Whether or not delimiters should be kept.
	 */
	public SimpleTokenSplitter(final Pattern splitter, final boolean keepDelims) {
		keepDelim = keepDelims;

		spliter = splitter;
	}

	/*
	 * Both branches hand the input and pattern to
	 * RegexStringEditor.mapOccurances with the identity as the third
	 * argument. The fourth argument is the identity when delimiters are
	 * kept, and a function yielding the empty string when they are not.
	 */
	@Override
	public IList<String> split(final String input) {
		if (!keepDelim) {
			return RegexStringEditor.mapOccurances(input, spliter, ID.id(), delim -> "");
		}

		return RegexStringEditor.mapOccurances(input, spliter, ID.id(), ID.id());
	}

	@Override
	public String toString() {
		return "SimpleTokenSplitter [spliter=" + spliter + ", keepDelim=" + keepDelim + "]";
	}
}

// File: base/src/main/java/bjc/utils/parserutils/splitter/TokenSplitter.java
package bjc.utils.parserutils.splitter;

import bjc.utils.funcdata.IList;

/**
 * Something that can break a string up into a list of pieces.
 *
 * @author EVE
 *
 */
public interface TokenSplitter {
	/**
	 * Break a string up into a list of pieces.
	 *
	 * @param input
	 *            The string to split.
	 *
	 * @return The pieces of the string.
	 */
	IList<String> split(String input);
}

// File: base/src/main/java/bjc/utils/parserutils/splitter/TransformTokenSplitter.java
package bjc.utils.parserutils.splitter;

import java.util.function.UnaryOperator;

import bjc.utils.funcdata.IList;

/**
 * A token splitter that applies a transform to each token produced by another
 * splitter.
 *
 * @author bjculkin
 *
 */
public class TransformTokenSplitter implements TokenSplitter {
	private final TokenSplitter source;

	private final UnaryOperator<String> transform;

	/**
	 * Create a new transforming splitter.
	 *
	 * @param source
	 *            The splitter to use as a source.
	 *
	 * @param transform
	 *            The transform to apply to tokens.
	 */
	public TransformTokenSplitter(TokenSplitter source, UnaryOperator<String> transform) {
		this.transform = transform;
		this.source = source;
	}

	@Override
	public IList<String> split(String input) {
		final IList<String> rawTokens = source.split(input);

		return rawTokens.map(transform);
	}

}
