From 33918524d7faab0146a0a92c13eaaef46cdbea8a Mon Sep 17 00:00:00 2001 From: bjculkin Date: Fri, 24 Mar 2017 10:53:59 -0400 Subject: Update Pratt parser. --- .../utils/examples/parsing/PrattParserTest.java | 150 ++++++++++++++------- 1 file changed, 105 insertions(+), 45 deletions(-) (limited to 'BJC-Utils2/src/examples/java/bjc') diff --git a/BJC-Utils2/src/examples/java/bjc/utils/examples/parsing/PrattParserTest.java b/BJC-Utils2/src/examples/java/bjc/utils/examples/parsing/PrattParserTest.java index 80e6130..8634641 100644 --- a/BJC-Utils2/src/examples/java/bjc/utils/examples/parsing/PrattParserTest.java +++ b/BJC-Utils2/src/examples/java/bjc/utils/examples/parsing/PrattParserTest.java @@ -9,11 +9,14 @@ import bjc.utils.parserutils.pratt.StringToken; import bjc.utils.parserutils.pratt.StringTokenStream; import bjc.utils.parserutils.pratt.Token; -import com.google.common.collect.Iterators; - import java.util.Arrays; +import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.LinkedList; +import java.util.List; import java.util.Scanner; +import java.util.Set; import static bjc.utils.parserutils.pratt.LeftCommands.*; import static bjc.utils.parserutils.pratt.NullCommands.*; @@ -29,60 +32,46 @@ public class PrattParserTest { * Main method. * * @param args - * Unused CLI args. + * Unused CLI arguments. */ public static void main(String[] args) { - TokenSplitter split = new TokenSplitter(); - split.addDelimiter("+", "-", "*", "/"); - split.addDelimiter("^", "!"); - split.addDelimiter("(", ")"); - split.compile(); - - PrattParser parser = new PrattParser<>(); - - parser.addNonInitialCommand("+", infixLeft(20)); - parser.addNonInitialCommand("-", infixLeft(20)); + /* + * Use a linked hash set to preserve insertion order. + */ + Set ops = new LinkedHashSet<>(); + + ops.add(":="); + ops.addAll(Arrays.asList("<=", ">=")); + + ops.addAll(Arrays.asList("=", "<", ">")); + ops.addAll(Arrays.asList("+", "-", "*", "/")); + ops.addAll(Arrays.asList("^", "!")); + ops.addAll(Arrays.asList("(", ")")); + ops.addAll(Arrays.asList("[", "]")); + + /* + * Reserved words that represent themselves, not literals. + */ + Set reserved = new LinkedHashSet<>(); + reserved.add("if"); + reserved.add("else"); - parser.addNonInitialCommand("*", infixLeft(30)); - parser.addNonInitialCommand("/", infixLeft(30)); + TokenSplitter split = new TokenSplitter(); + ops.forEach(split::addDelimiter); - parser.addNonInitialCommand("!", postfix(40)); + split.addNonMatcher("<=", ">="); - parser.addNonInitialCommand("^", infixRight(50)); + split.compile(); - parser.addInitialCommand("(", grouping(0, ")", new StringToken("()", "()"))); - parser.addInitialCommand("(literal)", leaf()); + PrattParser parser = createParser(); Scanner scn = new Scanner(System.in); System.out.print("Enter a command (blank line to exit): "); String ln = scn.nextLine(); - while(!ln.trim().equals("")) { - String[] strangs = split.split(ln); - - System.out.println("Split string: " + Arrays.toString(strangs)); - - Iterator source = Iterators.forArray(strangs); - - Iterator> tokens = new TransformIterator<>(source, (strang) -> { - String type; - - switch(strang) { - case "+": - case "-": - case "*": - case "/": - case "(": - case ")": - type = strang; - break; - default: - type = "(literal)"; - } - - return new StringToken(type, strang); - }); + while (!ln.trim().equals("")) { + Iterator> tokens = preprocessInput(ops, split, ln, reserved); try { StringTokenStream tokenStream = new StringTokenStream(tokens); @@ -94,8 +83,12 @@ public class PrattParserTest { ITree> tree = parser.parseExpression(0, tokenStream, null); + if (!tokenStream.current().getKey().equals("(end)")) { + System.out.println("Multipe expressions on line"); + } + System.out.println("Parsed expression:\n" + tree); - } catch(ParserException pex) { + } catch (ParserException pex) { pex.printStackTrace(); } @@ -105,4 +98,71 @@ public class PrattParserTest { scn.close(); } + + private static Iterator> preprocessInput(Set ops, TokenSplitter split, String ln, + Set reserved) { + String[] rawTokens = ln.split("\\s+"); + + List splitTokens = new LinkedList<>(); + + for (String raw : rawTokens) { + String[] strangs = split.split(raw); + + splitTokens.addAll(Arrays.asList(strangs)); + } + + System.out.println("Split string: " + splitTokens); + + Iterator source = splitTokens.iterator(); + + Iterator> tokens = new TransformIterator<>(source, (String strang) -> { + if (ops.contains(strang) || reserved.contains(strang)) { + return new StringToken(strang, strang); + } else { + return new StringToken("(literal)", strang); + } + }); + return tokens; + } + + private static PrattParser createParser() { + /* + * Set of which relational operators chain with each other. + */ + HashSet chainSet = new HashSet<>(); + chainSet.addAll(Arrays.asList("=", "<", ">", "<=", ">=")); + + /* + * Token for marking chains. + */ + StringToken chainToken = new StringToken("and", "and"); + + PrattParser parser = new PrattParser<>(); + + parser.addNonInitialCommand("if", ternary(5, 0, "else", new StringToken("cond", "cond"), false)); + + parser.addNonInitialCommand(":=", infixNon(10)); + + parser.addNonInitialCommand("=", chain(10, chainSet, chainToken)); + parser.addNonInitialCommand("<", chain(10, chainSet, chainToken)); + parser.addNonInitialCommand(">", chain(10, chainSet, chainToken)); + parser.addNonInitialCommand("<=", chain(10, chainSet, chainToken)); + parser.addNonInitialCommand(">=", chain(10, chainSet, chainToken)); + + parser.addNonInitialCommand("+", infixLeft(20)); + parser.addNonInitialCommand("-", infixLeft(20)); + + parser.addNonInitialCommand("*", infixLeft(30)); + parser.addNonInitialCommand("/", infixLeft(30)); + + parser.addNonInitialCommand("!", postfix(40)); + + parser.addNonInitialCommand("^", infixRight(50)); + + parser.addNonInitialCommand("[", postCircumfix(60, 0, "]", new StringToken("idx", "idx"))); + + parser.addInitialCommand("(", grouping(0, ")", new StringToken("()", "()"))); + parser.addInitialCommand("(literal)", leaf()); + return parser; + } } -- cgit v1.2.3