From 33918524d7faab0146a0a92c13eaaef46cdbea8a Mon Sep 17 00:00:00 2001 From: bjculkin Date: Fri, 24 Mar 2017 10:53:59 -0400 Subject: Update Pratt parser. --- .../utils/examples/parsing/PrattParserTest.java | 150 ++++++++++++++------- BJC-Utils2/src/main/java/bjc/utils/data/Tree.java | 78 ++++++----- .../main/java/bjc/utils/funcutils/StringUtils.java | 15 ++- .../bjc/utils/parserutils/pratt/LeftCommands.java | 11 +- .../bjc/utils/parserutils/pratt/StringToken.java | 27 ++-- 5 files changed, 182 insertions(+), 99 deletions(-) (limited to 'BJC-Utils2') diff --git a/BJC-Utils2/src/examples/java/bjc/utils/examples/parsing/PrattParserTest.java b/BJC-Utils2/src/examples/java/bjc/utils/examples/parsing/PrattParserTest.java index 80e6130..8634641 100644 --- a/BJC-Utils2/src/examples/java/bjc/utils/examples/parsing/PrattParserTest.java +++ b/BJC-Utils2/src/examples/java/bjc/utils/examples/parsing/PrattParserTest.java @@ -9,11 +9,14 @@ import bjc.utils.parserutils.pratt.StringToken; import bjc.utils.parserutils.pratt.StringTokenStream; import bjc.utils.parserutils.pratt.Token; -import com.google.common.collect.Iterators; - import java.util.Arrays; +import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.LinkedList; +import java.util.List; import java.util.Scanner; +import java.util.Set; import static bjc.utils.parserutils.pratt.LeftCommands.*; import static bjc.utils.parserutils.pratt.NullCommands.*; @@ -29,60 +32,46 @@ public class PrattParserTest { * Main method. * * @param args - * Unused CLI args. + * Unused CLI arguments. */ public static void main(String[] args) { - TokenSplitter split = new TokenSplitter(); - split.addDelimiter("+", "-", "*", "/"); - split.addDelimiter("^", "!"); - split.addDelimiter("(", ")"); - split.compile(); - - PrattParser parser = new PrattParser<>(); - - parser.addNonInitialCommand("+", infixLeft(20)); - parser.addNonInitialCommand("-", infixLeft(20)); + /* + * Use a linked hash set to preserve insertion order. + */ + Set ops = new LinkedHashSet<>(); + + ops.add(":="); + ops.addAll(Arrays.asList("<=", ">=")); + + ops.addAll(Arrays.asList("=", "<", ">")); + ops.addAll(Arrays.asList("+", "-", "*", "/")); + ops.addAll(Arrays.asList("^", "!")); + ops.addAll(Arrays.asList("(", ")")); + ops.addAll(Arrays.asList("[", "]")); + + /* + * Reserved words that represent themselves, not literals. + */ + Set reserved = new LinkedHashSet<>(); + reserved.add("if"); + reserved.add("else"); - parser.addNonInitialCommand("*", infixLeft(30)); - parser.addNonInitialCommand("/", infixLeft(30)); + TokenSplitter split = new TokenSplitter(); + ops.forEach(split::addDelimiter); - parser.addNonInitialCommand("!", postfix(40)); + split.addNonMatcher("<=", ">="); - parser.addNonInitialCommand("^", infixRight(50)); + split.compile(); - parser.addInitialCommand("(", grouping(0, ")", new StringToken("()", "()"))); - parser.addInitialCommand("(literal)", leaf()); + PrattParser parser = createParser(); Scanner scn = new Scanner(System.in); System.out.print("Enter a command (blank line to exit): "); String ln = scn.nextLine(); - while(!ln.trim().equals("")) { - String[] strangs = split.split(ln); - - System.out.println("Split string: " + Arrays.toString(strangs)); - - Iterator source = Iterators.forArray(strangs); - - Iterator> tokens = new TransformIterator<>(source, (strang) -> { - String type; - - switch(strang) { - case "+": - case "-": - case "*": - case "/": - case "(": - case ")": - type = strang; - break; - default: - type = "(literal)"; - } - - return new StringToken(type, strang); - }); + while (!ln.trim().equals("")) { + Iterator> tokens = preprocessInput(ops, split, ln, reserved); try { StringTokenStream tokenStream = new StringTokenStream(tokens); @@ -94,8 +83,12 @@ public class PrattParserTest { ITree> tree = parser.parseExpression(0, tokenStream, null); + if (!tokenStream.current().getKey().equals("(end)")) { + System.out.println("Multipe expressions on line"); + } + System.out.println("Parsed expression:\n" + tree); - } catch(ParserException pex) { + } catch (ParserException pex) { pex.printStackTrace(); } @@ -105,4 +98,71 @@ public class PrattParserTest { scn.close(); } + + private static Iterator> preprocessInput(Set ops, TokenSplitter split, String ln, + Set reserved) { + String[] rawTokens = ln.split("\\s+"); + + List splitTokens = new LinkedList<>(); + + for (String raw : rawTokens) { + String[] strangs = split.split(raw); + + splitTokens.addAll(Arrays.asList(strangs)); + } + + System.out.println("Split string: " + splitTokens); + + Iterator source = splitTokens.iterator(); + + Iterator> tokens = new TransformIterator<>(source, (String strang) -> { + if (ops.contains(strang) || reserved.contains(strang)) { + return new StringToken(strang, strang); + } else { + return new StringToken("(literal)", strang); + } + }); + return tokens; + } + + private static PrattParser createParser() { + /* + * Set of which relational operators chain with each other. + */ + HashSet chainSet = new HashSet<>(); + chainSet.addAll(Arrays.asList("=", "<", ">", "<=", ">=")); + + /* + * Token for marking chains. + */ + StringToken chainToken = new StringToken("and", "and"); + + PrattParser parser = new PrattParser<>(); + + parser.addNonInitialCommand("if", ternary(5, 0, "else", new StringToken("cond", "cond"), false)); + + parser.addNonInitialCommand(":=", infixNon(10)); + + parser.addNonInitialCommand("=", chain(10, chainSet, chainToken)); + parser.addNonInitialCommand("<", chain(10, chainSet, chainToken)); + parser.addNonInitialCommand(">", chain(10, chainSet, chainToken)); + parser.addNonInitialCommand("<=", chain(10, chainSet, chainToken)); + parser.addNonInitialCommand(">=", chain(10, chainSet, chainToken)); + + parser.addNonInitialCommand("+", infixLeft(20)); + parser.addNonInitialCommand("-", infixLeft(20)); + + parser.addNonInitialCommand("*", infixLeft(30)); + parser.addNonInitialCommand("/", infixLeft(30)); + + parser.addNonInitialCommand("!", postfix(40)); + + parser.addNonInitialCommand("^", infixRight(50)); + + parser.addNonInitialCommand("[", postCircumfix(60, 0, "]", new StringToken("idx", "idx"))); + + parser.addInitialCommand("(", grouping(0, ")", new StringToken("()", "()"))); + parser.addInitialCommand("(literal)", leaf()); + return parser; + } } diff --git a/BJC-Utils2/src/main/java/bjc/utils/data/Tree.java b/BJC-Utils2/src/main/java/bjc/utils/data/Tree.java index cf4d1fb..cef9d51 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/data/Tree.java +++ b/BJC-Utils2/src/main/java/bjc/utils/data/Tree.java @@ -74,7 +74,7 @@ public class Tree implements ITree { children = new FunctionalList<>(); - for(ITree child : childrn) { + for (ITree child : childrn) { children.add(child); childCount++; @@ -83,7 +83,7 @@ public class Tree implements ITree { @Override public void addChild(ITree child) { - if(hasChildren == false) { + if (hasChildren == false) { hasChildren = true; children = new FunctionalList<>(); @@ -104,14 +104,14 @@ public class Tree implements ITree { @Override public void doForChildren(Consumer> action) { - if(childCount > 0) { + if (childCount > 0) { children.forEach(action); } } @Override public ITree flatMapTree(Function> mapper) { - if(hasChildren) { + if (hasChildren) { ITree flatMappedData = mapper.apply(data); children.map((child) -> child.flatMapTree(mapper)) @@ -130,11 +130,14 @@ public class Tree implements ITree { protected NewType internalCollapse(Function leafTransform, Function, NewType>> nodeCollapser) { - if(hasChildren) { + if (hasChildren) { Function, NewType> nodeTransformer = nodeCollapser.apply(data); IList collapsedChildren = children.map((child) -> { - return child.collapse(leafTransform, nodeCollapser, (subTreeVal) -> subTreeVal); + NewType collapsed = child.collapse(leafTransform, nodeCollapser, + (subTreeVal) -> subTreeVal); + + return collapsed; }); return nodeTransformer.apply(collapsedChildren); @@ -144,7 +147,7 @@ public class Tree implements ITree { } protected void internalToString(StringBuilder builder, int indentLevel, boolean initial) { - for(int i = 0; i < indentLevel; i++) { + for (int i = 0; i < indentLevel; i++) { builder.append(">\t"); } @@ -154,7 +157,7 @@ public class Tree implements ITree { builder.append(data == null ? "(null)" : data.toString()); builder.append("\n"); - if(hasChildren) { + if (hasChildren) { children.forEach((child) -> { ((Tree) child).internalToString(builder, indentLevel + 1, false); }); @@ -164,7 +167,7 @@ public class Tree implements ITree { @Override public ITree rebuildTree(Function leafTransformer, Function operatorTransformer) { - if(hasChildren) { + if (hasChildren) { IList> mappedChildren = children.map((child) -> { return child.rebuildTree(leafTransformer, operatorTransformer); }); @@ -177,7 +180,7 @@ public class Tree implements ITree { @Override public void selectiveTransform(Predicate nodePicker, UnaryOperator transformer) { - if(hasChildren) { + if (hasChildren) { children.forEach((child) -> child.selectiveTransform(nodePicker, transformer)); } else { data = transformer.apply(data); @@ -189,11 +192,11 @@ public class Tree implements ITree { UnaryOperator> transformer) { TopDownTransformResult transformResult = transformPicker.apply(data); - switch(transformResult) { + switch (transformResult) { case PASSTHROUGH: ITree result = new Tree<>(data); - if(hasChildren) { + if (hasChildren) { children.forEach((child) -> { result.addChild(child.topDownTransform(transformPicker, transformer)); }); @@ -209,7 +212,7 @@ public class Tree implements ITree { case PUSHDOWN: result = new Tree<>(data); - if(hasChildren) { + if (hasChildren) { children.forEach((child) -> { result.addChild(child.topDownTransform(transformPicker, transformer)); }); @@ -246,7 +249,7 @@ public class Tree implements ITree { @Override public TransformedType transformChild(int childNo, Function, TransformedType> transformer) { - if(childNo < 0 || childNo > childCount - 1) + if (childNo < 0 || childNo > childCount - 1) throw new IllegalArgumentException("Child index #" + childNo + " is invalid"); return transformer.apply(children.getByIndex(childNo)); @@ -259,7 +262,7 @@ public class Tree implements ITree { @Override public ITree transformTree(Function transformer) { - if(hasChildren) { + if (hasChildren) { IList> transformedChildren = children .map((child) -> child.transformTree(transformer)); @@ -271,11 +274,12 @@ public class Tree implements ITree { @Override public void traverse(TreeLinearizationMethod linearizationMethod, Consumer action) { - if(hasChildren) { - switch(linearizationMethod) { + if (hasChildren) { + switch (linearizationMethod) { case INORDER: - if(childCount != 2) throw new IllegalArgumentException( - "Can only do in-order traversal for binary trees."); + if (childCount != 2) + throw new IllegalArgumentException( + "Can only do in-order traversal for binary trees."); children.getByIndex(0).traverse(linearizationMethod, action); @@ -316,32 +320,40 @@ public class Tree implements ITree { @Override public boolean equals(Object obj) { - if(this == obj) return true; - if(obj == null) return false; - if(getClass() != obj.getClass()) return false; + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; Tree other = (Tree) obj; - if(data == null) { - if(other.data != null) return false; - } else if(!data.equals(other.data)) return false; + if (data == null) { + if (other.data != null) + return false; + } else if (!data.equals(other.data)) + return false; - if(childCount != other.childCount) return false; + if (childCount != other.childCount) + return false; - if(children == null) { - if(other.children != null) return false; - } else if(!children.equals(other.children)) return false; + if (children == null) { + if (other.children != null) + return false; + } else if (!children.equals(other.children)) + return false; return true; } @Override public int revFind(Predicate> childPred) { - if(childCount == 0) { + if (childCount == 0) { return -1; } else { - for(int i = childCount - 1; i >= 0; i--) { - if(childPred.test(getChild(i))) { + for (int i = childCount - 1; i >= 0; i--) { + if (childPred.test(getChild(i))) { return i; } } @@ -352,7 +364,7 @@ public class Tree implements ITree { @Override public void prependChild(ITree child) { - if(hasChildren == false) { + if (hasChildren == false) { hasChildren = true; children = new FunctionalList<>(); diff --git a/BJC-Utils2/src/main/java/bjc/utils/funcutils/StringUtils.java b/BJC-Utils2/src/main/java/bjc/utils/funcutils/StringUtils.java index b142948..51be875 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/funcutils/StringUtils.java +++ b/BJC-Utils2/src/main/java/bjc/utils/funcutils/StringUtils.java @@ -22,9 +22,10 @@ public class StringUtils { * of the provided regex */ public static boolean containsOnly(String input, String regex) { - if(input == null) + if (input == null) throw new NullPointerException("Input must not be null"); - else if(regex == null) throw new NullPointerException("Regex must not be null"); + else if (regex == null) + throw new NullPointerException("Regex must not be null"); /* * This regular expression is fairly simple. @@ -46,7 +47,7 @@ public class StringUtils { * The number of levels to indent */ public static void indentNLevels(StringBuilder builder, int levels) { - for(int i = 0; i < levels; i++) { + for (int i = 0; i < levels; i++) { builder.append("\t"); } } @@ -80,7 +81,7 @@ public class StringUtils { * @return */ public static String toEnglishList(Object[] objects, String join, String comma) { - if(objects == null) { + if (objects == null) { throw new NullPointerException("Sequence must not be null"); } @@ -89,7 +90,7 @@ public class StringUtils { String joiner = join; String coma = comma; - switch(objects.length) { + switch (objects.length) { case 0: /* * Empty list. @@ -113,7 +114,7 @@ public class StringUtils { /* * Three or more items. */ - for(int i = 0; i < objects.length - 1; i++) { + for (int i = 0; i < objects.length - 1; i++) { sb.append(objects[i].toString()); sb.append(coma + " "); } @@ -157,7 +158,7 @@ public class StringUtils { * @return */ public static String toEnglishList(Object[] objects, boolean and) { - if(and) { + if (and) { return toEnglishList(objects, "and"); } else { return toEnglishList(objects, "or"); diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/pratt/LeftCommands.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/pratt/LeftCommands.java index 30f3af8..5844c49 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/pratt/LeftCommands.java +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/pratt/LeftCommands.java @@ -130,8 +130,11 @@ public class LeftCommands { private boolean nonassoc; - public TernaryCommand(int leftPower, K terminator, Token mark, boolean isNonassoc) { + public TernaryCommand(int leftPower, K terminator, Token marker, boolean isNonassoc) { super(leftPower); + + term = terminator; + mark = marker; nonassoc = isNonassoc; } @@ -150,10 +153,10 @@ public class LeftCommands { @Override public int nextBinding() { - if(nonassoc) { + if (nonassoc) { return leftBinding() - 1; } else { - return super.nextBinding(); + return leftBinding(); } } } @@ -177,7 +180,7 @@ public class LeftCommands { ITree> res = new Tree<>(operator, operand, tree); - if(chainWith.contains(ctx.tokens.current().getKey())) { + if (chainWith.contains(ctx.tokens.current().getKey())) { Token tok = ctx.tokens.current(); ctx.tokens.next(); diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/pratt/StringToken.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/pratt/StringToken.java index 957e6fa..74f9c63 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/pratt/StringToken.java +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/pratt/StringToken.java @@ -47,25 +47,32 @@ public class StringToken implements Token { @Override public boolean equals(Object obj) { - if(this == obj) return true; - if(obj == null) return false; - if(!(obj instanceof StringToken)) return false; + if (this == obj) + return true; + if (obj == null) + return false; + if (!(obj instanceof StringToken)) + return false; StringToken other = (StringToken) obj; - if(key == null) { - if(other.key != null) return false; - } else if(!key.equals(other.key)) return false; + if (key == null) { + if (other.key != null) + return false; + } else if (!key.equals(other.key)) + return false; - if(val == null) { - if(other.val != null) return false; - } else if(!val.equals(other.val)) return false; + if (val == null) { + if (other.val != null) + return false; + } else if (!val.equals(other.val)) + return false; return true; } @Override public String toString() { - return String.format("StringToken [key=%s, val=%s]", key, val); + return String.format("StringToken [key='%s', val='%s']", key, val); } } -- cgit v1.2.3