summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbjculkin <bjculkin@mix.wvu.edu>2017-03-16 15:33:09 -0400
committerbjculkin <bjculkin@mix.wvu.edu>2017-03-16 15:33:09 -0400
commit348e10db258ae0cd0ae61dd99f6359ac4c8c0bd1 (patch)
tree96e91ff2c65000612667b98aece9f301f70004ac
parentca704c30af8b5bf2695c7128e0b21f505457da7b (diff)
Added alternate expression parser.
This adds an alternate expression parser designed solely for arithmetic operations.
-rw-r--r--dice-lang/src/bjc/dicelang/expr/Lexer.java54
-rw-r--r--dice-lang/src/bjc/dicelang/expr/Parser.java153
-rw-r--r--dice-lang/src/bjc/dicelang/expr/Shunter.java95
-rw-r--r--dice-lang/src/bjc/dicelang/expr/Token.java87
-rw-r--r--dice-lang/src/bjc/dicelang/expr/TokenType.java69
-rw-r--r--dice-lang/src/bjc/dicelang/expr/Tokens.java101
6 files changed, 559 insertions, 0 deletions
diff --git a/dice-lang/src/bjc/dicelang/expr/Lexer.java b/dice-lang/src/bjc/dicelang/expr/Lexer.java
new file mode 100644
index 0000000..cff1170
--- /dev/null
+++ b/dice-lang/src/bjc/dicelang/expr/Lexer.java
@@ -0,0 +1,54 @@
+package bjc.dicelang.expr;
+
+import bjc.utils.funcutils.TokenSplitter;
+
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Implements the lexer for simple expression operations.
+ *
+ * @author Ben Culkin
+ */
+public class Lexer {
+ /*
+ * Spliter we use
+ */
+ private TokenSplitter split;
+
+ /**
+ * Create a new expression lexer.
+ */
+ public Lexer() {
+ split = new TokenSplitter();
+
+ split.addDelimiter("(", ")");
+ split.addDelimiter("+", "-", "*", "/");
+ }
+
+ /**
+ * Convert a string from a input command to a series of infix tokens.
+ *
+ * @param inp
+ * The input command.
+ * @param tks
+ * The token state
+ *
+ * @return A series of infix tokens representing the command.
+ */
+ public Token[] lexString(String inp, Tokens tks) {
+ String[] spacedTokens = inp.split("[ \t]");
+
+ List<Token> tokens = new LinkedList<>();
+
+ for(String spacedToken : spacedTokens) {
+ String[] rawTokens = split.split(spacedToken);
+
+ for(String tok : rawTokens) {
+ tokens.add(tks.lexToken(tok, spacedToken));
+ }
+ }
+
+ return tokens.toArray(new Token[0]);
+ }
+}
diff --git a/dice-lang/src/bjc/dicelang/expr/Parser.java b/dice-lang/src/bjc/dicelang/expr/Parser.java
new file mode 100644
index 0000000..e806a6f
--- /dev/null
+++ b/dice-lang/src/bjc/dicelang/expr/Parser.java
@@ -0,0 +1,153 @@
+package bjc.dicelang.expr;
+
+import bjc.utils.data.ITree;
+import bjc.utils.funcdata.FunctionalList;
+import bjc.utils.parserutils.TreeConstructor;
+
+import java.util.Arrays;
+import java.util.Scanner;
+
+/**
+ * Parser for simple math expressions.
+ *
+ * @author Ben Culkin
+ */
+public class Parser {
+ /**
+ * Main method.
+ *
+ * @param args
+ * Unused CLI args.
+ */
+ public static void main(String[] args) {
+ /*
+ * Create our objects.
+ */
+ Tokens toks = new Tokens();
+ Lexer lex = new Lexer();
+
+ /*
+ * Prepare our input.
+ */
+ Scanner scan = new Scanner(System.in);
+
+ /*
+ * Read initial command.
+ */
+ System.out.print("Enter a math expression (blank line to quit): ");
+ String ln = scan.nextLine().trim();
+
+ /*
+ * Enter REPL loop.
+ */
+ while(!ln.equals("")) {
+ /*
+ * Print raw command.
+ */
+ System.out.println("Raw command: " + ln);
+ System.out.println();
+
+ /*
+ * Lex command to infix tokens.
+ */
+ Token[] infixTokens = lex.lexString(ln, toks);
+ System.out.println("Lexed tokens: ");
+ for(Token tok : infixTokens) {
+ System.out.println("\t" + tok);
+ }
+
+ /*
+ * Print out infix expression.
+ */
+ System.out.print("Lexed expression: ");
+ for(Token tok : infixTokens) {
+ System.out.print(tok.toExpr() + " ");
+ }
+ System.out.println();
+ System.out.println();
+
+ /*
+ * Shunt infix tokens to postfix tokens.
+ */
+ Token[] postfixTokens = Shunter.shuntTokens(infixTokens);
+ System.out.println("Lexed tokens: ");
+ for(Token tok : postfixTokens) {
+ System.out.println("\t" + tok);
+ }
+
+ /*
+ * Print out postfix tokens.
+ */
+ System.out.print("Shunted expression: ");
+ for(Token tok : postfixTokens) {
+ System.out.print(tok.toExpr() + " ");
+ }
+ System.out.println();
+ System.out.println();
+
+ FunctionalList<Token> tokList = new FunctionalList<>(Arrays.asList(postfixTokens));
+ ITree<Token> ast = TreeConstructor.constructTree(tokList, tok -> tok.type.isOperator);
+
+ /*
+ * Print the tree, then the canonical expression for it.
+ */
+ System.out.println("Parsed tree");
+ System.out.println(ast.toString());
+ System.out.println("\nCanonical expr: " + toCanonicalExpr(ast));
+
+ System.out.println();
+
+ /*
+ * Prompt for a new expression.
+ */
+ System.out.print("Enter a math expression (blank line to quit): ");
+ ln = scan.nextLine().trim();
+ }
+
+ /*
+ * Cleanup after ourselves.
+ */
+ scan.close();
+ }
+
+ /*
+ * Convert an expression to one that uses the smallest necessary amount
+ * of parens.
+ */
+ private static String toCanonicalExpr(ITree<Token> ast) {
+ Token data = ast.getHead();
+
+ if(ast.getChildrenCount() == 0)
+ /*
+ * Handle leaf nodes
+ */
+ return data.toExpr();
+ else {
+ ITree<Token> left = ast.getChild(0);
+ ITree<Token> right = ast.getChild(1);
+
+ String leftExpr = toCanonicalExpr(left);
+ String rightExpr = toCanonicalExpr(right);
+
+ /*
+ * Add parens if the left was higher priority.
+ */
+ if(left.getChildrenCount() == 0) {
+ if(left.getHead().type.operatorPriority >= data.type.operatorPriority) {
+ leftExpr = "(" + leftExpr + ")";
+ }
+ }
+
+ /*
+ * Add parens if the right was higher priority.
+ */
+ if(right.getChildrenCount() == 0) {
+ if(right.getHead().type.operatorPriority >= data.type.operatorPriority) {
+ rightExpr = "(" + rightExpr + ")";
+ }
+ }
+
+ return leftExpr + " " + data.toExpr() + " " + rightExpr;
+ }
+ }
+}
diff --git a/dice-lang/src/bjc/dicelang/expr/Shunter.java b/dice-lang/src/bjc/dicelang/expr/Shunter.java
new file mode 100644
index 0000000..5d8cb7c
--- /dev/null
+++ b/dice-lang/src/bjc/dicelang/expr/Shunter.java
@@ -0,0 +1,95 @@
+package bjc.dicelang.expr;
+
+import java.util.ArrayList;
+import java.util.Deque;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Converts a infix series of tokens into a prefix series of tokens.
+ *
+ * @author Ben Culkin
+ */
+public class Shunter {
+ /**
+ * Convert a infix series of tokens to a postfix series of tokens.
+ *
+ * @param infixTokens
+ * The tokens in infix order.
+ *
+ * @return The tokens in postfix order.
+ */
+ public static Token[] shuntTokens(Token[] infixTokens) {
+ List<Token> postfixTokens = new ArrayList<>(infixTokens.length);
+
+ Deque<Token> opStack = new LinkedList<>();
+
+ /*
+ * Shunt each token.
+ */
+ for(Token tok : infixTokens) {
+ /*
+ * Handle operators.
+ */
+ if(tok.type.isOperator) {
+ Token curOp = opStack.peek();
+
+ /*
+ * Check if an operator is higher priority,
+ * respecting their left associativity.
+ */
+ int leftPriority = tok.type.operatorPriority;
+ int rightPriority = curOp == null ? 0 : curOp.type.operatorPriority;
+
+ boolean isHigherPrec = leftPriority >= rightPriority;
+
+ /*
+ * Pop all operators that are lower precedence
+ * than us.
+ */
+ while(!opStack.isEmpty() && isHigherPrec) {
+ postfixTokens.add(opStack.pop());
+
+ curOp = opStack.peek();
+
+ leftPriority = tok.type.operatorPriority;
+ rightPriority = curOp == null ? 0 : curOp.type.operatorPriority;
+
+ isHigherPrec = leftPriority >= rightPriority;
+ }
+
+ opStack.push(tok);
+ } else if(tok.type == TokenType.OPAREN) {
+ opStack.push(tok);
+ } else if(tok.type == TokenType.CPAREN) {
+ Token curOp = opStack.peek();
+
+ /*
+ * Pop things until we find the matching paren.
+ */
+ while(curOp.type != TokenType.OPAREN) {
+ Token tk = opStack.pop();
+
+ postfixTokens.add(tk);
+
+ curOp = opStack.peek();
+ }
+
+ if(!opStack.isEmpty()) {
+ opStack.pop();
+ }
+ } else {
+ postfixTokens.add(tok);
+ }
+ }
+
+ /*
+ * Flush remaining operators.
+ */
+ while(!opStack.isEmpty()) {
+ postfixTokens.add(opStack.pop());
+ }
+
+ return postfixTokens.toArray(new Token[0]);
+ }
+}
diff --git a/dice-lang/src/bjc/dicelang/expr/Token.java b/dice-lang/src/bjc/dicelang/expr/Token.java
new file mode 100644
index 0000000..b02f6c9
--- /dev/null
+++ b/dice-lang/src/bjc/dicelang/expr/Token.java
@@ -0,0 +1,87 @@
+package bjc.dicelang.expr;
+
+/**
+ * Represents a lexical token.
+ *
+ * @author Ben Culkin
+ */
+public class Token {
+ /*
+ * The state for this token.
+ */
+ private Tokens tks;
+
+ /**
+ * The type of the token.
+ *
+ * Determines which fields have a value.
+ */
+ public final TokenType type;
+
+ /**
+ * The integer value attached to this token.
+ */
+ public int intValue;
+
+ /**
+ * The original string this token was part of.
+ */
+ public String rawValue;
+
+ /**
+ * Create a new token.
+ *
+ * @param type
+ * The type of this token.
+ * @param raw
+ * The string this token came from.
+ * @param toks
+ * The state for this token
+ */
+ public Token(TokenType type, String raw, Tokens toks) {
+ this.type = type;
+
+ rawValue = raw;
+
+ tks = toks;
+ }
+
+ @Override
+ public String toString() {
+ String typeStr = type.toString();
+ typeStr += " (" + type.name() + ")";
+
+ if(type == TokenType.VREF) {
+ typeStr += " (ind. " + intValue;
+ typeStr += ", sym. \"" + tks.symbolTable.get(intValue) + "\")";
+ }
+
+ return typeStr + " (originally from: " + rawValue + ")";
+ }
+
+ /**
+ * Convert this token into the string representation of it.
+ *
+ * @return The string representation of it.
+ */
+ public String toExpr() {
+ switch(type) {
+ case ADD:
+ return "+";
+ case SUBTRACT:
+ return "-";
+ case MULTIPLY:
+ return "*";
+ case DIVIDE:
+ return "/";
+ case VREF:
+ return tks.symbolTable.get(intValue);
+ case OPAREN:
+ return "(";
+ case CPAREN:
+ return ")";
+ default:
+ return "???";
+ }
+ }
+}
diff --git a/dice-lang/src/bjc/dicelang/expr/TokenType.java b/dice-lang/src/bjc/dicelang/expr/TokenType.java
new file mode 100644
index 0000000..9e4bbc2
--- /dev/null
+++ b/dice-lang/src/bjc/dicelang/expr/TokenType.java
@@ -0,0 +1,69 @@
+package bjc.dicelang.expr;
+
+/**
+ * Represents the type of this token.
+ */
+public enum TokenType {
+ /**
+ * Represents +
+ */
+ ADD(14, true, 0),
+ /**
+ * Represents -
+ */
+ SUBTRACT(15, true, 0),
+
+ /**
+ * Represents *
+ */
+ MULTIPLY(16, true, 1),
+ /**
+ * Represents /
+ */
+ DIVIDE(17, true, 1),
+
+ /**
+ * Represents variable names.
+ */
+ VREF(11),
+
+ /**
+ * Represents (
+ */
+ OPAREN(0, false, 100),
+ /**
+ * Represents )
+ */
+ CPAREN(0, false, 100);
+
+ /**
+ * The ID number for this token type.
+ */
+ public final int nVal;
+
+ /**
+ * Whether or not this type of token is an operator.
+ */
+ public final boolean isOperator;
+ /**
+ * The priority of this operator, if it is one.
+ */
+ public final int operatorPriority;
+
+ private TokenType(int num, boolean isOp, int priority) {
+ nVal = num;
+
+ isOperator = isOp;
+
+ operatorPriority = priority;
+ }
+
+ private TokenType(int num) {
+ this(num, false, -1);
+ }
+
+ @Override
+ public String toString() {
+ return Integer.toString(nVal);
+ }
+} \ No newline at end of file
diff --git a/dice-lang/src/bjc/dicelang/expr/Tokens.java b/dice-lang/src/bjc/dicelang/expr/Tokens.java
new file mode 100644
index 0000000..08e7197
--- /dev/null
+++ b/dice-lang/src/bjc/dicelang/expr/Tokens.java
@@ -0,0 +1,101 @@
+package bjc.dicelang.expr;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Contains per-instance state for token parsing.
+ *
+ * @author EVE
+ *
+ */
+public class Tokens {
+ /*
+ * Contains mappings from variable references to string names.
+ */
+ private final Map<Integer, String> symTab;
+ /*
+ * Reverse index into the symbol table.
+ */
+ private final Map<String, Integer> revSymTab;
+
+ /**
+ * Read-only view on the symbol table.
+ */
+ public final Map<Integer, String> symbolTable;
+
+ /*
+ * Next index into the symbol table
+ */
+ private int nextSym;
+
+ /*
+ * Mapping from literal tokens to token types.
+ */
+ private final Map<String, TokenType> litTokens;
+
+ /**
+ * Create a new set of tokens.
+ */
+ public Tokens() {
+ symTab = new HashMap<>();
+ revSymTab = new HashMap<>();
+
+ symbolTable = Collections.unmodifiableMap(symTab);
+
+ nextSym = 0;
+
+ litTokens = new HashMap<>();
+
+ litTokens.put("+", TokenType.ADD);
+ litTokens.put("-", TokenType.SUBTRACT);
+ litTokens.put("*", TokenType.MULTIPLY);
+ litTokens.put("/", TokenType.DIVIDE);
+ litTokens.put("(", TokenType.OPAREN);
+ litTokens.put(")", TokenType.CPAREN);
+ }
+
+ /**
+ * Convert the string representation of a token into a token.
+ *
+ * @param tok
+ * The string repr. of the token.
+ * @param raw
+ * The original string the token came from.
+ *
+ * @return The token the string represents.
+ */
+ public Token lexToken(String tok, String raw) {
+ if(litTokens.containsKey(tok))
+ return new Token(litTokens.get(tok), raw, this);
+ else
+ return parseVRef(tok, raw);
+ }
+
+ /*
+ * Parse a variable reference.
+ */
+ private Token parseVRef(String tok, String raw) {
+ Token tk = new Token(TokenType.VREF, raw, this);
+
+ if(revSymTab.containsKey(tok)) {
+ /*
+ * Reuse the entry if it exists.
+ */
+ tk.intValue = revSymTab.get(tok);
+ } else {
+ /*
+ * Create a new entry.
+ */
+ tk.intValue = nextSym;
+
+ symTab.put(nextSym, tok);
+ revSymTab.put(tok, nextSym);
+
+ nextSym += 1;
+ }
+
+ return tk;
+ }
+}