Added alternate expression parser.

This adds an alternate expression parser designed solely for arithmetic operations.
author: bjculkin <bjculkin@mix.wvu.edu> 2017-03-16 15:33:09 -0400
committer: bjculkin <bjculkin@mix.wvu.edu> 2017-03-16 15:33:09 -0400
commit: 348e10db258ae0cd0ae61dd99f6359ac4c8c0bd1 (patch)
tree: 96e91ff2c65000612667b98aece9f301f70004ac /dice-lang/src
parent: ca704c30af8b5bf2695c7128e0b21f505457da7b (diff)
6 files changed, 559 insertions, 0 deletions
diff --git a/dice-lang/src/bjc/dicelang/expr/Lexer.java b/dice-lang/src/bjc/dicelang/expr/Lexer.java
new file mode 100644
index 0000000..cff1170
--- /dev/null
+++ b/dice-lang/src/bjc/dicelang/expr/Lexer.java
@@ -0,0 +1,54 @@
+package bjc.dicelang.expr;
+
+import bjc.utils.funcutils.TokenSplitter;
+
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Implements the lexer for simple expression operations.
+ *
+ * @author Ben Culkin
+ */
+public class Lexer {
+	/*
+	 * Spliter we use
+	 */
+	private TokenSplitter split;
+
+	/**
+	 * Create a new expression lexer.
+	 */
+	public Lexer() {
+		split = new TokenSplitter();
+
+		split.addDelimiter("(", ")");
+		split.addDelimiter("+", "-", "*", "/");
+	}
+
+	/**
+	 * Convert a string from a input command to a series of infix tokens.
+	 *
+	 * @param inp
+	 *                The input command.
+	 * @param tks
+	 *                The token state
+	 *
+	 * @return A series of infix tokens representing the command.
+	 */
+	public Token[] lexString(String inp, Tokens tks) {
+		String[] spacedTokens = inp.split("[ \t]");
+
+		List<Token> tokens = new LinkedList<>();
+
+		for(String spacedToken : spacedTokens) {
+			String[] rawTokens = split.split(spacedToken);
+
+			for(String tok : rawTokens) {
+				tokens.add(tks.lexToken(tok, spacedToken));
+			}
+		}
+
+		return tokens.toArray(new Token[0]);
+	}
+}
diff --git a/dice-lang/src/bjc/dicelang/expr/Parser.java b/dice-lang/src/bjc/dicelang/expr/Parser.java
new file mode 100644
index 0000000..e806a6f
--- /dev/null
+++ b/dice-lang/src/bjc/dicelang/expr/Parser.java
@@ -0,0 +1,153 @@
+package bjc.dicelang.expr;
+
+import bjc.utils.data.ITree;
+import bjc.utils.funcdata.FunctionalList;
+import bjc.utils.parserutils.TreeConstructor;
+
+import java.util.Arrays;
+import java.util.Scanner;
+
+/**
+ * Parser for simple math expressions.
+ *
+ * @author Ben Culkin
+ */
+public class Parser {
+	/**
+	 * Main method.
+	 *
+	 * @param args
+	 *                Unused CLI args.
+	 */
+	public static void main(String[] args) {
+		/*
+		 * Create our objects.
+		 */
+		Tokens toks = new Tokens();
+		Lexer lex = new Lexer();
+
+		/*
+		 * Prepare our input.
+		 */
+		Scanner scan = new Scanner(System.in);
+
+		/*
+		 * Read initial command.
+		 */
+		System.out.print("Enter a math expression (blank line to quit): ");
+		String ln = scan.nextLine().trim();
+
+		/*
+		 * Enter REPL loop.
+		 */
+		while(!ln.equals("")) {
+			/*
+			 * Print raw command.
+			 */
+			System.out.println("Raw command: " + ln);
+			System.out.println();
+
+			/*
+			 * Lex command to infix tokens.
+			 */
+			Token[] infixTokens = lex.lexString(ln, toks);
+			System.out.println("Lexed tokens: ");
+			for(Token tok : infixTokens) {
+				System.out.println("\t" + tok);
+			}
+
+			/*
+			 * Print out infix expression.
+			 */
+			System.out.print("Lexed expression: ");
+			for(Token tok : infixTokens) {
+				System.out.print(tok.toExpr() + " ");
+			}
+			System.out.println();
+			System.out.println();
+
+			/*
+			 * Shunt infix tokens to postfix tokens.
+			 */
+			Token[] postfixTokens = Shunter.shuntTokens(infixTokens);
+			System.out.println("Lexed tokens: ");
+			for(Token tok : postfixTokens) {
+				System.out.println("\t" + tok);
+			}
+
+			/*
+			 * Print out postfix tokens.
+			 */
+			System.out.print("Shunted expression: ");
+			for(Token tok : postfixTokens) {
+				System.out.print(tok.toExpr() + " ");
+			}
+			System.out.println();
+			System.out.println();
+
+			FunctionalList<Token> tokList = new FunctionalList<>(Arrays.asList(postfixTokens));
+			ITree<Token> ast = TreeConstructor.constructTree(tokList, tok -> tok.type.isOperator);
+
+			/*
+			 * Print the tree, then the canonical expression for it.
+			 */
+			System.out.println("Parsed tree");
+			System.out.println(ast.toString());
+			System.out.println("\nCanonical expr: " + toCanonicalExpr(ast));
+
+			System.out.println();
+
+			/*
+			 * Prompt for a new expression.
+			 */
+			System.out.print("Enter a math expression (blank line to quit): ");
+			ln = scan.nextLine().trim();
+		}
+
+		/*
+		 * Cleanup after ourselves.
+		 */
+		scan.close();
+	}
+
+	/*
+	 * Convert an expression to one that uses the smallest necessary amount
+	 * of parens.
+	 */
+	private static String toCanonicalExpr(ITree<Token> ast) {
+		Token data = ast.getHead();
+
+		if(ast.getChildrenCount() == 0)
+			/*
+			 * Handle leaf nodes
+			 */
+			return data.toExpr();
+		else {
+			ITree<Token> left = ast.getChild(0);
+			ITree<Token> right = ast.getChild(1);
+
+			String leftExpr = toCanonicalExpr(left);
+			String rightExpr = toCanonicalExpr(right);
+
+			/*
+			 * Add parens if the left was higher priority.
+			 */
+			if(left.getChildrenCount() == 0) {
+				if(left.getHead().type.operatorPriority >= data.type.operatorPriority) {
+					leftExpr = "(" + leftExpr + ")";
+				}
+			}
+
+			/*
+			 * Add parens if the right was higher priority.
+			 */
+			if(right.getChildrenCount() == 0) {
+				if(right.getHead().type.operatorPriority >= data.type.operatorPriority) {
+					rightExpr = "(" + rightExpr + ")";
+				}
+			}
+
+			return leftExpr + " " + data.toExpr() + " " + rightExpr;
+		}
+	}
+}
diff --git a/dice-lang/src/bjc/dicelang/expr/Shunter.java b/dice-lang/src/bjc/dicelang/expr/Shunter.java
new file mode 100644
index 0000000..5d8cb7c
--- /dev/null
+++ b/dice-lang/src/bjc/dicelang/expr/Shunter.java
@@ -0,0 +1,95 @@
+package bjc.dicelang.expr;
+
+import java.util.ArrayList;
+import java.util.Deque;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Converts a infix series of tokens into a prefix series of tokens.
+ *
+ * @author Ben Culkin
+ */
+public class Shunter {
+	/**
+	 * Convert a infix series of tokens to a postfix series of tokens.
+	 *
+	 * @param infixTokens
+	 *                The tokens in infix order.
+	 *
+	 * @return The tokens in postfix order.
+	 */
+	public static Token[] shuntTokens(Token[] infixTokens) {
+		List<Token> postfixTokens = new ArrayList<>(infixTokens.length);
+
+		Deque<Token> opStack = new LinkedList<>();
+
+		/*
+		 * Shunt each token.
+		 */
+		for(Token tok : infixTokens) {
+			/*
+			 * Handle operators.
+			 */
+			if(tok.type.isOperator) {
+				Token curOp = opStack.peek();
+
+				/*
+				 * Check if an operator is higher priority,
+				 * respecting their left associativity.
+				 */
+				int leftPriority = tok.type.operatorPriority;
+				int rightPriority = curOp == null ? 0 : curOp.type.operatorPriority;
+
+				boolean isHigherPrec = leftPriority >= rightPriority;
+
+				/*
+				 * Pop all operators that are lower precedence
+				 * than us.
+				 */
+				while(!opStack.isEmpty() && isHigherPrec) {
+					postfixTokens.add(opStack.pop());
+
+					curOp = opStack.peek();
+
+					leftPriority = tok.type.operatorPriority;
+					rightPriority = curOp == null ? 0 : curOp.type.operatorPriority;
+
+					isHigherPrec = leftPriority >= rightPriority;
+				}
+
+				opStack.push(tok);
+			} else if(tok.type == TokenType.OPAREN) {
+				opStack.push(tok);
+			} else if(tok.type == TokenType.CPAREN) {
+				Token curOp = opStack.peek();
+
+				/*
+				 * Pop things until we find the matching paren.
+				 */
+				while(curOp.type != TokenType.OPAREN) {
+					Token tk = opStack.pop();
+
+					postfixTokens.add(tk);
+
+					curOp = opStack.peek();
+				}
+
+				if(!opStack.isEmpty()) {
+					opStack.pop();
+				}
+			} else {
+				postfixTokens.add(tok);
+			}
+		}
+
+		/*
+		 * Flush remaining operators.
+		 */
+		while(!opStack.isEmpty()) {
+			postfixTokens.add(opStack.pop());
+		}
+
+		return postfixTokens.toArray(new Token[0]);
+	}
+}
diff --git a/dice-lang/src/bjc/dicelang/expr/Token.java b/dice-lang/src/bjc/dicelang/expr/Token.java
new file mode 100644
index 0000000..b02f6c9
--- /dev/null
+++ b/dice-lang/src/bjc/dicelang/expr/Token.java
@@ -0,0 +1,87 @@
+package bjc.dicelang.expr;
+
+/**
+ * Represents a lexical token.
+ *
+ * @author Ben Culkin
+ */
+public class Token {
+	/*
+	 * The state for this token.
+	 */
+	private Tokens tks;
+
+	/**
+	 * The type of the token.
+	 *
+	 * Determines which fields have a value.
+	 */
+	public final TokenType type;
+
+	/**
+	 * The integer value attached to this token.
+	 */
+	public int intValue;
+
+	/**
+	 * The original string this token was part of.
+	 */
+	public String rawValue;
+
+	/**
+	 * Create a new token.
+	 *
+	 * @param type
+	 *                The type of this token.
+	 * @param raw
+	 *                The string this token came from.
+	 * @param toks
+	 *                The state for this token
+	 */
+	public Token(TokenType type, String raw, Tokens toks) {
+		this.type = type;
+
+		rawValue = raw;
+
+		tks = toks;
+	}
+
+	@Override
+	public String toString() {
+		String typeStr = type.toString();
+		typeStr += " (" + type.name() + ")";
+
+		if(type == TokenType.VREF) {
+			typeStr += " (ind. " + intValue;
+			typeStr += ", sym. \"" + tks.symbolTable.get(intValue) + "\")";
+		}
+
+		return typeStr + " (originally from: " + rawValue + ")";
+	}
+
+	/**
+	 * Convert this token into the string representation of it.
+	 *
+	 * @return The string representation of it.
+	 */
+	public String toExpr() {
+		switch(type) {
+		case ADD:
+			return "+";
+		case SUBTRACT:
+			return "-";
+		case MULTIPLY:
+			return "*";
+		case DIVIDE:
+			return "/";
+		case VREF:
+			return tks.symbolTable.get(intValue);
+		case OPAREN:
+			return "(";
+		case CPAREN:
+			return ")";
+		default:
+			return "???";
+		}
+	}
+}
diff --git a/dice-lang/src/bjc/dicelang/expr/TokenType.java b/dice-lang/src/bjc/dicelang/expr/TokenType.java
new file mode 100644
index 0000000..9e4bbc2
--- /dev/null
+++ b/dice-lang/src/bjc/dicelang/expr/TokenType.java
@@ -0,0 +1,69 @@
+package bjc.dicelang.expr;
+
+/**
+ * Represents the type of this token.
+ */
+public enum TokenType {
+	/**
+	 * Represents +
+	 */
+	ADD(14, true, 0),
+	/**
+	 * Represents -
+	 */
+	SUBTRACT(15, true, 0),
+
+	/**
+	 * Represents *
+	 */
+	MULTIPLY(16, true, 1),
+	/**
+	 * Represents /
+	 */
+	DIVIDE(17, true, 1),
+
+	/**
+	 * Represents variable names.
+	 */
+	VREF(11),
+
+	/**
+	 * Represents (
+	 */
+	OPAREN(0, false, 100),
+	/**
+	 * Represents )
+	 */
+	CPAREN(0, false, 100);
+
+	/**
+	 * The ID number for this token type.
+	 */
+	public final int nVal;
+
+	/**
+	 * Whether or not this type of token is an operator.
+	 */
+	public final boolean	isOperator;
+	/**
+	 * The priority of this operator, if it is one.
+	 */
+	public final int	operatorPriority;
+
+	private TokenType(int num, boolean isOp, int priority) {
+		nVal = num;
+
+		isOperator = isOp;
+
+		operatorPriority = priority;
+	}
+
+	private TokenType(int num) {
+		this(num, false, -1);
+	}
+
+	@Override
+	public String toString() {
+		return Integer.toString(nVal);
+	}
+}
+\ No newline at end of file
diff --git a/dice-lang/src/bjc/dicelang/expr/Tokens.java b/dice-lang/src/bjc/dicelang/expr/Tokens.java
new file mode 100644
index 0000000..08e7197
--- /dev/null
+++ b/dice-lang/src/bjc/dicelang/expr/Tokens.java
@@ -0,0 +1,101 @@
+package bjc.dicelang.expr;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Contains per-instance state for token parsing.
+ *
+ * @author EVE
+ *
+ */
+public class Tokens {
+	/*
+	 * Contains mappings from variable references to string names.
+	 */
+	private final Map<Integer, String> symTab;
+	/*
+	 * Reverse index into the symbol table.
+	 */
+	private final Map<String, Integer> revSymTab;
+
+	/**
+	 * Read-only view on the symbol table.
+	 */
+	public final Map<Integer, String> symbolTable;
+
+	/*
+	 * Next index into the symbol table
+	 */
+	private int nextSym;
+
+	/*
+	 * Mapping from literal tokens to token types.
+	 */
+	private final Map<String, TokenType> litTokens;
+
+	/**
+	 * Create a new set of tokens.
+	 */
+	public Tokens() {
+		symTab = new HashMap<>();
+		revSymTab = new HashMap<>();
+
+		symbolTable = Collections.unmodifiableMap(symTab);
+
+		nextSym = 0;
+
+		litTokens = new HashMap<>();
+
+		litTokens.put("+", TokenType.ADD);
+		litTokens.put("-", TokenType.SUBTRACT);
+		litTokens.put("*", TokenType.MULTIPLY);
+		litTokens.put("/", TokenType.DIVIDE);
+		litTokens.put("(", TokenType.OPAREN);
+		litTokens.put(")", TokenType.CPAREN);
+	}
+
+	/**
+	 * Convert the string representation of a token into a token.
+	 *
+	 * @param tok
+	 *                The string repr. of the token.
+	 * @param raw
+	 *                The original string the token came from.
+	 *
+	 * @return The token the string represents.
+	 */
+	public Token lexToken(String tok, String raw) {
+		if(litTokens.containsKey(tok))
+			return new Token(litTokens.get(tok), raw, this);
+		else
+			return parseVRef(tok, raw);
+	}
+
+	/*
+	 * Parse a variable reference.
+	 */
+	private Token parseVRef(String tok, String raw) {
+		Token tk = new Token(TokenType.VREF, raw, this);
+
+		if(revSymTab.containsKey(tok)) {
+			/*
+			 * Reuse the entry if it exists.
+			 */
+			tk.intValue = revSymTab.get(tok);
+		} else {
+			/*
+			 * Create a new entry.
+			 */
+			tk.intValue = nextSym;
+
+			symTab.put(nextSym, tok);
+			revSymTab.put(tok, nextSym);
+
+			nextSym += 1;
+		}
+
+		return tk;
+	}
+}
author	bjculkin <bjculkin@mix.wvu.edu>	2017-03-16 15:33:09 -0400
committer	bjculkin <bjculkin@mix.wvu.edu>	2017-03-16 15:33:09 -0400
commit	348e10db258ae0cd0ae61dd99f6359ac4c8c0bd1 (patch)
tree	96e91ff2c65000612667b98aece9f301f70004ac /dice-lang/src
parent	ca704c30af8b5bf2695c7128e0b21f505457da7b (diff)