diff options
| author | bjculkin <bjculkin@mix.wvu.edu> | 2017-03-16 15:33:09 -0400 |
|---|---|---|
| committer | bjculkin <bjculkin@mix.wvu.edu> | 2017-03-16 15:33:09 -0400 |
| commit | 348e10db258ae0cd0ae61dd99f6359ac4c8c0bd1 (patch) | |
| tree | 96e91ff2c65000612667b98aece9f301f70004ac /dice-lang/src | |
| parent | ca704c30af8b5bf2695c7128e0b21f505457da7b (diff) | |
Added alternate expression parser.
This adds an alternate expression parser designed solely for arithmetic
operations.
Diffstat (limited to 'dice-lang/src')
| -rw-r--r-- | dice-lang/src/bjc/dicelang/expr/Lexer.java | 54 | ||||
| -rw-r--r-- | dice-lang/src/bjc/dicelang/expr/Parser.java | 153 | ||||
| -rw-r--r-- | dice-lang/src/bjc/dicelang/expr/Shunter.java | 95 | ||||
| -rw-r--r-- | dice-lang/src/bjc/dicelang/expr/Token.java | 87 | ||||
| -rw-r--r-- | dice-lang/src/bjc/dicelang/expr/TokenType.java | 69 | ||||
| -rw-r--r-- | dice-lang/src/bjc/dicelang/expr/Tokens.java | 101 |
6 files changed, 559 insertions, 0 deletions
diff --git a/dice-lang/src/bjc/dicelang/expr/Lexer.java b/dice-lang/src/bjc/dicelang/expr/Lexer.java new file mode 100644 index 0000000..cff1170 --- /dev/null +++ b/dice-lang/src/bjc/dicelang/expr/Lexer.java @@ -0,0 +1,54 @@ +package bjc.dicelang.expr; + +import bjc.utils.funcutils.TokenSplitter; + +import java.util.LinkedList; +import java.util.List; + +/** + * Implements the lexer for simple expression operations. + * + * @author Ben Culkin + */ +public class Lexer { + /* + * Spliter we use + */ + private TokenSplitter split; + + /** + * Create a new expression lexer. + */ + public Lexer() { + split = new TokenSplitter(); + + split.addDelimiter("(", ")"); + split.addDelimiter("+", "-", "*", "/"); + } + + /** + * Convert a string from a input command to a series of infix tokens. + * + * @param inp + * The input command. + * @param tks + * The token state + * + * @return A series of infix tokens representing the command. + */ + public Token[] lexString(String inp, Tokens tks) { + String[] spacedTokens = inp.split("[ \t]"); + + List<Token> tokens = new LinkedList<>(); + + for(String spacedToken : spacedTokens) { + String[] rawTokens = split.split(spacedToken); + + for(String tok : rawTokens) { + tokens.add(tks.lexToken(tok, spacedToken)); + } + } + + return tokens.toArray(new Token[0]); + } +} diff --git a/dice-lang/src/bjc/dicelang/expr/Parser.java b/dice-lang/src/bjc/dicelang/expr/Parser.java new file mode 100644 index 0000000..e806a6f --- /dev/null +++ b/dice-lang/src/bjc/dicelang/expr/Parser.java @@ -0,0 +1,153 @@ +package bjc.dicelang.expr; + +import bjc.utils.data.ITree; +import bjc.utils.funcdata.FunctionalList; +import bjc.utils.parserutils.TreeConstructor; + +import java.util.Arrays; +import java.util.Scanner; + +/** + * Parser for simple math expressions. + * + * @author Ben Culkin + */ +public class Parser { + /** + * Main method. + * + * @param args + * Unused CLI args. + */ + public static void main(String[] args) { + /* + * Create our objects. + */ + Tokens toks = new Tokens(); + Lexer lex = new Lexer(); + + /* + * Prepare our input. + */ + Scanner scan = new Scanner(System.in); + + /* + * Read initial command. + */ + System.out.print("Enter a math expression (blank line to quit): "); + String ln = scan.nextLine().trim(); + + /* + * Enter REPL loop. + */ + while(!ln.equals("")) { + /* + * Print raw command. + */ + System.out.println("Raw command: " + ln); + System.out.println(); + + /* + * Lex command to infix tokens. + */ + Token[] infixTokens = lex.lexString(ln, toks); + System.out.println("Lexed tokens: "); + for(Token tok : infixTokens) { + System.out.println("\t" + tok); + } + + /* + * Print out infix expression. + */ + System.out.print("Lexed expression: "); + for(Token tok : infixTokens) { + System.out.print(tok.toExpr() + " "); + } + System.out.println(); + System.out.println(); + + /* + * Shunt infix tokens to postfix tokens. + */ + Token[] postfixTokens = Shunter.shuntTokens(infixTokens); + System.out.println("Lexed tokens: "); + for(Token tok : postfixTokens) { + System.out.println("\t" + tok); + } + + /* + * Print out postfix tokens. + */ + System.out.print("Shunted expression: "); + for(Token tok : postfixTokens) { + System.out.print(tok.toExpr() + " "); + } + System.out.println(); + System.out.println(); + + FunctionalList<Token> tokList = new FunctionalList<>(Arrays.asList(postfixTokens)); + ITree<Token> ast = TreeConstructor.constructTree(tokList, tok -> tok.type.isOperator); + + /* + * Print the tree, then the canonical expression for it. + */ + System.out.println("Parsed tree"); + System.out.println(ast.toString()); + System.out.println("\nCanonical expr: " + toCanonicalExpr(ast)); + + System.out.println(); + + /* + * Prompt for a new expression. + */ + System.out.print("Enter a math expression (blank line to quit): "); + ln = scan.nextLine().trim(); + } + + /* + * Cleanup after ourselves. + */ + scan.close(); + } + + /* + * Convert an expression to one that uses the smallest necessary amount + * of parens. + */ + private static String toCanonicalExpr(ITree<Token> ast) { + Token data = ast.getHead(); + + if(ast.getChildrenCount() == 0) + /* + * Handle leaf nodes + */ + return data.toExpr(); + else { + ITree<Token> left = ast.getChild(0); + ITree<Token> right = ast.getChild(1); + + String leftExpr = toCanonicalExpr(left); + String rightExpr = toCanonicalExpr(right); + + /* + * Add parens if the left was higher priority. + */ + if(left.getChildrenCount() == 0) { + if(left.getHead().type.operatorPriority >= data.type.operatorPriority) { + leftExpr = "(" + leftExpr + ")"; + } + } + + /* + * Add parens if the right was higher priority. + */ + if(right.getChildrenCount() == 0) { + if(right.getHead().type.operatorPriority >= data.type.operatorPriority) { + rightExpr = "(" + rightExpr + ")"; + } + } + + return leftExpr + " " + data.toExpr() + " " + rightExpr; + } + } +} diff --git a/dice-lang/src/bjc/dicelang/expr/Shunter.java b/dice-lang/src/bjc/dicelang/expr/Shunter.java new file mode 100644 index 0000000..5d8cb7c --- /dev/null +++ b/dice-lang/src/bjc/dicelang/expr/Shunter.java @@ -0,0 +1,95 @@ +package bjc.dicelang.expr; + +import java.util.ArrayList; +import java.util.Deque; +import java.util.LinkedList; +import java.util.List; + +/** + * Converts a infix series of tokens into a prefix series of tokens. + * + * @author Ben Culkin + */ +public class Shunter { + /** + * Convert a infix series of tokens to a postfix series of tokens. + * + * @param infixTokens + * The tokens in infix order. + * + * @return The tokens in postfix order. + */ + public static Token[] shuntTokens(Token[] infixTokens) { + List<Token> postfixTokens = new ArrayList<>(infixTokens.length); + + Deque<Token> opStack = new LinkedList<>(); + + /* + * Shunt each token. + */ + for(Token tok : infixTokens) { + /* + * Handle operators. + */ + if(tok.type.isOperator) { + Token curOp = opStack.peek(); + + /* + * Check if an operator is higher priority, + * respecting their left associativity. + */ + int leftPriority = tok.type.operatorPriority; + int rightPriority = curOp == null ? 0 : curOp.type.operatorPriority; + + boolean isHigherPrec = leftPriority >= rightPriority; + + /* + * Pop all operators that are lower precedence + * than us. + */ + while(!opStack.isEmpty() && isHigherPrec) { + postfixTokens.add(opStack.pop()); + + curOp = opStack.peek(); + + leftPriority = tok.type.operatorPriority; + rightPriority = curOp == null ? 0 : curOp.type.operatorPriority; + + isHigherPrec = leftPriority >= rightPriority; + } + + opStack.push(tok); + } else if(tok.type == TokenType.OPAREN) { + opStack.push(tok); + } else if(tok.type == TokenType.CPAREN) { + Token curOp = opStack.peek(); + + /* + * Pop things until we find the matching paren. + */ + while(curOp.type != TokenType.OPAREN) { + Token tk = opStack.pop(); + + postfixTokens.add(tk); + + curOp = opStack.peek(); + } + + if(!opStack.isEmpty()) { + opStack.pop(); + } + } else { + postfixTokens.add(tok); + } + } + + /* + * Flush remaining operators. + */ + while(!opStack.isEmpty()) { + postfixTokens.add(opStack.pop()); + } + + return postfixTokens.toArray(new Token[0]); + } +} diff --git a/dice-lang/src/bjc/dicelang/expr/Token.java b/dice-lang/src/bjc/dicelang/expr/Token.java new file mode 100644 index 0000000..b02f6c9 --- /dev/null +++ b/dice-lang/src/bjc/dicelang/expr/Token.java @@ -0,0 +1,87 @@ +package bjc.dicelang.expr; + +/** + * Represents a lexical token. + * + * @author Ben Culkin + */ +public class Token { + /* + * The state for this token. + */ + private Tokens tks; + + /** + * The type of the token. + * + * Determines which fields have a value. + */ + public final TokenType type; + + /** + * The integer value attached to this token. + */ + public int intValue; + + /** + * The original string this token was part of. + */ + public String rawValue; + + /** + * Create a new token. + * + * @param type + * The type of this token. + * @param raw + * The string this token came from. + * @param toks + * The state for this token + */ + public Token(TokenType type, String raw, Tokens toks) { + this.type = type; + + rawValue = raw; + + tks = toks; + } + + @Override + public String toString() { + String typeStr = type.toString(); + typeStr += " (" + type.name() + ")"; + + if(type == TokenType.VREF) { + typeStr += " (ind. " + intValue; + typeStr += ", sym. \"" + tks.symbolTable.get(intValue) + "\")"; + } + + return typeStr + " (originally from: " + rawValue + ")"; + } + + /** + * Convert this token into the string representation of it. + * + * @return The string representation of it. + */ + public String toExpr() { + switch(type) { + case ADD: + return "+"; + case SUBTRACT: + return "-"; + case MULTIPLY: + return "*"; + case DIVIDE: + return "/"; + case VREF: + return tks.symbolTable.get(intValue); + case OPAREN: + return "("; + case CPAREN: + return ")"; + default: + return "???"; + } + } +} diff --git a/dice-lang/src/bjc/dicelang/expr/TokenType.java b/dice-lang/src/bjc/dicelang/expr/TokenType.java new file mode 100644 index 0000000..9e4bbc2 --- /dev/null +++ b/dice-lang/src/bjc/dicelang/expr/TokenType.java @@ -0,0 +1,69 @@ +package bjc.dicelang.expr; + +/** + * Represents the type of this token. + */ +public enum TokenType { + /** + * Represents + + */ + ADD(14, true, 0), + /** + * Represents - + */ + SUBTRACT(15, true, 0), + + /** + * Represents * + */ + MULTIPLY(16, true, 1), + /** + * Represents / + */ + DIVIDE(17, true, 1), + + /** + * Represents variable names. + */ + VREF(11), + + /** + * Represents ( + */ + OPAREN(0, false, 100), + /** + * Represents ) + */ + CPAREN(0, false, 100); + + /** + * The ID number for this token type. + */ + public final int nVal; + + /** + * Whether or not this type of token is an operator. + */ + public final boolean isOperator; + /** + * The priority of this operator, if it is one. + */ + public final int operatorPriority; + + private TokenType(int num, boolean isOp, int priority) { + nVal = num; + + isOperator = isOp; + + operatorPriority = priority; + } + + private TokenType(int num) { + this(num, false, -1); + } + + @Override + public String toString() { + return Integer.toString(nVal); + } +}
\ No newline at end of file diff --git a/dice-lang/src/bjc/dicelang/expr/Tokens.java b/dice-lang/src/bjc/dicelang/expr/Tokens.java new file mode 100644 index 0000000..08e7197 --- /dev/null +++ b/dice-lang/src/bjc/dicelang/expr/Tokens.java @@ -0,0 +1,101 @@ +package bjc.dicelang.expr; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + * Contains per-instance state for token parsing. + * + * @author EVE + * + */ +public class Tokens { + /* + * Contains mappings from variable references to string names. + */ + private final Map<Integer, String> symTab; + /* + * Reverse index into the symbol table. + */ + private final Map<String, Integer> revSymTab; + + /** + * Read-only view on the symbol table. + */ + public final Map<Integer, String> symbolTable; + + /* + * Next index into the symbol table + */ + private int nextSym; + + /* + * Mapping from literal tokens to token types. + */ + private final Map<String, TokenType> litTokens; + + /** + * Create a new set of tokens. + */ + public Tokens() { + symTab = new HashMap<>(); + revSymTab = new HashMap<>(); + + symbolTable = Collections.unmodifiableMap(symTab); + + nextSym = 0; + + litTokens = new HashMap<>(); + + litTokens.put("+", TokenType.ADD); + litTokens.put("-", TokenType.SUBTRACT); + litTokens.put("*", TokenType.MULTIPLY); + litTokens.put("/", TokenType.DIVIDE); + litTokens.put("(", TokenType.OPAREN); + litTokens.put(")", TokenType.CPAREN); + } + + /** + * Convert the string representation of a token into a token. + * + * @param tok + * The string repr. of the token. + * @param raw + * The original string the token came from. + * + * @return The token the string represents. + */ + public Token lexToken(String tok, String raw) { + if(litTokens.containsKey(tok)) + return new Token(litTokens.get(tok), raw, this); + else + return parseVRef(tok, raw); + } + + /* + * Parse a variable reference. + */ + private Token parseVRef(String tok, String raw) { + Token tk = new Token(TokenType.VREF, raw, this); + + if(revSymTab.containsKey(tok)) { + /* + * Reuse the entry if it exists. + */ + tk.intValue = revSymTab.get(tok); + } else { + /* + * Create a new entry. + */ + tk.intValue = nextSym; + + symTab.put(nextSym, tok); + revSymTab.put(tok, nextSym); + + nextSym += 1; + } + + return tk; + } +} |
