diff options
| author | Benjamin J. Culkin <bjculkin@mix.wvu.edu> | 2017-10-09 16:02:10 -0300 |
|---|---|---|
| committer | Benjamin J. Culkin <bjculkin@mix.wvu.edu> | 2017-10-09 16:02:10 -0300 |
| commit | f028ea6dc555fc5192a96b00b8e96e90dbf6de55 (patch) | |
| tree | 4b2a28ecbeb30095b50e6e9e8ac8b98fa8ddc79e /dice-lang/src/bjc/dicelang/expr | |
| parent | be4675f9512060aa85b1e0a4f223208b51b55812 (diff) | |
TODO tagging
Diffstat (limited to 'dice-lang/src/bjc/dicelang/expr')
| -rw-r--r-- | dice-lang/src/bjc/dicelang/expr/Ezpr.java | 4 | ||||
| -rw-r--r-- | dice-lang/src/bjc/dicelang/expr/Lexer.java | 25 | ||||
| -rw-r--r-- | dice-lang/src/bjc/dicelang/expr/Parser.java | 108 | ||||
| -rw-r--r-- | dice-lang/src/bjc/dicelang/expr/Shunter.java | 26 | ||||
| -rw-r--r-- | dice-lang/src/bjc/dicelang/expr/Token.java | 32 | ||||
| -rw-r--r-- | dice-lang/src/bjc/dicelang/expr/TokenType.java | 61 | ||||
| -rw-r--r-- | dice-lang/src/bjc/dicelang/expr/Tokens.java | 50 |
7 files changed, 139 insertions, 167 deletions
diff --git a/dice-lang/src/bjc/dicelang/expr/Ezpr.java b/dice-lang/src/bjc/dicelang/expr/Ezpr.java index 7186d6f..862c097 100644 --- a/dice-lang/src/bjc/dicelang/expr/Ezpr.java +++ b/dice-lang/src/bjc/dicelang/expr/Ezpr.java @@ -1,3 +1,7 @@ +/* + * @TODO 10/08/17 Ben Culkin :EzprFixing + * Implement these, and make sure they work correctly. + */ // package bjc.dicelang.expr; // import bjc.utils.data.ITree; diff --git a/dice-lang/src/bjc/dicelang/expr/Lexer.java b/dice-lang/src/bjc/dicelang/expr/Lexer.java index bac866b..dfa0f76 100644 --- a/dice-lang/src/bjc/dicelang/expr/Lexer.java +++ b/dice-lang/src/bjc/dicelang/expr/Lexer.java @@ -6,20 +6,22 @@ import java.util.List; import bjc.utils.funcdata.IList; import bjc.utils.parserutils.splitter.ConfigurableTokenSplitter; +/* + * @TODO 10/08/18 :IntExpressions + * Add support for integer constants, and maybe floating-point ones as well + * if you feel like. Heck, you could even go for ratio constants and things + * as well. + */ /** * Implements the lexer for simple expression operations. * * @author Ben Culkin */ public class Lexer { - /* - * Splitter we use. - */ + /* Splitter we use. */ private final ConfigurableTokenSplitter split; - /** - * Create a new expression lexer. - */ + /** Create a new expression lexer. */ public Lexer() { split = new ConfigurableTokenSplitter(true); @@ -39,14 +41,19 @@ public class Lexer { * @return A series of infix tokens representing the command. */ public Token[] lexString(final String inp, final Tokens tks) { + /* Split tokens on whitespace. */ final String[] spacedTokens = inp.split("[ \t]"); + /* Tokens to return. */ + final List<Token> tokens = new LinkedList<>(); - final List<Token> tokens = new LinkedList<>(); - + /* Process each token. */ for (final String spacedToken : spacedTokens) { + /* Split on operators. */ final IList<String> splitTokens = split.split(spacedToken); - final IList<Token> rawTokens = splitTokens.map(tok -> tks.lexToken(tok, spacedToken)); + /* Convert strings to tokens. */ + final IList<Token> rawTokens = splitTokens.map(tok -> tks.lexToken(tok, spacedToken)); + /* Add tokens to results. */ rawTokens.forEach(tokens::add); } diff --git a/dice-lang/src/bjc/dicelang/expr/Parser.java b/dice-lang/src/bjc/dicelang/expr/Parser.java index 6d34b96..5fa2d3d 100644 --- a/dice-lang/src/bjc/dicelang/expr/Parser.java +++ b/dice-lang/src/bjc/dicelang/expr/Parser.java @@ -13,6 +13,10 @@ import bjc.utils.parserutils.TreeConstructor; * @author Ben Culkin */ public class Parser { + /* + * @TODO 10/08/17 Ben Culkin :MainSeperation + * This main method should be moved to its own class. + */ /** * Main method. * @@ -20,100 +24,81 @@ public class Parser { * Unused CLI args. */ public static void main(final String[] args) { - /* - * Create our objects. - */ + /* Create our objects. */ final Tokens toks = new Tokens(); final Lexer lex = new Lexer(); - /* - * Prepare our input. - */ + /* Prepare our input source. */ final Scanner scan = new Scanner(System.in); - /* - * Read initial command. - */ + /* Read initial command. */ System.out.print("Enter a math expression (blank line to quit): "); String ln = scan.nextLine().trim(); - /* - * Enter REPL loop. - */ + /* Enter REPL loop. */ while (!ln.equals("")) { - /* - * Print raw command. - */ + /* Print raw command. */ System.out.println("Raw command: " + ln); System.out.println(); - /* - * Lex command to infix tokens. - */ + /* Lex command to infix tokens. */ final Token[] infixTokens = lex.lexString(ln, toks); System.out.println("Lexed tokens: "); - for (final Token tok : infixTokens) { System.out.println("\t" + tok); } - /* - * Print out infix expression. - */ + /* Print out infix expression. */ System.out.print("Lexed expression: "); - for (final Token tok : infixTokens) { System.out.print(tok.toExpr() + " "); } + /* Space stages. */ System.out.println(); System.out.println(); - /* - * Shunt infix tokens to postfix tokens. - */ + /* Shunt infix tokens to postfix tokens. */ final Token[] postfixTokens = Shunter.shuntTokens(infixTokens); System.out.println("Lexed tokens: "); - for (final Token tok : postfixTokens) { System.out.println("\t" + tok); } - /* - * Print out postfix tokens. - */ + /* Print out postfix tokens. */ System.out.print("Shunted expression: "); - for (final Token tok : postfixTokens) { System.out.print(tok.toExpr() + " "); } + /* Space stages. */ System.out.println(); System.out.println(); - final FunctionalList<Token> tokList = new FunctionalList<>(Arrays.asList(postfixTokens)); + /* Construct a list from the array of tokens. */ + final FunctionalList<Token> tokList = new FunctionalList<>( + Arrays.asList(postfixTokens)); + + /* Construct a tree from the list of postfixed tokens. */ final ITree<Token> ast = TreeConstructor.constructTree(tokList, - tok -> tok.typ.isOperator); + tok -> tok.typ.isOperator); - /* - * Print the tree, then the canonical expression for it. - */ + /* Print the tree, then the canonical expression for it. */ System.out.println("Parsed tree"); System.out.println(ast.toString()); System.out.println("\nCanonical expr: " + toCanonicalExpr(ast)); + /* Space stages. */ + System.out.println(); System.out.println(); - /* - * Prompt for a new expression. - */ + /* Prompt for a new expression. */ System.out.print("Enter a math expression (blank line to quit): "); + /* Read it. */ ln = scan.nextLine().trim(); } - /* - * Cleanup after ourselves. - */ + /* Cleanup after ourselves. */ scan.close(); } @@ -124,38 +109,39 @@ public class Parser { private static String toCanonicalExpr(final ITree<Token> ast) { final Token data = ast.getHead(); - if (ast.getChildrenCount() == 0) - /* - * Handle leaf nodes. - */ - { + if (ast.getChildrenCount() == 0) { + /* Handle leaf nodes. */ return data.toExpr(); } - final ITree<Token> left = ast.getChild(0); + /* The left/right children. */ + final ITree<Token> left = ast.getChild(0); final ITree<Token> right = ast.getChild(1); - String leftExpr = toCanonicalExpr(left); + /* Recursively canonicalize them. */ + String leftExpr = toCanonicalExpr(left); String rightExpr = toCanonicalExpr(right); - /* - * Add parens if the left was higher priority. - */ + /* Add parens if the left was higher priority. */ if (left.getChildrenCount() == 0) { - if (left.getHead().typ.operatorPriority >= data.typ.operatorPriority) { - leftExpr = "(" + leftExpr + ")"; + int leftPriority = left.getHead().typ.operatorPriority; + int dataPriority = data.typ.operatorPriority; + + if (leftPriority >= dataPriority) { + leftExpr = String.format("(%s)", leftExpr); } } - /* - * Add parens if the right was higher priority. - */ + /* Add parens if the right was higher priority. */ if (right.getChildrenCount() == 0) { - if (right.getHead().typ.operatorPriority >= data.typ.operatorPriority) { - rightExpr = "(" + rightExpr + ")"; + int rightPriority = right.getHead().typ.operatorPriority; + int dataPriority = data.typ.operatorPriority; + + if (rightPriority >= dataPriority) { + rightExpr = String.format("(%s)", rightExpr); } } - return leftExpr + " " + data.toExpr() + " " + rightExpr; + return String.format("%s %s %s", leftExpr, data.toExpr(), rightExpr); } -}
\ No newline at end of file +} diff --git a/dice-lang/src/bjc/dicelang/expr/Shunter.java b/dice-lang/src/bjc/dicelang/expr/Shunter.java index 3e49356..213e473 100644 --- a/dice-lang/src/bjc/dicelang/expr/Shunter.java +++ b/dice-lang/src/bjc/dicelang/expr/Shunter.java @@ -11,6 +11,11 @@ import java.util.List; * @author Ben Culkin */ public class Shunter { + /* + * @NOTE + * Why does this method return an array, and not the list of + * tokens? + */ /** * Convert a infix series of tokens to a postfix series of tokens. * @@ -20,26 +25,27 @@ public class Shunter { * @return The tokens in postfix order. */ public static Token[] shuntTokens(final Token[] infixTokens) { + /* The returned tokens. */ final List<Token> postfixTokens = new ArrayList<>(infixTokens.length); + /* The current stack of operators. */ final Deque<Token> opStack = new LinkedList<>(); - /* - * Shunt each token. - */ + /* Shunt each token. */ for (final Token tok : infixTokens) { - /* - * Handle operators. - */ + /* Handle operators. */ if (tok.typ.isOperator) { Token curOp = opStack.peek(); /* * Check if an operator is higher priority, * respecting their left associativity. + * + * @NOTE + * Should this be factored out into a + * method? */ int leftPriority = tok.typ.operatorPriority; - int rightPriority; if (curOp == null) { @@ -56,11 +62,9 @@ public class Shunter { */ while (!opStack.isEmpty() && isHigherPrec) { postfixTokens.add(opStack.pop()); - curOp = opStack.peek(); leftPriority = tok.typ.operatorPriority; - if (curOp == null) { rightPriority = 0; } else { @@ -82,9 +86,7 @@ public class Shunter { */ while (curOp.typ != TokenType.OPAREN) { final Token tk = opStack.pop(); - postfixTokens.add(tk); - curOp = opStack.peek(); } @@ -105,4 +107,4 @@ public class Shunter { return postfixTokens.toArray(new Token[0]); } -}
\ No newline at end of file +} diff --git a/dice-lang/src/bjc/dicelang/expr/Token.java b/dice-lang/src/bjc/dicelang/expr/Token.java index 1a506bf..bf92f97 100644 --- a/dice-lang/src/bjc/dicelang/expr/Token.java +++ b/dice-lang/src/bjc/dicelang/expr/Token.java @@ -1,14 +1,18 @@ package bjc.dicelang.expr; +/* + * @TODO 10/08/17 :TokenReorg + * I am not a fan of this 'having a bunch of subclasses' in one thing I + * seem to have been doing around this project. This should be multiple + * subclasses, one for each value for TokenType. + */ /** * Represents a lexical token. * * @author Ben Culkin */ public class Token { - /* - * The state for this token. - */ + /* The state for this token. */ private final Tokens tks; /** @@ -18,14 +22,10 @@ public class Token { */ public final TokenType typ; - /** - * The integer value attached to this token. - */ + /** The integer value attached to this token. */ public int intValue; - /** - * The original string this token was part of. - */ + /** The original string this token was part of. */ public String rawValue; /** @@ -44,21 +44,20 @@ public class Token { this.typ = type; rawValue = raw; - tks = toks; } @Override public String toString() { String typeStr = typ.toString(); - typeStr += " (" + typ.name() + ")"; + typeStr = String.format("%s (%s)", typeStr, typ.name()); if (typ == TokenType.VREF) { typeStr += " (ind. " + intValue; typeStr += ", sym. \"" + tks.symbolTable.get(intValue) + "\")"; } - return typeStr + " (originally from: " + rawValue + ")"; + return String.format("%s (originally from: %s)", typeStr, rawValue); } /** @@ -70,27 +69,20 @@ public class Token { switch (typ) { case ADD: return "+"; - case SUBTRACT: return "-"; - case MULTIPLY: return "*"; - case DIVIDE: return "/"; - case VREF: return tks.symbolTable.get(intValue); - case OPAREN: return "("; - case CPAREN: return ")"; - default: return "???"; } } -}
\ No newline at end of file +} diff --git a/dice-lang/src/bjc/dicelang/expr/TokenType.java b/dice-lang/src/bjc/dicelang/expr/TokenType.java index d88283e..ad01917 100644 --- a/dice-lang/src/bjc/dicelang/expr/TokenType.java +++ b/dice-lang/src/bjc/dicelang/expr/TokenType.java @@ -2,59 +2,46 @@ package bjc.dicelang.expr; /** * Represents the type of this token. + * + * @author Ben Culkin */ public enum TokenType { - /** - * Represents + + /* + * @NOTE + * Do we want to switch to auto-numbering the tokens? They were + * manually numbered because this was an assignment for PoPL and + * that was what Dr. Naz wanted. */ - ADD(14, true, 0), - /** - * Represents - - */ - SUBTRACT(15, true, 0), - /** - * Represents * - */ + /** Represents + */ + ADD( 14, true, 0), + /** Represents - */ + SUBTRACT(15, true, 0), + /** Represents * */ MULTIPLY(16, true, 1), - /** - * Represents / - */ - DIVIDE(17, true, 1), + /** Represents / */ + DIVIDE( 17, true, 1), - /** - * Represents variable names. - */ + /** Represents variable names. */ VREF(11), - /** - * Represents ( - */ + /** Represents ( */ OPAREN(0, false, 100), - /** - * Represents ) - */ + /** Represents ) */ CPAREN(0, false, 100); - /** - * The ID number for this token type. - */ + /** The ID number for this token type. */ public final int nVal; - /** - * Whether or not this type of token is an operator. - */ + /** Whether or not this type of token is an operator. */ public final boolean isOperator; - /** - * The priority of this operator, if it is one. - */ + /** The priority of this operator, if it is one. */ public final int operatorPriority; + /* Create a new token. */ private TokenType(final int num, final boolean isOp, final int priority) { - nVal = num; - - isOperator = isOp; - + nVal = num; + isOperator = isOp; operatorPriority = priority; } @@ -66,4 +53,4 @@ public enum TokenType { public String toString() { return Integer.toString(nVal); } -}
\ No newline at end of file +} diff --git a/dice-lang/src/bjc/dicelang/expr/Tokens.java b/dice-lang/src/bjc/dicelang/expr/Tokens.java index f763d37..287d2b4 100644 --- a/dice-lang/src/bjc/dicelang/expr/Tokens.java +++ b/dice-lang/src/bjc/dicelang/expr/Tokens.java @@ -11,43 +11,39 @@ import java.util.Map; * */ public class Tokens { - /* - * Contains mappings from variable references to string names. - */ + /* Contains mappings from variable references to string names. */ private final Map<Integer, String> symTab; - /* - * Reverse index into the symbol table. - */ + /* Reverse index into the symbol table. */ private final Map<String, Integer> revSymTab; - /** - * Read-only view on the symbol table. - */ + /** Read-only view on the symbol table. */ public final Map<Integer, String> symbolTable; - /* - * Next index into the symbol table. - */ + /* Next index into the symbol table. */ private int nextSym; - /* - * Mapping from literal tokens to token types. - */ + /* Mapping from literal tokens to token types. */ private final Map<String, TokenType> litTokens; - /** - * Create a new set of tokens. - */ + /** Create a new set of tokens. */ public Tokens() { + /* Create tables. */ symTab = new HashMap<>(); revSymTab = new HashMap<>(); + /* Init public view. */ symbolTable = Collections.unmodifiableMap(symTab); + /* Set sym ID. */ nextSym = 0; + /* + * Setup literal mappings. + * + * @NOTE + * Should this be a static member? + */ litTokens = new HashMap<>(); - litTokens.put("+", TokenType.ADD); litTokens.put("-", TokenType.SUBTRACT); litTokens.put("*", TokenType.MULTIPLY); @@ -68,32 +64,30 @@ public class Tokens { */ public Token lexToken(final String tok, final String raw) { if (litTokens.containsKey(tok)) { + /* Return matching literal token. */ return new Token(litTokens.get(tok), raw, this); } + /* Its a variable reference. */ return parseVRef(tok, raw); } - /* - * Parse a variable reference. - */ + /* Parse a variable reference. */ private Token parseVRef(final String tok, final String raw) { final Token tk = new Token(TokenType.VREF, raw, this); if (revSymTab.containsKey(tok)) { - /* - * Reuse the entry if it exists. - */ + /* Reuse the entry if it exists. */ tk.intValue = revSymTab.get(tok); } else { - /* - * Create a new entry. - */ + /* Create a new entry. */ tk.intValue = nextSym; + /* Record it. */ symTab.put(nextSym, tok); revSymTab.put(tok, nextSym); + /* Next ID. */ nextSym += 1; } |
