diff options
| author | Benjamin J. Culkin <bjculkin@mix.wvu.edu> | 2017-10-25 12:10:14 -0300 |
|---|---|---|
| committer | Benjamin J. Culkin <bjculkin@mix.wvu.edu> | 2017-10-25 12:10:14 -0300 |
| commit | 7bda9de511a5642efb297eae98c6ea7c42b27754 (patch) | |
| tree | dff1aa772b9ac088c5bd07b8d10d944cbff89f96 /base/src/bjc/dicelang/expr | |
| parent | f028ea6dc555fc5192a96b00b8e96e90dbf6de55 (diff) | |
Start switch to maven modules
Diffstat (limited to 'base/src/bjc/dicelang/expr')
| -rw-r--r-- | base/src/bjc/dicelang/expr/Ezpr.java | 149 | ||||
| -rw-r--r-- | base/src/bjc/dicelang/expr/Lexer.java | 62 | ||||
| -rw-r--r-- | base/src/bjc/dicelang/expr/Parser.java | 147 | ||||
| -rw-r--r-- | base/src/bjc/dicelang/expr/Shunter.java | 110 | ||||
| -rw-r--r-- | base/src/bjc/dicelang/expr/Token.java | 88 | ||||
| -rw-r--r-- | base/src/bjc/dicelang/expr/TokenType.java | 56 | ||||
| -rw-r--r-- | base/src/bjc/dicelang/expr/Tokens.java | 96 |
7 files changed, 708 insertions, 0 deletions
diff --git a/base/src/bjc/dicelang/expr/Ezpr.java b/base/src/bjc/dicelang/expr/Ezpr.java new file mode 100644 index 0000000..862c097 --- /dev/null +++ b/base/src/bjc/dicelang/expr/Ezpr.java @@ -0,0 +1,149 @@ +/* + * @TODO 10/08/17 Ben Culkin :EzprFixing + * Implement these, and make sure they work correctly. + */ +// package bjc.dicelang.expr; + +// import bjc.utils.data.ITree; + +// import com.google.guava.collect.HashMultiset; +// import com.google.guava.collect.Multiset; + +// import static bjc.dicelang.expr.EzprType; +// import static bjc.dicelang.expr.EzprType.SUM; +// import static bjc.dicelang.expr.EzprType.MUL; + +// import static bjc.dicelang.expr.EzprNode; +// import static bjc.dicelnag.ezpr.EzprNode.EzprNodeType; +// import static bjc.dicelnag.ezpr.EzprNode.EzprNodeType.EZPR; +// import static bjc.dicelnag.ezpr.EzprNode.EzprNodeType.TOKEN; + +// public class Ezpr { +// public static enum EzprType { +// SUM, MUL +// } + +// public static class EzprNode { +// public static enum EzprNodeType { +// EZPR, TOKEN +// } + +// public final EzprNodeType typ; + +// public final Ezpr ezp; +// public final Token tok; + +// public EzprNode(Ezpr exp) { +// typ = EZPR; + +// ezp = exp; +// tok = null; +// } + +// public EzprNode(Token tk) { +// typ = TOKEN; + +// tok = tk; +// ezp = null; +// } + +// public String toString() { +// if(typ == TOKEN) { +// return tok.toString(); +// } +// return ezp.toString(); +// } +// } + +// private EzprType typ; + +// private Multiset<EzprNode> positive; +// private Multiset<EzprNode> negative; + +// public Ezpr(EzprType type, Multiset<EzprNode> pos, Multiset<EzprNode> neg) { +// typ = type; + +// positive = pos; +// negative = neg; +// } + +// public Ezpr flatten() { +// HashMultiset<EzprNode> newPositive = HashMultiset.create(); +// HashMultiset<EzprNode> newNegative = HashMultiset.create(); + +// for(EzprNode nd : positive) { +// /* Flatten enclosed ezprs of the same type. */ +// if(nd.typ == EZPR && (nd.ezp.typ == typ)) { +// /* Recursively flatten kids. */ +// Ezpr kid = nd.ezp.flatten(); + +// if(typ == SUM) { +// /* Add sum parts to corresponding bags. */ +// for(EzprNode knd : kid.positive) { +// newPositive.add(knd); +// } +// for(EzprNode knd : kid.negative) { +// newNegative.add(knd); +// } +// } else { +// /* @TODO ensure that this is correct. */ +// for(EzprNode knd : kid.positive) { +// newPositive.add(knd); +// } +// for(EzprNode knd : kid.negative) { +// newNegative.add(knd); +// } +// } +// } else { +// newPositive.add(nd); +// } +// } + +// for(EzprNode nd : negative) { +// /* Flatten enclosed ezprs of the same type. */ +// if(nd.typ == EZPR && (nd.ezp.typ == typ)) { +// /* Recursively flatten kids. */ +// Ezpr kid = nd.ezp.flatten(); + +// /* @TODO ensure that this is correct. */ +// if(typ == SUM) { +// for(EzprNode knd : kid.positive) { +// newNegative.add(knd); +// } +// for(EzprNode knd : kid.negative) { +// newPositive.add(knd); +// } +// } else { +// for(EzprNode knd : kid.positive) { +// newNegative.add(knd); +// } +// for(EzprNode knd : kid.negative) { +// newPositive.add(knd); +// } +// } +// } else { +// newNegative.add(nd); +// } +// } +// } + +// public String toString() { +// StringBuilder sb = new StringBuilder(typ.toString()); + +// sb.append(" [ "); +// for(EzprNode nd : positive) { +// sb.append(nd.toString()); +// sb.append(" "); +// } + +// sb.append("# "); +// for(EzprNode nd : negative) { +// sb.append(nd.toString()); +// sb.append(" "); +// } + +// sb.append("]"); + +// return sb.toString(); +// } +// } diff --git a/base/src/bjc/dicelang/expr/Lexer.java b/base/src/bjc/dicelang/expr/Lexer.java new file mode 100644 index 0000000..dfa0f76 --- /dev/null +++ b/base/src/bjc/dicelang/expr/Lexer.java @@ -0,0 +1,62 @@ +package bjc.dicelang.expr; + +import java.util.LinkedList; +import java.util.List; + +import bjc.utils.funcdata.IList; +import bjc.utils.parserutils.splitter.ConfigurableTokenSplitter; + +/* + * @TODO 10/08/18 :IntExpressions + * Add support for integer constants, and maybe floating-point ones as well + * if you feel like. Heck, you could even go for ratio constants and things + * as well. + */ +/** + * Implements the lexer for simple expression operations. + * + * @author Ben Culkin + */ +public class Lexer { + /* Splitter we use. */ + private final ConfigurableTokenSplitter split; + + /** Create a new expression lexer. */ + public Lexer() { + split = new ConfigurableTokenSplitter(true); + + split.addSimpleDelimiters("(", ")"); + split.addSimpleDelimiters("+", "-", "*", "/"); + } + + /** + * Convert a string from a input command to a series of infix tokens. + * + * @param inp + * The input command. + * + * @param tks + * The token state. + * + * @return A series of infix tokens representing the command. + */ + public Token[] lexString(final String inp, final Tokens tks) { + /* Split tokens on whitespace. */ + final String[] spacedTokens = inp.split("[ \t]"); + /* Tokens to return. */ + final List<Token> tokens = new LinkedList<>(); + + /* Process each token. */ + for (final String spacedToken : spacedTokens) { + /* Split on operators. */ + final IList<String> splitTokens = split.split(spacedToken); + /* Convert strings to tokens. */ + final IList<Token> rawTokens = splitTokens.map(tok -> tks.lexToken(tok, spacedToken)); + + /* Add tokens to results. */ + rawTokens.forEach(tokens::add); + } + + return tokens.toArray(new Token[0]); + } +} diff --git a/base/src/bjc/dicelang/expr/Parser.java b/base/src/bjc/dicelang/expr/Parser.java new file mode 100644 index 0000000..5fa2d3d --- /dev/null +++ b/base/src/bjc/dicelang/expr/Parser.java @@ -0,0 +1,147 @@ +package bjc.dicelang.expr; + +import java.util.Arrays; +import java.util.Scanner; + +import bjc.utils.data.ITree; +import bjc.utils.funcdata.FunctionalList; +import bjc.utils.parserutils.TreeConstructor; + +/** + * Parser for simple math expressions. + * + * @author Ben Culkin + */ +public class Parser { + /* + * @TODO 10/08/17 Ben Culkin :MainSeperation + * This main method should be moved to its own class. + */ + /** + * Main method. + * + * @param args + * Unused CLI args. + */ + public static void main(final String[] args) { + /* Create our objects. */ + final Tokens toks = new Tokens(); + final Lexer lex = new Lexer(); + + /* Prepare our input source. */ + final Scanner scan = new Scanner(System.in); + + /* Read initial command. */ + System.out.print("Enter a math expression (blank line to quit): "); + String ln = scan.nextLine().trim(); + + /* Enter REPL loop. */ + while (!ln.equals("")) { + /* Print raw command. */ + System.out.println("Raw command: " + ln); + System.out.println(); + + /* Lex command to infix tokens. */ + final Token[] infixTokens = lex.lexString(ln, toks); + System.out.println("Lexed tokens: "); + for (final Token tok : infixTokens) { + System.out.println("\t" + tok); + } + + /* Print out infix expression. */ + System.out.print("Lexed expression: "); + for (final Token tok : infixTokens) { + System.out.print(tok.toExpr() + " "); + } + + /* Space stages. */ + System.out.println(); + System.out.println(); + + /* Shunt infix tokens to postfix tokens. */ + final Token[] postfixTokens = Shunter.shuntTokens(infixTokens); + System.out.println("Lexed tokens: "); + for (final Token tok : postfixTokens) { + System.out.println("\t" + tok); + } + + /* Print out postfix tokens. */ + System.out.print("Shunted expression: "); + for (final Token tok : postfixTokens) { + System.out.print(tok.toExpr() + " "); + } + + /* Space stages. */ + System.out.println(); + System.out.println(); + + /* Construct a list from the array of tokens. */ + final FunctionalList<Token> tokList = new FunctionalList<>( + Arrays.asList(postfixTokens)); + + /* Construct a tree from the list of postfixed tokens. */ + final ITree<Token> ast = TreeConstructor.constructTree(tokList, + tok -> tok.typ.isOperator); + + /* Print the tree, then the canonical expression for it. */ + System.out.println("Parsed tree"); + System.out.println(ast.toString()); + System.out.println("\nCanonical expr: " + toCanonicalExpr(ast)); + + /* Space stages. */ + System.out.println(); + System.out.println(); + + /* Prompt for a new expression. */ + System.out.print("Enter a math expression (blank line to quit): "); + /* Read it. */ + ln = scan.nextLine().trim(); + } + + /* Cleanup after ourselves. */ + scan.close(); + } + + /* + * Convert an expression to one that uses the smallest necessary amount + * of parens. + */ + private static String toCanonicalExpr(final ITree<Token> ast) { + final Token data = ast.getHead(); + + if (ast.getChildrenCount() == 0) { + /* Handle leaf nodes. */ + return data.toExpr(); + } + + /* The left/right children. */ + final ITree<Token> left = ast.getChild(0); + final ITree<Token> right = ast.getChild(1); + + /* Recursively canonicalize them. */ + String leftExpr = toCanonicalExpr(left); + String rightExpr = toCanonicalExpr(right); + + /* Add parens if the left was higher priority. */ + if (left.getChildrenCount() == 0) { + int leftPriority = left.getHead().typ.operatorPriority; + int dataPriority = data.typ.operatorPriority; + + if (leftPriority >= dataPriority) { + leftExpr = String.format("(%s)", leftExpr); + } + } + + /* Add parens if the right was higher priority. */ + if (right.getChildrenCount() == 0) { + int rightPriority = right.getHead().typ.operatorPriority; + int dataPriority = data.typ.operatorPriority; + + if (rightPriority >= dataPriority) { + rightExpr = String.format("(%s)", rightExpr); + } + } + + return String.format("%s %s %s", leftExpr, data.toExpr(), rightExpr); + } +} diff --git a/base/src/bjc/dicelang/expr/Shunter.java b/base/src/bjc/dicelang/expr/Shunter.java new file mode 100644 index 0000000..213e473 --- /dev/null +++ b/base/src/bjc/dicelang/expr/Shunter.java @@ -0,0 +1,110 @@ +package bjc.dicelang.expr; + +import java.util.ArrayList; +import java.util.Deque; +import java.util.LinkedList; +import java.util.List; + +/** + * Converts a infix series of tokens into a prefix series of tokens. + * + * @author Ben Culkin + */ +public class Shunter { + /* + * @NOTE + * Why does this method return an array, and not the list of + * tokens? + */ + /** + * Convert a infix series of tokens to a postfix series of tokens. + * + * @param infixTokens + * The tokens in infix order. + * + * @return The tokens in postfix order. + */ + public static Token[] shuntTokens(final Token[] infixTokens) { + /* The returned tokens. */ + final List<Token> postfixTokens = new ArrayList<>(infixTokens.length); + + /* The current stack of operators. */ + final Deque<Token> opStack = new LinkedList<>(); + + /* Shunt each token. */ + for (final Token tok : infixTokens) { + /* Handle operators. */ + if (tok.typ.isOperator) { + Token curOp = opStack.peek(); + + /* + * Check if an operator is higher priority, + * respecting their left associativity. + * + * @NOTE + * Should this be factored out into a + * method? + */ + int leftPriority = tok.typ.operatorPriority; + int rightPriority; + + if (curOp == null) { + rightPriority = 0; + } else { + rightPriority = curOp.typ.operatorPriority; + } + + boolean isHigherPrec = leftPriority >= rightPriority; + + /* + * Pop all operators that are lower precedence + * than us. + */ + while (!opStack.isEmpty() && isHigherPrec) { + postfixTokens.add(opStack.pop()); + curOp = opStack.peek(); + + leftPriority = tok.typ.operatorPriority; + if (curOp == null) { + rightPriority = 0; + } else { + rightPriority = curOp.typ.operatorPriority; + } + + isHigherPrec = leftPriority >= rightPriority; + } + + opStack.push(tok); + } else if (tok.typ == TokenType.OPAREN) { + opStack.push(tok); + } else if (tok.typ == TokenType.CPAREN) { + Token curOp = opStack.peek(); + + /* + * Pop things until we find the matching + * parenthesis. + */ + while (curOp.typ != TokenType.OPAREN) { + final Token tk = opStack.pop(); + postfixTokens.add(tk); + curOp = opStack.peek(); + } + + if (!opStack.isEmpty()) { + opStack.pop(); + } + } else { + postfixTokens.add(tok); + } + } + + /* + * Flush remaining operators. + */ + while (!opStack.isEmpty()) { + postfixTokens.add(opStack.pop()); + } + + return postfixTokens.toArray(new Token[0]); + } +} diff --git a/base/src/bjc/dicelang/expr/Token.java b/base/src/bjc/dicelang/expr/Token.java new file mode 100644 index 0000000..bf92f97 --- /dev/null +++ b/base/src/bjc/dicelang/expr/Token.java @@ -0,0 +1,88 @@ +package bjc.dicelang.expr; + +/* + * @TODO 10/08/17 :TokenReorg + * I am not a fan of this 'having a bunch of subclasses' in one thing I + * seem to have been doing around this project. This should be multiple + * subclasses, one for each value for TokenType. + */ +/** + * Represents a lexical token. + * + * @author Ben Culkin + */ +public class Token { + /* The state for this token. */ + private final Tokens tks; + + /** + * The type of the token. + * + * Determines which fields have a value. + */ + public final TokenType typ; + + /** The integer value attached to this token. */ + public int intValue; + + /** The original string this token was part of. */ + public String rawValue; + + /** + * Create a new token. + * + * @param type + * The type of this token. + * + * @param raw + * The string this token came from. + * + * @param toks + * The state for this token + */ + public Token(final TokenType type, final String raw, final Tokens toks) { + this.typ = type; + + rawValue = raw; + tks = toks; + } + + @Override + public String toString() { + String typeStr = typ.toString(); + typeStr = String.format("%s (%s)", typeStr, typ.name()); + + if (typ == TokenType.VREF) { + typeStr += " (ind. " + intValue; + typeStr += ", sym. \"" + tks.symbolTable.get(intValue) + "\")"; + } + + return String.format("%s (originally from: %s)", typeStr, rawValue); + } + + /** + * Convert this token into the string representation of it. + * + * @return The string representation of it. + */ + public String toExpr() { + switch (typ) { + case ADD: + return "+"; + case SUBTRACT: + return "-"; + case MULTIPLY: + return "*"; + case DIVIDE: + return "/"; + case VREF: + return tks.symbolTable.get(intValue); + case OPAREN: + return "("; + case CPAREN: + return ")"; + default: + return "???"; + } + } +} diff --git a/base/src/bjc/dicelang/expr/TokenType.java b/base/src/bjc/dicelang/expr/TokenType.java new file mode 100644 index 0000000..ad01917 --- /dev/null +++ b/base/src/bjc/dicelang/expr/TokenType.java @@ -0,0 +1,56 @@ +package bjc.dicelang.expr; + +/** + * Represents the type of this token. + * + * @author Ben Culkin + */ +public enum TokenType { + /* + * @NOTE + * Do we want to switch to auto-numbering the tokens? They were + * manually numbered because this was an assignment for PoPL and + * that was what Dr. Naz wanted. + */ + + /** Represents + */ + ADD( 14, true, 0), + /** Represents - */ + SUBTRACT(15, true, 0), + /** Represents * */ + MULTIPLY(16, true, 1), + /** Represents / */ + DIVIDE( 17, true, 1), + + /** Represents variable names. */ + VREF(11), + + /** Represents ( */ + OPAREN(0, false, 100), + /** Represents ) */ + CPAREN(0, false, 100); + + /** The ID number for this token type. */ + public final int nVal; + + /** Whether or not this type of token is an operator. */ + public final boolean isOperator; + /** The priority of this operator, if it is one. */ + public final int operatorPriority; + + /* Create a new token. */ + private TokenType(final int num, final boolean isOp, final int priority) { + nVal = num; + isOperator = isOp; + operatorPriority = priority; + } + + private TokenType(final int num) { + this(num, false, -1); + } + + @Override + public String toString() { + return Integer.toString(nVal); + } +} diff --git a/base/src/bjc/dicelang/expr/Tokens.java b/base/src/bjc/dicelang/expr/Tokens.java new file mode 100644 index 0000000..287d2b4 --- /dev/null +++ b/base/src/bjc/dicelang/expr/Tokens.java @@ -0,0 +1,96 @@ +package bjc.dicelang.expr; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + * Contains per-instance state for token parsing. + * + * @author EVE + * + */ +public class Tokens { + /* Contains mappings from variable references to string names. */ + private final Map<Integer, String> symTab; + /* Reverse index into the symbol table. */ + private final Map<String, Integer> revSymTab; + + /** Read-only view on the symbol table. */ + public final Map<Integer, String> symbolTable; + + /* Next index into the symbol table. */ + private int nextSym; + + /* Mapping from literal tokens to token types. */ + private final Map<String, TokenType> litTokens; + + /** Create a new set of tokens. */ + public Tokens() { + /* Create tables. */ + symTab = new HashMap<>(); + revSymTab = new HashMap<>(); + + /* Init public view. */ + symbolTable = Collections.unmodifiableMap(symTab); + + /* Set sym ID. */ + nextSym = 0; + + /* + * Setup literal mappings. + * + * @NOTE + * Should this be a static member? + */ + litTokens = new HashMap<>(); + litTokens.put("+", TokenType.ADD); + litTokens.put("-", TokenType.SUBTRACT); + litTokens.put("*", TokenType.MULTIPLY); + litTokens.put("/", TokenType.DIVIDE); + litTokens.put("(", TokenType.OPAREN); + litTokens.put(")", TokenType.CPAREN); + } + + /** + * Convert the string representation of a token into a token. + * + * @param tok + * The string representation of the token. + * @param raw + * The original string the token came from. + * + * @return The token the string represents. + */ + public Token lexToken(final String tok, final String raw) { + if (litTokens.containsKey(tok)) { + /* Return matching literal token. */ + return new Token(litTokens.get(tok), raw, this); + } + + /* Its a variable reference. */ + return parseVRef(tok, raw); + } + + /* Parse a variable reference. */ + private Token parseVRef(final String tok, final String raw) { + final Token tk = new Token(TokenType.VREF, raw, this); + + if (revSymTab.containsKey(tok)) { + /* Reuse the entry if it exists. */ + tk.intValue = revSymTab.get(tok); + } else { + /* Create a new entry. */ + tk.intValue = nextSym; + + /* Record it. */ + symTab.put(nextSym, tok); + revSymTab.put(tok, nextSym); + + /* Next ID. */ + nextSym += 1; + } + + return tk; + } +} |
