summaryrefslogtreecommitdiff
path: root/base/src/bjc/dicelang/expr
diff options
context:
space:
mode:
authorBenjamin J. Culkin <bjculkin@mix.wvu.edu>2017-10-25 12:10:14 -0300
committerBenjamin J. Culkin <bjculkin@mix.wvu.edu>2017-10-25 12:10:14 -0300
commit7bda9de511a5642efb297eae98c6ea7c42b27754 (patch)
treedff1aa772b9ac088c5bd07b8d10d944cbff89f96 /base/src/bjc/dicelang/expr
parentf028ea6dc555fc5192a96b00b8e96e90dbf6de55 (diff)
Start switch to maven modules
Diffstat (limited to 'base/src/bjc/dicelang/expr')
-rw-r--r--base/src/bjc/dicelang/expr/Ezpr.java149
-rw-r--r--base/src/bjc/dicelang/expr/Lexer.java62
-rw-r--r--base/src/bjc/dicelang/expr/Parser.java147
-rw-r--r--base/src/bjc/dicelang/expr/Shunter.java110
-rw-r--r--base/src/bjc/dicelang/expr/Token.java88
-rw-r--r--base/src/bjc/dicelang/expr/TokenType.java56
-rw-r--r--base/src/bjc/dicelang/expr/Tokens.java96
7 files changed, 708 insertions, 0 deletions
diff --git a/base/src/bjc/dicelang/expr/Ezpr.java b/base/src/bjc/dicelang/expr/Ezpr.java
new file mode 100644
index 0000000..862c097
--- /dev/null
+++ b/base/src/bjc/dicelang/expr/Ezpr.java
@@ -0,0 +1,149 @@
+/*
+ * @TODO 10/08/17 Ben Culkin :EzprFixing
+ * Implement these, and make sure they work correctly.
+ */
+// package bjc.dicelang.expr;
+
+// import bjc.utils.data.ITree;
+
+// import com.google.guava.collect.HashMultiset;
+// import com.google.guava.collect.Multiset;
+
+// import static bjc.dicelang.expr.EzprType;
+// import static bjc.dicelang.expr.EzprType.SUM;
+// import static bjc.dicelang.expr.EzprType.MUL;
+
+// import static bjc.dicelang.expr.EzprNode;
+// import static bjc.dicelnag.ezpr.EzprNode.EzprNodeType;
+// import static bjc.dicelnag.ezpr.EzprNode.EzprNodeType.EZPR;
+// import static bjc.dicelnag.ezpr.EzprNode.EzprNodeType.TOKEN;
+
+// public class Ezpr {
+// public static enum EzprType {
+// SUM, MUL
+// }
+
+// public static class EzprNode {
+// public static enum EzprNodeType {
+// EZPR, TOKEN
+// }
+
+// public final EzprNodeType typ;
+
+// public final Ezpr ezp;
+// public final Token tok;
+
+// public EzprNode(Ezpr exp) {
+// typ = EZPR;
+
+// ezp = exp;
+// tok = null;
+// }
+
+// public EzprNode(Token tk) {
+// typ = TOKEN;
+
+// tok = tk;
+// ezp = null;
+// }
+
+// public String toString() {
+// if(typ == TOKEN) {
+// return tok.toString();
+// }
+// return ezp.toString();
+// }
+// }
+
+// private EzprType typ;
+
+// private Multiset<EzprNode> positive;
+// private Multiset<EzprNode> negative;
+
+// public Ezpr(EzprType type, Multiset<EzprNode> pos, Multiset<EzprNode> neg) {
+// typ = type;
+
+// positive = pos;
+// negative = neg;
+// }
+
+// public Ezpr flatten() {
+// HashMultiset<EzprNode> newPositive = HashMultiset.create();
+// HashMultiset<EzprNode> newNegative = HashMultiset.create();
+
+// for(EzprNode nd : positive) {
+// /* Flatten enclosed ezprs of the same type. */
+// if(nd.typ == EZPR && (nd.ezp.typ == typ)) {
+// /* Recursively flatten kids. */
+// Ezpr kid = nd.ezp.flatten();
+
+// if(typ == SUM) {
+// /* Add sum parts to corresponding bags. */
+// for(EzprNode knd : kid.positive) {
+// newPositive.add(knd);
+// }
+// for(EzprNode knd : kid.negative) {
+// newNegative.add(knd);
+// }
+// } else {
+// /* @TODO ensure that this is correct. */
+// for(EzprNode knd : kid.positive) {
+// newPositive.add(knd);
+// }
+// for(EzprNode knd : kid.negative) {
+// newNegative.add(knd);
+// }
+// }
+// } else {
+// newPositive.add(nd);
+// }
+// }
+
+// for(EzprNode nd : negative) {
+// /* Flatten enclosed ezprs of the same type. */
+// if(nd.typ == EZPR && (nd.ezp.typ == typ)) {
+// /* Recursively flatten kids. */
+// Ezpr kid = nd.ezp.flatten();
+
+// /* @TODO ensure that this is correct. */
+// if(typ == SUM) {
+// for(EzprNode knd : kid.positive) {
+// newNegative.add(knd);
+// }
+// for(EzprNode knd : kid.negative) {
+// newPositive.add(knd);
+// }
+// } else {
+// for(EzprNode knd : kid.positive) {
+// newNegative.add(knd);
+// }
+// for(EzprNode knd : kid.negative) {
+// newPositive.add(knd);
+// }
+// }
+// } else {
+// newNegative.add(nd);
+// }
+// }
+// }
+
+// public String toString() {
+// StringBuilder sb = new StringBuilder(typ.toString());
+
+// sb.append(" [ ");
+// for(EzprNode nd : positive) {
+// sb.append(nd.toString());
+// sb.append(" ");
+// }
+
+// sb.append("# ");
+// for(EzprNode nd : negative) {
+// sb.append(nd.toString());
+// sb.append(" ");
+// }
+
+// sb.append("]");
+
+// return sb.toString();
+// }
+// }
diff --git a/base/src/bjc/dicelang/expr/Lexer.java b/base/src/bjc/dicelang/expr/Lexer.java
new file mode 100644
index 0000000..dfa0f76
--- /dev/null
+++ b/base/src/bjc/dicelang/expr/Lexer.java
@@ -0,0 +1,62 @@
+package bjc.dicelang.expr;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import bjc.utils.funcdata.IList;
+import bjc.utils.parserutils.splitter.ConfigurableTokenSplitter;
+
+/*
+ * @TODO 10/08/18 :IntExpressions
+ * Add support for integer constants, and maybe floating-point ones as well
+ * if you feel like. Heck, you could even go for ratio constants and things
+ * as well.
+ */
+/**
+ * Implements the lexer for simple expression operations.
+ *
+ * @author Ben Culkin
+ */
+public class Lexer {
+ /* Splitter we use. */
+ private final ConfigurableTokenSplitter split;
+
+ /** Create a new expression lexer. */
+ public Lexer() {
+ split = new ConfigurableTokenSplitter(true);
+
+ split.addSimpleDelimiters("(", ")");
+ split.addSimpleDelimiters("+", "-", "*", "/");
+ }
+
+ /**
+ * Convert a string from a input command to a series of infix tokens.
+ *
+ * @param inp
+ * The input command.
+ *
+ * @param tks
+ * The token state.
+ *
+ * @return A series of infix tokens representing the command.
+ */
+ public Token[] lexString(final String inp, final Tokens tks) {
+ /* Split tokens on whitespace. */
+ final String[] spacedTokens = inp.split("[ \t]");
+ /* Tokens to return. */
+ final List<Token> tokens = new LinkedList<>();
+
+ /* Process each token. */
+ for (final String spacedToken : spacedTokens) {
+ /* Split on operators. */
+ final IList<String> splitTokens = split.split(spacedToken);
+ /* Convert strings to tokens. */
+ final IList<Token> rawTokens = splitTokens.map(tok -> tks.lexToken(tok, spacedToken));
+
+ /* Add tokens to results. */
+ rawTokens.forEach(tokens::add);
+ }
+
+ return tokens.toArray(new Token[0]);
+ }
+}
diff --git a/base/src/bjc/dicelang/expr/Parser.java b/base/src/bjc/dicelang/expr/Parser.java
new file mode 100644
index 0000000..5fa2d3d
--- /dev/null
+++ b/base/src/bjc/dicelang/expr/Parser.java
@@ -0,0 +1,147 @@
+package bjc.dicelang.expr;
+
+import java.util.Arrays;
+import java.util.Scanner;
+
+import bjc.utils.data.ITree;
+import bjc.utils.funcdata.FunctionalList;
+import bjc.utils.parserutils.TreeConstructor;
+
+/**
+ * Parser for simple math expressions.
+ *
+ * @author Ben Culkin
+ */
+public class Parser {
+ /*
+ * @TODO 10/08/17 Ben Culkin :MainSeperation
+ * This main method should be moved to its own class.
+ */
+ /**
+ * Main method.
+ *
+ * @param args
+ * Unused CLI args.
+ */
+ public static void main(final String[] args) {
+ /* Create our objects. */
+ final Tokens toks = new Tokens();
+ final Lexer lex = new Lexer();
+
+ /* Prepare our input source. */
+ final Scanner scan = new Scanner(System.in);
+
+ /* Read initial command. */
+ System.out.print("Enter a math expression (blank line to quit): ");
+ String ln = scan.nextLine().trim();
+
+ /* Enter REPL loop. */
+ while (!ln.equals("")) {
+ /* Print raw command. */
+ System.out.println("Raw command: " + ln);
+ System.out.println();
+
+ /* Lex command to infix tokens. */
+ final Token[] infixTokens = lex.lexString(ln, toks);
+ System.out.println("Lexed tokens: ");
+ for (final Token tok : infixTokens) {
+ System.out.println("\t" + tok);
+ }
+
+ /* Print out infix expression. */
+ System.out.print("Lexed expression: ");
+ for (final Token tok : infixTokens) {
+ System.out.print(tok.toExpr() + " ");
+ }
+
+ /* Space stages. */
+ System.out.println();
+ System.out.println();
+
+ /* Shunt infix tokens to postfix tokens. */
+ final Token[] postfixTokens = Shunter.shuntTokens(infixTokens);
+ System.out.println("Lexed tokens: ");
+ for (final Token tok : postfixTokens) {
+ System.out.println("\t" + tok);
+ }
+
+ /* Print out postfix tokens. */
+ System.out.print("Shunted expression: ");
+ for (final Token tok : postfixTokens) {
+ System.out.print(tok.toExpr() + " ");
+ }
+
+ /* Space stages. */
+ System.out.println();
+ System.out.println();
+
+ /* Construct a list from the array of tokens. */
+ final FunctionalList<Token> tokList = new FunctionalList<>(
+ Arrays.asList(postfixTokens));
+
+ /* Construct a tree from the list of postfixed tokens. */
+ final ITree<Token> ast = TreeConstructor.constructTree(tokList,
+ tok -> tok.typ.isOperator);
+
+ /* Print the tree, then the canonical expression for it. */
+ System.out.println("Parsed tree");
+ System.out.println(ast.toString());
+ System.out.println("\nCanonical expr: " + toCanonicalExpr(ast));
+
+ /* Space stages. */
+ System.out.println();
+ System.out.println();
+
+ /* Prompt for a new expression. */
+ System.out.print("Enter a math expression (blank line to quit): ");
+ /* Read it. */
+ ln = scan.nextLine().trim();
+ }
+
+ /* Cleanup after ourselves. */
+ scan.close();
+ }
+
+ /*
+ * Convert an expression to one that uses the smallest necessary amount
+ * of parens.
+ */
+ private static String toCanonicalExpr(final ITree<Token> ast) {
+ final Token data = ast.getHead();
+
+ if (ast.getChildrenCount() == 0) {
+ /* Handle leaf nodes. */
+ return data.toExpr();
+ }
+
+ /* The left/right children. */
+ final ITree<Token> left = ast.getChild(0);
+ final ITree<Token> right = ast.getChild(1);
+
+ /* Recursively canonicalize them. */
+ String leftExpr = toCanonicalExpr(left);
+ String rightExpr = toCanonicalExpr(right);
+
+ /* Add parens if the left was higher priority. */
+ if (left.getChildrenCount() == 0) {
+ int leftPriority = left.getHead().typ.operatorPriority;
+ int dataPriority = data.typ.operatorPriority;
+
+ if (leftPriority >= dataPriority) {
+ leftExpr = String.format("(%s)", leftExpr);
+ }
+ }
+
+ /* Add parens if the right was higher priority. */
+ if (right.getChildrenCount() == 0) {
+ int rightPriority = right.getHead().typ.operatorPriority;
+ int dataPriority = data.typ.operatorPriority;
+
+ if (rightPriority >= dataPriority) {
+ rightExpr = String.format("(%s)", rightExpr);
+ }
+ }
+
+ return String.format("%s %s %s", leftExpr, data.toExpr(), rightExpr);
+ }
+}
diff --git a/base/src/bjc/dicelang/expr/Shunter.java b/base/src/bjc/dicelang/expr/Shunter.java
new file mode 100644
index 0000000..213e473
--- /dev/null
+++ b/base/src/bjc/dicelang/expr/Shunter.java
@@ -0,0 +1,110 @@
+package bjc.dicelang.expr;
+
+import java.util.ArrayList;
+import java.util.Deque;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Converts a infix series of tokens into a prefix series of tokens.
+ *
+ * @author Ben Culkin
+ */
+public class Shunter {
+ /*
+ * @NOTE
+ * Why does this method return an array, and not the list of
+ * tokens?
+ */
+ /**
+ * Convert a infix series of tokens to a postfix series of tokens.
+ *
+ * @param infixTokens
+ * The tokens in infix order.
+ *
+ * @return The tokens in postfix order.
+ */
+ public static Token[] shuntTokens(final Token[] infixTokens) {
+ /* The returned tokens. */
+ final List<Token> postfixTokens = new ArrayList<>(infixTokens.length);
+
+ /* The current stack of operators. */
+ final Deque<Token> opStack = new LinkedList<>();
+
+ /* Shunt each token. */
+ for (final Token tok : infixTokens) {
+ /* Handle operators. */
+ if (tok.typ.isOperator) {
+ Token curOp = opStack.peek();
+
+ /*
+ * Check if an operator is higher priority,
+ * respecting their left associativity.
+ *
+ * @NOTE
+ * Should this be factored out into a
+ * method?
+ */
+ int leftPriority = tok.typ.operatorPriority;
+ int rightPriority;
+
+ if (curOp == null) {
+ rightPriority = 0;
+ } else {
+ rightPriority = curOp.typ.operatorPriority;
+ }
+
+ boolean isHigherPrec = leftPriority >= rightPriority;
+
+ /*
+ * Pop all operators that are lower precedence
+ * than us.
+ */
+ while (!opStack.isEmpty() && isHigherPrec) {
+ postfixTokens.add(opStack.pop());
+ curOp = opStack.peek();
+
+ leftPriority = tok.typ.operatorPriority;
+ if (curOp == null) {
+ rightPriority = 0;
+ } else {
+ rightPriority = curOp.typ.operatorPriority;
+ }
+
+ isHigherPrec = leftPriority >= rightPriority;
+ }
+
+ opStack.push(tok);
+ } else if (tok.typ == TokenType.OPAREN) {
+ opStack.push(tok);
+ } else if (tok.typ == TokenType.CPAREN) {
+ Token curOp = opStack.peek();
+
+ /*
+ * Pop things until we find the matching
+ * parenthesis.
+ */
+ while (curOp.typ != TokenType.OPAREN) {
+ final Token tk = opStack.pop();
+ postfixTokens.add(tk);
+ curOp = opStack.peek();
+ }
+
+ if (!opStack.isEmpty()) {
+ opStack.pop();
+ }
+ } else {
+ postfixTokens.add(tok);
+ }
+ }
+
+ /*
+ * Flush remaining operators.
+ */
+ while (!opStack.isEmpty()) {
+ postfixTokens.add(opStack.pop());
+ }
+
+ return postfixTokens.toArray(new Token[0]);
+ }
+}
diff --git a/base/src/bjc/dicelang/expr/Token.java b/base/src/bjc/dicelang/expr/Token.java
new file mode 100644
index 0000000..bf92f97
--- /dev/null
+++ b/base/src/bjc/dicelang/expr/Token.java
@@ -0,0 +1,88 @@
+package bjc.dicelang.expr;
+
+/*
+ * @TODO 10/08/17 :TokenReorg
+ * I am not a fan of this 'having a bunch of subclasses' in one thing I
+ * seem to have been doing around this project. This should be multiple
+ * subclasses, one for each value for TokenType.
+ */
+/**
+ * Represents a lexical token.
+ *
+ * @author Ben Culkin
+ */
+public class Token {
+ /* The state for this token. */
+ private final Tokens tks;
+
+ /**
+ * The type of the token.
+ *
+ * Determines which fields have a value.
+ */
+ public final TokenType typ;
+
+ /** The integer value attached to this token. */
+ public int intValue;
+
+ /** The original string this token was part of. */
+ public String rawValue;
+
+ /**
+ * Create a new token.
+ *
+ * @param type
+ * The type of this token.
+ *
+ * @param raw
+ * The string this token came from.
+ *
+ * @param toks
+ * The state for this token
+ */
+ public Token(final TokenType type, final String raw, final Tokens toks) {
+ this.typ = type;
+
+ rawValue = raw;
+ tks = toks;
+ }
+
+ @Override
+ public String toString() {
+ String typeStr = typ.toString();
+ typeStr = String.format("%s (%s)", typeStr, typ.name());
+
+ if (typ == TokenType.VREF) {
+ typeStr += " (ind. " + intValue;
+ typeStr += ", sym. \"" + tks.symbolTable.get(intValue) + "\")";
+ }
+
+ return String.format("%s (originally from: %s)", typeStr, rawValue);
+ }
+
+ /**
+ * Convert this token into the string representation of it.
+ *
+ * @return The string representation of it.
+ */
+ public String toExpr() {
+ switch (typ) {
+ case ADD:
+ return "+";
+ case SUBTRACT:
+ return "-";
+ case MULTIPLY:
+ return "*";
+ case DIVIDE:
+ return "/";
+ case VREF:
+ return tks.symbolTable.get(intValue);
+ case OPAREN:
+ return "(";
+ case CPAREN:
+ return ")";
+ default:
+ return "???";
+ }
+ }
+}
diff --git a/base/src/bjc/dicelang/expr/TokenType.java b/base/src/bjc/dicelang/expr/TokenType.java
new file mode 100644
index 0000000..ad01917
--- /dev/null
+++ b/base/src/bjc/dicelang/expr/TokenType.java
@@ -0,0 +1,56 @@
+package bjc.dicelang.expr;
+
+/**
+ * Represents the type of this token.
+ *
+ * @author Ben Culkin
+ */
+public enum TokenType {
+ /*
+ * @NOTE
+ * Do we want to switch to auto-numbering the tokens? They were
+ * manually numbered because this was an assignment for PoPL and
+ * that was what Dr. Naz wanted.
+ */
+
+ /** Represents + */
+ ADD( 14, true, 0),
+ /** Represents - */
+ SUBTRACT(15, true, 0),
+ /** Represents * */
+ MULTIPLY(16, true, 1),
+ /** Represents / */
+ DIVIDE( 17, true, 1),
+
+ /** Represents variable names. */
+ VREF(11),
+
+ /** Represents ( */
+ OPAREN(0, false, 100),
+ /** Represents ) */
+ CPAREN(0, false, 100);
+
+ /** The ID number for this token type. */
+ public final int nVal;
+
+ /** Whether or not this type of token is an operator. */
+ public final boolean isOperator;
+ /** The priority of this operator, if it is one. */
+ public final int operatorPriority;
+
+ /* Create a new token. */
+ private TokenType(final int num, final boolean isOp, final int priority) {
+ nVal = num;
+ isOperator = isOp;
+ operatorPriority = priority;
+ }
+
+ private TokenType(final int num) {
+ this(num, false, -1);
+ }
+
+ @Override
+ public String toString() {
+ return Integer.toString(nVal);
+ }
+}
diff --git a/base/src/bjc/dicelang/expr/Tokens.java b/base/src/bjc/dicelang/expr/Tokens.java
new file mode 100644
index 0000000..287d2b4
--- /dev/null
+++ b/base/src/bjc/dicelang/expr/Tokens.java
@@ -0,0 +1,96 @@
+package bjc.dicelang.expr;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Contains per-instance state for token parsing.
+ *
+ * @author EVE
+ *
+ */
+public class Tokens {
+ /* Contains mappings from variable references to string names. */
+ private final Map<Integer, String> symTab;
+ /* Reverse index into the symbol table. */
+ private final Map<String, Integer> revSymTab;
+
+ /** Read-only view on the symbol table. */
+ public final Map<Integer, String> symbolTable;
+
+ /* Next index into the symbol table. */
+ private int nextSym;
+
+ /* Mapping from literal tokens to token types. */
+ private final Map<String, TokenType> litTokens;
+
+ /** Create a new set of tokens. */
+ public Tokens() {
+ /* Create tables. */
+ symTab = new HashMap<>();
+ revSymTab = new HashMap<>();
+
+ /* Init public view. */
+ symbolTable = Collections.unmodifiableMap(symTab);
+
+ /* Set sym ID. */
+ nextSym = 0;
+
+ /*
+ * Setup literal mappings.
+ *
+ * @NOTE
+ * Should this be a static member?
+ */
+ litTokens = new HashMap<>();
+ litTokens.put("+", TokenType.ADD);
+ litTokens.put("-", TokenType.SUBTRACT);
+ litTokens.put("*", TokenType.MULTIPLY);
+ litTokens.put("/", TokenType.DIVIDE);
+ litTokens.put("(", TokenType.OPAREN);
+ litTokens.put(")", TokenType.CPAREN);
+ }
+
+ /**
+ * Convert the string representation of a token into a token.
+ *
+ * @param tok
+ * The string representation of the token.
+ * @param raw
+ * The original string the token came from.
+ *
+ * @return The token the string represents.
+ */
+ public Token lexToken(final String tok, final String raw) {
+ if (litTokens.containsKey(tok)) {
+ /* Return matching literal token. */
+ return new Token(litTokens.get(tok), raw, this);
+ }
+
+ /* Its a variable reference. */
+ return parseVRef(tok, raw);
+ }
+
+ /* Parse a variable reference. */
+ private Token parseVRef(final String tok, final String raw) {
+ final Token tk = new Token(TokenType.VREF, raw, this);
+
+ if (revSymTab.containsKey(tok)) {
+ /* Reuse the entry if it exists. */
+ tk.intValue = revSymTab.get(tok);
+ } else {
+ /* Create a new entry. */
+ tk.intValue = nextSym;
+
+ /* Record it. */
+ symTab.put(nextSym, tok);
+ revSymTab.put(tok, nextSym);
+
+ /* Next ID. */
+ nextSym += 1;
+ }
+
+ return tk;
+ }
+}