Start switch to maven modules

author: Benjamin J. Culkin <bjculkin@mix.wvu.edu> 2017-10-25 12:10:14 -0300
committer: Benjamin J. Culkin <bjculkin@mix.wvu.edu> 2017-10-25 12:10:14 -0300
commit: 7bda9de511a5642efb297eae98c6ea7c42b27754 (patch)
tree: dff1aa772b9ac088c5bd07b8d10d944cbff89f96 /base/src/bjc/dicelang/expr
parent: f028ea6dc555fc5192a96b00b8e96e90dbf6de55 (diff)
7 files changed, 708 insertions, 0 deletions
diff --git a/base/src/bjc/dicelang/expr/Ezpr.java b/base/src/bjc/dicelang/expr/Ezpr.java
new file mode 100644
index 0000000..862c097
--- /dev/null
+++ b/base/src/bjc/dicelang/expr/Ezpr.java
@@ -0,0 +1,149 @@
+/*
+ * @TODO 10/08/17 Ben Culkin :EzprFixing
+ *	 Implement these, and make sure they work correctly.
+ */
+// package bjc.dicelang.expr;
+
+// import bjc.utils.data.ITree;
+
+// import com.google.guava.collect.HashMultiset;
+// import com.google.guava.collect.Multiset;
+
+// import static bjc.dicelang.expr.EzprType;
+// import static bjc.dicelang.expr.EzprType.SUM;
+// import static bjc.dicelang.expr.EzprType.MUL;
+
+// import static bjc.dicelang.expr.EzprNode;
+// import static bjc.dicelnag.ezpr.EzprNode.EzprNodeType;
+// import static bjc.dicelnag.ezpr.EzprNode.EzprNodeType.EZPR;
+// import static bjc.dicelnag.ezpr.EzprNode.EzprNodeType.TOKEN;
+
+// public class Ezpr {
+// 	public static enum EzprType {
+// 		SUM, MUL
+// 	}
+
+// 	public static class EzprNode {
+// 		public static enum EzprNodeType {
+// 			EZPR, TOKEN
+// 		}
+
+// 		public final EzprNodeType typ;
+
+// 		public final Ezpr ezp;
+// 		public final Token tok;
+
+// 		public EzprNode(Ezpr exp) {
+// 			typ = EZPR;
+
+// 			ezp = exp;
+// 			tok = null;
+// 		}
+
+// 		public EzprNode(Token tk) {
+// 			typ = TOKEN;
+
+// 			tok = tk;
+// 			ezp = null;
+// 		}
+
+// 		public String toString() {
+// 			if(typ == TOKEN) {
+// 				return tok.toString();
+// 			}
+// 			return ezp.toString();
+// 		}
+// 	}
+
+// 	private EzprType typ;
+
+// 	private Multiset<EzprNode> positive;
+// 	private Multiset<EzprNode> negative;
+
+// 	public Ezpr(EzprType type, Multiset<EzprNode> pos, Multiset<EzprNode> neg) {
+// 		typ = type;
+
+// 		positive = pos;
+// 		negative = neg;
+// 	}
+
+// 	public Ezpr flatten() {
+// 		HashMultiset<EzprNode> newPositive = HashMultiset.create();
+// 		HashMultiset<EzprNode> newNegative = HashMultiset.create();
+
+// 		for(EzprNode nd : positive) {
+// 			/* Flatten enclosed ezprs of the same type. */
+// 			if(nd.typ == EZPR && (nd.ezp.typ == typ)) {
+// 				/* Recursively flatten kids. */
+// 				Ezpr kid = nd.ezp.flatten();
+
+// 				if(typ == SUM) {
+// 					/* Add sum parts to corresponding bags. */
+// 					for(EzprNode knd : kid.positive) {
+// 						newPositive.add(knd);
+// 					}
+// 					for(EzprNode knd : kid.negative) {
+// 						newNegative.add(knd);
+// 					}
+// 				} else {
+// 					/* @TODO ensure that this is correct. */
+// 					for(EzprNode knd : kid.positive) {
+// 						newPositive.add(knd);
+// 					}
+// 					for(EzprNode knd : kid.negative) {
+// 						newNegative.add(knd);
+// 					}
+// 				}
+// 			} else {
+// 				newPositive.add(nd);
+// 			}
+// 		}
+
+// 		for(EzprNode nd : negative) {
+// 			/* Flatten enclosed ezprs of the same type. */
+// 			if(nd.typ == EZPR && (nd.ezp.typ == typ)) {
+// 				/* Recursively flatten kids. */
+// 				Ezpr kid = nd.ezp.flatten();
+
+// 				/* @TODO ensure that this is correct. */
+// 				if(typ == SUM) {
+// 					for(EzprNode knd : kid.positive) {
+// 						newNegative.add(knd);
+// 					}
+// 					for(EzprNode knd : kid.negative) {
+// 						newPositive.add(knd);
+// 					}
+// 				} else {
+// 					for(EzprNode knd : kid.positive) {
+// 						newNegative.add(knd);
+// 					}
+// 					for(EzprNode knd : kid.negative) {
+// 						newPositive.add(knd);
+// 					}
+// 				}
+// 			} else {
+// 				newNegative.add(nd);
+// 			}
+// 		}
+// 	}
+
+// 	public String toString() {
+// 		StringBuilder sb = new StringBuilder(typ.toString());
+
+// 		sb.append(" [ ");
+// 		for(EzprNode nd : positive) {
+// 			sb.append(nd.toString());
+// 			sb.append(" ");
+// 		}
+
+// 		sb.append("# ");
+// 		for(EzprNode nd : negative) {
+// 			sb.append(nd.toString());
+// 			sb.append(" ");
+// 		}
+
+// 		sb.append("]");
+
+// 		return sb.toString();
+// 	}
+// }
diff --git a/base/src/bjc/dicelang/expr/Lexer.java b/base/src/bjc/dicelang/expr/Lexer.java
new file mode 100644
index 0000000..dfa0f76
--- /dev/null
+++ b/base/src/bjc/dicelang/expr/Lexer.java
@@ -0,0 +1,62 @@
+package bjc.dicelang.expr;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import bjc.utils.funcdata.IList;
+import bjc.utils.parserutils.splitter.ConfigurableTokenSplitter;
+
+/*
+ * @TODO 10/08/18 :IntExpressions
+ * 	Add support for integer constants, and maybe floating-point ones as well
+ * 	if you feel like. Heck, you could even go for ratio constants and things
+ * 	as well.
+ */
+/**
+ * Implements the lexer for simple expression operations.
+ *
+ * @author Ben Culkin
+ */
+public class Lexer {
+	/* Splitter we use. */
+	private final ConfigurableTokenSplitter split;
+
+	/** Create a new expression lexer. */
+	public Lexer() {
+		split = new ConfigurableTokenSplitter(true);
+
+		split.addSimpleDelimiters("(", ")");
+		split.addSimpleDelimiters("+", "-", "*", "/");
+	}
+
+	/**
+	 * Convert a string from a input command to a series of infix tokens.
+	 *
+	 * @param inp
+	 *                The input command.
+	 *
+	 * @param tks
+	 *                The token state.
+	 *
+	 * @return A series of infix tokens representing the command.
+	 */
+	public Token[] lexString(final String inp, final Tokens tks) {
+		/* Split tokens on whitespace. */
+		final String[] spacedTokens = inp.split("[ \t]");
+		/* Tokens to return. */
+		final List<Token> tokens    = new LinkedList<>();
+
+		/* Process each token. */
+		for (final String spacedToken : spacedTokens) {
+			/* Split on operators. */
+			final IList<String> splitTokens = split.split(spacedToken);
+			/* Convert strings to tokens. */
+			final IList<Token> rawTokens    = splitTokens.map(tok -> tks.lexToken(tok, spacedToken));
+
+			/* Add tokens to results. */
+			rawTokens.forEach(tokens::add);
+		}
+
+		return tokens.toArray(new Token[0]);
+	}
+}
diff --git a/base/src/bjc/dicelang/expr/Parser.java b/base/src/bjc/dicelang/expr/Parser.java
new file mode 100644
index 0000000..5fa2d3d
--- /dev/null
+++ b/base/src/bjc/dicelang/expr/Parser.java
@@ -0,0 +1,147 @@
+package bjc.dicelang.expr;
+
+import java.util.Arrays;
+import java.util.Scanner;
+
+import bjc.utils.data.ITree;
+import bjc.utils.funcdata.FunctionalList;
+import bjc.utils.parserutils.TreeConstructor;
+
+/**
+ * Parser for simple math expressions.
+ *
+ * @author Ben Culkin
+ */
+public class Parser {
+	/*
+	 * @TODO 10/08/17 Ben Culkin :MainSeperation
+	 * 	This main method should be moved to its own class.
+	 */
+	/**
+	 * Main method.
+	 *
+	 * @param args
+	 *                Unused CLI args.
+	 */
+	public static void main(final String[] args) {
+		/* Create our objects. */
+		final Tokens toks = new Tokens();
+		final Lexer lex = new Lexer();
+
+		/* Prepare our input source. */
+		final Scanner scan = new Scanner(System.in);
+
+		/* Read initial command. */
+		System.out.print("Enter a math expression (blank line to quit): ");
+		String ln = scan.nextLine().trim();
+
+		/* Enter REPL loop. */
+		while (!ln.equals("")) {
+			/* Print raw command. */
+			System.out.println("Raw command: " + ln);
+			System.out.println();
+
+			/* Lex command to infix tokens. */
+			final Token[] infixTokens = lex.lexString(ln, toks);
+			System.out.println("Lexed tokens: ");
+			for (final Token tok : infixTokens) {
+				System.out.println("\t" + tok);
+			}
+
+			/* Print out infix expression. */
+			System.out.print("Lexed expression: ");
+			for (final Token tok : infixTokens) {
+				System.out.print(tok.toExpr() + " ");
+			}
+
+			/* Space stages. */
+			System.out.println();
+			System.out.println();
+
+			/* Shunt infix tokens to postfix tokens. */
+			final Token[] postfixTokens = Shunter.shuntTokens(infixTokens);
+			System.out.println("Lexed tokens: ");
+			for (final Token tok : postfixTokens) {
+				System.out.println("\t" + tok);
+			}
+
+			/* Print out postfix tokens. */
+			System.out.print("Shunted expression: ");
+			for (final Token tok : postfixTokens) {
+				System.out.print(tok.toExpr() + " ");
+			}
+
+			/* Space stages. */
+			System.out.println();
+			System.out.println();
+
+			/* Construct a list from the array of tokens. */
+			final FunctionalList<Token> tokList = new FunctionalList<>(
+					Arrays.asList(postfixTokens));
+
+			/* Construct a tree from the list of postfixed tokens. */
+			final ITree<Token> ast = TreeConstructor.constructTree(tokList,
+					tok -> tok.typ.isOperator);
+
+			/* Print the tree, then the canonical expression for it. */
+			System.out.println("Parsed tree");
+			System.out.println(ast.toString());
+			System.out.println("\nCanonical expr: " + toCanonicalExpr(ast));
+
+			/* Space stages. */
+			System.out.println();
+			System.out.println();
+
+			/* Prompt for a new expression. */
+			System.out.print("Enter a math expression (blank line to quit): ");
+			/* Read it. */
+			ln = scan.nextLine().trim();
+		}
+
+		/* Cleanup after ourselves. */
+		scan.close();
+	}
+
+	/*
+	 * Convert an expression to one that uses the smallest necessary amount
+	 * of parens.
+	 */
+	private static String toCanonicalExpr(final ITree<Token> ast) {
+		final Token data = ast.getHead();
+
+		if (ast.getChildrenCount() == 0) {
+			/* Handle leaf nodes. */
+			return data.toExpr();
+		}
+
+		/* The left/right children. */
+		final ITree<Token> left  = ast.getChild(0);
+		final ITree<Token> right = ast.getChild(1);
+
+		/* Recursively canonicalize them. */
+		String leftExpr  = toCanonicalExpr(left);
+		String rightExpr = toCanonicalExpr(right);
+
+		/* Add parens if the left was higher priority. */
+		if (left.getChildrenCount() == 0) {
+			int leftPriority = left.getHead().typ.operatorPriority;
+			int dataPriority = data.typ.operatorPriority;
+
+			if (leftPriority >= dataPriority) {
+				leftExpr = String.format("(%s)", leftExpr);
+			}
+		}
+
+		/* Add parens if the right was higher priority. */
+		if (right.getChildrenCount() == 0) {
+			int rightPriority = right.getHead().typ.operatorPriority;
+			int dataPriority  = data.typ.operatorPriority;
+
+			if (rightPriority >= dataPriority) {
+				rightExpr = String.format("(%s)", rightExpr);
+			}
+		}
+
+		return String.format("%s %s %s", leftExpr, data.toExpr(), rightExpr);
+	}
+}
diff --git a/base/src/bjc/dicelang/expr/Shunter.java b/base/src/bjc/dicelang/expr/Shunter.java
new file mode 100644
index 0000000..213e473
--- /dev/null
+++ b/base/src/bjc/dicelang/expr/Shunter.java
@@ -0,0 +1,110 @@
+package bjc.dicelang.expr;
+
+import java.util.ArrayList;
+import java.util.Deque;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Converts a infix series of tokens into a prefix series of tokens.
+ *
+ * @author Ben Culkin
+ */
+public class Shunter {
+	/*
+	 * @NOTE
+	 * 	Why does this method return an array, and not the list of
+	 * 	tokens?
+	 */
+	/**
+	 * Convert a infix series of tokens to a postfix series of tokens.
+	 *
+	 * @param infixTokens
+	 *                The tokens in infix order.
+	 *
+	 * @return The tokens in postfix order.
+	 */
+	public static Token[] shuntTokens(final Token[] infixTokens) {
+		/* The returned tokens. */
+		final List<Token> postfixTokens = new ArrayList<>(infixTokens.length);
+
+		/* The current stack of operators. */
+		final Deque<Token> opStack = new LinkedList<>();
+
+		/* Shunt each token. */
+		for (final Token tok : infixTokens) {
+			/* Handle operators. */
+			if (tok.typ.isOperator) {
+				Token curOp = opStack.peek();
+
+				/*
+				 * Check if an operator is higher priority,
+				 * respecting their left associativity.
+				 *
+				 * @NOTE
+				 * 	Should this be factored out into a
+				 * 	method?
+				 */
+				int leftPriority = tok.typ.operatorPriority;
+				int rightPriority;
+
+				if (curOp == null) {
+					rightPriority = 0;
+				} else {
+					rightPriority = curOp.typ.operatorPriority;
+				}
+
+				boolean isHigherPrec = leftPriority >= rightPriority;
+
+				/*
+				 * Pop all operators that are lower precedence
+				 * than us.
+				 */
+				while (!opStack.isEmpty() && isHigherPrec) {
+					postfixTokens.add(opStack.pop());
+					curOp = opStack.peek();
+
+					leftPriority = tok.typ.operatorPriority;
+					if (curOp == null) {
+						rightPriority = 0;
+					} else {
+						rightPriority = curOp.typ.operatorPriority;
+					}
+
+					isHigherPrec = leftPriority >= rightPriority;
+				}
+
+				opStack.push(tok);
+			} else if (tok.typ == TokenType.OPAREN) {
+				opStack.push(tok);
+			} else if (tok.typ == TokenType.CPAREN) {
+				Token curOp = opStack.peek();
+
+				/*
+				 * Pop things until we find the matching
+				 * parenthesis.
+				 */
+				while (curOp.typ != TokenType.OPAREN) {
+					final Token tk = opStack.pop();
+					postfixTokens.add(tk);
+					curOp = opStack.peek();
+				}
+
+				if (!opStack.isEmpty()) {
+					opStack.pop();
+				}
+			} else {
+				postfixTokens.add(tok);
+			}
+		}
+
+		/*
+		 * Flush remaining operators.
+		 */
+		while (!opStack.isEmpty()) {
+			postfixTokens.add(opStack.pop());
+		}
+
+		return postfixTokens.toArray(new Token[0]);
+	}
+}
diff --git a/base/src/bjc/dicelang/expr/Token.java b/base/src/bjc/dicelang/expr/Token.java
new file mode 100644
index 0000000..bf92f97
--- /dev/null
+++ b/base/src/bjc/dicelang/expr/Token.java
@@ -0,0 +1,88 @@
+package bjc.dicelang.expr;
+
+/*
+ * @TODO 10/08/17 :TokenReorg
+ * 	I am not a fan of this 'having a bunch of subclasses' in one thing I
+ * 	seem to have been doing around this project. This should be multiple
+ * 	subclasses, one for each value for TokenType.
+ */
+/**
+ * Represents a lexical token.
+ *
+ * @author Ben Culkin
+ */
+public class Token {
+	/* The state for this token. */
+	private final Tokens tks;
+
+	/**
+	 * The type of the token.
+	 *
+	 * Determines which fields have a value.
+	 */
+	public final TokenType typ;
+
+	/** The integer value attached to this token. */
+	public int intValue;
+
+	/** The original string this token was part of. */
+	public String rawValue;
+
+	/**
+	 * Create a new token.
+	 *
+	 * @param type
+	 *                The type of this token.
+	 *
+	 * @param raw
+	 *                The string this token came from.
+	 *
+	 * @param toks
+	 *                The state for this token
+	 */
+	public Token(final TokenType type, final String raw, final Tokens toks) {
+		this.typ = type;
+
+		rawValue = raw;
+		tks = toks;
+	}
+
+	@Override
+	public String toString() {
+		String typeStr = typ.toString();
+		       typeStr = String.format("%s (%s)", typeStr, typ.name());
+
+		if (typ == TokenType.VREF) {
+			typeStr += " (ind. " + intValue;
+			typeStr += ", sym. \"" + tks.symbolTable.get(intValue) + "\")";
+		}
+
+		return String.format("%s (originally from: %s)", typeStr, rawValue);
+	}
+
+	/**
+	 * Convert this token into the string representation of it.
+	 *
+	 * @return The string representation of it.
+	 */
+	public String toExpr() {
+		switch (typ) {
+		case ADD:
+			return "+";
+		case SUBTRACT:
+			return "-";
+		case MULTIPLY:
+			return "*";
+		case DIVIDE:
+			return "/";
+		case VREF:
+			return tks.symbolTable.get(intValue);
+		case OPAREN:
+			return "(";
+		case CPAREN:
+			return ")";
+		default:
+			return "???";
+		}
+	}
+}
diff --git a/base/src/bjc/dicelang/expr/TokenType.java b/base/src/bjc/dicelang/expr/TokenType.java
new file mode 100644
index 0000000..ad01917
--- /dev/null
+++ b/base/src/bjc/dicelang/expr/TokenType.java
@@ -0,0 +1,56 @@
+package bjc.dicelang.expr;
+
+/**
+ * Represents the type of this token.
+ *
+ * @author Ben Culkin
+ */
+public enum TokenType {
+	/* 
+	 * @NOTE
+	 * 	Do we want to switch to auto-numbering the tokens? They were
+	 * 	manually numbered because this was an assignment for PoPL and
+	 * 	that was what Dr. Naz wanted.
+	 */
+
+	/** Represents + */
+	ADD(     14, true, 0),
+	/** Represents - */
+	SUBTRACT(15, true, 0),
+	/** Represents * */
+	MULTIPLY(16, true, 1),
+	/** Represents / */
+	DIVIDE(  17, true, 1),
+
+	/** Represents variable names. */
+	VREF(11),
+
+	/** Represents ( */
+	OPAREN(0, false, 100),
+	/** Represents ) */
+	CPAREN(0, false, 100);
+
+	/** The ID number for this token type. */
+	public final int nVal;
+
+	/** Whether or not this type of token is an operator. */
+	public final boolean    isOperator;
+	/** The priority of this operator, if it is one. */
+	public final int        operatorPriority;
+
+	/* Create a new token. */
+	private TokenType(final int num, final boolean isOp, final int priority) {
+		nVal             = num;
+		isOperator       = isOp;
+		operatorPriority = priority;
+	}
+
+	private TokenType(final int num) {
+		this(num, false, -1);
+	}
+
+	@Override
+	public String toString() {
+		return Integer.toString(nVal);
+	}
+}
diff --git a/base/src/bjc/dicelang/expr/Tokens.java b/base/src/bjc/dicelang/expr/Tokens.java
new file mode 100644
index 0000000..287d2b4
--- /dev/null
+++ b/base/src/bjc/dicelang/expr/Tokens.java
@@ -0,0 +1,96 @@
+package bjc.dicelang.expr;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Contains per-instance state for token parsing.
+ *
+ * @author EVE
+ *
+ */
+public class Tokens {
+	/* Contains mappings from variable references to string names. */
+	private final Map<Integer, String> symTab;
+	/* Reverse index into the symbol table. */
+	private final Map<String, Integer> revSymTab;
+
+	/** Read-only view on the symbol table. */
+	public final Map<Integer, String> symbolTable;
+
+	/* Next index into the symbol table. */
+	private int nextSym;
+
+	/* Mapping from literal tokens to token types. */
+	private final Map<String, TokenType> litTokens;
+
+	/** Create a new set of tokens. */
+	public Tokens() {
+		/* Create tables. */
+		symTab = new HashMap<>();
+		revSymTab = new HashMap<>();
+
+		/* Init public view. */
+		symbolTable = Collections.unmodifiableMap(symTab);
+
+		/* Set sym ID. */
+		nextSym = 0;
+
+		/* 
+		 * Setup literal mappings.
+		 *
+		 * @NOTE
+		 * 	Should this be a static member?
+		 */
+		litTokens = new HashMap<>();
+		litTokens.put("+", TokenType.ADD);
+		litTokens.put("-", TokenType.SUBTRACT);
+		litTokens.put("*", TokenType.MULTIPLY);
+		litTokens.put("/", TokenType.DIVIDE);
+		litTokens.put("(", TokenType.OPAREN);
+		litTokens.put(")", TokenType.CPAREN);
+	}
+
+	/**
+	 * Convert the string representation of a token into a token.
+	 *
+	 * @param tok
+	 *                The string representation of the token.
+	 * @param raw
+	 *                The original string the token came from.
+	 *
+	 * @return The token the string represents.
+	 */
+	public Token lexToken(final String tok, final String raw) {
+		if (litTokens.containsKey(tok)) {
+			/* Return matching literal token. */
+			return new Token(litTokens.get(tok), raw, this);
+		}
+
+		/* Its a variable reference. */
+		return parseVRef(tok, raw);
+	}
+
+	/* Parse a variable reference. */
+	private Token parseVRef(final String tok, final String raw) {
+		final Token tk = new Token(TokenType.VREF, raw, this);
+
+		if (revSymTab.containsKey(tok)) {
+			/* Reuse the entry if it exists. */
+			tk.intValue = revSymTab.get(tok);
+		} else {
+			/* Create a new entry. */
+			tk.intValue = nextSym;
+
+			/* Record it. */
+			symTab.put(nextSym, tok);
+			revSymTab.put(tok, nextSym);
+
+			/* Next ID. */
+			nextSym += 1;
+		}
+
+		return tk;
+	}
+}
author	Benjamin J. Culkin <bjculkin@mix.wvu.edu>	2017-10-25 12:10:14 -0300
committer	Benjamin J. Culkin <bjculkin@mix.wvu.edu>	2017-10-25 12:10:14 -0300
commit	7bda9de511a5642efb297eae98c6ea7c42b27754 (patch)
tree	dff1aa772b9ac088c5bd07b8d10d944cbff89f96 /base/src/bjc/dicelang/expr
parent	f028ea6dc555fc5192a96b00b8e96e90dbf6de55 (diff)