summaryrefslogtreecommitdiff
path: root/dice-lang
diff options
context:
space:
mode:
authorbjculkin <bjculkin@WIT-136XG42.wvu-ad.wvu.edu>2017-02-27 10:48:18 -0500
committerbjculkin <bjculkin@WIT-136XG42.wvu-ad.wvu.edu>2017-02-27 10:48:18 -0500
commite36e3b56b9b736d7a5caa889f280f90688eb9ad7 (patch)
tree5c20e9f79b44e97cb30d3c324ebe60311aea230c /dice-lang
parent96e868388c7617136e916c27ddd90694c0bb8356 (diff)
Seperate tokenizer
Diffstat (limited to 'dice-lang')
-rw-r--r--dice-lang/src/bjc/dicelang/v2/DiceLangEngine.java142
-rw-r--r--dice-lang/src/bjc/dicelang/v2/Tokenizer.java154
2 files changed, 160 insertions, 136 deletions
diff --git a/dice-lang/src/bjc/dicelang/v2/DiceLangEngine.java b/dice-lang/src/bjc/dicelang/v2/DiceLangEngine.java
index 200c0b6..8a877af 100644
--- a/dice-lang/src/bjc/dicelang/v2/DiceLangEngine.java
+++ b/dice-lang/src/bjc/dicelang/v2/DiceLangEngine.java
@@ -1,6 +1,5 @@
package bjc.dicelang.v2;
-import bjc.dicelang.v2.dice.DiceBox;
import bjc.utils.data.IPair;
import bjc.utils.data.ITree;
import bjc.utils.data.Pair;
@@ -10,7 +9,6 @@ import bjc.utils.funcdata.FunctionalStringTokenizer;
import bjc.utils.funcdata.IList;
import bjc.utils.funcdata.IMap;
import bjc.utils.funcutils.ListUtils;
-import bjc.utils.funcutils.StringUtils;
import static bjc.dicelang.v2.Errors.ErrorKey.*;
import static bjc.dicelang.v2.Token.Type.*;
@@ -30,7 +28,6 @@ public class DiceLangEngine {
// ID for generation
private int nextLiteral;
- private int nextSym;
// Debug indicator
private boolean debugMode;
@@ -43,6 +40,8 @@ public class DiceLangEngine {
// Shunter for token postfixing
private Shunter shunt;
+ // Tokenizer for tokenizing
+ private Tokenizer tokenzer;
// Parser for tree construction
private Parser parsr;
// Evaluator for evaluating
@@ -52,8 +51,6 @@ public class DiceLangEngine {
public final IMap<Integer, String> symTable;
public final IMap<Integer, String> stringLits;
- // Literal tokens for tokenization
- private IMap<String, Token.Type> litTokens;
// Lists for preprocessing
private IList<Define> lineDefns;
@@ -91,22 +88,7 @@ public class DiceLangEngine {
deaffixationList.add(new Pair<>("[", "\\["));
deaffixationList.add(new Pair<>("]", "\\]"));
deaffixationList.add(new Pair<>("{", "\\{"));
- deaffixationList.add(new Pair<>("}", "}"));
-
- litTokens = new FunctionalMap<>();
-
- litTokens.put("+", ADD);
- litTokens.put("-", SUBTRACT);
- litTokens.put("*", MULTIPLY);
- litTokens.put("/", DIVIDE);
- litTokens.put("//", IDIVIDE);
- litTokens.put("dg", DICEGROUP);
- litTokens.put("dc", DICECONCAT);
- litTokens.put("dl", DICELIST);
- litTokens.put("=>", LET);
- litTokens.put(":=", BIND);
- litTokens.put(",", GROUPSEP);
- litTokens.put("crc", COERCE);
+ deaffixationList.add(new Pair<>("}", "}"));
nextLiteral = 1;
@@ -116,8 +98,8 @@ public class DiceLangEngine {
stepEval = false;
streamEng = new StreamEngine(this);
-
shunt = new Shunter();
+ tokenzer = new Tokenizer(this);
parsr = new Parser();
eval = new Evaluator(this);
}
@@ -280,7 +262,7 @@ public class DiceLangEngine {
newTok = dfn.apply(newTok);
}
- Token tk = lexToken(token, stringLiterals);
+ Token tk = tokenzer.lexToken(token, stringLiterals);
if(tk == null) continue;
else if(tk == Token.NIL_TOKEN) return false;
@@ -467,119 +449,7 @@ public class DiceLangEngine {
return true;
}
- private Token lexToken(String token, IMap<String, String> stringLts) {
- if(token.equals("")) return null;
-
- Token tk = Token.NIL_TOKEN;
-
- if(litTokens.containsKey(token)) {
- tk = new Token(litTokens.get(token));
- } else {
- switch(token.charAt(0)) {
- case '(':
- case ')':
- case '[':
- case ']':
- case '{':
- case '}':
- tk = tokenizeGrouping(token);
- break;
- default:
- tk = tokenizeLiteral(token, stringLts);
- }
- }
-
- return tk;
- }
-
- private Token tokenizeGrouping(String token) {
- Token tk = Token.NIL_TOKEN;
-
- if(StringUtils.containsOnly(token, "\\" + token.charAt(0))) {
- switch(token.charAt(0)) {
- case '(':
- tk = new Token(OPAREN, token.length());
- break;
- case ')':
- tk = new Token(CPAREN, token.length());
- break;
- case '[':
- tk = new Token(OBRACKET, token.length());
- break;
- case ']':
- tk = new Token(CBRACKET, token.length());
- break;
- case '{':
- tk = new Token(OBRACE, token.length());
- break;
- case '}':
- tk = new Token(CBRACE, token.length());
- break;
- default:
- Errors.inst.printError(EK_TOK_UNGROUP, token);
- break;
- }
- }
-
- return tk;
- }
-
- private Pattern intMatcher = Pattern.compile("\\A[\\-\\+]?\\d+\\Z");
- private Pattern hexadecimalMatcher = Pattern.compile("\\A[\\-\\+]?0x[0-9A-Fa-f]+\\Z");
- private Pattern flexadecimalMatcher = Pattern.compile("\\A[\\-\\+]?[0-9][0-9A-Za-z]+B\\d{1,2}\\Z");
- private Pattern stringLitMatcher = Pattern.compile("\\AstringLiteral(\\d+)\\Z");
-
- private Token tokenizeLiteral(String token, IMap<String, String> stringLts) {
- Token tk = Token.NIL_TOKEN;
-
- if(intMatcher.matcher(token).matches()) {
- tk = new Token(INT_LIT, Long.parseLong(token));
- } else if(hexadecimalMatcher.matcher(token).matches()) {
- String newToken = token.substring(0, 1) + token.substring(token.indexOf('x'));
-
- tk = new Token(INT_LIT, Long.parseLong(newToken.substring(2).toUpperCase(), 16));
- } else if(flexadecimalMatcher.matcher(token).matches()) {
- int parseBase = Integer.parseInt(token.substring(token.lastIndexOf('B') + 1));
-
- if(parseBase < Character.MIN_RADIX || parseBase > Character.MAX_RADIX) {
- Errors.inst.printError(EK_TOK_INVBASE, Integer.toString(parseBase));
- return Token.NIL_TOKEN;
- }
-
- String flexNum = token.substring(0, token.lastIndexOf('B'));
-
- try {
- tk = new Token(INT_LIT, Long.parseLong(flexNum, parseBase));
- } catch (NumberFormatException nfex) {
- Errors.inst.printError(EK_TOK_INVFLEX, flexNum, Integer.toString(parseBase));
- return Token.NIL_TOKEN;
- }
- } else if(DoubleMatcher.floatingLiteral.matcher(token).matches()) {
- tk = new Token(FLOAT_LIT, Double.parseDouble(token));
- } else if(DiceBox.isValidExpression(token)) {
- tk = new Token(DICE_LIT, DiceBox.parseExpression(token));
- } else {
- Matcher stringLit = stringLitMatcher.matcher(token);
-
- if(stringLit.matches()) {
- int litNum = Integer.parseInt(stringLit.group(1));
-
- stringLits.put(litNum, stringLts.get(token));
- tk = new Token(STRING_LIT, litNum);
- } else {
- // @TODO define what a valid identifier is
- symTable.put(nextSym++, token);
-
- tk = new Token(VREF, nextSym - 1);
- }
-
- // @TODO uncomment when we have a defn. for var names
- // System.out.printf("\tERROR: Unrecognized token:"
- // + "%s\n", token);
- }
-
- return tk;
- }
+
private IList<String> deaffixTokens(IList<String> tokens, List<IPair<String, String>> deaffixTokens) {
Deque<String> working = new LinkedList<>();
diff --git a/dice-lang/src/bjc/dicelang/v2/Tokenizer.java b/dice-lang/src/bjc/dicelang/v2/Tokenizer.java
new file mode 100644
index 0000000..40b1285
--- /dev/null
+++ b/dice-lang/src/bjc/dicelang/v2/Tokenizer.java
@@ -0,0 +1,154 @@
+package bjc.dicelang.v2;
+
+import static bjc.dicelang.v2.Errors.ErrorKey.*;
+import static bjc.dicelang.v2.Token.Type.*;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import bjc.dicelang.v2.dice.DiceBox;
+import bjc.utils.funcdata.FunctionalMap;
+import bjc.utils.funcdata.IMap;
+import bjc.utils.funcutils.StringUtils;
+
+public class Tokenizer {
+ // Literal tokens for tokenization
+ private IMap<String, Token.Type> litTokens;
+
+ private DiceLangEngine eng;
+
+ private int nextSym = 0;
+
+ public Tokenizer(DiceLangEngine engine) {
+ eng = engine;
+
+ litTokens = new FunctionalMap<>();
+
+ litTokens.put("+", ADD);
+ litTokens.put("-", SUBTRACT);
+ litTokens.put("*", MULTIPLY);
+ litTokens.put("/", DIVIDE);
+ litTokens.put("//", IDIVIDE);
+ litTokens.put("dg", DICEGROUP);
+ litTokens.put("dc", DICECONCAT);
+ litTokens.put("dl", DICELIST);
+ litTokens.put("=>", LET);
+ litTokens.put(":=", BIND);
+ litTokens.put(",", GROUPSEP);
+ litTokens.put("crc", COERCE);
+ }
+
+ public Token lexToken(String token, IMap<String, String> stringLts) {
+ if(token.equals("")) return null;
+
+ Token tk = Token.NIL_TOKEN;
+
+ if(litTokens.containsKey(token)) {
+ tk = new Token(litTokens.get(token));
+ } else {
+ switch(token.charAt(0)) {
+ case '(':
+ case ')':
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+ tk = tokenizeGrouping(token);
+ break;
+ default:
+ tk = tokenizeLiteral(token, stringLts);
+ }
+ }
+
+ return tk;
+ }
+
+ private Token tokenizeGrouping(String token) {
+ Token tk = Token.NIL_TOKEN;
+
+ if(StringUtils.containsOnly(token, "\\" + token.charAt(0))) {
+ switch(token.charAt(0)) {
+ case '(':
+ tk = new Token(OPAREN, token.length());
+ break;
+ case ')':
+ tk = new Token(CPAREN, token.length());
+ break;
+ case '[':
+ tk = new Token(OBRACKET, token.length());
+ break;
+ case ']':
+ tk = new Token(CBRACKET, token.length());
+ break;
+ case '{':
+ tk = new Token(OBRACE, token.length());
+ break;
+ case '}':
+ tk = new Token(CBRACE, token.length());
+ break;
+ default:
+ Errors.inst.printError(EK_TOK_UNGROUP, token);
+ break;
+ }
+ }
+
+ return tk;
+ }
+
+ private Pattern intMatcher = Pattern.compile("\\A[\\-\\+]?\\d+\\Z");
+ private Pattern hexadecimalMatcher = Pattern.compile("\\A[\\-\\+]?0x[0-9A-Fa-f]+\\Z");
+ private Pattern flexadecimalMatcher = Pattern.compile("\\A[\\-\\+]?[0-9][0-9A-Za-z]+B\\d{1,2}\\Z");
+ private Pattern stringLitMatcher = Pattern.compile("\\AstringLiteral(\\d+)\\Z");
+
+ private Token tokenizeLiteral(String token, IMap<String, String> stringLts) {
+ Token tk = Token.NIL_TOKEN;
+
+ if(intMatcher.matcher(token).matches()) {
+ tk = new Token(INT_LIT, Long.parseLong(token));
+ } else if(hexadecimalMatcher.matcher(token).matches()) {
+ String newToken = token.substring(0, 1) + token.substring(token.indexOf('x'));
+
+ tk = new Token(INT_LIT, Long.parseLong(newToken.substring(2).toUpperCase(), 16));
+ } else if(flexadecimalMatcher.matcher(token).matches()) {
+ int parseBase = Integer.parseInt(token.substring(token.lastIndexOf('B') + 1));
+
+ if(parseBase < Character.MIN_RADIX || parseBase > Character.MAX_RADIX) {
+ Errors.inst.printError(EK_TOK_INVBASE, Integer.toString(parseBase));
+ return Token.NIL_TOKEN;
+ }
+
+ String flexNum = token.substring(0, token.lastIndexOf('B'));
+
+ try {
+ tk = new Token(INT_LIT, Long.parseLong(flexNum, parseBase));
+ } catch (NumberFormatException nfex) {
+ Errors.inst.printError(EK_TOK_INVFLEX, flexNum, Integer.toString(parseBase));
+ return Token.NIL_TOKEN;
+ }
+ } else if(DoubleMatcher.floatingLiteral.matcher(token).matches()) {
+ tk = new Token(FLOAT_LIT, Double.parseDouble(token));
+ } else if(DiceBox.isValidExpression(token)) {
+ tk = new Token(DICE_LIT, DiceBox.parseExpression(token));
+ } else {
+ Matcher stringLit = stringLitMatcher.matcher(token);
+
+ if(stringLit.matches()) {
+ int litNum = Integer.parseInt(stringLit.group(1));
+
+ eng.stringLits.put(litNum, stringLts.get(token));
+ tk = new Token(STRING_LIT, litNum);
+ } else {
+ // @TODO define what a valid identifier is
+ eng.symTable.put(nextSym++, token);
+
+ tk = new Token(VREF, nextSym - 1);
+ }
+
+ // @TODO uncomment when we have a defn. for var names
+ // System.out.printf("\tERROR: Unrecognized token:"
+ // + "%s\n", token);
+ }
+
+ return tk;
+ }
+}