summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbculkin2442 <bjculkin@mix.wvu.edu>2017-02-16 08:36:43 -0500
committerbculkin2442 <bjculkin@mix.wvu.edu>2017-02-16 08:36:43 -0500
commitf9752a872ad68a47b872eccb953332d372052cac (patch)
treecb315abfb68c00cc6b2a3b6e6ca4072f4e2305e8
parent0c3270b408116d3a8d2f1558acb4222eaa808e7b (diff)
Preprocessor and better strings
-rw-r--r--dice-lang/src/bjc/dicelang/v2/Define.java93
-rw-r--r--dice-lang/src/bjc/dicelang/v2/DiceLangConsole.java154
-rw-r--r--dice-lang/src/bjc/dicelang/v2/DiceLangEngine.java304
3 files changed, 432 insertions, 119 deletions
diff --git a/dice-lang/src/bjc/dicelang/v2/Define.java b/dice-lang/src/bjc/dicelang/v2/Define.java
new file mode 100644
index 0000000..4617931
--- /dev/null
+++ b/dice-lang/src/bjc/dicelang/v2/Define.java
@@ -0,0 +1,93 @@
+package bjc.dicelang.v2;
+
+import bjc.utils.data.CircularIterator;
+
+import java.util.Iterator;
+import java.util.function.UnaryOperator;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+
+/**
+ * A regex-based search-and-replace rule.
+ *
+ * A define has a search pattern and zero or more replacement strings. It can
+ * also carry a guard pattern; when one is present, the define leaves alone
+ * any string that the guard does not match in full.
+ *
+ * In sub mode, each match takes its replacement from the replacer iterator;
+ * otherwise every match is replaced with the first replacement string.
+ */
+public class Define implements UnaryOperator<String> {
+    /** The kinds of define. */
+    public static enum Type {
+        LINE, TOKEN
+    }
+
+    // Priority of this define. Stored here, but not read by this class.
+    int priority;
+
+    // Whether to re-apply the define to its own output until it stops changing
+    boolean doRecur;
+    // Whether each match takes its own replacer (sub mode)
+    boolean subType;
+
+    // Optional guard. Null means the define always applies
+    Pattern predicate;
+    // What to search for
+    Pattern searcher;
+
+    // Replacers for sub mode. Null means matches are replaced with nothing
+    Iterator<String> replacers;
+    // Replacer for non-sub mode
+    String replacer;
+
+    /**
+     * Create a new define.
+     *
+     * @param priorty  The priority of the define.
+     * @param isSub    Whether each match should take the next replacer from
+     *                 the iterator, instead of all matches sharing one.
+     * @param recur    Whether to re-apply the define to its own output
+     *                 until the output stops changing.
+     * @param predicte The guard regex, or null for no guard.
+     * @param searchr  The regex to search for.
+     * @param replacrs The replacement strings. May be empty, in which case
+     *                 matches are replaced with nothing.
+     */
+    public Define(int priorty, boolean isSub, boolean recur,
+            String predicte, String searchr, Iterable<String> replacrs) {
+        priority = priorty;
+        doRecur = recur;
+        subType = isSub;
+
+        if(predicte != null) {
+            predicate = Pattern.compile(predicte);
+        }
+        searcher = Pattern.compile(searchr);
+
+        if(subType) {
+            if(replacrs.iterator().hasNext()) {
+                // Presumably cycles through the replacers forever - confirm
+                // against CircularIterator
+                replacers = new CircularIterator<>(replacrs);
+            } else {
+                replacers = null;
+            }
+        } else {
+            Iterator<String> itr = replacrs.iterator();
+
+            // Only the first replacer is used outside of sub mode
+            if(itr.hasNext()) replacer = itr.next();
+            else replacer = "";
+        }
+    }
+
+    /**
+     * Apply this define to a string.
+     *
+     * If the define is recursive, it is re-applied to its own output until
+     * a pass leaves the text unchanged. A recursive define whose replacement
+     * always produces a new match will therefore never finish.
+     *
+     * @param tok The string to apply the define to.
+     * @return The string with the define applied, or the string itself if
+     *         the guard did not match it.
+     */
+    @Override
+    public String apply(String tok) {
+        if(predicate != null && !predicate.matcher(tok).matches()) {
+            return tok;
+        }
+
+        String strang = doPass(tok);
+
+        if(doRecur) {
+            // Feed each result back in until a pass changes nothing
+            String prevStrang = tok;
+
+            while(!strang.equals(prevStrang)) {
+                prevStrang = strang;
+                strang = doPass(prevStrang);
+            }
+        }
+
+        return strang;
+    }
+
+    /*
+     * Do a single replacement pass over a string.
+     *
+     * Replacement strings follow the rules of Matcher.appendReplacement, so
+     * '$' and '\' are special in them.
+     */
+    private String doPass(String tok) {
+        Matcher searcherMatcher = searcher.matcher(tok);
+
+        if(subType) {
+            StringBuffer sb = new StringBuffer();
+
+            while(searcherMatcher.find()) {
+                if(replacers == null) searcherMatcher.appendReplacement(sb, "");
+                else searcherMatcher.appendReplacement(sb, replacers.next());
+            }
+
+            searcherMatcher.appendTail(sb);
+
+            return sb.toString();
+        } else {
+            return searcherMatcher.replaceAll(replacer);
+        }
+    }
+}
diff --git a/dice-lang/src/bjc/dicelang/v2/DiceLangConsole.java b/dice-lang/src/bjc/dicelang/v2/DiceLangConsole.java
index c827edf..b3b5c08 100644
--- a/dice-lang/src/bjc/dicelang/v2/DiceLangConsole.java
+++ b/dice-lang/src/bjc/dicelang/v2/DiceLangConsole.java
@@ -1,6 +1,10 @@
package bjc.dicelang.v2;
+import java.util.List;
+import java.util.LinkedList;
import java.util.Scanner;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
public class DiceLangConsole {
private int commandNumber;
@@ -24,16 +28,25 @@ public class DiceLangConsole {
String comm = scn.nextLine();
while(!comm.equals("quit") && !comm.equals("exit")) {
- System.out.printf("\tRaw command: %s\n", comm);
+ if(comm.startsWith("pragma")) {
+ boolean success = handlePragma(comm.substring(7));
- boolean success = eng.runCommand(comm);
+ if(success)
+ System.out.println("Pragma completed succesfully");
+ else
+ System.out.println("Pragma execution failed");
+ } else {
+ System.out.printf("\tRaw command: %s\n", comm);
- if(success)
- System.out.println("Command completed succesfully");
- else
- System.out.println("Command execution failed");
+ boolean success = eng.runCommand(comm);
- commandNumber += 1;
+ if(success)
+ System.out.println("Command completed succesfully");
+ else
+ System.out.println("Command execution failed");
+
+ commandNumber += 1;
+ }
System.out.printf("(%d) dice-lang> ", commandNumber);
comm = scn.nextLine();
@@ -42,6 +55,133 @@ public class DiceLangConsole {
scn.close();
}
+ /**
+  * Handle a console pragma.
+  *
+  * @param pragma The pragma text: the name of the pragma, optionally
+  *               followed by a space and the arguments to the pragma.
+  * @return Whether the pragma executed successfully.
+  */
+ private boolean handlePragma(String pragma) {
+     System.out.println("\tRaw pragma: " + pragma);
+
+     // Split the pragma name from its arguments, so that pragmas that
+     // take arguments (like define) can be recognized by name
+     int nameEnd = pragma.indexOf(' ');
+
+     String pragmaName = nameEnd == -1 ? pragma : pragma.substring(0, nameEnd);
+     String pragmaArgs = nameEnd == -1 ? "" : pragma.substring(nameEnd + 1);
+
+     switch(pragmaName) {
+     case "debug":
+         System.out.println("\tDebug mode is now " + eng.toggleDebug());
+         break;
+     case "postfix":
+         System.out.println("\tPostfix mode is now " + eng.togglePostfix());
+         break;
+     case "define":
+         // A define with no arguments is reported as improperly formatted
+         return defineMode(pragmaArgs);
+     default:
+         System.out.println("\tERROR: Unknown pragma: " + pragma);
+         return false;
+     }
+
+     return true;
+ }
+
+ /*
+ * Matches slash-delimited strings
+ * (like /text/ or /text\/text/)
+ *
+ * Uses the "normal* (special normal*)*" pattern style
+ * recommended in 'Mastering Regular Expressions'.
+ * Here, the normal is 'anything but a forward slash or a backslash'
+ * (in regex, that's '[^/\\]') and the special is 'an escaped forward slash'
+ * (in regex, that's '\\/')
+ *
+ * Then, we just follow the pattern, escape it for java strings, and
+ * add the enclosing slashes.
+ *
+ * Group 1 is the text between the slashes, with any escaped slashes
+ * still escaped.
+ */
+ private Pattern slashPattern = Pattern.compile("/([^/\\\\]*(?:\\\\/(?:[^/\\\\])*)*)/");
+
+ /**
+  * Parse a define and register it with the engine.
+  *
+  * The expected format is
+  *     priority type recur guard patterns
+  * where priority is an integer, type is one of line, token, subline or
+  * subtoken, recur and guard are flags (anything but 'true' counts as
+  * false), and patterns is a series of slash-delimited strings: the guard
+  * pattern (only if guard is true), then the search pattern, then any
+  * number of replacement patterns.
+  *
+  * @param defineText The text of the define.
+  * @return Whether the define was parsed and registered.
+  */
+ private boolean defineMode(String defineText) {
+     int firstIndex = defineText.indexOf(' ');
+     int secondIndex = defineText.indexOf(' ', firstIndex + 1);
+     int thirdIndex = defineText.indexOf(' ', secondIndex + 1);
+     int fourthIndex = defineText.indexOf(' ', thirdIndex + 1);
+
+     if(firstIndex == -1) {
+         System.out.println("\tERROR: Improperly formatted define (no priority)");
+         return false;
+     } else if(secondIndex == -1) {
+         System.out.println("\tERROR: Improperly formatted define (no define type)");
+         return false;
+     } else if(thirdIndex == -1) {
+         System.out.println("\tERROR: Improperly formatted define (no recursion type)");
+         return false;
+     } else if(fourthIndex == -1) {
+         System.out.println("\tERROR: Improperly formatted define (no guard type)");
+         return false;
+     }
+
+     String pats = defineText.substring(fourthIndex + 1);
+
+     // A lone search pattern is a valid define (matches get replaced
+     // with nothing), so only reject when nothing follows the flags
+     if(pats.trim().isEmpty()) {
+         System.out.println("\tERROR: Improperly formatted define (no patterns)");
+         return false;
+     }
+
+     int priority;
+
+     try {
+         priority = Integer.parseInt(defineText.substring(0, firstIndex));
+     } catch(NumberFormatException nfex) {
+         System.out.println("\tERROR: Improperly formatted define (priority must be an integer)");
+         return false;
+     }
+
+     String defineType = defineText.substring(firstIndex + 1, secondIndex);
+
+     Define.Type type;
+     boolean subMode = false;
+
+     switch(defineType) {
+     case "line":
+         type = Define.Type.LINE;
+         break;
+     case "token":
+         type = Define.Type.TOKEN;
+         break;
+     case "subline":
+         type = Define.Type.LINE;
+         subMode = true;
+         break;
+     case "subtoken":
+         type = Define.Type.TOKEN;
+         subMode = true;
+         break;
+     default:
+         System.out.println("\tERROR: Unknown define type "
+                 + defineType);
+         return false;
+     }
+
+     boolean doRecur = defineText.substring(secondIndex + 1, thirdIndex)
+         .equalsIgnoreCase("true");
+     boolean hasGuard = defineText.substring(thirdIndex + 1, fourthIndex)
+         .equalsIgnoreCase("true");
+
+     Matcher patMatcher = slashPattern.matcher(pats);
+
+     String guardPattern = null;
+
+     if(hasGuard) {
+         // Bail out here; asking a matcher for a group after a failed
+         // find throws an exception
+         if(!patMatcher.find()) {
+             System.out.println("\tERROR: Improperly formatted define (no guard pattern)");
+             return false;
+         }
+
+         guardPattern = patMatcher.group(1);
+     }
+
+     if(!patMatcher.find()) {
+         System.out.println("\tERROR: Improperly formatted define (no search pattern)");
+         return false;
+     }
+
+     String searchPattern = patMatcher.group(1);
+     List<String> replacePatterns = new LinkedList<>();
+
+     // Every remaining pattern is a replacement
+     while(patMatcher.find()) {
+         replacePatterns.add(patMatcher.group(1));
+     }
+
+     Define dfn = new Define(priority, subMode, doRecur, guardPattern, searchPattern, replacePatterns);
+
+     if(type == Define.Type.LINE) {
+         eng.addLineDefine(dfn);
+     } else {
+         eng.addTokenDefine(dfn);
+     }
+
+     return true;
+ }
+
public static void main(String[] args) {
DiceLangConsole console = new DiceLangConsole(args);
diff --git a/dice-lang/src/bjc/dicelang/v2/DiceLangEngine.java b/dice-lang/src/bjc/dicelang/v2/DiceLangEngine.java
index 2ab5030..455e5d2 100644
--- a/dice-lang/src/bjc/dicelang/v2/DiceLangEngine.java
+++ b/dice-lang/src/bjc/dicelang/v2/DiceLangEngine.java
@@ -12,6 +12,7 @@ import bjc.utils.funcutils.StringUtils;
import java.util.Arrays;
import java.util.Deque;
+import java.util.List;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -20,8 +21,8 @@ import static bjc.dicelang.v2.Token.Type.*;
public class DiceLangEngine {
// Input rules for processing tokens
- private Deque<IPair<String, String>> opExpansionTokens;
- private Deque<IPair<String, String>> deaffixationTokens;
+ private List<IPair<String, String>> opExpansionList;
+ private List<IPair<String, String>> deaffixationList;
// ID for generation
private int nextLiteral;
@@ -29,6 +30,8 @@ public class DiceLangEngine {
// Debug indicator
private boolean debugMode;
+ // Should we do shunting?
+ private boolean postfixMode;
// Shunter for token postfixing
private Shunter shunt;
@@ -37,39 +40,44 @@ public class DiceLangEngine {
private IMap<Integer, String> symTable;
private IMap<Integer, String> stringLits;
+ // Literal tokens for tokenization
private IMap<String, Token.Type> litTokens;
+ // Lists for preprocessing
+ private IList<Define> lineDefns;
+ private IList<Define> tokenDefns;
+
+ // Are defns sorted by priority
+ private boolean defnsSorted;
+
private final int MATH_PREC = 20;
private final int DICE_PREC = 10;
private final int EXPR_PREC = 0;
public DiceLangEngine() {
+ lineDefns = new FunctionalList<>();
+ tokenDefns = new FunctionalList<>();
+ defnsSorted = true;
+
symTable = new FunctionalMap<>();
stringLits = new FunctionalMap<>();
- opExpansionTokens = new LinkedList<>();
+ opExpansionList = new LinkedList<>();
- opExpansionTokens.add(new Pair<>("+", "\\+"));
- opExpansionTokens.add(new Pair<>("-", "-"));
- opExpansionTokens.add(new Pair<>("*", "\\*"));
- opExpansionTokens.add(new Pair<>("//", "//"));
- opExpansionTokens.add(new Pair<>("/", "/"));
- opExpansionTokens.add(new Pair<>(":=", ":="));
- opExpansionTokens.add(new Pair<>("=>", "=>"));
+ opExpansionList.add(new Pair<>("+", "\\+"));
+ opExpansionList.add(new Pair<>("-", "-"));
+ opExpansionList.add(new Pair<>("*", "\\*"));
+ opExpansionList.add(new Pair<>("//", "//"));
+ opExpansionList.add(new Pair<>("/", "/"));
+ opExpansionList.add(new Pair<>(":=", ":="));
+ opExpansionList.add(new Pair<>("=>", "=>"));
- deaffixationTokens = new LinkedList<>();
+ deaffixationList = new LinkedList<>();
- deaffixationTokens.add(new Pair<>("(", "\\("));
- deaffixationTokens.add(new Pair<>(")", "\\)"));
- deaffixationTokens.add(new Pair<>("[", "\\["));
- deaffixationTokens.add(new Pair<>("]", "\\]"));
-
- nextLiteral = 1;
-
- // @TODO make configurable
- debugMode = true;
-
- shunt = new Shunter();
+ deaffixationList.add(new Pair<>("(", "\\("));
+ deaffixationList.add(new Pair<>(")", "\\)"));
+ deaffixationList.add(new Pair<>("[", "\\["));
+ deaffixationList.add(new Pair<>("]", "\\]"));
litTokens = new FunctionalMap<>();
@@ -83,29 +91,86 @@ public class DiceLangEngine {
litTokens.put("dl", DICELIST);
litTokens.put("=>", LET);
litTokens.put(":=", BIND);
+
+ shunt = new Shunter();
+
+ nextLiteral = 1;
+
+ debugMode = true;
+ postfixMode = false;
}
- public boolean runCommand(String command) {
- // Split the command into tokens
- IList<String> tokens = FunctionalStringTokenizer
- .fromString(command)
- .toList();
+ /*
+ * Mark the defines as sorted.
+ *
+ * NOTE(review): nothing is reordered here; the method only sets the
+ * flag. Presumably the actual sort by priority is still to be
+ * written - confirm.
+ */
+ public void sortDefns() {
- // Will hold tokens with string literals removed
- IList<String> destringed = new FunctionalList<>();
- // Where we keep the string literals
- // @TODO put these in the sym-table early instead
- // once there is a sym-table
+ defnsSorted = true;
+ }
+
+ /**
+  * Register a define in the list of line defines.
+  *
+  * @param dfn The define to register.
+  */
+ public void addLineDefine(Define dfn) {
+     this.lineDefns.add(dfn);
+
+     // The new define has not been put in order yet
+     this.defnsSorted = false;
+ }
+
+ /**
+  * Register a define in the list of token defines.
+  *
+  * @param dfn The define to register.
+  */
+ public void addTokenDefine(Define dfn) {
+     this.tokenDefns.add(dfn);
+
+     // The new define has not been put in order yet
+     this.defnsSorted = false;
+ }
+
+ /**
+  * Flip debug mode.
+  *
+  * @return The state of debug mode after the flip.
+  */
+ public boolean toggleDebug() {
+     boolean flipped = !debugMode;
+     debugMode = flipped;
+
+     return flipped;
+ }
+
+ /**
+  * Flip postfix mode.
+  *
+  * @return The state of postfix mode after the flip.
+  */
+ public boolean togglePostfix() {
+     boolean flipped = !postfixMode;
+     postfixMode = flipped;
+
+     return flipped;
+ }
+
+ /*
+  * Matches quote-delimited strings
+  * (like "text" or "text\"text")
+  *
+  * Uses the "normal* (special normal*)*" pattern style
+  * recommended in 'Mastering Regular Expressions'.
+  * Here, the normal is 'anything but a double quote or a backslash'
+  * (in regex, that's '[^"\\]') and the special is 'an escaped double quote'
+  * (in regex, that's '\\"')
+  *
+  * Then, we just follow the pattern, escape it for java strings, and
+  * add the enclosing quotes.
+  *
+  * Group 1 is the text between the quotes, with any escaped quotes
+  * still escaped.
+  */
+ private Pattern quotePattern = Pattern.compile("\"([^\"\\\\]*(?:\\\\\"(?:[^\"\\\\])*)*)\"");
+
+ public boolean runCommand(String command) {
+ // Sort the defines if they aren't sorted
+ if(!defnsSorted) sortDefns();
+
IMap<String, String> stringLiterals = new FunctionalMap<>();
- boolean success = destringTokens(tokens, stringLiterals,
- destringed);
+ Matcher quoteMatcher = quotePattern.matcher(command);
+ StringBuffer destringedCommand = new StringBuffer();
+
+ while(quoteMatcher.find()) {
+ String stringLit = quoteMatcher.group(1);
+
+ String litName = "stringLiteral" + nextLiteral++;
+ stringLiterals.put(litName, stringLit);
+
+ quoteMatcher.appendReplacement(destringedCommand, " " + litName + " ");
+ }
+
+ quoteMatcher.appendTail(destringedCommand);
- if(!success) return success;
+ // Split the command into tokens
+ IList<String> tokens = FunctionalStringTokenizer
+ .fromString(destringedCommand.toString())
+ .toList();
if(debugMode) {
- System.out.println("\tCommand after destringing: " + destringed.toString());
+ System.out.println("\tCommand after destringing: " + tokens.toString());
System.out.println("\tString literals in table");
@@ -115,13 +180,10 @@ public class DiceLangEngine {
});
}
- IList<String> semiExpandedTokens = ListUtils.deAffixTokens(destringed, deaffixationTokens);
- IList<String> fullyExpandedTokens = ListUtils.splitTokens(semiExpandedTokens, opExpansionTokens);
+ IList<String> semiExpandedTokens = deaffixTokens(tokens, deaffixationList);
+ IList<String> fullyExpandedTokens = deaffixTokens(semiExpandedTokens, opExpansionList);
if(debugMode) {
- System.out.printf("\tCommand after token deaffixation: "
- + semiExpandedTokens.toString() + "\n");
-
System.out.printf("\tCommand after token expansion: "
+ fullyExpandedTokens.toString() + "\n");
}
@@ -139,12 +201,15 @@ public class DiceLangEngine {
if(debugMode)
System.out.printf("\tCommand after tokenization: %s\n", lexedTokens.toString());
- IList<Token> shuntedTokens = new FunctionalList<>();
- success = shunt.shuntTokens(lexedTokens, shuntedTokens);
+ IList<Token> shuntedTokens = lexedTokens;
- if(!success) return false;
+ if(!postfixMode) {
+ shuntedTokens = new FunctionalList<>();
+ boolean success = shunt.shuntTokens(lexedTokens, shuntedTokens);
+ if(!success) return false;
+ }
- if(debugMode)
+ if(debugMode && !postfixMode)
System.out.printf("\tCommand after shunting: %s\n", shuntedTokens.toString());
return true;
@@ -226,7 +291,7 @@ public class DiceLangEngine {
Matcher stringLit = stringLitMatcher.matcher(token);
if(stringLit.matches()) {
- int litNum = Integer.parseInt(stringLit.group());
+ int litNum = Integer.parseInt(stringLit.group(1));
stringLits.put(litNum, stringLts.get(token));
tk = new Token(STRING_LIT, litNum);
@@ -245,89 +310,104 @@ public class DiceLangEngine {
return tk;
}
- private boolean destringTokens(IList<String> tokens,
- IMap<String, String> stringLiterals,
- IList<String> destringed) {
- // Are we parsing a string literal?
- boolean stringMode = false;
+ /*
+ * Split operators out of tokens, so that each operator ends up as a
+ * token of its own.
+ *
+ * The operators are handled one at a time, in list order, and each one
+ * is run over the output of the one before it. For a given operator:
+ * - a token made up of nothing but repeats of the operator is kept whole
+ * - a token that doesn't contain the operator is kept whole
+ * - any other token is broken up around the operator
+ *
+ * Each pair in the list has the operator text on the left, and the regex
+ * that matches it on the right.
+ *
+ * Returns a new list; the list passed in is only read from.
+ */
+ private IList<String> deaffixTokens(IList<String> tokens, List<IPair<String, String>> deaffixTokens) {
+ Deque<String> working = new LinkedList<>();
- // The current string literal
- StringBuilder currentLiteral = new StringBuilder();
- String literalName = "stringLiteral";
+ for(String tk : tokens.toIterable()) {
+ working.add(tk);
+ }
- for(String token : tokens.toIterable()) {
- if(token.startsWith("\"")) {
- if(token.endsWith("\"")) {
- String litName = literalName + nextLiteral++;
+ for(IPair<String, String> op : deaffixTokens) {
+ Deque<String> newWorking = new LinkedList<>();
+
+ // NOTE(review): opName is never read in this method
+ String opName = op.getLeft();
+ String opRegex = op.getRight();
+
+ Pattern opRegexPattern = Pattern.compile(opRegex);
+ Pattern opRegexOnly = Pattern.compile("\\A(?:" + opRegex + ")+\\Z");
+ Pattern opRegexStarting = Pattern.compile("\\A" + opRegex);
+ Pattern opRegexEnding = Pattern.compile(opRegex + "\\Z");
+
+ for(String tk : working) {
+ // @Incomplete
+ if(opRegexOnly.matcher(tk).matches()) {
+ // The string contains only the operator
+ newWorking.add(tk);
+ } else {
+ Matcher medianMatcher = opRegexPattern.matcher(tk);
+
+ // Read the first match
+ boolean found = medianMatcher.find();
+
+ if(!found) {
+ newWorking.add(tk);
+ continue;
+ }
- stringLiterals.put(litName,
- token.substring(1, token.length() - 1));
- destringed.add(litName);
+ Matcher startMatcher = opRegexStarting.matcher(tk);
+ Matcher endMatcher = opRegexEnding.matcher(tk);
- continue;
- }
+ boolean startsWith = startMatcher.find();
+ boolean endsWith = endMatcher.find();
- if(stringMode) {
- // @TODO make this not an error
- System.out.printf("\tPARSER ERROR: Initial"
- +" quotes can only start strings\n");
- } else {
- currentLiteral.append(token.substring(1) + " ");
+ // A second match means the operator occurs more than once
+ boolean doSplit = medianMatcher.find();
- stringMode = true;
- }
- } else if (token.endsWith("\"")) {
- if(!stringMode) {
- // @TODO make this not an error
- System.out.printf("\tPARSER ERROR: Terminal"
- +" quotes can only end strings\n");
- return false;
- } else {
- currentLiteral.append(
- token.substring(0, token.length() - 1));
+ // Rewind, so the matches can be walked alongside the pieces
+ medianMatcher.reset();
- String litName = literalName + nextLiteral++;
+ if(doSplit || (!startsWith && !endsWith)) {
+ String[] pieces = opRegexPattern.split(tk);
- stringLiterals.put(litName,
- currentLiteral.toString());
- destringed.add(litName);
+ if(startsWith) {
+ // Skip the starting operator
+ medianMatcher.find();
+ newWorking.add(tk.substring(0, startMatcher.end()));
+ }
- currentLiteral = new StringBuilder();
+ for(int i = 0; i < pieces.length; i++) {
+ String piece = pieces[i];
- stringMode = false;
- }
- } else if (token.contains("\"")) {
- if(token.contains("\\\"")) {
- if(stringMode) {
- currentLiteral.append(token + " ");
+ // Find the next operator
+ boolean didFind = medianMatcher.find();
+
+ // NOTE(review): the matcher has already moved on by
+ // this point, so skipping an empty piece also skips
+ // the operator that was just found. The warning text
+ // also lacks a space between 'expansion' and 'of'.
+ if(piece.equals("")) {
+ System.out.printf("\tWARNING: Empty token found during operator expansion"
+ + "of token (%s). Weirdness may happen as a result\n", tk);
+ continue;
+ }
+
+ newWorking.add(piece);
+
+ if(didFind)
+ newWorking.add(tk.substring(medianMatcher.start(), medianMatcher.end()));
+ }
+
+ if(endsWith)
+ newWorking.add(tk.substring(endMatcher.start()));
+ } else if(startsWith && endsWith) {
+ newWorking.add(tk.substring(0, startMatcher.end()));
+ newWorking.add(tk.substring(startMatcher.end(), endMatcher.start()));
+ newWorking.add(tk.substring(endMatcher.start()));
+ } else if(startsWith) {
+ newWorking.add(tk.substring(0, startMatcher.end()));
+ newWorking.add(tk.substring(startMatcher.end()));
+ } else if(endsWith) {
+ // NOTE(review): the second add takes the text after the
+ // operator, not the operator itself. The other branches
+ // use substring(endMatcher.start()) here - confirm
+ // which one is meant.
+ newWorking.add(tk.substring(0, endMatcher.start()));
+ newWorking.add(tk.substring(endMatcher.end()));
} else {
- System.out.printf("\tERROR: Escaped quote "
- + " outside of string literal\n");
- return false;
+ newWorking.add(tk);
}
- } else {
- // @TODO make this not an error
- System.out.printf("\tPARSER ERROR: A string"
- + " literal must be delimited by spaces"
- + " for now.\n");
- return false;
- }
- } else {
- if(stringMode) {
- currentLiteral.append(token + " ");
- } else {
- destringed.add(token);
}
+
}
- }
- if(stringMode) {
- System.out.printf("\tERROR: Unclosed string literal (%s"
- + ").\n", currentLiteral.toString());
+ // The next operator works on the output of this one
+ working = newWorking;
+ }
- return false;
+ IList<String> returned = new FunctionalList<>();
+ for(String ent : working) {
+ returned.add(ent);
}
- return true;
+ return returned;
}
}