diff options
| author | bculkin2442 <bjculkin@mix.wvu.edu> | 2017-02-16 08:36:43 -0500 |
|---|---|---|
| committer | bculkin2442 <bjculkin@mix.wvu.edu> | 2017-02-16 08:36:43 -0500 |
| commit | f9752a872ad68a47b872eccb953332d372052cac (patch) | |
| tree | cb315abfb68c00cc6b2a3b6e6ca4072f4e2305e8 | |
| parent | 0c3270b408116d3a8d2f1558acb4222eaa808e7b (diff) | |
Preprocessor and better strings
| -rw-r--r-- | dice-lang/src/bjc/dicelang/v2/Define.java | 93 | ||||
| -rw-r--r-- | dice-lang/src/bjc/dicelang/v2/DiceLangConsole.java | 154 | ||||
| -rw-r--r-- | dice-lang/src/bjc/dicelang/v2/DiceLangEngine.java | 304 |
3 files changed, 432 insertions, 119 deletions
diff --git a/dice-lang/src/bjc/dicelang/v2/Define.java b/dice-lang/src/bjc/dicelang/v2/Define.java new file mode 100644 index 0000000..4617931 --- /dev/null +++ b/dice-lang/src/bjc/dicelang/v2/Define.java @@ -0,0 +1,93 @@ +package bjc.dicelang.v2; + +import bjc.utils.data.CircularIterator; + +import java.util.Iterator; +import java.util.function.UnaryOperator; +import java.util.regex.Pattern; +import java.util.regex.Matcher; + +public class Define implements UnaryOperator<String> { + public static enum Type { + LINE, TOKEN + } + + int priority; + + boolean doRecur; + boolean subType; + + Pattern predicate; + Pattern searcher; + + Iterator<String> replacers; + String replacer; + + public Define(int priorty, boolean isSub, boolean recur, + String predicte, String searchr, Iterable<String> replacrs) { + priority = priorty; + doRecur = recur; + subType = isSub; + + if(predicte != null) { + predicate = Pattern.compile(predicte); + } + searcher = Pattern.compile(searchr); + + if(subType) { + if(replacrs.iterator().hasNext()) { + replacers = new CircularIterator<>(replacrs); + } else { + replacers = null; + } + } else { + Iterator<String> itr = replacrs.iterator(); + + if(itr.hasNext()) replacer = itr.next(); + else replacer = ""; + } + } + + public String apply(String tok) { + if(predicate != null) { + if(!predicate.matcher(tok).matches()) { + return tok; + } + } + + String strang = doPass(tok); + + if(doRecur) { + if(strang.equals(tok)) { + return strang; + } else { + String oldStrang = strang; + + do { + strang = doPass(tok); + } while(!strang.equals(oldStrang)); + } + } + + return strang; + } + + private String doPass(String tok) { + Matcher searcherMatcher = searcher.matcher(tok); + + if(subType) { + StringBuffer sb = new StringBuffer(); + + while(searcherMatcher.find()) { + if(replacers == null) searcherMatcher.appendReplacement(sb,""); + else searcherMatcher.appendReplacement(sb, replacers.next()); + } + + searcherMatcher.appendTail(sb); + + return sb.toString(); + } else { + return searcherMatcher.replaceAll(replacer); + } + } +} diff --git a/dice-lang/src/bjc/dicelang/v2/DiceLangConsole.java b/dice-lang/src/bjc/dicelang/v2/DiceLangConsole.java index c827edf..b3b5c08 100644 --- a/dice-lang/src/bjc/dicelang/v2/DiceLangConsole.java +++ b/dice-lang/src/bjc/dicelang/v2/DiceLangConsole.java @@ -1,6 +1,10 @@ package bjc.dicelang.v2; +import java.util.List; +import java.util.LinkedList; import java.util.Scanner; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public class DiceLangConsole { private int commandNumber; @@ -24,16 +28,25 @@ public class DiceLangConsole { String comm = scn.nextLine(); while(!comm.equals("quit") && !comm.equals("exit")) { - System.out.printf("\tRaw command: %s\n", comm); + if(comm.startsWith("pragma")) { + boolean success = handlePragma(comm.substring(7)); - boolean success = eng.runCommand(comm); + if(success) + System.out.println("Pragma completed succesfully"); + else + System.out.println("Pragma execution failed"); + } else { + System.out.printf("\tRaw command: %s\n", comm); - if(success) - System.out.println("Command completed succesfully"); - else - System.out.println("Command execution failed"); + boolean success = eng.runCommand(comm); - commandNumber += 1; + if(success) + System.out.println("Command completed succesfully"); + else + System.out.println("Command execution failed"); + + commandNumber += 1; + } System.out.printf("(%d) dice-lang> ", commandNumber); comm = scn.nextLine(); @@ -42,6 +55,133 @@ public class DiceLangConsole { scn.close(); } + private boolean handlePragma(String pragma) { + System.out.println("\tRaw pragma: " + pragma); + + switch(pragma) { + case "debug": + System.out.println("\tDebug mode is now " + eng.toggleDebug()); + break; + case "postfix": + System.out.println("\tPostfix mode is now " + eng.togglePostfix()); + break; + case "define": + return defineMode(pragma.substring(7)); + default: + System.out.println("\tERROR: Unknown pragma: " + pragma); + return false; + } + + return true; + } + + /* + * Matches slash-delimited strings + * (like /text/ or /text\/text/) + * Uses the "normal* (special normal*)*" pattern style + * recommended in 'Mastering regular expressions' + * Here, the normal is 'anything but a forward or backslash' + * (in regex, thats '[^/\\]') and the special is 'an escaped forward slash' + * (in regex, thats '\\\\/') + * + * Then, we just follow the pattern, escape it for java strings, and + * add the enclosing slashes + */ + private Pattern slashPattern = Pattern.compile("/([^/\\\\]*(?:\\\\/(?:[^/\\\\])*)*)/"); + + private boolean defineMode(String defineText) { + int firstIndex = defineText.indexOf(' '); + int secondIndex = defineText.indexOf(' ', firstIndex + 1); + int thirdIndex = defineText.indexOf(' ', secondIndex + 1); + int fourthIndex = defineText.indexOf(' ', thirdIndex + 1); + int fifthIndex = defineText.indexOf(' ', fourthIndex + 1); + + if(firstIndex == -1) { + System.out.println("\tERROR: Improperly formatted define (no priority)"); + return false; + } else if(secondIndex == -1) { + System.out.println("\tERROR: Improperly formatted define (no define type)"); + return false; + } else if(thirdIndex == -1) { + System.out.println("\tERROR: Improperly formatted define (no recursion type)"); + return false; + } else if(fourthIndex == -1) { + System.out.println("\tERROR: Improperly formatted define (no guard type)"); + return false; + } else if(fifthIndex == -1) { + System.out.println("\tERROR: Improperly formatted define (no patterns)"); + return false; + } + + int priority = Integer.parseInt(defineText.substring(0, firstIndex)); + + String defineType = defineText.substring(firstIndex + 1, secondIndex); + + Define.Type type; + boolean subMode = false; + + switch(defineType) { + case "line": + type = Define.Type.LINE; + break; + case "token": + type = Define.Type.TOKEN; + break; + case "subline": + type = Define.Type.LINE; + subMode = true; + break; + case "subtoken": + type = Define.Type.TOKEN; + subMode = true; + break; + default: + System.out.println("\tERROR: Unknown define type " + + defineType); + return false; + } + + boolean doRecur = defineText.substring(secondIndex + 1, thirdIndex) + .equalsIgnoreCase("true"); + boolean hasGuard = defineText.substring(thirdIndex + 1, fourthIndex). + equalsIgnoreCase("true"); + + String pats = defineText.substring(fourthIndex + 1); + + Matcher patMatcher = slashPattern.matcher(pats); + + String guardPattern = null; + + if(hasGuard) { + if(!patMatcher.find()) { + System.out.println("\tERROR: Improperly formatted define (no guard pattern)"); + } + + guardPattern = patMatcher.group(1); + } + + if(!patMatcher.find()) { + System.out.println("\tERROR: Improperly formatted define (no search pattern)"); + } + + String searchPattern = patMatcher.group(1); + List<String> replacePatterns = new LinkedList<>(); + + while(patMatcher.find()) { + replacePatterns.add(patMatcher.group(1)); + } + + Define dfn = new Define(priority, subMode, doRecur, guardPattern, searchPattern, replacePatterns); + + if(type == Define.Type.LINE) { + eng.addLineDefine(dfn); + } else { + eng.addTokenDefine(dfn); + } + + return true; + } + public static void main(String[] args) { DiceLangConsole console = new DiceLangConsole(args); diff --git a/dice-lang/src/bjc/dicelang/v2/DiceLangEngine.java b/dice-lang/src/bjc/dicelang/v2/DiceLangEngine.java index 2ab5030..455e5d2 100644 --- a/dice-lang/src/bjc/dicelang/v2/DiceLangEngine.java +++ b/dice-lang/src/bjc/dicelang/v2/DiceLangEngine.java @@ -12,6 +12,7 @@ import bjc.utils.funcutils.StringUtils; import java.util.Arrays; import java.util.Deque; +import java.util.List; import java.util.LinkedList; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -20,8 +21,8 @@ import static bjc.dicelang.v2.Token.Type.*; public class DiceLangEngine { // Input rules for processing tokens - private Deque<IPair<String, String>> opExpansionTokens; - private Deque<IPair<String, String>> deaffixationTokens; + private List<IPair<String, String>> opExpansionList; + private List<IPair<String, String>> deaffixationList; // ID for generation private int nextLiteral; @@ -29,6 +30,8 @@ public class DiceLangEngine { // Debug indicator private boolean debugMode; + // Should we do shunting? + private boolean postfixMode; // Shunter for token postfixing private Shunter shunt; @@ -37,39 +40,44 @@ public class DiceLangEngine { private IMap<Integer, String> symTable; private IMap<Integer, String> stringLits; + // Literal tokens for tokenization private IMap<String, Token.Type> litTokens; + // Lists for preprocessing + private IList<Define> lineDefns; + private IList<Define> tokenDefns; + + // Are defns sorted by priority + private boolean defnsSorted; + private final int MATH_PREC = 20; private final int DICE_PREC = 10; private final int EXPR_PREC = 0; public DiceLangEngine() { + lineDefns = new FunctionalList<>(); + tokenDefns = new FunctionalList<>(); + defnsSorted = true; + symTable = new FunctionalMap<>(); stringLits = new FunctionalMap<>(); - opExpansionTokens = new LinkedList<>(); + opExpansionList = new LinkedList<>(); - opExpansionTokens.add(new Pair<>("+", "\\+")); - opExpansionTokens.add(new Pair<>("-", "-")); - opExpansionTokens.add(new Pair<>("*", "\\*")); - opExpansionTokens.add(new Pair<>("//", "//")); - opExpansionTokens.add(new Pair<>("/", "/")); - opExpansionTokens.add(new Pair<>(":=", ":=")); - opExpansionTokens.add(new Pair<>("=>", "=>")); + opExpansionList.add(new Pair<>("+", "\\+")); + opExpansionList.add(new Pair<>("-", "-")); + opExpansionList.add(new Pair<>("*", "\\*")); + opExpansionList.add(new Pair<>("//", "//")); + opExpansionList.add(new Pair<>("/", "/")); + opExpansionList.add(new Pair<>(":=", ":=")); + opExpansionList.add(new Pair<>("=>", "=>")); - deaffixationTokens = new LinkedList<>(); + deaffixationList = new LinkedList<>(); - deaffixationTokens.add(new Pair<>("(", "\\(")); - deaffixationTokens.add(new Pair<>(")", "\\)")); - deaffixationTokens.add(new Pair<>("[", "\\[")); - deaffixationTokens.add(new Pair<>("]", "\\]")); - - nextLiteral = 1; - - // @TODO make configurable - debugMode = true; - - shunt = new Shunter(); + deaffixationList.add(new Pair<>("(", "\\(")); + deaffixationList.add(new Pair<>(")", "\\)")); + deaffixationList.add(new Pair<>("[", "\\[")); + deaffixationList.add(new Pair<>("]", "\\]")); litTokens = new FunctionalMap<>(); @@ -83,29 +91,86 @@ public class DiceLangEngine { litTokens.put("dl", DICELIST); litTokens.put("=>", LET); litTokens.put(":=", BIND); + + shunt = new Shunter(); + + nextLiteral = 1; + + debugMode = true; + postfixMode = false; } - public boolean runCommand(String command) { - // Split the command into tokens - IList<String> tokens = FunctionalStringTokenizer - .fromString(command) - .toList(); + public void sortDefns() { - // Will hold tokens with string literals removed - IList<String> destringed = new FunctionalList<>(); - // Where we keep the string literals - // @TODO put these in the sym-table early instead - // once there is a sym-table + defnsSorted = true; + } + + public void addLineDefine(Define dfn) { + lineDefns.add(dfn); + + defnsSorted = false; + } + + public void addTokenDefine(Define dfn) { + tokenDefns.add(dfn); + + defnsSorted = false; + } + + public boolean toggleDebug() { + debugMode = !debugMode; + + return debugMode; + } + + public boolean togglePostfix() { + postfixMode = !postfixMode; + + return postfixMode; + } + + /* + * Matches quote-delimited strings + * (like "text" or "text\"text") + * Uses the "normal* (special normal*)*" pattern style + * recommended in 'Mastering regular expressions' + * Here, the normal is 'anything but a forward or backslash' + * (in regex, thats '[^\""]') and the special is 'an escaped forward slash' + * (in regex, thats '\\"') + * + * Then, we just follow the pattern, escape it for java strings, and + * add the enclosing quotes + */ + private Pattern quotePattern = Pattern.compile("\"([^\\\"]*(?:\\\"/(?:[^\\\"])*)*)\""); + + public boolean runCommand(String command) { + // Sort the defines if they aren't sorted + if(!defnsSorted) sortDefns(); + IMap<String, String> stringLiterals = new FunctionalMap<>(); - boolean success = destringTokens(tokens, stringLiterals, - destringed); + Matcher quoteMatcher = quotePattern.matcher(command); + StringBuffer destringedCommand = new StringBuffer(); + + while(quoteMatcher.find()) { + String stringLit = quoteMatcher.group(1); + + String litName = "stringLiteral" + nextLiteral++; + stringLiterals.put(litName, stringLit); + + quoteMatcher.appendReplacement(destringedCommand, " " + litName + " "); + } + + quoteMatcher.appendTail(destringedCommand); - if(!success) return success; + // Split the command into tokens + IList<String> tokens = FunctionalStringTokenizer + .fromString(destringedCommand.toString()) + .toList(); if(debugMode) { - System.out.println("\tCommand after destringing: " + destringed.toString()); + System.out.println("\tCommand after destringing: " + tokens.toString()); System.out.println("\tString literals in table"); @@ -115,13 +180,10 @@ public class DiceLangEngine { }); } - IList<String> semiExpandedTokens = ListUtils.deAffixTokens(destringed, deaffixationTokens); - IList<String> fullyExpandedTokens = ListUtils.splitTokens(semiExpandedTokens, opExpansionTokens); + IList<String> semiExpandedTokens = deaffixTokens(tokens, deaffixationList); + IList<String> fullyExpandedTokens = deaffixTokens(semiExpandedTokens, opExpansionList); if(debugMode) { - System.out.printf("\tCommand after token deaffixation: " - + semiExpandedTokens.toString() + "\n"); - System.out.printf("\tCommand after token expansion: " + fullyExpandedTokens.toString() + "\n"); } @@ -139,12 +201,15 @@ public class DiceLangEngine { if(debugMode) System.out.printf("\tCommand after tokenization: %s\n", lexedTokens.toString()); - IList<Token> shuntedTokens = new FunctionalList<>(); - success = shunt.shuntTokens(lexedTokens, shuntedTokens); + IList<Token> shuntedTokens = lexedTokens; - if(!success) return false; + if(!postfixMode) { + shuntedTokens = new FunctionalList<>(); + boolean success = shunt.shuntTokens(lexedTokens, shuntedTokens); + if(!success) return false; + } - if(debugMode) + if(debugMode && !postfixMode) System.out.printf("\tCommand after shunting: %s\n", shuntedTokens.toString()); return true; @@ -226,7 +291,7 @@ public class DiceLangEngine { Matcher stringLit = stringLitMatcher.matcher(token); if(stringLit.matches()) { - int litNum = Integer.parseInt(stringLit.group()); + int litNum = Integer.parseInt(stringLit.group(1)); stringLits.put(litNum, stringLts.get(token)); tk = new Token(STRING_LIT, litNum); @@ -245,89 +310,104 @@ public class DiceLangEngine { return tk; } - private boolean destringTokens(IList<String> tokens, - IMap<String, String> stringLiterals, - IList<String> destringed) { - // Are we parsing a string literal? - boolean stringMode = false; + private IList<String> deaffixTokens(IList<String> tokens, List<IPair<String, String>> deaffixTokens) { + Deque<String> working = new LinkedList<>(); - // The current string literal - StringBuilder currentLiteral = new StringBuilder(); - String literalName = "stringLiteral"; + for(String tk : tokens.toIterable()) { + working.add(tk); + } - for(String token : tokens.toIterable()) { - if(token.startsWith("\"")) { - if(token.endsWith("\"")) { - String litName = literalName + nextLiteral++; + for(IPair<String, String> op : deaffixTokens) { + Deque<String> newWorking = new LinkedList<>(); + + String opName = op.getLeft(); + String opRegex = op.getRight(); + + Pattern opRegexPattern = Pattern.compile(opRegex); + Pattern opRegexOnly = Pattern.compile("\\A(?:" + opRegex + ")+\\Z"); + Pattern opRegexStarting = Pattern.compile("\\A" + opRegex); + Pattern opRegexEnding = Pattern.compile(opRegex + "\\Z"); + + for(String tk : working) { + // @Incomplete + if(opRegexOnly.matcher(tk).matches()) { + // The string contains only the operator + newWorking.add(tk); + } else { + Matcher medianMatcher = opRegexPattern.matcher(tk); + + // Read the first match + boolean found = medianMatcher.find(); + + if(!found) { + newWorking.add(tk); + continue; + } - stringLiterals.put(litName, - token.substring(1, token.length() - 1)); - destringed.add(litName); + Matcher startMatcher = opRegexStarting.matcher(tk); + Matcher endMatcher = opRegexEnding.matcher(tk); - continue; - } + boolean startsWith = startMatcher.find(); + boolean endsWith = endMatcher.find(); - if(stringMode) { - // @TODO make this not an error - System.out.printf("\tPARSER ERROR: Initial" - +" quotes can only start strings\n"); - } else { - currentLiteral.append(token.substring(1) + " "); + boolean doSplit = medianMatcher.find(); - stringMode = true; - } - } else if (token.endsWith("\"")) { - if(!stringMode) { - // @TODO make this not an error - System.out.printf("\tPARSER ERROR: Terminal" - +" quotes can only end strings\n"); - return false; - } else { - currentLiteral.append( - token.substring(0, token.length() - 1)); + medianMatcher.reset(); - String litName = literalName + nextLiteral++; + if(doSplit || (!startsWith && !endsWith)) { + String[] pieces = opRegexPattern.split(tk); - stringLiterals.put(litName, - currentLiteral.toString()); - destringed.add(litName); + if(startsWith) { + // Skip the starting operator + medianMatcher.find(); + newWorking.add(tk.substring(0, startMatcher.end())); + } - currentLiteral = new StringBuilder(); + for(int i = 0; i < pieces.length; i++) { + String piece = pieces[i]; - stringMode = false; - } - } else if (token.contains("\"")) { - if(token.contains("\\\"")) { - if(stringMode) { - currentLiteral.append(token + " "); + // Find the next operator + boolean didFind = medianMatcher.find(); + + if(piece.equals("")) { + System.out.printf("\tWARNING: Empty token found during operator expansion" + + "of token (%s). Weirdness may happen as a result\n", tk); + continue; + } + + newWorking.add(piece); + + if(didFind) + newWorking.add(tk.substring(medianMatcher.start(), medianMatcher.end())); + } + + if(endsWith) + newWorking.add(tk.substring(endMatcher.start())); + } else if(startsWith && endsWith) { + newWorking.add(tk.substring(0, startMatcher.end())); + newWorking.add(tk.substring(startMatcher.end(), endMatcher.start())); + newWorking.add(tk.substring(endMatcher.start())); + } else if(startsWith) { + newWorking.add(tk.substring(0, startMatcher.end())); + newWorking.add(tk.substring(startMatcher.end())); + } else if(endsWith) { + newWorking.add(tk.substring(0, endMatcher.start())); + newWorking.add(tk.substring(endMatcher.end())); } else { - System.out.printf("\tERROR: Escaped quote " - + " outside of string literal\n"); - return false; + newWorking.add(tk); } - } else { - // @TODO make this not an error - System.out.printf("\tPARSER ERROR: A string" - + " literal must be delimited by spaces" - + " for now.\n"); - return false; - } - } else { - if(stringMode) { - currentLiteral.append(token + " "); - } else { - destringed.add(token); } + } - } - if(stringMode) { - System.out.printf("\tERROR: Unclosed string literal (%s" - + ").\n", currentLiteral.toString()); + working = newWorking; + } - return false; + IList<String> returned = new FunctionalList<>(); + for(String ent : working) { + returned.add(ent); } - return true; + return returned; } } |
