1 files changed, 212 insertions, 49 deletions
diff --git a/src/main/java/bjc/rgens/parser/RGrammarParser.java b/src/main/java/bjc/rgens/parser/RGrammarParser.java
index 3a357b1..a1fc0e9 100644..100755
--- a/src/main/java/bjc/rgens/parser/RGrammarParser.java
+++ b/src/main/java/bjc/rgens/parser/RGrammarParser.java
@@ -1,8 +1,13 @@
 package bjc.rgens.parser;
 
-import bjc.rgens.parser.elements.CaseElement;
+import bjc.rgens.parser.elements.*;
+
+import bjc.utils.data.IPair;
+import bjc.utils.data.Pair;
 import bjc.utils.funcdata.FunctionalList;
 import bjc.utils.funcdata.IList;
+import bjc.utils.funcutils.ListUtils;
+import bjc.utils.funcutils.SetUtils;
 import bjc.utils.funcutils.TriConsumer;
 import bjc.utils.ioutils.blocks.Block;
 import bjc.utils.ioutils.blocks.BlockReader;
@@ -10,8 +15,13 @@ import bjc.utils.ioutils.blocks.SimpleBlockReader;
 
 import java.io.Reader;
 import java.io.StringReader;
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.LinkedList;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.Set;
 
 /**
  * Reads {@link RGrammar} from a input stream.
@@ -23,6 +33,7 @@ public class RGrammarParser {
 	 *  Whether we are in debug mode or not.
 	 */
 	public static final boolean DEBUG = false;
+	public static final boolean LINES = true; /* Whether to print the end line of the reader's current block to stderr once all top-level blocks are handled. */
 
 	/*
 	 * Templates for level-dependent delimiters.
@@ -73,6 +84,63 @@ public class RGrammarParser {
 			}
 		});
 
+		pragmas.put("recur-limit", (body, build, level) -> {
+			String[] parts = body.split(" ");
+			/* Expect exactly two space-separated arguments: <rule-name> <limit> */
+			if(parts.length != 2) {
+				throw new GrammarException("Recur-limit pragma takes two arguments: the name of the rule to set the limit for, and the new value of the limit");
+			}
+			/* Only an unsigned run of decimal digits is accepted as the limit. */
+			if(!parts[1].matches("\\A\\d+\\Z")) {
+				throw new GrammarException("Limit value must be an integer");
+			}
+
+			build.setRuleRecur(parts[0], Integer.parseInt(parts[1]));
+		});
+
+		pragmas.put("enable-weight", (body, build, level) -> {
+			String[] parts = body.split(" ");
+			/* Takes exactly one argument, the rule name, which is all that setWeight() is given. */
+			if(parts.length != 1) {
+				throw new GrammarException("Enable-weight pragma takes one argument: the name of the rule to set the weight factor for");
+			}
+
+			build.setWeight(parts[0]);
+		});
+		pragmas.put("enable-descent", (body, build, level) -> {
+			String[] parts = body.split(" ");
+			/* Expect exactly two space-separated arguments: <rule-name> <factor> */
+			if(parts.length != 2) {
+				throw new GrammarException("Enable-descent pragma takes two arguments: the name of the rule to set the descent factor for, and the new value of the factor");
+			}
+			/* Only an unsigned run of decimal digits is accepted as the factor. */
+			if(!parts[1].matches("\\A\\d+\\Z")) {
+				throw new GrammarException("Factor value must be an integer");
+			}
+
+			build.setDescent(parts[0], Integer.parseInt(parts[1]));
+		});
+
+		pragmas.put("enable-binomial", (body, build, level) -> {
+			String[] parts = body.split(" ");
+			/* Expect exactly four space-separated arguments: <rule-name> <target> <bound> <trials> */
+			if(parts.length != 4) {
+				throw new GrammarException("Enable-binomial pragma takes four arguments: the name of the rule to set the binomial factors for, and the three binomial parameters (target, bound, trials)");
+			}
+			/* Each binomial parameter must be an unsigned run of decimal digits. */
+			if(!parts[1].matches("\\A\\d+\\Z")) {
+				throw new GrammarException("Target value must be an integer");
+			}
+			if(!parts[2].matches("\\A\\d+\\Z")) {
+				throw new GrammarException("Bound value must be an integer");
+			}
+			if(!parts[3].matches("\\A\\d+\\Z")) {
+				throw new GrammarException("Trials value must be an integer");
+			}
+
+			build.setBinomial(parts[0], Integer.parseInt(parts[1]), Integer.parseInt(parts[2]), Integer.parseInt(parts[3]));
+		});
+
 		pragmas.put("regex-rule", (body, build, level) -> {
 			int nameIndex = body.indexOf(" ");
 
@@ -83,31 +151,31 @@ public class RGrammarParser {
 			String name = body.substring(0, nameIndex).trim();
 			String patt = body.substring(nameIndex + 1).trim();
 
-			build.regexizeRule(name, patt);
+			//build.regexizeRule(name, patt);
 		});
 
 		pragmas.put("suffix-with", (body, build, level) -> {
-			String[] parts = body.trim().split(" ");
+			int idx = body.indexOf(" "); /* First space splits the rule name from the suffix elements. NOTE(review): body is no longer trimmed first - confirm the caller strips it. */
 
-			if (parts.length != 2) {
-				String msg = "Suffix-with pragma takes two arguments, the name of the rule to suffix, then what to suffix it with";
+			if (idx == -1) {
+				String msg = "Suffix-with pragma takes at least two arguments, the name of the rule to suffix, then what to suffix it with\n\tThis can be more than one token, to get them suffixed as a group";
 
 				throw new GrammarException(msg);
 			}
 
-			build.suffixWith(parts[0], parts[1]);
+			build.suffixWith(body.substring(0, idx), parseElementString(body.substring(idx + 1)).getLeft());
 		});
 
 		pragmas.put("prefix-with", (body, build, level) -> {
-			String[] parts = body.trim().split(" ");
+			int idx = body.indexOf(" "); /* First space splits the rule name from the prefix elements. NOTE(review): body is no longer trimmed first - confirm the caller strips it. */
 
-			if (parts.length != 2) {
-				String msg = "Prefix-with pragma takes two arguments, the name of the rule to prefix, then what to prefix it with";
+			if (idx == -1) {
+				String msg = "Prefix-with pragma takes at least two arguments, the name of the rule to prefix, then what to prefix it with\n\tThis can be more than one token, to get them prefixed as a group";
 
 				throw new GrammarException(msg);
 			}
 
-			build.prefixWith(parts[0], parts[1]);
+			build.prefixWith(body.substring(0, idx), parseElementString(body.substring(idx + 1)).getLeft());
 		});
 	}
 
@@ -134,12 +202,15 @@ public class RGrammarParser {
 			try {
 				RGrammarBuilder build = new RGrammarBuilder();
 
-				reader.forEachBlock((block) -> {
+				for(Block block : reader) {
 					if(DEBUG)
 						System.err.printf("Handling top-level block (%s)\n", block);
 
-					handleBlock(build, block.contents, 0);
-				});
+					handleBlock(build, block.contents, 0, block.startLine);
+				}
+
+				if(LINES)
+					System.err.printf("%d ", reader.getBlock().endLine);
 
 				return build.toRGrammar();
 			} catch (GrammarException gex) {
@@ -155,7 +226,7 @@ public class RGrammarParser {
 
 	/* Handles an arbitrary block. */
 	private static void handleBlock(RGrammarBuilder build, String block, 
-			int level) throws GrammarException {
+			int level, int lineOffset) throws GrammarException {
 		/* Discard empty blocks. */
 		if (block.equals("") || block.matches("\\R"))
 			return;
@@ -170,18 +241,19 @@ public class RGrammarParser {
 		String blockType = block.substring(0, typeSep).trim();
 
 		if (blockType.equalsIgnoreCase("pragma")) {
-			handlePragmaBlock(block, build, level);
+			handlePragmaBlock(block, build, level, lineOffset);
 		} else if (blockType.startsWith("[")) {
-			handleRuleBlock(block, build, level);
+			handleRuleBlock(block, build, level, lineOffset);
 		} else if (blockType.equalsIgnoreCase("where")) {
-			handleWhereBlock(block, build, level);
-		} else if (blockType.equalsIgnoreCase("#")) {
+			handleWhereBlock(block, build, level, lineOffset);
+		} else if (blockType.startsWith("#")) {
 			if(DEBUG)
 				System.err.printf("Handled comment block (%s)\n", block);
 			/*
 			 * Comment block.
 			 *
 			 * @TODO 10/11/17 Ben Culkin :GrammarComment
+			 *
 			 * 	Attach these to the grammar somehow so that they
 			 * 	can be re-output during formatting.
 			 */
@@ -194,11 +266,13 @@ public class RGrammarParser {
 
 	/* Handle reading a block of pragmas. */
 	private static void handlePragmaBlock(String block, RGrammarBuilder build,
-	                                      int level) throws GrammarException {
+	                                      int level, int lineOffset) throws GrammarException {
 		String dlm = String.format(TMPL_PRAGMA_BLOCK_DELIM, level);
 		try (BlockReader pragmaReader = new SimpleBlockReader(dlm, new StringReader(block))) {
 			try {
-				pragmaReader.forEachBlock((pragma) -> {
+				for(Block pragma : pragmaReader) {
+					pragma.lineOffset = lineOffset;
+
 					if(DEBUG)
 						System.err.printf("Handled pragma block (%s)\n", pragma);
 
@@ -221,8 +295,8 @@ public class RGrammarParser {
 						throw new GrammarException(msg);
 					}
 
-					handlePragma(pragmaBody, build, level);
-				});
+					handlePragma(pragmaBody, build, level, pragma.startLine + lineOffset);
+				}
 			} catch (GrammarException gex) {
 				Block pragma = pragmaReader.getBlock();
 				String msg   = String.format("Error in pragma: (%s)", pragma);
@@ -236,7 +310,7 @@ public class RGrammarParser {
 
 	/* Handle an individual pragma in a block. */
 	private static void handlePragma(String pragma, RGrammarBuilder build,
-	                                 int level) throws GrammarException {
+	                                 int level, int lineOffset) throws GrammarException {
 		int bodySep = pragma.indexOf(' ');
 
 		if (bodySep == -1)
@@ -265,7 +339,7 @@ public class RGrammarParser {
 
 	/* Handle a block of a rule declaration and one or more cases. */
 	private static void handleRuleBlock(String ruleBlock, RGrammarBuilder build,
-	                                    int level) throws GrammarException {
+	                                    int level, int lineOffset) throws GrammarException {
 		String dlm = String.format(TMPL_RULEDECL_BLOCK_DELIM, level);
 		try (BlockReader ruleReader = new SimpleBlockReader(dlm, new StringReader(ruleBlock))) {
 			try {
@@ -273,19 +347,20 @@ public class RGrammarParser {
 					/* Rule with a declaration followed by multiple cases. */
 					ruleReader.nextBlock();
 					Block declBlock = ruleReader.getBlock();
+					declBlock.lineOffset = lineOffset;
 
 					String declContents = declBlock.contents;
-					Rule rl = handleRuleDecl(build, declContents);
+					Rule rl = handleRuleDecl(build, declContents, lineOffset + declBlock.startLine);
 
-					ruleReader.forEachBlock((block) -> {
+					for(Block block : ruleReader) {
-						/* Ignore comment lines. */
-						if(block.contents.trim().startsWith("#")) return;
+						/* Skip comment lines; 'continue' (not 'return') so the cases after a comment are still read. */
+						if(block.contents.trim().startsWith("#")) continue;
 
-						handleRuleCase(block.contents, build, rl);
-					});
+						handleRuleCase(block.contents, build, rl, block.startLine + lineOffset);
+					}
 				} else {
 					/* Rule with a declaration followed by a single case. */
-					handleRuleDecl(build, ruleBlock);
+					handleRuleDecl(build, ruleBlock, lineOffset);
 				}
 			} catch (GrammarException gex) {
 				String msg = String.format("Error in rule case (%s)", ruleReader.getBlock());
@@ -298,7 +373,7 @@ public class RGrammarParser {
 	}
 
 	/* Handle a rule declaration and its initial case. */
-	private static Rule handleRuleDecl(RGrammarBuilder build, String declContents) {
+	private static Rule handleRuleDecl(RGrammarBuilder build, String declContents, int lineOffset) {
 		int declSep = declContents.indexOf("\u2192");
 
 		if (declSep == -1) {
@@ -312,7 +387,7 @@ public class RGrammarParser {
 			declSep = declContents.indexOf(' ');
 
 			if (declSep == -1) {
-				String msg = "A rule must be given at least one case in its declaration, and seperated from that case by \u2192";
+				String msg = "A rule must be given at least one case in its declaration, and seperated from that case by \u2192 or ' '";
 
 				throw new GrammarException(msg);
 			}
@@ -327,32 +402,22 @@ public class RGrammarParser {
 
 		Rule rul = build.getOrCreateRule(ruleName);
 
-		handleRuleCase(ruleBody, build, rul);
+		handleRuleCase(ruleBody, build, rul, lineOffset);
 
 		return rul;
 	}
 
 	/* Handle a single case of a rule. */
-	private static void handleRuleCase(String cse, RGrammarBuilder build, Rule rul) {
-		IList<CaseElement> caseParts = new FunctionalList<>();
+	private static void handleRuleCase(String cse, RGrammarBuilder build, Rule rul, int lineOffset) {
+		Pair<IList<CaseElement>, Integer> caseParts = parseElementString(cse); /* left: parsed elements, right: case weight */
 
-		for (String csepart : cse.split(" ")) {
-			String partToAdd = csepart.trim();
-
-			/* Ignore empty parts */
-			if (partToAdd.equals(""))
-				continue;
-
-			caseParts.add(CaseElement.createElement(partToAdd));
-		}
-
-		rul.addCase(new RuleCase(RuleCase.CaseType.NORMAL, caseParts));
+		rul.addCase(new NormalRuleCase(caseParts.getLeft()), caseParts.getRight()); /* NOTE(review): build and lineOffset are accepted but not used in this method. */
 	}
 
 	/* Handle a where block (a block with local rules). */
 	private static void handleWhereBlock(String block, RGrammarBuilder build,
-	                                     int level) throws GrammarException {
-		int nlIndex = block.indexOf("\\n");
+			int level, int lineOffset) throws GrammarException {
+		int nlIndex = block.indexOf("\\nin");
 
 		if (nlIndex == -1) {
 			throw new GrammarException("Where block must be a context followed by a body");
@@ -363,9 +428,10 @@ public class RGrammarParser {
 		String whereDelim = String.format(TMPL_WHERE_BLOCK_DELIM, level);
 
 		try (BlockReader whereReader = new SimpleBlockReader(whereDelim,
-			                new StringReader(trimBlock))) {
+					new StringReader(trimBlock))) {
 			try {
 				Block whereCtx = whereReader.next();
+				whereCtx.lineOffset = lineOffset;
 
 				StringReader ctxReader = new StringReader(whereCtx.contents.trim());
 				String ctxDelim = String.format(TMPL_TOPLEVEL_BLOCK_DELIM, level + 1);
@@ -373,7 +439,9 @@ public class RGrammarParser {
 				try (BlockReader bodyReader = new SimpleBlockReader(ctxDelim, ctxReader)) {
 					@SuppressWarnings("unused")
 					Block whereBody = whereReader.next();
+					whereBody.lineOffset = lineOffset + whereCtx.startLine;
 
+					System.err.printf("\tUNIMPLEMENTED WHERE:\n%s\n", whereBody.contents);
 					/**
 					 * @TODO 10/11/17 Ben Culkin :WhereBlocks
 					 * 	Implement where blocks. 
@@ -385,10 +453,105 @@ public class RGrammarParser {
 				}
 			} catch (GrammarException gex) {
 				throw new GrammarException(String.format("Error in where block (%s)",
-				                           whereReader.getBlock()), gex);
+							whereReader.getBlock()), gex);
 			}
 		} catch (Exception ex) {
 			throw new GrammarException("Unknown error in where block", ex);
 		}
 	}
+	/** Parse a case string by splitting it on single spaces and delegating to {@link #parseElementString(String...)}. */
+	public static Pair<IList<CaseElement>, Integer> parseElementString(String cses) {
+		return parseElementString(cses.split(" "));
+	}
+
+	/**
+	 * Parse a sequence of case-part tokens into case elements and a case weight.
+	 *
+	 * Meta-tokens modify the element that follows them: {@code <^N>} sets the
+	 * weight of the whole case, {@code <&N>} repeats the element N times (zero
+	 * drops it), {@code <&N..M>} wraps it in a {@link SerialCaseElement} and
+	 * {@code <?N>} wraps it in a {@link ChanceCaseElement}. {@code <<>} pops the
+	 * most recently added element and re-adds it with the pending modifiers.
+	 *
+	 * @param cses
+	 * 	The tokens to parse; blank tokens are ignored.
+	 * @return The parsed elements, paired with the case weight (1 by default).
+	 * @throws GrammarException
+	 * 	If a token looks like a meta-rule but is not a known one.
+	 */
+	public static Pair<IList<CaseElement>, Integer> parseElementString(String... cses) {
+		IList<CaseElement> caseParts = new FunctionalList<>();
+
+		int weight = 1;
+		int repCount = 1;
+
+		int serialLower = -1;
+		int serialUpper = -1;
+		int chance = -1;
+
+		boolean doSerial = false;
+		boolean doChance = false;
+
+		for (String csepart : cses) {
+			String partToAdd = csepart.trim();
+			CaseElement elm;
+
+			if (partToAdd.equals("")) {
+				/* Ignore empty parts */
+				continue;
+			} else if(partToAdd.matches("\\<\\^\\d+\\>")) {
+				/* Set case weights */
+				weight = Integer.parseInt(partToAdd.substring(2, partToAdd.length() - 1));
+				continue;
+			} else if(partToAdd.matches("\\<&\\d+\\>")) {
+				/* Set the repeat count for the next element */
+				repCount = Integer.parseInt(partToAdd.substring(2, partToAdd.length() - 1));
+				continue;
+			} else if(partToAdd.matches("\\<&\\d+\\.\\.\\d+\\>")) {
+				/* Set the serial range for the next element */
+				serialLower = Integer.parseInt(partToAdd.substring(2, partToAdd.indexOf(".")));
+				serialUpper = Integer.parseInt(partToAdd.substring(partToAdd.lastIndexOf(".") + 1, partToAdd.length() - 1));
+
+				doSerial = true;
+				continue;
+			} else if(partToAdd.matches("\\<\\?\\d+\\>")) {
+				/* Set the chance for the next element */
+				chance = Integer.parseInt(partToAdd.substring(2, partToAdd.length() - 1));
+
+				doChance = true;
+				continue;
+			} else if (partToAdd.matches("\\<\\<\\>")) {
+				/* Re-process the last element added, using the pending modifiers */
+				elm = caseParts.popLast();
+			} else if(partToAdd.matches("\\<[^\\>]+\\>")) {
+				throw new GrammarException("Unknown parser meta-rule " + partToAdd);
+			} else {
+				elm = CaseElement.createElement(partToAdd);
+			}
+
+			/* A repeat count of zero drops the element entirely. */
+			if(repCount != 0) {
+				if(doChance) {
+					elm = new ChanceCaseElement(elm, chance);
+
+					doChance = false;
+				}
+
+				if(doSerial) {
+					elm = new SerialCaseElement(elm, serialLower, serialUpper);
+
+					doSerial = false;
+				}
+
+				for(int i = 1; i <= repCount; i++) {
+					caseParts.add(elm);
+				}
+			}
+
+			/* Reset on both paths (new element and <<>) so a zero count only ever drops one element. */
+			repCount = 1;
+		}
+
+		return new Pair<>(caseParts, weight);
+	}
 }