summaryrefslogtreecommitdiff
path: root/src/main/java/bjc/rgens/parser/RGrammar.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/bjc/rgens/parser/RGrammar.java')
-rwxr-xr-x[-rw-r--r--]src/main/java/bjc/rgens/parser/RGrammar.java389
1 files changed, 125 insertions, 264 deletions
diff --git a/src/main/java/bjc/rgens/parser/RGrammar.java b/src/main/java/bjc/rgens/parser/RGrammar.java
index 38f38c8..337ab35 100644..100755
--- a/src/main/java/bjc/rgens/parser/RGrammar.java
+++ b/src/main/java/bjc/rgens/parser/RGrammar.java
@@ -1,14 +1,16 @@
package bjc.rgens.parser;
-import bjc.rgens.parser.elements.CaseElement;
-import bjc.rgens.parser.elements.LiteralCaseElement;
-import bjc.rgens.parser.elements.RangeCaseElement;
-import bjc.rgens.parser.elements.RuleCaseElement;
-import bjc.rgens.parser.elements.VariableCaseElement;
+import bjc.utils.data.IPair;
+import bjc.utils.data.Pair;
import bjc.utils.funcutils.StringUtils;
+import bjc.rgens.parser.elements.*;
+
+import java.util.Arrays;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
@@ -22,12 +24,23 @@ import edu.gatech.gtri.bktree.BkTreeSearcher.Match;
import edu.gatech.gtri.bktree.Metric;
import edu.gatech.gtri.bktree.MutableBkTree;
+import static bjc.utils.data.IPair.pair;
+
/**
* Represents a randomized grammar.
*
* @author EVE
*/
public class RGrammar {
+ /* NOTE(review): presumably the grammar set this grammar is part of; it is assigned outside this view -- confirm. */
+ public RGrammarSet belongsTo;
+
+ /* The name of this grammar; included in error messages to identify it. */
+ public String name;
+
+ /*
+ * Grammar-specific post-processing steps, as (regex, replacement)
+ * pairs. They are applied in list order, after the built-in steps.
+ */
+ public List<IPair<String, String>> postprocs;
+
+ /* The built-in post-processing steps, as (regex, replacement) pairs. */
+ private static final List<IPair<String, String>> builtinPostprocs;
+ /* Whether the built-in post-processing steps are applied to generated text. */
+ public boolean useBuiltinPostprocs = true;
+
/* The max distance between possible alternate rules. */
private static final int MAX_DISTANCE = 6;
@@ -48,42 +61,10 @@ public class RGrammar {
}
}
- /* The current state during generation. */
- private static class GenerationState {
- /* The current string. */
- public StringBuilder contents;
- /* The RNG. */
- public Random rnd;
-
- /* The current set of variables. */
- public Map<String, String> vars;
-
- /**
- * Create a new generation state.
- *
- * @param cont
- * The string being generated.
- *
- * @param rand
- * The RNG to use.
- *
- * @param vs
- * The variables to use.
- */
- public GenerationState(StringBuilder cont, Random rand, Map<String, String> vs) {
- contents = cont;
- rnd = rand;
- vars = vs;
- }
- }
-
- /* The pattern for matching the name of a variable. */
- private static Pattern NAMEVAR_PATTERN = Pattern.compile("\\$(\\w+)");
-
/* The rules of the grammar. */
- private Map<String, Rule> rules;
+ public Map<String, Rule> rules;
/* The rules imported from other grammars. */
- private Map<String, RGrammar> importRules;
+ private Map<String, Rule> importRules;
/* The rules exported from this grammar. */
private Set<String> exportRules;
/* The initial rule of this grammar. */
@@ -92,6 +73,43 @@ public class RGrammar {
/* The tree to use for finding rule suggestions. */
private BkTreeSearcher<String> ruleSearcher;
+ /*
+ * Build the built-in post-processing steps.
+ *
+ * Each step is a (regex, replacement) pair. The steps are applied
+ * with String.replaceAll in list order, each one working on the
+ * output of the previous one, so the order of the entries matters.
+ */
+ static {
+ /* Collapse any run of whitespace into a single space. */
+ IPair<String, String> collapseDupSpaces = pair("\\s+", " ");
+
+ /* Built-in post-processing steps */
+ builtinPostprocs = Arrays.asList(
+ collapseDupSpaces,
+
+ /*
+ * Remove extraneous spaces around punctuation
+ * marks, forced by the way the language syntax
+ * works.
+ *
+ * This can be done in grammars, but it is quite
+ * tedious to do so.
+ */
+
+
+ /* Handle 's: attach the possessive to the preceding word. */
+ pair(" 's ", "'s "),
+ /*
+ * Handle opening/closing punctuation: move the space after
+ * an opening bracket to before it, and the space before a
+ * closing bracket or quote to after it.
+ */
+ pair("([(\\[]) ", " $1"),
+ pair(" ([)\\]'\"])", "$1 "),
+ /* Remove spaces around series of opening/closing punctuation. */
+ pair("([(\\[])\\s+([(\\[])", "$1$2"),
+ pair("([)\\]])\\s+([)\\]])", "$1$2"),
+ /* Handle inter-word punctuation: move the space before the mark to after it. */
+ pair(" ([,:.!])", "$1 "),
+ /* Handle intra-word punctuation: drop whitespace on either side of - and /. */
+ pair("\\s?([-/])\\s?", "$1"),
+
+ /* The moves above can leave doubled spaces behind, so collapse again. */
+ collapseDupSpaces,
+
+ /*
+ * Glue the listed suffixes onto the preceding word.
+ *
+ * Replace this once it is no longer needed.
+ *
+ * NOTE(review): the pattern has no trailing word boundary, so it
+ * also fires on words that merely start with one of these
+ * suffixes (for example " tonight").
+ */
+ pair("\\s(ish|burg|ton|ville|opolis|field|boro|dale)", "$1")
+ );
+ }
/**
* Create a new randomized grammar using the specified set of rules.
*
@@ -100,6 +118,12 @@ public class RGrammar {
*/
public RGrammar(Map<String, Rule> ruls) {
rules = ruls;
+
+ /* Point each supplied rule's belongsTo back at this grammar. */
+ for(Rule rl : ruls.values()) {
+ rl.belongsTo = this;
+ }
+
+ /* Start with no grammar-specific post-processing steps. */
+ postprocs = new ArrayList<>();
}
/**
@@ -111,7 +135,7 @@ public class RGrammar {
* @param importedRules
* The set of imported rules to use.
*/
- public void setImportedRules(Map<String, RGrammar> importedRules) {
+ public void setImportedRules(Map<String, Rule> importedRules) {
importRules = importedRules;
}
@@ -137,7 +161,7 @@ public class RGrammar {
* @return A possible string from the grammar.
*/
public String generate(String startRule) {
- return generate(startRule, new Random(), new HashMap<>());
+ return generate(startRule, new Random(), new HashMap<>(), new HashMap<>());
}
/**
@@ -155,7 +179,26 @@ public class RGrammar {
*
* @return A possible string from the grammar.
*/
- public String generate(String startRule, Random rnd, Map<String, String> vars) {
+ public String generate(String startRule, Random rnd, Map<String, String> vars,
+ Map<String, Rule> rlVars) {
+ return generate(startRule, new GenerationState(new StringBuilder(), rnd, vars, rlVars, this));
+ }
+
+ /**
+ * Generate a string from this grammar, starting from the specified rule.
+ *
+ * Post-processing is always applied; use the overload taking a
+ * doPostprocess flag to skip it.
+ *
+ * @param startRule
+ * The rule to start generating at, or null to use the initial rule
+ * for this grammar.
+ *
+ * @param state
+ * The generation state.
+ *
+ * @return The generated string, after post-processing.
+ */
+ public String generate(String startRule, GenerationState state) {
+ return generate(startRule, state, true);
+ }
+
+ public String generate(String startRule, GenerationState state, boolean doPostprocess) {
String fromRule = startRule;
if (startRule == null) {
@@ -170,242 +213,55 @@ public class RGrammar {
}
}
- RuleCase start = rules.get(fromRule).getCase(rnd);
-
- StringBuilder contents = new StringBuilder();
-
- generateCase(start, new GenerationState(contents, rnd, vars));
-
- String body = contents.toString();
- /*
- * Collapse duplicate spaces.
+ /*
+ * We don't search imports, so it will always belong to this
+ * grammar.
*/
- body = body.replaceAll("\\s+", " ");
-
- /*
- * Remove extraneous spaces around punctutation marks.
- *
- * This can be done in the grammars, but it is very tedious to do so.
- */
-
- /* Handle 's */
- body = body.replaceAll(" 's ", "'s ");
-
- /* Handle opening/closing punctuation. */
- body = body.replaceAll("([(\\[]) ", " $1");
- body = body.replaceAll(" ([)\\]'\"])", "$1 ");
+ Rule rl = state.findRule(fromRule, false);
- /* Remove spaces around series of opening/closing punctuation. */
- body = body.replaceAll("([(\\[])\\s+([(\\[])", "$1$2");
- body = body.replaceAll("([)\\]])\\s+([)\\]])", "$1$2");
+ /* rl is null on this path, so report the name that was looked up rather than dereferencing rl. */
+ if(rl == null)
+ throw new GrammarException("Could not find rule " + fromRule);
- /* Handle inter-word punctuation. */
- body = body.replaceAll(" ([,:.!])", "$1 ");
+ rl.generate(state);
- /* Handle intra-word punctuation. */
- body = body.replaceAll("\\s?([-/])\\s?", "$1");
+ String body = state.contents.toString();
- /*
- * Collapse duplicate spaces.
- */
- body = body.replaceAll("\\s+", " ");
-
- /*
- * @TODO 11/01/17 Ben Culkin :RegexRule Replace this once it is no longer
- * needed.
- */
- body = body.replaceAll("\\s(ish|burg|ton|ville|opolis|field|boro|dale)", "$1");
+ if(doPostprocess) {
+ body = postprocessRes(body);
+ }
return body;
}
- /* Generate a rule case. */
- private void generateCase(RuleCase start, GenerationState state) {
- try {
- switch (start.type) {
- case NORMAL:
- for (CaseElement elm : start.getElements()) {
- generateElement(elm, state);
-
- if (elm.type != CaseElement.ElementType.VARDEF) {
- state.contents.append(" ");
- }
- }
- break;
- case SPACEFLATTEN:
- for (CaseElement elm : start.getElements()) {
- generateElement(elm, state);
- }
- break;
- default:
- String msg = String.format("Unknown case type '%s'", start.type);
- throw new GrammarException(msg);
- }
- } catch (GrammarException gex) {
- String msg = String.format("Error in generating case (%s)", start);
- throw new GrammarException(msg, gex);
- }
- }
-
- /* Generate a case element. */
- private void generateElement(CaseElement elm, GenerationState state) {
- try {
- switch (elm.type) {
- case LITERAL: {
- LiteralCaseElement lit = (LiteralCaseElement)elm;
-
- state.contents.append(lit.val);
- break;
- }
- case RULEREF: {
- RuleCaseElement rle = (RuleCaseElement)elm;
-
- generateRuleReference(rle, state);
- break;
- }
- case RANGE: {
- RangeCaseElement rang = (RangeCaseElement)elm;
+ /*
+ * Post-process generated text: apply the built-in steps (when
+ * useBuiltinPostprocs is set), then this grammar's own postprocs, each
+ * as a regex replaceAll in list order, and finally trim the result.
+ */
+ private String postprocessRes(String strang) {
+ String body = strang;
- int val = state.rnd.nextInt(rang.end - rang.begin);
- val += rang.begin;
-
- state.contents.append(val);
- break;
- }
- case VARDEF:
- generateVarDef(((VariableCaseElement)elm).varName, ((VariableCaseElement)elm).varDef, state);
- break;
- case EXPVARDEF:
- generateExpVarDef(((VariableCaseElement)elm).varName, ((VariableCaseElement)elm).varDef, state);
- break;
- default:
- String msg = String.format("Unknown element type '%s'", elm.type);
- throw new GrammarException(msg);
+ if(useBuiltinPostprocs) {
+ for(IPair<String, String> par : builtinPostprocs) {
+ body = body.replaceAll(par.getLeft(), par.getRight());
}
- } catch (GrammarException gex) {
- String msg = String.format("Error in generating case element (%s)", elm);
- throw new GrammarException(msg, gex);
}
- }
-
- /* Generate a expanding variable definition. */
- private void generateExpVarDef(String name, String defn, GenerationState state) {
- GenerationState newState = new GenerationState(new StringBuilder(), state.rnd, state.vars);
- if (rules.containsKey(defn)) {
- RuleCase destCase = rules.get(defn).getCase();
-
- generateCase(destCase, newState);
- } else if (importRules.containsKey(defn)) {
- RGrammar destGrammar = importRules.get(defn);
- String res = destGrammar.generate(defn, state.rnd, state.vars);
-
- newState.contents.append(res);
- } else {
- String msg = String.format("No rule '%s' defined", defn);
- throw new GrammarException(msg);
+ for(IPair<String, String> par : postprocs) {
+ body = body.replaceAll(par.getLeft(), par.getRight());
}
- state.vars.put(name, newState.contents.toString());
- }
-
- /* Generate a variable definition. */
- private static void generateVarDef(String name, String defn, GenerationState state) {
- state.vars.put(name, defn);
+ return body.trim();
}
-
- /* Generate a rule reference. */
- private void generateRuleReference(RuleCaseElement elm, GenerationState state) {
- String refersTo = elm.val;
-
- GenerationState newState = new GenerationState(new StringBuilder(), state.rnd, state.vars);
-
- if (refersTo.contains("$")) {
- /* Parse variables */
- String refBody = refersTo.substring(1, refersTo.length() - 1);
-
- if (refBody.contains("-")) {
- /* Handle dependent rule names. */
- StringBuffer nameBuffer = new StringBuffer();
-
- Matcher nameMatcher = NAMEVAR_PATTERN.matcher(refBody);
-
- while (nameMatcher.find()) {
- String var = nameMatcher.group(1);
-
- if (!state.vars.containsKey(var)) {
- String msg = String.format("No variable '%s' defined", var);
- throw new GrammarException(msg);
- }
-
- String name = state.vars.get(var);
-
- if (name.contains(" ")) {
- throw new GrammarException("Variables substituted into names cannot contain spaces");
- } else if (name.equals("")) {
- throw new GrammarException("Variables substituted into names cannot be empty");
- }
-
- nameMatcher.appendReplacement(nameBuffer, name);
- }
-
- nameMatcher.appendTail(nameBuffer);
-
- refersTo = "[" + nameBuffer.toString() + "]";
- } else {
- /* Handle string references. */
- if (refBody.equals("$")) {
- throw new GrammarException("Cannot refer to unnamed variables");
- }
-
- String key = refBody.substring(1);
-
- if (!state.vars.containsKey(key)) {
- String msg = String.format("No variable '%s' defined", key);
- throw new GrammarException(msg);
- }
-
- state.contents.append(state.vars.get(key));
-
- return;
- }
- }
-
- if (refersTo.startsWith("[^")) {
- refersTo = "[" + refersTo.substring(2);
-
- RGrammar dst = importRules.get(refersTo);
-
- newState.contents.append(dst.generate(refersTo, state.rnd, state.vars));
- } else if (rules.containsKey(refersTo)) {
- RuleCase cse = rules.get(refersTo).getCase(state.rnd);
-
- generateCase(cse, newState);
- } else if (importRules.containsKey(refersTo)) {
- RGrammar dst = importRules.get(refersTo);
-
- newState.contents.append(dst.generate(refersTo, state.rnd, state.vars));
- } else {
- if (ruleSearcher != null) {
- Set<Match<? extends String>> results = ruleSearcher.search(refersTo, MAX_DISTANCE);
-
- String[] resArray = results.stream().map(Match::getMatch).toArray((i) -> new String[i]);
-
- String msg = String.format("No rule '%s' defined (perhaps you meant %s?)", refersTo,
- StringUtils.toEnglishList(resArray, false));
-
- throw new GrammarException(msg);
- }
-
- String msg = String.format("No rule '%s' defined", refersTo);
- throw new GrammarException(msg);
- }
-
- if (refersTo.contains("+")) {
- /* Rule names with pluses in them get space-flattened */
- state.contents.append(newState.contents.toString().replaceAll("\\s+", ""));
- } else {
- state.contents.append(newState.contents.toString());
+ /**
+ * Generate a rule case.
+ *
+ * @param start
+ * The rule case to generate.
+ * @param state
+ * The current generation state.
+ *
+ * @throws GrammarException
+ * Wrapping any GrammarException thrown while generating the
+ * case, with the case included in the message.
+ */
+ public void generateCase(RuleCase start, GenerationState state) {
+ try {
+ start.generate(state);
+ } catch (GrammarException gex) {
+ String msg = String.format("Error in generating case (%s)", start);
+ throw new GrammarException(msg, gex);
}
}
@@ -435,7 +291,7 @@ public class RGrammar {
if (initRule.equals("")) {
throw new GrammarException("The empty string is not a valid rule name");
} else if (!rules.containsKey(initRule)) {
- String msg = String.format("No rule '%s' local to this grammar defined.", initRule);
+ String msg = String.format("No rule '%s' local to this grammar (%s) defined.", initRule, name);
throw new GrammarException(msg);
}
@@ -455,7 +311,8 @@ public class RGrammar {
for (String rname : exportRules) {
if (!rules.containsKey(rname)) {
- String msg = String.format("No rule '%s' local to this grammar defined", initialRule);
+ /* Arguments follow the placeholders: the rule name first, then the grammar name. */
+ String msg = String.format("No rule '%s' local to this grammar (%s) defined for export",
+ rname, name);
throw new GrammarException(msg);
}
@@ -488,4 +345,8 @@ public class RGrammar {
public Map<String, Rule> getRules() {
return rules;
}
+
+ public Map<String, Rule> getImportRules() {
+ return importRules;
+ }
}