diff options
Diffstat (limited to 'src/main/java/bjc/rgens/parser/RGrammar.java')
| -rwxr-xr-x[-rw-r--r--] | src/main/java/bjc/rgens/parser/RGrammar.java | 389 |
1 files changed, 125 insertions, 264 deletions
diff --git a/src/main/java/bjc/rgens/parser/RGrammar.java b/src/main/java/bjc/rgens/parser/RGrammar.java index 38f38c8..337ab35 100644..100755 --- a/src/main/java/bjc/rgens/parser/RGrammar.java +++ b/src/main/java/bjc/rgens/parser/RGrammar.java @@ -1,14 +1,16 @@ package bjc.rgens.parser; -import bjc.rgens.parser.elements.CaseElement; -import bjc.rgens.parser.elements.LiteralCaseElement; -import bjc.rgens.parser.elements.RangeCaseElement; -import bjc.rgens.parser.elements.RuleCaseElement; -import bjc.rgens.parser.elements.VariableCaseElement; +import bjc.utils.data.IPair; +import bjc.utils.data.Pair; import bjc.utils.funcutils.StringUtils; +import bjc.rgens.parser.elements.*; + +import java.util.Arrays; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Random; import java.util.Set; @@ -22,12 +24,23 @@ import edu.gatech.gtri.bktree.BkTreeSearcher.Match; import edu.gatech.gtri.bktree.Metric; import edu.gatech.gtri.bktree.MutableBkTree; +import static bjc.utils.data.IPair.pair; + /** * Represents a randomized grammar. * * @author EVE */ public class RGrammar { + public RGrammarSet belongsTo; + + public String name; + + public List<IPair<String, String>> postprocs; + + private static final List<IPair<String, String>> builtinPostprocs; + public boolean useBuiltinPostprocs = true; + /* The max distance between possible alternate rules. */ private static final int MAX_DISTANCE = 6; @@ -48,42 +61,10 @@ public class RGrammar { } } - /* The current state during generation. */ - private static class GenerationState { - /* The current string. */ - public StringBuilder contents; - /* The RNG. */ - public Random rnd; - - /* The current set of variables. */ - public Map<String, String> vars; - - /** - * Create a new generation state. - * - * @param cont - * The string being generated. - * - * @param rand - * The RNG to use. - * - * @param vs - * The variables to use. - */ - public GenerationState(StringBuilder cont, Random rand, Map<String, String> vs) { - contents = cont; - rnd = rand; - vars = vs; - } - } - - /* The pattern for matching the name of a variable. */ - private static Pattern NAMEVAR_PATTERN = Pattern.compile("\\$(\\w+)"); - /* The rules of the grammar. */ - private Map<String, Rule> rules; + public Map<String, Rule> rules; /* The rules imported from other grammars. */ - private Map<String, RGrammar> importRules; + private Map<String, Rule> importRules; /* The rules exported from this grammar. */ private Set<String> exportRules; /* The initial rule of this grammar. */ @@ -92,6 +73,43 @@ public class RGrammar { /* The tree to use for finding rule suggestions. */ private BkTreeSearcher<String> ruleSearcher; + static { + /* Collapse duplicate spaces */ + IPair<String, String> collapseDupSpaces = pair("\\s+", " "); + + /* Built-in post-processing steps */ + builtinPostprocs = Arrays.asList( + collapseDupSpaces, + + /* + * Remove extraneous spaces around punctuation + * marks, forced by the way the language syntax + * works. + * + * This can be done in grammars, but it is quite + * tedious to do so. + */ + + + /* Handle 's */ + pair(" 's ", "'s "), + /* Handle opening/closing punctuation. */ + pair("([(\\[]) ", " $1"), + pair(" ([)\\]'\"])", "$1 "), + /* Remove spaces around series of opening/closing punctuation. */ + pair("([(\\[])\\s+([(\\[])", "$1$2"), + pair("([)\\]])\\s+([)\\]])", "$1$2"), + /* Handle inter-word punctuation. */ + pair(" ([,:.!])", "$1 "), + /* Handle intra-word punctuation. */ + pair("\\s?([-/])\\s?", "$1"), + + collapseDupSpaces, + + /* Replace this once it is no longer needed. */ + pair("\\s(ish|burg|ton|ville|opolis|field|boro|dale)", "$1") + ); + } /** * Create a new randomized grammar using the specified set of rules. * @@ -100,6 +118,12 @@ public class RGrammar { */ public RGrammar(Map<String, Rule> ruls) { rules = ruls; + + for(Rule rl : ruls.values()) { + rl.belongsTo = this; + } + + postprocs = new ArrayList<>(); } /** @@ -111,7 +135,7 @@ public class RGrammar { * @param importedRules * The set of imported rules to use. */ - public void setImportedRules(Map<String, RGrammar> importedRules) { + public void setImportedRules(Map<String, Rule> importedRules) { importRules = importedRules; } @@ -137,7 +161,7 @@ public class RGrammar { * @return A possible string from the grammar. */ public String generate(String startRule) { - return generate(startRule, new Random(), new HashMap<>()); + return generate(startRule, new Random(), new HashMap<>(), new HashMap<>()); } /** @@ -155,7 +179,26 @@ public class RGrammar { * * @return A possible string from the grammar. */ - public String generate(String startRule, Random rnd, Map<String, String> vars) { + public String generate(String startRule, Random rnd, Map<String, String> vars, + Map<String, Rule> rlVars) { + return generate(startRule, new GenerationState(new StringBuilder(), rnd, vars, rlVars, this)); + } + + /** + * Generate a string from this grammar, starting from the specified rule. + * + * @param startRule + * The rule to start generating at, or null to use the initial rule + * for this grammar. + * + * @param state + * The generation state. + */ + public String generate(String startRule, GenerationState state) { + return generate(startRule, state, true); + } + + public String generate(String startRule, GenerationState state, boolean doPostprocess) { String fromRule = startRule; if (startRule == null) { @@ -170,242 +213,55 @@ public class RGrammar { } } - RuleCase start = rules.get(fromRule).getCase(rnd); - - StringBuilder contents = new StringBuilder(); - - generateCase(start, new GenerationState(contents, rnd, vars)); - - String body = contents.toString(); - /* - * Collapse duplicate spaces. + /* + * We don't search imports, so it will always belong to this + * grammar. */ - body = body.replaceAll("\\s+", " "); - - /* - * Remove extraneous spaces around punctutation marks. - * - * This can be done in the grammars, but it is very tedious to do so. - */ - - /* Handle 's */ - body = body.replaceAll(" 's ", "'s "); - - /* Handle opening/closing punctuation. */ - body = body.replaceAll("([(\\[]) ", " $1"); - body = body.replaceAll(" ([)\\]'\"])", "$1 "); + Rule rl = state.findRule(fromRule, false); - /* Remove spaces around series of opening/closing punctuation. */ - body = body.replaceAll("([(\\[])\\s+([(\\[])", "$1$2"); - body = body.replaceAll("([)\\]])\\s+([)\\]])", "$1$2"); + if(rl == null) + throw new GrammarException("Could not find rule " + rl.name); - /* Handle inter-word punctuation. */ - body = body.replaceAll(" ([,:.!])", "$1 "); + rl.generate(state); - /* Handle intra-word punctuation. */ - body = body.replaceAll("\\s?([-/])\\s?", "$1"); + String body = state.contents.toString(); - /* - * Collapse duplicate spaces. - */ - body = body.replaceAll("\\s+", " "); - - /* - * @TODO 11/01/17 Ben Culkin :RegexRule Replace this once it is no longer - * needed. - */ - body = body.replaceAll("\\s(ish|burg|ton|ville|opolis|field|boro|dale)", "$1"); + if(doPostprocess) { + body = postprocessRes(body); + } return body; } - /* Generate a rule case. */ - private void generateCase(RuleCase start, GenerationState state) { - try { - switch (start.type) { - case NORMAL: - for (CaseElement elm : start.getElements()) { - generateElement(elm, state); - - if (elm.type != CaseElement.ElementType.VARDEF) { - state.contents.append(" "); - } - } - break; - case SPACEFLATTEN: - for (CaseElement elm : start.getElements()) { - generateElement(elm, state); - } - break; - default: - String msg = String.format("Unknown case type '%s'", start.type); - throw new GrammarException(msg); - } - } catch (GrammarException gex) { - String msg = String.format("Error in generating case (%s)", start); - throw new GrammarException(msg, gex); - } - } - - /* Generate a case element. */ - private void generateElement(CaseElement elm, GenerationState state) { - try { - switch (elm.type) { - case LITERAL: { - LiteralCaseElement lit = (LiteralCaseElement)elm; - - state.contents.append(lit.val); - break; - } - case RULEREF: { - RuleCaseElement rle = (RuleCaseElement)elm; - - generateRuleReference(rle, state); - break; - } - case RANGE: { - RangeCaseElement rang = (RangeCaseElement)elm; + private String postprocessRes(String strang) { + String body = strang; - int val = state.rnd.nextInt(rang.end - rang.begin); - val += rang.begin; - - state.contents.append(val); - break; - } - case VARDEF: - generateVarDef(((VariableCaseElement)elm).varName, ((VariableCaseElement)elm).varDef, state); - break; - case EXPVARDEF: - generateExpVarDef(((VariableCaseElement)elm).varName, ((VariableCaseElement)elm).varDef, state); - break; - default: - String msg = String.format("Unknown element type '%s'", elm.type); - throw new GrammarException(msg); + if(useBuiltinPostprocs) { + for(IPair<String, String> par : builtinPostprocs) { + body = body.replaceAll(par.getLeft(), par.getRight()); } - } catch (GrammarException gex) { - String msg = String.format("Error in generating case element (%s)", elm); - throw new GrammarException(msg, gex); } - } - - /* Generate a expanding variable definition. */ - private void generateExpVarDef(String name, String defn, GenerationState state) { - GenerationState newState = new GenerationState(new StringBuilder(), state.rnd, state.vars); - if (rules.containsKey(defn)) { - RuleCase destCase = rules.get(defn).getCase(); - - generateCase(destCase, newState); - } else if (importRules.containsKey(defn)) { - RGrammar destGrammar = importRules.get(defn); - String res = destGrammar.generate(defn, state.rnd, state.vars); - - newState.contents.append(res); - } else { - String msg = String.format("No rule '%s' defined", defn); - throw new GrammarException(msg); + for(IPair<String, String> par : postprocs) { + body = body.replaceAll(par.getLeft(), par.getRight()); } - state.vars.put(name, newState.contents.toString()); - } - - /* Generate a variable definition. */ - private static void generateVarDef(String name, String defn, GenerationState state) { - state.vars.put(name, defn); + return body.trim(); } - - /* Generate a rule reference. */ - private void generateRuleReference(RuleCaseElement elm, GenerationState state) { - String refersTo = elm.val; - - GenerationState newState = new GenerationState(new StringBuilder(), state.rnd, state.vars); - - if (refersTo.contains("$")) { - /* Parse variables */ - String refBody = refersTo.substring(1, refersTo.length() - 1); - - if (refBody.contains("-")) { - /* Handle dependent rule names. */ - StringBuffer nameBuffer = new StringBuffer(); - - Matcher nameMatcher = NAMEVAR_PATTERN.matcher(refBody); - - while (nameMatcher.find()) { - String var = nameMatcher.group(1); - - if (!state.vars.containsKey(var)) { - String msg = String.format("No variable '%s' defined", var); - throw new GrammarException(msg); - } - - String name = state.vars.get(var); - - if (name.contains(" ")) { - throw new GrammarException("Variables substituted into names cannot contain spaces"); - } else if (name.equals("")) { - throw new GrammarException("Variables substituted into names cannot be empty"); - } - - nameMatcher.appendReplacement(nameBuffer, name); - } - - nameMatcher.appendTail(nameBuffer); - - refersTo = "[" + nameBuffer.toString() + "]"; - } else { - /* Handle string references. */ - if (refBody.equals("$")) { - throw new GrammarException("Cannot refer to unnamed variables"); - } - - String key = refBody.substring(1); - - if (!state.vars.containsKey(key)) { - String msg = String.format("No variable '%s' defined", key); - throw new GrammarException(msg); - } - - state.contents.append(state.vars.get(key)); - - return; - } - } - - if (refersTo.startsWith("[^")) { - refersTo = "[" + refersTo.substring(2); - - RGrammar dst = importRules.get(refersTo); - - newState.contents.append(dst.generate(refersTo, state.rnd, state.vars)); - } else if (rules.containsKey(refersTo)) { - RuleCase cse = rules.get(refersTo).getCase(state.rnd); - - generateCase(cse, newState); - } else if (importRules.containsKey(refersTo)) { - RGrammar dst = importRules.get(refersTo); - - newState.contents.append(dst.generate(refersTo, state.rnd, state.vars)); - } else { - if (ruleSearcher != null) { - Set<Match<? extends String>> results = ruleSearcher.search(refersTo, MAX_DISTANCE); - - String[] resArray = results.stream().map(Match::getMatch).toArray((i) -> new String[i]); - - String msg = String.format("No rule '%s' defined (perhaps you meant %s?)", refersTo, - StringUtils.toEnglishList(resArray, false)); - - throw new GrammarException(msg); - } - - String msg = String.format("No rule '%s' defined", refersTo); - throw new GrammarException(msg); - } - - if (refersTo.contains("+")) { - /* Rule names with pluses in them get space-flattened */ - state.contents.append(newState.contents.toString().replaceAll("\\s+", "")); - } else { - state.contents.append(newState.contents.toString()); + /** + * Generate a rule case. + * + * @param start + * The rule case to generate. + * @param state + * The current generation state. + */ + public void generateCase(RuleCase start, GenerationState state) { + try { + start.generate(state); + } catch (GrammarException gex) { + String msg = String.format("Error in generating case (%s)", start); + throw new GrammarException(msg, gex); } } @@ -435,7 +291,7 @@ public class RGrammar { if (initRule.equals("")) { throw new GrammarException("The empty string is not a valid rule name"); } else if (!rules.containsKey(initRule)) { - String msg = String.format("No rule '%s' local to this grammar defined.", initRule); + String msg = String.format("No rule '%s' local to this grammar (%s) defined.", initRule, name); throw new GrammarException(msg); } @@ -455,7 +311,8 @@ public class RGrammar { for (String rname : exportRules) { if (!rules.containsKey(rname)) { - String msg = String.format("No rule '%s' local to this grammar defined", initialRule); + String msg = String.format("No rule '%s' local to this grammar (%s) defined for export", + name, rname); throw new GrammarException(msg); } @@ -488,4 +345,8 @@ public class RGrammar { public Map<String, Rule> getRules() { return rules; } + + public Map<String, Rule> getImportRules() { + return importRules; + } } |
