diff options
Diffstat (limited to 'src/main/java')
46 files changed, 2056 insertions, 665 deletions
diff --git a/src/main/java/bjc/rgens/ZadronsPouch.java b/src/main/java/bjc/rgens/ZadronsPouch.java index 827d022..827d022 100644..100755 --- a/src/main/java/bjc/rgens/ZadronsPouch.java +++ b/src/main/java/bjc/rgens/ZadronsPouch.java diff --git a/src/main/java/bjc/rgens/parser/ConfigLoader.java b/src/main/java/bjc/rgens/parser/ConfigLoader.java new file mode 100644 index 0000000..6e9da16 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/ConfigLoader.java @@ -0,0 +1,242 @@ +package bjc.rgens.parser; + +import java.io.BufferedReader; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Scanner; + +import bjc.rgens.parser.templates.GrammarTemplate; + +public class ConfigLoader { + /** + * Load a grammar set from a configuration file. + * + * @param cfgFile + * The configuration file to load from. + * + * @return + * The grammar set created by the configuration file. + * + * @throws IOException + * If something goes wrong during configuration loading. + */ + public static ConfigSet fromConfigFile(Path cfgFile) throws IOException { + ConfigSet cfgSet = new ConfigSet(); + + /* The grammar set we're parsing into. */ + RGrammarSet set = new RGrammarSet(); + cfgSet.grammars.put("default", set); + set.belongsTo = cfgSet; + set.name = "default"; + + long startCFGTime = System.nanoTime(); + + /* Get the directory that contains the config file. */ + Path cfgParent = cfgFile.getParent(); + + try(Scanner scn = new Scanner(cfgFile)) { + int lno = 0; + + /* Execute lines from the configuration file. */ + while (scn.hasNextLine()) { + String ln = scn.nextLine().trim(); + + lno += 1; + + try { + /* Ignore blank/comment lines. */ + if (ln.equals("")) continue; + + if (ln.startsWith("#")) continue; + + /* Handle mixed whitespace. 
*/ + ln = ln.replaceAll("\\s+", " "); + + /* Get line type */ + int typeIdx = ln.indexOf(" "); + if(typeIdx == -1) { + throw new GrammarException("Must specify config line type"); + } + String type = ln.substring(0, typeIdx).trim(); + ln = ln.substring(typeIdx).trim(); + + switch(type) { + case "load": + loadConfigLine(ln, cfgSet, set, cfgParent); + break; + default: + throw new GrammarException("Unknown config line type " + type); + } + } catch(GrammarException gex) { + System.out.printf("ERROR: Line %s of config set %s\n", lno, cfgFile); + + System.err.printf("ERROR: Line %s of config set %s\n", lno, cfgFile); + gex.printStackTrace(); + + System.out.println(); + System.out.println(); + + System.err.println(); + System.err.println(); + } + } + } + + long endCFGTime = System.nanoTime(); + + long cfgDur = endCFGTime - startCFGTime; + + System.err.printf("\n\nPERF: Read config file %s in %d ns (%f s)\n", cfgFile, cfgDur, cfgDur / 1000000000.0); + + return cfgSet; + } + + private static void loadConfigLine(String ln, ConfigSet cfgSet, RGrammarSet set, Path cfgParent) throws IOException { + /* + * Get the place where the tag ID ends + */ + int tagIdx = ln.indexOf(" "); + if(tagIdx == -1) { + throw new GrammarException("Must specify a tag as to what a line is"); + } + String tag = ln.substring(0, tagIdx).trim(); + ln = ln.substring(tagIdx).trim(); + + /* + * Get the place where the name of the grammar + * ends. 
+ */ + int nameIdx = ln.indexOf(" "); + if (nameIdx == -1) { + throw new GrammarException("Must specify a name for a loaded object"); + } + String name = ln.substring(0, nameIdx); + ln = ln.substring(nameIdx).trim(); + + switch(tag) { + case "template": + loadTemplate(name, ln, cfgSet, set, cfgParent); + break; + case "subset": + { + /* @TODO implement subset grammars */ + throw new GrammarException("Sub-grammar sets aren't implemented yet"); + } + case "gram": + case "grammar": + loadGrammar(name, ln, cfgSet, set, cfgParent); + break; + default: + String msg = String.format("Unrecognized tag type '%s'", tag); + throw new GrammarException(msg); + } + } + + private static void loadTemplate(String name, String ln, ConfigSet cfgSet, RGrammarSet set, Path cfgParent) throws IOException { + Path path = Paths.get(ln); + + /* + * Convert from configuration relative path to + * absolute path. + */ + Path convPath = cfgParent.resolve(path.toString()); + + if(Files.isDirectory(convPath)) { + throw new GrammarException("Can't load grammar from directory" + convPath.toString()); + } else { + /* Load template file. */ + try { + long startFileTime = System.nanoTime(); + + BufferedReader fis = Files.newBufferedReader(convPath); + GrammarTemplate template = GrammarTemplate.readTemplate(fis); + template.belongsTo = cfgSet; + + if(template.name == null) { + System.err.printf("\tINFO: Naming unnamed template loaded from %s off config name '%s'\n", + convPath, name); + + template.name = name; + } + + fis.close(); + + long endFileTime = System.nanoTime(); + + long fileTime = endFileTime - startFileTime; + + System.err.printf("\tPERF: Read template %s (from %s) in %d ns (%f s)\n", + template.name, convPath, fileTime, fileTime / 1000000000.0); + + /* Add grammar to the set. */ + cfgSet.templates.put(name, template); + + /* + * @NOTE + * + * Do we need to do this + * for templates? + * + * Mark where the + * template came + * from. 
+ */ + //set.loadedFrom.put(name, path.toString()); + } catch (GrammarException gex) { + String msg = String.format("Error loading template file '%s'", path); + throw new GrammarException(msg, gex); + } + } + } + + private static void loadGrammar(String name, String ln, ConfigSet cfgSet, RGrammarSet set, Path cfgParent) throws IOException { + Path path = Paths.get(ln); + + /* + * Convert from configuration relative path to + * absolute path. + */ + Path convPath = cfgParent.resolve(path.toString()); + + if(Files.isDirectory(convPath)) { + throw new GrammarException("Can't load grammar from directory" + convPath.toString()); + } else { + /* Load grammar file. */ + try { + long startFileTime = System.nanoTime(); + + BufferedReader fis = Files.newBufferedReader(convPath); + RGrammar gram = RGrammarParser.readGrammar(fis); + if(gram.name == null) { + System.err.printf("\tINFO: Naming unnamed grammar loaded from %s off config name '%s'\n", + convPath, name); + + gram.name = name; + } + + fis.close(); + + long endFileTime = System.nanoTime(); + + long fileTime = endFileTime - startFileTime; + + System.err.printf("\tPERF: Read grammar %s (from %s) in %d ns (%f s)\n", + gram.name, convPath, fileTime, fileTime / 1000000000.0); + + /* Add grammar to the set. */ + set.addGrammar(name, gram); + + /* + * Mark where the grammar came + * from. 
+ */ + set.loadedFrom.put(name, path.toString()); + } catch (GrammarException gex) { + String msg = String.format("Error loading template '%s'", path); + throw new GrammarException(msg, gex); + } + } + } +} diff --git a/src/main/java/bjc/rgens/parser/ConfigSet.java b/src/main/java/bjc/rgens/parser/ConfigSet.java new file mode 100644 index 0000000..8945a0f --- /dev/null +++ b/src/main/java/bjc/rgens/parser/ConfigSet.java @@ -0,0 +1,18 @@ +package bjc.rgens.parser; + +import java.util.HashMap; +import java.util.Map; + +import bjc.rgens.parser.templates.GrammarTemplate; + +public class ConfigSet { + public final Map<String, RGrammarSet> grammars; + public final Map<String, GrammarTemplate> templates; + public final Map<String, ConfigSet> subconfigs; + + public ConfigSet() { + grammars = new HashMap<>(); + templates = new HashMap<>(); + subconfigs = new HashMap<>(); + } +} diff --git a/src/main/java/bjc/rgens/parser/FlatRuleCase.java b/src/main/java/bjc/rgens/parser/FlatRuleCase.java new file mode 100644 index 0000000..4bbd1cc --- /dev/null +++ b/src/main/java/bjc/rgens/parser/FlatRuleCase.java @@ -0,0 +1,23 @@ +package bjc.rgens.parser; + +import bjc.utils.funcdata.IList; + +import bjc.rgens.parser.elements.CaseElement; + +public class FlatRuleCase extends RuleCase { + public FlatRuleCase(IList<CaseElement> elms) { + super(elms); + } + + @Override + public void generate(GenerationState state) { + for(CaseElement elm : elementList) { + elm.generate(state); + } + } + + public FlatRuleCase withElements(IList<CaseElement> elms) { + return new FlatRuleCase(elms); + } +} + diff --git a/src/main/java/bjc/rgens/parser/GenerationState.java b/src/main/java/bjc/rgens/parser/GenerationState.java new file mode 100644 index 0000000..f5cbc60 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/GenerationState.java @@ -0,0 +1,104 @@ +package bjc.rgens.parser; + +import bjc.utils.data.IPair; +import bjc.utils.data.Pair; + +import java.util.HashMap; +import java.util.Map; +import 
java.util.Random; + +/* + * The current state during generation. + * + */ +public class GenerationState { + /** The current string. */ + public StringBuilder contents; + /** The RNG. */ + public Random rnd; + + /** The current grammar. */ + public RGrammar gram; + /** The rules of the grammar. */ + public Map<String, Rule> rules; + /** The rules imported from other grammars. */ + public Map<String, Rule> importRules; + + /** The current set of variables. */ + public Map<String, String> vars; + public Map<String, Rule> rlVars; + + private static final Random BASE = new Random(); + + /** + * Create a new generation state. + * + * @param cont + * The string being generated. + * + * @param rand + * The RNG to use. + * + * @param vs + * The variables to use. + */ + public GenerationState(StringBuilder cont, Random rand, Map<String, String> vs, + Map<String, Rule> rvs, RGrammar gram) { + contents = cont; + rnd = rand; + vars = vs; + rlVars = rvs; + + this.gram = gram; + + this.rules = gram.getRules(); + this.importRules = gram.getImportRules(); + } + + public static GenerationState fromGrammar(RGrammar gram) { + return fromGrammar(BASE, gram); + } + + public static GenerationState fromGrammar(Random rand, RGrammar gram) { + return new GenerationState(new StringBuilder(), rand, new HashMap<>(), new HashMap<>(), gram); + } + + public void swapGrammar(RGrammar gram) { + if(this.gram == gram) return; + + this.gram = gram; + + rules = gram.getRules(); + + importRules = gram.getImportRules(); + } + + public GenerationState newBuf() { + return new GenerationState(new StringBuilder(), rnd, vars, rlVars, gram); + } + + /* + * @TODO 6/5/18 Ben Culkin :ImportRefactor + * + * Change this so that imports in almost all cases have to specify where + * they are importing the rule from, so as to make it clear which rules + * are imported, and which aren't + */ + public Rule findRule(String ruleName, boolean allowImports) { + if(rules.containsKey(ruleName)) { + return rules.get(ruleName); 
+ } + + if(allowImports) return findImport(ruleName); + + return null; + } + + public Rule findImport(String ruleName) { + if(importRules.containsKey(ruleName)) { + return importRules.get(ruleName); + } + + return null; + } +} diff --git a/src/main/java/bjc/rgens/parser/GrammarException.java b/src/main/java/bjc/rgens/parser/GrammarException.java index 9eaa0a1..9eaa0a1 100644..100755 --- a/src/main/java/bjc/rgens/parser/GrammarException.java +++ b/src/main/java/bjc/rgens/parser/GrammarException.java diff --git a/src/main/java/bjc/rgens/parser/NormalRuleCase.java b/src/main/java/bjc/rgens/parser/NormalRuleCase.java new file mode 100644 index 0000000..c8891a2 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/NormalRuleCase.java @@ -0,0 +1,26 @@ +package bjc.rgens.parser; + +import bjc.utils.funcdata.IList; + +import bjc.rgens.parser.elements.CaseElement; + +public class NormalRuleCase extends RuleCase { + public NormalRuleCase(IList<CaseElement> elms) { + super(elms); + } + + @Override + public void generate(GenerationState state) { + for(CaseElement elm : elementList) { + elm.generate(state); + + if(elm.spacing) { + state.contents.append(" "); + } + } + } + + public NormalRuleCase withElements(IList<CaseElement> elms) { + return new NormalRuleCase(elms); + } +} diff --git a/src/main/java/bjc/rgens/parser/RGrammar.java b/src/main/java/bjc/rgens/parser/RGrammar.java index 38f38c8..337ab35 100644..100755 --- a/src/main/java/bjc/rgens/parser/RGrammar.java +++ b/src/main/java/bjc/rgens/parser/RGrammar.java @@ -1,14 +1,16 @@ package bjc.rgens.parser; -import bjc.rgens.parser.elements.CaseElement; -import bjc.rgens.parser.elements.LiteralCaseElement; -import bjc.rgens.parser.elements.RangeCaseElement; -import bjc.rgens.parser.elements.RuleCaseElement; -import bjc.rgens.parser.elements.VariableCaseElement; +import bjc.utils.data.IPair; +import bjc.utils.data.Pair; import bjc.utils.funcutils.StringUtils; +import bjc.rgens.parser.elements.*; + +import java.util.Arrays; 
+import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Random; import java.util.Set; @@ -22,12 +24,23 @@ import edu.gatech.gtri.bktree.BkTreeSearcher.Match; import edu.gatech.gtri.bktree.Metric; import edu.gatech.gtri.bktree.MutableBkTree; +import static bjc.utils.data.IPair.pair; + /** * Represents a randomized grammar. * * @author EVE */ public class RGrammar { + public RGrammarSet belongsTo; + + public String name; + + public List<IPair<String, String>> postprocs; + + private static final List<IPair<String, String>> builtinPostprocs; + public boolean useBuiltinPostprocs = true; + /* The max distance between possible alternate rules. */ private static final int MAX_DISTANCE = 6; @@ -48,42 +61,10 @@ public class RGrammar { } } - /* The current state during generation. */ - private static class GenerationState { - /* The current string. */ - public StringBuilder contents; - /* The RNG. */ - public Random rnd; - - /* The current set of variables. */ - public Map<String, String> vars; - - /** - * Create a new generation state. - * - * @param cont - * The string being generated. - * - * @param rand - * The RNG to use. - * - * @param vs - * The variables to use. - */ - public GenerationState(StringBuilder cont, Random rand, Map<String, String> vs) { - contents = cont; - rnd = rand; - vars = vs; - } - } - - /* The pattern for matching the name of a variable. */ - private static Pattern NAMEVAR_PATTERN = Pattern.compile("\\$(\\w+)"); - /* The rules of the grammar. */ - private Map<String, Rule> rules; + public Map<String, Rule> rules; /* The rules imported from other grammars. */ - private Map<String, RGrammar> importRules; + private Map<String, Rule> importRules; /* The rules exported from this grammar. */ private Set<String> exportRules; /* The initial rule of this grammar. */ @@ -92,6 +73,43 @@ public class RGrammar { /* The tree to use for finding rule suggestions. 
*/ private BkTreeSearcher<String> ruleSearcher; + static { + /* Collapse duplicate spaces */ + IPair<String, String> collapseDupSpaces = pair("\\s+", " "); + + /* Built-in post-processing steps */ + builtinPostprocs = Arrays.asList( + collapseDupSpaces, + + /* + * Remove extraneous spaces around punctuation + * marks, forced by the way the language syntax + * works. + * + * This can be done in grammars, but it is quite + * tedious to do so. + */ + + + /* Handle 's */ + pair(" 's ", "'s "), + /* Handle opening/closing punctuation. */ + pair("([(\\[]) ", " $1"), + pair(" ([)\\]'\"])", "$1 "), + /* Remove spaces around series of opening/closing punctuation. */ + pair("([(\\[])\\s+([(\\[])", "$1$2"), + pair("([)\\]])\\s+([)\\]])", "$1$2"), + /* Handle inter-word punctuation. */ + pair(" ([,:.!])", "$1 "), + /* Handle intra-word punctuation. */ + pair("\\s?([-/])\\s?", "$1"), + + collapseDupSpaces, + + /* Replace this once it is no longer needed. */ + pair("\\s(ish|burg|ton|ville|opolis|field|boro|dale)", "$1") + ); + } /** * Create a new randomized grammar using the specified set of rules. * @@ -100,6 +118,12 @@ public class RGrammar { */ public RGrammar(Map<String, Rule> ruls) { rules = ruls; + + for(Rule rl : ruls.values()) { + rl.belongsTo = this; + } + + postprocs = new ArrayList<>(); } /** @@ -111,7 +135,7 @@ public class RGrammar { * @param importedRules * The set of imported rules to use. */ - public void setImportedRules(Map<String, RGrammar> importedRules) { + public void setImportedRules(Map<String, Rule> importedRules) { importRules = importedRules; } @@ -137,7 +161,7 @@ public class RGrammar { * @return A possible string from the grammar. */ public String generate(String startRule) { - return generate(startRule, new Random(), new HashMap<>()); + return generate(startRule, new Random(), new HashMap<>(), new HashMap<>()); } /** @@ -155,7 +179,26 @@ public class RGrammar { * * @return A possible string from the grammar. 
*/ - public String generate(String startRule, Random rnd, Map<String, String> vars) { + public String generate(String startRule, Random rnd, Map<String, String> vars, + Map<String, Rule> rlVars) { + return generate(startRule, new GenerationState(new StringBuilder(), rnd, vars, rlVars, this)); + } + + /** + * Generate a string from this grammar, starting from the specified rule. + * + * @param startRule + * The rule to start generating at, or null to use the initial rule + * for this grammar. + * + * @param state + * The generation state. + */ + public String generate(String startRule, GenerationState state) { + return generate(startRule, state, true); + } + + public String generate(String startRule, GenerationState state, boolean doPostprocess) { String fromRule = startRule; if (startRule == null) { @@ -170,242 +213,55 @@ public class RGrammar { } } - RuleCase start = rules.get(fromRule).getCase(rnd); - - StringBuilder contents = new StringBuilder(); - - generateCase(start, new GenerationState(contents, rnd, vars)); - - String body = contents.toString(); - /* - * Collapse duplicate spaces. + /* + * We don't search imports, so it will always belong to this + * grammar. */ - body = body.replaceAll("\\s+", " "); - - /* - * Remove extraneous spaces around punctutation marks. - * - * This can be done in the grammars, but it is very tedious to do so. - */ - - /* Handle 's */ - body = body.replaceAll(" 's ", "'s "); - - /* Handle opening/closing punctuation. */ - body = body.replaceAll("([(\\[]) ", " $1"); - body = body.replaceAll(" ([)\\]'\"])", "$1 "); + Rule rl = state.findRule(fromRule, false); - /* Remove spaces around series of opening/closing punctuation. */ - body = body.replaceAll("([(\\[])\\s+([(\\[])", "$1$2"); - body = body.replaceAll("([)\\]])\\s+([)\\]])", "$1$2"); + if(rl == null) + throw new GrammarException("Could not find rule " + rl.name); - /* Handle inter-word punctuation. 
*/ - body = body.replaceAll(" ([,:.!])", "$1 "); + rl.generate(state); - /* Handle intra-word punctuation. */ - body = body.replaceAll("\\s?([-/])\\s?", "$1"); + String body = state.contents.toString(); - /* - * Collapse duplicate spaces. - */ - body = body.replaceAll("\\s+", " "); - - /* - * @TODO 11/01/17 Ben Culkin :RegexRule Replace this once it is no longer - * needed. - */ - body = body.replaceAll("\\s(ish|burg|ton|ville|opolis|field|boro|dale)", "$1"); + if(doPostprocess) { + body = postprocessRes(body); + } return body; } - /* Generate a rule case. */ - private void generateCase(RuleCase start, GenerationState state) { - try { - switch (start.type) { - case NORMAL: - for (CaseElement elm : start.getElements()) { - generateElement(elm, state); - - if (elm.type != CaseElement.ElementType.VARDEF) { - state.contents.append(" "); - } - } - break; - case SPACEFLATTEN: - for (CaseElement elm : start.getElements()) { - generateElement(elm, state); - } - break; - default: - String msg = String.format("Unknown case type '%s'", start.type); - throw new GrammarException(msg); - } - } catch (GrammarException gex) { - String msg = String.format("Error in generating case (%s)", start); - throw new GrammarException(msg, gex); - } - } - - /* Generate a case element. 
*/ - private void generateElement(CaseElement elm, GenerationState state) { - try { - switch (elm.type) { - case LITERAL: { - LiteralCaseElement lit = (LiteralCaseElement)elm; - - state.contents.append(lit.val); - break; - } - case RULEREF: { - RuleCaseElement rle = (RuleCaseElement)elm; - - generateRuleReference(rle, state); - break; - } - case RANGE: { - RangeCaseElement rang = (RangeCaseElement)elm; + private String postprocessRes(String strang) { + String body = strang; - int val = state.rnd.nextInt(rang.end - rang.begin); - val += rang.begin; - - state.contents.append(val); - break; - } - case VARDEF: - generateVarDef(((VariableCaseElement)elm).varName, ((VariableCaseElement)elm).varDef, state); - break; - case EXPVARDEF: - generateExpVarDef(((VariableCaseElement)elm).varName, ((VariableCaseElement)elm).varDef, state); - break; - default: - String msg = String.format("Unknown element type '%s'", elm.type); - throw new GrammarException(msg); + if(useBuiltinPostprocs) { + for(IPair<String, String> par : builtinPostprocs) { + body = body.replaceAll(par.getLeft(), par.getRight()); } - } catch (GrammarException gex) { - String msg = String.format("Error in generating case element (%s)", elm); - throw new GrammarException(msg, gex); } - } - - /* Generate a expanding variable definition. 
*/ - private void generateExpVarDef(String name, String defn, GenerationState state) { - GenerationState newState = new GenerationState(new StringBuilder(), state.rnd, state.vars); - if (rules.containsKey(defn)) { - RuleCase destCase = rules.get(defn).getCase(); - - generateCase(destCase, newState); - } else if (importRules.containsKey(defn)) { - RGrammar destGrammar = importRules.get(defn); - String res = destGrammar.generate(defn, state.rnd, state.vars); - - newState.contents.append(res); - } else { - String msg = String.format("No rule '%s' defined", defn); - throw new GrammarException(msg); + for(IPair<String, String> par : postprocs) { + body = body.replaceAll(par.getLeft(), par.getRight()); } - state.vars.put(name, newState.contents.toString()); - } - - /* Generate a variable definition. */ - private static void generateVarDef(String name, String defn, GenerationState state) { - state.vars.put(name, defn); + return body.trim(); } - - /* Generate a rule reference. */ - private void generateRuleReference(RuleCaseElement elm, GenerationState state) { - String refersTo = elm.val; - - GenerationState newState = new GenerationState(new StringBuilder(), state.rnd, state.vars); - - if (refersTo.contains("$")) { - /* Parse variables */ - String refBody = refersTo.substring(1, refersTo.length() - 1); - - if (refBody.contains("-")) { - /* Handle dependent rule names. 
*/ - StringBuffer nameBuffer = new StringBuffer(); - - Matcher nameMatcher = NAMEVAR_PATTERN.matcher(refBody); - - while (nameMatcher.find()) { - String var = nameMatcher.group(1); - - if (!state.vars.containsKey(var)) { - String msg = String.format("No variable '%s' defined", var); - throw new GrammarException(msg); - } - - String name = state.vars.get(var); - - if (name.contains(" ")) { - throw new GrammarException("Variables substituted into names cannot contain spaces"); - } else if (name.equals("")) { - throw new GrammarException("Variables substituted into names cannot be empty"); - } - - nameMatcher.appendReplacement(nameBuffer, name); - } - - nameMatcher.appendTail(nameBuffer); - - refersTo = "[" + nameBuffer.toString() + "]"; - } else { - /* Handle string references. */ - if (refBody.equals("$")) { - throw new GrammarException("Cannot refer to unnamed variables"); - } - - String key = refBody.substring(1); - - if (!state.vars.containsKey(key)) { - String msg = String.format("No variable '%s' defined", key); - throw new GrammarException(msg); - } - - state.contents.append(state.vars.get(key)); - - return; - } - } - - if (refersTo.startsWith("[^")) { - refersTo = "[" + refersTo.substring(2); - - RGrammar dst = importRules.get(refersTo); - - newState.contents.append(dst.generate(refersTo, state.rnd, state.vars)); - } else if (rules.containsKey(refersTo)) { - RuleCase cse = rules.get(refersTo).getCase(state.rnd); - - generateCase(cse, newState); - } else if (importRules.containsKey(refersTo)) { - RGrammar dst = importRules.get(refersTo); - - newState.contents.append(dst.generate(refersTo, state.rnd, state.vars)); - } else { - if (ruleSearcher != null) { - Set<Match<? 
extends String>> results = ruleSearcher.search(refersTo, MAX_DISTANCE); - - String[] resArray = results.stream().map(Match::getMatch).toArray((i) -> new String[i]); - - String msg = String.format("No rule '%s' defined (perhaps you meant %s?)", refersTo, - StringUtils.toEnglishList(resArray, false)); - - throw new GrammarException(msg); - } - - String msg = String.format("No rule '%s' defined", refersTo); - throw new GrammarException(msg); - } - - if (refersTo.contains("+")) { - /* Rule names with pluses in them get space-flattened */ - state.contents.append(newState.contents.toString().replaceAll("\\s+", "")); - } else { - state.contents.append(newState.contents.toString()); + /** + * Generate a rule case. + * + * @param start + * The rule case to generate. + * @param state + * The current generation state. + */ + public void generateCase(RuleCase start, GenerationState state) { + try { + start.generate(state); + } catch (GrammarException gex) { + String msg = String.format("Error in generating case (%s)", start); + throw new GrammarException(msg, gex); } } @@ -435,7 +291,7 @@ public class RGrammar { if (initRule.equals("")) { throw new GrammarException("The empty string is not a valid rule name"); } else if (!rules.containsKey(initRule)) { - String msg = String.format("No rule '%s' local to this grammar defined.", initRule); + String msg = String.format("No rule '%s' local to this grammar (%s) defined.", initRule, name); throw new GrammarException(msg); } @@ -455,7 +311,8 @@ public class RGrammar { for (String rname : exportRules) { if (!rules.containsKey(rname)) { - String msg = String.format("No rule '%s' local to this grammar defined", initialRule); + String msg = String.format("No rule '%s' local to this grammar (%s) defined for export", + name, rname); throw new GrammarException(msg); } @@ -488,4 +345,8 @@ public class RGrammar { public Map<String, Rule> getRules() { return rules; } + + public Map<String, Rule> getImportRules() { + return importRules; + } } 
diff --git a/src/main/java/bjc/rgens/parser/RGrammarBuilder.java b/src/main/java/bjc/rgens/parser/RGrammarBuilder.java index b4cb04a..8f0a2d1 100644..100755 --- a/src/main/java/bjc/rgens/parser/RGrammarBuilder.java +++ b/src/main/java/bjc/rgens/parser/RGrammarBuilder.java @@ -1,13 +1,19 @@ package bjc.rgens.parser; import bjc.rgens.parser.elements.CaseElement; + +import bjc.utils.data.IPair; +import bjc.utils.data.Pair; import bjc.utils.funcdata.FunctionalList; import bjc.utils.funcdata.IList; +import bjc.utils.funcutils.ListUtils; +import bjc.utils.funcutils.SetUtils; -import static bjc.rgens.parser.RuleCase.CaseType.*; - +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; +import java.util.List; +import java.util.LinkedList; import java.util.Map; import java.util.Set; @@ -46,9 +52,9 @@ public class RGrammarBuilder { else if(rName.equals("")) throw new IllegalArgumentException("The empty string is not a valid rule name"); - if(rules.containsKey(rName)) + if(rules.containsKey(rName)) { return rules.get(rName); - else { + } else { Rule ret = new Rule(rName); rules.put(rName, ret); @@ -66,8 +72,20 @@ public class RGrammarBuilder { public RGrammar toRGrammar() { RGrammar grammar = new RGrammar(rules); + if(initialRule != null) { + if(!rules.containsKey(initialRule)) { + throw new GrammarException(String.format("Rule '%s' doesn't exist\n", initialRule)); + } + } + grammar.setInitialRule(initialRule); + for(String export : exportedRules) { + if(!rules.containsKey(export)) { + throw new GrammarException(String.format("Rule '%s' doesn't exist\n", export)); + } + } + grammar.setExportedRules(exportedRules); return grammar; @@ -124,37 +142,48 @@ public class RGrammarBuilder { * If the rule name is either invalid or not defined by this * grammar, or if the suffix is invalid. 
*/ - public void suffixWith(String ruleName, String suffix) { + public void suffixWith(String ruleName, IList<CaseElement> suffixes) { if (ruleName == null) { throw new NullPointerException("Rule name must not be null"); } else if (ruleName.equals("")) { throw new IllegalArgumentException("The empty string is not a valid rule name"); } else if(!rules.containsKey(ruleName)) { - String msg = String.format("Rule '%s' is not a valid rule name."); + String msg = String.format("Rule '%s' is not a valid rule name"); throw new IllegalArgumentException(msg); } - CaseElement element = CaseElement.createElement(suffix); + Set<CaseElement> elements = new HashSet<>(suffixes.getSize()); + for(CaseElement suffix : suffixes) { + elements.add(suffix); + } + + List<List<CaseElement>> suffixLists = powerList(elements); + + FunctionalList<IPair<Integer, RuleCase>> newCases = new FunctionalList<>(); - FunctionalList<RuleCase> newCases = new FunctionalList<>(); + IList<IPair<Integer, RuleCase>> caseList = rules.get(ruleName).getCases(); + for (IPair<Integer, RuleCase> ruleCase : caseList) { + RuleCase cas = ruleCase.getRight(); - IList<RuleCase> caseList = rules.get(ruleName).getCases(); - for (RuleCase ruleCase : caseList) { - FunctionalList<CaseElement> newCase = new FunctionalList<>(); + for(List<CaseElement> suffixList : suffixLists) { + FunctionalList<CaseElement> newCase = new FunctionalList<>(); - for(CaseElement elm : ruleCase.getElements()) { - newCase.add(elm); - } + for(CaseElement elm : cas.elementList) { + newCase.add(elm); + } - newCase.add(element); + for(CaseElement element : suffixList) { + newCase.add(element); + } - newCases.add(new RuleCase(NORMAL, newCase)); + newCases.add(new Pair<>(ruleCase.getLeft(), cas.withElements(newCase))); + } } - for (RuleCase newCase : newCases) { - caseList.add(newCase); + for (IPair<Integer, RuleCase> newCase : newCases) { + rules.get(ruleName).addCase(newCase.getRight(), newCase.getLeft()); } } @@ -171,37 +200,48 @@ public class 
RGrammarBuilder { * If the rule name is either invalid or not defined by this * grammar, or if the prefix is invalid. */ - public void prefixWith(String ruleName, String prefix) { + public void prefixWith(String ruleName, IList<CaseElement> prefixes) { if (ruleName == null) { throw new NullPointerException("Rule name must not be null"); } else if (ruleName.equals("")) { throw new IllegalArgumentException("The empty string is not a valid rule name"); } else if(!rules.containsKey(ruleName)) { - String msg = String.format("Rule '%s' is not a valid rule name."); + String msg = String.format("Rule '%s' is not a valid rule name"); throw new IllegalArgumentException(msg); } - CaseElement element = CaseElement.createElement(prefix); + Set<CaseElement> elements = new HashSet<>(prefixes.getSize()); + for(CaseElement prefix : prefixes) { + elements.add(prefix); + } + + List<List<CaseElement>> prefixLists = powerList(elements); - FunctionalList<RuleCase> newCases = new FunctionalList<>(); + FunctionalList<IPair<Integer, RuleCase>> newCases = new FunctionalList<>(); - IList<RuleCase> caseList = rules.get(ruleName).getCases(); - for (RuleCase ruleCase : caseList) { - FunctionalList<CaseElement> newCase = new FunctionalList<>(); + IList<IPair<Integer, RuleCase>> caseList = rules.get(ruleName).getCases(); + for (IPair<Integer, RuleCase> ruleCase : caseList) { + RuleCase cas = ruleCase.getRight(); - newCase.add(element); + for(List<CaseElement> prefixList : prefixLists) { + FunctionalList<CaseElement> newCase = new FunctionalList<>(); - for(CaseElement elm : ruleCase.getElements()) { - newCase.add(elm); - } + for(CaseElement elm: prefixList) { + newCase.add(elm); + } - newCases.add(new RuleCase(NORMAL, newCase)); + for(CaseElement elm :cas.elementList) { + newCase.add(elm); + } + + newCases.add(new Pair<>(ruleCase.getLeft(), cas.withElements(newCase))); + } } - for (RuleCase newCase : newCases) { - caseList.add(newCase); + for (IPair<Integer, RuleCase> newCase : newCases) { + 
rules.get(ruleName).addCase(newCase.getRight(), newCase.getLeft()); } } @@ -210,19 +250,85 @@ public class RGrammarBuilder { throw new NullPointerException("ruleName must not be null"); } else if (ruleName.equals("")) { throw new IllegalArgumentException("The empty string is not a valid rule name"); + } else if (!rules.containsKey(ruleName)) { + throw new IllegalArgumentException(String.format("The rule '%s' doesn't exist", ruleName)); } - IList<RuleCase> caseList = rules.get(ruleName).getCases(); + IList<IPair<Integer, RuleCase>> caseList = rules.get(ruleName).getCases(); - IList<RuleCase> newCaseList = new FunctionalList<>(); + IList<IPair<Integer, RuleCase>> newCaseList = new FunctionalList<>(); - for(RuleCase cse : caseList) { - newCaseList.add(new RuleCase(SPACEFLATTEN, cse.getElements())); + for(IPair<Integer, RuleCase> cse : caseList) { + newCaseList.add(new Pair<>(cse.getLeft(), new FlatRuleCase(cse.getRight().elementList))); } + System.err.printf("\t\tTRACE: Despacing %d cases of rule %s\n", caseList.getSize(), ruleName); + rules.get(ruleName).replaceCases(newCaseList); } + public void setWeight(String ruleName) { + if (ruleName == null) { + throw new NullPointerException("ruleName must not be null"); + } else if (ruleName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } else if (!rules.containsKey(ruleName)) { + throw new IllegalArgumentException(String.format("The rule '%s' doesn't exist", ruleName)); + } + + rules.get(ruleName).prob = Rule.ProbType.NORMAL; + } + + public void setRuleRecur(String ruleName, int recurLimit) { + if (ruleName == null) { + throw new NullPointerException("ruleName must not be null"); + } else if (ruleName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } else if (!rules.containsKey(ruleName)) { + throw new IllegalArgumentException(String.format("The rule '%s' doesn't exist", ruleName)); + } + + rules.get(ruleName).recurLimit 
= recurLimit; + } + + public void setDescent(String ruleName, int descentFactor) { + if (ruleName == null) { + throw new NullPointerException("ruleName must not be null"); + } else if (ruleName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } else if (!rules.containsKey(ruleName)) { + throw new IllegalArgumentException(String.format("The rule '%s' doesn't exist", ruleName)); + } + + Rule rl = rules.get(ruleName); + + rl.prob = Rule.ProbType.DESCENDING; + rl.descentFactor = descentFactor; + } + + public void setBinomial(String ruleName, int target, int bound, int trials) { + if (ruleName == null) { + throw new NullPointerException("ruleName must not be null"); + } else if (ruleName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } else if (!rules.containsKey(ruleName)) { + throw new IllegalArgumentException(String.format("The rule '%s' doesn't exist", ruleName)); + } + + Rule rl = rules.get(ruleName); + + rl.prob = Rule.ProbType.BINOMIAL; + + rl.target = target; + rl.bound = bound; + rl.trials = trials; + } + /* + * @TODO + * + * Actually get this working + */ + /* public void regexizeRule(String rule, String pattern) { if (rule == null) { throw new NullPointerException("rule must not be null"); @@ -230,6 +336,8 @@ public class RGrammarBuilder { throw new NullPointerException("pattern must not be null"); } else if (rule.equals("")) { throw new IllegalArgumentException("The empty string is not a valid rule name"); + } else if(!rules.containsKey(rule)) { + throw new IllegalArgumentException(String.format("The rule '%s' doesn't exist", rule)); } IList<RuleCase> caseList = rules.get(rule).getCases(); @@ -241,6 +349,52 @@ public class RGrammarBuilder { } rules.get(rule).replaceCases(newCaseList); + }*/ + + private static <T> List<List<T>> powerList(Set<T> elements) { + /* + * Fast-case the most common usage + */ + if(elements.size() == 1) { + List<List<T>> ret = new 
LinkedList<>(); + + List<T> curr = new ArrayList<>(elements.size()); + for(T elem : elements) { + curr.add(elem); + } + + ret.add(curr); + + return ret; + } + + Set<Set<T>> powerSet = SetUtils.powerSet(elements); + + List<List<T>> list = new LinkedList<>(); + + for(Set<T> set : powerSet) { + /* + * Skip empty sets + */ + if(set.size() == 0) continue; + + List<T> stor = new ArrayList<>(set.size()); + + for(T elm : set) { + stor.add(elm); + } + + for(List<T> permute : ListUtils.permuteList(stor)) { + System.err.printf("\t\tTRACE: generated permute "); + for(T elm : permute) { + System.err.printf("%s ", elm); + } + System.err.println(); + + list.add(permute); + } + } + return list; } } diff --git a/src/main/java/bjc/rgens/parser/RGrammarFormatter.java b/src/main/java/bjc/rgens/parser/RGrammarFormatter.java index a2454dc..c571cb7 100644..100755 --- a/src/main/java/bjc/rgens/parser/RGrammarFormatter.java +++ b/src/main/java/bjc/rgens/parser/RGrammarFormatter.java @@ -1,6 +1,8 @@ package bjc.rgens.parser; import bjc.rgens.parser.elements.CaseElement; + +import bjc.utils.data.IPair; import bjc.utils.funcdata.IList; import java.util.HashSet; @@ -52,7 +54,7 @@ public class RGrammarFormatter { /* Format a rule. 
*/ private static void processRule(Rule rule, StringBuilder sb) { - IList<RuleCase> cases = rule.getCases(); + IList<IPair<Integer, RuleCase>> cases = rule.getCases(); StringBuilder ruleBuilder = new StringBuilder(); @@ -61,20 +63,21 @@ public class RGrammarFormatter { int markerPos = ruleBuilder.length(); - processCase(cases.first(), ruleBuilder); + processCase(cases.first().getRight(), ruleBuilder); sb.append(ruleBuilder.toString().trim()); ruleBuilder = new StringBuilder(); - for (RuleCase cse : cases.tail()) { + for (IPair<Integer, RuleCase> cse : cases.tail()) { sb.append("\n\t"); for (int i = 8; i < markerPos; i++) { ruleBuilder.append(" "); } - processCase(cse, ruleBuilder); + /* @TODO do this right, once we pick the syntax */ + processCase(cse.getRight(), ruleBuilder); sb.append(ruleBuilder.toString()); @@ -86,7 +89,7 @@ public class RGrammarFormatter { /* Format a case. */ private static void processCase(RuleCase cse, StringBuilder sb) { /* Process each element, adding a space. 
*/ - for (CaseElement element : cse.getElements()) { + for (CaseElement element : cse.elementList) { sb.append(element.toString()); sb.append(" "); } diff --git a/src/main/java/bjc/rgens/parser/RGrammarParser.java b/src/main/java/bjc/rgens/parser/RGrammarParser.java index 3a357b1..a1fc0e9 100644..100755 --- a/src/main/java/bjc/rgens/parser/RGrammarParser.java +++ b/src/main/java/bjc/rgens/parser/RGrammarParser.java @@ -1,8 +1,13 @@ package bjc.rgens.parser; -import bjc.rgens.parser.elements.CaseElement; +import bjc.rgens.parser.elements.*; + +import bjc.utils.data.IPair; +import bjc.utils.data.Pair; import bjc.utils.funcdata.FunctionalList; import bjc.utils.funcdata.IList; +import bjc.utils.funcutils.ListUtils; +import bjc.utils.funcutils.SetUtils; import bjc.utils.funcutils.TriConsumer; import bjc.utils.ioutils.blocks.Block; import bjc.utils.ioutils.blocks.BlockReader; @@ -10,8 +15,13 @@ import bjc.utils.ioutils.blocks.SimpleBlockReader; import java.io.Reader; import java.io.StringReader; +import java.util.Arrays; +import java.util.ArrayList; +import java.util.List; +import java.util.LinkedList; import java.util.HashMap; import java.util.Map; +import java.util.Set; /** * Reads {@link RGrammar} from a input stream. @@ -23,6 +33,7 @@ public class RGrammarParser { * Whether we are in debug mode or not. */ public static final boolean DEBUG = false; + public static final boolean LINES = true; /* * Templates for level-dependent delimiters. 
@@ -73,6 +84,63 @@ public class RGrammarParser { } }); + pragmas.put("recur-limit", (body, build, level) -> { + String[] parts = body.split(" "); + + if(parts.length != 2) { + throw new GrammarException("Recur-limit pragma takes two arguments: the name of the rule to set the limit for, and the new value of the limit"); + } + + if(!parts[1].matches("\\A\\d+\\Z")) { + throw new GrammarException("Limit value must be an integer"); + } + + build.setRuleRecur(parts[0], Integer.parseInt(parts[1])); + }); + + pragmas.put("enable-weight", (body, build, level) -> { + String[] parts = body.split(" "); + + if(parts.length != 2) { + throw new GrammarException("Enable-weight pragma takes one arguments: the name of the rule to set the weight factor for"); + } + + build.setWeight(parts[0]); + }); + pragmas.put("enable-descent", (body, build, level) -> { + String[] parts = body.split(" "); + + if(parts.length != 2) { + throw new GrammarException("Enable-descent pragma takes two arguments: the name of the rule to set the descent factor for, and the new value of the factor"); + } + + if(!parts[1].matches("\\A\\d+\\Z")) { + throw new GrammarException("Factor value must be an integer"); + } + + build.setDescent(parts[0], Integer.parseInt(parts[1])); + }); + + pragmas.put("enable-binomial", (body, build, level) -> { + String[] parts = body.split(" "); + + if(parts.length != 4) { + throw new GrammarException("Enable-descent pragma takes four arguments: the name of the rule to set the binomial factors for, and the three binomial parameters (target, bound trials)"); + } + + if(!parts[1].matches("\\A\\d+\\Z")) { + throw new GrammarException("Target value must be an integer"); + } + if(!parts[2].matches("\\A\\d+\\Z")) { + throw new GrammarException("Bound value must be an integer"); + } + if(!parts[3].matches("\\A\\d+\\Z")) { + throw new GrammarException("Trials value must be an integer"); + } + + build.setBinomial(parts[0], Integer.parseInt(parts[1]), Integer.parseInt(parts[2]), 
Integer.parseInt(parts[3])); + }); + pragmas.put("regex-rule", (body, build, level) -> { int nameIndex = body.indexOf(" "); @@ -83,31 +151,31 @@ public class RGrammarParser { String name = body.substring(0, nameIndex).trim(); String patt = body.substring(nameIndex + 1).trim(); - build.regexizeRule(name, patt); + //build.regexizeRule(name, patt); }); pragmas.put("suffix-with", (body, build, level) -> { - String[] parts = body.trim().split(" "); + int idx = body.indexOf(" "); - if (parts.length != 2) { - String msg = "Suffix-with pragma takes two arguments, the name of the rule to suffix, then what to suffix it with"; + if (idx == -1) { + String msg = "Suffix-with pragma takes at least two arguments, the name of the rule to suffix, then what to suffix it with\n\tThis can be more than one token, to get them suffixed as a group"; throw new GrammarException(msg); } - build.suffixWith(parts[0], parts[1]); + build.suffixWith(body.substring(0, idx), parseElementString(body.substring(idx + 1)).getLeft()); }); pragmas.put("prefix-with", (body, build, level) -> { - String[] parts = body.trim().split(" "); + int idx = body.indexOf(" "); - if (parts.length != 2) { - String msg = "Prefix-with pragma takes two arguments, the name of the rule to prefix, then what to prefix it with"; + if (idx == -1) { + String msg = "Prefix-with pragma takes at least two arguments, the name of the rule to prefix, then what to prefix it with\n\tThis can be more than one token, to get them prefixed as a group"; throw new GrammarException(msg); } - build.prefixWith(parts[0], parts[1]); + build.prefixWith(body.substring(0, idx), parseElementString(body.substring(idx + 1)).getLeft()); }); } @@ -134,12 +202,15 @@ public class RGrammarParser { try { RGrammarBuilder build = new RGrammarBuilder(); - reader.forEachBlock((block) -> { + for(Block block : reader) { if(DEBUG) System.err.printf("Handling top-level block (%s)\n", block); - handleBlock(build, block.contents, 0); - }); + handleBlock(build, 
block.contents, 0, block.startLine); + } + + if(LINES) + System.err.printf("%d ", reader.getBlock().endLine); return build.toRGrammar(); } catch (GrammarException gex) { @@ -155,7 +226,7 @@ public class RGrammarParser { /* Handles an arbitrary block. */ private static void handleBlock(RGrammarBuilder build, String block, - int level) throws GrammarException { + int level, int lineOffset) throws GrammarException { /* Discard empty blocks. */ if (block.equals("") || block.matches("\\R")) return; @@ -170,18 +241,19 @@ public class RGrammarParser { String blockType = block.substring(0, typeSep).trim(); if (blockType.equalsIgnoreCase("pragma")) { - handlePragmaBlock(block, build, level); + handlePragmaBlock(block, build, level, lineOffset); } else if (blockType.startsWith("[")) { - handleRuleBlock(block, build, level); + handleRuleBlock(block, build, level, lineOffset); } else if (blockType.equalsIgnoreCase("where")) { - handleWhereBlock(block, build, level); - } else if (blockType.equalsIgnoreCase("#")) { + handleWhereBlock(block, build, level, lineOffset); + } else if (blockType.startsWith("#")) { if(DEBUG) System.err.printf("Handled comment block (%s)\n", block); /* * Comment block. * * @TODO 10/11/17 Ben Culkin :GrammarComment + * * Attach these to the grammar somehow so that they * can be re-output during formatting. */ @@ -194,11 +266,13 @@ public class RGrammarParser { /* Handle reading a block of pragmas. 
*/ private static void handlePragmaBlock(String block, RGrammarBuilder build, - int level) throws GrammarException { + int level, int lineOffset) throws GrammarException { String dlm = String.format(TMPL_PRAGMA_BLOCK_DELIM, level); try (BlockReader pragmaReader = new SimpleBlockReader(dlm, new StringReader(block))) { try { - pragmaReader.forEachBlock((pragma) -> { + for(Block pragma : pragmaReader) { + pragma.lineOffset = lineOffset; + if(DEBUG) System.err.printf("Handled pragma block (%s)\n", pragma); @@ -221,8 +295,8 @@ public class RGrammarParser { throw new GrammarException(msg); } - handlePragma(pragmaBody, build, level); - }); + handlePragma(pragmaBody, build, level, pragma.startLine + lineOffset); + } } catch (GrammarException gex) { Block pragma = pragmaReader.getBlock(); String msg = String.format("Error in pragma: (%s)", pragma); @@ -236,7 +310,7 @@ public class RGrammarParser { /* Handle an individual pragma in a block. */ private static void handlePragma(String pragma, RGrammarBuilder build, - int level) throws GrammarException { + int level, int lineOffset) throws GrammarException { int bodySep = pragma.indexOf(' '); if (bodySep == -1) @@ -265,7 +339,7 @@ public class RGrammarParser { /* Handle a block of a rule declaration and one or more cases. */ private static void handleRuleBlock(String ruleBlock, RGrammarBuilder build, - int level) throws GrammarException { + int level, int lineOffset) throws GrammarException { String dlm = String.format(TMPL_RULEDECL_BLOCK_DELIM, level); try (BlockReader ruleReader = new SimpleBlockReader(dlm, new StringReader(ruleBlock))) { try { @@ -273,19 +347,20 @@ public class RGrammarParser { /* Rule with a declaration followed by multiple cases. 
*/ ruleReader.nextBlock(); Block declBlock = ruleReader.getBlock(); + declBlock.lineOffset = lineOffset; String declContents = declBlock.contents; - Rule rl = handleRuleDecl(build, declContents); + Rule rl = handleRuleDecl(build, declContents, lineOffset + declBlock.startLine); - ruleReader.forEachBlock((block) -> { + for(Block block : ruleReader) { /* Ignore comment lines. */ if(block.contents.trim().startsWith("#")) return; - handleRuleCase(block.contents, build, rl); - }); + handleRuleCase(block.contents, build, rl, block.startLine + lineOffset); + } } else { /* Rule with a declaration followed by a single case. */ - handleRuleDecl(build, ruleBlock); + handleRuleDecl(build, ruleBlock, lineOffset); } } catch (GrammarException gex) { String msg = String.format("Error in rule case (%s)", ruleReader.getBlock()); @@ -298,7 +373,7 @@ public class RGrammarParser { } /* Handle a rule declaration and its initial case. */ - private static Rule handleRuleDecl(RGrammarBuilder build, String declContents) { + private static Rule handleRuleDecl(RGrammarBuilder build, String declContents, int lineOffset) { int declSep = declContents.indexOf("\u2192"); if (declSep == -1) { @@ -312,7 +387,7 @@ public class RGrammarParser { declSep = declContents.indexOf(' '); if (declSep == -1) { - String msg = "A rule must be given at least one case in its declaration, and seperated from that case by \u2192"; + String msg = "A rule must be given at least one case in its declaration, and seperated from that case by \u2192 or ' '"; throw new GrammarException(msg); } @@ -327,32 +402,22 @@ public class RGrammarParser { Rule rul = build.getOrCreateRule(ruleName); - handleRuleCase(ruleBody, build, rul); + handleRuleCase(ruleBody, build, rul, lineOffset); return rul; } /* Handle a single case of a rule. 
*/ - private static void handleRuleCase(String cse, RGrammarBuilder build, Rule rul) { - IList<CaseElement> caseParts = new FunctionalList<>(); + private static void handleRuleCase(String cse, RGrammarBuilder build, Rule rul, int lineOffset) { + Pair<IList<CaseElement>, Integer> caseParts = parseElementString(cse); - for (String csepart : cse.split(" ")) { - String partToAdd = csepart.trim(); - - /* Ignore empty parts */ - if (partToAdd.equals("")) - continue; - - caseParts.add(CaseElement.createElement(partToAdd)); - } - - rul.addCase(new RuleCase(RuleCase.CaseType.NORMAL, caseParts)); + rul.addCase(new NormalRuleCase(caseParts.getLeft()), caseParts.getRight()); } /* Handle a where block (a block with local rules). */ private static void handleWhereBlock(String block, RGrammarBuilder build, - int level) throws GrammarException { - int nlIndex = block.indexOf("\\n"); + int level, int lineOffset) throws GrammarException { + int nlIndex = block.indexOf("\\nin"); if (nlIndex == -1) { throw new GrammarException("Where block must be a context followed by a body"); @@ -363,9 +428,10 @@ public class RGrammarParser { String whereDelim = String.format(TMPL_WHERE_BLOCK_DELIM, level); try (BlockReader whereReader = new SimpleBlockReader(whereDelim, - new StringReader(trimBlock))) { + new StringReader(trimBlock))) { try { Block whereCtx = whereReader.next(); + whereCtx.lineOffset = lineOffset; StringReader ctxReader = new StringReader(whereCtx.contents.trim()); String ctxDelim = String.format(TMPL_TOPLEVEL_BLOCK_DELIM, level + 1); @@ -373,7 +439,9 @@ public class RGrammarParser { try (BlockReader bodyReader = new SimpleBlockReader(ctxDelim, ctxReader)) { @SuppressWarnings("unused") Block whereBody = whereReader.next(); + whereBody.lineOffset = lineOffset + whereCtx.startLine; + System.err.printf("\tUNIMPLEMENTED WHERE:\n%s\n", whereBody.contents); /** * @TODO 10/11/17 Ben Culkin :WhereBlocks * Implement where blocks. 
@@ -385,10 +453,105 @@ public class RGrammarParser { } } catch (GrammarException gex) { throw new GrammarException(String.format("Error in where block (%s)", - whereReader.getBlock()), gex); + whereReader.getBlock()), gex); } } catch (Exception ex) { throw new GrammarException("Unknown error in where block", ex); } } + + public static Pair<IList<CaseElement>, Integer> parseElementString(String cses) { + return parseElementString(cses.split(" ")); + } + + public static Pair<IList<CaseElement>, Integer> parseElementString(String... cses) { + IList<CaseElement> caseParts = new FunctionalList<>(); + + int weight = 1; + + int repCount = 1; + + int serialLower = -1; + int serialUpper = -1; + + int chance = -1; + boolean doSerial = false; + boolean doChance = false; + + for (String csepart : cses) { + String partToAdd = csepart.trim(); + + if (partToAdd.equals("")) { + /* Ignore empty parts */ + continue; + } else if(partToAdd.matches("\\<\\^\\d+\\>")) { + /* Set case weights */ + weight = Integer.parseInt(partToAdd.substring(2, partToAdd.length() - 1)); + } else if(partToAdd.matches("\\<&\\d+\\>")) { + repCount = Integer.parseInt(partToAdd.substring(2, partToAdd.length() - 1)); + } else if(partToAdd.matches("\\<&\\d+\\.\\.\\d+\\>")) { + serialLower = Integer.parseInt(partToAdd.substring(2, partToAdd.indexOf("."))); + serialUpper = Integer.parseInt(partToAdd.substring(partToAdd.lastIndexOf(".") + 1, partToAdd.length() - 1)); + + doSerial = true; + } else if(partToAdd.matches("\\<\\?\\d+\\>")) { + chance = Integer.parseInt(partToAdd.substring(2, partToAdd.length() - 1)); + + doChance = true; + } else if (partToAdd.matches("\\<\\<\\>")) { + CaseElement elm = caseParts.popLast(); + + if(repCount == 0) { + /* Skip no-reps */ + } else { + if(doChance) { + elm = new ChanceCaseElement(elm, chance); + + doChance = false; + } + + if(doSerial) { + elm = new SerialCaseElement(elm, serialLower, serialUpper); + + doSerial = false; + } + + for(int i = 1; i <= repCount; i++) { + 
caseParts.add(elm); + } + + repCount = 1; + } + } else if(partToAdd.matches("\\<[^\\>]+\\>")) { + throw new GrammarException("Unknown parser meta-rule " + partToAdd); + } else { + CaseElement elm = CaseElement.createElement(partToAdd); + + if(repCount == 0) { + /* Skip no-reps */ + } else { + if(doChance) { + elm = new ChanceCaseElement(elm, chance); + + doChance = false; + } + + if(doSerial) { + elm = new SerialCaseElement(elm, serialLower, serialUpper); + + doSerial = false; + } + + for(int i = 1; i <= repCount; i++) { + caseParts.add(elm); + } + + } + + repCount = 1; + } + } + + return new Pair<>(caseParts, weight); + } } diff --git a/src/main/java/bjc/rgens/parser/RGrammarSet.java b/src/main/java/bjc/rgens/parser/RGrammarSet.java index 975510a..b110d21 100644..100755 --- a/src/main/java/bjc/rgens/parser/RGrammarSet.java +++ b/src/main/java/bjc/rgens/parser/RGrammarSet.java @@ -1,13 +1,8 @@ package bjc.rgens.parser; -import java.io.BufferedReader; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; import java.util.HashMap; import java.util.Map; -import java.util.Scanner; +import java.util.TreeMap; import java.util.Set; /** @@ -16,26 +11,33 @@ import java.util.Set; * @author EVE */ public class RGrammarSet { + public String name; + + public ConfigSet belongsTo; + + public RGrammar exportGrammar; + /* Contains all the grammars in this set. */ private Map<String, RGrammar> grammars; /* Contains all the exported rules from grammars. */ - private Map<String, RGrammar> exportedRules; - - /* Contains which export came from which grammar. */ - private Map<String, String> exportFrom; + private Map<String, Rule> exportedRules; /* Contains which file a grammar was loaded from. */ - private Map<String, String> loadedFrom; + public Map<String, String> loadedFrom; + + public static final boolean PERF = true; + public static final boolean DEBUG = true; /** Create a new set of randomized grammars. 
*/ public RGrammarSet() { grammars = new HashMap<>(); - exportedRules = new HashMap<>(); + exportedRules = new TreeMap<>(); - exportFrom = new HashMap<>(); loadedFrom = new HashMap<>(); + + exportGrammar = new RGrammar(exportedRules); } /** @@ -61,12 +63,17 @@ public class RGrammarSet { } grammars.put(grammarName, gram); + gram.belongsTo = this; /* Process exports from the grammar. */ for (Rule export : gram.getExportedRules()) { - exportedRules.put(export.name, gram); + if(exportedRules.containsKey(export.name)) + System.err.printf("WARN: Shadowing rule %s in %s from %s\n", export.name, export.belongsTo.name, grammarName); - exportFrom.put(export.name, grammarName); + exportedRules.put(export.name, export); + + if(DEBUG) + System.err.printf("\t\tDEBUG: %s (%d cases) exported from %s\n", export.name, export.getCases().getSize(), grammarName); } /* Add exports to grammar. */ @@ -123,7 +130,7 @@ public class RGrammarSet { throw new IllegalArgumentException(msg); } - return exportedRules.get(exportName); + return exportedRules.get(exportName).belongsTo; } /** @@ -152,7 +159,12 @@ public class RGrammarSet { throw new IllegalArgumentException(msg); } - return exportFrom.getOrDefault(exportName, "Unknown"); + String nm = exportedRules.get(exportName).belongsTo.name; + if(nm == null) { + return "Unknown"; + } + + return nm; } /** @@ -204,87 +216,4 @@ public class RGrammarSet { public Set<String> getExportedRules() { return exportedRules.keySet(); } - - /** - * Load a grammar set from a configuration file. - * - * @param cfgFile - * The configuration file to load from. - * - * @return - * The grammar set created by the configuration file. - * - * @throws IOException - * If something goes wrong during configuration loading. - */ - public static RGrammarSet fromConfigFile(Path cfgFile) throws IOException { - /* The grammar set to hand back. */ - RGrammarSet set = new RGrammarSet(); - - /* Get the directory that contains the config file. 
*/ - Path cfgParent = cfgFile.getParent(); - - try(Scanner scn = new Scanner(cfgFile)) { - /* Execute lines from the configuration file. */ - while (scn.hasNextLine()) { - String ln = scn.nextLine().trim(); - - /* Ignore blank/comment lines. */ - if (ln.equals("")) continue; - - if (ln.startsWith("#")) continue; - - /* Handle mixed whitespace. */ - ln = ln.replaceAll("\\s+", " "); - - /* - * Get the place where the name of the grammar - * ends. - */ - int nameIdx = ln.indexOf(" "); - if (nameIdx == -1) { - throw new GrammarException("Must specify a name for a loaded grammar"); - } - - /* Name and path of grammar. */ - String name = ln.substring(0, nameIdx); - Path path = Paths.get(ln.substring(nameIdx).trim()); - - /* - * Convert from configuration relative path to - * absolute path. - */ - Path convPath = cfgParent.resolve(path.toString()); - - //if(Files.isDirectory(convPath)) { - // /* @TODO implement subset grammars */ - // throw new GrammarException("Sub-grammar sets aren't implemented yet"); - //} else if (convPath.getFileName().endsWith(".gram")) { - /* Load grammar file. */ - try { - BufferedReader fis = Files.newBufferedReader(convPath); - RGrammar gram = RGrammarParser.readGrammar(fis); - fis.close(); - - /* Add grammar to the set. */ - set.addGrammar(name, gram); - - /* - * Mark where the grammar came - * from. 
- */ - set.loadedFrom.put(name, path.toString()); - } catch (GrammarException gex) { - String msg = String.format("Error loading file '%s'", path); - throw new GrammarException(msg, gex); - } - //} else { - // String msg = String.format("Unrecognized file type '%s'", convPath.getFileName()); - // throw new GrammarException(msg); - //} - } - } - - return set; - } } diff --git a/src/main/java/bjc/rgens/parser/RGrammarTest.java b/src/main/java/bjc/rgens/parser/RGrammarTest.java index 4b1f283..8193fa3 100644..100755 --- a/src/main/java/bjc/rgens/parser/RGrammarTest.java +++ b/src/main/java/bjc/rgens/parser/RGrammarTest.java @@ -1,10 +1,13 @@ package bjc.rgens.parser; +import bjc.rgens.parser.templates.GrammarTemplate; + import java.io.IOException; import java.net.URISyntaxException; import java.net.URL; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.Random; /** * Test for new grammar syntax. @@ -19,49 +22,19 @@ public class RGrammarTest { * Unused CLI args. */ public static void main(String[] args) { - URL rsc = RGrammarTest.class.getResource("/server-config-sample.cfg"); + URL rsc = RGrammarTest.class.getResource("/server-config-sample.gcfg"); try { /* Load a grammar set. */ Path cfgPath = Paths.get(rsc.toURI()); - RGrammarSet gramSet = RGrammarSet.fromConfigFile(cfgPath); + ConfigSet cfgSet = ConfigLoader.fromConfigFile(cfgPath); - /* Generate rule suggestions for all the grammars in the set. */ - for (String gramName : gramSet.getGrammars()) { - gramSet.getGrammar(gramName).generateSuggestions(); + for(RGrammarSet gramSet : cfgSet.grammars.values()) { + testGrammarSet(gramSet); } - /* Generate for each exported rule. */ - for (String exportName : gramSet.getExportedRules()) { - /* Where we loaded the rule from. 
*/ - String loadSrc = gramSet.loadedFrom(gramSet.exportedFrom(exportName)); - - System.out.println(); - System.out.printf("Generating for exported rule '%s' from file '%s'\n", exportName, loadSrc); - - RGrammar grammar = gramSet.getExportSource(exportName); - for (int i = 0; i < 100; i++) { - try { - String res = grammar.generate(exportName); - if(exportName.contains("+")) res = res.replaceAll("\\s+", ""); - - if(res.length() > 120) { - System.out.printf("\t\n\tContents: %s\n\t\n", res); - } else { - System.out.printf("\tContents: %s\n", res); - } - } catch (GrammarException gex) { - /* Print out errors with generation. */ - String fmt = "Error in exported rule '%s' (loaded from '%s')\n"; - - System.out.printf(fmt, exportName, loadSrc); - System.out.println(); - gex.printStackTrace(); - - System.out.println(); - System.out.println(); - } - } + for(GrammarTemplate template : cfgSet.templates.values()) { + testTemplate(template, cfgSet.grammars.get("default")); } } catch (IOException ioex) { ioex.printStackTrace(); @@ -69,4 +42,83 @@ public class RGrammarTest { urisex.printStackTrace(); } } + + private static void testTemplate(GrammarTemplate template, RGrammarSet set) { + System.out.printf("Generating for template %s\n", template); + + Random rnd = new Random(); + + for(int i = 0; i < 10; i++) { + GenerationState state = GenerationState.fromGrammar(rnd, set.exportGrammar); + + template.generate(state); + + String res = state.contents.toString(); + + if(res.length() > 120) { + System.out.printf("\t\n\tContents: %s\n\t\n", res); + } else { + System.out.printf("\tContents: %s\n", res); + } + } + } + + private static void testGrammarSet(RGrammarSet gramSet) { + /* Generate rule suggestions for all the grammars in the set. 
*/ + for (String gramName : gramSet.getGrammars()) { + long startSuggTime = System.nanoTime(); + + gramSet.getGrammar(gramName).generateSuggestions(); + + long endSuggTime = System.nanoTime(); + + long suggDur = endSuggTime - startSuggTime; + + System.err.printf("PERF: Generated rule suggestions for %s in %d ns (%f s)\n", gramName, suggDur, suggDur / 1000000000.0); + } + + System.err.printf("\n\n"); + + /* Generate for each exported rule. */ + for (String exportName : gramSet.getExportedRules()) { + /* Where we loaded the rule from. */ + String loadSrc = gramSet.loadedFrom(gramSet.exportedFrom(exportName)); + + System.out.println(); + System.out.printf("Generating for exported rule '%s' from file '%s'\n", exportName, loadSrc); + + RGrammar grammar = gramSet.getExportSource(exportName); + long startGenTime = System.nanoTime(); + for (int i = 0; i < 100; i++) { + try { + String res = grammar.generate(exportName); + if(exportName.contains("+")) res = res.replaceAll("\\s+", ""); + + if(res.length() > 120) { + System.out.printf("\t\n\tContents: %s\n\t\n", res); + } else { + System.out.printf("\tContents: %s\n", res); + } + } catch (GrammarException gex) { + /* Print out errors with generation. 
*/ + String fmt = "ERROR: Exported rule %s from %s failed (loaded from '%s')\n"; + + System.out.printf(fmt, exportName, grammar.name, loadSrc); + System.out.println(); + System.out.println(); + + System.err.printf(fmt, exportName, grammar.name, loadSrc); + gex.printStackTrace(); + + System.err.println(); + System.err.println(); + } + } + long endGenTime = System.nanoTime(); + + long genDur = endGenTime - startGenTime; + + System.err.printf("PERF: Generated %s 100 times in %d ns (%f s)\n\n\n", exportName, genDur, genDur / 1000000000.0); + } + } } diff --git a/src/main/java/bjc/rgens/parser/RGrammars.java b/src/main/java/bjc/rgens/parser/RGrammars.java index f74a756..cc31bad 100644..100755 --- a/src/main/java/bjc/rgens/parser/RGrammars.java +++ b/src/main/java/bjc/rgens/parser/RGrammars.java @@ -16,20 +16,21 @@ import java.util.Map; * @author Ben Culkin */ public class RGrammars { - private static RGrammarSet gramSet; + private static ConfigSet cfgSet; private static void loadSet() { try { - URI rsc = RGrammarTest.class.getResource("/server-config-sample.cfg").toURI(); + URI rsc = RGrammarTest.class.getResource("/server-config-sample.gcfg").toURI(); Map<String, String> env = new HashMap<>(); env.put("create", "true"); + /* Ensure we can get at the file we need */ @SuppressWarnings("unused") FileSystem zipfs = FileSystems.newFileSystem(rsc, env); Path cfgPath = Paths.get(rsc); - gramSet = RGrammarSet.fromConfigFile(cfgPath); + cfgSet = ConfigLoader.fromConfigFile(cfgPath); } catch (IOException | URISyntaxException ex) { RuntimeException rtex = new RuntimeException("Could not load grammars"); @@ -49,19 +50,23 @@ public class RGrammars { * If something went wrong. 
*/ public static String generateExport(String exportName) throws GrammarException { - if (gramSet == null) + if (cfgSet == null) loadSet(); - if (!gramSet.getExportedRules().contains(exportName)) { - throw new GrammarException(String.format("No exported rule named %s", exportName)); - } + for(RGrammarSet gramSet : cfgSet.grammars.values()) { + if (!gramSet.getExportedRules().contains(exportName)) { + continue; + } + + RGrammar gram = gramSet.getExportSource(exportName); - RGrammar gram = gramSet.getExportSource(exportName); + String res = gram.generate(exportName); + if (exportName.contains("+")) + res = res.replaceAll("\\s+", ""); - String res = gram.generate(exportName); - if (exportName.contains("+")) - res = res.replaceAll("\\s+", ""); + return res; + } - return res; + throw new GrammarException(String.format("No exported rule named %s", exportName)); } } diff --git a/src/main/java/bjc/rgens/parser/RecurLimitException.java b/src/main/java/bjc/rgens/parser/RecurLimitException.java new file mode 100644 index 0000000..faeffb3 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/RecurLimitException.java @@ -0,0 +1,35 @@ +package bjc.rgens.parser; + +/** + * The exception thrown when a rule exceeds its recurrence limit + * + * @author student + */ +public class RecurLimitException extends GrammarException { + /* Serialization ID. */ + private static final long serialVersionUID = -7287427479316953668L; + + /** + * Create a new grammar exception with the specified message. + * + * @param msg + * The message for this exception. + */ + public RecurLimitException(String msg) { + super(msg); + } + + /** + * Create a new grammar exception with the specified message and + * cause. + * + * @param msg + * The message for this exception. + * + * @param cause + * The cause of this exception. 
+ */ + public RecurLimitException(String msg, Exception cause) { + super(msg, cause); + } +} diff --git a/src/main/java/bjc/rgens/parser/RegexRuleCase.java b/src/main/java/bjc/rgens/parser/RegexRuleCase.java index 5e03cd6..3c57489 100644..100755 --- a/src/main/java/bjc/rgens/parser/RegexRuleCase.java +++ b/src/main/java/bjc/rgens/parser/RegexRuleCase.java @@ -6,27 +6,22 @@ import bjc.utils.funcdata.IList; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; +/* + * @TODO + * + * Actually implement this + */ public class RegexRuleCase extends RuleCase { - private Pattern patt; + public RegexRuleCase(IList<CaseElement> elements) { + super(elements); - public RegexRuleCase(IList<CaseElement> elements, String pattern) { - super(RuleCase.CaseType.REGEX); - - elementList = elements; - - try { - patt = Pattern.compile(pattern); - } catch (PatternSyntaxException psex) { - IllegalArgumentException iaex = - new IllegalArgumentException("This type requires a valid regular expression parameter"); + } - iaex.initCause(psex); + public void generate(GenerationState state) { - throw iaex; - } } - public Pattern getPattern() { - return patt; + public RegexRuleCase withElements(IList<CaseElement> elements) { + return new RegexRuleCase(elements); } } diff --git a/src/main/java/bjc/rgens/parser/Rule.java b/src/main/java/bjc/rgens/parser/Rule.java index 7043e0f..ac67158 100644..100755 --- a/src/main/java/bjc/rgens/parser/Rule.java +++ b/src/main/java/bjc/rgens/parser/Rule.java @@ -1,7 +1,9 @@ package bjc.rgens.parser; +import bjc.utils.data.IPair; import bjc.utils.funcdata.FunctionalList; import bjc.utils.funcdata.IList; +import bjc.utils.gen.WeightedRandom; import java.util.Random; @@ -11,12 +13,34 @@ import java.util.Random; * @author EVE */ public class Rule { + public RGrammar belongsTo; + /** The name of this grammar rule. */ - public final String name; + public String name; /* The cases for this rule. 
*/ - private IList<RuleCase> cases; + private WeightedRandom<RuleCase> cases; + + public static enum ProbType { + NORMAL, + DESCENDING, + BINOMIAL + } + + public ProbType prob; + + public int descentFactor; + + public int target; + public int bound; + public int trials; + + public int recurLimit = 5; + private int currentRecur; + private final static Random BASE = new Random(); + + private int serial = 1; /** * Create a new grammar rule. * @@ -35,7 +59,9 @@ public class Rule { name = ruleName; - cases = new FunctionalList<>(); + cases = new WeightedRandom<>(); + + prob = ProbType.NORMAL; } /** @@ -45,11 +71,25 @@ public class Rule { * The case to add. */ public void addCase(RuleCase cse) { + addCase(cse, 1); + } + + /** + * Adds a case to the rule. + * + * @param cse + * The case to add. + */ + public void addCase(RuleCase cse, int weight) { if (cse == null) { throw new NullPointerException("Case must not be null"); } - cases.add(cse); + cse.belongsTo = this; + cse.debugName = String.format("%s-%d", name, serial); + serial += 1; + + cases.addProbability(weight, cse); } /** @@ -59,7 +99,7 @@ public class Rule { * A random case from this rule. */ public RuleCase getCase() { - return cases.randItem(); + return getCase(BASE); } /** @@ -72,7 +112,16 @@ public class Rule { * A random case from this rule. */ public RuleCase getCase(Random rnd) { - return cases.randItem(rnd::nextInt); + switch(prob) { + case DESCENDING: + return cases.getDescent(descentFactor, rnd); + case BINOMIAL: + return cases.getBinomial(target, bound, trials, rnd); + case NORMAL: + return cases.generateValue(rnd); + default: + return cases.generateValue(rnd); + } } /** @@ -81,8 +130,8 @@ public class Rule { * @return * All the cases in this rule. */ - public IList<RuleCase> getCases() { - return cases; + public IList<IPair<Integer, RuleCase>> getCases() { + return cases.getValues(); } /** @@ -91,8 +140,17 @@ public class Rule { * @param cases * The new list of cases. 
*/ - public void replaceCases(IList<RuleCase> cases) { - this.cases = cases; + public void replaceCases(IList<IPair<Integer, RuleCase>> cases) { + this.cases = new WeightedRandom<>(); + + for(IPair<Integer, RuleCase> cse : cases) { + RuleCase cs = cse.getRight(); + cs.belongsTo = this; + cs.debugName = String.format("%s-%d", name, serial); + serial += 1; + + this.cases.addProbability(cse.getLeft(), cs); + } } @Override @@ -129,6 +187,58 @@ public class Rule { @Override public String toString() { - return String.format("Rule [ruleName='%s', ruleCases=%s]", name, cases); + return String.format("Rule '%s' with %d cases", name, cases.getValues().getSize()); + } + + public boolean doRecur() { + if(currentRecur > recurLimit) return false; + + currentRecur += 1; + + return true; + } + + public void endRecur() { + if(currentRecur > 0) currentRecur -= 1; + } + + public Rule exhaust() { + Rule rl = new Rule(name); + + rl.belongsTo = belongsTo; + + rl.cases = cases.exhaustible(); + + rl.prob = prob; + + rl.descentFactor = descentFactor; + + rl.target = target; + rl.bound = bound; + rl.trials = trials; + + rl.recurLimit = recurLimit; + /* @NOTE Is this the right thing to do? 
*/ + rl.currentRecur = 0; + + return rl; + } + + public void generate(GenerationState state) { + state.swapGrammar(belongsTo); + + if(doRecur()) { + RuleCase cse = getCase(state.rnd); + + System.err.printf("\tFINE: Generating %s (from %s)\n", cse, belongsTo.name); + + belongsTo.generateCase(cse, state); + + endRecur(); + } + + if(name.contains("+")) { + state.contents = new StringBuilder(state.contents.toString().replaceAll("\\s+", "")); + } } } diff --git a/src/main/java/bjc/rgens/parser/RuleCase.java b/src/main/java/bjc/rgens/parser/RuleCase.java index 9c0a856..33aea0c 100644..100755 --- a/src/main/java/bjc/rgens/parser/RuleCase.java +++ b/src/main/java/bjc/rgens/parser/RuleCase.java @@ -13,76 +13,42 @@ import bjc.utils.funcdata.IList; * * @author EVE */ -public class RuleCase { - /** - * The possible types of a case. - * - * @author EVE - */ - public static enum CaseType { - /** A normal case, composed from a list of elements. */ - NORMAL, - /** A case that doesn't insert spaces. */ - SPACEFLATTEN, - /** A case that applies a regex after generation. */ - REGEX - } +public abstract class RuleCase { + public String debugName; - /** The type of this case. */ - public final CaseType type; + public final int serial; - /** - * The list of element values for this case. - * - * <h2>Used For</h2> - * <dl> - * <dt>NORMAL, SPACEFLATTEN</dt> - * <dd>Used as the list of elementList the rule is composed of.</dd> - * </dl> - */ - protected IList<CaseElement> elementList; + private static int nextSerial = 0; - protected RuleCase(CaseType typ) { - type = typ; - } + public Rule belongsTo; + + public IList<CaseElement> elementList; /** * Create a new case of the specified type that takes a element list * parameter. * - * @param typ - * The type of case to create. - * * @param elements * The element list parameter of the case. * - * @throws IllegalArgumentException - * If this type doesn't take a element list parameter. 
*/ - public RuleCase(CaseType typ, IList<CaseElement> elements) { - this(typ); - - switch (typ) { - case NORMAL: - case SPACEFLATTEN: - break; - case REGEX: - throw new IllegalArgumentException("This type requires an element list and a pattern"); - default: - throw new IllegalArgumentException("This type doesn't have a element list parameter"); - } - + protected RuleCase(IList<CaseElement> elements) { elementList = elements; + + serial = nextSerial; + nextSerial += 1; } - /** - * Get the element list value of this type. - * - * @return - * The element list value of this case, or null if this type - * doesn't have one. - */ - public IList<CaseElement> getElements() { - return elementList; + public abstract void generate(GenerationState state); + + public abstract RuleCase withElements(IList<CaseElement> elements); + + public String toString() { + if(debugName != null) { + return String.format("Case %s (#%d) of %s", debugName, serial, belongsTo); + } else { + return String.format("Case #%d of %s", serial, belongsTo, serial, belongsTo); + } } + } diff --git a/src/main/java/bjc/rgens/parser/elements/BlankCaseElement.java b/src/main/java/bjc/rgens/parser/elements/BlankCaseElement.java index 7229e92..66ff310 100644..100755 --- a/src/main/java/bjc/rgens/parser/elements/BlankCaseElement.java +++ b/src/main/java/bjc/rgens/parser/elements/BlankCaseElement.java @@ -1,5 +1,7 @@ package bjc.rgens.parser.elements; +import bjc.rgens.parser.GenerationState; + public class BlankCaseElement extends LiteralCaseElement { public BlankCaseElement() { super(""); diff --git a/src/main/java/bjc/rgens/parser/elements/CaseElement.java b/src/main/java/bjc/rgens/parser/elements/CaseElement.java index d74ab52..a44ef6c 100644..100755 --- a/src/main/java/bjc/rgens/parser/elements/CaseElement.java +++ b/src/main/java/bjc/rgens/parser/elements/CaseElement.java @@ -1,17 +1,16 @@ package bjc.rgens.parser.elements; +import bjc.utils.funcutils.StringUtils; + +import bjc.rgens.parser.GenerationState; 
import bjc.rgens.parser.GrammarException; -/* - * @TODO 10/11/17 Ben Culkin :CaseElementSplit Split this into multiple - * subclasses based off of a value of ElementType. - */ /** * A element in a rule case. * * @author EVE */ -public class CaseElement { +public abstract class CaseElement { /** * The possible types of an element. * @@ -19,27 +18,27 @@ public class CaseElement { */ public static enum ElementType { /** An element that represents a literal string. */ - LITERAL, + LITERAL(true), /** An element that represents a rule reference. */ - RULEREF, + RULEREF(true), /** An element that represents a random range. */ - RANGE, + RANGE(true), /** An element that represents a variable that stores a string. */ - VARDEF, - /** - * An element that represents a variable that stores the result of generating a - * rule. - */ - EXPVARDEF; - } + VARIABLE(false); - /* Regexps for marking rule types. */ - private static final String SPECIAL_CASELEM = "\\{[^}]+\\}"; - private static final String REFER_CASELEM = "\\[[^\\]]+\\]"; - private static final String RANGE_CASELM = "\\[\\d+\\.\\.\\d+\\]"; + public final boolean spacing; + + private ElementType(boolean spacing) { + this.spacing = spacing; + } + } /** The type of this element. */ - public final ElementType type; + public boolean spacing; + + protected CaseElement() { + this(true); + } /** * Create a new case element. @@ -47,17 +46,17 @@ public class CaseElement { * @param typ * The type of this element. */ - protected CaseElement(ElementType typ) { - type = typ; + protected CaseElement(boolean spacing) { + this.spacing = spacing; } - @Override - public String toString() { - switch (type) { - default: - return String.format("Unknown type '%s'", type); - } - } + /** + * Generate this case element. + * + * @param state + * The current state of generation. + */ + public abstract void generate(GenerationState state); /** * Create a case element from a string. 
@@ -72,52 +71,80 @@ public class CaseElement { throw new NullPointerException("Case part cannot be null"); } - if (csepart.matches(SPECIAL_CASELEM)) { - /* Handle special cases. */ + if (csepart.matches("\\(\\S+\\)")) { + return createElement(csepart.substring(1, csepart.length() - 1)); + } else if (csepart.matches("\\{\\S+\\}")) { + /* + * Handle special case elements. + * + */ String specialBody = csepart.substring(1, csepart.length() - 1); - System.out.printf("\t\tTRACE: special body is '%s'\n", specialBody); + if (specialBody.matches("\\S+:\\S=\\S+")) { + String[] parts = StringUtils.levelSplit(specialBody, "=").toArray(new String[0]); - if (specialBody.matches("\\S+:=\\S+")) { - /* Handle expanding variable definitions. */ - String[] parts = specialBody.split(":="); + if(parts.length != 2) { + throw new GrammarException("Colon variables must have a name and a definition"); + } + + String varName = parts[0]; + + char op = varName.charAt(varName.length() - 1); + + System.err.printf("\t\tTRACE: Colon definition w/ op %d", (int)op); - if (parts.length != 2) { - String msg = "Expanded variables must be a name and a definition, seperated by :="; + // Remove the colon, plus any tacked on operator + varName = varName.substring(0, varName.length() - 2); - throw new GrammarException(msg); + return VariableDefCaseElement.parseVariable(varName, parts[1], op, true); + } else if (specialBody.matches("\\S+:=\\S+")) { + String[] parts = StringUtils.levelSplit(specialBody, "=").toArray(new String[0]); + + if(parts.length != 2) { + throw new GrammarException("Colon variables must have a name and a definition"); } - return new ExpVariableCaseElement(parts[0], parts[1]); - } else if (specialBody.matches("\\S+=\\S+")) { - /* Handle regular variable definitions. 
*/ - String[] parts = specialBody.split("="); + String varName = parts[0]; - if (parts.length != 2) { - String msg = "Variables must be a name and a definition, seperated by ="; + varName = varName.substring(0, varName.length() - 1); - throw new GrammarException(msg); + return VariableDefCaseElement.parseVariable(varName, parts[1], ' ', true); + } else if (specialBody.matches("\\S+=\\S+")) { + String[] parts = specialBody.split("="); + if(parts.length != 2) { + throw new GrammarException("Variables must have a name and a definition"); } - return new LitVariableCaseElement(parts[0], parts[1]); - } else if (specialBody.matches("{empty}")) { + // Non-colon variables can't take an operator + return VariableDefCaseElement.parseVariable(parts[0], parts[1], (char)0, false); + } else if (specialBody.matches("empty")) { /* Literal blank, for empty cases. */ return new BlankCaseElement(); } else { throw new IllegalArgumentException(String.format("Unknown special case part '%s'", specialBody)); } - } else if (csepart.matches(REFER_CASELEM)) { - if (csepart.matches(RANGE_CASELM)) { - /* Handle ranges */ - String rawRange = csepart.substring(1, csepart.length() - 1); + } else if (csepart.matches("\\[\\S+\\]")) { + String rawCase = csepart.substring(1, csepart.length() - 1); - int firstNum = Integer.parseInt(rawRange.substring(0, rawRange.indexOf('.'))); - int secondNum = Integer.parseInt(rawRange.substring(rawRange.lastIndexOf('.') + 1)); + if (rawCase.matches("\\d+\\.{2}\\d+")) { + int firstNum = Integer.parseInt(rawCase.substring(0, rawCase.indexOf('.'))); + int secondNum = Integer.parseInt(rawCase.substring(rawCase.lastIndexOf('.') + 1)); return new RangeCaseElement(firstNum, secondNum); - } + } else if(rawCase.contains("||")) { + String[] elms = StringUtils.levelSplit(rawCase, "||").toArray(new String[0]); - return new RuleCaseElement(csepart); + return new InlineRuleCaseElement(elms); + } else if(rawCase.contains("|")) { + throw new GrammarException("\t\tERROR: Inline rule 
using | found, they use || now"); + + // String[] elms = StringUtils.levelSplit(rawCase, "|").toArray(new String[0]); + // return new InlineRuleCaseElement(elms); + } else { + return new RuleCaseElement(rawCase); + } + } else if(csepart.startsWith("%") && !csepart.equals("%")) { + return new RuleCaseElement(csepart); } else { return new LiteralCaseElement(csepart); } @@ -127,7 +154,7 @@ public class CaseElement { public int hashCode() { final int prime = 31; int result = 1; - result = prime * result + ((type == null) ? 0 : type.hashCode()); + result = prime * result + (spacing ? 0 : 2); return result; } @@ -140,8 +167,8 @@ public class CaseElement { if (getClass() != obj.getClass()) return false; CaseElement other = (CaseElement) obj; - if (type != other.type) + if (spacing != other.spacing) return false; return true; } -}
\ No newline at end of file +} diff --git a/src/main/java/bjc/rgens/parser/elements/ChanceCaseElement.java b/src/main/java/bjc/rgens/parser/elements/ChanceCaseElement.java new file mode 100644 index 0000000..483a103 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/elements/ChanceCaseElement.java @@ -0,0 +1,20 @@ +package bjc.rgens.parser.elements; + +import bjc.rgens.parser.GenerationState; + +public class ChanceCaseElement extends CaseElement { + public final CaseElement elm; + + public int chance; + + public ChanceCaseElement(CaseElement elm, int chance) { + super(elm.spacing); + + this.elm = elm; + this.chance = chance; + } + + public void generate(GenerationState state) { + if(state.rnd.nextInt(chance) == 0) elm.generate(state); + } +} diff --git a/src/main/java/bjc/rgens/parser/elements/ExpVariableCaseElement.java b/src/main/java/bjc/rgens/parser/elements/ExpVariableCaseElement.java index 30925e2..e58d073 100644..100755 --- a/src/main/java/bjc/rgens/parser/elements/ExpVariableCaseElement.java +++ b/src/main/java/bjc/rgens/parser/elements/ExpVariableCaseElement.java @@ -1,7 +1,40 @@ package bjc.rgens.parser.elements; -public class ExpVariableCaseElement extends VariableCaseElement { +import bjc.utils.data.IPair; + +import bjc.rgens.parser.GenerationState; +import bjc.rgens.parser.GrammarException; +import bjc.rgens.parser.RecurLimitException; +import bjc.rgens.parser.RGrammar; +import bjc.rgens.parser.Rule; +import bjc.rgens.parser.RuleCase; + +public class ExpVariableCaseElement extends VariableDefCaseElement { public ExpVariableCaseElement(String name, String def) { - super(name, def, true); + super(name, def); + } + + @Override + public void generate(GenerationState state) { + GenerationState newState = state.newBuf(); + + Rule rl = state.findRule(varDef, true); + + if(rl != null) { + RGrammar destGrammar = rl.belongsTo; + newState.swapGrammar(destGrammar); + /* + * Don't post-process the string, we should only do that + * once. 
+ */ + String res = destGrammar.generate(varDef, state, false); + + newState.contents = new StringBuilder(res); + } else { + String msg = String.format("No rule '%s' defined", varDef); + throw new GrammarException(msg); + } + + state.vars.put(varName, newState.contents.toString()); } } diff --git a/src/main/java/bjc/rgens/parser/elements/InlineRuleCaseElement.java b/src/main/java/bjc/rgens/parser/elements/InlineRuleCaseElement.java new file mode 100644 index 0000000..917dd33 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/elements/InlineRuleCaseElement.java @@ -0,0 +1,41 @@ +package bjc.rgens.parser.elements; + +import bjc.rgens.parser.GenerationState; +import bjc.rgens.parser.RGrammarParser; + +import bjc.utils.data.IPair; +import bjc.utils.funcdata.FunctionalList; +import bjc.utils.funcdata.IList; +import bjc.utils.funcutils.StringUtils; +import bjc.utils.gen.WeightedRandom; + +public class InlineRuleCaseElement extends CaseElement { + public final WeightedRandom<CaseElement> elements; + + public InlineRuleCaseElement(String... 
parts) { + super(true); + + this.elements = new WeightedRandom<>(); + + for(String part : parts) { + String[] partArr; + + if(StringUtils.levelContains(part, "|")) { + partArr = StringUtils.levelSplit(part, "||").toArray(new String[0]); + } else { + partArr = new String[] {part}; + } + + IPair<IList<CaseElement>, Integer> par = RGrammarParser.parseElementString(partArr); + int prob = par.getRight(); + + for(CaseElement elm : par.getLeft()) { + elements.addProbability(prob, elm); + } + } + } + + public void generate(GenerationState state) { + elements.generateValue(state.rnd).generate(state); + } +} diff --git a/src/main/java/bjc/rgens/parser/elements/LitVariableCaseElement.java b/src/main/java/bjc/rgens/parser/elements/LitVariableCaseElement.java index 11035b1..3c6ba98 100644..100755 --- a/src/main/java/bjc/rgens/parser/elements/LitVariableCaseElement.java +++ b/src/main/java/bjc/rgens/parser/elements/LitVariableCaseElement.java @@ -1,7 +1,13 @@ package bjc.rgens.parser.elements; -public class LitVariableCaseElement extends VariableCaseElement { +import bjc.rgens.parser.GenerationState; + +public class LitVariableCaseElement extends VariableDefCaseElement { public LitVariableCaseElement(String name, String def) { - super(name, def, false); + super(name, def); + } + + public void generate(GenerationState state) { + state.vars.put(varName, varDef); } } diff --git a/src/main/java/bjc/rgens/parser/elements/LiteralCaseElement.java b/src/main/java/bjc/rgens/parser/elements/LiteralCaseElement.java index d96a32d..6e0f8fd 100644 --- a/src/main/java/bjc/rgens/parser/elements/LiteralCaseElement.java +++ b/src/main/java/bjc/rgens/parser/elements/LiteralCaseElement.java @@ -1,7 +1,17 @@ package bjc.rgens.parser.elements; -public class LiteralCaseElement extends StringCaseElement { - public LiteralCaseElement(String vl) { - super(vl, true); +import bjc.rgens.parser.GenerationState; + +public class LiteralCaseElement extends CaseElement { + public String val; + + public 
LiteralCaseElement(String val) { + super(true); + + this.val = val; + } + + public void generate(GenerationState state) { + state.contents.append(val); } } diff --git a/src/main/java/bjc/rgens/parser/elements/RangeCaseElement.java b/src/main/java/bjc/rgens/parser/elements/RangeCaseElement.java index d98bc61..e877dd1 100644..100755 --- a/src/main/java/bjc/rgens/parser/elements/RangeCaseElement.java +++ b/src/main/java/bjc/rgens/parser/elements/RangeCaseElement.java @@ -1,16 +1,25 @@ package bjc.rgens.parser.elements; +import bjc.rgens.parser.GenerationState; + public class RangeCaseElement extends CaseElement { public final int begin; public final int end; public RangeCaseElement(int beg, int en) { - super(ElementType.RANGE); + super(true); begin = beg; end = en; } + public void generate(GenerationState state) { + int val = state.rnd.nextInt(end - begin); + val += begin; + + state.contents.append(val); + } + @Override public int hashCode() { final int prime = 31; diff --git a/src/main/java/bjc/rgens/parser/elements/RuleCaseElement.java b/src/main/java/bjc/rgens/parser/elements/RuleCaseElement.java index f4d3512..f13dbdb 100644..100755 --- a/src/main/java/bjc/rgens/parser/elements/RuleCaseElement.java +++ b/src/main/java/bjc/rgens/parser/elements/RuleCaseElement.java @@ -1,7 +1,103 @@ package bjc.rgens.parser.elements; -public class RuleCaseElement extends StringCaseElement { +import bjc.utils.data.IPair; +import bjc.utils.data.Pair; + +import bjc.rgens.parser.*; +import bjc.rgens.parser.elements.vars.*; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class RuleCaseElement extends CaseElement { + public List<VariableElement> elements; + public RuleCaseElement(String vl) { - super(vl, false); + super(true); + + this.elements = VariableElement.parseElementString(vl); + } + + public RuleCaseElement(String vl, List<VariableElement> elements) { + super(true); + + this.elements = elements; + 
} + + public void generate(GenerationState state) { + GenerationState newState = state.newBuf(); + + boolean inName = false; + + for(VariableElement elm : elements) { + elm.generate(newState); + + if(inName == false) inName = elm.forbidSpaces; + } + + String body = newState.contents.toString(); + + if(inName) { + doGenerate(String.format("[%s]", body), state); + } else { + state.contents.append(body); + } + } + + protected void doGenerate(String actName, GenerationState state) { + GenerationState newState = state.newBuf(); + + Rule rl; + + if (actName.startsWith("[^")) { + actName = "[" + actName.substring(2); + + rl = state.findImport(actName); + } else { + rl = state.findRule(actName, true); + } + + if(rl != null) { + RGrammar destGrammar = rl.belongsTo; + newState.swapGrammar(destGrammar); + /* + * Don't postprocess the string, we should only do that + * once. + */ + String res = destGrammar.generate(actName, newState, false); + newState.contents = new StringBuilder(res); + } else { + /* + * @TODO 5/29/18 Ben Culkin :RuleSuggesting + * + * Re-get this working again. + */ + /* + if (ruleSearcher != null) { + Set<Match<? 
extends String>> results = ruleSearcher.search(actName, MAX_DISTANCE); + + String[] resArray = results.stream().map(Match::getMatch).toArray((i) -> new String[i]); + + String msg = String.format("No rule '%s' defined (perhaps you meant %s?)", actName, + StringUtils.toEnglishList(resArray, false)); + + throw new GrammarException(msg); + } + */ + + String msg = String.format("No rule '%s' defined", actName); + throw new GrammarException(msg); + } + + String res = newState.contents.toString(); + + if (actName.contains("+")) { + /* Rule names with pluses in them get space-flattened */ + state.contents.append(res.replaceAll("\\s+", "")); + } else { + state.contents.append(res); + } } } diff --git a/src/main/java/bjc/rgens/parser/elements/RuleVariableCaseElement.java b/src/main/java/bjc/rgens/parser/elements/RuleVariableCaseElement.java new file mode 100644 index 0000000..29b6fc9 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/elements/RuleVariableCaseElement.java @@ -0,0 +1,39 @@ +package bjc.rgens.parser.elements; + +import bjc.utils.data.IPair; +import bjc.utils.data.Pair; + +import bjc.rgens.parser.GrammarException; +import bjc.rgens.parser.GenerationState; +import bjc.rgens.parser.Rule; +import bjc.rgens.parser.RGrammar; + +public class RuleVariableCaseElement extends VariableDefCaseElement { + public final boolean exhaust; + + public RuleVariableCaseElement(String varName, String varDef, boolean exhaust) { + super(varName, varDef); + + this.exhaust = exhaust; + } + + public void generate(GenerationState state) { + Rule rl = state.findRule(varDef, true); + + if(rl == null) { + throw new GrammarException("Can't create variable referencing non-existent rule " + varDef); + } + + if(exhaust) { + rl = rl.exhaust(); + } + + state.rlVars.put(varName, rl); + + if(exhaust) { + System.err.printf("\t\tFINE: Defined exhausted rulevar '%s' ('%s')\n", varName, varDef); + } else { + System.err.printf("\t\tFINE: Defined rulevar '%s' ('%s')\n", varName, varDef); + } + } +} diff 
--git a/src/main/java/bjc/rgens/parser/elements/SerialCaseElement.java b/src/main/java/bjc/rgens/parser/elements/SerialCaseElement.java new file mode 100644 index 0000000..348cfbb --- /dev/null +++ b/src/main/java/bjc/rgens/parser/elements/SerialCaseElement.java @@ -0,0 +1,30 @@ +package bjc.rgens.parser.elements; + +import bjc.rgens.parser.GenerationState; + +public class SerialCaseElement extends CaseElement { + public final CaseElement rep; + + public final int lower; + public final int upper; + + public SerialCaseElement(CaseElement rep, int lower, int upper) { + super(rep.spacing); + + this.rep = rep; + + this.lower = lower; + this.upper = upper; + } + + public void generate(GenerationState state) { + int num = state.rnd.nextInt(upper - lower) + lower; + + for(int i = 0; i < num; i++) { + rep.generate(state); + + if(rep.spacing) + state.contents.append(" "); + } + } +} diff --git a/src/main/java/bjc/rgens/parser/elements/StringCaseElement.java b/src/main/java/bjc/rgens/parser/elements/StringCaseElement.java index 0e64fd3..00441c0 100644..100755 --- a/src/main/java/bjc/rgens/parser/elements/StringCaseElement.java +++ b/src/main/java/bjc/rgens/parser/elements/StringCaseElement.java @@ -1,10 +1,10 @@ package bjc.rgens.parser.elements; -public class StringCaseElement extends CaseElement { +public abstract class StringCaseElement extends CaseElement { public final String val; - protected StringCaseElement(String vl, boolean isLiteral) { - super(isLiteral ? 
ElementType.LITERAL : ElementType.RULEREF); + protected StringCaseElement(String vl) { + super(true); val = vl; } diff --git a/src/main/java/bjc/rgens/parser/elements/VariableCaseElement.java b/src/main/java/bjc/rgens/parser/elements/VariableDefCaseElement.java index 920445a..37a12b6 100644..100755 --- a/src/main/java/bjc/rgens/parser/elements/VariableCaseElement.java +++ b/src/main/java/bjc/rgens/parser/elements/VariableDefCaseElement.java @@ -1,6 +1,8 @@ package bjc.rgens.parser.elements; -public class VariableCaseElement extends CaseElement { +import bjc.rgens.parser.GrammarException; + +public abstract class VariableDefCaseElement extends CaseElement { /** * The name of the variable this element defines. */ @@ -11,8 +13,8 @@ public class VariableCaseElement extends CaseElement { */ public final String varDef; - public VariableCaseElement(String name, String def, boolean isExp) { - super(isExp ? ElementType.EXPVARDEF : ElementType.VARDEF); + public VariableDefCaseElement(String name, String def) { + super(false); varName = name; varDef = def; @@ -35,7 +37,7 @@ public class VariableCaseElement extends CaseElement { return false; if (getClass() != obj.getClass()) return false; - VariableCaseElement other = (VariableCaseElement) obj; + VariableDefCaseElement other = (VariableDefCaseElement) obj; if (varDef == null) { if (other.varDef != null) return false; @@ -49,12 +51,16 @@ public class VariableCaseElement extends CaseElement { return true; } - @Override - public String toString() { - if (type == ElementType.VARDEF) { - return String.format("{%s:=%s}", varName, varDef); + public static CaseElement parseVariable(String varName, String varDef, char op, boolean colon) { + if(varName.startsWith("$")) { + // Handle normal/expanding variable definitions + if(colon) return new ExpVariableCaseElement(varName.substring(1), varDef); + + return new LitVariableCaseElement(varName.substring(1), varDef); + } else if(varName.startsWith("@")) { + return new 
RuleVariableCaseElement(varName.substring(1), varDef, colon); } else { - return String.format("{%s=%s}", varName, varDef); + throw new GrammarException("Unrecognized declaration sigil " + varName.charAt(0)); } } } diff --git a/src/main/java/bjc/rgens/parser/elements/vars/ARefVariableElement.java b/src/main/java/bjc/rgens/parser/elements/vars/ARefVariableElement.java new file mode 100644 index 0000000..a4bb730 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/elements/vars/ARefVariableElement.java @@ -0,0 +1,31 @@ +package bjc.rgens.parser.elements.vars; + +import bjc.rgens.parser.GenerationState; +import bjc.rgens.parser.GrammarException; +import bjc.rgens.parser.Rule; + +public class ARefVariableElement extends VariableElement { + public String value; + + public ARefVariableElement(String val) { + super(false); + + value = val; + } + + public void generate(GenerationState state) { + if(!state.rlVars.containsKey(value)) { + throw new GrammarException("No rule variable named " + value); + } + + Rule rl = state.rlVars.get(value); + + GenerationState newState = state.newBuf(); + + rl.generate(newState); + + String res = newState.contents.toString(); + + state.contents.append(res); + } +} diff --git a/src/main/java/bjc/rgens/parser/elements/vars/LiteralVariableElement.java b/src/main/java/bjc/rgens/parser/elements/vars/LiteralVariableElement.java new file mode 100644 index 0000000..cf33c66 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/elements/vars/LiteralVariableElement.java @@ -0,0 +1,17 @@ +package bjc.rgens.parser.elements.vars; + +import bjc.rgens.parser.GenerationState; + +public class LiteralVariableElement extends VariableElement { + public String val; + + public LiteralVariableElement(boolean forbidSpaces, String val) { + super(forbidSpaces); + + this.val = val; + } + + public void generate(GenerationState state) { + state.contents.append(val); + } +} diff --git a/src/main/java/bjc/rgens/parser/elements/vars/RRefVariableElement.java 
b/src/main/java/bjc/rgens/parser/elements/vars/RRefVariableElement.java
new file mode 100644
index 0000000..d731d64
--- /dev/null
+++ b/src/main/java/bjc/rgens/parser/elements/vars/RRefVariableElement.java
@@ -0,0 +1,54 @@
+package bjc.rgens.parser.elements.vars;
+
+import bjc.rgens.parser.GenerationState;
+import bjc.rgens.parser.GrammarException;
+import bjc.rgens.parser.Rule;
+
+/**
+ * Variable element that expands a rule looked up by name.
+ */
+public class RRefVariableElement extends VariableElement {
+	/** Name of the rule to expand. */
+	public String value;
+
+	/**
+	 * Create a new rule reference.
+	 *
+	 * @param forbidSpaces
+	 * 	Whether generated text containing a space is rejected.
+	 * @param val
+	 * 	The name of the rule to expand.
+	 */
+	public RRefVariableElement(boolean forbidSpaces, String val) {
+		super(forbidSpaces);
+
+		value = val;
+	}
+
+	/**
+	 * Expand the named rule and append the text it produces.
+	 *
+	 * @param state
+	 * 	The state used to find the rule and to receive the output.
+	 *
+	 * @throws GrammarException
+	 * 	If spaces are forbidden and the generated text contains one.
+	 */
+	@Override
+	public void generate(GenerationState state) {
+		Rule rl = state.findRule(value, true);
+
+		GenerationState newState = state.newBuf();
+
+		rl.generate(newState);
+
+		String res = newState.contents.toString();
+
+		if(forbidSpaces && res.contains(" ")) {
+			/* Name the offending rule instead of printing a raw %s. */
+			throw new GrammarException(String.format("Spaces not allowed in this context (rule-reference %s)", value));
+		}
+
+		state.contents.append(res);
+	}
+}
diff --git a/src/main/java/bjc/rgens/parser/elements/vars/TRefVariableElement.java b/src/main/java/bjc/rgens/parser/elements/vars/TRefVariableElement.java
new file mode 100644
index 0000000..a33d78c
--- /dev/null
+++ b/src/main/java/bjc/rgens/parser/elements/vars/TRefVariableElement.java
@@ -0,0 +1,47 @@
+package bjc.rgens.parser.elements.vars;
+
+import bjc.rgens.parser.GenerationState;
+import bjc.rgens.parser.GrammarException;
+import bjc.rgens.parser.templates.GrammarTemplate;
+
+/*
+ * Placeholder for template-variable references; generate() is a no-op.
+ *
+ * @TODO
+ *
+ * finish when template vars are implemented.
+ */
+public class TRefVariableElement extends VariableElement {
+	/* Name captured from the reference; unused until generate() is implemented. */
+	public String value;
+
+	public TRefVariableElement(boolean forbidSpaces, String val) {
+		super(forbidSpaces);
+
+		value = val;
+	}
+
+	/* Emits nothing for now; the body below is the sketch to adapt later. */
+	@Override
+	public void generate(GenerationState state) {
+		/*
+		if(!state.rlVars.containsKey(val)) {
+			throw new GrammarException("No rule variable named " + val);
+		}
+
+		Rule rl = state.rlVars.get(val);
+
+		GenerationState newState = state.newBuf();
+
+		rl.generate(newState);
+
+		String res = newState.contents.toString();
+
+		if(forbidSpaces && res.contains(" ")) {
+			throw new GrammarException("Spaces not allowed in this context (rule-var %s)");
+		}
+
+		return res;
+		*/
+	}
+}
diff --git a/src/main/java/bjc/rgens/parser/elements/vars/VRefVariableElement.java b/src/main/java/bjc/rgens/parser/elements/vars/VRefVariableElement.java
new file mode 100644
index 0000000..b19f785
--- /dev/null
+++ b/src/main/java/bjc/rgens/parser/elements/vars/VRefVariableElement.java
@@ -0,0 +1,49 @@
+package bjc.rgens.parser.elements.vars;
+
+import bjc.rgens.parser.GenerationState;
+import bjc.rgens.parser.GrammarException;
+
+/**
+ * Variable element that emits the value of a string variable.
+ */
+public class VRefVariableElement extends VariableElement {
+	/** Name of the variable to read. */
+	public final String nam;
+
+	/**
+	 * Create a new variable reference.
+	 *
+	 * @param forbidSpaces
+	 * 	Whether a value containing a space is rejected.
+	 * @param nam
+	 * 	The name of the variable to read.
+	 */
+	public VRefVariableElement(boolean forbidSpaces, String nam) {
+		super(forbidSpaces);
+
+		this.nam = nam;
+	}
+
+	/**
+	 * Append the current value of the variable to the output.
+	 *
+	 * @param state
+	 * 	The state holding the variables and the output buffer.
+	 *
+	 * @throws GrammarException
+	 * 	If the variable is undefined, or holds a space while spaces are forbidden.
+	 */
+	@Override
+	public void generate(GenerationState state) {
+		if (!state.vars.containsKey(nam)) {
+			throw new GrammarException(String.format("No variable '%s' defined", nam));
+		}
+
+		String strang = state.vars.get(nam);
+		if(forbidSpaces && strang.contains(" ")) {
+			throw new GrammarException(String.format("Cannot include variable %s w/ spaces in body in rule name", nam));
+		}
+
+		state.contents.append(strang);
+	}
+}
diff --git a/src/main/java/bjc/rgens/parser/elements/vars/VariableElement.java b/src/main/java/bjc/rgens/parser/elements/vars/VariableElement.java
new file mode 100644
index 0000000..19c1e2c
--- /dev/null
+++ 
b/src/main/java/bjc/rgens/parser/elements/vars/VariableElement.java
@@ -0,0 +1,108 @@
+package bjc.rgens.parser.elements.vars;
+
+import bjc.utils.funcutils.StringUtils;
+
+import bjc.rgens.parser.GenerationState;
+import bjc.rgens.parser.GrammarException;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Base class for the pieces a variable element string is parsed into.
+ */
+public abstract class VariableElement {
+	/** Whether output containing spaces should be rejected by this element. */
+	public boolean forbidSpaces;
+
+	protected VariableElement(boolean forbidSpacing) {
+		forbidSpaces = forbidSpacing;
+	}
+
+	/**
+	 * Append this element's text to the output.
+	 *
+	 * @param state
+	 * 	The state to read from and append to.
+	 */
+	public abstract void generate(GenerationState state);
+
+	/**
+	 * Parse an element string, splitting it into parts when
+	 * StringUtils.levelContains reports a "-" or "+" in it.
+	 *
+	 * @param varElm
+	 * 	The string to parse.
+	 *
+	 * @return
+	 * 	The parsed elements, in order.
+	 */
+	public static List<VariableElement> parseElementString(String varElm) {
+		/* A string with a "-" or "+" separator is the case where spaces are forbidden. */
+		boolean forbidSpaces = StringUtils.levelContains(varElm, "-", "+");
+
+		String[] parts;
+
+		if(forbidSpaces) {
+			parts = StringUtils.levelSplit(varElm, true, "-", "+").toArray(new String[0]);
+		} else {
+			parts = new String[] { varElm };
+		}
+
+		return parseElementString(forbidSpaces, parts);
+	}
+
+	/**
+	 * Turn already-split parts into elements, picking the type by sigil:
+	 * "$" variable, "@" rule variable, "%" rule, anything else literal.
+	 * Consecutive literals are merged into one element.
+	 *
+	 * @param forbidSpaces
+	 * 	Passed on to the elements that take it; also disallows "@" parts.
+	 * @param parts
+	 * 	The parts to convert.
+	 *
+	 * @return
+	 * 	The parsed elements, in order.
+	 *
+	 * @throws GrammarException
+	 * 	For an "@" part while spaces are forbidden, or any "/" part.
+	 */
+	public static List<VariableElement> parseElementString(boolean forbidSpaces, String... parts) {
+		List<VariableElement> elms = new ArrayList<>(parts.length);
+
+		VariableElement prevElement = null;
+
+		for (String part : parts) {
+			VariableElement elm = null;
+
+			if(part.startsWith("$")) {
+				elm = new VRefVariableElement(forbidSpaces, part.substring(1));
+			} else if (part.startsWith("@")) {
+				if(forbidSpaces)
+					throw new GrammarException("Arrays references aren't allowed in rule names");
+
+				elm = new ARefVariableElement(part.substring(1));
+			} else if (part.startsWith("%")) {
+				elm = new RRefVariableElement(forbidSpaces, part.substring(1));
+			} else if (part.startsWith("/")) {
+				throw new GrammarException("Template variables aren't implemented yet");
+			} else {
+				if(prevElement instanceof LiteralVariableElement) {
+					/* Aggregate chain literals together */
+					((LiteralVariableElement)prevElement).val += part;
+				} else {
+					elm = new LiteralVariableElement(forbidSpaces, part);
+				}
+			}
+
+			if(elm != null) {
+				elms.add(elm);
+
+				prevElement = elm;
+			}
+		}
+
+		return elms;
+	}
+}
diff --git a/src/main/java/bjc/rgens/parser/new-syntax.txt b/src/main/java/bjc/rgens/parser/new-syntax.txt
index f6578b4..f6578b4 100644..100755
--- a/src/main/java/bjc/rgens/parser/new-syntax.txt
+++ b/src/main/java/bjc/rgens/parser/new-syntax.txt
diff --git a/src/main/java/bjc/rgens/parser/templates/GrammarTemplate.java b/src/main/java/bjc/rgens/parser/templates/GrammarTemplate.java
new file mode 100644
index 0000000..fa634a5
--- /dev/null
+++ b/src/main/java/bjc/rgens/parser/templates/GrammarTemplate.java
@@ -0,0 +1,101 @@
+package bjc.rgens.parser.templates;
+
+import bjc.rgens.parser.ConfigSet;
+import bjc.rgens.parser.GenerationState;
+
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Scanner;
+
+/**
+ * A template: an ordered list of elements generated one after another.
+ */
+public class GrammarTemplate {
+	public ConfigSet belongsTo;
+
+	public String name;
+
+	public final List<TemplateElement> elements;
+
+	/** Whether a newline is appended after elements that ask for spacing. */
+	public boolean doSpacing = true;
+
+	public GrammarTemplate(List<TemplateElement> elements) {
+		this.elements = elements;
+	}
+
+	/**
+	 * Generate every element in order.
+	 *
+	 * @param state
+	 * 	The state the elements generate into.
+	 */
+	public void generate(GenerationState state) {
+		for(TemplateElement element : elements) {
+			element.generate(state);
+
+			if(doSpacing && element.spacing)
+				state.contents.append("\n");
+		}
+	}
+
+	/**
+	 * Read a template, one element per line. Lines starting with "#" are
+	 * skipped, lines starting with "/" are pragmas, and everything else
+	 * (blank lines included) becomes a template element.
+	 *
+	 * @param rdr
+	 * 	The source to read from. It is not closed here.
+	 *
+	 * @return
+	 * 	The template that was read.
+	 */
+	public static GrammarTemplate readTemplate(Reader rdr) {
+		List<TemplateElement> elements = new ArrayList<>();
+		GrammarTemplate template = new GrammarTemplate(elements);
+
+		/* Deliberately left open: closing the Scanner would close rdr too. */
+		Scanner scn = new Scanner(rdr);
+
+		while(scn.hasNextLine()) {
+			String ln = scn.nextLine();
+
+			if(ln.isEmpty()) {
+				/* charAt(0) would throw here; keep the blank line as a literal. */
+				handleLine(elements, template, ln);
+				continue;
+			}
+
+			switch(ln.charAt(0)) {
+			case '#':
+				// Ignore comments
+				break;
+			case '/':
+				handlePragma(elements, template, ln.substring(1));
+				break;
+			default:
+				handleLine(elements, template, ln);
+			}
+		}
+
+		return template;
+	}
+
+	/* Add a live element for lines with a $@...@$ insert, a literal otherwise. */
+	private static void handleLine(List<TemplateElement> elements, GrammarTemplate template, String ln) {
+		if(ln.matches("^.*?\\$@.+?@\\$.*$")) {
+			/*
+			 * Handle live templates
+			 */
+			elements.add(new LiveTemplateElement(ln));
+		} else {
+			elements.add(new LiteralTemplateElement(ln));
+		}
+	}
+
+	/* Pragmas are recognized but currently ignored. */
+	private static void handlePragma(List<TemplateElement> elements, GrammarTemplate template, String ln) {
+
+	}
+}
diff --git a/src/main/java/bjc/rgens/parser/templates/LiteralTemplateElement.java b/src/main/java/bjc/rgens/parser/templates/LiteralTemplateElement.java
new file mode 100644
index 0000000..36cdb12
--- /dev/null
+++ b/src/main/java/bjc/rgens/parser/templates/LiteralTemplateElement.java
@@ -0,0 +1,21 @@
+package bjc.rgens.parser.templates;
+
+import bjc.rgens.parser.GenerationState;
+
+/**
+ * Template element that emits a fixed line of text.
+ */
+public class LiteralTemplateElement extends TemplateElement {
+	public final String val;
+
+	public LiteralTemplateElement(String val) {
+		super(true);
+
+		this.val = val;
+	}
+
+	@Override
+	public void generate(GenerationState state) {
+		state.contents.append(val);
+	}
+}
diff --git a/src/main/java/bjc/rgens/parser/templates/LiveTemplateElement.java b/src/main/java/bjc/rgens/parser/templates/LiveTemplateElement.java
new file mode 100644
index 0000000..154ea68
--- /dev/null
+++ b/src/main/java/bjc/rgens/parser/templates/LiveTemplateElement.java
@@ -0,0 +1,68 @@
+package bjc.rgens.parser.templates;
+
+import bjc.utils.data.BooleanToggle;
+import bjc.utils.funcdata.FunctionalList;
+
+import bjc.rgens.parser.GenerationState;
+import bjc.rgens.parser.RGrammarParser;
+import bjc.rgens.parser.elements.CaseElement;
+import bjc.rgens.parser.elements.LiteralCaseElement;
+
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Template element for a line containing $@...@$ inserts. The line is
+ * stored as alternating groups: literal text, parsed insert, literal text...
+ */
+public class LiveTemplateElement extends TemplateElement {
+	private static final Pattern INSERT_PAT = Pattern.compile("\\$@(.+?)@\\$");
+
+	public final List<List<CaseElement>> elements;
+
+	public LiveTemplateElement(String val) {
+		super(true);
+
+		elements = new ArrayList<>();
+
+		Matcher mat = INSERT_PAT.matcher(val);
+		StringBuffer sb = new StringBuffer();
+
+		while(mat.find()) {
+			/* Collect the text before this insert, dropping the insert itself. */
+			mat.appendReplacement(sb, "");
+			String body = mat.group(1);
+
+			FunctionalList<CaseElement> elms = (FunctionalList<CaseElement>)RGrammarParser.parseElementString(body).getLeft();
+
+			elements.add(Arrays.asList(new LiteralCaseElement(sb.toString())));
+			elements.add(elms.getInternal());
+
+			sb = new StringBuffer();
+		}
+
+		/* Whatever follows the last insert is one more literal group. */
+		mat.appendTail(sb);
+		elements.add(Arrays.asList(new LiteralCaseElement(sb.toString())));
+	}
+
+	@Override
+	public void generate(GenerationState state) {
+		/* NOTE(review): presumably get() flips the toggle on each call, so only the parsed groups get spacing - confirm against BooleanToggle. */
+		BooleanToggle bt = new BooleanToggle(false);
+
+		for(List<CaseElement> elmList : elements) {
+			boolean doSpacing = bt.get();
+
+			for(CaseElement elm : elmList) {
+				elm.generate(state);
+
+				if(doSpacing && elm.spacing)
+					state.contents.append(" ");
+			}
+		}
+	}
+}
diff --git a/src/main/java/bjc/rgens/parser/templates/TemplateElement.java b/src/main/java/bjc/rgens/parser/templates/TemplateElement.java
new file mode 100644
index 0000000..2d0724b
--- /dev/null
+++ b/src/main/java/bjc/rgens/parser/templates/TemplateElement.java
@@ -0,0 +1,19 @@
+package bjc.rgens.parser.templates;
+
+import bjc.rgens.parser.GenerationState;
+
+/**
+ * Base class for the elements of a {@link GrammarTemplate}.
+ */
+public abstract class TemplateElement {
+	/** Read by GrammarTemplate.generate to decide whether a newline follows this element. */
+	public boolean spacing;
+
+	public GrammarTemplate belongsTo;
+
+	protected TemplateElement(boolean spacing) {
+		this.spacing = spacing;
+	}
+
+	public abstract void generate(GenerationState state);
+}
diff --git a/src/main/java/bjc/rgens/text/markov/Markov.java b/src/main/java/bjc/rgens/text/markov/Markov.java
index e21d60f..e21d60f 100644..100755
--- a/src/main/java/bjc/rgens/text/markov/Markov.java
+++ b/src/main/java/bjc/rgens/text/markov/Markov.java
diff --git a/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java b/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java
index cebf2bc..cebf2bc 100644..100755
--- a/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java
+++ b/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java
diff --git a/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java 
b/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java index 339e8d5..339e8d5 100644..100755 --- a/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java +++ b/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java diff --git a/src/main/java/bjc/rgens/text/markov/TextGenerator.java b/src/main/java/bjc/rgens/text/markov/TextGenerator.java index f629d49..f629d49 100644..100755 --- a/src/main/java/bjc/rgens/text/markov/TextGenerator.java +++ b/src/main/java/bjc/rgens/text/markov/TextGenerator.java |
