diff options
| author | student <student@69.161.224.76> | 2018-03-29 11:38:02 -0400 |
|---|---|---|
| committer | student <student@69.161.224.76> | 2018-03-29 11:38:02 -0400 |
| commit | 6aa15e30fa75211964428e386b4b6b0f2c66dbc5 (patch) | |
| tree | 5beeb6016a94b284eeed80daf65b9c2800ec7e63 /RGens/src/main/java/bjc/rgens/parser | |
| parent | c921b00c99cf46bc33f724581ab9bde2b0d8bb6a (diff) | |
Rename package
Diffstat (limited to 'RGens/src/main/java/bjc/rgens/parser')
| -rw-r--r-- | RGens/src/main/java/bjc/rgens/parser/CaseElement.java | 423 | ||||
| -rw-r--r-- | RGens/src/main/java/bjc/rgens/parser/GrammarException.java | 36 | ||||
| -rw-r--r-- | RGens/src/main/java/bjc/rgens/parser/RGrammar.java | 495 | ||||
| -rw-r--r-- | RGens/src/main/java/bjc/rgens/parser/RGrammarBuilder.java | 245 | ||||
| -rw-r--r-- | RGens/src/main/java/bjc/rgens/parser/RGrammarFormatter.java | 96 | ||||
| -rw-r--r-- | RGens/src/main/java/bjc/rgens/parser/RGrammarParser.java | 390 | ||||
| -rw-r--r-- | RGens/src/main/java/bjc/rgens/parser/RGrammarSet.java | 290 | ||||
| -rw-r--r-- | RGens/src/main/java/bjc/rgens/parser/RGrammarTest.java | 72 | ||||
| -rw-r--r-- | RGens/src/main/java/bjc/rgens/parser/RGrammars.java | 55 | ||||
| -rw-r--r-- | RGens/src/main/java/bjc/rgens/parser/RegexRuleCase.java | 31 | ||||
| -rw-r--r-- | RGens/src/main/java/bjc/rgens/parser/Rule.java | 134 | ||||
| -rw-r--r-- | RGens/src/main/java/bjc/rgens/parser/RuleCase.java | 87 | ||||
| -rw-r--r-- | RGens/src/main/java/bjc/rgens/parser/new-syntax.txt | 13 |
13 files changed, 2367 insertions, 0 deletions
diff --git a/RGens/src/main/java/bjc/rgens/parser/CaseElement.java b/RGens/src/main/java/bjc/rgens/parser/CaseElement.java new file mode 100644 index 0000000..e9e3a0a --- /dev/null +++ b/RGens/src/main/java/bjc/rgens/parser/CaseElement.java @@ -0,0 +1,423 @@ +package bjc.rgens.parser; + +import static bjc.rgens.parser.CaseElement.ElementType.*; + +/* + * @TODO 10/11/17 Ben Culkin :CaseElementSplit + * Split this into multiple subclasses based off of a value of ElementType. + */ +/** + * A element in a rule case. + * + * @author EVE + */ +public class CaseElement { + /** + * The possible types of an element. + * + * @author EVE + */ + public static enum ElementType { + /** An element that represents a literal string. */ + LITERAL, + /** An element that represents a rule reference. */ + RULEREF, + /** An element that represents a random range. */ + RANGE, + /** An element that represents a variable that stores a string. */ + VARDEF, + /** + * An element that represents a variable that stores the result + * of generating a rule. + */ + EXPVARDEF; + } + + /* Regexps for marking rule types. */ + private static final String SPECIAL_CASELEM = "\\{[^}]+\\}"; + private static final String REFER_CASELEM = "\\[[^\\]]+\\]"; + private static final String RANGE_CASELM = "\\[\\d+\\.\\.\\d+\\]"; + + /** The type of this element. */ + public final ElementType type; + + /** + * The literal string value of this element. + * + * This means that it is a string whose value should always mean the + * same thing. + * + * <h2>Used For</h2> + * <dl> + * <dt>LITERAL</dt> + * <dd>The string this element represents</dd> + * <dt>RULEREF</dt> + * <dd>The name of the rule this element references</dd> + * </dl> + */ + private String literalVal; + + /** + * The starting integer value of this element. + * + * <h2>Used For</h2> + * <dl> + * <dt>RANGE</dt> + * <dd>The inclusive start of the range</dd> + * </dl> + */ + private int start; + + /** + * The starting integer value of this element. + * + * <h2>Used For</h2> + * <dl> + * <dt>RANGE</dt> + * <dd>The inclusive end of the range</dd> + * </dl> + */ + private int end; + + /** + * The name of the variable this element defines. + * + * <h2>Used For</h2> + * <dl> + * <dt>VARDEF</dt> + * <dd>The name of the variable</dd> + * <dt>EXPVARDEF</dt> + * <dd>The name of the variable</dd> + * </dl> + */ + private String varName; + + /** + * The definition of the variable this element defines. + * + * <h2>Used For</h2> + * <dl> + * <dt>VARDEF</dt> + * <dd>The value of the variable</dd> + * <dt>EXPVARDEF</dt> + * <dd>The rule to expand for the value of this variable</dd> + * </dl> + */ + private String varDef; + + /** + * Create a new case element. + * + * @param typ + * The type of this element. + * + * @throws IllegalArgumentException + * If the specified type needs parameters. + */ + public CaseElement(ElementType typ) { + switch (typ) { + case LITERAL: + case RULEREF: + throw new IllegalArgumentException("This type requires a string parameter"); + case RANGE: + throw new IllegalArgumentException("This type requires two integer parameters"); + case VARDEF: + case EXPVARDEF: + throw new IllegalArgumentException("This type requires two string parameters"); + default: + break; + } + + type = typ; + } + + /** + * Create a new case element that has a single string value. + * + * @param typ + * The type of this element. + * + * @param val + * The string value of this element. + * + * @throws IllegalArgumentException + * If the specified type doesn't take a single string parameter. + */ + public CaseElement(ElementType typ, String val) { + switch (typ) { + case LITERAL: + case RULEREF: + break; + case RANGE: + throw new IllegalArgumentException("This type requires two integer parameters"); + case VARDEF: + case EXPVARDEF: + throw new IllegalArgumentException("This type requires two string parameters"); + default: + throw new IllegalArgumentException("This type doesn't have a string parameter"); + } + + type = typ; + + literalVal = val; + } + + /** + * Create a new case element that has two integer values. + * + * @param typ + * The type of this element. + * + * @param first + * The first integer value for this element. + * + * @param second + * The second integer value for this element. + * + * @throws IllegalArgumentException + * If the specified type doesn't take two integer parameters. + */ + public CaseElement(ElementType typ, int first, int second) { + switch (typ) { + case LITERAL: + case RULEREF: + throw new IllegalArgumentException("This type requires a string parameter"); + case RANGE: + break; + case VARDEF: + case EXPVARDEF: + throw new IllegalArgumentException("This type requires two string parameters"); + default: + throw new IllegalArgumentException("This type doesn't have two integer parameters"); + } + + type = typ; + + this.start = first; + this.end = second; + } + + /** + * Create a new case element that has two string values. + * + * @param typ + * The type of this element. + * + * @param name + * The first string value for this element. + * + * @param def + * The second string value for this element. + * + * @throws IllegalArgumentException + * If the specified type doesn't take two string parameters. + */ + public CaseElement(ElementType typ, String name, String def) { + switch (typ) { + case LITERAL: + case RULEREF: + throw new IllegalArgumentException("This type requires a string parameter"); + case RANGE: + throw new IllegalArgumentException("This type requires two integer parameters"); + case VARDEF: + case EXPVARDEF: + break; + default: + throw new IllegalArgumentException("This type doesn't have two string parameters"); + } + + type = typ; + + this.varName = name; + this.varDef = def; + } + + /** + * Get the literal string value for this element. + * + * @return + * The literal string value for this element. + * + * @throws IllegalStateException + * If this type doesn't have a literal string value. + */ + public String getLiteral() { + switch (type) { + case LITERAL: + case RULEREF: + break; + default: + throw new IllegalStateException( + String.format("Type '%s' doesn't have a literal string value")); + } + + return literalVal; + } + + /** + * Get the starting integer value for this element. + * + * @return + * The starting integer value for this element. + * + * @throws IllegalStateException + * If this type doesn't have a starting integer value. + */ + public int getStart() { + switch (type) { + case RANGE: + break; + default: + throw new IllegalStateException( + String.format("Type '%s' doesn't have a starting integer value", type)); + } + + return start; + } + + /** + * Get the ending integer value for this element. + * + * @return + * The ending integer value for this element. + * + * @throws IllegalStateException + * If this type doesn't have a ending integer value. + */ + public int getEnd() { + switch (type) { + case RANGE: + break; + default: + throw new IllegalStateException( + String.format("Type '%s' doesn't have a ending integer value", type)); + } + + return end; + } + + /** + * Get the variable name for this element. + * + * @return + * The variable name of this element. + * + * @throws IllegalStateException + * If the type doesn't have a variable name. + */ + public String getName() { + switch (type) { + case VARDEF: + case EXPVARDEF: + break; + default: + throw new IllegalStateException(String.format("Type '%s' doesn't have a name", type)); + } + + return varName; + } + + /** + * Get the variable definition for this element. + * + * @return + * The variable definition of this element. + * + * @throws IllegalStateException + * If the type doesn't have a variable definition. + */ + public String getDefn() { + switch (type) { + case VARDEF: + case EXPVARDEF: + break; + default: + throw new IllegalStateException(String.format("Type '%s' doesn't have a name", type)); + } + + return varDef; + } + + @Override + public String toString() { + switch (type) { + case LITERAL: + case RULEREF: + return literalVal; + case RANGE: + return String.format("[%d..%d]", start, end); + case VARDEF: + return String.format("{%s:=%s}", varName, varDef); + case EXPVARDEF: + return String.format("{%s=%s}", varName, varDef); + default: + return String.format("Unknown type '%s'", type); + } + } + + /** + * Create a case element from a string. + * + * @param csepart + * The string to convert. + * + * @return + * A case element representing the string. + */ + public static CaseElement createElement(String csepart) { + if (csepart == null) { + throw new NullPointerException("Case part cannot be null"); + } + + if (csepart.matches(SPECIAL_CASELEM)) { + /* Handle special cases. */ + String specialBody = csepart.substring(1, csepart.length() - 1); + + System.out.printf("\t\tTRACE: special body is '%s'\n", specialBody); + + if (specialBody.matches("\\S+:=\\S+")) { + /* Handle expanding variable definitions. */ + String[] parts = specialBody.split(":="); + + if (parts.length != 2) { + String msg = "Expanded variables must be a name and a definition, seperated by :="; + + throw new GrammarException(msg); + } + + return new CaseElement(EXPVARDEF, parts[0], parts[1]); + } else if (specialBody.matches("\\S+=\\S+")) { + /* Handle regular variable definitions. */ + String[] parts = specialBody.split("="); + + if (parts.length != 2) { + String msg = "Variables must be a name and a definition, seperated by ="; + + throw new GrammarException(msg); + } + + return new CaseElement(VARDEF, parts[0], parts[1]); + } else if (specialBody.matches("{empty}")) { + /* Literal blank, for empty cases. */ + return new CaseElement(LITERAL, ""); + } else { + throw new IllegalArgumentException( + String.format("Unknown special case part '%s'", specialBody)); + } + } else if (csepart.matches(REFER_CASELEM)) { + if (csepart.matches(RANGE_CASELM)) { + /* Handle ranges */ + String rawRange = csepart.substring(1, csepart.length() - 1); + + int firstNum = Integer.parseInt(rawRange.substring(0, rawRange.indexOf('.'))); + int secondNum = Integer.parseInt(rawRange.substring(rawRange.lastIndexOf('.') + 1)); + + return new CaseElement(RANGE, firstNum, secondNum); + } + + return new CaseElement(RULEREF, csepart); + } else { + return new CaseElement(LITERAL, csepart); + } + } +} diff --git a/RGens/src/main/java/bjc/rgens/parser/GrammarException.java b/RGens/src/main/java/bjc/rgens/parser/GrammarException.java new file mode 100644 index 0000000..9eaa0a1 --- /dev/null +++ b/RGens/src/main/java/bjc/rgens/parser/GrammarException.java @@ -0,0 +1,36 @@ +package bjc.rgens.parser; + +/** + * The exception thrown when something goes wrong while parsing a + * grammar. + * + * @author student + */ +public class GrammarException extends RuntimeException { + /* Serialization ID. */ + private static final long serialVersionUID = -7287427479316953668L; + + /** + * Create a new grammar exception with the specified message. + * + * @param msg + * The message for this exception. + */ + public GrammarException(String msg) { + super(msg); + } + + /** + * Create a new grammar exception with the specified message and + * cause. + * + * @param msg + * The message for this exception. + * + * @param cause + * The cause of this exception. + */ + public GrammarException(String msg, Exception cause) { + super(msg, cause); + } +} diff --git a/RGens/src/main/java/bjc/rgens/parser/RGrammar.java b/RGens/src/main/java/bjc/rgens/parser/RGrammar.java new file mode 100644 index 0000000..17ca4fe --- /dev/null +++ b/RGens/src/main/java/bjc/rgens/parser/RGrammar.java @@ -0,0 +1,495 @@ +package bjc.rgens.parser; + +import bjc.utils.funcutils.StringUtils; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Random; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.text.similarity.LevenshteinDistance; + +import edu.gatech.gtri.bktree.BkTreeSearcher; +import edu.gatech.gtri.bktree.BkTreeSearcher.Match; +import edu.gatech.gtri.bktree.Metric; +import edu.gatech.gtri.bktree.MutableBkTree; + +/** + * Represents a randomized grammar. + * + * @author EVE + */ +public class RGrammar { + /* The max distance between possible alternate rules. */ + private static final int MAX_DISTANCE = 6; + + /* The metric for the levenshtein distance. */ + private static final class LevenshteinMetric implements Metric<String> { + private static LevenshteinDistance DIST; + + static { + DIST = LevenshteinDistance.getDefaultInstance(); + } + + public LevenshteinMetric() { + } + + @Override + public int distance(String x, String y) { + return DIST.apply(x, y); + } + } + + /* The current state during generation. */ + private static class GenerationState { + /* The current string. */ + public StringBuilder contents; + /* The RNG. */ + public Random rnd; + + /* The current set of variables. */ + public Map<String, String> vars; + + /** + * Create a new generation state. + * + * @param cont + * The string being generated. + * + * @param rand + * The RNG to use. + * + * @param vs + * The variables to use. + */ + public GenerationState(StringBuilder cont, Random rand, Map<String, String> vs) { + contents = cont; + rnd = rand; + vars = vs; + } + } + + /* The pattern for matching the name of a variable. */ + private static Pattern NAMEVAR_PATTERN = Pattern.compile("\\$(\\w+)"); + + /* The rules of the grammar. */ + private Map<String, Rule> rules; + /* The rules imported from other grammars. */ + private Map<String, RGrammar> importRules; + /* The rules exported from this grammar. */ + private Set<String> exportRules; + /* The initial rule of this grammar. */ + private String initialRule; + + /* The tree to use for finding rule suggestions. */ + private BkTreeSearcher<String> ruleSearcher; + + /** + * Create a new randomized grammar using the specified set of rules. + * + * @param ruls + * The rules to use. + */ + public RGrammar(Map<String, Rule> ruls) { + rules = ruls; + } + + /** + * Sets the imported rules to use. + * + * Imported rules are checked for rule definitions after local + * definitions are checked. + * + * @param importedRules + * The set of imported rules to use. + */ + public void setImportedRules(Map<String, RGrammar> importedRules) { + importRules = importedRules; + } + + /** + * Generates the data structure backing rule suggestions for unknown + * rules. + */ + public void generateSuggestions() { + MutableBkTree<String> ruleSuggester = new MutableBkTree<>(new LevenshteinMetric()); + + ruleSuggester.addAll(rules.keySet()); + ruleSuggester.addAll(importRules.keySet()); + + ruleSearcher = new BkTreeSearcher<>(ruleSuggester); + } + + /** + * Generate a string from this grammar, starting from the specified + * rule. + * + * @param startRule + * The rule to start generating at, or null to use the initial rule + * for this grammar. + * + * @return + * A possible string from the grammar. + */ + public String generate(String startRule) { + return generate(startRule, new Random(), new HashMap<>()); + } + + /** + * Generate a string from this grammar, starting from the specified + * rule. + * + * @param startRule + * The rule to start generating at, or null to use the initial rule + * for this grammar. + * + * @param rnd + * The random number generator to use. + * + * @param vars + * The set of variables to use. + * + * @return + * A possible string from the grammar. + */ + public String generate(String startRule, Random rnd, Map<String, String> vars) { + String fromRule = startRule; + + if (startRule == null) { + if (initialRule == null) { + throw new GrammarException( + "Must specify a start rule for grammars with no initial rule"); + } + + fromRule = initialRule; + } else { + if (startRule.equals("")) { + throw new GrammarException("The empty string is not a valid rule name"); + } + } + + RuleCase start = rules.get(fromRule).getCase(rnd); + + StringBuilder contents = new StringBuilder(); + + generateCase(start, new GenerationState(contents, rnd, vars)); + + String body = contents.toString(); + /* + * Collapse duplicate spaces. + */ + body = body.replaceAll("\\s+", " "); + + /* + * Remove extraneous spaces around punctutation marks. + * + * This can be done in the grammars, but it is very tedious to + * do so. + */ + + /* Handle 's */ + body = body.replaceAll(" 's ", "'s "); + + /* Handle opening/closing punctuation. */ + body = body.replaceAll("([(\\[]) ", " $1"); + body = body.replaceAll(" ([)\\]'\"])", "$1 "); + + /* Remove spaces around series of opening/closing punctuation. */ + body = body.replaceAll("([(\\[])\\s+([(\\[])", "$1$2"); + body = body.replaceAll("([)\\]])\\s+([)\\]])", "$1$2"); + + /* Handle inter-word punctuation. */ + body = body.replaceAll(" ([,:.!])", "$1 "); + + /* Handle intra-word punctuation. */ + body = body.replaceAll("\\s?([-/])\\s?", "$1"); + + /* + * Collapse duplicate spaces. + */ + body = body.replaceAll("\\s+", " "); + + /* @TODO 11/01/17 Ben Culkin :RegexRule + * Replace this once it is no longer needed. + */ + body = body.replaceAll("\\s(ish|burg|ton|ville|opolis|field|boro|dale)", "$1"); + + return body; + } + + /* Generate a rule case. */ + private void generateCase(RuleCase start, GenerationState state) { + try { + switch (start.type) { + case NORMAL: + for (CaseElement elm : start.getElements()) { + generateElement(elm, state); + + if(elm.type != CaseElement.ElementType.VARDEF) { + state.contents.append(" "); + } + } + break; + case SPACEFLATTEN: + for (CaseElement elm : start.getElements()) { + generateElement(elm, state); + } + break; + default: + String msg = String.format("Unknown case type '%s'", start.type); + throw new GrammarException(msg); + } + } catch (GrammarException gex) { + String msg = String.format("Error in generating case (%s)", start); + throw new GrammarException(msg, gex); + } + } + + /* Generate a case element. */ + private void generateElement(CaseElement elm, GenerationState state) { + try { + switch (elm.type) { + case LITERAL: + state.contents.append(elm.getLiteral()); + break; + case RULEREF: + generateRuleReference(elm, state); + break; + case RANGE: + int start = elm.getStart(); + int end = elm.getEnd(); + + int val = state.rnd.nextInt(end - start); + val += start; + + state.contents.append(val); + break; + case VARDEF: + generateVarDef(elm.getName(), elm.getDefn(), state); + break; + case EXPVARDEF: + generateExpVarDef(elm.getName(), elm.getDefn(), state); + break; + default: + String msg = String.format("Unknown element type '%s'", elm.type); + throw new GrammarException(msg); + } + } catch (GrammarException gex) { + String msg = String.format("Error in generating case element (%s)", elm); + throw new GrammarException(msg, gex); + } + } + + /* Generate a expanding variable definition. */ + private void generateExpVarDef(String name, String defn, GenerationState state) { + GenerationState newState = new GenerationState( + new StringBuilder(), state.rnd, state.vars); + + if (rules.containsKey(defn)) { + RuleCase destCase = rules.get(defn).getCase(); + + generateCase(destCase, newState); + } else if (importRules.containsKey(defn)) { + RGrammar destGrammar = importRules.get(defn); + String res = destGrammar.generate(defn, state.rnd, state.vars); + + newState.contents.append(res); + } else { + String msg = String.format("No rule '%s' defined", defn); + throw new GrammarException(msg); + } + + state.vars.put(name, newState.contents.toString()); + } + + /* Generate a variable definition. */ + private static void generateVarDef(String name, String defn, GenerationState state) { + state.vars.put(name, defn); + } + + /* Generate a rule reference. */ + private void generateRuleReference(CaseElement elm, GenerationState state) { + String refersTo = elm.getLiteral(); + + GenerationState newState = new GenerationState( + new StringBuilder(), state.rnd, state.vars); + + if (refersTo.contains("$")) { + /* Parse variables */ + String refBody = refersTo.substring(1, refersTo.length() - 1); + + if (refBody.contains("-")) { + /* Handle dependent rule names. */ + StringBuffer nameBuffer = new StringBuffer(); + + Matcher nameMatcher = NAMEVAR_PATTERN.matcher(refBody); + + while (nameMatcher.find()) { + String var = nameMatcher.group(1); + + if (!state.vars.containsKey(var)) { + String msg = String.format("No variable '%s' defined", var); + throw new GrammarException(msg); + } + + String name = state.vars.get(var); + + if (name.contains(" ")) { + throw new GrammarException( + "Variables substituted into names cannot contain spaces"); + } else if (name.equals("")) { + throw new GrammarException( + "Variables substituted into names cannot be empty"); + } + + nameMatcher.appendReplacement(nameBuffer, name); + } + + nameMatcher.appendTail(nameBuffer); + + refersTo = "[" + nameBuffer.toString() + "]"; + } else { + /* Handle string references. */ + if (refBody.equals("$")) { + throw new GrammarException("Cannot refer to unnamed variables"); + } + + String key = refBody.substring(1); + + if (!state.vars.containsKey(key)) { + String msg = String.format("No variable '%s' defined", key); + throw new GrammarException(msg); + } + + state.contents.append(state.vars.get(key)); + + return; + } + } + + if(refersTo.startsWith("[^")) { + refersTo = "[" + refersTo.substring(2); + + RGrammar dst = importRules.get(refersTo); + + newState.contents.append(dst.generate(refersTo, state.rnd, state.vars)); + } else if (rules.containsKey(refersTo)) { + RuleCase cse = rules.get(refersTo).getCase(state.rnd); + + generateCase(cse, newState); + } else if (importRules.containsKey(refersTo)) { + RGrammar dst = importRules.get(refersTo); + + newState.contents.append(dst.generate(refersTo, state.rnd, state.vars)); + } else { + if (ruleSearcher != null) { + Set<Match<? extends String>> results = ruleSearcher.search(refersTo, MAX_DISTANCE); + + String[] resArray = results.stream() + .map(Match::getMatch).toArray((i) -> new String[i]); + + String msg = String.format("No rule '%s' defined (perhaps you meant %s?)", + refersTo, StringUtils.toEnglishList(resArray, false)); + + throw new GrammarException(msg); + } + + String msg = String.format("No rule '%s' defined", refersTo); + throw new GrammarException(msg); + } + + if (refersTo.contains("+")) { + /* Rule names with pluses in them get space-flattened */ + state.contents.append(newState.contents.toString().replaceAll("\\s+", "")); + } else { + state.contents.append(newState.contents.toString()); + } + } + + /** + * Get the initial rule of this grammar. + * + * @return + * The initial rule of this grammar. + */ + public String getInitialRule() { + return initialRule; + } + + /** + * Set the initial rule of this grammar. + * + * @param initRule + * The initial rule of this grammar, or null to say there is no + * initial rule. + */ + public void setInitialRule(String initRule) { + /* Passing null, nulls our initial rule. */ + if (initRule == null) { + this.initialRule = null; + return; + } + + if (initRule.equals("")) { + throw new GrammarException("The empty string is not a valid rule name"); + } else if (!rules.containsKey(initRule)) { + String msg = String.format("No rule '%s' local to this grammar defined.", initRule); + + throw new GrammarException(msg); + } + + initialRule = initRule; + } + + /** + * Gets the rules exported by this grammar. + * + * The initial rule is exported by default if specified. + * + * @return + * The rules exported by this grammar. + */ + public Set<Rule> getExportedRules() { + Set<Rule> res = new HashSet<>(); + + for (String rname : exportRules) { + if (!rules.containsKey(rname)) { + String msg = String.format("No rule '%s' local to this grammar defined", + initialRule); + + throw new GrammarException(msg); + } + + res.add(rules.get(rname)); + } + + if (initialRule != null) { + res.add(rules.get(initialRule)); + } + + return res; + } + + /** + * Set the rules exported by this grammar. + * + * @param exportedRules + * The rules exported by this grammar. + */ + public void setExportedRules(Set<String> exportedRules) { + exportRules = exportedRules; + } + + /** + * Get all the rules in this grammar. + * + * @return + * All the rules in this grammar. + */ + public Map<String, Rule> getRules() { + return rules; + } +} diff --git a/RGens/src/main/java/bjc/rgens/parser/RGrammarBuilder.java b/RGens/src/main/java/bjc/rgens/parser/RGrammarBuilder.java new file mode 100644 index 0000000..096decf --- /dev/null +++ b/RGens/src/main/java/bjc/rgens/parser/RGrammarBuilder.java @@ -0,0 +1,245 @@ +package bjc.rgens.parser; + +import bjc.utils.funcdata.FunctionalList; +import bjc.utils.funcdata.IList; + +import static bjc.rgens.parser.RuleCase.CaseType.*; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * Construct randomized grammars piece by piece. + * + * @author EVE + */ +public class RGrammarBuilder { + /* The rules being built. */ + private Map<String, Rule> rules; + /* The current set of exported rules. */ + private Set<String> exportedRules; + /* The current initial rule. */ + private String initialRule; + + /** Create a new randomized grammar builder. */ + public RGrammarBuilder() { + rules = new HashMap<>(); + + exportedRules = new HashSet<>(); + } + + /** + * Get or create a rule by the given name. + * + * @param rName + * The name of the rule. + * + * @return + * The rule by that name, or a new one if none existed. + */ + public Rule getOrCreateRule(String rName) { + if(rName == null) + throw new NullPointerException("Rule name must not be null"); + else if(rName.equals("")) + throw new IllegalArgumentException("The empty string is not a valid rule name"); + + if(rules.containsKey(rName)) + return rules.get(rName); + else { + Rule ret = new Rule(rName); + + rules.put(rName, ret); + + return ret; + } + } + + /** + * Convert this builder into a grammar. + * + * @return + * The grammar built by this builder + */ + public RGrammar toRGrammar() { + RGrammar grammar = new RGrammar(rules); + + grammar.setInitialRule(initialRule); + + grammar.setExportedRules(exportedRules); + + return grammar; + } + + /** + * Set the initial rule of the grammar. + * + * @param init + * The initial rule of the grammar. + * + * @throws IllegalArgumentException + * If the rule is either not valid or not defined in the grammar. + */ + public void setInitialRule(String init) { + if (init == null) { + throw new NullPointerException("init must not be null"); + } else if (init.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } + + initialRule = init; + } + + /** + * Add an exported rule to this grammar. + * + * @param export + * The name of the rule to export. + * + * @throws IllegalArgumentException + * If the rule is either not valid or not defined in the grammar. + */ + public void addExport(String export) { + if (export == null) { + throw new NullPointerException("Export name must not be null"); + } else if (export.equals("")) { + throw new NullPointerException("The empty string is not a valid rule name"); + } + + exportedRules.add(export); + } + + /** + * Suffix a given case element to every case of a specific rule. + * + * @param ruleName + * The rule to suffix. + * + * @param suffix + * The suffix to add. + * + * @throws IllegalArgumentException + * If the rule name is either invalid or not defined by this + * grammar, or if the suffix is invalid. + */ + public void suffixWith(String ruleName, String suffix) { + if (ruleName == null) { + throw new NullPointerException("Rule name must not be null"); + } else if (ruleName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } else if(!rules.containsKey(ruleName)) { + String msg = String.format("Rule '%s' is not a valid rule name."); + + throw new IllegalArgumentException(msg); + } + + CaseElement element = CaseElement.createElement(suffix); + + FunctionalList<RuleCase> newCases = new FunctionalList<>(); + + IList<RuleCase> caseList = rules.get(ruleName).getCases(); + for (RuleCase ruleCase : caseList) { + FunctionalList<CaseElement> newCase = new FunctionalList<>(); + + for(CaseElement elm : ruleCase.getElements()) { + newCase.add(elm); + } + + newCase.add(element); + + newCases.add(new RuleCase(NORMAL, newCase)); + } + + + for (RuleCase newCase : newCases) { + caseList.add(newCase); + } + } + + /** + * Prefix a given case element to every case of a specific rule. + * + * @param ruleName + * The rule to prefix. + * + * @param prefix + * The prefix to add. + * + * @throws IllegalArgumentException + * If the rule name is either invalid or not defined by this + * grammar, or if the prefix is invalid. + */ + public void prefixWith(String ruleName, String prefix) { + if (ruleName == null) { + throw new NullPointerException("Rule name must not be null"); + } else if (ruleName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } else if(!rules.containsKey(ruleName)) { + String msg = String.format("Rule '%s' is not a valid rule name."); + + throw new IllegalArgumentException(msg); + } + + CaseElement element = CaseElement.createElement(prefix); + + FunctionalList<RuleCase> newCases = new FunctionalList<>(); + + IList<RuleCase> caseList = rules.get(ruleName).getCases(); + for (RuleCase ruleCase : caseList) { + FunctionalList<CaseElement> newCase = new FunctionalList<>(); + + newCase.add(element); + + for(CaseElement elm : ruleCase.getElements()) { + newCase.add(elm); + } + + newCases.add(new RuleCase(NORMAL, newCase)); + } + + + for (RuleCase newCase : newCases) { + caseList.add(newCase); + } + } + + public void despaceRule(String ruleName) { + if (ruleName == null) { + throw new NullPointerException("ruleName must not be null"); + } else if (ruleName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } + + IList<RuleCase> caseList = rules.get(ruleName).getCases(); + + IList<RuleCase> newCaseList = new FunctionalList<>(); + + for(RuleCase cse : caseList) { + newCaseList.add(new RuleCase(SPACEFLATTEN, cse.getElements())); + } + + rules.get(ruleName).replaceCases(newCaseList); + } + + public void regexizeRule(String rule, String pattern) { + if (rule == null) { + throw new NullPointerException("rule must not be null"); + } else if(pattern == null) { + throw new NullPointerException("pattern must not be null"); + } else if (rule.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } + + IList<RuleCase> caseList = rules.get(rule).getCases(); + + IList<RuleCase> newCaseList = new FunctionalList<>(); + + for(RuleCase cse : caseList) { + newCaseList.add(new RegexRuleCase(cse.getElements(), pattern)); + } + + rules.get(rule).replaceCases(newCaseList); + + } +} diff --git a/RGens/src/main/java/bjc/rgens/parser/RGrammarFormatter.java b/RGens/src/main/java/bjc/rgens/parser/RGrammarFormatter.java new file mode 100644 index 0000000..96bdee8 --- /dev/null +++ b/RGens/src/main/java/bjc/rgens/parser/RGrammarFormatter.java @@ -0,0 +1,96 @@ +package bjc.rgens.parser; + +import bjc.utils.funcdata.IList; + +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * Format randomized grammars to strings properly. + * + * @author EVE + */ +public class RGrammarFormatter { + /** + * Format a grammar into a file that represents that grammar. + * + * @param gram + * The grammar to format. + * + * @return + * The formatted grammar. + */ + public static String formatGrammar(RGrammar gram) { + StringBuilder sb = new StringBuilder(); + + Map<String, Rule> rules = gram.getRules(); + + String initRuleName = gram.getInitialRule(); + + Set<String> processedRules = new HashSet<>(); + + if (initRuleName != null) { + processRule(rules.get(initRuleName), sb); + + processedRules.add(initRuleName); + } + + for (Rule rule : rules.values()) { + if (!processedRules.contains(rule.name)) { + sb.append("\n\n"); + + processRule(rule, sb); + } + + processedRules.add(rule.name); + } + + return sb.toString().trim(); + } + + /* Format a rule. */ + private static void processRule(Rule rule, StringBuilder sb) { + IList<RuleCase> cases = rule.getCases(); + + StringBuilder ruleBuilder = new StringBuilder(); + + ruleBuilder.append(rule.name); + ruleBuilder.append(" \u2192 "); + + int markerPos = ruleBuilder.length(); + + processCase(cases.first(), ruleBuilder); + + sb.append(ruleBuilder.toString().trim()); + + ruleBuilder = new StringBuilder(); + + for (RuleCase cse : cases.tail()) { + sb.append("\n\t"); + + for (int i = 8; i < markerPos; i++) { + ruleBuilder.append(" "); + } + + processCase(cse, ruleBuilder); + + sb.append(ruleBuilder.toString()); + + ruleBuilder = new StringBuilder(); + } + + } + + /* Format a case. */ + private static void processCase(RuleCase cse, StringBuilder sb) { + /* Process each element, adding a space. */ + for (CaseElement element : cse.getElements()) { + sb.append(element.toString()); + sb.append(" "); + } + + /* Remove the trailing space. */ + sb.deleteCharAt(sb.length() - 1); + } +} diff --git a/RGens/src/main/java/bjc/rgens/parser/RGrammarParser.java b/RGens/src/main/java/bjc/rgens/parser/RGrammarParser.java new file mode 100644 index 0000000..83b295a --- /dev/null +++ b/RGens/src/main/java/bjc/rgens/parser/RGrammarParser.java @@ -0,0 +1,390 @@ +package bjc.rgens.parser; + +import bjc.utils.funcdata.FunctionalList; +import bjc.utils.funcdata.IList; +import bjc.utils.funcutils.TriConsumer; +import bjc.utils.ioutils.blocks.Block; +import bjc.utils.ioutils.blocks.BlockReader; +import bjc.utils.ioutils.blocks.SimpleBlockReader; + +import java.io.Reader; +import java.io.StringReader; +import java.util.HashMap; +import java.util.Map; + +/** + * Reads {@link RGrammar} from a input stream. + * + * @author student + */ +public class RGrammarParser { + /* Whether we are in debug mode or not. */ + public static final boolean DEBUG = false; + + /* + * Templates for level-dependent delimiters. + */ + /* Pragma block delimiter. */ + private static final String TMPL_PRAGMA_BLOCK_DELIM = "\\R\\t{%d}(?!\\t)"; + /* Rule declaration block delimiter. */ + private static final String TMPL_RULEDECL_BLOCK_DELIM = "\\R\\t\\t{%d}"; + /* Where block delimiter. */ + private static final String TMPL_WHERE_BLOCK_DELIM = "\\R\\t{%d}(?:in|end)\\R"; + /* Top-level block delimiter. */ + private static final String TMPL_TOPLEVEL_BLOCK_DELIM = "\\R\\t{%d}\\.?\\R"; + + /* Pragma impls. */ + private static Map<String, TriConsumer<String, RGrammarBuilder, Integer>> pragmas; + + /* Initialize pragmas. */ + static { + pragmas = new HashMap<>(); + + pragmas.put("initial-rule", (body, build, level) -> { + int sep = body.indexOf(' '); + + if (sep != -1) { + String msg = "Initial-rule pragma takes only one argument, the name of the initial rule"; + throw new GrammarException(msg); + } + + build.setInitialRule(body); + }); + + pragmas.put("despace-rule", (body, build, level) -> { + int sep = body.indexOf(' '); + + if (sep != -1) { + String msg = "despace-rule pragma takes only one argument, the name of the rule to despace"; + throw new GrammarException(msg); + } + + build.despaceRule(body); + }); + + pragmas.put("export-rule", (body, build, level) -> { + String[] exports = body.split(" "); + + for (String export : exports) { + build.addExport(export); + } + }); + + pragmas.put("regex-rule", (body, build, level) -> { + int nameIndex = body.indexOf(" "); + + if(nameIndex == -1) { + throw new GrammarException("Regex-rule pragma takes two arguments: the name of the rule to process, then the regex to apply after the rule has been generated."); + } + + String name = body.substring(0, nameIndex).trim(); + String patt = body.substring(nameIndex + 1).trim(); + + build.regexizeRule(name, patt); + }); + + pragmas.put("suffix-with", (body, build, level) -> { + String[] parts = body.trim().split(" "); + + if (parts.length != 2) { + String msg = "Suffix-with pragma takes two arguments, the name of the rule to suffix, then what to suffix it with"; + + throw new GrammarException(msg); + } + + build.suffixWith(parts[0], parts[1]); + }); + + pragmas.put("prefix-with", (body, build, level) -> { + String[] parts = body.trim().split(" "); + + if (parts.length != 2) { + String msg = "Prefix-with pragma takes two arguments, the name of the rule to prefix, then what to prefix it with"; + + throw new GrammarException(msg); + } + + build.prefixWith(parts[0], parts[1]); + }); + } + + /** + * Read a {@link RGrammar} from an input stream. + * + * @param is + * The input stream to read from. + * + * @return + * The grammar represented by the stream. + * + * @throws GrammarException + * Thrown if the grammar has a syntax error. + */ + public static RGrammar readGrammar(Reader is) throws GrammarException { + String dlm = String.format(TMPL_TOPLEVEL_BLOCK_DELIM, 0); + + try (BlockReader reader = new SimpleBlockReader(dlm, is)) { + if (!reader.hasNextBlock()) { + throw new GrammarException("At least one top-level block must be present"); + } + + try { + RGrammarBuilder build = new RGrammarBuilder(); + + reader.forEachBlock((block) -> { + if(DEBUG) + System.err.printf("Handling top-level block (%s)\n", block); + + handleBlock(build, block.contents, 0); + }); + + return build.toRGrammar(); + } catch (GrammarException gex) { + String msg = String.format("Error in block (%s)", reader.getBlock()); + throw new GrammarException(msg, gex); + } + } catch (Exception ex) { + throw new GrammarException("Unknown error handling block", ex); + } + } + + /* Throughout these, level indicates the nesting level of that construct. */ + + /* Handles an arbitrary block. */ + private static void handleBlock(RGrammarBuilder build, String block, + int level) throws GrammarException { + /* Discard empty blocks. */ + if (block.equals("") || block.matches("\\R")) + return; + + int typeSep = block.indexOf(' '); + + if (typeSep == -1) { + throw new GrammarException( + "A block must start with a introducer, followed by a space, then the rest of the block"); + } + + String blockType = block.substring(0, typeSep).trim(); + + if (blockType.equalsIgnoreCase("pragma")) { + handlePragmaBlock(block, build, level); + } else if (blockType.startsWith("[")) { + handleRuleBlock(block, build, level); + } else if (blockType.equalsIgnoreCase("where")) { + handleWhereBlock(block, build, level); + } else if (blockType.equalsIgnoreCase("#")) { + if(DEBUG) + System.err.printf("Handled comment block (%s)\n", block); + /* + * Comment block. + * + * @TODO 10/11/17 Ben Culkin :GrammarComment + * Attach these to the grammar somehow so that they + * can be re-output during formatting. + */ + return; + } else { + String msg = String.format("Unknown block type: '%s'", blockType); + throw new GrammarException(msg); + } + } + + /* Handle reading a block of pragmas. */ + private static void handlePragmaBlock(String block, RGrammarBuilder build, + int level) throws GrammarException { + String dlm = String.format(TMPL_PRAGMA_BLOCK_DELIM, level); + try (BlockReader pragmaReader = new SimpleBlockReader(dlm, new StringReader(block))) { + try { + pragmaReader.forEachBlock((pragma) -> { + if(DEBUG) + System.err.printf("Handled pragma block (%s)\n", pragma); + + String pragmaContents = pragma.contents; + + int pragmaSep = pragmaContents.indexOf(' '); + + if (pragmaSep == -1) { + String msg = "A pragma invocation must consist of the word pragma, followed by a space, then the body of the pragma"; + + throw new GrammarException(msg); + } + + String pragmaLeader = pragmaContents.substring(0, pragmaSep); + String pragmaBody = pragmaContents.substring(pragmaSep + 1); + + if (!pragmaLeader.equalsIgnoreCase("pragma")) { + String msg = String.format("Illegal line leader in pragma block: '%s'", pragmaLeader); + + throw new GrammarException(msg); + } + + handlePragma(pragmaBody, build, level); + }); + } catch (GrammarException gex) { + Block pragma = pragmaReader.getBlock(); + String msg = String.format("Error in pragma: (%s)", pragma); + + throw new GrammarException(msg, gex); + } + } catch (Exception ex) { + throw new GrammarException("Unknown error handling pragma block", ex); + } + } + + /* Handle an individual pragma in a block. */ + private static void handlePragma(String pragma, RGrammarBuilder build, + int level) throws GrammarException { + int bodySep = pragma.indexOf(' '); + + if (bodySep == -1) + bodySep = pragma.length(); + + String pragmaName = pragma.substring(0, bodySep); + String pragmaBody = pragma.substring(bodySep + 1); + + if (pragmas.containsKey(pragmaName)) { + try { + if(DEBUG) + System.err.printf("Handled pragma '%s'\n", pragmaName); + + pragmas.get(pragmaName).accept(pragmaBody, build, level); + } catch (GrammarException gex) { + String msg = String.format("Error in pragma '%s'", pragmaName); + + throw new GrammarException(msg, gex); + } + } else { + String msg = String.format("Unknown pragma '%s'", pragmaName); + + throw new GrammarException(msg); + } + } + + /* Handle a block of a rule declaration and one or more cases. */ + private static void handleRuleBlock(String ruleBlock, RGrammarBuilder build, + int level) throws GrammarException { + String dlm = String.format(TMPL_RULEDECL_BLOCK_DELIM, level); + try (BlockReader ruleReader = new SimpleBlockReader(dlm, new StringReader(ruleBlock))) { + try { + if (ruleReader.hasNextBlock()) { + /* Rule with a declaration followed by multiple cases. */ + ruleReader.nextBlock(); + Block declBlock = ruleReader.getBlock(); + + String declContents = declBlock.contents; + Rule rl = handleRuleDecl(build, declContents); + + ruleReader.forEachBlock((block) -> { + /* Ignore comment lines. */ + if(block.contents.trim().startsWith("#")) return; + + handleRuleCase(block.contents, build, rl); + }); + } else { + /* Rule with a declaration followed by a single case. */ + handleRuleDecl(build, ruleBlock); + } + } catch (GrammarException gex) { + String msg = String.format("Error in rule case (%s)", ruleReader.getBlock()); + + throw new GrammarException(msg, gex); + } + } catch (Exception ex) { + throw new GrammarException("Unknown error handling rule block", ex); + } + } + + /* Handle a rule declaration and its initial case. */ + private static Rule handleRuleDecl(RGrammarBuilder build, String declContents) { + int declSep = declContents.indexOf("\u2192"); + + if (declSep == -1) { + /* + * @NOTE + * We should maybe remove support for the old + * syntax at some point. However, maybe we don't + * want to do so so as to make inputting grammars + * easier. + */ + declSep = declContents.indexOf(' '); + + if (declSep == -1) { + String msg = "A rule must be given at least one case in its declaration, and seperated from that case by \u2192"; + + throw new GrammarException(msg); + } + } + + String ruleName = declContents.substring(0, declSep).trim(); + String ruleBody = declContents.substring(declSep + 1).trim(); + + if (ruleName.equals("")) { + throw new GrammarException("The empty string is not a valid rule name"); + } + + Rule rul = build.getOrCreateRule(ruleName); + + handleRuleCase(ruleBody, build, rul); + + return rul; + } + + /* Handle a single case of a rule. */ + private static void handleRuleCase(String cse, RGrammarBuilder build, Rule rul) { + IList<CaseElement> caseParts = new FunctionalList<>(); + + for (String csepart : cse.split(" ")) { + String partToAdd = csepart.trim(); + + /* Ignore empty parts */ + if (partToAdd.equals("")) + continue; + + caseParts.add(CaseElement.createElement(partToAdd)); + } + + rul.addCase(new RuleCase(RuleCase.CaseType.NORMAL, caseParts)); + } + + /* Handle a where block (a block with local rules). */ + private static void handleWhereBlock(String block, RGrammarBuilder build, + int level) throws GrammarException { + int nlIndex = block.indexOf("\\n"); + + if (nlIndex == -1) { + throw new GrammarException("Where block must be a context followed by a body"); + } + + String trimBlock = block.substring(nlIndex).trim(); + + String whereDelim = String.format(TMPL_WHERE_BLOCK_DELIM, level); + + try (BlockReader whereReader = new SimpleBlockReader(whereDelim, + new StringReader(trimBlock))) { + try { + Block whereCtx = whereReader.next(); + + StringReader ctxReader = new StringReader(whereCtx.contents.trim()); + String ctxDelim = String.format(TMPL_TOPLEVEL_BLOCK_DELIM, level + 1); + + try (BlockReader bodyReader = new SimpleBlockReader(ctxDelim, ctxReader)) { + Block whereBody = whereReader.next(); + + /** + * @TODO 10/11/17 Ben Culkin :WhereBlocks + * Implement where blocks. + * + * A where block has the context evaluated + * in a new context, and the body executed + * in that context. + */ + } + } catch (GrammarException gex) { + throw new GrammarException(String.format("Error in where block (%s)", + whereReader.getBlock()), gex); + } + } catch (Exception ex) { + throw new GrammarException("Unknown error in where block", ex); + } + } +} diff --git a/RGens/src/main/java/bjc/rgens/parser/RGrammarSet.java b/RGens/src/main/java/bjc/rgens/parser/RGrammarSet.java new file mode 100644 index 0000000..41974c0 --- /dev/null +++ b/RGens/src/main/java/bjc/rgens/parser/RGrammarSet.java @@ -0,0 +1,290 @@ +package bjc.rgens.parser; + +import java.io.BufferedReader; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Map; +import java.util.Scanner; +import java.util.Set; + +/** + * Represents a set of grammars that can share rules via exports. + * + * @author EVE + */ +public class RGrammarSet { + /* Contains all the grammars in this set. */ + private Map<String, RGrammar> grammars; + + /* Contains all the exported rules from grammars. */ + private Map<String, RGrammar> exportedRules; + + /* Contains which export came from which grammar. */ + private Map<String, String> exportFrom; + + /* Contains which file a grammar was loaded from. */ + private Map<String, String> loadedFrom; + + /** Create a new set of randomized grammars. */ + public RGrammarSet() { + grammars = new HashMap<>(); + + exportedRules = new HashMap<>(); + + exportFrom = new HashMap<>(); + loadedFrom = new HashMap<>(); + } + + /** + * Add a grammar to this grammar set. + * + * @param grammarName + * The name of the grammar to add. + * + * @param gram + * The grammar to add. + * + * @throws IllegalArgumentException + * If the grammar name is invalid. + */ + public void addGrammar(String grammarName, RGrammar gram) { + /* Make sure a grammar is valid. */ + if (grammarName == null) { + throw new NullPointerException("Grammar name must not be null"); + } else if (gram == null) { + throw new NullPointerException("Grammar must not be null"); + } else if (grammarName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid grammar name"); + } + + grammars.put(grammarName, gram); + + /* Process exports from the grammar. */ + for (Rule export : gram.getExportedRules()) { + exportedRules.put(export.name, gram); + + exportFrom.put(export.name, grammarName); + } + + /* Add exports to grammar. */ + gram.setImportedRules(exportedRules); + } + + /** + * Get a grammar from this grammar set. + * + * @param grammarName + * The name of the grammar to get. + * + * @return + * The grammar with that name. + * + * @throws IllegalArgumentException + * If the grammar name is invalid or not present in this set. + */ + public RGrammar getGrammar(String grammarName) { + /* Check arguments. */ + if (grammarName == null) { + throw new NullPointerException("Grammar name must not be null"); + } else if (grammarName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid grammar name"); + } else if (!grammars.containsKey(grammarName)) { + String msg = String.format("No grammar with name '%s' found", grammarName); + + throw new IllegalArgumentException(msg); + } + + return grammars.get(grammarName); + } + + /** + * Get the grammar a rule was exported from. + * + * @param exportName + * The name of the exported rule. + * + * @return + * The grammar the exported rule came from. + * + * @throws IllegalArgumentException + * If the export name is invalid or not present in this set. + */ + public RGrammar getExportSource(String exportName) { + /* Check arguments. */ + if (exportName == null) { + throw new NullPointerException("Export name must not be null"); + } else if (exportName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } else if (!exportedRules.containsKey(exportName)) { + String msg = String.format("No export with name '%s' defined", exportName); + throw new IllegalArgumentException(msg); + } + + return exportedRules.get(exportName); + } + + /** + * Get the source of an exported rule. + * + * This will often be a grammar name, but is not required to be one. + * + * @param exportName + * The name of the exported rule. + * + * @return + * The source of an exported rule. + * + * @throws IllegalArgumentException + * If the exported rule is invalid or not present in this set. + */ + public String exportedFrom(String exportName) { + /* Check arguments. */ + if (exportName == null) { + throw new NullPointerException("Export name must not be null"); + } else if (exportName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } else if (!exportedRules.containsKey(exportName)) { + String msg = String.format("No export with name '%s' defined", exportName); + + throw new IllegalArgumentException(msg); + } + + return exportFrom.getOrDefault(exportName, "Unknown"); + } + + /** + * Get the source of an grammar + * + * This will often be a file name, but is not required to be one. + * + * @param grammarName + * The name of the exported grammar. + * + * @return + * The source of an exported grammar. + * + * @throws IllegalArgumentException + * If the exported grammar is invalid or not present in this set. + */ + public String loadedFrom(String grammarName) { + /* Check arguments. */ + if (grammarName == null) { + throw new NullPointerException("Grammar name must not be null"); + } else if (grammarName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid grammar name"); + } else if (grammarName.equals("unknown")) { + return grammarName; + } else if (!grammars.containsKey(grammarName)) { + String msg = String.format("No grammar with name '%s' defined", grammarName); + throw new IllegalArgumentException(); + } + + return loadedFrom.getOrDefault(grammarName, "Unknown"); + } + + /** + * Get the names of all the grammars in this set. + * + * @return + * The names of all the grammars in this set. + */ + public Set<String> getGrammars() { + return grammars.keySet(); + } + + /** + * Get the names of all the exported rules in this set. + * + * @return + * The names of all the exported rules in this set. + */ + public Set<String> getExportedRules() { + return exportedRules.keySet(); + } + + /** + * Load a grammar set from a configuration file. + * + * @param cfgFile + * The configuration file to load from. + * + * @return + * The grammar set created by the configuration file. + * + * @throws IOException + * If something goes wrong during configuration loading. + */ + public static RGrammarSet fromConfigFile(Path cfgFile) throws IOException { + /* The grammar set to hand back. */ + RGrammarSet set = new RGrammarSet(); + + /* Get the directory that contains the config file. */ + Path cfgParent = cfgFile.getParent(); + + try(Scanner scn = new Scanner(cfgFile)) { + /* Execute lines from the configuration file. */ + while (scn.hasNextLine()) { + String ln = scn.nextLine().trim(); + + /* Ignore blank/comment lines. */ + if (ln.equals("")) continue; + + if (ln.startsWith("#")) continue; + + /* Handle mixed whitespace. */ + ln = ln.replaceAll("\\s+", " "); + + /* + * Get the place where the name of the grammar + * ends. + */ + int nameIdx = ln.indexOf(" "); + if (nameIdx == -1) { + throw new GrammarException("Must specify a name for a loaded grammar"); + } + + /* Name and path of grammar. */ + String name = ln.substring(0, nameIdx); + Path path = Paths.get(ln.substring(nameIdx).trim()); + + /* + * Convert from configuration relative path to + * absolute path. + */ + Path convPath = cfgParent.resolve(path.toString()); + + //if(Files.isDirectory(convPath)) { + // /* @TODO implement subset grammars */ + // throw new GrammarException("Sub-grammar sets aren't implemented yet"); + //} else if (convPath.getFileName().endsWith(".gram")) { + /* Load grammar file. */ + try { + BufferedReader fis = Files.newBufferedReader(convPath); + RGrammar gram = RGrammarParser.readGrammar(fis); + fis.close(); + + /* Add grammar to the set. */ + set.addGrammar(name, gram); + + /* + * Mark where the grammar came + * from. + */ + set.loadedFrom.put(name, path.toString()); + } catch (GrammarException gex) { + String msg = String.format("Error loading file '%s'", path); + throw new GrammarException(msg, gex); + } + //} else { + // String msg = String.format("Unrecognized file type '%s'", convPath.getFileName()); + // throw new GrammarException(msg); + //} + } + } + + return set; + } +} diff --git a/RGens/src/main/java/bjc/rgens/parser/RGrammarTest.java b/RGens/src/main/java/bjc/rgens/parser/RGrammarTest.java new file mode 100644 index 0000000..4b1f283 --- /dev/null +++ b/RGens/src/main/java/bjc/rgens/parser/RGrammarTest.java @@ -0,0 +1,72 @@ +package bjc.rgens.parser; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Path; +import java.nio.file.Paths; + +/** + * Test for new grammar syntax. + * + * @author EVE + */ +public class RGrammarTest { + /** + * Main method. + * + * @param args + * Unused CLI args. + */ + public static void main(String[] args) { + URL rsc = RGrammarTest.class.getResource("/server-config-sample.cfg"); + + try { + /* Load a grammar set. */ + Path cfgPath = Paths.get(rsc.toURI()); + RGrammarSet gramSet = RGrammarSet.fromConfigFile(cfgPath); + + /* Generate rule suggestions for all the grammars in the set. */ + for (String gramName : gramSet.getGrammars()) { + gramSet.getGrammar(gramName).generateSuggestions(); + } + + /* Generate for each exported rule. */ + for (String exportName : gramSet.getExportedRules()) { + /* Where we loaded the rule from. */ + String loadSrc = gramSet.loadedFrom(gramSet.exportedFrom(exportName)); + + System.out.println(); + System.out.printf("Generating for exported rule '%s' from file '%s'\n", exportName, loadSrc); + + RGrammar grammar = gramSet.getExportSource(exportName); + for (int i = 0; i < 100; i++) { + try { + String res = grammar.generate(exportName); + if(exportName.contains("+")) res = res.replaceAll("\\s+", ""); + + if(res.length() > 120) { + System.out.printf("\t\n\tContents: %s\n\t\n", res); + } else { + System.out.printf("\tContents: %s\n", res); + } + } catch (GrammarException gex) { + /* Print out errors with generation. */ + String fmt = "Error in exported rule '%s' (loaded from '%s')\n"; + + System.out.printf(fmt, exportName, loadSrc); + System.out.println(); + gex.printStackTrace(); + + System.out.println(); + System.out.println(); + } + } + } + } catch (IOException ioex) { + ioex.printStackTrace(); + } catch (URISyntaxException urisex) { + urisex.printStackTrace(); + } + } +} diff --git a/RGens/src/main/java/bjc/rgens/parser/RGrammars.java b/RGens/src/main/java/bjc/rgens/parser/RGrammars.java new file mode 100644 index 0000000..69cca0d --- /dev/null +++ b/RGens/src/main/java/bjc/rgens/parser/RGrammars.java @@ -0,0 +1,55 @@ +package bjc.rgens.parser; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URI; +import java.nio.file.FileSystem; +import java.nio.file.FileSystems; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Map; + +/** + * Get access to the included grammars. + * + * @author Ben Culkin + */ +public class RGrammars { + private static RGrammarSet gramSet; + + private static void loadSet() { + try { + URI rsc = RGrammarTest.class.getResource("/server-config-sample.cfg").toURI(); + + Map<String, String> env = new HashMap<>(); + env.put("create", "true"); + FileSystem zipfs = FileSystems.newFileSystem(rsc, env); + + Path cfgPath = Paths.get(rsc); + + gramSet = RGrammarSet.fromConfigFile(cfgPath); + } catch (IOException | URISyntaxException ex) { + RuntimeException rtex = new RuntimeException("Could not load grammars"); + + rtex.initCause(ex); + + throw rtex; + } + } + + public static String generateExport(String exportName) throws GrammarException { + if(gramSet == null) loadSet(); + + if(!gramSet.getExportedRules().contains(exportName)) { + throw new GrammarException(String.format("No built-in rule named %s", exportName)); + } + + RGrammar gram = gramSet.getExportSource(exportName); + + String res = gram.generate(exportName); + if(exportName.contains("+")) res = res.replaceAll("\\s+", ""); + + return res; + } +} diff --git a/RGens/src/main/java/bjc/rgens/parser/RegexRuleCase.java b/RGens/src/main/java/bjc/rgens/parser/RegexRuleCase.java new file mode 100644 index 0000000..82417da --- /dev/null +++ b/RGens/src/main/java/bjc/rgens/parser/RegexRuleCase.java @@ -0,0 +1,31 @@ +package bjc.rgens.parser; + +import bjc.utils.funcdata.IList; + +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +public class RegexRuleCase extends RuleCase { + private Pattern patt; + + public RegexRuleCase(IList<CaseElement> elements, String pattern) { + super(RuleCase.CaseType.REGEX); + + elementList = elements; + + try { + patt = Pattern.compile(pattern); + } catch (PatternSyntaxException psex) { + IllegalArgumentException iaex = + new IllegalArgumentException("This type requires a valid regular expression parameter"); + + iaex.initCause(psex); + + throw iaex; + } + } + + public Pattern getPattern() { + return patt; + } +} diff --git a/RGens/src/main/java/bjc/rgens/parser/Rule.java b/RGens/src/main/java/bjc/rgens/parser/Rule.java new file mode 100644 index 0000000..7043e0f --- /dev/null +++ b/RGens/src/main/java/bjc/rgens/parser/Rule.java @@ -0,0 +1,134 @@ +package bjc.rgens.parser; + +import bjc.utils.funcdata.FunctionalList; +import bjc.utils.funcdata.IList; + +import java.util.Random; + +/** + * A rule in a randomized grammar. + * + * @author EVE + */ +public class Rule { + /** The name of this grammar rule. */ + public final String name; + + /* The cases for this rule. */ + private IList<RuleCase> cases; + + /** + * Create a new grammar rule. + * + * @param ruleName + * The name of the grammar rule. + * + * @throws IllegalArgumentException + * If the rule name is invalid. + */ + public Rule(String ruleName) { + if (ruleName == null) { + throw new NullPointerException("Rule name must not be null"); + } else if (ruleName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } + + name = ruleName; + + cases = new FunctionalList<>(); + } + + /** + * Adds a case to the rule. + * + * @param cse + * The case to add. + */ + public void addCase(RuleCase cse) { + if (cse == null) { + throw new NullPointerException("Case must not be null"); + } + + cases.add(cse); + } + + /** + * Get a random case from this rule. + * + * @return + * A random case from this rule. + */ + public RuleCase getCase() { + return cases.randItem(); + } + + /** + * Get a random case from this rule. + * + * @param rnd + * The random number generator to use. + * + * @return + * A random case from this rule. + */ + public RuleCase getCase(Random rnd) { + return cases.randItem(rnd::nextInt); + } + + /** + * Get all the cases of this rule. + * + * @return + * All the cases in this rule. + */ + public IList<RuleCase> getCases() { + return cases; + } + + /** + * Replace the current list of cases with a new one. + * + * @param cases + * The new list of cases. + */ + public void replaceCases(IList<RuleCase> cases) { + this.cases = cases; + } + + @Override + public int hashCode() { + final int prime = 31; + + int result = 1; + result = prime * result + ((cases == null) ? 0 : cases.hashCode()); + result = prime * result + ((name == null) ? 0 : name.hashCode()); + + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + + if (obj == null) return false; + + if (!(obj instanceof Rule)) return false; + + Rule other = (Rule) obj; + + if (cases == null) { + if (other.cases != null) return false; + } else if (!cases.equals(other.cases)) return false; + + if (name == null) { + if (other.name != null) return false; + } else if (!name.equals(other.name)) return false; + + return true; + } + + @Override + public String toString() { + return String.format("Rule [ruleName='%s', ruleCases=%s]", name, cases); + } +} diff --git a/RGens/src/main/java/bjc/rgens/parser/RuleCase.java b/RGens/src/main/java/bjc/rgens/parser/RuleCase.java new file mode 100644 index 0000000..764fa89 --- /dev/null +++ b/RGens/src/main/java/bjc/rgens/parser/RuleCase.java @@ -0,0 +1,87 @@ +package bjc.rgens.parser; + +import bjc.utils.funcdata.IList; + +/* + * @NOTE + * If at some point we add new case types, they should go into subclasses, + * not into this class. + */ +/** + * A case in a rule in a randomized grammar. + * + * @author EVE + */ +public class RuleCase { + /** + * The possible types of a case. + * + * @author EVE + */ + public static enum CaseType { + /** A normal case, composed from a list of elements. */ + NORMAL, + /** A case that doesn't insert spaces. */ + SPACEFLATTEN, + /** A case that applies a regex after generation. */ + REGEX + } + + /** The type of this case. */ + public final CaseType type; + + /** + * The list of element values for this case. + * + * <h2>Used For</h2> + * <dl> + * <dt>NORMAL, SPACEFLATTEN</dt> + * <dd>Used as the list of elementList the rule is composed of.</dd> + * </dl> + */ + protected IList<CaseElement> elementList; + + protected RuleCase(CaseType typ) { + type = typ; + } + + /** + * Create a new case of the specified type that takes a element list + * parameter. + * + * @param typ + * The type of case to create. + * + * @param elements + * The element list parameter of the case. + * + * @throws IllegalArgumentException + * If this type doesn't take a element list parameter. + */ + public RuleCase(CaseType typ, IList<CaseElement> elements) { + this(typ); + + switch (typ) { + case NORMAL: + case SPACEFLATTEN: + break; + case REGEX: + throw new IllegalArgumentException("This type requires an element list and a pattern"); + default: + throw new IllegalArgumentException("This type doesn't have a element list parameter"); + } + + elementList = elements; + } + + /** + * Get the element list value of this type. + * + * @return + * The element list value of this case, or null if this type + * doesn't have one. + */ + public IList<CaseElement> getElements() { + return elementList; + } +} diff --git a/RGens/src/main/java/bjc/rgens/parser/new-syntax.txt b/RGens/src/main/java/bjc/rgens/parser/new-syntax.txt new file mode 100644 index 0000000..f6578b4 --- /dev/null +++ b/RGens/src/main/java/bjc/rgens/parser/new-syntax.txt @@ -0,0 +1,13 @@ +[grammar] → ([block] ( / \n\.\n ? / [block])*) ? + +[block] → [pragma - block] +→ [rule - block] +→ [where - block] + +[pragma - block] → [pragma] ( / \n( ? !\t) / [pragma])* + +[rule - block] → [rule - decl] [rule - case] ( / \n\t / [rule - case])* + +[where - block] → where / \n\t / ([rule - block] / \n\t / ) + in / \n\t / [rule - block] + +[pragma] → pragma [pragma - name] [pragma - body] |
