diff options
Diffstat (limited to 'src/main')
27 files changed, 2948 insertions, 0 deletions
diff --git a/src/main/java/bjc/rgens/ZadronsPouch.java b/src/main/java/bjc/rgens/ZadronsPouch.java new file mode 100755 index 0000000..827d022 --- /dev/null +++ b/src/main/java/bjc/rgens/ZadronsPouch.java @@ -0,0 +1,220 @@ +package bjc.rgens; + +import bjc.utils.funcdata.FunctionalStringTokenizer; +import bjc.utils.funcdata.IList; +import bjc.utils.funcdata.FunctionalList; +import bjc.utils.gen.RandomGrammar; + +/** + * Example showing code manipulate of random grammars + * + * @author ben + */ +public class ZadronsPouch { + /** + * Main method for running application + * + * @param args + * Unused CLI args + */ + public static void main(String[] args) { + ZadronsPouch zp = new ZadronsPouch(); + + for (int i = 0; i < 100; i++) { + IList<String> ls = zp.grammar.generateListValues("[item]", " "); + + StringBuilder sb = new StringBuilder(); + + ls.forEach(sp -> sb.append(sp)); + + System.out.println(sb.toString().replaceAll("\\s+", " ")); + } + } + + private RandomGrammar<String> grammar; + + /** Create a new instance with a grammar */ + public ZadronsPouch() { + grammar = new RandomGrammar<>(); + + /* + * @NOTE + * Should there be some sort of builder sort of interface? 
+ */ + addRule("[item]", + "[egg]", "[glove]", "[crys-sphere]", "[rock]", + "[figurine]", "[vial]", "[mini-weapon]", "[bag]", + "[card]", "[rope]", "[box]", "[wand]"); + + addEggRules(); + addGloveRules(); + addCrysSphereRules(); + addRockRules(); + + addFigurineRules(); + addVialRules(); + addMiniWeaponRules(); + addBagRules(); + + addCardRules(); + addRopeRules(); + addBoxRules(); + addWandRules(); + } + + private void addBagRules() { + addRule("[bag]", + "bag of [bag-type]", "[sack-type] sack", "[purse-type] purse"); + addRule("[bag-type]", + "holding", "tricks", "useful items", + "devouring", "dwarf-kind", "invisible cloth", + "monster summoning"); + addRule("[sack-type]", + "lunch", "recursive"); + addRule("[purse-type]", + "everfull"); + } + + private void addBoxRules() { + addRule("[box]", + "[box-type] box", "cube of [box-type]"); + addRule("[box-type]", + "limited-force", "frost-resisting", "morphing", + "self-destructing", "pandora", "panicking"); + } + + private void addCardRules() { + addRule("[card]", + "card of [card-type]", "[card-type] card"); + addRule("[card-type]", + "fate", "teleporting", "elusive treasure", "spell-storing", + "many-things", "imprisoning", "messaging", "bounty"); + } + + private void addCrysSphereRules() { + addRule("[crys-sphere]", + "[sphere-type] spheres", "[sphere-type] sphere", + "lens of [lens-type]", "[crystal-type] crystal", + "crystal of [crystal-type]", "crystal ball", + "crystal ball of [crys-suffix]"); + addRule("[sphere-type]", + "microphonic", "seeing-eye"); + addRule("[lens-type]", + "detection"); + addRule("[crystal-type]", + "prison", "radar"); + addRule("[crys-suffix]", + "jumping"); + } + + private void addEggRules() { + addRule("[egg]", + "[egg-type] egg"); + addRule("[egg-type]", + "copper", "stone", "golden", + "white", "white/pink", "glass"); + } + + private void addFigurineRules() { + addRule("[figurine]", + "[fig-material] [fig-animal]"); + addRule("[fig-material]", + "golden", "onyx", "serpentine", 
"ivory", + "marble", "bronze", "jade", "limestone"); + addRule("[fig-animal]", + "lion", "dog", "owl", "goat", + "elephant", "warrior", "palace", "leprechaun"); + } + + private void addGloveRules() { + addRule("[glove]", + "gauntlets of [gauntlet-type]", + "gloves of [glove-type]", + "[glove-type] gloves"); + addRule("[gauntlet-type]", + "dexterity", "power"); + addRule("[glove-type]", + "pushing", "choking", "bigby", "stunning"); + } + + private void addMiniWeaponRules() { + addRule("[mini-weapon]", + "minature [weapon-type]", "small [weapon-type]", + "tiny [weapon-type]", "[sling-type] sling", + "[weapon-type]"); + addRule("[weapon-type]", + "boomerang", "arrow", "net", + "catapult", "hammer", "sword", "club"); + addRule("[sling-type]", + "seeking"); + } + + private void addRockRules() { + addRule("[rock]", + "[pebble-type] pebble", "stone of [stone-type]", + "[stone-type] stone", "brick of [brick-type]", + "[geode-type] geode"); + addRule("[pebble-type]", + "inscribed", "elemental control"); + addRule("[stone-type]", + "good-luck", "weight", + "blind-defense", "metal-clinging"); + addRule("[brick-type]", + "flying"); + addRule("[geode-type]", + "ioun"); + } + + private void addRopeRules() { + addRule("[rope]", + "[rope-type] rope", "rope of [rope-type]", + "ball of [string-type] [string-kind]"); + addRule("[rope-type]", + "trick", "entangling", "climbing", "dancing", + "tripping", "snaring", "levitating", "self-entangling"); + addRule("[string-type]", + "endless"); + addRule("[string-kind]", + "string", "yarn"); + } + + private void addRule(String rule, String... 
cases) { + IList<IList<String>> cses = new FunctionalList<>(); + + for (String strang : cases) { + cses.add(FunctionalStringTokenizer.fromString(strang).toList(s -> s)); + } + + grammar.makeRule(rule, cses); + } + + private void addVialRules() { + addRule("[vial]", + "vial of [vial-type]", "[vial-type] vial", + "[bottle-type] bottle", "[flask-type] flask"); + addRule("[vial-type]", + "holding", "trapping", + "experience", "unnatural regeneration"); + addRule("[bottle-type]", + "ever-smoking", "wheezing", + "blank potion"); + addRule("[flask-type]", + "iron"); + } + + private void addWandRules() { + addRule("[wand]", + "[wand-type] wand", "wand of [wand-type]", + "canceling [wand-type] wand"); + addRule("[wand-type]", + "magic missile", "[spell-1]", "[spell-2]", + "gusting", "life-detecting", "zadron"); + addRule("[spell-1]", + "frost", "fire", "lightning", "fear", + "illumination", "polymorphing", "conjuration", "paralyzing"); + addRule("[spell-2]", + "[spell2-type] detecting"); + addRule("[spell2-type]", + "magic", "enemy", "secret door/trap"); + } +} diff --git a/src/main/java/bjc/rgens/parser/GrammarException.java b/src/main/java/bjc/rgens/parser/GrammarException.java new file mode 100755 index 0000000..9eaa0a1 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/GrammarException.java @@ -0,0 +1,36 @@ +package bjc.rgens.parser; + +/** + * The exception thrown when something goes wrong while parsing a + * grammar. + * + * @author student + */ +public class GrammarException extends RuntimeException { + /* Serialization ID. */ + private static final long serialVersionUID = -7287427479316953668L; + + /** + * Create a new grammar exception with the specified message. + * + * @param msg + * The message for this exception. + */ + public GrammarException(String msg) { + super(msg); + } + + /** + * Create a new grammar exception with the specified message and + * cause. + * + * @param msg + * The message for this exception. 
+ * + * @param cause + * The cause of this exception. + */ + public GrammarException(String msg, Exception cause) { + super(msg, cause); + } +} diff --git a/src/main/java/bjc/rgens/parser/RGrammar.java b/src/main/java/bjc/rgens/parser/RGrammar.java new file mode 100755 index 0000000..38f38c8 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/RGrammar.java @@ -0,0 +1,491 @@ +package bjc.rgens.parser; + +import bjc.rgens.parser.elements.CaseElement; +import bjc.rgens.parser.elements.LiteralCaseElement; +import bjc.rgens.parser.elements.RangeCaseElement; +import bjc.rgens.parser.elements.RuleCaseElement; +import bjc.rgens.parser.elements.VariableCaseElement; +import bjc.utils.funcutils.StringUtils; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Random; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.text.similarity.LevenshteinDistance; + +import edu.gatech.gtri.bktree.BkTreeSearcher; +import edu.gatech.gtri.bktree.BkTreeSearcher.Match; +import edu.gatech.gtri.bktree.Metric; +import edu.gatech.gtri.bktree.MutableBkTree; + +/** + * Represents a randomized grammar. + * + * @author EVE + */ +public class RGrammar { + /* The max distance between possible alternate rules. */ + private static final int MAX_DISTANCE = 6; + + /* The metric for the levenshtein distance. */ + private static final class LevenshteinMetric implements Metric<String> { + private static LevenshteinDistance DIST; + + static { + DIST = LevenshteinDistance.getDefaultInstance(); + } + + public LevenshteinMetric() { + } + + @Override + public int distance(String x, String y) { + return DIST.apply(x, y); + } + } + + /* The current state during generation. */ + private static class GenerationState { + /* The current string. */ + public StringBuilder contents; + /* The RNG. */ + public Random rnd; + + /* The current set of variables. 
*/ + public Map<String, String> vars; + + /** + * Create a new generation state. + * + * @param cont + * The string being generated. + * + * @param rand + * The RNG to use. + * + * @param vs + * The variables to use. + */ + public GenerationState(StringBuilder cont, Random rand, Map<String, String> vs) { + contents = cont; + rnd = rand; + vars = vs; + } + } + + /* The pattern for matching the name of a variable. */ + private static Pattern NAMEVAR_PATTERN = Pattern.compile("\\$(\\w+)"); + + /* The rules of the grammar. */ + private Map<String, Rule> rules; + /* The rules imported from other grammars. */ + private Map<String, RGrammar> importRules; + /* The rules exported from this grammar. */ + private Set<String> exportRules; + /* The initial rule of this grammar. */ + private String initialRule; + + /* The tree to use for finding rule suggestions. */ + private BkTreeSearcher<String> ruleSearcher; + + /** + * Create a new randomized grammar using the specified set of rules. + * + * @param ruls + * The rules to use. + */ + public RGrammar(Map<String, Rule> ruls) { + rules = ruls; + } + + /** + * Sets the imported rules to use. + * + * Imported rules are checked for rule definitions after local definitions are + * checked. + * + * @param importedRules + * The set of imported rules to use. + */ + public void setImportedRules(Map<String, RGrammar> importedRules) { + importRules = importedRules; + } + + /** + * Generates the data structure backing rule suggestions for unknown rules. + */ + public void generateSuggestions() { + MutableBkTree<String> ruleSuggester = new MutableBkTree<>(new LevenshteinMetric()); + + ruleSuggester.addAll(rules.keySet()); + ruleSuggester.addAll(importRules.keySet()); + + ruleSearcher = new BkTreeSearcher<>(ruleSuggester); + } + + /** + * Generate a string from this grammar, starting from the specified rule. + * + * @param startRule + * The rule to start generating at, or null to use the initial rule + * for this grammar. 
+ * + * @return A possible string from the grammar. + */ + public String generate(String startRule) { + return generate(startRule, new Random(), new HashMap<>()); + } + + /** + * Generate a string from this grammar, starting from the specified rule. + * + * @param startRule + * The rule to start generating at, or null to use the initial rule + * for this grammar. + * + * @param rnd + * The random number generator to use. + * + * @param vars + * The set of variables to use. + * + * @return A possible string from the grammar. + */ + public String generate(String startRule, Random rnd, Map<String, String> vars) { + String fromRule = startRule; + + if (startRule == null) { + if (initialRule == null) { + throw new GrammarException("Must specify a start rule for grammars with no initial rule"); + } + + fromRule = initialRule; + } else { + if (startRule.equals("")) { + throw new GrammarException("The empty string is not a valid rule name"); + } + } + + RuleCase start = rules.get(fromRule).getCase(rnd); + + StringBuilder contents = new StringBuilder(); + + generateCase(start, new GenerationState(contents, rnd, vars)); + + String body = contents.toString(); + /* + * Collapse duplicate spaces. + */ + body = body.replaceAll("\\s+", " "); + + /* + * Remove extraneous spaces around punctutation marks. + * + * This can be done in the grammars, but it is very tedious to do so. + */ + + /* Handle 's */ + body = body.replaceAll(" 's ", "'s "); + + /* Handle opening/closing punctuation. */ + body = body.replaceAll("([(\\[]) ", " $1"); + body = body.replaceAll(" ([)\\]'\"])", "$1 "); + + /* Remove spaces around series of opening/closing punctuation. */ + body = body.replaceAll("([(\\[])\\s+([(\\[])", "$1$2"); + body = body.replaceAll("([)\\]])\\s+([)\\]])", "$1$2"); + + /* Handle inter-word punctuation. */ + body = body.replaceAll(" ([,:.!])", "$1 "); + + /* Handle intra-word punctuation. */ + body = body.replaceAll("\\s?([-/])\\s?", "$1"); + + /* + * Collapse duplicate spaces. 
+ */ + body = body.replaceAll("\\s+", " "); + + /* + * @TODO 11/01/17 Ben Culkin :RegexRule Replace this once it is no longer + * needed. + */ + body = body.replaceAll("\\s(ish|burg|ton|ville|opolis|field|boro|dale)", "$1"); + + return body; + } + + /* Generate a rule case. */ + private void generateCase(RuleCase start, GenerationState state) { + try { + switch (start.type) { + case NORMAL: + for (CaseElement elm : start.getElements()) { + generateElement(elm, state); + + if (elm.type != CaseElement.ElementType.VARDEF) { + state.contents.append(" "); + } + } + break; + case SPACEFLATTEN: + for (CaseElement elm : start.getElements()) { + generateElement(elm, state); + } + break; + default: + String msg = String.format("Unknown case type '%s'", start.type); + throw new GrammarException(msg); + } + } catch (GrammarException gex) { + String msg = String.format("Error in generating case (%s)", start); + throw new GrammarException(msg, gex); + } + } + + /* Generate a case element. */ + private void generateElement(CaseElement elm, GenerationState state) { + try { + switch (elm.type) { + case LITERAL: { + LiteralCaseElement lit = (LiteralCaseElement)elm; + + state.contents.append(lit.val); + break; + } + case RULEREF: { + RuleCaseElement rle = (RuleCaseElement)elm; + + generateRuleReference(rle, state); + break; + } + case RANGE: { + RangeCaseElement rang = (RangeCaseElement)elm; + + int val = state.rnd.nextInt(rang.end - rang.begin); + val += rang.begin; + + state.contents.append(val); + break; + } + case VARDEF: + generateVarDef(((VariableCaseElement)elm).varName, ((VariableCaseElement)elm).varDef, state); + break; + case EXPVARDEF: + generateExpVarDef(((VariableCaseElement)elm).varName, ((VariableCaseElement)elm).varDef, state); + break; + default: + String msg = String.format("Unknown element type '%s'", elm.type); + throw new GrammarException(msg); + } + } catch (GrammarException gex) { + String msg = String.format("Error in generating case element (%s)", elm); + 
throw new GrammarException(msg, gex); + } + } + + /* Generate a expanding variable definition. */ + private void generateExpVarDef(String name, String defn, GenerationState state) { + GenerationState newState = new GenerationState(new StringBuilder(), state.rnd, state.vars); + + if (rules.containsKey(defn)) { + RuleCase destCase = rules.get(defn).getCase(); + + generateCase(destCase, newState); + } else if (importRules.containsKey(defn)) { + RGrammar destGrammar = importRules.get(defn); + String res = destGrammar.generate(defn, state.rnd, state.vars); + + newState.contents.append(res); + } else { + String msg = String.format("No rule '%s' defined", defn); + throw new GrammarException(msg); + } + + state.vars.put(name, newState.contents.toString()); + } + + /* Generate a variable definition. */ + private static void generateVarDef(String name, String defn, GenerationState state) { + state.vars.put(name, defn); + } + + /* Generate a rule reference. */ + private void generateRuleReference(RuleCaseElement elm, GenerationState state) { + String refersTo = elm.val; + + GenerationState newState = new GenerationState(new StringBuilder(), state.rnd, state.vars); + + if (refersTo.contains("$")) { + /* Parse variables */ + String refBody = refersTo.substring(1, refersTo.length() - 1); + + if (refBody.contains("-")) { + /* Handle dependent rule names. 
*/ + StringBuffer nameBuffer = new StringBuffer(); + + Matcher nameMatcher = NAMEVAR_PATTERN.matcher(refBody); + + while (nameMatcher.find()) { + String var = nameMatcher.group(1); + + if (!state.vars.containsKey(var)) { + String msg = String.format("No variable '%s' defined", var); + throw new GrammarException(msg); + } + + String name = state.vars.get(var); + + if (name.contains(" ")) { + throw new GrammarException("Variables substituted into names cannot contain spaces"); + } else if (name.equals("")) { + throw new GrammarException("Variables substituted into names cannot be empty"); + } + + nameMatcher.appendReplacement(nameBuffer, name); + } + + nameMatcher.appendTail(nameBuffer); + + refersTo = "[" + nameBuffer.toString() + "]"; + } else { + /* Handle string references. */ + if (refBody.equals("$")) { + throw new GrammarException("Cannot refer to unnamed variables"); + } + + String key = refBody.substring(1); + + if (!state.vars.containsKey(key)) { + String msg = String.format("No variable '%s' defined", key); + throw new GrammarException(msg); + } + + state.contents.append(state.vars.get(key)); + + return; + } + } + + if (refersTo.startsWith("[^")) { + refersTo = "[" + refersTo.substring(2); + + RGrammar dst = importRules.get(refersTo); + + newState.contents.append(dst.generate(refersTo, state.rnd, state.vars)); + } else if (rules.containsKey(refersTo)) { + RuleCase cse = rules.get(refersTo).getCase(state.rnd); + + generateCase(cse, newState); + } else if (importRules.containsKey(refersTo)) { + RGrammar dst = importRules.get(refersTo); + + newState.contents.append(dst.generate(refersTo, state.rnd, state.vars)); + } else { + if (ruleSearcher != null) { + Set<Match<? 
extends String>> results = ruleSearcher.search(refersTo, MAX_DISTANCE); + + String[] resArray = results.stream().map(Match::getMatch).toArray((i) -> new String[i]); + + String msg = String.format("No rule '%s' defined (perhaps you meant %s?)", refersTo, + StringUtils.toEnglishList(resArray, false)); + + throw new GrammarException(msg); + } + + String msg = String.format("No rule '%s' defined", refersTo); + throw new GrammarException(msg); + } + + if (refersTo.contains("+")) { + /* Rule names with pluses in them get space-flattened */ + state.contents.append(newState.contents.toString().replaceAll("\\s+", "")); + } else { + state.contents.append(newState.contents.toString()); + } + } + + /** + * Get the initial rule of this grammar. + * + * @return The initial rule of this grammar. + */ + public String getInitialRule() { + return initialRule; + } + + /** + * Set the initial rule of this grammar. + * + * @param initRule + * The initial rule of this grammar, or null to say there is no + * initial rule. + */ + public void setInitialRule(String initRule) { + /* Passing null, nulls our initial rule. */ + if (initRule == null) { + this.initialRule = null; + return; + } + + if (initRule.equals("")) { + throw new GrammarException("The empty string is not a valid rule name"); + } else if (!rules.containsKey(initRule)) { + String msg = String.format("No rule '%s' local to this grammar defined.", initRule); + + throw new GrammarException(msg); + } + + initialRule = initRule; + } + + /** + * Gets the rules exported by this grammar. + * + * The initial rule is exported by default if specified. + * + * @return The rules exported by this grammar. 
+ */ + public Set<Rule> getExportedRules() { + Set<Rule> res = new HashSet<>(); + + for (String rname : exportRules) { + if (!rules.containsKey(rname)) { + String msg = String.format("No rule '%s' local to this grammar defined", initialRule); + + throw new GrammarException(msg); + } + + res.add(rules.get(rname)); + } + + if (initialRule != null) { + res.add(rules.get(initialRule)); + } + + return res; + } + + /** + * Set the rules exported by this grammar. + * + * @param exportedRules + * The rules exported by this grammar. + */ + public void setExportedRules(Set<String> exportedRules) { + exportRules = exportedRules; + } + + /** + * Get all the rules in this grammar. + * + * @return All the rules in this grammar. + */ + public Map<String, Rule> getRules() { + return rules; + } +} diff --git a/src/main/java/bjc/rgens/parser/RGrammarBuilder.java b/src/main/java/bjc/rgens/parser/RGrammarBuilder.java new file mode 100755 index 0000000..b4cb04a --- /dev/null +++ b/src/main/java/bjc/rgens/parser/RGrammarBuilder.java @@ -0,0 +1,246 @@ +package bjc.rgens.parser; + +import bjc.rgens.parser.elements.CaseElement; +import bjc.utils.funcdata.FunctionalList; +import bjc.utils.funcdata.IList; + +import static bjc.rgens.parser.RuleCase.CaseType.*; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * Construct randomized grammars piece by piece. + * + * @author EVE + */ +public class RGrammarBuilder { + /* The rules being built. */ + private Map<String, Rule> rules; + /* The current set of exported rules. */ + private Set<String> exportedRules; + /* The current initial rule. */ + private String initialRule; + + /** Create a new randomized grammar builder. */ + public RGrammarBuilder() { + rules = new HashMap<>(); + + exportedRules = new HashSet<>(); + } + + /** + * Get or create a rule by the given name. + * + * @param rName + * The name of the rule. + * + * @return + * The rule by that name, or a new one if none existed. 
+ */ + public Rule getOrCreateRule(String rName) { + if(rName == null) + throw new NullPointerException("Rule name must not be null"); + else if(rName.equals("")) + throw new IllegalArgumentException("The empty string is not a valid rule name"); + + if(rules.containsKey(rName)) + return rules.get(rName); + else { + Rule ret = new Rule(rName); + + rules.put(rName, ret); + + return ret; + } + } + + /** + * Convert this builder into a grammar. + * + * @return + * The grammar built by this builder + */ + public RGrammar toRGrammar() { + RGrammar grammar = new RGrammar(rules); + + grammar.setInitialRule(initialRule); + + grammar.setExportedRules(exportedRules); + + return grammar; + } + + /** + * Set the initial rule of the grammar. + * + * @param init + * The initial rule of the grammar. + * + * @throws IllegalArgumentException + * If the rule is either not valid or not defined in the grammar. + */ + public void setInitialRule(String init) { + if (init == null) { + throw new NullPointerException("init must not be null"); + } else if (init.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } + + initialRule = init; + } + + /** + * Add an exported rule to this grammar. + * + * @param export + * The name of the rule to export. + * + * @throws IllegalArgumentException + * If the rule is either not valid or not defined in the grammar. + */ + public void addExport(String export) { + if (export == null) { + throw new NullPointerException("Export name must not be null"); + } else if (export.equals("")) { + throw new NullPointerException("The empty string is not a valid rule name"); + } + + exportedRules.add(export); + } + + /** + * Suffix a given case element to every case of a specific rule. + * + * @param ruleName + * The rule to suffix. + * + * @param suffix + * The suffix to add. + * + * @throws IllegalArgumentException + * If the rule name is either invalid or not defined by this + * grammar, or if the suffix is invalid. 
+ */ + public void suffixWith(String ruleName, String suffix) { + if (ruleName == null) { + throw new NullPointerException("Rule name must not be null"); + } else if (ruleName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } else if(!rules.containsKey(ruleName)) { + String msg = String.format("Rule '%s' is not a valid rule name."); + + throw new IllegalArgumentException(msg); + } + + CaseElement element = CaseElement.createElement(suffix); + + FunctionalList<RuleCase> newCases = new FunctionalList<>(); + + IList<RuleCase> caseList = rules.get(ruleName).getCases(); + for (RuleCase ruleCase : caseList) { + FunctionalList<CaseElement> newCase = new FunctionalList<>(); + + for(CaseElement elm : ruleCase.getElements()) { + newCase.add(elm); + } + + newCase.add(element); + + newCases.add(new RuleCase(NORMAL, newCase)); + } + + + for (RuleCase newCase : newCases) { + caseList.add(newCase); + } + } + + /** + * Prefix a given case element to every case of a specific rule. + * + * @param ruleName + * The rule to prefix. + * + * @param prefix + * The prefix to add. + * + * @throws IllegalArgumentException + * If the rule name is either invalid or not defined by this + * grammar, or if the prefix is invalid. 
+ */ + public void prefixWith(String ruleName, String prefix) { + if (ruleName == null) { + throw new NullPointerException("Rule name must not be null"); + } else if (ruleName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } else if(!rules.containsKey(ruleName)) { + String msg = String.format("Rule '%s' is not a valid rule name."); + + throw new IllegalArgumentException(msg); + } + + CaseElement element = CaseElement.createElement(prefix); + + FunctionalList<RuleCase> newCases = new FunctionalList<>(); + + IList<RuleCase> caseList = rules.get(ruleName).getCases(); + for (RuleCase ruleCase : caseList) { + FunctionalList<CaseElement> newCase = new FunctionalList<>(); + + newCase.add(element); + + for(CaseElement elm : ruleCase.getElements()) { + newCase.add(elm); + } + + newCases.add(new RuleCase(NORMAL, newCase)); + } + + + for (RuleCase newCase : newCases) { + caseList.add(newCase); + } + } + + public void despaceRule(String ruleName) { + if (ruleName == null) { + throw new NullPointerException("ruleName must not be null"); + } else if (ruleName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } + + IList<RuleCase> caseList = rules.get(ruleName).getCases(); + + IList<RuleCase> newCaseList = new FunctionalList<>(); + + for(RuleCase cse : caseList) { + newCaseList.add(new RuleCase(SPACEFLATTEN, cse.getElements())); + } + + rules.get(ruleName).replaceCases(newCaseList); + } + + public void regexizeRule(String rule, String pattern) { + if (rule == null) { + throw new NullPointerException("rule must not be null"); + } else if(pattern == null) { + throw new NullPointerException("pattern must not be null"); + } else if (rule.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } + + IList<RuleCase> caseList = rules.get(rule).getCases(); + + IList<RuleCase> newCaseList = new FunctionalList<>(); + + for(RuleCase cse : caseList) 
{ + newCaseList.add(new RegexRuleCase(cse.getElements(), pattern)); + } + + rules.get(rule).replaceCases(newCaseList); + + } +} diff --git a/src/main/java/bjc/rgens/parser/RGrammarFormatter.java b/src/main/java/bjc/rgens/parser/RGrammarFormatter.java new file mode 100755 index 0000000..a2454dc --- /dev/null +++ b/src/main/java/bjc/rgens/parser/RGrammarFormatter.java @@ -0,0 +1,97 @@ +package bjc.rgens.parser; + +import bjc.rgens.parser.elements.CaseElement; +import bjc.utils.funcdata.IList; + +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * Format randomized grammars to strings properly. + * + * @author EVE + */ +public class RGrammarFormatter { + /** + * Format a grammar into a file that represents that grammar. + * + * @param gram + * The grammar to format. + * + * @return + * The formatted grammar. + */ + public static String formatGrammar(RGrammar gram) { + StringBuilder sb = new StringBuilder(); + + Map<String, Rule> rules = gram.getRules(); + + String initRuleName = gram.getInitialRule(); + + Set<String> processedRules = new HashSet<>(); + + if (initRuleName != null) { + processRule(rules.get(initRuleName), sb); + + processedRules.add(initRuleName); + } + + for (Rule rule : rules.values()) { + if (!processedRules.contains(rule.name)) { + sb.append("\n\n"); + + processRule(rule, sb); + } + + processedRules.add(rule.name); + } + + return sb.toString().trim(); + } + + /* Format a rule. 
*/ + private static void processRule(Rule rule, StringBuilder sb) { + IList<RuleCase> cases = rule.getCases(); + + StringBuilder ruleBuilder = new StringBuilder(); + + ruleBuilder.append(rule.name); + ruleBuilder.append(" \u2192 "); + + int markerPos = ruleBuilder.length(); + + processCase(cases.first(), ruleBuilder); + + sb.append(ruleBuilder.toString().trim()); + + ruleBuilder = new StringBuilder(); + + for (RuleCase cse : cases.tail()) { + sb.append("\n\t"); + + for (int i = 8; i < markerPos; i++) { + ruleBuilder.append(" "); + } + + processCase(cse, ruleBuilder); + + sb.append(ruleBuilder.toString()); + + ruleBuilder = new StringBuilder(); + } + + } + + /* Format a case. */ + private static void processCase(RuleCase cse, StringBuilder sb) { + /* Process each element, adding a space. */ + for (CaseElement element : cse.getElements()) { + sb.append(element.toString()); + sb.append(" "); + } + + /* Remove the trailing space. */ + sb.deleteCharAt(sb.length() - 1); + } +} diff --git a/src/main/java/bjc/rgens/parser/RGrammarParser.java b/src/main/java/bjc/rgens/parser/RGrammarParser.java new file mode 100755 index 0000000..3a357b1 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/RGrammarParser.java @@ -0,0 +1,394 @@ +package bjc.rgens.parser; + +import bjc.rgens.parser.elements.CaseElement; +import bjc.utils.funcdata.FunctionalList; +import bjc.utils.funcdata.IList; +import bjc.utils.funcutils.TriConsumer; +import bjc.utils.ioutils.blocks.Block; +import bjc.utils.ioutils.blocks.BlockReader; +import bjc.utils.ioutils.blocks.SimpleBlockReader; + +import java.io.Reader; +import java.io.StringReader; +import java.util.HashMap; +import java.util.Map; + +/** + * Reads {@link RGrammar} from a input stream. + * + * @author student + */ +public class RGrammarParser { + /** + * Whether we are in debug mode or not. + */ + public static final boolean DEBUG = false; + + /* + * Templates for level-dependent delimiters. + */ + /* Pragma block delimiter. 
*/ + private static final String TMPL_PRAGMA_BLOCK_DELIM = "\\R\\t{%d}(?!\\t)"; + /* Rule declaration block delimiter. */ + private static final String TMPL_RULEDECL_BLOCK_DELIM = "\\R\\t\\t{%d}"; + /* Where block delimiter. */ + private static final String TMPL_WHERE_BLOCK_DELIM = "\\R\\t{%d}(?:in|end)\\R"; + /* Top-level block delimiter. */ + private static final String TMPL_TOPLEVEL_BLOCK_DELIM = "\\R\\t{%d}\\.?\\R"; + + /* Pragma impls. */ + private static Map<String, TriConsumer<String, RGrammarBuilder, Integer>> pragmas; + + /* Initialize pragmas. */ + static { + pragmas = new HashMap<>(); + + pragmas.put("initial-rule", (body, build, level) -> { + int sep = body.indexOf(' '); + + if (sep != -1) { + String msg = "Initial-rule pragma takes only one argument, the name of the initial rule"; + throw new GrammarException(msg); + } + + build.setInitialRule(body); + }); + + pragmas.put("despace-rule", (body, build, level) -> { + int sep = body.indexOf(' '); + + if (sep != -1) { + String msg = "despace-rule pragma takes only one argument, the name of the rule to despace"; + throw new GrammarException(msg); + } + + build.despaceRule(body); + }); + + pragmas.put("export-rule", (body, build, level) -> { + String[] exports = body.split(" "); + + for (String export : exports) { + build.addExport(export); + } + }); + + pragmas.put("regex-rule", (body, build, level) -> { + int nameIndex = body.indexOf(" "); + + if(nameIndex == -1) { + throw new GrammarException("Regex-rule pragma takes two arguments: the name of the rule to process, then the regex to apply after the rule has been generated."); + } + + String name = body.substring(0, nameIndex).trim(); + String patt = body.substring(nameIndex + 1).trim(); + + build.regexizeRule(name, patt); + }); + + pragmas.put("suffix-with", (body, build, level) -> { + String[] parts = body.trim().split(" "); + + if (parts.length != 2) { + String msg = "Suffix-with pragma takes two arguments, the name of the rule to suffix, then what to 
suffix it with"; + + throw new GrammarException(msg); + } + + build.suffixWith(parts[0], parts[1]); + }); + + pragmas.put("prefix-with", (body, build, level) -> { + String[] parts = body.trim().split(" "); + + if (parts.length != 2) { + String msg = "Prefix-with pragma takes two arguments, the name of the rule to prefix, then what to prefix it with"; + + throw new GrammarException(msg); + } + + build.prefixWith(parts[0], parts[1]); + }); + } + + /** + * Read a {@link RGrammar} from an input stream. + * + * @param is + * The input stream to read from. + * + * @return + * The grammar represented by the stream. + * + * @throws GrammarException + * Thrown if the grammar has a syntax error. + */ + public static RGrammar readGrammar(Reader is) throws GrammarException { + String dlm = String.format(TMPL_TOPLEVEL_BLOCK_DELIM, 0); + + try (BlockReader reader = new SimpleBlockReader(dlm, is)) { + if (!reader.hasNextBlock()) { + throw new GrammarException("At least one top-level block must be present"); + } + + try { + RGrammarBuilder build = new RGrammarBuilder(); + + reader.forEachBlock((block) -> { + if(DEBUG) + System.err.printf("Handling top-level block (%s)\n", block); + + handleBlock(build, block.contents, 0); + }); + + return build.toRGrammar(); + } catch (GrammarException gex) { + String msg = String.format("Error in block (%s)", reader.getBlock()); + throw new GrammarException(msg, gex); + } + } catch (Exception ex) { + throw new GrammarException("Unknown error handling block", ex); + } + } + + /* Throughout these, level indicates the nesting level of that construct. */ + + /* Handles an arbitrary block. */ + private static void handleBlock(RGrammarBuilder build, String block, + int level) throws GrammarException { + /* Discard empty blocks. 
*/ + if (block.equals("") || block.matches("\\R")) + return; + + int typeSep = block.indexOf(' '); + + if (typeSep == -1) { + throw new GrammarException( + "A block must start with a introducer, followed by a space, then the rest of the block"); + } + + String blockType = block.substring(0, typeSep).trim(); + + if (blockType.equalsIgnoreCase("pragma")) { + handlePragmaBlock(block, build, level); + } else if (blockType.startsWith("[")) { + handleRuleBlock(block, build, level); + } else if (blockType.equalsIgnoreCase("where")) { + handleWhereBlock(block, build, level); + } else if (blockType.equalsIgnoreCase("#")) { + if(DEBUG) + System.err.printf("Handled comment block (%s)\n", block); + /* + * Comment block. + * + * @TODO 10/11/17 Ben Culkin :GrammarComment + * Attach these to the grammar somehow so that they + * can be re-output during formatting. + */ + return; + } else { + String msg = String.format("Unknown block type: '%s'", blockType); + throw new GrammarException(msg); + } + } + + /* Handle reading a block of pragmas. 
*/ + private static void handlePragmaBlock(String block, RGrammarBuilder build, + int level) throws GrammarException { + String dlm = String.format(TMPL_PRAGMA_BLOCK_DELIM, level); + try (BlockReader pragmaReader = new SimpleBlockReader(dlm, new StringReader(block))) { + try { + pragmaReader.forEachBlock((pragma) -> { + if(DEBUG) + System.err.printf("Handled pragma block (%s)\n", pragma); + + String pragmaContents = pragma.contents; + + int pragmaSep = pragmaContents.indexOf(' '); + + if (pragmaSep == -1) { + String msg = "A pragma invocation must consist of the word pragma, followed by a space, then the body of the pragma"; + + throw new GrammarException(msg); + } + + String pragmaLeader = pragmaContents.substring(0, pragmaSep); + String pragmaBody = pragmaContents.substring(pragmaSep + 1); + + if (!pragmaLeader.equalsIgnoreCase("pragma")) { + String msg = String.format("Illegal line leader in pragma block: '%s'", pragmaLeader); + + throw new GrammarException(msg); + } + + handlePragma(pragmaBody, build, level); + }); + } catch (GrammarException gex) { + Block pragma = pragmaReader.getBlock(); + String msg = String.format("Error in pragma: (%s)", pragma); + + throw new GrammarException(msg, gex); + } + } catch (Exception ex) { + throw new GrammarException("Unknown error handling pragma block", ex); + } + } + + /* Handle an individual pragma in a block. 
*/ + private static void handlePragma(String pragma, RGrammarBuilder build, + int level) throws GrammarException { + int bodySep = pragma.indexOf(' '); + + if (bodySep == -1) + bodySep = pragma.length(); + + String pragmaName = pragma.substring(0, bodySep); + String pragmaBody = pragma.substring(bodySep + 1); + + if (pragmas.containsKey(pragmaName)) { + try { + if(DEBUG) + System.err.printf("Handled pragma '%s'\n", pragmaName); + + pragmas.get(pragmaName).accept(pragmaBody, build, level); + } catch (GrammarException gex) { + String msg = String.format("Error in pragma '%s'", pragmaName); + + throw new GrammarException(msg, gex); + } + } else { + String msg = String.format("Unknown pragma '%s'", pragmaName); + + throw new GrammarException(msg); + } + } + + /* Handle a block of a rule declaration and one or more cases. */ + private static void handleRuleBlock(String ruleBlock, RGrammarBuilder build, + int level) throws GrammarException { + String dlm = String.format(TMPL_RULEDECL_BLOCK_DELIM, level); + try (BlockReader ruleReader = new SimpleBlockReader(dlm, new StringReader(ruleBlock))) { + try { + if (ruleReader.hasNextBlock()) { + /* Rule with a declaration followed by multiple cases. */ + ruleReader.nextBlock(); + Block declBlock = ruleReader.getBlock(); + + String declContents = declBlock.contents; + Rule rl = handleRuleDecl(build, declContents); + + ruleReader.forEachBlock((block) -> { + /* Ignore comment lines. */ + if(block.contents.trim().startsWith("#")) return; + + handleRuleCase(block.contents, build, rl); + }); + } else { + /* Rule with a declaration followed by a single case. */ + handleRuleDecl(build, ruleBlock); + } + } catch (GrammarException gex) { + String msg = String.format("Error in rule case (%s)", ruleReader.getBlock()); + + throw new GrammarException(msg, gex); + } + } catch (Exception ex) { + throw new GrammarException("Unknown error handling rule block", ex); + } + } + + /* Handle a rule declaration and its initial case. 
*/ + private static Rule handleRuleDecl(RGrammarBuilder build, String declContents) { + int declSep = declContents.indexOf("\u2192"); + + if (declSep == -1) { + /* + * @NOTE + * We should maybe remove support for the old + * syntax at some point. However, maybe we don't + * want to do so so as to make inputting grammars + * easier. + */ + declSep = declContents.indexOf(' '); + + if (declSep == -1) { + String msg = "A rule must be given at least one case in its declaration, and seperated from that case by \u2192"; + + throw new GrammarException(msg); + } + } + + String ruleName = declContents.substring(0, declSep).trim(); + String ruleBody = declContents.substring(declSep + 1).trim(); + + if (ruleName.equals("")) { + throw new GrammarException("The empty string is not a valid rule name"); + } + + Rule rul = build.getOrCreateRule(ruleName); + + handleRuleCase(ruleBody, build, rul); + + return rul; + } + + /* Handle a single case of a rule. */ + private static void handleRuleCase(String cse, RGrammarBuilder build, Rule rul) { + IList<CaseElement> caseParts = new FunctionalList<>(); + + for (String csepart : cse.split(" ")) { + String partToAdd = csepart.trim(); + + /* Ignore empty parts */ + if (partToAdd.equals("")) + continue; + + caseParts.add(CaseElement.createElement(partToAdd)); + } + + rul.addCase(new RuleCase(RuleCase.CaseType.NORMAL, caseParts)); + } + + /* Handle a where block (a block with local rules). 
*/ + private static void handleWhereBlock(String block, RGrammarBuilder build, + int level) throws GrammarException { + int nlIndex = block.indexOf("\\n"); + + if (nlIndex == -1) { + throw new GrammarException("Where block must be a context followed by a body"); + } + + String trimBlock = block.substring(nlIndex).trim(); + + String whereDelim = String.format(TMPL_WHERE_BLOCK_DELIM, level); + + try (BlockReader whereReader = new SimpleBlockReader(whereDelim, + new StringReader(trimBlock))) { + try { + Block whereCtx = whereReader.next(); + + StringReader ctxReader = new StringReader(whereCtx.contents.trim()); + String ctxDelim = String.format(TMPL_TOPLEVEL_BLOCK_DELIM, level + 1); + + try (BlockReader bodyReader = new SimpleBlockReader(ctxDelim, ctxReader)) { + @SuppressWarnings("unused") + Block whereBody = whereReader.next(); + + /** + * @TODO 10/11/17 Ben Culkin :WhereBlocks + * Implement where blocks. + * + * A where block has the context evaluated + * in a new context, and the body executed + * in that context. + */ + } + } catch (GrammarException gex) { + throw new GrammarException(String.format("Error in where block (%s)", + whereReader.getBlock()), gex); + } + } catch (Exception ex) { + throw new GrammarException("Unknown error in where block", ex); + } + } +} diff --git a/src/main/java/bjc/rgens/parser/RGrammarSet.java b/src/main/java/bjc/rgens/parser/RGrammarSet.java new file mode 100755 index 0000000..975510a --- /dev/null +++ b/src/main/java/bjc/rgens/parser/RGrammarSet.java @@ -0,0 +1,290 @@ +package bjc.rgens.parser; + +import java.io.BufferedReader; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Map; +import java.util.Scanner; +import java.util.Set; + +/** + * Represents a set of grammars that can share rules via exports. + * + * @author EVE + */ +public class RGrammarSet { + /* Contains all the grammars in this set. 
*/ + private Map<String, RGrammar> grammars; + + /* Contains all the exported rules from grammars. */ + private Map<String, RGrammar> exportedRules; + + /* Contains which export came from which grammar. */ + private Map<String, String> exportFrom; + + /* Contains which file a grammar was loaded from. */ + private Map<String, String> loadedFrom; + + /** Create a new set of randomized grammars. */ + public RGrammarSet() { + grammars = new HashMap<>(); + + exportedRules = new HashMap<>(); + + exportFrom = new HashMap<>(); + loadedFrom = new HashMap<>(); + } + + /** + * Add a grammar to this grammar set. + * + * @param grammarName + * The name of the grammar to add. + * + * @param gram + * The grammar to add. + * + * @throws IllegalArgumentException + * If the grammar name is invalid. + */ + public void addGrammar(String grammarName, RGrammar gram) { + /* Make sure a grammar is valid. */ + if (grammarName == null) { + throw new NullPointerException("Grammar name must not be null"); + } else if (gram == null) { + throw new NullPointerException("Grammar must not be null"); + } else if (grammarName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid grammar name"); + } + + grammars.put(grammarName, gram); + + /* Process exports from the grammar. */ + for (Rule export : gram.getExportedRules()) { + exportedRules.put(export.name, gram); + + exportFrom.put(export.name, grammarName); + } + + /* Add exports to grammar. */ + gram.setImportedRules(exportedRules); + } + + /** + * Get a grammar from this grammar set. + * + * @param grammarName + * The name of the grammar to get. + * + * @return + * The grammar with that name. + * + * @throws IllegalArgumentException + * If the grammar name is invalid or not present in this set. + */ + public RGrammar getGrammar(String grammarName) { + /* Check arguments. 
*/ + if (grammarName == null) { + throw new NullPointerException("Grammar name must not be null"); + } else if (grammarName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid grammar name"); + } else if (!grammars.containsKey(grammarName)) { + String msg = String.format("No grammar with name '%s' found", grammarName); + + throw new IllegalArgumentException(msg); + } + + return grammars.get(grammarName); + } + + /** + * Get the grammar a rule was exported from. + * + * @param exportName + * The name of the exported rule. + * + * @return + * The grammar the exported rule came from. + * + * @throws IllegalArgumentException + * If the export name is invalid or not present in this set. + */ + public RGrammar getExportSource(String exportName) { + /* Check arguments. */ + if (exportName == null) { + throw new NullPointerException("Export name must not be null"); + } else if (exportName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } else if (!exportedRules.containsKey(exportName)) { + String msg = String.format("No export with name '%s' defined", exportName); + throw new IllegalArgumentException(msg); + } + + return exportedRules.get(exportName); + } + + /** + * Get the source of an exported rule. + * + * This will often be a grammar name, but is not required to be one. + * + * @param exportName + * The name of the exported rule. + * + * @return + * The source of an exported rule. + * + * @throws IllegalArgumentException + * If the exported rule is invalid or not present in this set. + */ + public String exportedFrom(String exportName) { + /* Check arguments. 
*/ + if (exportName == null) { + throw new NullPointerException("Export name must not be null"); + } else if (exportName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } else if (!exportedRules.containsKey(exportName)) { + String msg = String.format("No export with name '%s' defined", exportName); + + throw new IllegalArgumentException(msg); + } + + return exportFrom.getOrDefault(exportName, "Unknown"); + } + + /** + * Get the source of an grammar + * + * This will often be a file name, but is not required to be one. + * + * @param grammarName + * The name of the exported grammar. + * + * @return + * The source of an exported grammar. + * + * @throws IllegalArgumentException + * If the exported grammar is invalid or not present in this set. + */ + public String loadedFrom(String grammarName) { + /* Check arguments. */ + if (grammarName == null) { + throw new NullPointerException("Grammar name must not be null"); + } else if (grammarName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid grammar name"); + } else if (grammarName.equals("unknown")) { + return grammarName; + } else if (!grammars.containsKey(grammarName)) { + String msg = String.format("No grammar with name '%s' defined", grammarName); + throw new IllegalArgumentException(msg); + } + + return loadedFrom.getOrDefault(grammarName, "Unknown"); + } + + /** + * Get the names of all the grammars in this set. + * + * @return + * The names of all the grammars in this set. + */ + public Set<String> getGrammars() { + return grammars.keySet(); + } + + /** + * Get the names of all the exported rules in this set. + * + * @return + * The names of all the exported rules in this set. + */ + public Set<String> getExportedRules() { + return exportedRules.keySet(); + } + + /** + * Load a grammar set from a configuration file. + * + * @param cfgFile + * The configuration file to load from. 
+ * + * @return + * The grammar set created by the configuration file. + * + * @throws IOException + * If something goes wrong during configuration loading. + */ + public static RGrammarSet fromConfigFile(Path cfgFile) throws IOException { + /* The grammar set to hand back. */ + RGrammarSet set = new RGrammarSet(); + + /* Get the directory that contains the config file. */ + Path cfgParent = cfgFile.getParent(); + + try(Scanner scn = new Scanner(cfgFile)) { + /* Execute lines from the configuration file. */ + while (scn.hasNextLine()) { + String ln = scn.nextLine().trim(); + + /* Ignore blank/comment lines. */ + if (ln.equals("")) continue; + + if (ln.startsWith("#")) continue; + + /* Handle mixed whitespace. */ + ln = ln.replaceAll("\\s+", " "); + + /* + * Get the place where the name of the grammar + * ends. + */ + int nameIdx = ln.indexOf(" "); + if (nameIdx == -1) { + throw new GrammarException("Must specify a name for a loaded grammar"); + } + + /* Name and path of grammar. */ + String name = ln.substring(0, nameIdx); + Path path = Paths.get(ln.substring(nameIdx).trim()); + + /* + * Convert from configuration relative path to + * absolute path. + */ + Path convPath = cfgParent.resolve(path.toString()); + + //if(Files.isDirectory(convPath)) { + // /* @TODO implement subset grammars */ + // throw new GrammarException("Sub-grammar sets aren't implemented yet"); + //} else if (convPath.getFileName().endsWith(".gram")) { + /* Load grammar file. */ + try { + BufferedReader fis = Files.newBufferedReader(convPath); + RGrammar gram = RGrammarParser.readGrammar(fis); + fis.close(); + + /* Add grammar to the set. */ + set.addGrammar(name, gram); + + /* + * Mark where the grammar came + * from. 
+ */ + set.loadedFrom.put(name, path.toString()); + } catch (GrammarException gex) { + String msg = String.format("Error loading file '%s'", path); + throw new GrammarException(msg, gex); + } + //} else { + // String msg = String.format("Unrecognized file type '%s'", convPath.getFileName()); + // throw new GrammarException(msg); + //} + } + } + + return set; + } +} diff --git a/src/main/java/bjc/rgens/parser/RGrammarTest.java b/src/main/java/bjc/rgens/parser/RGrammarTest.java new file mode 100755 index 0000000..4b1f283 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/RGrammarTest.java @@ -0,0 +1,72 @@ +package bjc.rgens.parser; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Path; +import java.nio.file.Paths; + +/** + * Test for new grammar syntax. + * + * @author EVE + */ +public class RGrammarTest { + /** + * Main method. + * + * @param args + * Unused CLI args. + */ + public static void main(String[] args) { + URL rsc = RGrammarTest.class.getResource("/server-config-sample.cfg"); + + try { + /* Load a grammar set. */ + Path cfgPath = Paths.get(rsc.toURI()); + RGrammarSet gramSet = RGrammarSet.fromConfigFile(cfgPath); + + /* Generate rule suggestions for all the grammars in the set. */ + for (String gramName : gramSet.getGrammars()) { + gramSet.getGrammar(gramName).generateSuggestions(); + } + + /* Generate for each exported rule. */ + for (String exportName : gramSet.getExportedRules()) { + /* Where we loaded the rule from. 
*/ + String loadSrc = gramSet.loadedFrom(gramSet.exportedFrom(exportName)); + + System.out.println(); + System.out.printf("Generating for exported rule '%s' from file '%s'\n", exportName, loadSrc); + + RGrammar grammar = gramSet.getExportSource(exportName); + for (int i = 0; i < 100; i++) { + try { + String res = grammar.generate(exportName); + if(exportName.contains("+")) res = res.replaceAll("\\s+", ""); + + if(res.length() > 120) { + System.out.printf("\t\n\tContents: %s\n\t\n", res); + } else { + System.out.printf("\tContents: %s\n", res); + } + } catch (GrammarException gex) { + /* Print out errors with generation. */ + String fmt = "Error in exported rule '%s' (loaded from '%s')\n"; + + System.out.printf(fmt, exportName, loadSrc); + System.out.println(); + gex.printStackTrace(); + + System.out.println(); + System.out.println(); + } + } + } + } catch (IOException ioex) { + ioex.printStackTrace(); + } catch (URISyntaxException urisex) { + urisex.printStackTrace(); + } + } +} diff --git a/src/main/java/bjc/rgens/parser/RGrammars.java b/src/main/java/bjc/rgens/parser/RGrammars.java new file mode 100755 index 0000000..f74a756 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/RGrammars.java @@ -0,0 +1,67 @@ +package bjc.rgens.parser; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URI; +import java.nio.file.FileSystem; +import java.nio.file.FileSystems; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Map; + +/** + * Get access to the included grammars. 
+ * + * @author Ben Culkin + */ +public class RGrammars { + private static RGrammarSet gramSet; + + private static void loadSet() { + try { + URI rsc = RGrammarTest.class.getResource("/server-config-sample.cfg").toURI(); + + Map<String, String> env = new HashMap<>(); + env.put("create", "true"); + @SuppressWarnings("unused") + FileSystem zipfs = FileSystems.newFileSystem(rsc, env); + + Path cfgPath = Paths.get(rsc); + + gramSet = RGrammarSet.fromConfigFile(cfgPath); + } catch (IOException | URISyntaxException ex) { + RuntimeException rtex = new RuntimeException("Could not load grammars"); + + rtex.initCause(ex); + + throw rtex; + } + } + + /** + * Generate an exported rule. + * + * @param exportName + * The rule to generate. + * @return The generated rule + * @throws GrammarException + * If something went wrong. + */ + public static String generateExport(String exportName) throws GrammarException { + if (gramSet == null) + loadSet(); + + if (!gramSet.getExportedRules().contains(exportName)) { + throw new GrammarException(String.format("No exported rule named %s", exportName)); + } + + RGrammar gram = gramSet.getExportSource(exportName); + + String res = gram.generate(exportName); + if (exportName.contains("+")) + res = res.replaceAll("\\s+", ""); + + return res; + } +} diff --git a/src/main/java/bjc/rgens/parser/RegexRuleCase.java b/src/main/java/bjc/rgens/parser/RegexRuleCase.java new file mode 100755 index 0000000..5e03cd6 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/RegexRuleCase.java @@ -0,0 +1,32 @@ +package bjc.rgens.parser; + +import bjc.rgens.parser.elements.CaseElement; +import bjc.utils.funcdata.IList; + +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +public class RegexRuleCase extends RuleCase { + private Pattern patt; + + public RegexRuleCase(IList<CaseElement> elements, String pattern) { + super(RuleCase.CaseType.REGEX); + + elementList = elements; + + try { + patt = Pattern.compile(pattern); + } catch 
(PatternSyntaxException psex) { + IllegalArgumentException iaex = + new IllegalArgumentException("This type requires a valid regular expression parameter"); + + iaex.initCause(psex); + + throw iaex; + } + } + + public Pattern getPattern() { + return patt; + } +} diff --git a/src/main/java/bjc/rgens/parser/Rule.java b/src/main/java/bjc/rgens/parser/Rule.java new file mode 100755 index 0000000..7043e0f --- /dev/null +++ b/src/main/java/bjc/rgens/parser/Rule.java @@ -0,0 +1,134 @@ +package bjc.rgens.parser; + +import bjc.utils.funcdata.FunctionalList; +import bjc.utils.funcdata.IList; + +import java.util.Random; + +/** + * A rule in a randomized grammar. + * + * @author EVE + */ +public class Rule { + /** The name of this grammar rule. */ + public final String name; + + /* The cases for this rule. */ + private IList<RuleCase> cases; + + /** + * Create a new grammar rule. + * + * @param ruleName + * The name of the grammar rule. + * + * @throws IllegalArgumentException + * If the rule name is invalid. + */ + public Rule(String ruleName) { + if (ruleName == null) { + throw new NullPointerException("Rule name must not be null"); + } else if (ruleName.equals("")) { + throw new IllegalArgumentException("The empty string is not a valid rule name"); + } + + name = ruleName; + + cases = new FunctionalList<>(); + } + + /** + * Adds a case to the rule. + * + * @param cse + * The case to add. + */ + public void addCase(RuleCase cse) { + if (cse == null) { + throw new NullPointerException("Case must not be null"); + } + + cases.add(cse); + } + + /** + * Get a random case from this rule. + * + * @return + * A random case from this rule. + */ + public RuleCase getCase() { + return cases.randItem(); + } + + /** + * Get a random case from this rule. + * + * @param rnd + * The random number generator to use. + * + * @return + * A random case from this rule. 
+ */ + public RuleCase getCase(Random rnd) { + return cases.randItem(rnd::nextInt); + } + + /** + * Get all the cases of this rule. + * + * @return + * All the cases in this rule. + */ + public IList<RuleCase> getCases() { + return cases; + } + + /** + * Replace the current list of cases with a new one. + * + * @param cases + * The new list of cases. + */ + public void replaceCases(IList<RuleCase> cases) { + this.cases = cases; + } + + @Override + public int hashCode() { + final int prime = 31; + + int result = 1; + result = prime * result + ((cases == null) ? 0 : cases.hashCode()); + result = prime * result + ((name == null) ? 0 : name.hashCode()); + + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + + if (obj == null) return false; + + if (!(obj instanceof Rule)) return false; + + Rule other = (Rule) obj; + + if (cases == null) { + if (other.cases != null) return false; + } else if (!cases.equals(other.cases)) return false; + + if (name == null) { + if (other.name != null) return false; + } else if (!name.equals(other.name)) return false; + + return true; + } + + @Override + public String toString() { + return String.format("Rule [ruleName='%s', ruleCases=%s]", name, cases); + } +} diff --git a/src/main/java/bjc/rgens/parser/RuleCase.java b/src/main/java/bjc/rgens/parser/RuleCase.java new file mode 100755 index 0000000..9c0a856 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/RuleCase.java @@ -0,0 +1,88 @@ +package bjc.rgens.parser; + +import bjc.rgens.parser.elements.CaseElement; +import bjc.utils.funcdata.IList; + +/* + * @NOTE + * If at some point we add new case types, they should go into subclasses, + * not into this class. + */ +/** + * A case in a rule in a randomized grammar. + * + * @author EVE + */ +public class RuleCase { + /** + * The possible types of a case. + * + * @author EVE + */ + public static enum CaseType { + /** A normal case, composed from a list of elements. 
*/ + NORMAL, + /** A case that doesn't insert spaces. */ + SPACEFLATTEN, + /** A case that applies a regex after generation. */ + REGEX + } + + /** The type of this case. */ + public final CaseType type; + + /** + * The list of element values for this case. + * + * <h2>Used For</h2> + * <dl> + * <dt>NORMAL, SPACEFLATTEN</dt> + * <dd>Used as the list of elementList the rule is composed of.</dd> + * </dl> + */ + protected IList<CaseElement> elementList; + + protected RuleCase(CaseType typ) { + type = typ; + } + + /** + * Create a new case of the specified type that takes a element list + * parameter. + * + * @param typ + * The type of case to create. + * + * @param elements + * The element list parameter of the case. + * + * @throws IllegalArgumentException + * If this type doesn't take a element list parameter. + */ + public RuleCase(CaseType typ, IList<CaseElement> elements) { + this(typ); + + switch (typ) { + case NORMAL: + case SPACEFLATTEN: + break; + case REGEX: + throw new IllegalArgumentException("This type requires an element list and a pattern"); + default: + throw new IllegalArgumentException("This type doesn't have a element list parameter"); + } + + elementList = elements; + } + + /** + * Get the element list value of this type. + * + * @return + * The element list value of this case, or null if this type + * doesn't have one. 
+ */ + public IList<CaseElement> getElements() { + return elementList; + } +} diff --git a/src/main/java/bjc/rgens/parser/elements/BlankCaseElement.java b/src/main/java/bjc/rgens/parser/elements/BlankCaseElement.java new file mode 100755 index 0000000..7229e92 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/elements/BlankCaseElement.java @@ -0,0 +1,7 @@ +package bjc.rgens.parser.elements; + +public class BlankCaseElement extends LiteralCaseElement { + public BlankCaseElement() { + super(""); + } +} diff --git a/src/main/java/bjc/rgens/parser/elements/CaseElement.java b/src/main/java/bjc/rgens/parser/elements/CaseElement.java new file mode 100755 index 0000000..d74ab52 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/elements/CaseElement.java @@ -0,0 +1,147 @@ +package bjc.rgens.parser.elements; + +import bjc.rgens.parser.GrammarException; + +/* + * @TODO 10/11/17 Ben Culkin :CaseElementSplit Split this into multiple + * subclasses based off of a value of ElementType. + */ +/** + * A element in a rule case. + * + * @author EVE + */ +public class CaseElement { + /** + * The possible types of an element. + * + * @author EVE + */ + public static enum ElementType { + /** An element that represents a literal string. */ + LITERAL, + /** An element that represents a rule reference. */ + RULEREF, + /** An element that represents a random range. */ + RANGE, + /** An element that represents a variable that stores a string. */ + VARDEF, + /** + * An element that represents a variable that stores the result of generating a + * rule. + */ + EXPVARDEF; + } + + /* Regexps for marking rule types. */ + private static final String SPECIAL_CASELEM = "\\{[^}]+\\}"; + private static final String REFER_CASELEM = "\\[[^\\]]+\\]"; + private static final String RANGE_CASELM = "\\[\\d+\\.\\.\\d+\\]"; + + /** The type of this element. */ + public final ElementType type; + + /** + * Create a new case element. + * + * @param typ + * The type of this element. 
+ */ + protected CaseElement(ElementType typ) { + type = typ; + } + + @Override + public String toString() { + switch (type) { + default: + return String.format("Unknown type '%s'", type); + } + } + + /** + * Create a case element from a string. + * + * @param csepart + * The string to convert. + * + * @return A case element representing the string. + */ + public static CaseElement createElement(String csepart) { + if (csepart == null) { + throw new NullPointerException("Case part cannot be null"); + } + + if (csepart.matches(SPECIAL_CASELEM)) { + /* Handle special cases. */ + String specialBody = csepart.substring(1, csepart.length() - 1); + + System.out.printf("\t\tTRACE: special body is '%s'\n", specialBody); + + if (specialBody.matches("\\S+:=\\S+")) { + /* Handle expanding variable definitions. */ + String[] parts = specialBody.split(":="); + + if (parts.length != 2) { + String msg = "Expanded variables must be a name and a definition, seperated by :="; + + throw new GrammarException(msg); + } + + return new ExpVariableCaseElement(parts[0], parts[1]); + } else if (specialBody.matches("\\S+=\\S+")) { + /* Handle regular variable definitions. */ + String[] parts = specialBody.split("="); + + if (parts.length != 2) { + String msg = "Variables must be a name and a definition, seperated by ="; + + throw new GrammarException(msg); + } + + return new LitVariableCaseElement(parts[0], parts[1]); + } else if (specialBody.equals("empty")) { + /* Literal blank, for empty cases. The braces were stripped above, so compare the bare word; a pattern starting with an opening brace is not a valid regex. 
*/ + return new BlankCaseElement(); + } else { + throw new IllegalArgumentException(String.format("Unknown special case part '%s'", specialBody)); + } + } else if (csepart.matches(REFER_CASELEM)) { + if (csepart.matches(RANGE_CASELM)) { + /* Handle ranges */ + String rawRange = csepart.substring(1, csepart.length() - 1); + + int firstNum = Integer.parseInt(rawRange.substring(0, rawRange.indexOf('.'))); + int secondNum = Integer.parseInt(rawRange.substring(rawRange.lastIndexOf('.') + 1)); + + return new RangeCaseElement(firstNum, secondNum); + } + + return new RuleCaseElement(csepart); + } else { + return new LiteralCaseElement(csepart); + } + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((type == null) ? 0 : type.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + CaseElement other = (CaseElement) obj; + if (type != other.type) + return false; + return true; + } +}
\ No newline at end of file diff --git a/src/main/java/bjc/rgens/parser/elements/ExpVariableCaseElement.java b/src/main/java/bjc/rgens/parser/elements/ExpVariableCaseElement.java new file mode 100755 index 0000000..30925e2 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/elements/ExpVariableCaseElement.java @@ -0,0 +1,7 @@ +package bjc.rgens.parser.elements; + +public class ExpVariableCaseElement extends VariableCaseElement { + public ExpVariableCaseElement(String name, String def) { + super(name, def, true); + } +} diff --git a/src/main/java/bjc/rgens/parser/elements/LitVariableCaseElement.java b/src/main/java/bjc/rgens/parser/elements/LitVariableCaseElement.java new file mode 100755 index 0000000..11035b1 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/elements/LitVariableCaseElement.java @@ -0,0 +1,7 @@ +package bjc.rgens.parser.elements; + +public class LitVariableCaseElement extends VariableCaseElement { + public LitVariableCaseElement(String name, String def) { + super(name, def, false); + } +} diff --git a/src/main/java/bjc/rgens/parser/elements/LiteralCaseElement.java b/src/main/java/bjc/rgens/parser/elements/LiteralCaseElement.java new file mode 100755 index 0000000..d96a32d --- /dev/null +++ b/src/main/java/bjc/rgens/parser/elements/LiteralCaseElement.java @@ -0,0 +1,7 @@ +package bjc.rgens.parser.elements; + +public class LiteralCaseElement extends StringCaseElement { + public LiteralCaseElement(String vl) { + super(vl, true); + } +} diff --git a/src/main/java/bjc/rgens/parser/elements/RangeCaseElement.java b/src/main/java/bjc/rgens/parser/elements/RangeCaseElement.java new file mode 100755 index 0000000..d98bc61 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/elements/RangeCaseElement.java @@ -0,0 +1,43 @@ +package bjc.rgens.parser.elements; + +public class RangeCaseElement extends CaseElement { + public final int begin; + public final int end; + + public RangeCaseElement(int beg, int en) { + super(ElementType.RANGE); + + begin = beg; + end = 
en; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + begin; + result = prime * result + end; + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (!super.equals(obj)) + return false; + if (getClass() != obj.getClass()) + return false; + RangeCaseElement other = (RangeCaseElement) obj; + if (begin != other.begin) + return false; + if (end != other.end) + return false; + return true; + } + + @Override + public String toString() { + return String.format("[%d..%d]", begin, end); + } +} diff --git a/src/main/java/bjc/rgens/parser/elements/RuleCaseElement.java b/src/main/java/bjc/rgens/parser/elements/RuleCaseElement.java new file mode 100755 index 0000000..f4d3512 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/elements/RuleCaseElement.java @@ -0,0 +1,7 @@ +package bjc.rgens.parser.elements; + +public class RuleCaseElement extends StringCaseElement { + public RuleCaseElement(String vl) { + super(vl, false); + } +} diff --git a/src/main/java/bjc/rgens/parser/elements/StringCaseElement.java b/src/main/java/bjc/rgens/parser/elements/StringCaseElement.java new file mode 100755 index 0000000..0e64fd3 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/elements/StringCaseElement.java @@ -0,0 +1,41 @@ +package bjc.rgens.parser.elements; + +public class StringCaseElement extends CaseElement { + public final String val; + + protected StringCaseElement(String vl, boolean isLiteral) { + super(isLiteral ? ElementType.LITERAL : ElementType.RULEREF); + + val = vl; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + ((val == null) ? 
0 : val.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (!super.equals(obj)) + return false; + if (getClass() != obj.getClass()) + return false; + StringCaseElement other = (StringCaseElement) obj; + if (val == null) { + if (other.val != null) + return false; + } else if (!val.equals(other.val)) + return false; + return true; + } + + @Override + public String toString() { + return val; + } +} diff --git a/src/main/java/bjc/rgens/parser/elements/VariableCaseElement.java b/src/main/java/bjc/rgens/parser/elements/VariableCaseElement.java new file mode 100755 index 0000000..920445a --- /dev/null +++ b/src/main/java/bjc/rgens/parser/elements/VariableCaseElement.java @@ -0,0 +1,60 @@ +package bjc.rgens.parser.elements; + +public class VariableCaseElement extends CaseElement { + /** + * The name of the variable this element defines. + */ + public final String varName; + + /** + * The definition of the variable this element defines. + */ + public final String varDef; + + public VariableCaseElement(String name, String def, boolean isExp) { + super(isExp ? ElementType.EXPVARDEF : ElementType.VARDEF); + + varName = name; + varDef = def; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + ((varDef == null) ? 0 : varDef.hashCode()); + result = prime * result + ((varName == null) ? 
0 : varName.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (!super.equals(obj)) + return false; + if (getClass() != obj.getClass()) + return false; + VariableCaseElement other = (VariableCaseElement) obj; + if (varDef == null) { + if (other.varDef != null) + return false; + } else if (!varDef.equals(other.varDef)) + return false; + if (varName == null) { + if (other.varName != null) + return false; + } else if (!varName.equals(other.varName)) + return false; + return true; + } + + @Override + public String toString() { + if (type == ElementType.VARDEF) { + return String.format("{%s:=%s}", varName, varDef); + } else { + return String.format("{%s=%s}", varName, varDef); + } + } +} diff --git a/src/main/java/bjc/rgens/parser/new-syntax.txt b/src/main/java/bjc/rgens/parser/new-syntax.txt new file mode 100755 index 0000000..f6578b4 --- /dev/null +++ b/src/main/java/bjc/rgens/parser/new-syntax.txt @@ -0,0 +1,13 @@ +[grammar] → ([block] ( / \n\.\n ? / [block])*) ? + +[block] → [pragma - block] +→ [rule - block] +→ [where - block] + +[pragma - block] → [pragma] ( / \n( ? !\t) / [pragma])* + +[rule - block] → [rule - decl] [rule - case] ( / \n\t / [rule - case])* + +[where - block] → where / \n\t / ([rule - block] / \n\t / ) + in / \n\t / [rule - block] + +[pragma] → pragma [pragma - name] [pragma - body] diff --git a/src/main/java/bjc/rgens/text/markov/Markov.java b/src/main/java/bjc/rgens/text/markov/Markov.java new file mode 100755 index 0000000..e21d60f --- /dev/null +++ b/src/main/java/bjc/rgens/text/markov/Markov.java @@ -0,0 +1,208 @@ +package bjc.rgens.text.markov; + +import java.util.Map.Entry; +import java.util.*; + +/** + * Represents a k-character substring. + * + * Can give a pseudo-random suffix character based on probability. 
/**
 * Represents a k-character substring together with the characters that
 * were recorded as following it.
 *
 * Can give a pseudo-random suffix character, weighted by how often each
 * suffix was recorded.
 *
 * @author Daniel Friedman (Fall 2011)
 */
public class Markov {
	/*
	 * One generator shared by all instances, instead of allocating a new
	 * one on every call to random().
	 */
	private static final Random RNG = new Random();

	String substring;
	int count = 0;

	TreeMap<Character, Integer> map;

	/**
	 * Constructs a Markov object from a given substring.
	 *
	 * The occurrence count starts at one and no suffix is recorded.
	 *
	 * @param substr
	 * 	The given substring.
	 */
	public Markov(String substr) {
		this.substring = substr;

		map = new TreeMap<>();

		add();
	}

	/**
	 * Constructs a Markov object from a given substring and suffix
	 * character.
	 *
	 * Suffix characters are stored in a TreeMap.
	 *
	 * @param substr
	 * 	The specified substring.
	 *
	 * @param suffix
	 * 	The specified suffix. Must not be null, since it is unboxed.
	 */
	public Markov(String substr, Character suffix) {
		this.substring = substr;

		map = new TreeMap<>();

		add(suffix);
	}

	/**
	 * Increments the count of number of times the substring appears in a
	 * text.
	 */
	public void add() {
		count++;
	}

	/**
	 * Records one more occurrence of the substring followed by the given
	 * suffix character.
	 *
	 * @param c
	 * 	The suffix character to be added.
	 */
	public void add(char c) {
		add();

		/* Insert with frequency 1, or bump the existing frequency. */
		map.merge(c, 1, Integer::sum);
	}

	/**
	 * Gives the frequency count of a suffix character; that is, the number
	 * of times the specified suffix follows the substring in a text.
	 *
	 * @param c
	 * 	The specified suffix.
	 *
	 * @return
	 * 	The frequency count, or -1 if the suffix was never recorded.
	 */
	public int getFrequencyCount(char c) {
		Integer frequency = map.get(c);

		return (frequency == null) ? -1 : frequency;
	}

	/**
	 * Gives the ratio of a suffix's frequency count to the occurrence
	 * count of the substring (see {@link #count()}).
	 *
	 * @param c
	 * 	The character to look for the frequency for.
	 *
	 * @return
	 * 	The ratio of the frequency count of the character to the
	 * 	occurrence count, or -1 if the suffix was never recorded.
	 */
	public double getCharFrequency(char c) {
		int frequency = getFrequencyCount(c);

		if (frequency == -1) {
			return -1;
		}

		return (double) frequency / (double) count;
	}

	/**
	 * Finds whether or not the given suffix is in the TreeMap.
	 *
	 * @param c
	 * 	The given suffix.
	 *
	 * @return
	 * 	True if the suffix exists in the TreeMap, false otherwise.
	 */
	public boolean containsChar(char c) {
		return map.containsKey(c);
	}

	/**
	 * Gives the number of times this substring occurs in a text.
	 *
	 * @return
	 * 	Said number of times.
	 */
	public int count() {
		return count;
	}

	/**
	 * Gives the TreeMap of suffix characters to frequency counts.
	 *
	 * This is the live map, not a copy.
	 *
	 * @return
	 * 	The TreeMap.
	 */
	public TreeMap<Character, Integer> getMap() {
		return map;
	}

	/**
	 * Using probability, returns a pseudo-random character to follow the
	 * substring.
	 *
	 * A number from 0 (inclusive) to the sum of all frequency counts
	 * (exclusive) is picked, and the suffixes are walked in key order
	 * until the running total passes that number. A suffix recorded more
	 * often covers a larger share of the numbers, so it is proportionally
	 * more likely to be returned.
	 *
	 * @return
	 * 	The pseudo-random suffix.
	 *
	 * @throws IllegalStateException
	 * 	If no suffix has been recorded for this substring.
	 */
	public char random() {
		int total = 0;

		for (int frequency : map.values()) {
			/* Ignore non-positive entries put in through getMap(). */
			if (frequency > 0) {
				total += frequency;
			}
		}

		if (total == 0) {
			throw new IllegalStateException(
					"No suffixes recorded for substring '" + substring + "'");
		}

		int pick = RNG.nextInt(total);

		for (Entry<Character, Integer> entry : map.entrySet()) {
			int frequency = entry.getValue();

			if (frequency <= 0) {
				continue;
			}

			pick -= frequency;

			if (pick < 0) {
				return entry.getKey();
			}
		}

		/* pick < total, so the walk above always returns. */
		throw new AssertionError("weighted pick ran past the last suffix");
	}

	/**
	 * Gives a String representation of the Markov object.
	 *
	 * @return
	 * 	Said String representation.
	 */
	@Override
	public String toString() {
		StringBuilder ret = new StringBuilder();

		ret.append("Substring: ").append(substring);
		ret.append(", Count: ").append(count);
		ret.append("\n").append("Suffixes and frequency counts: ");

		for (Entry<Character, Integer> entry : map.entrySet()) {
			char key = entry.getKey();
			int value = entry.getValue();

			ret.append("\n").append("Suffix: ").append(key);
			ret.append(", frequency count: ").append(value);
		}

		return ret.toString();
	}
}
+ */ + public String generateTextFromMarkov(int charsToGenerate) { + StringBuilder text = new StringBuilder(); + + for (int i = ord; i < charsToGenerate; i++) { + if (i == ord) { + text.append(first); + + if (text.length() > ord) i = text.length(); + } + + String sub = text.substring(i - ord, i); + Markov tmp = hash.get(sub); + + if (tmp != null) { + Character nextChar = tmp.random(); + + text.append(nextChar); + } else { + i = ord - 1; + } + } + + return text.toString(); + } +} diff --git a/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java b/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java new file mode 100755 index 0000000..339e8d5 --- /dev/null +++ b/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java @@ -0,0 +1,76 @@ +package bjc.rgens.text.markov; + +import java.io.IOException; +import java.io.Reader; +import java.util.HashMap; +import java.util.Map; + +/** + * Create a Markov generate from a provided source. + * + * @author bjculkin + */ +public class StandaloneTextGenerator { + /** + * Build a markov generator from a provided source. + * + * @param order + * The markov order to use. + * + * @param reader + * The source to seed the generator from. + * + * @return + * The markov generator for the provided text. 
+ */ + public static StandaloneMarkov generateMarkovMap(int order, Reader reader) { + Map<String, Markov> hash = new HashMap<>(); + + Character next = null; + + try { + next = (char) reader.read(); + } catch (IOException e1) { + System.out.println("IOException in stepping through the reader"); + + e1.printStackTrace(); + + System.exit(1); + } + + StringBuilder origFileBuffer = new StringBuilder(); + + while (next != null && Character.isDefined(next)) { + Character.toString(next); + origFileBuffer.append(next); + + try { + next = (char) reader.read(); + } catch (IOException e) { + System.out.println("IOException in stepping through the reader"); + + e.printStackTrace(); + } + + } + + String origFile = origFileBuffer.toString(); + String firstSub = origFile.substring(0, order); + + for (int i = 0; i < origFile.length() - order; i++) { + String sub = origFile.substring(i, i + order); + Character suffix = origFile.charAt(i + order); + + if (hash.containsKey(sub)) { + Markov marvin = hash.get(sub); + marvin.add(suffix); + hash.put(sub, marvin); + } else { + Markov marvin = new Markov(sub, suffix); + hash.put(sub, marvin); + } + } + + return new StandaloneMarkov(order, hash, firstSub); + } +} diff --git a/src/main/java/bjc/rgens/text/markov/TextGenerator.java b/src/main/java/bjc/rgens/text/markov/TextGenerator.java new file mode 100755 index 0000000..f629d49 --- /dev/null +++ b/src/main/java/bjc/rgens/text/markov/TextGenerator.java @@ -0,0 +1,73 @@ +package bjc.rgens.text.markov; + +import java.io.*; + +/** + * Generate text from a markov model of an input text + * + * @author ben + * + */ +public class TextGenerator { + /** + * Main method. + * + * @param args + * When used with three arguments, the first represents the k-order + * of the Markov objects. The second represents the number of + * characters to print out. The third represents the file to be + * read. 
+ * + * When used with two arguments, the first represents the k-order + * of the Markov objects, and the second represents the file to be + * read. The generated text will be the same number of characters + * as the original file. + */ + public static void main(String[] args) { + int k = 0; + int M = 0; + + String file = ""; + StringBuilder text = new StringBuilder(); + + if (args.length == 3) { + k = Integer.parseInt(args[0]); + M = Integer.parseInt(args[1]); + + file = args[2]; + } else if (args.length == 2) { + k = Integer.parseInt(args[0]); + + file = args[1]; + } else { + System.out.println("\n" + "Usage: java TextGenerator k M file"); + System.out.println("where k is the markov order, M is the number"); + System.out.println("of characters to be printed, and file is the"); + System.out.println("name of the file to print from. M may be left out." + "\n"); + System.exit(1); + } + + StandaloneMarkov markov = null; + + try (FileReader reader = new FileReader(file)) { + markov = StandaloneTextGenerator.generateMarkovMap(k, reader); + + String generatedText = markov.generateTextFromMarkov(M); + String desiredText = generatedText.substring(0, Math.min(M, text.length())); + + System.out.println(desiredText); + } catch (FileNotFoundException e) { + System.out.println("File not found."); + + e.printStackTrace(); + + System.exit(1); + } catch (IOException ioex) { + System.out.println("IOException"); + + ioex.printStackTrace(); + + System.exit(1); + } + } +} diff --git a/src/main/sql/db_schema.sql b/src/main/sql/db_schema.sql new file mode 100755 index 0000000..553c7a8 --- /dev/null +++ b/src/main/sql/db_schema.sql @@ -0,0 +1,15 @@ +create table Rules ( + ruleid serial, + name varchar(255) not null, + casecount int, + primary key(id) +); + +create index Rules_name on Rules(name); + +create table RuleCases ( + ruleid serial, + cse varchar(255) array +); + +create index RuleCase_id on RuleCases(ruleid); |
