/* src/main/java/bjc/rgens/parser/RGrammar.java */

package bjc.rgens.parser;

import bjc.utils.data.IPair;
import bjc.utils.data.Pair;
import bjc.utils.funcutils.StringUtils;

import bjc.rgens.parser.elements.*;

import java.util.Arrays;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.text.similarity.LevenshteinDistance;

import edu.gatech.gtri.bktree.BkTreeSearcher;
import edu.gatech.gtri.bktree.BkTreeSearcher.Match;
import edu.gatech.gtri.bktree.Metric;
import edu.gatech.gtri.bktree.MutableBkTree;

import static bjc.utils.data.IPair.pair;

/**
 * Represents a randomized grammar.
 *
 * @author EVE
 */
public class RGrammar {
	/* The grammar set this grammar is part of (presumably assigned by the owning set; confirm against callers). */
	public RGrammarSet belongsTo;

	/* The name of this grammar; included in error messages about missing rules. */
	public String name;

	/*
	 * Grammar-specific post-processing steps. Each pair is a (regex,
	 * replacement) applied with String.replaceAll to generated text, after
	 * the built-in steps.
	 */
	public List<IPair<String, String>> postprocs;

	/* Post-processing steps shared by every grammar; filled in by the static initializer. */
	private static final List<IPair<String, String>> builtinPostprocs;
	/* Whether the built-in post-processing steps are applied before the grammar-specific ones. */
	public boolean useBuiltinPostprocs = true;

	/*
	 * The max distance between possible alternate rules.
	 *
	 * NOTE(review): not referenced in the visible part of this class;
	 * presumably meant as the search radius for rule suggestions.
	 */
	private static final int MAX_DISTANCE = 6;

	/*
	 * Adapts Levenshtein edit distance to the Metric interface, so that it
	 * can be used as the distance function of a BK-tree of rule names.
	 */
	private static final class LevenshteinMetric implements Metric<String> {
		/* Distance calculator shared by all instances; obtained once at class load. */
		private static final LevenshteinDistance DIST = LevenshteinDistance.getDefaultInstance();

		public LevenshteinMetric() {
			/* No per-instance state to set up. */
		}

		@Override
		public int distance(String x, String y) {
			final int editDistance = DIST.apply(x, y);

			return editDistance;
		}
	}

	/* The rules defined locally in this grammar, keyed by rule name. */
	public Map<String, Rule> rules;
	/*
	 * The rules imported from other grammars.
	 *
	 * Only assigned by setImportedRules, so it is null until that is called.
	 */
	private Map<String, Rule> importRules;
	/*
	 * The names of the rules exported from this grammar.
	 *
	 * Only assigned by setExportedRules, so it is null until that is called.
	 */
	private Set<String> exportRules;
	/* The name of the initial rule of this grammar, or null if it has none. */
	private String initialRule;

	/*
	 * The tree to use for finding rule suggestions.
	 *
	 * Built by generateSuggestions; null before that is called.
	 */
	private BkTreeSearcher<String> ruleSearcher;

	static {
		/* Squeezes every run of whitespace down to a single space. */
		IPair<String, String> squeezeWhitespace = pair("\\s+", " ");

		/*
		 * The built-in post-processing steps, in the order they are applied.
		 *
		 * Most of them strip the extra spaces that end up around
		 * punctuation because of how the grammar syntax separates
		 * elements. Grammars could do this themselves, but doing so by
		 * hand is tedious.
		 */
		builtinPostprocs = Arrays.asList(
				squeezeWhitespace,

				/* Attach a possessive 's to the word before it. */
				pair(" 's ", "'s "),

				/* Move the space after an opening bracket in front of it. */
				pair("([(\\[]) ", " $1"),
				/* Move the space before a closing bracket or quote behind it. */
				pair(" ([)\\]'\"])", "$1 "),

				/* Close up whitespace inside runs of opening brackets... */
				pair("([(\\[])\\s+([(\\[])", "$1$2"),
				/* ...and inside runs of closing brackets. */
				pair("([)\\]])\\s+([)\\]])", "$1$2"),

				/* Punctuation between words: no space before, one after. */
				pair(" ([,:.!])", "$1 "),
				/* Punctuation inside words: no space on either side. */
				pair("\\s?([-/])\\s?", "$1"),

				/* The steps above may have introduced new runs of spaces. */
				squeezeWhitespace,

				/*
				 * Glue these suffixes onto the preceding word.
				 *
				 * Temporary; replace this once it is no longer needed.
				 */
				pair("\\s(ish|burg|ton|ville|opolis|field|boro|dale)", "$1")
				);
	}
	/**
	 * Builds a randomized grammar backed by the given rule table.
	 *
	 * Every rule in the table is marked as belonging to this grammar, and the
	 * grammar starts out with no grammar-specific post-processing steps.
	 *
	 * @param ruls
	 *            The rules to use, keyed by rule name. The map is stored as-is,
	 *            not copied.
	 */
	public RGrammar(Map<String, Rule> ruls) {
		this.postprocs = new ArrayList<>();
		this.rules = ruls;

		/* Claim ownership of every rule handed to us. */
		ruls.values().forEach(rule -> rule.belongsTo = this);
	}

	/**
	 * Replaces the table of rules imported from other grammars.
	 *
	 * Imported rules are consulted for rule definitions only after the local
	 * definitions have been checked.
	 *
	 * @param importedRules
	 *            The imported rules to use, keyed by rule name. The map is
	 *            stored as-is, not copied.
	 */
	public void setImportedRules(Map<String, Rule> importedRules) {
		this.importRules = importedRules;
	}

	/**
	 * Generates the data structure backing rule suggestions for unknown rules.
	 *
	 * The names of all local rules, plus the names of all imported rules if
	 * any imports have been set, are indexed in a BK-tree keyed on
	 * Levenshtein distance. Call this again after the rules or imports change
	 * to refresh the index.
	 */
	public void generateSuggestions() {
		MutableBkTree<String> ruleSuggester = new MutableBkTree<>(new LevenshteinMetric());

		ruleSuggester.addAll(rules.keySet());

		/*
		 * Imports are optional: importRules stays null unless
		 * setImportedRules has been called.
		 */
		if (importRules != null) {
			ruleSuggester.addAll(importRules.keySet());
		}

		ruleSearcher = new BkTreeSearcher<>(ruleSuggester);
	}

	/**
	 * Produce a string from this grammar with a fresh random source and no
	 * predefined variables.
	 *
	 * @param startRule
	 *            The name of the rule to start generating at, or null to start
	 *            at this grammar's initial rule.
	 *
	 * @return A possible string from the grammar.
	 */
	public String generate(String startRule) {
		Random rnd = new Random();
		Map<String, String> vars = new HashMap<>();
		Map<String, Rule> rlVars = new HashMap<>();

		return generate(startRule, rnd, vars, rlVars);
	}

	/**
	 * Produce a string from this grammar using the supplied random source and
	 * variable tables.
	 *
	 * A new generation state with an empty output buffer is created for the
	 * run.
	 *
	 * @param startRule
	 *            The name of the rule to start generating at, or null to start
	 *            at this grammar's initial rule.
	 *
	 * @param rnd
	 *            The random number generator to use.
	 *
	 * @param vars
	 *            The string-valued variables to use.
	 *
	 * @param rlVars
	 *            The rule-valued variables to use.
	 *
	 * @return A possible string from the grammar.
	 */
	public String generate(String startRule, Random rnd, Map<String, String> vars,
			Map<String, Rule> rlVars) {
		StringBuilder output = new StringBuilder();
		GenerationState state = new GenerationState(output, rnd, vars, rlVars, this);

		return generate(startRule, state);
	}

	/**
	 * Produce a post-processed string from this grammar using an existing
	 * generation state.
	 *
	 * @param startRule
	 *            The name of the rule to start generating at, or null to start
	 *            at this grammar's initial rule.
	 *
	 * @param state
	 * 	The generation state to generate into.
	 *
	 * @return A possible string from the grammar, with post-processing applied.
	 */
	public String generate(String startRule, GenerationState state) {
		/* This overload always post-processes the result. */
		final boolean doPostprocess = true;

		return generate(startRule, state, doPostprocess);
	}

	/**
	 * Generate a string from this grammar, starting from the specified rule,
	 * optionally applying the post-processing steps to the result.
	 *
	 * @param startRule
	 *            The rule to start generating at, or null to use the initial rule
	 *            for this grammar.
	 *
	 * @param state
	 * 	The generation state. The rule generates into it, and the result is
	 * 	read back from its contents.
	 *
	 * @param doPostprocess
	 * 	Whether to run the built-in (if enabled) and grammar-specific
	 * 	post-processing steps over the generated text.
	 *
	 * @return A possible string from the grammar.
	 *
	 * @throws GrammarException
	 * 	If no start rule is given and the grammar has no initial rule, if
	 * 	the start rule is the empty string, or if the rule can't be found.
	 */
	public String generate(String startRule, GenerationState state, boolean doPostprocess) {
		String fromRule = startRule;

		if (startRule == null) {
			if (initialRule == null) {
				throw new GrammarException("Must specify a start rule for grammars with no initial rule");
			}

			fromRule = initialRule;
		} else if (startRule.equals("")) {
			throw new GrammarException("The empty string is not a valid rule name");
		}

		/*
		 * We don't search imports, so it will always belong to this
		 * grammar.
		 */
		Rule rl = state.findRule(fromRule, false);

		if (rl == null) {
			/*
			 * Report the name that was looked up; rl is null here, so it
			 * can't be asked for its name.
			 */
			throw new GrammarException("Could not find rule " + fromRule);
		}

		rl.generate(state);

		String body = state.contents.toString();

		if (doPostprocess) {
			body = postprocessRes(body);
		}

		return body;
	}

	/*
	 * Runs the post-processing steps over generated text and trims the
	 * result.
	 *
	 * The built-in steps (when enabled) run first, followed by the
	 * grammar-specific ones; each step is a regex replacement.
	 */
	private String postprocessRes(String strang) {
		/* Collect the steps to run, in application order. */
		List<IPair<String, String>> steps = new ArrayList<>();

		if (useBuiltinPostprocs) {
			steps.addAll(builtinPostprocs);
		}

		steps.addAll(postprocs);

		String result = strang;

		for (IPair<String, String> step : steps) {
			String regex = step.getLeft();
			String replacement = step.getRight();

			result = result.replaceAll(regex, replacement);
		}

		return result.trim();
	}
	/**
	 * Generate a single rule case into the given state.
	 *
	 * Any grammar error raised while generating is rethrown, wrapped in a
	 * new error that names the case being generated.
	 *
	 * @param start
	 * 	The rule case to generate.
	 * @param state
	 * 	The current generation state.
	 */
	public void generateCase(RuleCase start, GenerationState state) {
		try {
			start.generate(state);
		} catch (GrammarException cause) {
			/* Add the failing case as context, keeping the original error as the cause. */
			throw new GrammarException(String.format("Error in generating case (%s)", start), cause);
		}
	}

	/**
	 * Look up the name of the rule generation starts from by default.
	 *
	 * @return The name of the initial rule of this grammar, or null if it has
	 *         none.
	 */
	public String getInitialRule() {
		return this.initialRule;
	}

	/**
	 * Choose the rule generation starts from by default.
	 *
	 * @param initRule
	 *            The name of a rule defined locally in this grammar, or null to
	 *            say there is no initial rule.
	 */
	public void setInitialRule(String initRule) {
		/* Only a non-null name needs validating; null simply clears the rule. */
		if (initRule != null) {
			if (initRule.isEmpty()) {
				throw new GrammarException("The empty string is not a valid rule name");
			}

			if (!rules.containsKey(initRule)) {
				throw new GrammarException(
						String.format("No rule '%s' local to this grammar (%s) defined.", initRule, name));
			}
		}

		this.initialRule = initRule;
	}

	/**
	 * Gets the rules exported by this grammar.
	 *
	 * The initial rule is exported by default if specified. If no exported
	 * rules have been set, only the initial rule (if any) is returned.
	 *
	 * @return The rules exported by this grammar.
	 *
	 * @throws GrammarException
	 *             If an exported rule name doesn't match a rule local to this
	 *             grammar.
	 */
	public Set<Rule> getExportedRules() {
		Set<Rule> res = new HashSet<>();

		/* exportRules stays null unless setExportedRules has been called. */
		if (exportRules != null) {
			for (String rname : exportRules) {
				if (!rules.containsKey(rname)) {
					/* Rule name first, then grammar name, matching the format string. */
					String msg = String.format("No rule '%s' local to this grammar (%s) defined for export",
							rname, name);

					throw new GrammarException(msg);
				}

				res.add(rules.get(rname));
			}
		}

		if (initialRule != null) {
			res.add(rules.get(initialRule));
		}

		return res;
	}

	/**
	 * Replace the set of rule names this grammar exports.
	 *
	 * The names are not checked here; they are validated when the exported
	 * rules are retrieved.
	 *
	 * @param exportedRules
	 *            The names of the rules exported by this grammar. The set is
	 *            stored as-is, not copied.
	 */
	public void setExportedRules(Set<String> exportedRules) {
		this.exportRules = exportedRules;
	}

	/**
	 * Access the table of rules defined locally in this grammar.
	 *
	 * @return The local rules of this grammar, keyed by rule name. This is
	 *         the grammar's own map, not a copy.
	 */
	public Map<String, Rule> getRules() {
		return this.rules;
	}

	/**
	 * Access the table of rules imported from other grammars.
	 *
	 * @return The imported rules, keyed by rule name, or null if no imported
	 *         rules have been set. This is the grammar's own map, not a copy.
	 */
	public Map<String, Rule> getImportRules() {
		return this.importRules;
	}
}