Move things around, and start on new parser.

author: student <student@student-OptiPlex-9020> 2017-03-17 10:49:27 -0400
committer: student <student@student-OptiPlex-9020> 2017-03-17 10:49:27 -0400
commit: 0ea49dd4a52358f053c9be7138c392b16de05899 (patch)
tree: 802e275aaf279480ee8626136f56bfa1fbab6845 /RGens/src/main/java/bjc/rgens/text/markov
parent: 36cf3a0f0604ef43ce838ff6e9a7fc4e7c299522 (diff)
4 files changed, 370 insertions, 0 deletions
diff --git a/RGens/src/main/java/bjc/rgens/text/markov/Markov.java b/RGens/src/main/java/bjc/rgens/text/markov/Markov.java
new file mode 100755
index 0000000..a07f44e
--- /dev/null
+++ b/RGens/src/main/java/bjc/rgens/text/markov/Markov.java
@@ -0,0 +1,195 @@
+package bjc.rgens.text.markov;
+
+import java.util.Map.Entry;
+import java.util.*;
+
+/**
+ * Represents a k-character substring. Can give a pseudo-random suffix
+ * character based on probability.
+ * 
+ * @author Daniel Friedman (Fall 2011)
+ *
+ */
+public class Markov {
+	/** Shared by all instances so random() need not create one per call. */
+	private static final Random	RANDOM	= new Random();
+
+	String						substring;
+	int							count	= 0;
+
+	TreeMap<Character, Integer>	map;
+
+	/**
+	 * Constructs a Markov object from a given substring.
+	 * 
+	 * @param substring
+	 *            the given substring.
+	 */
+	public Markov(String substring) {
+		this.substring = substring;
+
+		map = new TreeMap<>();
+
+		add();
+	}
+
+	/**
+	 * Constructs a Markov object from a given substring and suffix
+	 * character. Suffix characters are stored in a TreeMap.
+	 * 
+	 * @param substring
+	 *            the specified substring.
+	 * @param suffix
+	 *            the specified suffix.
+	 */
+	public Markov(String substring, Character suffix) {
+		this.substring = substring;
+
+		map = new TreeMap<>();
+
+		add(suffix);
+	}
+
+	/**
+	 * Increments the count of number of times the substring appears in a
+	 * text.
+	 */
+	public void add() {
+		count++;
+	}
+
+	/**
+	 * Adds a suffix character to the TreeMap, and counts one more
+	 * occurrence of the substring.
+	 * 
+	 * @param c
+	 *            the suffix character to be added.
+	 */
+	public void add(char c) {
+		add();
+
+		Integer frequency = map.get(c);
+		map.put(c, frequency == null ? 1 : frequency + 1);
+	}
+
+	/**
+	 * Gives the frequency count of a suffix character; that is, the number
+	 * of times the specified suffix follows the substring in a text.
+	 * 
+	 * @param c
+	 *            the specified suffix.
+	 * @return the frequency count, or -1 if the suffix was never added.
+	 */
+	public int getFrequencyCount(char c) {
+		Integer frequency = map.get(c);
+
+		return frequency == null ? -1 : frequency;
+	}
+
+	/**
+	 * Gives the ratio of a suffix's frequency count to the number of times
+	 * the substring was counted (see {@link #count()}).
+	 * 
+	 * @param c
+	 *            the specified suffix.
+	 * @return the ratio of the frequency count of the character to the
+	 *         substring count, or -1 if the suffix was never added.
+	 */
+	public double getCharFrequency(char c) {
+		int frequency = getFrequencyCount(c);
+
+		if (frequency == -1) {
+			return -1;
+		}
+
+		return (double) frequency / (double) count;
+	}
+
+	/**
+	 * Finds whether or not the given suffix is in the TreeMap.
+	 * 
+	 * @param c
+	 *            the given suffix.
+	 * @return True if the suffix exists in the TreeMap, false otherwise.
+	 */
+	public boolean containsChar(char c) {
+		return map.containsKey(c);
+	}
+
+	/**
+	 * Gives the number of times this substring occurs in a text.
+	 * 
+	 * @return said number of times.
+	 */
+	public int count() {
+		return count;
+	}
+
+	/**
+	 * Gives the TreeMap.
+	 * 
+	 * @return the TreeMap of suffix characters to frequency counts. This
+	 *         is the live map, not a copy.
+	 */
+	public TreeMap<Character, Integer> getMap() {
+		return map;
+	}
+
+	/**
+	 * Using probability, returns a pseudo-random character to follow the
+	 * substring. A random number from 0 (inclusive) to the sum of all
+	 * frequency counts (exclusive) is picked, and the suffixes are walked
+	 * in order, each one claiming as many numbers as its frequency count.
+	 * Since more common suffixes claim more numbers, the probability of
+	 * getting a more common suffix is greater than the probability of
+	 * getting a less common suffix.
+	 * 
+	 * @return the pseudo-random suffix.
+	 * @throws IllegalStateException
+	 *             if no suffix has been added, so there is nothing to pick.
+	 */
+	public char random() {
+		int total = 0;
+
+		for (int frequency : map.values()) {
+			total += frequency;
+		}
+
+		if (total <= 0) {
+			throw new IllegalStateException(
+					"No suffixes recorded for substring: " + substring);
+		}
+
+		int remaining = RANDOM.nextInt(total);
+
+		for (Entry<Character, Integer> entry : map.entrySet()) {
+			remaining -= entry.getValue();
+
+			if (remaining < 0) {
+				return entry.getKey();
+			}
+		}
+
+		// Not reachable while every frequency count is positive.
+		throw new IllegalStateException(
+				"Inconsistent frequency counts for substring: " + substring);
+	}
+
+	/**
+	 * Gives a String representation of the Markov object.
+	 * 
+	 * @return said String representation.
+	 */
+	@Override
+	public String toString() {
+		StringBuilder ret = new StringBuilder("Substring: ").append(substring)
+				.append(", Count: ").append(count)
+				.append("\n" + "Suffixes and frequency counts: ");
+
+		for (Entry<Character, Integer> entry : map.entrySet()) {
+			ret.append("\n" + "Suffix: ").append(entry.getKey())
+					.append(", frequency count: ").append(entry.getValue());
+		}
+		return ret.toString();
+	}
+}
diff --git a/RGens/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java b/RGens/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java
new file mode 100644
index 0000000..0edaaa8
--- /dev/null
+++ b/RGens/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java
@@ -0,0 +1,79 @@
+package bjc.rgens.text.markov;
+
+import java.util.Map;
+
+/**
+ * Generates text by repeatedly looking up the last k characters produced
+ * so far in a map of {@link Markov} objects and appending a pseudo-random
+ * suffix chosen by the matching entry.
+ */
+public class StandaloneMarkov {
+	private int					k;
+
+	private Map<String, Markov>	markovHash;
+	private String				firstSub;
+
+	/**
+	 * Constructs a generator from an already built map.
+	 * 
+	 * @param k
+	 *            the markov order; the number of characters in each key of
+	 *            the map.
+	 * @param markovHash
+	 *            the map from k-character substrings to their suffixes.
+	 * @param firstSub
+	 *            the substring generated text starts (and restarts) with.
+	 */
+	public StandaloneMarkov(int k, Map<String, Markov> markovHash,
+			String firstSub) {
+		this.k = k;
+		this.markovHash = markovHash;
+		this.firstSub = firstSub;
+	}
+
+	/**
+	 * Generates text starting from the first substring. Whenever the last
+	 * k characters have no entry in the map, the first substring is
+	 * appended again and generation carries on from there.
+	 * 
+	 * @param M
+	 *            the number of characters wanted. After a restart the
+	 *            result can run past this by up to the length of the
+	 *            first substring.
+	 * @return the generated text. This is empty if M is not greater than
+	 *         k, and is only the first substring if that has no entry in
+	 *         the map, since restarting could then never get any further.
+	 */
+	public String generateTextFromMarkov(int M) {
+		StringBuilder text = new StringBuilder();
+
+		// True while nothing has been appended since the last (re)start.
+		boolean justStarted = false;
+
+		for (int i = k; i < M; i++) {
+			if (i == k) {
+				text.append(firstSub);
+				justStarted = true;
+
+				if (text.length() > k)
+					i = text.length();
+			}
+
+			String sub = text.substring((i - k), (i));
+			Markov tmp = markovHash.get(sub);
+
+			if (tmp != null) {
+				Character nextChar = tmp.random();
+				text.append(nextChar);
+				justStarted = false;
+			} else if (justStarted) {
+				// Restarting would land right back here, forever.
+				break;
+			} else {
+				i = k - 1;
+			}
+		}
+
+		return text.toString();
+	}
+
+}
diff --git a/RGens/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java b/RGens/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java
new file mode 100644
index 0000000..92bc653
--- /dev/null
+++ b/RGens/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java
@@ -0,0 +1,85 @@
+package bjc.rgens.text.markov;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Builds {@link StandaloneMarkov} generators from source text.
+ */
+public class StandaloneTextGenerator {
+
+	/**
+	 * Build a markov generator from a provided source
+	 * 
+	 * @param k
+	 *            The markov order to use
+	 * @param reader
+	 *            The source to seed the generator from. It is read to the
+	 *            end, but not closed.
+	 * @return The markov generator for the provided text
+	 * @throws IllegalArgumentException
+	 *             if k is negative, or the source has fewer than k
+	 *             characters
+	 */
+	public static StandaloneMarkov generateMarkovMap(int k,
+			Reader reader) {
+		String origFile = readAll(reader);
+
+		if (k < 0 || origFile.length() < k) {
+			throw new IllegalArgumentException("Markov order " + k
+					+ " does not fit a source of " + origFile.length()
+					+ " characters");
+		}
+
+		String firstSub = origFile.substring(0, k);
+		Map<String, Markov> hash = new HashMap<>();
+
+		for (int i = 0; i < origFile.length() - k; i++) {
+			String sub = origFile.substring(i, i + k);
+			char suffix = origFile.charAt(i + k);
+			Markov marvin = hash.get(sub);
+
+			if (marvin == null) {
+				hash.put(sub, new Markov(sub, suffix));
+			} else {
+				marvin.add(suffix);
+			}
+		}
+
+		return new StandaloneMarkov(k, hash, firstSub);
+	}
+
+	/**
+	 * Read everything the reader has left into a string.
+	 * 
+	 * A failure before anything was read ends the program, as nothing can
+	 * be built from no text. A failure part way through is reported, and
+	 * the text read up to that point is used.
+	 */
+	private static String readAll(Reader reader) {
+		StringBuilder origFileBuffer = new StringBuilder();
+
+		try {
+			int next;
+
+			// read() gives -1 at the end of the source; anything else is
+			// a character of the text, whatever that character may be.
+			while ((next = reader.read()) != -1) {
+				origFileBuffer.append((char) next);
+			}
+		} catch (IOException e) {
+			System.out
+					.println("IOException in stepping through the reader");
+			e.printStackTrace();
+
+			if (origFileBuffer.length() == 0) {
+				System.exit(1);
+			}
+		}
+
+		return origFileBuffer.toString();
+	}
+
+}
diff --git a/RGens/src/main/java/bjc/rgens/text/markov/TextGenerator.java b/RGens/src/main/java/bjc/rgens/text/markov/TextGenerator.java
new file mode 100755
index 0000000..770acd9
--- /dev/null
+++ b/RGens/src/main/java/bjc/rgens/text/markov/TextGenerator.java
@@ -0,0 +1,101 @@
+package bjc.rgens.text.markov;
+
+import java.io.*;
+
+/**
+ * Generate text from a markov model of an input text
+ * 
+ * @author ben
+ *
+ */
+public class TextGenerator {
+	/**
+	 * Build a markov model of a file, and print text generated from it.
+	 * 
+	 * @param args
+	 *            when used with three arguments, the first represents the
+	 *            k-order of the Markov objects. The second represents the
+	 *            number of characters to print out. The third represents
+	 *            the file to be read.
+	 * 
+	 *            When used with two arguments, the first represents the
+	 *            k-order of the Markov objects, and the second represents
+	 *            the file to be read. The generated text will be the same
+	 *            number of characters as the original file.
+	 */
+	public static void main(String[] args) {
+		int k = 0;
+		int M = 0;
+
+		String file = "";
+
+		if (args.length == 3) {
+			k = Integer.parseInt(args[0]);
+			M = Integer.parseInt(args[1]);
+			file = args[2];
+		} else if (args.length == 2) {
+			k = Integer.parseInt(args[0]);
+			file = args[1];
+		} else {
+			System.out
+					.println("\n" + "Usage: java TextGenerator k M file");
+			System.out.println(
+					"where k is the markov order, M is the number");
+			System.out.println(
+					"of characters to be printed, and file is the");
+			System.out.println(
+					"name of the file to print from. M may be left out."
+							+ "\n");
+			System.exit(1);
+		}
+
+		try {
+			String source = readFile(file);
+
+			if (args.length == 2) {
+				// M was left out: match the length of the original file.
+				M = source.length();
+			}
+
+			StandaloneMarkov markov = StandaloneTextGenerator
+					.generateMarkovMap(k, new StringReader(source));
+			String text = markov.generateTextFromMarkov(M);
+
+			// The generator may return more than M characters.
+			System.out.println(
+					text.substring(0, Math.min(M, text.length())));
+		} catch (FileNotFoundException e) {
+			System.out.println("File not found.");
+			e.printStackTrace();
+			System.exit(1);
+		} catch (IOException ioex) {
+			System.out.println("IOException");
+			ioex.printStackTrace();
+			System.exit(1);
+		}
+	}
+
+	/**
+	 * Read a whole file into a string, using the default charset.
+	 * 
+	 * @param file
+	 *            the name of the file to read.
+	 * @return the text of the file.
+	 * @throws IOException
+	 *             if the file can't be opened or read.
+	 */
+	private static String readFile(String file) throws IOException {
+		StringBuilder contents = new StringBuilder();
+
+		try (Reader reader = new BufferedReader(new FileReader(file))) {
+			char[] chunk = new char[4096];
+			int length;
+
+			while ((length = reader.read(chunk)) != -1) {
+				contents.append(chunk, 0, length);
+			}
+		}
+
+		return contents.toString();
+	}
+}
+\ No newline at end of file
author	student <student@student-OptiPlex-9020>	2017-03-17 10:49:27 -0400
committer	student <student@student-OptiPlex-9020>	2017-03-17 10:49:27 -0400
commit	0ea49dd4a52358f053c9be7138c392b16de05899 (patch)
tree	802e275aaf279480ee8626136f56bfa1fbab6845 /RGens/src/main/java/bjc/rgens/text/markov
parent	36cf3a0f0604ef43ce838ff6e9a7fc4e7c299522 (diff)