summaryrefslogtreecommitdiff
path: root/RGens/src/main/java/bjc/rgens/text/markov
diff options
context:
space:
mode:
authorstudent <student@student-OptiPlex-9020>2017-03-17 10:49:27 -0400
committerstudent <student@student-OptiPlex-9020>2017-03-17 10:49:27 -0400
commit0ea49dd4a52358f053c9be7138c392b16de05899 (patch)
tree802e275aaf279480ee8626136f56bfa1fbab6845 /RGens/src/main/java/bjc/rgens/text/markov
parent36cf3a0f0604ef43ce838ff6e9a7fc4e7c299522 (diff)
Move things around, and start on new parser.
Diffstat (limited to 'RGens/src/main/java/bjc/rgens/text/markov')
-rwxr-xr-xRGens/src/main/java/bjc/rgens/text/markov/Markov.java189
-rw-r--r--RGens/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java42
-rw-r--r--RGens/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java70
-rwxr-xr-xRGens/src/main/java/bjc/rgens/text/markov/TextGenerator.java69
4 files changed, 370 insertions, 0 deletions
diff --git a/RGens/src/main/java/bjc/rgens/text/markov/Markov.java b/RGens/src/main/java/bjc/rgens/text/markov/Markov.java
new file mode 100755
index 0000000..a07f44e
--- /dev/null
+++ b/RGens/src/main/java/bjc/rgens/text/markov/Markov.java
@@ -0,0 +1,189 @@
+package bjc.rgens.text.markov;
+
+import java.util.Map.Entry;
+import java.util.*;
+
+/**
+ * Represents a k-character substring together with the characters that
+ * were observed directly after it. Can give a pseudo-random suffix
+ * character, weighted by how often each suffix was observed.
+ *
+ * @author Daniel Friedman (Fall 2011)
+ *
+ */
+public class Markov {
+    /** Shared generator, so that a draw does not allocate a new Random. */
+    private static final Random RNG = new Random();
+
+    String substring;
+    int count = 0;
+
+    TreeMap<Character, Integer> map;
+
+    /**
+     * Constructs a Markov object from a given substring. The occurrence
+     * count starts at one and no suffix is recorded.
+     *
+     * @param substring
+     *            the given substring.
+     */
+    public Markov(String substring) {
+        this.substring = substring;
+
+        map = new TreeMap<>();
+
+        add();
+    }
+
+    /**
+     * Constructs a Markov object from a given substring and suffix
+     * character. Suffix characters are stored in a TreeMap.
+     *
+     * @param substring
+     *            the specified substring.
+     * @param suffix
+     *            the specified suffix; must not be null, because it is
+     *            unboxed before being recorded.
+     */
+    public Markov(String substring, Character suffix) {
+        this.substring = substring;
+
+        map = new TreeMap<>();
+
+        add(suffix);
+    }
+
+    /**
+     * Increments the count of number of times the substring appears in a
+     * text.
+     */
+    public void add() {
+        count++;
+    }
+
+    /**
+     * Records one more occurrence of the substring followed by the given
+     * suffix character.
+     *
+     * @param c
+     *            the suffix character to be added.
+     */
+    public void add(char c) {
+        add();
+
+        Integer seen = map.get(c);
+        map.put(c, seen == null ? 1 : seen + 1);
+    }
+
+    /**
+     * Gives the frequency count of a suffix character; that is, the number
+     * of times the specified suffix follows the substring in a text.
+     *
+     * @param c
+     *            the specified suffix.
+     * @return the frequency count, or -1 if the suffix was never recorded.
+     */
+    public int getFrequencyCount(char c) {
+        Integer seen = map.get(c);
+
+        return seen == null ? -1 : seen;
+    }
+
+    /**
+     * Gives the ratio of a suffix's frequency count to the occurrence
+     * count of the substring. The occurrence count also includes calls to
+     * {@link #add()} that recorded no suffix.
+     *
+     * @param c
+     *            the specified suffix.
+     * @return the ratio of the frequency count of the character to
+     *         {@link #count()}, or -1 if the suffix was never recorded.
+     */
+    public double getCharFrequency(char c) {
+        int frequency = getFrequencyCount(c);
+
+        if (frequency == -1) {
+            return -1;
+        }
+
+        return (double) frequency / (double) count;
+    }
+
+    /**
+     * Finds whether or not the given suffix is in the TreeMap.
+     *
+     * @param c
+     *            the given suffix.
+     * @return True if the suffix exists in the TreeMap, false otherwise.
+     */
+    public boolean containsChar(char c) {
+        return map.containsKey(c);
+    }
+
+    /**
+     * Gives the number of times this substring occurs in a text.
+     *
+     * @return said number of times.
+     */
+    public int count() {
+        return count;
+    }
+
+    /**
+     * Gives the TreeMap of suffix characters to frequency counts. This is
+     * the live internal map, not a copy.
+     *
+     * @return the TreeMap.
+     */
+    public TreeMap<Character, Integer> getMap() {
+        return map;
+    }
+
+    /**
+     * Using probability, returns a pseudo-random character to follow the
+     * substring. A number below the total of all frequency counts is
+     * drawn, and the suffixes are walked in key order, subtracting each
+     * frequency count until the drawn number is used up. A suffix that
+     * was seen more often therefore covers a larger share of the range
+     * and is proportionally more likely to be returned.
+     *
+     * @return the pseudo-random suffix.
+     * @throws IllegalStateException
+     *             if no suffix has been recorded for this substring.
+     */
+    public char random() {
+        int total = 0;
+
+        for (int frequency : map.values()) {
+            // Entries without a positive count can never be drawn.
+            if (frequency > 0) {
+                total += frequency;
+            }
+        }
+
+        if (total == 0) {
+            throw new IllegalStateException(
+                    "No suffixes recorded for substring \"" + substring + "\"");
+        }
+
+        int pick = RNG.nextInt(total);
+
+        for (Entry<Character, Integer> entry : map.entrySet()) {
+            int frequency = entry.getValue();
+
+            if (frequency <= 0) {
+                continue;
+            }
+
+            pick -= frequency;
+
+            if (pick < 0) {
+                return entry.getKey();
+            }
+        }
+
+        // pick < total, so the walk above always returns.
+        throw new AssertionError("weighted pick ran past the last suffix");
+    }
+
+    /**
+     * Gives a String representation of the Markov object: the substring
+     * and its count, followed by one line per suffix.
+     *
+     * @return said String representation.
+     */
+    @Override
+    public String toString() {
+        StringBuilder ret = new StringBuilder();
+
+        ret.append("Substring: ").append(substring);
+        ret.append(", Count: ").append(count);
+        ret.append("\n").append("Suffixes and frequency counts: ");
+
+        for (Entry<Character, Integer> entry : map.entrySet()) {
+            char key = entry.getKey();
+            int value = entry.getValue();
+
+            ret.append("\n").append("Suffix: ").append(key);
+            ret.append(", frequency count: ").append(value);
+        }
+
+        return ret.toString();
+    }
+}
diff --git a/RGens/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java b/RGens/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java
new file mode 100644
index 0000000..0edaaa8
--- /dev/null
+++ b/RGens/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java
@@ -0,0 +1,42 @@
+package bjc.rgens.text.markov;
+
+import java.util.Map;
+
+/**
+ * Generates text from a table that maps each k-character substring to
+ * the {@link Markov} entry describing which characters may follow it.
+ */
+public class StandaloneMarkov {
+    private final int k;
+
+    private final Map<String, Markov> markovHash;
+    private final String firstSub;
+
+    /**
+     * Creates a generator over an already built table.
+     *
+     * @param k
+     *            the length of the substrings used as keys.
+     * @param markovHash
+     *            the table from substring to its Markov entry.
+     * @param firstSub
+     *            the substring every generated text starts with, and
+     *            restarts with when generation reaches a dead end.
+     */
+    public StandaloneMarkov(int k, Map<String, Markov> markovHash,
+            String firstSub) {
+        this.k = k;
+        this.markovHash = markovHash;
+        this.firstSub = firstSub;
+    }
+
+    /**
+     * Generates text by starting from the first substring and repeatedly
+     * appending a random suffix of the last k characters. When the last k
+     * characters have no entry in the table, the first substring is
+     * appended again and generation carries on from there.
+     *
+     * The result may be a few characters longer than M when a restart
+     * happens close to the end; callers that need exactly M characters
+     * should truncate.
+     *
+     * @param M
+     *            the number of characters to generate.
+     * @return the generated text; empty when M is not greater than k,
+     *         and only the text produced so far when the first substring
+     *         itself has no entry in the table.
+     */
+    public String generateTextFromMarkov(int M) {
+        StringBuilder text = new StringBuilder();
+
+        if (k >= M) {
+            return text.toString();
+        }
+
+        text.append(firstSub);
+
+        while (true) {
+            String sub = text.substring(text.length() - k);
+            Markov tmp = markovHash.get(sub);
+
+            if (tmp == null) {
+                // Dead end. Restarting only helps if the first substring
+                // can be continued; otherwise every restart would hit the
+                // same dead end and the text would grow without bound.
+                if (!markovHash.containsKey(firstSub)) {
+                    break;
+                }
+
+                text.append(firstSub);
+                continue;
+            }
+
+            text.append(tmp.random());
+
+            if (text.length() >= M) {
+                break;
+            }
+        }
+
+        return text.toString();
+    }
+
+}
diff --git a/RGens/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java b/RGens/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java
new file mode 100644
index 0000000..92bc653
--- /dev/null
+++ b/RGens/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java
@@ -0,0 +1,70 @@
+package bjc.rgens.text.markov;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Builds {@link StandaloneMarkov} generators from source text.
+ */
+public class StandaloneTextGenerator {
+
+    /**
+     * Build a markov generator from a provided source
+     *
+     * @param k
+     *            The markov order to use
+     * @param reader
+     *            The source to seed the generator from
+     * @return The markov generator for the provided text
+     * @throws IllegalArgumentException
+     *             if k is negative or larger than the number of
+     *             characters read from the source
+     */
+    public static StandaloneMarkov generateMarkovMap(int k,
+            Reader reader) {
+        String origFile = readAll(reader);
+
+        if (k < 0 || k > origFile.length()) {
+            throw new IllegalArgumentException("Markov order " + k
+                    + " does not fit a source of " + origFile.length()
+                    + " characters");
+        }
+
+        String firstSub = origFile.substring(0, k);
+
+        Map<String, Markov> hash = new HashMap<>();
+
+        // Every position that still has a character after its k-character
+        // window contributes one (window, suffix) observation.
+        for (int i = 0; i + k < origFile.length(); i++) {
+            String sub = origFile.substring(i, i + k);
+            char suffix = origFile.charAt(i + k);
+
+            Markov marvin = hash.get(sub);
+
+            if (marvin == null) {
+                hash.put(sub, new Markov(sub, suffix));
+            } else {
+                marvin.add(suffix);
+            }
+        }
+
+        return new StandaloneMarkov(k, hash, firstSub);
+    }
+
+    /**
+     * Reads the reader to its end. End of stream is detected from the
+     * return value of read, so every character in the source is kept.
+     *
+     * A failure before anything was read terminates the program. A
+     * failure part way through is reported and reading stops, so the
+     * caller works with the text read up to that point.
+     */
+    private static String readAll(Reader reader) {
+        StringBuilder origFileBuffer = new StringBuilder();
+        char[] chunk = new char[4096];
+
+        try {
+            int read = reader.read(chunk);
+
+            while (read != -1) {
+                origFileBuffer.append(chunk, 0, read);
+                read = reader.read(chunk);
+            }
+        } catch (IOException e) {
+            System.out
+                    .println("IOException in stepping through the reader");
+            e.printStackTrace();
+
+            if (origFileBuffer.length() == 0) {
+                System.exit(1);
+            }
+        }
+
+        return origFileBuffer.toString();
+    }
+
+}
diff --git a/RGens/src/main/java/bjc/rgens/text/markov/TextGenerator.java b/RGens/src/main/java/bjc/rgens/text/markov/TextGenerator.java
new file mode 100755
index 0000000..770acd9
--- /dev/null
+++ b/RGens/src/main/java/bjc/rgens/text/markov/TextGenerator.java
@@ -0,0 +1,69 @@
+package bjc.rgens.text.markov;
+
+import java.io.*;
+
+/**
+ * Generate text from a markov model of an input text
+ *
+ * @author ben
+ *
+ */
+public class TextGenerator {
+    /**
+     * @param args
+     *            when used with three arguments, the first represents the
+     *            k-order of the Markov objects. The second represents the
+     *            number of characters to print out. The third represents
+     *            the file to be read.
+     *
+     *            When used with two arguments, the first represents the
+     *            k-order of the Markov objects, and the second represents
+     *            the file to be read. The generated text will be the same
+     *            number of characters as the original file.
+     */
+    public static void main(String[] args) {
+        int k = 0;
+        int M = 0;
+
+        String file = "";
+
+        try {
+            if (args.length == 3) {
+                k = Integer.parseInt(args[0]);
+                M = Integer.parseInt(args[1]);
+                file = args[2];
+            } else if (args.length == 2) {
+                k = Integer.parseInt(args[0]);
+                file = args[1];
+            } else {
+                printUsage();
+                System.exit(1);
+            }
+        } catch (NumberFormatException nfex) {
+            // k or M was not a number: same outcome as a wrong
+            // argument count.
+            printUsage();
+            System.exit(1);
+        }
+
+        try (Reader reader = new BufferedReader(new FileReader(file))) {
+            String source = readAll(reader);
+
+            if (args.length == 2) {
+                // No length given: match the length of the input.
+                M = source.length();
+            }
+
+            StandaloneMarkov markov = StandaloneTextGenerator
+                    .generateMarkovMap(k, new StringReader(source));
+
+            String generated = markov.generateTextFromMarkov(M);
+
+            // The generator may overshoot M, and M may be negative.
+            int end = Math.max(0, Math.min(M, generated.length()));
+
+            System.out.println(generated.substring(0, end));
+        } catch (FileNotFoundException e) {
+            System.out.println("File not found.");
+            e.printStackTrace();
+            System.exit(1);
+        } catch (IOException ioex) {
+            System.out.println("IOException");
+            ioex.printStackTrace();
+            System.exit(1);
+        }
+    }
+
+    /** Prints the command line help. */
+    private static void printUsage() {
+        System.out
+                .println("\n" + "Usage: java TextGenerator k M file");
+        System.out.println(
+                "where k is the markov order, M is the number");
+        System.out.println(
+                "of characters to be printed, and file is the");
+        System.out.println(
+                "name of the file to print from. M may be left out."
+                        + "\n");
+    }
+
+    /** Reads everything the reader has left into a string. */
+    private static String readAll(Reader reader) throws IOException {
+        StringBuilder contents = new StringBuilder();
+        char[] chunk = new char[4096];
+
+        int read = reader.read(chunk);
+
+        while (read != -1) {
+            contents.append(chunk, 0, read);
+            read = reader.read(chunk);
+        }
+
+        return contents.toString();
+    }
+} \ No newline at end of file