Move files out of folder

author: Benjamin J. Culkin <bjculkin@mix.wvu.edu> 2018-05-29 15:35:02 -0300
committer: Benjamin J. Culkin <bjculkin@mix.wvu.edu> 2018-05-29 15:35:02 -0300
commit: 0faa5175b6f0de8835ed514615ac64135f406b29 (patch)
tree: 6a1a5a9b01a1175b8ae2203418005f7bc625be9d /src/main/java/bjc/rgens/text
parent: 9356498bebab9342222cd87be669abae9d7ac3a2 (diff)
4 files changed, 427 insertions, 0 deletions
diff --git a/src/main/java/bjc/rgens/text/markov/Markov.java b/src/main/java/bjc/rgens/text/markov/Markov.java
new file mode 100644
index 0000000..e21d60f
--- /dev/null
+++ b/src/main/java/bjc/rgens/text/markov/Markov.java
@@ -0,0 +1,208 @@
+package bjc.rgens.text.markov;
+
+import java.util.Map.Entry;
+import java.util.*;
+
+/**
+ * Represents a k-character substring.
+ *
+ * Can give a pseudo-random suffix character based on probability.
+ *
+ * @author Daniel Friedman (Fall 2011)
+ */
+public class Markov {
+	String  substring;
+	int     count   = 0;
+
+	TreeMap<Character, Integer> map;
+
+	/**
+	 * Constructs a Markov object from a given substring.
+	 *
+	 * @param substr
+	 * 	The given substring.
+	 */
+	public Markov(String substr) {
+		this.substring = substr;
+
+		map = new TreeMap<>();
+
+		add();
+	}
+
+	/**
+	 * Constructs a Markov object from a given substring and suffix
+	 * character. 
+	 *
+	 * Suffix characters are stored in a TreeMap.
+	 *
+	 * @param substr
+	 * 	The specified substring.
+	 *
+	 * @param suffix
+	 * 	The specified suffix.
+	 */
+	public Markov(String substr, Character suffix) {
+		this.substring = substr;
+
+		map = new TreeMap<>();
+
+		add(suffix);
+	}
+
+	/**
+	 * Increments the count of number of times the substring appears in a
+	 * text.
+	 */
+	public void add() {
+		count++;
+	}
+
+	/**
+	 * Adds a suffix character to the TreeMap.
+	 *
+	 * @param c
+	 * 	The suffix character to be added.
+	 */
+	public void add(char c) {
+		add();
+
+		if (map.containsKey(c)) {
+			int frequency = map.get(c);
+			map.put(c, frequency + 1);
+		} else {
+			map.put(c, 1);
+		}
+	}
+
+	/**
+	 * Gives the frequency count of a suffix character; that is, the number
+	 * of times the specified suffix follows the substring in a text.
+	 *
+	 * @param c
+	 * 	The specified suffix.
+	 *
+	 * @return 
+	 * 	The frequency count.
+	 */
+	public int getFrequencyCount(char c) {
+		if (!map.containsKey(c)) {
+			return -1;
+		}
+
+		return map.get(c);
+	}
+
+	/**
+	 * Gives a percentage of frequency count / number of total suffixes.
+	 *
+	 * @param c
+	 * 	The character to look for the frequency for.
+	 *
+	 * @return
+	 * 	The ratio of frequency count of a single character to the total
+	 * 	number of suffixes.
+	 */
+	public double getCharFrequency(char c) {
+		if (getFrequencyCount(c) == -1) {
+			return -1;
+		}
+
+		return (double) getFrequencyCount(c) / (double) count;
+	}
+
+	/**
+	 * Finds whether or not the given suffix is in the TreeMap.
+	 *
+	 * @param c
+	 * 	The given suffix.
+	 *
+	 * @return
+	 * 	True if the suffix exists in the TreeMap, false otherwise.
+	 */
+	public boolean containsChar(char c) {
+		if (!map.containsKey(c)) {
+			return false;
+		}
+
+		return true;
+	}
+
+	/**
+	 * Gives the number of times this substring occurs in a text.
+	 *
+	 * @return
+	 * 	Said number of times.
+	 */
+	public int count() {
+		return count;
+	}
+
+	/**
+	 * Gives the TreeMap.
+	 *
+	 * @return 
+	 * 	The TreeMap.
+	 */
+	public TreeMap<Character, Integer> getMap() {
+		return map;
+	}
+
+	/**
+	 * Using probability, returns a pseudo-random character to follow the
+	 * substring.
+	 *
+	 * Character possibilities are added to an ArrayList (duplicates
+	 * allowed), and a random number from 0 to the last index in the
+	 * ArrayList is picked. Since more common suffixes occupy more indices
+	 * in the ArrayList, the probability of getting a more common suffix is
+	 * greater than the probability of getting a less common suffix.
+	 *
+	 * @return 
+	 * 	The pseudo-random suffix.
+	 */
+	public char random() {
+		Character ret = null;
+
+		Set<Entry<Character, Integer>> s = map.entrySet();
+
+		Iterator<Entry<Character, Integer>> it = s.iterator();
+
+		ArrayList<Character> suffixes = new ArrayList<>();
+
+		while (it.hasNext()) {
+			Entry<Character, Integer> tmp = it.next();
+
+			for (int i = 0; i < tmp.getValue(); i++) {
+				suffixes.add(tmp.getKey());
+			}
+		}
+
+		Random rand = new Random();
+
+		int retIndex = rand.nextInt(suffixes.size());
+		ret = suffixes.get(retIndex);
+
+		return ret;
+	}
+
+	/**
+	 * Gives a String representation of the Markov object.
+	 *
+	 * @return 
+	 * 	Said String representation.
+	 */
+	@Override
+	public String toString() {
+		String ret = "Substring: " + substring + ", Count: " + count;
+		ret += "\n" + "Suffixes and frequency counts: ";
+
+		for (Entry<Character, Integer> entry : map.entrySet()) {
+			char key = entry.getKey();
+			int value = entry.getValue();
+			ret += "\n" + "Suffix: " + key + ", frequency count: " + value;
+		}
+
+		return ret;
+	}
+}
diff --git a/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java b/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java
new file mode 100644
index 0000000..cebf2bc
--- /dev/null
+++ b/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java
@@ -0,0 +1,70 @@
+package bjc.rgens.text.markov;
+
+import java.util.Map;
+
+/**
+ * A standalone Markov generator.
+ *
+ * @author bjculkin
+ */
+public class StandaloneMarkov {
+	/* The order of the generator. */
+	private int ord;
+
+	/* The generators to use. */
+	private Map<String, Markov>     hash;
+	/* The initial string.    */
+	private String                  first;
+
+	/**
+	 * Create a new standalone Markov generator.
+	 *
+	 * @param order
+	 * 	The order of this generator.
+	 *
+	 * @param markovHash
+	 * 	The generators to use.
+	 *
+	 * @param firstSub
+	 * 	The string to start out with.
+	 */
+	public StandaloneMarkov(int order, Map<String, Markov> markovHash, String firstSub) {
+		ord = order;
+		hash = markovHash;
+		first = firstSub;
+	}
+
+	/**
+	 * Generate random text from the markov generator.
+	 *
+	 * @param charsToGenerate
+	 * 	The number of characters of text to generate.
+	 *
+	 * @return
+	 * 	The randomly generate text.
+	 */
+	public String generateTextFromMarkov(int charsToGenerate) {
+		StringBuilder text = new StringBuilder();
+
+		for (int i = ord; i < charsToGenerate; i++) {
+			if (i == ord) {
+				text.append(first);
+
+				if (text.length() > ord) i = text.length();
+			}
+
+			String sub = text.substring(i - ord, i);
+			Markov tmp = hash.get(sub);
+
+			if (tmp != null) {
+				Character nextChar = tmp.random();
+
+				text.append(nextChar);
+			} else {
+				i = ord - 1;
+			}
+		}
+
+		return text.toString();
+	}
+}
diff --git a/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java b/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java
new file mode 100644
index 0000000..339e8d5
--- /dev/null
+++ b/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java
@@ -0,0 +1,76 @@
+package bjc.rgens.text.markov;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Create a Markov generate from a provided source.
+ *
+ * @author bjculkin
+ */
+public class StandaloneTextGenerator {
+	/**
+	 * Build a markov generator from a provided source.
+	 *
+	 * @param order
+	 * 	The markov order to use.
+	 *
+	 * @param reader
+	 * 	The source to seed the generator from.
+	 *
+	 * @return
+	 * 	The markov generator for the provided text.
+	 */
+	public static StandaloneMarkov generateMarkovMap(int order, Reader reader) {
+		Map<String, Markov> hash = new HashMap<>();
+
+		Character next = null;
+
+		try {
+			next = (char) reader.read();
+		} catch (IOException e1) {
+			System.out.println("IOException in stepping through the reader");
+
+			e1.printStackTrace();
+
+			System.exit(1);
+		}
+
+		StringBuilder origFileBuffer = new StringBuilder();
+
+		while (next != null && Character.isDefined(next)) {
+			Character.toString(next);
+			origFileBuffer.append(next);
+
+			try {
+				next = (char) reader.read();
+			} catch (IOException e) {
+				System.out.println("IOException in stepping through the reader");
+
+				e.printStackTrace();
+			}
+
+		}
+
+		String origFile = origFileBuffer.toString();
+		String firstSub = origFile.substring(0, order);
+
+		for (int i = 0; i < origFile.length() - order; i++) {
+			String sub = origFile.substring(i, i + order);
+			Character suffix = origFile.charAt(i + order);
+
+			if (hash.containsKey(sub)) {
+				Markov marvin = hash.get(sub);
+				marvin.add(suffix);
+				hash.put(sub, marvin);
+			} else {
+				Markov marvin = new Markov(sub, suffix);
+				hash.put(sub, marvin);
+			}
+		}
+
+		return new StandaloneMarkov(order, hash, firstSub);
+	}
+}
diff --git a/src/main/java/bjc/rgens/text/markov/TextGenerator.java b/src/main/java/bjc/rgens/text/markov/TextGenerator.java
new file mode 100644
index 0000000..f629d49
--- /dev/null
+++ b/src/main/java/bjc/rgens/text/markov/TextGenerator.java
@@ -0,0 +1,73 @@
+package bjc.rgens.text.markov;
+
+import java.io.*;
+
+/**
+ * Generate text from a markov model of an input text
+ *
+ * @author ben
+ *
+ */
+public class TextGenerator {
+	/**
+	 * Main method.
+	 *
+	 * @param args
+	 * 	When used with three arguments, the first represents the k-order
+	 * 	of the Markov objects. The second represents the number of
+	 * 	characters to print out. The third represents the file to be
+	 * 	read. 
+	 *
+	 * 	When used with two arguments, the first represents the k-order
+	 * 	of the Markov objects, and the second represents the file to be
+	 * 	read. The generated text will be the same number of characters
+	 * 	as the original file.
+	 */
+	public static void main(String[] args) {
+		int k = 0;
+		int M = 0;
+
+		String file = "";
+		StringBuilder text = new StringBuilder();
+
+		if (args.length == 3) {
+			k = Integer.parseInt(args[0]);
+			M = Integer.parseInt(args[1]);
+
+			file = args[2];
+		} else if (args.length == 2) {
+			k = Integer.parseInt(args[0]);
+
+			file = args[1];
+		} else {
+			System.out.println("\n" + "Usage: java TextGenerator k M file");
+			System.out.println("where k is the markov order, M is the number");
+			System.out.println("of characters to be printed, and file is the");
+			System.out.println("name of the file to print from. M may be left out." + "\n");
+			System.exit(1);
+		}
+
+		StandaloneMarkov markov = null;
+
+		try (FileReader reader = new FileReader(file)) {
+			markov = StandaloneTextGenerator.generateMarkovMap(k, reader);
+
+			String generatedText = markov.generateTextFromMarkov(M);
+			String desiredText = generatedText.substring(0, Math.min(M, text.length()));
+
+			System.out.println(desiredText);
+		} catch (FileNotFoundException e) {
+			System.out.println("File not found.");
+
+			e.printStackTrace();
+
+			System.exit(1);
+		} catch (IOException ioex) {
+			System.out.println("IOException");
+
+			ioex.printStackTrace();
+
+			System.exit(1);
+		}
+	}
+}
author	Benjamin J. Culkin <bjculkin@mix.wvu.edu>	2018-05-29 15:35:02 -0300
committer	Benjamin J. Culkin <bjculkin@mix.wvu.edu>	2018-05-29 15:35:02 -0300
commit	0faa5175b6f0de8835ed514615ac64135f406b29 (patch)
tree	6a1a5a9b01a1175b8ae2203418005f7bc625be9d /src/main/java/bjc/rgens/text
parent	9356498bebab9342222cd87be669abae9d7ac3a2 (diff)