Added markov-based generator

author: bculkin2442 <bjculkin@mix.wvu.edu> 2016-01-14 14:01:21 -0500
committer: bculkin2442 <bjculkin@mix.wvu.edu> 2016-01-14 14:01:21 -0500
commit: 90708215a1b85f90738628260e822aa5eade3f56 (patch)
tree: c42874821c8a321a9ef1ba7ee05a924c0e3af706 /RGens/src/main/java
parent: f1943c74de8fe61f4990def15cbcfe6e83bea00a (diff)
7 files changed, 972 insertions, 0 deletions
diff --git a/RGens/src/main/java/bjc/RGens/text/markov/FrequencyCounter.java b/RGens/src/main/java/bjc/RGens/text/markov/FrequencyCounter.java
new file mode 100755
index 0000000..6009828
--- /dev/null
+++ b/RGens/src/main/java/bjc/RGens/text/markov/FrequencyCounter.java
@@ -0,0 +1,57 @@
+package bjc.RGens.text.markov;
+
+import java.util.*;
+
+/**
+ * Counts the frequencies of k-length substrings of input text.
+ * 
+ * @author Daniel Friedman
+ *
+ */
+public class FrequencyCounter {
+
+	public static void main(String[] args) {
+		String text;
+
+		int k = 0;
+
+		if (args.length == 1) {
+			k = Integer.parseInt(args[0]);
+		}
+
+		System.out.print("Enter string: ");
+
+		Scanner s = new Scanner(System.in);
+		text = s.nextLine();
+
+		MyHashMap<String, Integer> hash = new MyHashMap<String, Integer>();
+		int distinct = 0;
+
+		for (int i = 0; i <= text.length() - k; i++) {
+			String sub = text.substring(i, i + k);
+
+			Markov m = new Markov(sub);
+			m.add();
+
+			if (hash.containsKey(sub)) {
+				Integer count = hash.get(sub);
+				hash.put(sub, count + 1);
+			} else {
+				hash.put(sub, m.count);
+				distinct++;
+			}
+
+			s.close();
+		}
+
+		System.out.println(distinct + " distinct keys");
+
+		Iterator<String> keys = hash.keys();
+
+		while (keys.hasNext()) {
+			String key = keys.next();
+			Integer value = hash.get(key);
+			System.out.println(value + " " + key);
+		}
+	}
+}
diff --git a/RGens/src/main/java/bjc/RGens/text/markov/Markov.java b/RGens/src/main/java/bjc/RGens/text/markov/Markov.java
new file mode 100755
index 0000000..1956111
--- /dev/null
+++ b/RGens/src/main/java/bjc/RGens/text/markov/Markov.java
@@ -0,0 +1,187 @@
+package bjc.RGens.text.markov;
+
+import java.util.Map.Entry;
+import java.util.*;
+
+/**
+ * Represents a k-character substring. Can give a pseudo-random suffix
+ * character based on probability.
+ * 
+ * @author Daniel Friedman (Fall 2011)
+ *
+ */
+public class Markov {
+	String substring;
+	int count = 0;
+
+	TreeMap<Character, Integer> map;
+
+	/**
+	 * Constructs a Markov object from a given substring.
+	 * 
+	 * @param substring
+	 *            the given substring.
+	 */
+	public Markov(String substring) {
+		this.substring = substring;
+
+		map = new TreeMap<Character, Integer>();
+
+		add();
+	}
+
+	/**
+	 * Constructs a Markov object from a given substring and suffix
+	 * character. Suffix characters are stored in a TreeMap.
+	 * 
+	 * @param substring
+	 *            the specified substring.
+	 * @param suffix
+	 *            the specified suffix.
+	 */
+	public Markov(String substring, Character suffix) {
+		this.substring = substring;
+
+		map = new TreeMap<Character, Integer>();
+
+		add(suffix);
+	}
+
+	/**
+	 * Increments the count of number of times the substring appears in a
+	 * text.
+	 */
+	public void add() {
+		count++;
+	}
+
+	/**
+	 * Adds a suffix character to the TreeMap.
+	 * 
+	 * @param c
+	 *            the suffix character to be added.
+	 */
+	public void add(char c) {
+		add();
+
+		if (map.containsKey(c)) {
+			int frequency = map.get(c);
+			map.put(c, frequency + 1);
+		} else
+			map.put(c, 1);
+	}
+
+	/**
+	 * Gives the frequency count of a suffix character; that is, the number
+	 * of times the specified suffix follows the substring in a text.
+	 * 
+	 * @param c
+	 *            the specified suffix.
+	 * @return the frequency count.
+	 */
+	public int getFrequencyCount(char c) {
+		if (!map.containsKey(c)) {
+			return -1;
+		} else {
+			return map.get(c);
+		}
+	}
+
+	/**
+	 * Gives a percentage of frequency count / number of total suffixes.
+	 * 
+	 * @param c
+	 * @return
+	 */
+	public double getCharFrequency(char c) {
+		if (getFrequencyCount(c) == -1) {
+			return -1;
+		} else {
+			return (double) getFrequencyCount(c) / (double) count;
+		}
+	}
+
+	/**
+	 * Finds whether or not the given suffix is in the TreeMap.
+	 * 
+	 * @param c
+	 *            the given suffix.
+	 * @return True if the suffix exists in the TreeMap, false otherwise.
+	 */
+	public boolean containsChar(char c) {
+		if (!map.containsKey(c)) {
+			return false;
+		} else {
+			return true;
+		}
+	}
+
+	/**
+	 * Gives the number of times this substring occurs in a text.
+	 * 
+	 * @return said number of times.
+	 */
+	public int count() {
+		return count;
+	}
+
+	/**
+	 * Gives the TreeMap.
+	 * 
+	 * @return the TreeMap.
+	 */
+	public TreeMap<Character, Integer> getMap() {
+		return map;
+	}
+
+	/**
+	 * Using probability, returns a pseudo-random character to follow the
+	 * substring. Character possibilities are added to an ArrayList
+	 * (duplicates allowed), and a random number from 0 to the last index
+	 * in the ArrayList is picked. Since more common suffixes occupy more
+	 * indices in the ArrayList, the probability of getting a more common
+	 * suffix is greater than the probability of getting a less common
+	 * suffix.
+	 * 
+	 * @return the pseudo-random suffix.
+	 */
+	public char random() {
+		Character ret = null;
+
+		Set<Entry<Character, Integer>> s = map.entrySet();
+
+		Iterator<Entry<Character, Integer>> it = s.iterator();
+
+		ArrayList<Character> suffixes = new ArrayList<Character>();
+
+		while (it.hasNext()) {
+			Entry<Character, Integer> tmp = it.next();
+
+			for (int i = 0; i < tmp.getValue(); i++) {
+				suffixes.add(tmp.getKey());
+			}
+		}
+
+		Random rand = new Random();
+		int retIndex = rand.nextInt(suffixes.size());
+		ret = suffixes.get(retIndex);
+		return ret;
+	}
+
+	/**
+	 * Gives a String representation of the Markov object.
+	 * 
+	 * @return said String representation.
+	 */
+	public String toString() {
+		String ret = "Substring: " + substring + ", Count: " + count;
+		ret += "\n" + "Suffixes and frequency counts: ";
+
+		for (Entry<Character, Integer> entry : map.entrySet()) {
+			char key = entry.getKey();
+			int value = entry.getValue();
+			ret += "\n" + "Suffix: " + key + ", frequency count: " + value;
+		}
+		return ret;
+	}
+}
diff --git a/RGens/src/main/java/bjc/RGens/text/markov/MyEntry.java b/RGens/src/main/java/bjc/RGens/text/markov/MyEntry.java
new file mode 100644
index 0000000..c33fa18
--- /dev/null
+++ b/RGens/src/main/java/bjc/RGens/text/markov/MyEntry.java
@@ -0,0 +1,71 @@
+package bjc.RGens.text.markov;
+
+/**
+ * Represents a key, value pairing.
+ * 
+ * @author Daniel Friedman
+ *
+ */
+public class MyEntry<K, V> {
+	protected K	key;
+	protected V	value;
+
+	@Override
+	/**
+	 * Gives the hashcode of a MyEntry object.
+	 * 
+	 * @return the hashcode of the MyEntry's key.
+	 */
+	public int hashCode() {
+		return key.hashCode();
+	}
+
+	/**
+	 * Tests whether this object is equivalent to a specified other.
+	 * 
+	 * @param obj
+	 *            the other object.
+	 * @return true if the keys are equivalent.
+	 */
+	@SuppressWarnings("unchecked")
+	@Override
+	public boolean equals(Object obj) {
+		boolean ret = false;
+
+		if (obj == null) {
+			ret = false;
+		}
+
+		MyEntry<K, V> other = (MyEntry<K, V>) obj;
+
+		if (key.equals(other.key)) {
+			ret = true;
+		}
+
+		return ret;
+	}
+
+	/**
+	 * Constructs a MyEntry object with given key and value.
+	 * 
+	 * @param key
+	 *            the key to be stored.
+	 * @param value
+	 *            the value to be stored.
+	 * @param myHashMap
+	 *            TODO
+	 */
+	public MyEntry(K key, V value) {
+		this.key = key;
+		this.value = value;
+	}
+
+	/**
+	 * Gives a String representation of this object.
+	 * 
+	 * @return the String representation.
+	 */
+	public String toString() {
+		return "(" + key + ", " + value + ")";
+	}
+}
+\ No newline at end of file
diff --git a/RGens/src/main/java/bjc/RGens/text/markov/MyHashMap.java b/RGens/src/main/java/bjc/RGens/text/markov/MyHashMap.java
new file mode 100755
index 0000000..10158ea
--- /dev/null
+++ b/RGens/src/main/java/bjc/RGens/text/markov/MyHashMap.java
@@ -0,0 +1,347 @@
+package bjc.RGens.text.markov;
+
+import java.util.*;
+
+/**
+ * A Hash table implementation. Uses an array of LinkedList<MyEntry> as
+ * backing storage.
+ * 
+ * @author Daniel Friedman
+ *
+ * @param <K>
+ *            generic type for Keys.
+ * @param <V>
+ *            generic type for Values.
+ */
+
+public class MyHashMap<K, V> {
+	private int		size;
+	private float	loadFactor;
+
+	LinkedList<MyEntry<K, V>>	table[];
+	private ArrayList<Integer>	primes	= new ArrayList<Integer>();
+
+	/**
+	 * Constructs a new MyHashMap object.
+	 * 
+	 * @param capacity
+	 *            the size of the array.
+	 * @param loadFactor
+	 *            the specified load factor. The array will resize when the
+	 *            load factor is reached.
+	 */
+	@SuppressWarnings("unchecked")
+	public MyHashMap(int capacity, float loadFactor) {
+		table = (LinkedList<MyEntry<K, V>>[]) new LinkedList[capacity];
+
+		this.loadFactor = loadFactor;
+
+		for (int i = 0; i < table.length; i++) {
+			table[i] = new LinkedList<MyEntry<K, V>>();
+		}
+	}
+
+	/**
+	 * Constructs a MyHashMap with capacity 11 and load factor 0.75.
+	 */
+	public MyHashMap() {
+		this(11, (float) 0.75);
+
+		primes.add(11);
+		primes.add(23);
+		primes.add(47);
+		primes.add(97);
+		primes.add(197);
+		primes.add(397);
+		primes.add(797);
+		primes.add(1597);
+		primes.add(3203);
+		primes.add(6421);
+		primes.add(12853);
+		primes.add(25717);
+		primes.add(51437);
+		primes.add(102877);
+		primes.add(205759);
+		primes.add(411527);
+		primes.add(823117);
+		primes.add(1646237);
+		primes.add(3292489);
+		primes.add(6584983);
+		primes.add(13169977);
+		primes.add(26339969);
+		primes.add(52679969);
+		primes.add(105359939);
+		primes.add(210719881);
+		primes.add(421439783);
+		primes.add(842879579);
+		primes.add(1685759167);
+	}
+
+	/**
+	 * Gives the number of MyEntries in the array.
+	 * 
+	 * @return number of MyEntry objects in the array.
+	 */
+	public int size() {
+		return size;
+	}
+
+	/**
+	 * Tests whether or not the array is empty.
+	 * 
+	 * @return true if empty, false otherwise.
+	 */
+	public boolean isEmpty() {
+		if (size == 0) {
+			return true;
+		} else {
+			return false;
+		}
+	}
+
+	/**
+	 * Removes all objects from the array.
+	 */
+	public void clear() {
+		for (int i = 0; i < table.length; i++) {
+			table[i].clear();
+		}
+	}
+
+	/**
+	 * Gives a String representation of the array.
+	 * 
+	 * @return said String representation.
+	 */
+	public String toString() {
+		String ret = "";
+
+		for (int i = 0; i < table.length; i++) {
+			ret += ("Bucket " + i + ": " + table[i] + "\n");
+		}
+
+		return ret;
+	}
+
+	/**
+	 * Adds a key, value mapping to the array. If the key already is
+	 * contained, the value will be updated.
+	 * 
+	 * @param key
+	 *            the key to be added.
+	 * @param value
+	 *            the value to be added.
+	 * @return the previous value if the key already was contained, null
+	 *         otherwise.
+	 */
+	public V put(K key, V value) {
+		MyEntry<K, V> entry = new MyEntry<K, V>(key, value);
+
+		int hashCode = Math.abs(key.hashCode());
+		int mapping = hashCode % (table.length);
+
+		LinkedList<MyEntry<K, V>> bucket = table[mapping];
+
+		V ret = null;
+
+		if (bucket.contains(entry)) {
+			int index = bucket.indexOf(entry);
+
+			ret = bucket.get(index).value;
+			bucket.set(index, entry);
+		} else {
+			bucket.add(entry);
+		}
+
+		size++;
+
+		if ((double) size / (double) table.length >= loadFactor) {
+			resize();
+		}
+
+		return ret;
+	}
+
+	/**
+	 * Gives the value associated with a specified key.
+	 * 
+	 * @param key
+	 *            the specified key.
+	 * @return the value associated. Null if the key is not found.
+	 */
+	public V get(K key) {
+		V ret = null;
+
+		int hashCode = Math.abs(key.hashCode());
+		int mapping = hashCode % (table.length);
+
+		LinkedList<MyEntry<K, V>> bucket = table[mapping];
+
+		for (int i = 0; i < bucket.size(); i++) {
+			MyEntry<K, V> cur = bucket.get(i);
+
+			if (cur.key.equals(key)) {
+				ret = cur.value;
+			}
+		}
+		return ret;
+	}
+
+	/**
+	 * Removes a specified object from the array.
+	 * 
+	 * @param key
+	 *            the key corresponding to the MyEntry to be removed.
+	 * @return the value associated with the removed key.
+	 */
+	public V remove(K key) {
+		V ret = null;
+
+		int hashCode = Math.abs(key.hashCode());
+		int mapping = hashCode % (table.length);
+
+		LinkedList<MyEntry<K, V>> bucket = table[mapping];
+
+		for (int i = 0; i < bucket.size(); i++) {
+			MyEntry<K, V> cur = bucket.get(i);
+
+			if (cur.key.equals(key)) {
+				ret = cur.value;
+				bucket.remove(cur);
+			}
+		}
+
+		size--;
+		return ret;
+	}
+
+	/**
+	 * Finds whether or not the array contains a given key.
+	 * 
+	 * @param key
+	 *            the key to be tested.
+	 * @return true if found, false if not.
+	 */
+	public boolean containsKey(K key) {
+		boolean ret = false;
+
+		int hashCode = Math.abs(key.hashCode());
+		int mapping = hashCode % (table.length);
+
+		LinkedList<MyEntry<K, V>> bucket = table[mapping];
+		MyEntry<K, V> test = new MyEntry<K, V>(key, null);
+
+		if (bucket.contains(test)) {
+			ret = true;
+		}
+
+		return ret;
+	}
+
+	/**
+	 * Finds whether or not the array contains a given value.
+	 * 
+	 * @param value
+	 *            the value to be tested.
+	 * @return true if found, false if not.
+	 */
+	public boolean containsValue(V value) {
+		boolean ret = false;
+
+		for (int i = 0; i < table.length; i++) {
+			LinkedList<MyEntry<K, V>> bucket = table[i];
+
+			for (int j = 0; j < bucket.size(); j++) {
+				MyEntry<K, V> entry = bucket.get(j);
+
+				if (entry.value.equals(value))
+					ret = true;
+			}
+		}
+		return ret;
+	}
+
+	/**
+	 * Iterates over the MyEntry objects in the array, looking at their
+	 * keys.
+	 * 
+	 * @return a keys iterator.
+	 */
+	public Iterator<K> keys() {
+		return new MyIterator<K, V>(this);
+	}
+
+	/**
+	 * Resizes the array to the next largest prime number that is at least
+	 * double the current array size.
+	 */
+	public void resize() {
+		MyHashMap<K, V> tmp = new MyHashMap<K, V>(helper(table.length),
+				(float) 0.75);
+		Iterator<K> it = keys();
+
+		while (it.hasNext()) {
+			K key = it.next();
+			V value = get(key);
+			tmp.put(key, value);
+		}
+
+		this.table = tmp.table;
+		this.size = tmp.size;
+		this.loadFactor = tmp.loadFactor;
+	}
+
+	/**
+	 * Gives the next largest prime number that is at least double the
+	 * current array size.
+	 * 
+	 * @param i
+	 * @return
+	 */
+	public int helper(int i) {
+		int ret = -1;
+
+		for (int j = 0; j < primes.size(); j++) {
+			if (primes.get(j) > i) {
+				ret = primes.get(j);
+				break;
+			}
+		}
+
+		return ret;
+	}
+
+	/**
+	 * Tests the MyHashMap class and its methods.
+	 * 
+	 */
+	public static void main(String[] args) {
+		// test1:
+		/*
+		 * MyHashMap<String, Integer> testMap = new MyHashMap<String,
+		 * Integer>();
+		 * 
+		 * System.out.println("HashMap size: "+testMap.size());
+		 * System.out.println("HashMap capacity: "+testMap.table.length);
+		 * System.out.println("HashMap Load Factor: "+testMap.loadFactor);
+		 * System.out.println("HashMap isEmpty? "+testMap.isEmpty());
+		 * System.out.println("HashMap toString: "+testMap);
+		 */
+
+		// test2:
+		MyHashMap<String, Integer> testMap = new MyHashMap<String, Integer>();
+
+		for (int i = 0; i < 100; i++) {
+			testMap.put("" + i, i);
+			System.out.println("Hashtable:");
+			System.out.println(testMap);
+		}
+
+		for (int i = 0; i < 100; i++) {
+			String key = "" + i;
+			System.out.println("Removing key " + key + ":");
+			testMap.remove(key);
+			System.out.println(testMap);
+		}
+	}
+}
diff --git a/RGens/src/main/java/bjc/RGens/text/markov/MyIterator.java b/RGens/src/main/java/bjc/RGens/text/markov/MyIterator.java
new file mode 100644
index 0000000..bb6d0d9
--- /dev/null
+++ b/RGens/src/main/java/bjc/RGens/text/markov/MyIterator.java
@@ -0,0 +1,84 @@
+package bjc.RGens.text.markov;
+
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.NoSuchElementException;
+
+public class MyIterator<K, V> implements Iterator<K> {
+	private final MyHashMap<K, V> attachedMap;
+
+	public MyIterator(MyHashMap<K, V> myHashMap) {
+		attachedMap = myHashMap;
+		bucket = attachedMap.table[bucketIndex];
+	}
+
+	int bucketIndex = 0;
+
+	LinkedList<MyEntry<K, V>>	bucket;
+	Iterator<MyEntry<K, V>>		listIt	= bucket.iterator();
+
+	MyEntry<K, V> cur;
+
+	/**
+	 * Finds whether or not there is a next MyEntry object in the array.
+	 * 
+	 * @return
+	 */
+	@Override
+	public boolean hasNext() {
+		if (listIt.hasNext()) {
+			return true;
+		} else if (bucketIndex == (attachedMap.table.length - 1)) {
+			return false;
+		} else {
+			for (int i = bucketIndex
+					+ 1; i < attachedMap.table.length; i++) {
+				if (!attachedMap.table[i].isEmpty()) {
+					return true;
+				}
+			}
+		}
+		return false;
+	}
+
+	/**
+	 * Gives the key associated with the next MyEntry object in the array.
+	 * Throws a NoSuchElementException if the array has been completely
+	 * iterated.
+	 * 
+	 * @return the key associated with the next MyEntry object.
+	 */
+	@Override
+	public K next() {
+		K ret = null;
+
+		if (listIt.hasNext()) {
+			cur = listIt.next();
+			ret = cur.key;
+		} else if (hasNext()) {
+			for (int i = bucketIndex
+					+ 1; i < attachedMap.table.length; i++) {
+				if (!attachedMap.table[i].isEmpty()) {
+					bucketIndex = i;
+
+					bucket = attachedMap.table[bucketIndex];
+					listIt = bucket.iterator();
+					cur = listIt.next();
+
+					ret = cur.key;
+					break;
+				}
+			}
+		} else
+			throw new NoSuchElementException("All buckets iterated.");
+		return ret;
+	}
+
+	/**
+	 * Removes the current MyEntry.
+	 */
+	@Override
+	public void remove() {
+		bucket.remove(cur);
+	}
+}
+\ No newline at end of file
diff --git a/RGens/src/main/java/bjc/RGens/text/markov/SuffixCounter.java b/RGens/src/main/java/bjc/RGens/text/markov/SuffixCounter.java
new file mode 100755
index 0000000..67a8d01
--- /dev/null
+++ b/RGens/src/main/java/bjc/RGens/text/markov/SuffixCounter.java
@@ -0,0 +1,80 @@
+package bjc.RGens.text.markov;
+
+import java.util.Iterator;
+import java.util.Map.Entry;
+import java.util.Scanner;
+
+/**
+ * Similar to FrequencyCounter, but also counts the suffixes for each
+ * k-length substring in the text.
+ * 
+ * @author Daniel Friedman
+ *
+ */
+
+public class SuffixCounter {
+	public static void main(String[] args) {
+		String text;
+		int k = 0;
+
+		if (args.length == 1) {
+			k = Integer.parseInt(args[0]);
+		}
+
+		System.out.print("Enter string: ");
+		Scanner s = new Scanner(System.in);
+		text = s.nextLine();
+
+		MyHashMap<String, Markov> hash = new MyHashMap<String, Markov>();
+
+		int distinct = 0;
+
+		for (int i = 0; i <= text.length() - k; i++) {
+			String sub = text.substring(i, i + k);
+			Character suffix = null;
+
+			if (((i + k) <= (text.length() - 1))) {
+				suffix = text.charAt(i + k);
+			}
+
+			if (hash.containsKey(sub)) {
+				Markov temp = hash.get(sub);
+
+				if (!(suffix == null)) {
+					temp.add(suffix);
+					hash.put(sub, temp);
+				}
+
+			} else {
+				Markov m;
+
+				if (!(suffix == null)) {
+					m = new Markov(sub, suffix);
+					hash.put(sub, m);
+					distinct++;
+				}
+			}
+		}
+
+		System.out.println(distinct + " distinct keys");
+		Iterator<String> keys = hash.keys();
+
+		while (keys.hasNext()) {
+			String hashKey = keys.next();
+
+			Markov hashValue = hash.get(hashKey);
+			System.out.print(hashValue.count() + " " + hashKey + ":");
+
+			for (Entry<Character, Integer> entry : hashValue.getMap()
+					.entrySet()) {
+				char suffix = entry.getKey();
+				int frequencyCount = entry.getValue();
+				System.out.print(" " + frequencyCount + " " + suffix);
+			}
+
+			System.out.println();
+		}
+		
+		s.close();
+	}
+}
diff --git a/RGens/src/main/java/bjc/RGens/text/markov/TextGenerator.java b/RGens/src/main/java/bjc/RGens/text/markov/TextGenerator.java
new file mode 100755
index 0000000..d347179
--- /dev/null
+++ b/RGens/src/main/java/bjc/RGens/text/markov/TextGenerator.java
@@ -0,0 +1,146 @@
+package bjc.RGens.text.markov;
+
+import java.io.*;
+import java.util.*;
+import java.util.Map.Entry;
+
+public class TextGenerator {
+
+	/**
+	 * @param args
+	 *            when used with three arguments, the first represents the
+	 *            k-order of the Markov objects. The second represents the
+	 *            number of characters to print out. The third represents
+	 *            the file to be read.
+	 * 
+	 *            When used with two arguments, the first represents the
+	 *            k-order of the Markov objects, and the second represents
+	 *            the file to be read. The generated text will be the same
+	 *            number of characters as the original file.
+	 */
+	public static void main(String[] args) {
+		int k = 0;
+		int M = 0;
+
+		String file = "";
+		StringBuilder text = new StringBuilder();
+
+		if (args.length == 3) {
+			k = Integer.parseInt(args[0]);
+			M = Integer.parseInt(args[1]);
+			file = args[2];
+		} else if (args.length == 2) {
+			k = Integer.parseInt(args[0]);
+			file = args[1];
+		} else {
+			System.out
+					.println("\n" + "Usage: java TextGenerator k M file");
+			System.out.println(
+					"where k is the markov order, M is the number");
+			System.out.println(
+					"of characters to be printed, and file is the");
+			System.out.println(
+					"name of the file to print from. M may be left out."
+							+ "\n");
+			System.exit(1);
+		}
+
+		FileReader reader = null;
+
+		try {
+			reader = new FileReader(file);
+		} catch (FileNotFoundException e) {
+			System.out.println("File not found.");
+			e.printStackTrace();
+		}
+
+		MyHashMap<String, Markov> hash = new MyHashMap<String, Markov>();
+
+		Character next = null;
+
+		try {
+			next = (char) reader.read();
+		} catch (IOException e1) {
+			System.out.println("IOException in stepping through the file");
+			e1.printStackTrace();
+		}
+
+		StringBuilder origFileBuffer = new StringBuilder();
+
+		while (Character.isDefined(next)) {
+			Character.toString(next);
+			origFileBuffer.append(next);
+
+			try {
+				next = (char) reader.read();
+			} catch (IOException e) {
+				System.out.println(
+						"IOException in stepping through the file");
+				e.printStackTrace();
+			}
+
+		}
+
+		String origFile = origFileBuffer.toString();
+		String firstSub = origFile.substring(0, k);
+
+		for (int i = 0; i < origFile.length() - k; i++) {
+			String sub = origFile.substring(i, i + k);
+			Character suffix = origFile.charAt(i + k);
+
+			if (hash.containsKey(sub)) {
+				Markov marvin = hash.get(sub);
+				marvin.add(suffix);
+				hash.put(sub, marvin);
+			} else {
+				Markov marvin = new Markov(sub, suffix);
+				hash.put(sub, marvin);
+			}
+		}
+
+		if (M == 0) {
+			M = origFile.length();
+		}
+		for (int i = k; i < M; i++) {
+			if (i == k) {
+				text.append(firstSub);
+
+				if (text.length() > k)
+					i = text.length();
+			}
+
+			String sub = text.substring((i - k), (i));
+			Markov tmp = hash.get(sub);
+
+			if (tmp != null) {
+				Character nextChar = tmp.random();
+				text.append(nextChar);
+			} else {
+				i = k - 1;
+			}
+		}
+
+		if (hash.size() < 100) {
+			Iterator<String> keys = hash.keys();
+
+			while (keys.hasNext()) {
+				String hashKey = keys.next();
+				Markov hashValue = hash.get(hashKey);
+
+				System.out.print(hashValue.count() + " " + hashKey + ":");
+
+				for (Entry<Character, Integer> entry : hashValue.getMap()
+						.entrySet()) {
+					char suffix = entry.getKey();
+					int frequencyCount = entry.getValue();
+					System.out.print(" " + frequencyCount + " " + suffix);
+				}
+
+				System.out.println();
+			}
+		}
+
+		System.out.println(
+				text.toString().substring(0, Math.min(M, text.length())));
+	}
+}
+\ No newline at end of file
author	bculkin2442 <bjculkin@mix.wvu.edu>	2016-01-14 14:01:21 -0500
committer	bculkin2442 <bjculkin@mix.wvu.edu>	2016-01-14 14:01:21 -0500
commit	90708215a1b85f90738628260e822aa5eade3f56 (patch)
tree	c42874821c8a321a9ef1ba7ee05a924c0e3af706 /RGens/src/main/java
parent	f1943c74de8fe61f4990def15cbcfe6e83bea00a (diff)