summaryrefslogtreecommitdiff
path: root/RGens/src/main
diff options
context:
space:
mode:
authorbculkin2442 <bjculkin@mix.wvu.edu>2016-01-14 14:01:21 -0500
committerbculkin2442 <bjculkin@mix.wvu.edu>2016-01-14 14:01:21 -0500
commit90708215a1b85f90738628260e822aa5eade3f56 (patch)
treec42874821c8a321a9ef1ba7ee05a924c0e3af706 /RGens/src/main
parentf1943c74de8fe61f4990def15cbcfe6e83bea00a (diff)
Added markov-based generator
Diffstat (limited to 'RGens/src/main')
-rwxr-xr-xRGens/src/main/java/bjc/RGens/text/markov/FrequencyCounter.java57
-rwxr-xr-xRGens/src/main/java/bjc/RGens/text/markov/Markov.java187
-rw-r--r--RGens/src/main/java/bjc/RGens/text/markov/MyEntry.java71
-rwxr-xr-xRGens/src/main/java/bjc/RGens/text/markov/MyHashMap.java347
-rw-r--r--RGens/src/main/java/bjc/RGens/text/markov/MyIterator.java84
-rwxr-xr-xRGens/src/main/java/bjc/RGens/text/markov/SuffixCounter.java80
-rwxr-xr-xRGens/src/main/java/bjc/RGens/text/markov/TextGenerator.java146
7 files changed, 972 insertions, 0 deletions
diff --git a/RGens/src/main/java/bjc/RGens/text/markov/FrequencyCounter.java b/RGens/src/main/java/bjc/RGens/text/markov/FrequencyCounter.java
new file mode 100755
index 0000000..6009828
--- /dev/null
+++ b/RGens/src/main/java/bjc/RGens/text/markov/FrequencyCounter.java
@@ -0,0 +1,57 @@
+package bjc.RGens.text.markov;
+
+import java.util.*;
+
+/**
+ * Counts the frequencies of k-length substrings of input text.
+ *
+ * @author Daniel Friedman
+ *
+ */
+public class FrequencyCounter {
+
+ public static void main(String[] args) {
+ String text;
+
+ int k = 0;
+
+ if (args.length == 1) {
+ k = Integer.parseInt(args[0]);
+ }
+
+ System.out.print("Enter string: ");
+
+ Scanner s = new Scanner(System.in);
+ text = s.nextLine();
+
+ MyHashMap<String, Integer> hash = new MyHashMap<String, Integer>();
+ int distinct = 0;
+
+ for (int i = 0; i <= text.length() - k; i++) {
+ String sub = text.substring(i, i + k);
+
+ Markov m = new Markov(sub);
+ m.add();
+
+ if (hash.containsKey(sub)) {
+ Integer count = hash.get(sub);
+ hash.put(sub, count + 1);
+ } else {
+ hash.put(sub, m.count);
+ distinct++;
+ }
+
+ s.close();
+ }
+
+ System.out.println(distinct + " distinct keys");
+
+ Iterator<String> keys = hash.keys();
+
+ while (keys.hasNext()) {
+ String key = keys.next();
+ Integer value = hash.get(key);
+ System.out.println(value + " " + key);
+ }
+ }
+}
diff --git a/RGens/src/main/java/bjc/RGens/text/markov/Markov.java b/RGens/src/main/java/bjc/RGens/text/markov/Markov.java
new file mode 100755
index 0000000..1956111
--- /dev/null
+++ b/RGens/src/main/java/bjc/RGens/text/markov/Markov.java
@@ -0,0 +1,187 @@
+package bjc.RGens.text.markov;
+
+import java.util.Map.Entry;
+import java.util.*;
+
+/**
+ * Represents a k-character substring. Can give a pseudo-random suffix
+ * character based on probability.
+ *
+ * @author Daniel Friedman (Fall 2011)
+ *
+ */
+public class Markov {
+ String substring;
+ int count = 0;
+
+ TreeMap<Character, Integer> map;
+
+ /**
+ * Constructs a Markov object from a given substring.
+ *
+ * @param substring
+ * the given substring.
+ */
+ public Markov(String substring) {
+ this.substring = substring;
+
+ map = new TreeMap<Character, Integer>();
+
+ add();
+ }
+
+ /**
+ * Constructs a Markov object from a given substring and suffix
+ * character. Suffix characters are stored in a TreeMap.
+ *
+ * @param substring
+ * the specified substring.
+ * @param suffix
+ * the specified suffix.
+ */
+ public Markov(String substring, Character suffix) {
+ this.substring = substring;
+
+ map = new TreeMap<Character, Integer>();
+
+ add(suffix);
+ }
+
+ /**
+ * Increments the count of number of times the substring appears in a
+ * text.
+ */
+ public void add() {
+ count++;
+ }
+
+ /**
+ * Adds a suffix character to the TreeMap.
+ *
+ * @param c
+ * the suffix character to be added.
+ */
+ public void add(char c) {
+ add();
+
+ if (map.containsKey(c)) {
+ int frequency = map.get(c);
+ map.put(c, frequency + 1);
+ } else
+ map.put(c, 1);
+ }
+
+ /**
+ * Gives the frequency count of a suffix character; that is, the number
+ * of times the specified suffix follows the substring in a text.
+ *
+ * @param c
+ * the specified suffix.
+ * @return the frequency count.
+ */
+ public int getFrequencyCount(char c) {
+ if (!map.containsKey(c)) {
+ return -1;
+ } else {
+ return map.get(c);
+ }
+ }
+
+ /**
+ * Gives a percentage of frequency count / number of total suffixes.
+ *
+ * @param c
+ * @return
+ */
+ public double getCharFrequency(char c) {
+ if (getFrequencyCount(c) == -1) {
+ return -1;
+ } else {
+ return (double) getFrequencyCount(c) / (double) count;
+ }
+ }
+
+ /**
+ * Finds whether or not the given suffix is in the TreeMap.
+ *
+ * @param c
+ * the given suffix.
+ * @return True if the suffix exists in the TreeMap, false otherwise.
+ */
+ public boolean containsChar(char c) {
+ if (!map.containsKey(c)) {
+ return false;
+ } else {
+ return true;
+ }
+ }
+
+ /**
+ * Gives the number of times this substring occurs in a text.
+ *
+ * @return said number of times.
+ */
+ public int count() {
+ return count;
+ }
+
+ /**
+ * Gives the TreeMap.
+ *
+ * @return the TreeMap.
+ */
+ public TreeMap<Character, Integer> getMap() {
+ return map;
+ }
+
+ /**
+ * Using probability, returns a pseudo-random character to follow the
+ * substring. Character possibilities are added to an ArrayList
+ * (duplicates allowed), and a random number from 0 to the last index
+ * in the ArrayList is picked. Since more common suffixes occupy more
+ * indices in the ArrayList, the probability of getting a more common
+ * suffix is greater than the probability of getting a less common
+ * suffix.
+ *
+ * @return the pseudo-random suffix.
+ */
+ public char random() {
+ Character ret = null;
+
+ Set<Entry<Character, Integer>> s = map.entrySet();
+
+ Iterator<Entry<Character, Integer>> it = s.iterator();
+
+ ArrayList<Character> suffixes = new ArrayList<Character>();
+
+ while (it.hasNext()) {
+ Entry<Character, Integer> tmp = it.next();
+
+ for (int i = 0; i < tmp.getValue(); i++) {
+ suffixes.add(tmp.getKey());
+ }
+ }
+
+ Random rand = new Random();
+ int retIndex = rand.nextInt(suffixes.size());
+ ret = suffixes.get(retIndex);
+ return ret;
+ }
+
+ /**
+ * Gives a String representation of the Markov object.
+ *
+ * @return said String representation.
+ */
+ public String toString() {
+ String ret = "Substring: " + substring + ", Count: " + count;
+ ret += "\n" + "Suffixes and frequency counts: ";
+
+ for (Entry<Character, Integer> entry : map.entrySet()) {
+ char key = entry.getKey();
+ int value = entry.getValue();
+ ret += "\n" + "Suffix: " + key + ", frequency count: " + value;
+ }
+ return ret;
+ }
+}
diff --git a/RGens/src/main/java/bjc/RGens/text/markov/MyEntry.java b/RGens/src/main/java/bjc/RGens/text/markov/MyEntry.java
new file mode 100644
index 0000000..c33fa18
--- /dev/null
+++ b/RGens/src/main/java/bjc/RGens/text/markov/MyEntry.java
@@ -0,0 +1,71 @@
+package bjc.RGens.text.markov;
+
+/**
+ * Represents a key, value pairing.
+ *
+ * @author Daniel Friedman
+ *
+ */
+public class MyEntry<K, V> {
+ protected K key;
+ protected V value;
+
+ @Override
+ /**
+ * Gives the hashcode of a MyEntry object.
+ *
+ * @return the hashcode of the MyEntry's key.
+ */
+ public int hashCode() {
+ return key.hashCode();
+ }
+
+ /**
+ * Tests whether this object is equivalent to a specified other.
+ *
+ * @param obj
+ * the other object.
+ * @return true if the keys are equivalent.
+ */
+ @SuppressWarnings("unchecked")
+ @Override
+ public boolean equals(Object obj) {
+ boolean ret = false;
+
+ if (obj == null) {
+ ret = false;
+ }
+
+ MyEntry<K, V> other = (MyEntry<K, V>) obj;
+
+ if (key.equals(other.key)) {
+ ret = true;
+ }
+
+ return ret;
+ }
+
+ /**
+ * Constructs a MyEntry object with given key and value.
+ *
+ * @param key
+ * the key to be stored.
+ * @param value
+ * the value to be stored.
+ * @param myHashMap
+ * TODO
+ */
+ public MyEntry(K key, V value) {
+ this.key = key;
+ this.value = value;
+ }
+
+ /**
+ * Gives a String representation of this object.
+ *
+ * @return the String representation.
+ */
+ public String toString() {
+ return "(" + key + ", " + value + ")";
+ }
+} \ No newline at end of file
diff --git a/RGens/src/main/java/bjc/RGens/text/markov/MyHashMap.java b/RGens/src/main/java/bjc/RGens/text/markov/MyHashMap.java
new file mode 100755
index 0000000..10158ea
--- /dev/null
+++ b/RGens/src/main/java/bjc/RGens/text/markov/MyHashMap.java
@@ -0,0 +1,347 @@
+package bjc.RGens.text.markov;
+
+import java.util.*;
+
+/**
+ * A Hash table implementation. Uses an array of LinkedList<MyEntry> as
+ * backing storage.
+ *
+ * @author Daniel Friedman
+ *
+ * @param <K>
+ * generic type for Keys.
+ * @param <V>
+ * generic type for Values.
+ */
+
+public class MyHashMap<K, V> {
+ private int size;
+ private float loadFactor;
+
+ LinkedList<MyEntry<K, V>> table[];
+ private ArrayList<Integer> primes = new ArrayList<Integer>();
+
+ /**
+ * Constructs a new MyHashMap object.
+ *
+ * @param capacity
+ * the size of the array.
+ * @param loadFactor
+ * the specified load factor. The array will resize when the
+ * load factor is reached.
+ */
+ @SuppressWarnings("unchecked")
+ public MyHashMap(int capacity, float loadFactor) {
+ table = (LinkedList<MyEntry<K, V>>[]) new LinkedList[capacity];
+
+ this.loadFactor = loadFactor;
+
+ for (int i = 0; i < table.length; i++) {
+ table[i] = new LinkedList<MyEntry<K, V>>();
+ }
+ }
+
+ /**
+ * Constructs a MyHashMap with capacity 11 and load factor 0.75.
+ */
+ public MyHashMap() {
+ this(11, (float) 0.75);
+
+ primes.add(11);
+ primes.add(23);
+ primes.add(47);
+ primes.add(97);
+ primes.add(197);
+ primes.add(397);
+ primes.add(797);
+ primes.add(1597);
+ primes.add(3203);
+ primes.add(6421);
+ primes.add(12853);
+ primes.add(25717);
+ primes.add(51437);
+ primes.add(102877);
+ primes.add(205759);
+ primes.add(411527);
+ primes.add(823117);
+ primes.add(1646237);
+ primes.add(3292489);
+ primes.add(6584983);
+ primes.add(13169977);
+ primes.add(26339969);
+ primes.add(52679969);
+ primes.add(105359939);
+ primes.add(210719881);
+ primes.add(421439783);
+ primes.add(842879579);
+ primes.add(1685759167);
+ }
+
+ /**
+ * Gives the number of MyEntries in the array.
+ *
+ * @return number of MyEntry objects in the array.
+ */
+ public int size() {
+ return size;
+ }
+
+ /**
+ * Tests whether or not the array is empty.
+ *
+ * @return true if empty, false otherwise.
+ */
+ public boolean isEmpty() {
+ if (size == 0) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ /**
+ * Removes all objects from the array.
+ */
+ public void clear() {
+ for (int i = 0; i < table.length; i++) {
+ table[i].clear();
+ }
+ }
+
+ /**
+ * Gives a String representation of the array.
+ *
+ * @return said String representation.
+ */
+ public String toString() {
+ String ret = "";
+
+ for (int i = 0; i < table.length; i++) {
+ ret += ("Bucket " + i + ": " + table[i] + "\n");
+ }
+
+ return ret;
+ }
+
+ /**
+ * Adds a key, value mapping to the array. If the key already is
+ * contained, the value will be updated.
+ *
+ * @param key
+ * the key to be added.
+ * @param value
+ * the value to be added.
+ * @return the previous value if the key already was contained, null
+ * otherwise.
+ */
+ public V put(K key, V value) {
+ MyEntry<K, V> entry = new MyEntry<K, V>(key, value);
+
+ int hashCode = Math.abs(key.hashCode());
+ int mapping = hashCode % (table.length);
+
+ LinkedList<MyEntry<K, V>> bucket = table[mapping];
+
+ V ret = null;
+
+ if (bucket.contains(entry)) {
+ int index = bucket.indexOf(entry);
+
+ ret = bucket.get(index).value;
+ bucket.set(index, entry);
+ } else {
+ bucket.add(entry);
+ }
+
+ size++;
+
+ if ((double) size / (double) table.length >= loadFactor) {
+ resize();
+ }
+
+ return ret;
+ }
+
+ /**
+ * Gives the value associated with a specified key.
+ *
+ * @param key
+ * the specified key.
+ * @return the value associated. Null if the key is not found.
+ */
+ public V get(K key) {
+ V ret = null;
+
+ int hashCode = Math.abs(key.hashCode());
+ int mapping = hashCode % (table.length);
+
+ LinkedList<MyEntry<K, V>> bucket = table[mapping];
+
+ for (int i = 0; i < bucket.size(); i++) {
+ MyEntry<K, V> cur = bucket.get(i);
+
+ if (cur.key.equals(key)) {
+ ret = cur.value;
+ }
+ }
+ return ret;
+ }
+
+ /**
+ * Removes a specified object from the array.
+ *
+ * @param key
+ * the key corresponding to the MyEntry to be removed.
+ * @return the value associated with the removed key.
+ */
+ public V remove(K key) {
+ V ret = null;
+
+ int hashCode = Math.abs(key.hashCode());
+ int mapping = hashCode % (table.length);
+
+ LinkedList<MyEntry<K, V>> bucket = table[mapping];
+
+ for (int i = 0; i < bucket.size(); i++) {
+ MyEntry<K, V> cur = bucket.get(i);
+
+ if (cur.key.equals(key)) {
+ ret = cur.value;
+ bucket.remove(cur);
+ }
+ }
+
+ size--;
+ return ret;
+ }
+
+ /**
+ * Finds whether or not the array contains a given key.
+ *
+ * @param key
+ * the key to be tested.
+ * @return true if found, false if not.
+ */
+ public boolean containsKey(K key) {
+ boolean ret = false;
+
+ int hashCode = Math.abs(key.hashCode());
+ int mapping = hashCode % (table.length);
+
+ LinkedList<MyEntry<K, V>> bucket = table[mapping];
+ MyEntry<K, V> test = new MyEntry<K, V>(key, null);
+
+ if (bucket.contains(test)) {
+ ret = true;
+ }
+
+ return ret;
+ }
+
+ /**
+ * Finds whether or not the array contains a given value.
+ *
+ * @param value
+ * the value to be tested.
+ * @return true if found, false if not.
+ */
+ public boolean containsValue(V value) {
+ boolean ret = false;
+
+ for (int i = 0; i < table.length; i++) {
+ LinkedList<MyEntry<K, V>> bucket = table[i];
+
+ for (int j = 0; j < bucket.size(); j++) {
+ MyEntry<K, V> entry = bucket.get(j);
+
+ if (entry.value.equals(value))
+ ret = true;
+ }
+ }
+ return ret;
+ }
+
+ /**
+ * Iterates over the MyEntry objects in the array, looking at their
+ * keys.
+ *
+ * @return a keys iterator.
+ */
+ public Iterator<K> keys() {
+ return new MyIterator<K, V>(this);
+ }
+
+ /**
+ * Resizes the array to the next largest prime number that is at least
+ * double the current array size.
+ */
+ public void resize() {
+ MyHashMap<K, V> tmp = new MyHashMap<K, V>(helper(table.length),
+ (float) 0.75);
+ Iterator<K> it = keys();
+
+ while (it.hasNext()) {
+ K key = it.next();
+ V value = get(key);
+ tmp.put(key, value);
+ }
+
+ this.table = tmp.table;
+ this.size = tmp.size;
+ this.loadFactor = tmp.loadFactor;
+ }
+
+ /**
+ * Gives the next largest prime number that is at least double the
+ * current array size.
+ *
+ * @param i
+ * @return
+ */
+ public int helper(int i) {
+ int ret = -1;
+
+ for (int j = 0; j < primes.size(); j++) {
+ if (primes.get(j) > i) {
+ ret = primes.get(j);
+ break;
+ }
+ }
+
+ return ret;
+ }
+
+ /**
+ * Tests the MyHashMap class and its methods.
+ *
+ */
+ public static void main(String[] args) {
+ // test1:
+ /*
+ * MyHashMap<String, Integer> testMap = new MyHashMap<String,
+ * Integer>();
+ *
+ * System.out.println("HashMap size: "+testMap.size());
+ * System.out.println("HashMap capacity: "+testMap.table.length);
+ * System.out.println("HashMap Load Factor: "+testMap.loadFactor);
+ * System.out.println("HashMap isEmpty? "+testMap.isEmpty());
+ * System.out.println("HashMap toString: "+testMap);
+ */
+
+ // test2:
+ MyHashMap<String, Integer> testMap = new MyHashMap<String, Integer>();
+
+ for (int i = 0; i < 100; i++) {
+ testMap.put("" + i, i);
+ System.out.println("Hashtable:");
+ System.out.println(testMap);
+ }
+
+ for (int i = 0; i < 100; i++) {
+ String key = "" + i;
+ System.out.println("Removing key " + key + ":");
+ testMap.remove(key);
+ System.out.println(testMap);
+ }
+ }
+}
diff --git a/RGens/src/main/java/bjc/RGens/text/markov/MyIterator.java b/RGens/src/main/java/bjc/RGens/text/markov/MyIterator.java
new file mode 100644
index 0000000..bb6d0d9
--- /dev/null
+++ b/RGens/src/main/java/bjc/RGens/text/markov/MyIterator.java
@@ -0,0 +1,84 @@
+package bjc.RGens.text.markov;
+
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.NoSuchElementException;
+
+public class MyIterator<K, V> implements Iterator<K> {
+ private final MyHashMap<K, V> attachedMap;
+
+ public MyIterator(MyHashMap<K, V> myHashMap) {
+ attachedMap = myHashMap;
+ bucket = attachedMap.table[bucketIndex];
+ }
+
+ int bucketIndex = 0;
+
+ LinkedList<MyEntry<K, V>> bucket;
+ Iterator<MyEntry<K, V>> listIt = bucket.iterator();
+
+ MyEntry<K, V> cur;
+
+ /**
+ * Finds whether or not there is a next MyEntry object in the array.
+ *
+ * @return
+ */
+ @Override
+ public boolean hasNext() {
+ if (listIt.hasNext()) {
+ return true;
+ } else if (bucketIndex == (attachedMap.table.length - 1)) {
+ return false;
+ } else {
+ for (int i = bucketIndex
+ + 1; i < attachedMap.table.length; i++) {
+ if (!attachedMap.table[i].isEmpty()) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Gives the key associated with the next MyEntry object in the array.
+ * Throws a NoSuchElementException if the array has been completely
+ * iterated.
+ *
+ * @return the key associated with the next MyEntry object.
+ */
+ @Override
+ public K next() {
+ K ret = null;
+
+ if (listIt.hasNext()) {
+ cur = listIt.next();
+ ret = cur.key;
+ } else if (hasNext()) {
+ for (int i = bucketIndex
+ + 1; i < attachedMap.table.length; i++) {
+ if (!attachedMap.table[i].isEmpty()) {
+ bucketIndex = i;
+
+ bucket = attachedMap.table[bucketIndex];
+ listIt = bucket.iterator();
+ cur = listIt.next();
+
+ ret = cur.key;
+ break;
+ }
+ }
+ } else
+ throw new NoSuchElementException("All buckets iterated.");
+ return ret;
+ }
+
+ /**
+ * Removes the current MyEntry.
+ */
+ @Override
+ public void remove() {
+ bucket.remove(cur);
+ }
+} \ No newline at end of file
diff --git a/RGens/src/main/java/bjc/RGens/text/markov/SuffixCounter.java b/RGens/src/main/java/bjc/RGens/text/markov/SuffixCounter.java
new file mode 100755
index 0000000..67a8d01
--- /dev/null
+++ b/RGens/src/main/java/bjc/RGens/text/markov/SuffixCounter.java
@@ -0,0 +1,80 @@
+package bjc.RGens.text.markov;
+
+import java.util.Iterator;
+import java.util.Map.Entry;
+import java.util.Scanner;
+
+/**
+ * Similar to FrequencyCounter, but also counts the suffixes for each
+ * k-length substring in the text.
+ *
+ * @author Daniel Friedman
+ *
+ */
+
+public class SuffixCounter {
+ public static void main(String[] args) {
+ String text;
+ int k = 0;
+
+ if (args.length == 1) {
+ k = Integer.parseInt(args[0]);
+ }
+
+ System.out.print("Enter string: ");
+ Scanner s = new Scanner(System.in);
+ text = s.nextLine();
+
+ MyHashMap<String, Markov> hash = new MyHashMap<String, Markov>();
+
+ int distinct = 0;
+
+ for (int i = 0; i <= text.length() - k; i++) {
+ String sub = text.substring(i, i + k);
+ Character suffix = null;
+
+ if (((i + k) <= (text.length() - 1))) {
+ suffix = text.charAt(i + k);
+ }
+
+ if (hash.containsKey(sub)) {
+ Markov temp = hash.get(sub);
+
+ if (!(suffix == null)) {
+ temp.add(suffix);
+ hash.put(sub, temp);
+ }
+
+ } else {
+ Markov m;
+
+ if (!(suffix == null)) {
+ m = new Markov(sub, suffix);
+ hash.put(sub, m);
+ distinct++;
+ }
+ }
+ }
+
+ System.out.println(distinct + " distinct keys");
+ Iterator<String> keys = hash.keys();
+
+ while (keys.hasNext()) {
+ String hashKey = keys.next();
+
+ Markov hashValue = hash.get(hashKey);
+ System.out.print(hashValue.count() + " " + hashKey + ":");
+
+ for (Entry<Character, Integer> entry : hashValue.getMap()
+ .entrySet()) {
+ char suffix = entry.getKey();
+ int frequencyCount = entry.getValue();
+ System.out.print(" " + frequencyCount + " " + suffix);
+ }
+
+ System.out.println();
+ }
+
+ s.close();
+ }
+}
diff --git a/RGens/src/main/java/bjc/RGens/text/markov/TextGenerator.java b/RGens/src/main/java/bjc/RGens/text/markov/TextGenerator.java
new file mode 100755
index 0000000..d347179
--- /dev/null
+++ b/RGens/src/main/java/bjc/RGens/text/markov/TextGenerator.java
@@ -0,0 +1,146 @@
+package bjc.RGens.text.markov;
+
+import java.io.*;
+import java.util.*;
+import java.util.Map.Entry;
+
+public class TextGenerator {
+
+ /**
+ * @param args
+ * when used with three arguments, the first represents the
+ * k-order of the Markov objects. The second represents the
+ * number of characters to print out. The third represents
+ * the file to be read.
+ *
+ * When used with two arguments, the first represents the
+ * k-order of the Markov objects, and the second represents
+ * the file to be read. The generated text will be the same
+ * number of characters as the original file.
+ */
+ public static void main(String[] args) {
+ int k = 0;
+ int M = 0;
+
+ String file = "";
+ StringBuilder text = new StringBuilder();
+
+ if (args.length == 3) {
+ k = Integer.parseInt(args[0]);
+ M = Integer.parseInt(args[1]);
+ file = args[2];
+ } else if (args.length == 2) {
+ k = Integer.parseInt(args[0]);
+ file = args[1];
+ } else {
+ System.out
+ .println("\n" + "Usage: java TextGenerator k M file");
+ System.out.println(
+ "where k is the markov order, M is the number");
+ System.out.println(
+ "of characters to be printed, and file is the");
+ System.out.println(
+ "name of the file to print from. M may be left out."
+ + "\n");
+ System.exit(1);
+ }
+
+ FileReader reader = null;
+
+ try {
+ reader = new FileReader(file);
+ } catch (FileNotFoundException e) {
+ System.out.println("File not found.");
+ e.printStackTrace();
+ }
+
+ MyHashMap<String, Markov> hash = new MyHashMap<String, Markov>();
+
+ Character next = null;
+
+ try {
+ next = (char) reader.read();
+ } catch (IOException e1) {
+ System.out.println("IOException in stepping through the file");
+ e1.printStackTrace();
+ }
+
+ StringBuilder origFileBuffer = new StringBuilder();
+
+ while (Character.isDefined(next)) {
+ Character.toString(next);
+ origFileBuffer.append(next);
+
+ try {
+ next = (char) reader.read();
+ } catch (IOException e) {
+ System.out.println(
+ "IOException in stepping through the file");
+ e.printStackTrace();
+ }
+
+ }
+
+ String origFile = origFileBuffer.toString();
+ String firstSub = origFile.substring(0, k);
+
+ for (int i = 0; i < origFile.length() - k; i++) {
+ String sub = origFile.substring(i, i + k);
+ Character suffix = origFile.charAt(i + k);
+
+ if (hash.containsKey(sub)) {
+ Markov marvin = hash.get(sub);
+ marvin.add(suffix);
+ hash.put(sub, marvin);
+ } else {
+ Markov marvin = new Markov(sub, suffix);
+ hash.put(sub, marvin);
+ }
+ }
+
+ if (M == 0) {
+ M = origFile.length();
+ }
+ for (int i = k; i < M; i++) {
+ if (i == k) {
+ text.append(firstSub);
+
+ if (text.length() > k)
+ i = text.length();
+ }
+
+ String sub = text.substring((i - k), (i));
+ Markov tmp = hash.get(sub);
+
+ if (tmp != null) {
+ Character nextChar = tmp.random();
+ text.append(nextChar);
+ } else {
+ i = k - 1;
+ }
+ }
+
+ if (hash.size() < 100) {
+ Iterator<String> keys = hash.keys();
+
+ while (keys.hasNext()) {
+ String hashKey = keys.next();
+ Markov hashValue = hash.get(hashKey);
+
+ System.out.print(hashValue.count() + " " + hashKey + ":");
+
+ for (Entry<Character, Integer> entry : hashValue.getMap()
+ .entrySet()) {
+ char suffix = entry.getKey();
+ int frequencyCount = entry.getValue();
+ System.out.print(" " + frequencyCount + " " + suffix);
+ }
+
+ System.out.println();
+ }
+ }
+
+ System.out.println(
+ text.toString().substring(0, Math.min(M, text.length())));
+ }
+} \ No newline at end of file