diff options
| author | bculkin2442 <bjculkin@mix.wvu.edu> | 2016-01-14 14:01:21 -0500 |
|---|---|---|
| committer | bculkin2442 <bjculkin@mix.wvu.edu> | 2016-01-14 14:01:21 -0500 |
| commit | 90708215a1b85f90738628260e822aa5eade3f56 (patch) | |
| tree | c42874821c8a321a9ef1ba7ee05a924c0e3af706 /RGens/src/main/java | |
| parent | f1943c74de8fe61f4990def15cbcfe6e83bea00a (diff) | |
Added markov-based generator
Diffstat (limited to 'RGens/src/main/java')
7 files changed, 972 insertions, 0 deletions
diff --git a/RGens/src/main/java/bjc/RGens/text/markov/FrequencyCounter.java b/RGens/src/main/java/bjc/RGens/text/markov/FrequencyCounter.java new file mode 100755 index 0000000..6009828 --- /dev/null +++ b/RGens/src/main/java/bjc/RGens/text/markov/FrequencyCounter.java @@ -0,0 +1,57 @@ +package bjc.RGens.text.markov; + +import java.util.*; + +/** + * Counts the frequencies of k-length substrings of input text. + * + * @author Daniel Friedman + * + */ +public class FrequencyCounter { + + public static void main(String[] args) { + String text; + + int k = 0; + + if (args.length == 1) { + k = Integer.parseInt(args[0]); + } + + System.out.print("Enter string: "); + + Scanner s = new Scanner(System.in); + text = s.nextLine(); + + MyHashMap<String, Integer> hash = new MyHashMap<String, Integer>(); + int distinct = 0; + + for (int i = 0; i <= text.length() - k; i++) { + String sub = text.substring(i, i + k); + + Markov m = new Markov(sub); + m.add(); + + if (hash.containsKey(sub)) { + Integer count = hash.get(sub); + hash.put(sub, count + 1); + } else { + hash.put(sub, m.count); + distinct++; + } + + s.close(); + } + + System.out.println(distinct + " distinct keys"); + + Iterator<String> keys = hash.keys(); + + while (keys.hasNext()) { + String key = keys.next(); + Integer value = hash.get(key); + System.out.println(value + " " + key); + } + } +} diff --git a/RGens/src/main/java/bjc/RGens/text/markov/Markov.java b/RGens/src/main/java/bjc/RGens/text/markov/Markov.java new file mode 100755 index 0000000..1956111 --- /dev/null +++ b/RGens/src/main/java/bjc/RGens/text/markov/Markov.java @@ -0,0 +1,187 @@ +package bjc.RGens.text.markov; + +import java.util.Map.Entry; +import java.util.*; + +/** + * Represents a k-character substring. Can give a pseudo-random suffix + * character based on probability. + * + * @author Daniel Friedman (Fall 2011) + * + */ +public class Markov { + String substring; + int count = 0; + + TreeMap<Character, Integer> map; + + /** + * Constructs a Markov object from a given substring. + * + * @param substring + * the given substring. + */ + public Markov(String substring) { + this.substring = substring; + + map = new TreeMap<Character, Integer>(); + + add(); + } + + /** + * Constructs a Markov object from a given substring and suffix + * character. Suffix characters are stored in a TreeMap. + * + * @param substring + * the specified substring. + * @param suffix + * the specified suffix. + */ + public Markov(String substring, Character suffix) { + this.substring = substring; + + map = new TreeMap<Character, Integer>(); + + add(suffix); + } + + /** + * Increments the count of number of times the substring appears in a + * text. + */ + public void add() { + count++; + } + + /** + * Adds a suffix character to the TreeMap. + * + * @param c + * the suffix character to be added. + */ + public void add(char c) { + add(); + + if (map.containsKey(c)) { + int frequency = map.get(c); + map.put(c, frequency + 1); + } else + map.put(c, 1); + } + + /** + * Gives the frequency count of a suffix character; that is, the number + * of times the specified suffix follows the substring in a text. + * + * @param c + * the specified suffix. + * @return the frequency count. + */ + public int getFrequencyCount(char c) { + if (!map.containsKey(c)) { + return -1; + } else { + return map.get(c); + } + } + + /** + * Gives a percentage of frequency count / number of total suffixes. + * + * @param c + * @return + */ + public double getCharFrequency(char c) { + if (getFrequencyCount(c) == -1) { + return -1; + } else { + return (double) getFrequencyCount(c) / (double) count; + } + } + + /** + * Finds whether or not the given suffix is in the TreeMap. + * + * @param c + * the given suffix. + * @return True if the suffix exists in the TreeMap, false otherwise. + */ + public boolean containsChar(char c) { + if (!map.containsKey(c)) { + return false; + } else { + return true; + } + } + + /** + * Gives the number of times this substring occurs in a text. + * + * @return said number of times. + */ + public int count() { + return count; + } + + /** + * Gives the TreeMap. + * + * @return the TreeMap. + */ + public TreeMap<Character, Integer> getMap() { + return map; + } + + /** + * Using probability, returns a pseudo-random character to follow the + * substring. Character possibilities are added to an ArrayList + * (duplicates allowed), and a random number from 0 to the last index + * in the ArrayList is picked. Since more common suffixes occupy more + * indices in the ArrayList, the probability of getting a more common + * suffix is greater than the probability of getting a less common + * suffix. + * + * @return the pseudo-random suffix. + */ + public char random() { + Character ret = null; + + Set<Entry<Character, Integer>> s = map.entrySet(); + + Iterator<Entry<Character, Integer>> it = s.iterator(); + + ArrayList<Character> suffixes = new ArrayList<Character>(); + + while (it.hasNext()) { + Entry<Character, Integer> tmp = it.next(); + + for (int i = 0; i < tmp.getValue(); i++) { + suffixes.add(tmp.getKey()); + } + } + + Random rand = new Random(); + int retIndex = rand.nextInt(suffixes.size()); + ret = suffixes.get(retIndex); + return ret; + } + + /** + * Gives a String representation of the Markov object. + * + * @return said String representation. + */ + public String toString() { + String ret = "Substring: " + substring + ", Count: " + count; + ret += "\n" + "Suffixes and frequency counts: "; + + for (Entry<Character, Integer> entry : map.entrySet()) { + char key = entry.getKey(); + int value = entry.getValue(); + ret += "\n" + "Suffix: " + key + ", frequency count: " + value; + } + return ret; + } +} diff --git a/RGens/src/main/java/bjc/RGens/text/markov/MyEntry.java b/RGens/src/main/java/bjc/RGens/text/markov/MyEntry.java new file mode 100644 index 0000000..c33fa18 --- /dev/null +++ b/RGens/src/main/java/bjc/RGens/text/markov/MyEntry.java @@ -0,0 +1,71 @@ +package bjc.RGens.text.markov; + +/** + * Represents a key, value pairing. + * + * @author Daniel Friedman + * + */ +public class MyEntry<K, V> { + protected K key; + protected V value; + + @Override + /** + * Gives the hashcode of a MyEntry object. + * + * @return the hashcode of the MyEntry's key. + */ + public int hashCode() { + return key.hashCode(); + } + + /** + * Tests whether this object is equivalent to a specified other. + * + * @param obj + * the other object. + * @return true if the keys are equivalent. + */ + @SuppressWarnings("unchecked") + @Override + public boolean equals(Object obj) { + boolean ret = false; + + if (obj == null) { + ret = false; + } + + MyEntry<K, V> other = (MyEntry<K, V>) obj; + + if (key.equals(other.key)) { + ret = true; + } + + return ret; + } + + /** + * Constructs a MyEntry object with given key and value. + * + * @param key + * the key to be stored. + * @param value + * the value to be stored. + * @param myHashMap + * TODO + */ + public MyEntry(K key, V value) { + this.key = key; + this.value = value; + } + + /** + * Gives a String representation of this object. + * + * @return the String representation. + */ + public String toString() { + return "(" + key + ", " + value + ")"; + } +}
\ No newline at end of file diff --git a/RGens/src/main/java/bjc/RGens/text/markov/MyHashMap.java b/RGens/src/main/java/bjc/RGens/text/markov/MyHashMap.java new file mode 100755 index 0000000..10158ea --- /dev/null +++ b/RGens/src/main/java/bjc/RGens/text/markov/MyHashMap.java @@ -0,0 +1,347 @@ +package bjc.RGens.text.markov; + +import java.util.*; + +/** + * A Hash table implementation. Uses an array of LinkedList<MyEntry> as + * backing storage. + * + * @author Daniel Friedman + * + * @param <K> + * generic type for Keys. + * @param <V> + * generic type for Values. + */ + +public class MyHashMap<K, V> { + private int size; + private float loadFactor; + + LinkedList<MyEntry<K, V>> table[]; + private ArrayList<Integer> primes = new ArrayList<Integer>(); + + /** + * Constructs a new MyHashMap object. + * + * @param capacity + * the size of the array. + * @param loadFactor + * the specified load factor. The array will resize when the + * load factor is reached. + */ + @SuppressWarnings("unchecked") + public MyHashMap(int capacity, float loadFactor) { + table = (LinkedList<MyEntry<K, V>>[]) new LinkedList[capacity]; + + this.loadFactor = loadFactor; + + for (int i = 0; i < table.length; i++) { + table[i] = new LinkedList<MyEntry<K, V>>(); + } + } + + /** + * Constructs a MyHashMap with capacity 11 and load factor 0.75. + */ + public MyHashMap() { + this(11, (float) 0.75); + + primes.add(11); + primes.add(23); + primes.add(47); + primes.add(97); + primes.add(197); + primes.add(397); + primes.add(797); + primes.add(1597); + primes.add(3203); + primes.add(6421); + primes.add(12853); + primes.add(25717); + primes.add(51437); + primes.add(102877); + primes.add(205759); + primes.add(411527); + primes.add(823117); + primes.add(1646237); + primes.add(3292489); + primes.add(6584983); + primes.add(13169977); + primes.add(26339969); + primes.add(52679969); + primes.add(105359939); + primes.add(210719881); + primes.add(421439783); + primes.add(842879579); + primes.add(1685759167); + } + + /** + * Gives the number of MyEntries in the array. + * + * @return number of MyEntry objects in the array. + */ + public int size() { + return size; + } + + /** + * Tests whether or not the array is empty. + * + * @return true if empty, false otherwise. + */ + public boolean isEmpty() { + if (size == 0) { + return true; + } else { + return false; + } + } + + /** + * Removes all objects from the array. + */ + public void clear() { + for (int i = 0; i < table.length; i++) { + table[i].clear(); + } + } + + /** + * Gives a String representation of the array. + * + * @return said String representation. + */ + public String toString() { + String ret = ""; + + for (int i = 0; i < table.length; i++) { + ret += ("Bucket " + i + ": " + table[i] + "\n"); + } + + return ret; + } + + /** + * Adds a key, value mapping to the array. If the key already is + * contained, the value will be updated. + * + * @param key + * the key to be added. + * @param value + * the value to be added. + * @return the previous value if the key already was contained, null + * otherwise. + */ + public V put(K key, V value) { + MyEntry<K, V> entry = new MyEntry<K, V>(key, value); + + int hashCode = Math.abs(key.hashCode()); + int mapping = hashCode % (table.length); + + LinkedList<MyEntry<K, V>> bucket = table[mapping]; + + V ret = null; + + if (bucket.contains(entry)) { + int index = bucket.indexOf(entry); + + ret = bucket.get(index).value; + bucket.set(index, entry); + } else { + bucket.add(entry); + } + + size++; + + if ((double) size / (double) table.length >= loadFactor) { + resize(); + } + + return ret; + } + + /** + * Gives the value associated with a specified key. + * + * @param key + * the specified key. + * @return the value associated. Null if the key is not found. + */ + public V get(K key) { + V ret = null; + + int hashCode = Math.abs(key.hashCode()); + int mapping = hashCode % (table.length); + + LinkedList<MyEntry<K, V>> bucket = table[mapping]; + + for (int i = 0; i < bucket.size(); i++) { + MyEntry<K, V> cur = bucket.get(i); + + if (cur.key.equals(key)) { + ret = cur.value; + } + } + return ret; + } + + /** + * Removes a specified object from the array. + * + * @param key + * the key corresponding to the MyEntry to be removed. + * @return the value associated with the removed key. + */ + public V remove(K key) { + V ret = null; + + int hashCode = Math.abs(key.hashCode()); + int mapping = hashCode % (table.length); + + LinkedList<MyEntry<K, V>> bucket = table[mapping]; + + for (int i = 0; i < bucket.size(); i++) { + MyEntry<K, V> cur = bucket.get(i); + + if (cur.key.equals(key)) { + ret = cur.value; + bucket.remove(cur); + } + } + + size--; + return ret; + } + + /** + * Finds whether or not the array contains a given key. + * + * @param key + * the key to be tested. + * @return true if found, false if not. + */ + public boolean containsKey(K key) { + boolean ret = false; + + int hashCode = Math.abs(key.hashCode()); + int mapping = hashCode % (table.length); + + LinkedList<MyEntry<K, V>> bucket = table[mapping]; + MyEntry<K, V> test = new MyEntry<K, V>(key, null); + + if (bucket.contains(test)) { + ret = true; + } + + return ret; + } + + /** + * Finds whether or not the array contains a given value. + * + * @param value + * the value to be tested. + * @return true if found, false if not. + */ + public boolean containsValue(V value) { + boolean ret = false; + + for (int i = 0; i < table.length; i++) { + LinkedList<MyEntry<K, V>> bucket = table[i]; + + for (int j = 0; j < bucket.size(); j++) { + MyEntry<K, V> entry = bucket.get(j); + + if (entry.value.equals(value)) + ret = true; + } + } + return ret; + } + + /** + * Iterates over the MyEntry objects in the array, looking at their + * keys. + * + * @return a keys iterator. + */ + public Iterator<K> keys() { + return new MyIterator<K, V>(this); + } + + /** + * Resizes the array to the next largest prime number that is at least + * double the current array size. + */ + public void resize() { + MyHashMap<K, V> tmp = new MyHashMap<K, V>(helper(table.length), + (float) 0.75); + Iterator<K> it = keys(); + + while (it.hasNext()) { + K key = it.next(); + V value = get(key); + tmp.put(key, value); + } + + this.table = tmp.table; + this.size = tmp.size; + this.loadFactor = tmp.loadFactor; + } + + /** + * Gives the next largest prime number that is at least double the + * current array size. + * + * @param i + * @return + */ + public int helper(int i) { + int ret = -1; + + for (int j = 0; j < primes.size(); j++) { + if (primes.get(j) > i) { + ret = primes.get(j); + break; + } + } + + return ret; + } + + /** + * Tests the MyHashMap class and its methods. + * + */ + public static void main(String[] args) { + // test1: + /* + * MyHashMap<String, Integer> testMap = new MyHashMap<String, + * Integer>(); + * + * System.out.println("HashMap size: "+testMap.size()); + * System.out.println("HashMap capacity: "+testMap.table.length); + * System.out.println("HashMap Load Factor: "+testMap.loadFactor); + * System.out.println("HashMap isEmpty? "+testMap.isEmpty()); + * System.out.println("HashMap toString: "+testMap); + */ + + // test2: + MyHashMap<String, Integer> testMap = new MyHashMap<String, Integer>(); + + for (int i = 0; i < 100; i++) { + testMap.put("" + i, i); + System.out.println("Hashtable:"); + System.out.println(testMap); + } + + for (int i = 0; i < 100; i++) { + String key = "" + i; + System.out.println("Removing key " + key + ":"); + testMap.remove(key); + System.out.println(testMap); + } + } +} diff --git a/RGens/src/main/java/bjc/RGens/text/markov/MyIterator.java b/RGens/src/main/java/bjc/RGens/text/markov/MyIterator.java new file mode 100644 index 0000000..bb6d0d9 --- /dev/null +++ b/RGens/src/main/java/bjc/RGens/text/markov/MyIterator.java @@ -0,0 +1,84 @@ +package bjc.RGens.text.markov; + +import java.util.Iterator; +import java.util.LinkedList; +import java.util.NoSuchElementException; + +public class MyIterator<K, V> implements Iterator<K> { + private final MyHashMap<K, V> attachedMap; + + public MyIterator(MyHashMap<K, V> myHashMap) { + attachedMap = myHashMap; + bucket = attachedMap.table[bucketIndex]; + } + + int bucketIndex = 0; + + LinkedList<MyEntry<K, V>> bucket; + Iterator<MyEntry<K, V>> listIt = bucket.iterator(); + + MyEntry<K, V> cur; + + /** + * Finds whether or not there is a next MyEntry object in the array. + * + * @return + */ + @Override + public boolean hasNext() { + if (listIt.hasNext()) { + return true; + } else if (bucketIndex == (attachedMap.table.length - 1)) { + return false; + } else { + for (int i = bucketIndex + + 1; i < attachedMap.table.length; i++) { + if (!attachedMap.table[i].isEmpty()) { + return true; + } + } + } + return false; + } + + /** + * Gives the key associated with the next MyEntry object in the array. + * Throws a NoSuchElementException if the array has been completely + * iterated. + * + * @return the key associated with the next MyEntry object. + */ + @Override + public K next() { + K ret = null; + + if (listIt.hasNext()) { + cur = listIt.next(); + ret = cur.key; + } else if (hasNext()) { + for (int i = bucketIndex + + 1; i < attachedMap.table.length; i++) { + if (!attachedMap.table[i].isEmpty()) { + bucketIndex = i; + + bucket = attachedMap.table[bucketIndex]; + listIt = bucket.iterator(); + cur = listIt.next(); + + ret = cur.key; + break; + } + } + } else + throw new NoSuchElementException("All buckets iterated."); + return ret; + } + + /** + * Removes the current MyEntry. + */ + @Override + public void remove() { + bucket.remove(cur); + } +}
\ No newline at end of file diff --git a/RGens/src/main/java/bjc/RGens/text/markov/SuffixCounter.java b/RGens/src/main/java/bjc/RGens/text/markov/SuffixCounter.java new file mode 100755 index 0000000..67a8d01 --- /dev/null +++ b/RGens/src/main/java/bjc/RGens/text/markov/SuffixCounter.java @@ -0,0 +1,80 @@ +package bjc.RGens.text.markov; + +import java.util.Iterator; +import java.util.Map.Entry; +import java.util.Scanner; + +/** + * Similar to FrequencyCounter, but also counts the suffixes for each + * k-length substring in the text. + * + * @author Daniel Friedman + * + */ + +public class SuffixCounter { + public static void main(String[] args) { + String text; + int k = 0; + + if (args.length == 1) { + k = Integer.parseInt(args[0]); + } + + System.out.print("Enter string: "); + Scanner s = new Scanner(System.in); + text = s.nextLine(); + + MyHashMap<String, Markov> hash = new MyHashMap<String, Markov>(); + + int distinct = 0; + + for (int i = 0; i <= text.length() - k; i++) { + String sub = text.substring(i, i + k); + Character suffix = null; + + if (((i + k) <= (text.length() - 1))) { + suffix = text.charAt(i + k); + } + + if (hash.containsKey(sub)) { + Markov temp = hash.get(sub); + + if (!(suffix == null)) { + temp.add(suffix); + hash.put(sub, temp); + } + + } else { + Markov m; + + if (!(suffix == null)) { + m = new Markov(sub, suffix); + hash.put(sub, m); + distinct++; + } + } + } + + System.out.println(distinct + " distinct keys"); + Iterator<String> keys = hash.keys(); + + while (keys.hasNext()) { + String hashKey = keys.next(); + + Markov hashValue = hash.get(hashKey); + System.out.print(hashValue.count() + " " + hashKey + ":"); + + for (Entry<Character, Integer> entry : hashValue.getMap() + .entrySet()) { + char suffix = entry.getKey(); + int frequencyCount = entry.getValue(); + System.out.print(" " + frequencyCount + " " + suffix); + } + + System.out.println(); + } + + s.close(); + } +} diff --git a/RGens/src/main/java/bjc/RGens/text/markov/TextGenerator.java b/RGens/src/main/java/bjc/RGens/text/markov/TextGenerator.java new file mode 100755 index 0000000..d347179 --- /dev/null +++ b/RGens/src/main/java/bjc/RGens/text/markov/TextGenerator.java @@ -0,0 +1,146 @@ +package bjc.RGens.text.markov; + +import java.io.*; +import java.util.*; +import java.util.Map.Entry; + +public class TextGenerator { + + /** + * @param args + * when used with three arguments, the first represents the + * k-order of the Markov objects. The second represents the + * number of characters to print out. The third represents + * the file to be read. + * + * When used with two arguments, the first represents the + * k-order of the Markov objects, and the second represents + * the file to be read. The generated text will be the same + * number of characters as the original file. + */ + public static void main(String[] args) { + int k = 0; + int M = 0; + + String file = ""; + StringBuilder text = new StringBuilder(); + + if (args.length == 3) { + k = Integer.parseInt(args[0]); + M = Integer.parseInt(args[1]); + file = args[2]; + } else if (args.length == 2) { + k = Integer.parseInt(args[0]); + file = args[1]; + } else { + System.out + .println("\n" + "Usage: java TextGenerator k M file"); + System.out.println( + "where k is the markov order, M is the number"); + System.out.println( + "of characters to be printed, and file is the"); + System.out.println( + "name of the file to print from. M may be left out." + + "\n"); + System.exit(1); + } + + FileReader reader = null; + + try { + reader = new FileReader(file); + } catch (FileNotFoundException e) { + System.out.println("File not found."); + e.printStackTrace(); + } + + MyHashMap<String, Markov> hash = new MyHashMap<String, Markov>(); + + Character next = null; + + try { + next = (char) reader.read(); + } catch (IOException e1) { + System.out.println("IOException in stepping through the file"); + e1.printStackTrace(); + } + + StringBuilder origFileBuffer = new StringBuilder(); + + while (Character.isDefined(next)) { + Character.toString(next); + origFileBuffer.append(next); + + try { + next = (char) reader.read(); + } catch (IOException e) { + System.out.println( + "IOException in stepping through the file"); + e.printStackTrace(); + } + + } + + String origFile = origFileBuffer.toString(); + String firstSub = origFile.substring(0, k); + + for (int i = 0; i < origFile.length() - k; i++) { + String sub = origFile.substring(i, i + k); + Character suffix = origFile.charAt(i + k); + + if (hash.containsKey(sub)) { + Markov marvin = hash.get(sub); + marvin.add(suffix); + hash.put(sub, marvin); + } else { + Markov marvin = new Markov(sub, suffix); + hash.put(sub, marvin); + } + } + + if (M == 0) { + M = origFile.length(); + } + for (int i = k; i < M; i++) { + if (i == k) { + text.append(firstSub); + + if (text.length() > k) + i = text.length(); + } + + String sub = text.substring((i - k), (i)); + Markov tmp = hash.get(sub); + + if (tmp != null) { + Character nextChar = tmp.random(); + text.append(nextChar); + } else { + i = k - 1; + } + } + + if (hash.size() < 100) { + Iterator<String> keys = hash.keys(); + + while (keys.hasNext()) { + String hashKey = keys.next(); + Markov hashValue = hash.get(hashKey); + + System.out.print(hashValue.count() + " " + hashKey + ":"); + + for (Entry<Character, Integer> entry : hashValue.getMap() + .entrySet()) { + char suffix = entry.getKey(); + int frequencyCount = entry.getValue(); + System.out.print(" " + frequencyCount + " " + suffix); + } + + System.out.println(); + } + } + + System.out.println( + text.toString().substring(0, Math.min(M, text.length()))); + } +}
\ No newline at end of file |
