/*
 * Provenance: added by commit 0ea49dd4a52358f053c9be7138c392b16de05899
 * ("Move things around, and start on new parser.", student,
 * Fri, 17 Mar 2017 10:49:27 -0400) as
 * RGens/src/main/java/bjc/rgens/text/markov/Markov.java.
 *
 * In the repository this class is declared in package bjc.rgens.text.markov;
 * the package statement must be the first statement of the real source file.
 */

import java.util.Map.Entry;
import java.util.*;

/**
 * Represents a k-character substring. Records how many times the substring
 * has been seen and how often each suffix character followed it, and can
 * give a pseudo-random suffix character weighted by those frequencies.
 *
 * @author Daniel Friedman (Fall 2011)
 *
 */
public class Markov {
	String substring;
	int count = 0;

	TreeMap<Character, Integer> map;

	/* Reused by random() so that a new generator is not built per call. */
	private final Random rng = new Random();

	/**
	 * Constructs a Markov object from a given substring. The occurrence
	 * count starts at one and no suffix is recorded.
	 *
	 * @param substring
	 *                the given substring.
	 */
	public Markov(String substring) {
		this.substring = substring;

		map = new TreeMap<>();

		add();
	}

	/**
	 * Constructs a Markov object from a given substring and suffix
	 * character. Suffix characters are stored in a TreeMap.
	 *
	 * @param substring
	 *                the specified substring.
	 * @param suffix
	 *                the specified suffix; it is unboxed, so it must not
	 *                be {@code null}.
	 */
	public Markov(String substring, Character suffix) {
		this.substring = substring;

		map = new TreeMap<>();

		add(suffix);
	}

	/**
	 * Increments the count of number of times the substring appears in a
	 * text.
	 */
	public void add() {
		count++;
	}

	/**
	 * Adds a suffix character to the TreeMap, and also increments the
	 * occurrence count of the substring.
	 *
	 * @param c
	 *                the suffix character to be added.
	 */
	public void add(char c) {
		add();

		/* Single lookup: null means the suffix has not been seen yet. */
		Integer frequency = map.get(c);

		if (frequency == null) {
			map.put(c, 1);
		} else {
			map.put(c, frequency + 1);
		}
	}

	/**
	 * Gives the frequency count of a suffix character; that is, the number
	 * of times the specified suffix follows the substring in a text.
	 *
	 * @param c
	 *                the specified suffix.
	 * @return the frequency count, or -1 if the suffix was never recorded.
	 */
	public int getFrequencyCount(char c) {
		Integer frequency = map.get(c);

		if (frequency == null) {
			return -1;
		}

		return frequency;
	}

	/**
	 * Gives the ratio of the frequency count of a suffix to the occurrence
	 * count of the substring (the value reported by {@link #count()}).
	 *
	 * Note that the occurrence count is also incremented by {@link #add()},
	 * which records no suffix, so the denominator can exceed the number of
	 * recorded suffixes.
	 *
	 * @param c
	 *                the specified suffix.
	 * @return the frequency count of the character divided by the
	 *         occurrence count, or -1 if the suffix was never recorded.
	 */
	public double getCharFrequency(char c) {
		int frequency = getFrequencyCount(c);

		if (frequency == -1) {
			return -1;
		}

		return (double) frequency / (double) count;
	}

	/**
	 * Finds whether or not the given suffix is in the TreeMap.
	 *
	 * @param c
	 *                the given suffix.
	 * @return True if the suffix exists in the TreeMap, false otherwise.
	 */
	public boolean containsChar(char c) {
		return map.containsKey(c);
	}

	/**
	 * Gives the number of times this substring occurs in a text.
	 *
	 * @return said number of times.
	 */
	public int count() {
		return count;
	}

	/**
	 * Gives the TreeMap of suffix characters to frequency counts. This is
	 * the internal map itself, not a copy.
	 *
	 * @return the TreeMap.
	 */
	public TreeMap<Character, Integer> getMap() {
		return map;
	}

	/**
	 * Using probability, returns a pseudo-random character to follow the
	 * substring. A random position is drawn from 0 (inclusive) to the
	 * total number of recorded suffix occurrences (exclusive), and the
	 * suffixes are walked in key order until the running total of their
	 * frequency counts passes that position. Since more common suffixes
	 * cover more positions, the probability of getting a more common
	 * suffix is greater than the probability of getting a less common
	 * suffix.
	 *
	 * @return the pseudo-random suffix.
	 * @throws IllegalArgumentException
	 *                 if no suffix has been recorded, because the random
	 *                 bound is then zero.
	 */
	public char random() {
		int total = 0;

		for (int frequency : map.values()) {
			total += frequency;
		}

		/* nextInt rejects a non-positive bound, i.e. an empty map. */
		int position = rng.nextInt(total);

		for (Entry<Character, Integer> entry : map.entrySet()) {
			position -= entry.getValue();

			if (position < 0) {
				return entry.getKey();
			}
		}

		/* Only reachable if the map changed while it was being walked. */
		throw new IllegalStateException("Suffix frequencies changed during selection");
	}

	/**
	 * Gives a String representation of the Markov object: the substring
	 * and its count, followed by one line per suffix with its frequency
	 * count.
	 *
	 * @return said String representation.
	 */
	@Override
	public String toString() {
		StringBuilder ret = new StringBuilder();

		ret.append("Substring: ").append(substring);
		ret.append(", Count: ").append(count);
		ret.append("\n").append("Suffixes and frequency counts: ");

		for (Entry<Character, Integer> entry : map.entrySet()) {
			char key = entry.getKey();
			int value = entry.getValue();

			ret.append("\n").append("Suffix: ").append(key);
			ret.append(", frequency count: ").append(value);
		}

		return ret.toString();
	}
}