diff options
Diffstat (limited to 'src/main/java/bjc/rgens/text/markov/Markov.java')
| -rwxr-xr-x | src/main/java/bjc/rgens/text/markov/Markov.java | 208 |
1 files changed, 208 insertions, 0 deletions
diff --git a/src/main/java/bjc/rgens/text/markov/Markov.java b/src/main/java/bjc/rgens/text/markov/Markov.java new file mode 100755 index 0000000..e21d60f --- /dev/null +++ b/src/main/java/bjc/rgens/text/markov/Markov.java @@ -0,0 +1,208 @@ +package bjc.rgens.text.markov; + +import java.util.Map.Entry; +import java.util.*; + +/** + * Represents a k-character substring. + * + * Can give a pseudo-random suffix character based on probability. + * + * @author Daniel Friedman (Fall 2011) + */ +public class Markov { + String substring; + int count = 0; + + TreeMap<Character, Integer> map; + + /** + * Constructs a Markov object from a given substring. + * + * @param substr + * The given substring. + */ + public Markov(String substr) { + this.substring = substr; + + map = new TreeMap<>(); + + add(); + } + + /** + * Constructs a Markov object from a given substring and suffix + * character. + * + * Suffix characters are stored in a TreeMap. + * + * @param substr + * The specified substring. + * + * @param suffix + * The specified suffix. + */ + public Markov(String substr, Character suffix) { + this.substring = substr; + + map = new TreeMap<>(); + + add(suffix); + } + + /** + * Increments the count of number of times the substring appears in a + * text. + */ + public void add() { + count++; + } + + /** + * Adds a suffix character to the TreeMap. + * + * @param c + * The suffix character to be added. + */ + public void add(char c) { + add(); + + if (map.containsKey(c)) { + int frequency = map.get(c); + map.put(c, frequency + 1); + } else { + map.put(c, 1); + } + } + + /** + * Gives the frequency count of a suffix character; that is, the number + * of times the specified suffix follows the substring in a text. + * + * @param c + * The specified suffix. + * + * @return + * The frequency count. + */ + public int getFrequencyCount(char c) { + if (!map.containsKey(c)) { + return -1; + } + + return map.get(c); + } + + /** + * Gives a percentage of frequency count / number of total suffixes. + * + * @param c + * The character to look for the frequency for. + * + * @return + * The ratio of frequency count of a single character to the total + * number of suffixes. + */ + public double getCharFrequency(char c) { + if (getFrequencyCount(c) == -1) { + return -1; + } + + return (double) getFrequencyCount(c) / (double) count; + } + + /** + * Finds whether or not the given suffix is in the TreeMap. + * + * @param c + * The given suffix. + * + * @return + * True if the suffix exists in the TreeMap, false otherwise. + */ + public boolean containsChar(char c) { + if (!map.containsKey(c)) { + return false; + } + + return true; + } + + /** + * Gives the number of times this substring occurs in a text. + * + * @return + * Said number of times. + */ + public int count() { + return count; + } + + /** + * Gives the TreeMap. + * + * @return + * The TreeMap. + */ + public TreeMap<Character, Integer> getMap() { + return map; + } + + /** + * Using probability, returns a pseudo-random character to follow the + * substring. + * + * Character possibilities are added to an ArrayList (duplicates + * allowed), and a random number from 0 to the last index in the + * ArrayList is picked. Since more common suffixes occupy more indices + * in the ArrayList, the probability of getting a more common suffix is + * greater than the probability of getting a less common suffix. + * + * @return + * The pseudo-random suffix. + */ + public char random() { + Character ret = null; + + Set<Entry<Character, Integer>> s = map.entrySet(); + + Iterator<Entry<Character, Integer>> it = s.iterator(); + + ArrayList<Character> suffixes = new ArrayList<>(); + + while (it.hasNext()) { + Entry<Character, Integer> tmp = it.next(); + + for (int i = 0; i < tmp.getValue(); i++) { + suffixes.add(tmp.getKey()); + } + } + + Random rand = new Random(); + + int retIndex = rand.nextInt(suffixes.size()); + ret = suffixes.get(retIndex); + + return ret; + } + + /** + * Gives a String representation of the Markov object. + * + * @return + * Said String representation. + */ + @Override + public String toString() { + String ret = "Substring: " + substring + ", Count: " + count; + ret += "\n" + "Suffixes and frequency counts: "; + + for (Entry<Character, Integer> entry : map.entrySet()) { + char key = entry.getKey(); + int value = entry.getValue(); + ret += "\n" + "Suffix: " + key + ", frequency count: " + value; + } + + return ret; + } +} |
