summary refs log tree commit diff
path: root/src/main/java/bjc/rgens/text/markov/Markov.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/bjc/rgens/text/markov/Markov.java')
-rwxr-xr-x src/main/java/bjc/rgens/text/markov/Markov.java 208
1 files changed, 208 insertions, 0 deletions
diff --git a/src/main/java/bjc/rgens/text/markov/Markov.java b/src/main/java/bjc/rgens/text/markov/Markov.java
new file mode 100755
index 0000000..e21d60f
--- /dev/null
+++ b/src/main/java/bjc/rgens/text/markov/Markov.java
@@ -0,0 +1,208 @@
+package bjc.rgens.text.markov;
+
+import java.util.Map.Entry;
+import java.util.*;
+
/**
 * Represents a k-character substring of a text, together with the characters
 * that have been recorded as following it.
 *
 * Can give a pseudo-random suffix character, weighted by how often each
 * suffix has been recorded.
 *
 * @author Daniel Friedman (Fall 2011)
 */
public class Markov {
    /**
     * Shared generator, so that {@link #random()} does not construct a new
     * {@link Random} on every call.
     */
    private static final Random RNG = new Random();

    /** The substring this object describes. */
    String substring;

    /**
     * Number of times the substring has been recorded, with or without a
     * suffix.
     */
    int count = 0;

    /** Maps each recorded suffix character to its frequency count. */
    TreeMap<Character, Integer> map;

    /**
     * Constructs a Markov object from a given substring.
     *
     * The occurrence count starts at one and no suffix is recorded.
     *
     * @param substr
     *            The given substring.
     */
    public Markov(String substr) {
        this.substring = substr;

        map = new TreeMap<>();

        add();
    }

    /**
     * Constructs a Markov object from a given substring and suffix
     * character.
     *
     * Suffix characters are stored in a TreeMap. The occurrence count
     * starts at one, and the suffix starts with a frequency count of one.
     *
     * @param substr
     *            The specified substring.
     *
     * @param suffix
     *            The specified suffix; must not be null.
     *
     * @throws NullPointerException
     *             If the suffix is null.
     */
    public Markov(String substr, Character suffix) {
        // Fail with a named message instead of an anonymous unboxing NPE.
        Objects.requireNonNull(suffix, "suffix");

        this.substring = substr;

        map = new TreeMap<>();

        add(suffix.charValue());
    }

    /**
     * Increments the count of number of times the substring appears in a
     * text, without recording a suffix.
     */
    public void add() {
        count++;
    }

    /**
     * Records one more occurrence of the substring followed by the given
     * suffix character.
     *
     * Increments both the occurrence count and the frequency count of the
     * suffix.
     *
     * @param c
     *            The suffix character to be added.
     */
    public void add(char c) {
        add();

        // Single lookup: an absent key yields null.
        Integer previous = map.get(c);
        map.put(c, previous == null ? 1 : previous + 1);
    }

    /**
     * Gives the frequency count of a suffix character; that is, the number
     * of times the specified suffix has been recorded as following the
     * substring.
     *
     * @param c
     *            The specified suffix.
     *
     * @return
     *         The frequency count, or -1 if the suffix has never been
     *         recorded.
     */
    public int getFrequencyCount(char c) {
        Integer frequency = map.get(c);

        return frequency == null ? -1 : frequency;
    }

    /**
     * Gives the ratio of a suffix's frequency count to the occurrence
     * count of the substring.
     *
     * The denominator is {@link #count()}, which also includes occurrences
     * recorded without a suffix (via {@link #add()} or the single-argument
     * constructor), so the ratios of all suffixes may sum to less than one.
     *
     * @param c
     *            The character to look for the frequency for.
     *
     * @return
     *         The frequency count of the character divided by the
     *         occurrence count, or -1 if the suffix has never been
     *         recorded.
     */
    public double getCharFrequency(char c) {
        int frequency = getFrequencyCount(c);

        if (frequency == -1) {
            return -1;
        }

        return (double) frequency / (double) count;
    }

    /**
     * Finds whether or not the given suffix is in the TreeMap.
     *
     * @param c
     *            The given suffix.
     *
     * @return
     *         True if the suffix exists in the TreeMap, false otherwise.
     */
    public boolean containsChar(char c) {
        return map.containsKey(c);
    }

    /**
     * Gives the number of times this substring has been recorded.
     *
     * @return
     *         Said number of times.
     */
    public int count() {
        return count;
    }

    /**
     * Gives the TreeMap of suffix characters to frequency counts.
     *
     * The returned map is the internal map itself, not a copy, so changes
     * made through it are visible to this object.
     *
     * @return
     *         The TreeMap.
     */
    public TreeMap<Character, Integer> getMap() {
        return map;
    }

    /**
     * Using probability, returns a pseudo-random character to follow the
     * substring.
     *
     * Each suffix is chosen with probability proportional to its frequency
     * count: a random index from 0 (inclusive) to the sum of all frequency
     * counts (exclusive) is picked, and the suffixes are walked in key
     * order until the running total of their counts passes that index.
     *
     * @return
     *         The pseudo-random suffix.
     *
     * @throws IllegalStateException
     *             If no suffix has been recorded for this substring.
     */
    public char random() {
        int total = 0;

        for (int frequency : map.values()) {
            total += frequency;
        }

        if (total <= 0) {
            throw new IllegalStateException(
                    "No suffix characters recorded for substring \"" + substring + "\"");
        }

        // Equivalent to indexing into a list where every suffix is
        // repeated 'frequency' times, without building that list.
        int remaining = RNG.nextInt(total);

        for (Entry<Character, Integer> entry : map.entrySet()) {
            remaining -= entry.getValue();

            if (remaining < 0) {
                return entry.getKey();
            }
        }

        // The index is always below the sum of the counts just walked.
        throw new IllegalStateException("Suffix frequency counts are inconsistent");
    }

    /**
     * Gives a String representation of the Markov object: the substring and
     * its count, followed by one line per suffix with its frequency count.
     *
     * @return
     *         Said String representation.
     */
    @Override
    public String toString() {
        StringBuilder ret = new StringBuilder();

        ret.append("Substring: ").append(substring).append(", Count: ").append(count);
        ret.append("\n").append("Suffixes and frequency counts: ");

        for (Entry<Character, Integer> entry : map.entrySet()) {
            char key = entry.getKey();
            int value = entry.getValue();

            ret.append("\n").append("Suffix: ").append(key)
                    .append(", frequency count: ").append(value);
        }

        return ret.toString();
    }
}