summaryrefslogtreecommitdiff
path: root/RGens/src/main/java/bjc/rgens/text/markov/Markov.java
blob: e21d60fc87014d82e08c30906b56d7e0991025e5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
package bjc.rgens.text.markov;

import java.util.Map.Entry;
import java.util.*;

/**
 * Represents a k-character substring.
 *
 * Can give a pseudo-random suffix character based on probability.
 *
 * @author Daniel Friedman (Fall 2011)
 */
public class Markov {
	String  substring;
	int     count   = 0;

	TreeMap<Character, Integer> map;

	/**
	 * Constructs a Markov object from a given substring.
	 *
	 * @param substr
	 * 	The given substring.
	 */
	public Markov(String substr) {
		this.substring = substr;

		map = new TreeMap<>();

		add();
	}

	/**
	 * Constructs a Markov object from a given substring and suffix
	 * character. 
	 *
	 * Suffix characters are stored in a TreeMap.
	 *
	 * @param substr
	 * 	The specified substring.
	 *
	 * @param suffix
	 * 	The specified suffix.
	 */
	public Markov(String substr, Character suffix) {
		this.substring = substr;

		map = new TreeMap<>();

		add(suffix);
	}

	/**
	 * Increments the count of number of times the substring appears in a
	 * text.
	 */
	public void add() {
		count++;
	}

	/**
	 * Adds a suffix character to the TreeMap.
	 *
	 * @param c
	 * 	The suffix character to be added.
	 */
	public void add(char c) {
		add();

		if (map.containsKey(c)) {
			int frequency = map.get(c);
			map.put(c, frequency + 1);
		} else {
			map.put(c, 1);
		}
	}

	/**
	 * Gives the frequency count of a suffix character; that is, the number
	 * of times the specified suffix follows the substring in a text.
	 *
	 * @param c
	 * 	The specified suffix.
	 *
	 * @return 
	 * 	The frequency count.
	 */
	public int getFrequencyCount(char c) {
		if (!map.containsKey(c)) {
			return -1;
		}

		return map.get(c);
	}

	/**
	 * Gives a percentage of frequency count / number of total suffixes.
	 *
	 * @param c
	 * 	The character to look for the frequency for.
	 *
	 * @return
	 * 	The ratio of frequency count of a single character to the total
	 * 	number of suffixes.
	 */
	public double getCharFrequency(char c) {
		if (getFrequencyCount(c) == -1) {
			return -1;
		}

		return (double) getFrequencyCount(c) / (double) count;
	}

	/**
	 * Finds whether or not the given suffix is in the TreeMap.
	 *
	 * @param c
	 * 	The given suffix.
	 *
	 * @return
	 * 	True if the suffix exists in the TreeMap, false otherwise.
	 */
	public boolean containsChar(char c) {
		if (!map.containsKey(c)) {
			return false;
		}

		return true;
	}

	/**
	 * Gives the number of times this substring occurs in a text.
	 *
	 * @return
	 * 	Said number of times.
	 */
	public int count() {
		return count;
	}

	/**
	 * Gives the TreeMap.
	 *
	 * @return 
	 * 	The TreeMap.
	 */
	public TreeMap<Character, Integer> getMap() {
		return map;
	}

	/**
	 * Using probability, returns a pseudo-random character to follow the
	 * substring.
	 *
	 * Character possibilities are added to an ArrayList (duplicates
	 * allowed), and a random number from 0 to the last index in the
	 * ArrayList is picked. Since more common suffixes occupy more indices
	 * in the ArrayList, the probability of getting a more common suffix is
	 * greater than the probability of getting a less common suffix.
	 *
	 * @return 
	 * 	The pseudo-random suffix.
	 */
	public char random() {
		Character ret = null;

		Set<Entry<Character, Integer>> s = map.entrySet();

		Iterator<Entry<Character, Integer>> it = s.iterator();

		ArrayList<Character> suffixes = new ArrayList<>();

		while (it.hasNext()) {
			Entry<Character, Integer> tmp = it.next();

			for (int i = 0; i < tmp.getValue(); i++) {
				suffixes.add(tmp.getKey());
			}
		}

		Random rand = new Random();

		int retIndex = rand.nextInt(suffixes.size());
		ret = suffixes.get(retIndex);

		return ret;
	}

	/**
	 * Gives a String representation of the Markov object.
	 *
	 * @return 
	 * 	Said String representation.
	 */
	@Override
	public String toString() {
		String ret = "Substring: " + substring + ", Count: " + count;
		ret += "\n" + "Suffixes and frequency counts: ";

		for (Entry<Character, Integer> entry : map.entrySet()) {
			char key = entry.getKey();
			int value = entry.getValue();
			ret += "\n" + "Suffix: " + key + ", frequency count: " + value;
		}

		return ret;
	}
}