diff options
Diffstat (limited to 'src/main/java/bjc/rgens/text/markov')
4 files changed, 427 insertions, 0 deletions
diff --git a/src/main/java/bjc/rgens/text/markov/Markov.java b/src/main/java/bjc/rgens/text/markov/Markov.java new file mode 100755 index 0000000..e21d60f --- /dev/null +++ b/src/main/java/bjc/rgens/text/markov/Markov.java @@ -0,0 +1,208 @@ +package bjc.rgens.text.markov; + +import java.util.Map.Entry; +import java.util.*; + +/** + * Represents a k-character substring. + * + * Can give a pseudo-random suffix character based on probability. + * + * @author Daniel Friedman (Fall 2011) + */ +public class Markov { + String substring; + int count = 0; + + TreeMap<Character, Integer> map; + + /** + * Constructs a Markov object from a given substring. + * + * @param substr + * The given substring. + */ + public Markov(String substr) { + this.substring = substr; + + map = new TreeMap<>(); + + add(); + } + + /** + * Constructs a Markov object from a given substring and suffix + * character. + * + * Suffix characters are stored in a TreeMap. + * + * @param substr + * The specified substring. + * + * @param suffix + * The specified suffix. + */ + public Markov(String substr, Character suffix) { + this.substring = substr; + + map = new TreeMap<>(); + + add(suffix); + } + + /** + * Increments the count of number of times the substring appears in a + * text. + */ + public void add() { + count++; + } + + /** + * Adds a suffix character to the TreeMap. + * + * @param c + * The suffix character to be added. + */ + public void add(char c) { + add(); + + if (map.containsKey(c)) { + int frequency = map.get(c); + map.put(c, frequency + 1); + } else { + map.put(c, 1); + } + } + + /** + * Gives the frequency count of a suffix character; that is, the number + * of times the specified suffix follows the substring in a text. + * + * @param c + * The specified suffix. + * + * @return + * The frequency count. + */ + public int getFrequencyCount(char c) { + if (!map.containsKey(c)) { + return -1; + } + + return map.get(c); + } + + /** + * Gives a percentage of frequency count / number of total suffixes. + * + * @param c + * The character to look for the frequency for. + * + * @return + * The ratio of frequency count of a single character to the total + * number of suffixes. + */ + public double getCharFrequency(char c) { + if (getFrequencyCount(c) == -1) { + return -1; + } + + return (double) getFrequencyCount(c) / (double) count; + } + + /** + * Finds whether or not the given suffix is in the TreeMap. + * + * @param c + * The given suffix. + * + * @return + * True if the suffix exists in the TreeMap, false otherwise. + */ + public boolean containsChar(char c) { + if (!map.containsKey(c)) { + return false; + } + + return true; + } + + /** + * Gives the number of times this substring occurs in a text. + * + * @return + * Said number of times. + */ + public int count() { + return count; + } + + /** + * Gives the TreeMap. + * + * @return + * The TreeMap. + */ + public TreeMap<Character, Integer> getMap() { + return map; + } + + /** + * Using probability, returns a pseudo-random character to follow the + * substring. + * + * Character possibilities are added to an ArrayList (duplicates + * allowed), and a random number from 0 to the last index in the + * ArrayList is picked. Since more common suffixes occupy more indices + * in the ArrayList, the probability of getting a more common suffix is + * greater than the probability of getting a less common suffix. + * + * @return + * The pseudo-random suffix. + */ + public char random() { + Character ret = null; + + Set<Entry<Character, Integer>> s = map.entrySet(); + + Iterator<Entry<Character, Integer>> it = s.iterator(); + + ArrayList<Character> suffixes = new ArrayList<>(); + + while (it.hasNext()) { + Entry<Character, Integer> tmp = it.next(); + + for (int i = 0; i < tmp.getValue(); i++) { + suffixes.add(tmp.getKey()); + } + } + + Random rand = new Random(); + + int retIndex = rand.nextInt(suffixes.size()); + ret = suffixes.get(retIndex); + + return ret; + } + + /** + * Gives a String representation of the Markov object. + * + * @return + * Said String representation. + */ + @Override + public String toString() { + String ret = "Substring: " + substring + ", Count: " + count; + ret += "\n" + "Suffixes and frequency counts: "; + + for (Entry<Character, Integer> entry : map.entrySet()) { + char key = entry.getKey(); + int value = entry.getValue(); + ret += "\n" + "Suffix: " + key + ", frequency count: " + value; + } + + return ret; + } +} diff --git a/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java b/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java new file mode 100755 index 0000000..cebf2bc --- /dev/null +++ b/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java @@ -0,0 +1,70 @@ +package bjc.rgens.text.markov; + +import java.util.Map; + +/** + * A standalone Markov generator. + * + * @author bjculkin + */ +public class StandaloneMarkov { + /* The order of the generator. */ + private int ord; + + /* The generators to use. */ + private Map<String, Markov> hash; + /* The initial string. */ + private String first; + + /** + * Create a new standalone Markov generator. + * + * @param order + * The order of this generator. + * + * @param markovHash + * The generators to use. + * + * @param firstSub + * The string to start out with. + */ + public StandaloneMarkov(int order, Map<String, Markov> markovHash, String firstSub) { + ord = order; + hash = markovHash; + first = firstSub; + } + + /** + * Generate random text from the markov generator. + * + * @param charsToGenerate + * The number of characters of text to generate. + * + * @return + * The randomly generate text. + */ + public String generateTextFromMarkov(int charsToGenerate) { + StringBuilder text = new StringBuilder(); + + for (int i = ord; i < charsToGenerate; i++) { + if (i == ord) { + text.append(first); + + if (text.length() > ord) i = text.length(); + } + + String sub = text.substring(i - ord, i); + Markov tmp = hash.get(sub); + + if (tmp != null) { + Character nextChar = tmp.random(); + + text.append(nextChar); + } else { + i = ord - 1; + } + } + + return text.toString(); + } +} diff --git a/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java b/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java new file mode 100755 index 0000000..339e8d5 --- /dev/null +++ b/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java @@ -0,0 +1,76 @@ +package bjc.rgens.text.markov; + +import java.io.IOException; +import java.io.Reader; +import java.util.HashMap; +import java.util.Map; + +/** + * Create a Markov generate from a provided source. + * + * @author bjculkin + */ +public class StandaloneTextGenerator { + /** + * Build a markov generator from a provided source. + * + * @param order + * The markov order to use. + * + * @param reader + * The source to seed the generator from. + * + * @return + * The markov generator for the provided text. + */ + public static StandaloneMarkov generateMarkovMap(int order, Reader reader) { + Map<String, Markov> hash = new HashMap<>(); + + Character next = null; + + try { + next = (char) reader.read(); + } catch (IOException e1) { + System.out.println("IOException in stepping through the reader"); + + e1.printStackTrace(); + + System.exit(1); + } + + StringBuilder origFileBuffer = new StringBuilder(); + + while (next != null && Character.isDefined(next)) { + Character.toString(next); + origFileBuffer.append(next); + + try { + next = (char) reader.read(); + } catch (IOException e) { + System.out.println("IOException in stepping through the reader"); + + e.printStackTrace(); + } + + } + + String origFile = origFileBuffer.toString(); + String firstSub = origFile.substring(0, order); + + for (int i = 0; i < origFile.length() - order; i++) { + String sub = origFile.substring(i, i + order); + Character suffix = origFile.charAt(i + order); + + if (hash.containsKey(sub)) { + Markov marvin = hash.get(sub); + marvin.add(suffix); + hash.put(sub, marvin); + } else { + Markov marvin = new Markov(sub, suffix); + hash.put(sub, marvin); + } + } + + return new StandaloneMarkov(order, hash, firstSub); + } +} diff --git a/src/main/java/bjc/rgens/text/markov/TextGenerator.java b/src/main/java/bjc/rgens/text/markov/TextGenerator.java new file mode 100755 index 0000000..f629d49 --- /dev/null +++ b/src/main/java/bjc/rgens/text/markov/TextGenerator.java @@ -0,0 +1,73 @@ +package bjc.rgens.text.markov; + +import java.io.*; + +/** + * Generate text from a markov model of an input text + * + * @author ben + * + */ +public class TextGenerator { + /** + * Main method. + * + * @param args + * When used with three arguments, the first represents the k-order + * of the Markov objects. The second represents the number of + * characters to print out. The third represents the file to be + * read. + * + * When used with two arguments, the first represents the k-order + * of the Markov objects, and the second represents the file to be + * read. The generated text will be the same number of characters + * as the original file. + */ + public static void main(String[] args) { + int k = 0; + int M = 0; + + String file = ""; + StringBuilder text = new StringBuilder(); + + if (args.length == 3) { + k = Integer.parseInt(args[0]); + M = Integer.parseInt(args[1]); + + file = args[2]; + } else if (args.length == 2) { + k = Integer.parseInt(args[0]); + + file = args[1]; + } else { + System.out.println("\n" + "Usage: java TextGenerator k M file"); + System.out.println("where k is the markov order, M is the number"); + System.out.println("of characters to be printed, and file is the"); + System.out.println("name of the file to print from. M may be left out." + "\n"); + System.exit(1); + } + + StandaloneMarkov markov = null; + + try (FileReader reader = new FileReader(file)) { + markov = StandaloneTextGenerator.generateMarkovMap(k, reader); + + String generatedText = markov.generateTextFromMarkov(M); + String desiredText = generatedText.substring(0, Math.min(M, text.length())); + + System.out.println(desiredText); + } catch (FileNotFoundException e) { + System.out.println("File not found."); + + e.printStackTrace(); + + System.exit(1); + } catch (IOException ioex) { + System.out.println("IOException"); + + ioex.printStackTrace(); + + System.exit(1); + } + } +} |
