summaryrefslogtreecommitdiff
path: root/src/main/java/bjc/rgens/text
diff options
context:
space:
mode:
authorBenjamin J. Culkin <bjculkin@mix.wvu.edu>2018-05-29 15:35:02 -0300
committerBenjamin J. Culkin <bjculkin@mix.wvu.edu>2018-05-29 15:35:02 -0300
commit0faa5175b6f0de8835ed514615ac64135f406b29 (patch)
tree6a1a5a9b01a1175b8ae2203418005f7bc625be9d /src/main/java/bjc/rgens/text
parent9356498bebab9342222cd87be669abae9d7ac3a2 (diff)
Move files out of folder
Diffstat (limited to 'src/main/java/bjc/rgens/text')
-rw-r--r--src/main/java/bjc/rgens/text/markov/Markov.java208
-rw-r--r--src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java70
-rw-r--r--src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java76
-rw-r--r--src/main/java/bjc/rgens/text/markov/TextGenerator.java73
4 files changed, 427 insertions, 0 deletions
diff --git a/src/main/java/bjc/rgens/text/markov/Markov.java b/src/main/java/bjc/rgens/text/markov/Markov.java
new file mode 100644
index 0000000..e21d60f
--- /dev/null
+++ b/src/main/java/bjc/rgens/text/markov/Markov.java
@@ -0,0 +1,208 @@
+package bjc.rgens.text.markov;
+
+import java.util.Map.Entry;
+import java.util.*;
+
+/**
+ * Represents a k-character substring.
+ *
+ * Can give a pseudo-random suffix character based on probability.
+ *
+ * @author Daniel Friedman (Fall 2011)
+ */
+public class Markov {
+ String substring;
+ int count = 0;
+
+ TreeMap<Character, Integer> map;
+
+ /**
+ * Constructs a Markov object from a given substring.
+ *
+ * @param substr
+ * The given substring.
+ */
+ public Markov(String substr) {
+ this.substring = substr;
+
+ map = new TreeMap<>();
+
+ add();
+ }
+
+ /**
+ * Constructs a Markov object from a given substring and suffix
+ * character.
+ *
+ * Suffix characters are stored in a TreeMap.
+ *
+ * @param substr
+ * The specified substring.
+ *
+ * @param suffix
+ * The specified suffix.
+ */
+ public Markov(String substr, Character suffix) {
+ this.substring = substr;
+
+ map = new TreeMap<>();
+
+ add(suffix);
+ }
+
+ /**
+ * Increments the count of number of times the substring appears in a
+ * text.
+ */
+ public void add() {
+ count++;
+ }
+
+ /**
+ * Adds a suffix character to the TreeMap.
+ *
+ * @param c
+ * The suffix character to be added.
+ */
+ public void add(char c) {
+ add();
+
+ if (map.containsKey(c)) {
+ int frequency = map.get(c);
+ map.put(c, frequency + 1);
+ } else {
+ map.put(c, 1);
+ }
+ }
+
+ /**
+ * Gives the frequency count of a suffix character; that is, the number
+ * of times the specified suffix follows the substring in a text.
+ *
+ * @param c
+ * The specified suffix.
+ *
+ * @return
+ * The frequency count.
+ */
+ public int getFrequencyCount(char c) {
+ if (!map.containsKey(c)) {
+ return -1;
+ }
+
+ return map.get(c);
+ }
+
+ /**
+ * Gives a percentage of frequency count / number of total suffixes.
+ *
+ * @param c
+ * The character to look for the frequency for.
+ *
+ * @return
+ * The ratio of frequency count of a single character to the total
+ * number of suffixes.
+ */
+ public double getCharFrequency(char c) {
+ if (getFrequencyCount(c) == -1) {
+ return -1;
+ }
+
+ return (double) getFrequencyCount(c) / (double) count;
+ }
+
+ /**
+ * Finds whether or not the given suffix is in the TreeMap.
+ *
+ * @param c
+ * The given suffix.
+ *
+ * @return
+ * True if the suffix exists in the TreeMap, false otherwise.
+ */
+ public boolean containsChar(char c) {
+ if (!map.containsKey(c)) {
+ return false;
+ }
+
+ return true;
+ }
+
+ /**
+ * Gives the number of times this substring occurs in a text.
+ *
+ * @return
+ * Said number of times.
+ */
+ public int count() {
+ return count;
+ }
+
+ /**
+ * Gives the TreeMap.
+ *
+ * @return
+ * The TreeMap.
+ */
+ public TreeMap<Character, Integer> getMap() {
+ return map;
+ }
+
+ /**
+ * Using probability, returns a pseudo-random character to follow the
+ * substring.
+ *
+ * Character possibilities are added to an ArrayList (duplicates
+ * allowed), and a random number from 0 to the last index in the
+ * ArrayList is picked. Since more common suffixes occupy more indices
+ * in the ArrayList, the probability of getting a more common suffix is
+ * greater than the probability of getting a less common suffix.
+ *
+ * @return
+ * The pseudo-random suffix.
+ */
+ public char random() {
+ Character ret = null;
+
+ Set<Entry<Character, Integer>> s = map.entrySet();
+
+ Iterator<Entry<Character, Integer>> it = s.iterator();
+
+ ArrayList<Character> suffixes = new ArrayList<>();
+
+ while (it.hasNext()) {
+ Entry<Character, Integer> tmp = it.next();
+
+ for (int i = 0; i < tmp.getValue(); i++) {
+ suffixes.add(tmp.getKey());
+ }
+ }
+
+ Random rand = new Random();
+
+ int retIndex = rand.nextInt(suffixes.size());
+ ret = suffixes.get(retIndex);
+
+ return ret;
+ }
+
+ /**
+ * Gives a String representation of the Markov object.
+ *
+ * @return
+ * Said String representation.
+ */
+ @Override
+ public String toString() {
+ String ret = "Substring: " + substring + ", Count: " + count;
+ ret += "\n" + "Suffixes and frequency counts: ";
+
+ for (Entry<Character, Integer> entry : map.entrySet()) {
+ char key = entry.getKey();
+ int value = entry.getValue();
+ ret += "\n" + "Suffix: " + key + ", frequency count: " + value;
+ }
+
+ return ret;
+ }
+}
diff --git a/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java b/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java
new file mode 100644
index 0000000..cebf2bc
--- /dev/null
+++ b/src/main/java/bjc/rgens/text/markov/StandaloneMarkov.java
@@ -0,0 +1,70 @@
+package bjc.rgens.text.markov;
+
+import java.util.Map;
+
+/**
+ * A standalone Markov generator.
+ *
+ * @author bjculkin
+ */
+public class StandaloneMarkov {
+ /* The order of the generator. */
+ private int ord;
+
+ /* The generators to use. */
+ private Map<String, Markov> hash;
+ /* The initial string. */
+ private String first;
+
+ /**
+ * Create a new standalone Markov generator.
+ *
+ * @param order
+ * The order of this generator.
+ *
+ * @param markovHash
+ * The generators to use.
+ *
+ * @param firstSub
+ * The string to start out with.
+ */
+ public StandaloneMarkov(int order, Map<String, Markov> markovHash, String firstSub) {
+ ord = order;
+ hash = markovHash;
+ first = firstSub;
+ }
+
+ /**
+ * Generate random text from the markov generator.
+ *
+ * @param charsToGenerate
+ * The number of characters of text to generate.
+ *
+ * @return
+ * The randomly generate text.
+ */
+ public String generateTextFromMarkov(int charsToGenerate) {
+ StringBuilder text = new StringBuilder();
+
+ for (int i = ord; i < charsToGenerate; i++) {
+ if (i == ord) {
+ text.append(first);
+
+ if (text.length() > ord) i = text.length();
+ }
+
+ String sub = text.substring(i - ord, i);
+ Markov tmp = hash.get(sub);
+
+ if (tmp != null) {
+ Character nextChar = tmp.random();
+
+ text.append(nextChar);
+ } else {
+ i = ord - 1;
+ }
+ }
+
+ return text.toString();
+ }
+}
diff --git a/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java b/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java
new file mode 100644
index 0000000..339e8d5
--- /dev/null
+++ b/src/main/java/bjc/rgens/text/markov/StandaloneTextGenerator.java
@@ -0,0 +1,76 @@
+package bjc.rgens.text.markov;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Create a Markov generate from a provided source.
+ *
+ * @author bjculkin
+ */
+public class StandaloneTextGenerator {
+ /**
+ * Build a markov generator from a provided source.
+ *
+ * @param order
+ * The markov order to use.
+ *
+ * @param reader
+ * The source to seed the generator from.
+ *
+ * @return
+ * The markov generator for the provided text.
+ */
+ public static StandaloneMarkov generateMarkovMap(int order, Reader reader) {
+ Map<String, Markov> hash = new HashMap<>();
+
+ Character next = null;
+
+ try {
+ next = (char) reader.read();
+ } catch (IOException e1) {
+ System.out.println("IOException in stepping through the reader");
+
+ e1.printStackTrace();
+
+ System.exit(1);
+ }
+
+ StringBuilder origFileBuffer = new StringBuilder();
+
+ while (next != null && Character.isDefined(next)) {
+ Character.toString(next);
+ origFileBuffer.append(next);
+
+ try {
+ next = (char) reader.read();
+ } catch (IOException e) {
+ System.out.println("IOException in stepping through the reader");
+
+ e.printStackTrace();
+ }
+
+ }
+
+ String origFile = origFileBuffer.toString();
+ String firstSub = origFile.substring(0, order);
+
+ for (int i = 0; i < origFile.length() - order; i++) {
+ String sub = origFile.substring(i, i + order);
+ Character suffix = origFile.charAt(i + order);
+
+ if (hash.containsKey(sub)) {
+ Markov marvin = hash.get(sub);
+ marvin.add(suffix);
+ hash.put(sub, marvin);
+ } else {
+ Markov marvin = new Markov(sub, suffix);
+ hash.put(sub, marvin);
+ }
+ }
+
+ return new StandaloneMarkov(order, hash, firstSub);
+ }
+}
diff --git a/src/main/java/bjc/rgens/text/markov/TextGenerator.java b/src/main/java/bjc/rgens/text/markov/TextGenerator.java
new file mode 100644
index 0000000..f629d49
--- /dev/null
+++ b/src/main/java/bjc/rgens/text/markov/TextGenerator.java
@@ -0,0 +1,73 @@
+package bjc.rgens.text.markov;
+
+import java.io.*;
+
+/**
+ * Generate text from a markov model of an input text
+ *
+ * @author ben
+ *
+ */
+public class TextGenerator {
+ /**
+ * Main method.
+ *
+ * @param args
+ * When used with three arguments, the first represents the k-order
+ * of the Markov objects. The second represents the number of
+ * characters to print out. The third represents the file to be
+ * read.
+ *
+ * When used with two arguments, the first represents the k-order
+ * of the Markov objects, and the second represents the file to be
+ * read. The generated text will be the same number of characters
+ * as the original file.
+ */
+ public static void main(String[] args) {
+ int k = 0;
+ int M = 0;
+
+ String file = "";
+ StringBuilder text = new StringBuilder();
+
+ if (args.length == 3) {
+ k = Integer.parseInt(args[0]);
+ M = Integer.parseInt(args[1]);
+
+ file = args[2];
+ } else if (args.length == 2) {
+ k = Integer.parseInt(args[0]);
+
+ file = args[1];
+ } else {
+ System.out.println("\n" + "Usage: java TextGenerator k M file");
+ System.out.println("where k is the markov order, M is the number");
+ System.out.println("of characters to be printed, and file is the");
+ System.out.println("name of the file to print from. M may be left out." + "\n");
+ System.exit(1);
+ }
+
+ StandaloneMarkov markov = null;
+
+ try (FileReader reader = new FileReader(file)) {
+ markov = StandaloneTextGenerator.generateMarkovMap(k, reader);
+
+ String generatedText = markov.generateTextFromMarkov(M);
+ String desiredText = generatedText.substring(0, Math.min(M, text.length()));
+
+ System.out.println(desiredText);
+ } catch (FileNotFoundException e) {
+ System.out.println("File not found.");
+
+ e.printStackTrace();
+
+ System.exit(1);
+ } catch (IOException ioex) {
+ System.out.println("IOException");
+
+ ioex.printStackTrace();
+
+ System.exit(1);
+ }
+ }
+}