From c82e3b3b2de0633317ec8fc85925e91422820597 Mon Sep 17 00:00:00 2001
From: "Benjamin J. Culkin" <bjculkin@mix.wvu.edu>
Date: Sun, 8 Oct 2017 22:39:59 -0300
Subject: Start splitting into maven modules

---
 .../java/bjc/utils/parserutils/TokenUtils.java     | 303 +++++++++++++++++++++
 1 file changed, 303 insertions(+)
 create mode 100644 base/src/main/java/bjc/utils/parserutils/TokenUtils.java

(limited to 'base/src/main/java/bjc/utils/parserutils/TokenUtils.java')
diff --git a/base/src/main/java/bjc/utils/parserutils/TokenUtils.java b/base/src/main/java/bjc/utils/parserutils/TokenUtils.java
new file mode 100644
index 0000000..67c1e5a
--- /dev/null
+++ b/base/src/main/java/bjc/utils/parserutils/TokenUtils.java
@@ -0,0 +1,303 @@
+package bjc.utils.parserutils;
+
+import static bjc.utils.PropertyDB.applyFormat;
+import static bjc.utils.PropertyDB.getCompiledRegex;
+import static bjc.utils.PropertyDB.getRegex;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import bjc.utils.funcdata.FunctionalList;
+import bjc.utils.funcdata.IList;
+import bjc.utils.parserutils.splitter.TokenSplitter;
+
+/**
+ * Utilities useful for operating on PL tokens.
+ *
+ * @author EVE
+ *
+ */
+public class TokenUtils {
+	/**
+	 * Simple implementation of TokenSplitter for removing double-quoted
+	 * strings.
+	 * 
+	 * @author EVE
+	 *
+	 */
+	public static class StringTokenSplitter implements TokenSplitter {
+		@Override
+		public IList<String> split(final String input) {
+			return new FunctionalList<>(TokenUtils.removeDQuotedStrings(input));
+		}
+	}
+
+	/*
+	 * Patterns and pattern parts.
+	 */
+	private static String rPossibleEscapeString = getRegex("possibleStringEscape");
+
+	private static Pattern possibleEscapePatt = Pattern.compile(rPossibleEscapeString);
+
+	private static String	rShortEscape	= getRegex("shortFormStringEscape");
+	private static String	rOctalEscape	= getRegex("octalStringEscape");
+	private static String	rUnicodeEscape	= getRegex("unicodeStringEscape");
+
+	private static String rEscapeString = applyFormat("stringEscape", rShortEscape, rOctalEscape, rUnicodeEscape);
+
+	private static Pattern escapePatt = Pattern.compile(rEscapeString);
+
+	private static String rDoubleQuoteString = applyFormat("doubleQuotes", getRegex("nonStringEscape"),
+			rPossibleEscapeString);
+
+	private static Pattern doubleQuotePatt = Pattern.compile(rDoubleQuoteString);
+
+	private static Pattern quotePatt = getCompiledRegex("unescapedQuote");
+
+	private static Pattern intLitPattern = getCompiledRegex("intLiteral");
+
+	/**
+	 * Remove double quoted strings from a string.
+	 *
+	 * Splits a string around instances of java-style double-quoted strings.
+	 *
+	 * @param inp
+	 *                The string to split.
+	 *
+	 * @return An list containing alternating bits of the string and the
+	 *         embedded double-quoted strings that separated them.
+	 */
+	public static List<String> removeDQuotedStrings(final String inp) {
+		if (inp == null) throw new NullPointerException("inp must not be null");
+
+		/*
+		 * What we need for piece-by-piece string building
+		 */
+		StringBuffer work = new StringBuffer();
+		final List<String> res = new LinkedList<>();
+
+		/*
+		 * Matcher for proper strings and single quotes.
+		 */
+		final Matcher mt = doubleQuotePatt.matcher(inp);
+		final Matcher corr = quotePatt.matcher(inp);
+
+		if (corr.find() && !corr.find()) {
+			/*
+			 * There's a unmatched opening quote with no strings.
+			 */
+			final String msg = String.format(
+					"Unclosed string literal '%s'. Opening quote was at position %d", inp,
+					inp.indexOf("\""));
+
+			throw new IllegalArgumentException(msg);
+		}
+
+		while (mt.find()) {
+			/*
+			 * Remove the string until the quoted string.
+			 */
+			mt.appendReplacement(work, "");
+
+			/*
+			 * Add the string preceding the double-quoted string and
+			 * the double-quoted string to the list.
+			 */
+			res.add(work.toString());
+			res.add(mt.group(1));
+
+			/*
+			 * Renew the buffer.
+			 */
+			work = new StringBuffer();
+		}
+
+		/*
+		 * Grab the remainder of the string.
+		 */
+		mt.appendTail(work);
+		final String tail = work.toString();
+
+		if (tail.contains("\"")) {
+			/*
+			 * There's a unmatched opening quote with at least one
+			 * string.
+			 */
+			final String msg = String.format(
+					"Unclosed string literal '%s'. Opening quote was at position %d", inp,
+					inp.lastIndexOf("\""));
+
+			throw new IllegalArgumentException(msg);
+		}
+
+		/*
+		 * Only add an empty tail if the string was empty.
+		 */
+		if (!tail.equals("") || res.isEmpty()) {
+			res.add(tail);
+		}
+
+		return res;
+	}
+
+	/**
+	 * Replace escape characters with their actual equivalents.
+	 *
+	 * @param inp
+	 *                The string to replace escape sequences in.
+	 *
+	 * @return The string with escape sequences replaced by their equivalent
+	 *         characters.
+	 */
+	public static String descapeString(final String inp) {
+		if (inp == null) throw new NullPointerException("inp must not be null");
+
+		/*
+		 * Prepare the buffer and escape finder.
+		 */
+		final StringBuffer work = new StringBuffer();
+		final Matcher possibleEscapeFinder = possibleEscapePatt.matcher(inp);
+		final Matcher escapeFinder = escapePatt.matcher(inp);
+
+		while (possibleEscapeFinder.find()) {
+			if (!escapeFinder.find()) {
+				/*
+				 * Found a possible escape that isn't actually an
+				 * escape.
+				 */
+				final String msg = String.format("Illegal escape sequence '%s' at position %d",
+						possibleEscapeFinder.group(), possibleEscapeFinder.start());
+
+				throw new IllegalArgumentException(msg);
+			}
+
+			final String escapeSeq = escapeFinder.group();
+
+			/*
+			 * Convert the escape to a string.
+			 */
+			String escapeRep = "";
+			switch (escapeSeq) {
+			case "\\b":
+				escapeRep = "\b";
+				break;
+			case "\\t":
+				escapeRep = "\t";
+				break;
+			case "\\n":
+				escapeRep = "\n";
+				break;
+			case "\\f":
+				escapeRep = "\f";
+				break;
+			case "\\r":
+				escapeRep = "\r";
+				break;
+			case "\\\"":
+				escapeRep = "\"";
+				break;
+			case "\\'":
+				escapeRep = "'";
+				break;
+			case "\\\\":
+				/*
+				 * Skip past the second slash.
+				 */
+				possibleEscapeFinder.find();
+				escapeRep = "\\";
+				break;
+			default:
+				if (escapeSeq.startsWith("u")) {
+					escapeRep = handleUnicodeEscape(escapeSeq.substring(1));
+				} else {
+					escapeRep = handleOctalEscape(escapeSeq);
+				}
+			}
+
+			escapeFinder.appendReplacement(work, escapeRep);
+		}
+
+		escapeFinder.appendTail(work);
+
+		return work.toString();
+	}
+
+	/*
+	 * Handle a unicode codepoint.
+	 */
+	private static String handleUnicodeEscape(final String seq) {
+		try {
+			final int codepoint = Integer.parseInt(seq, 16);
+
+			return new String(Character.toChars(codepoint));
+		} catch (final IllegalArgumentException iaex) {
+			final String msg = String.format("'%s' is not a valid Unicode escape sequence'", seq);
+
+			final IllegalArgumentException reiaex = new IllegalArgumentException(msg);
+
+			reiaex.initCause(iaex);
+
+			throw reiaex;
+		}
+	}
+
+	/*
+	 * Handle a octal codepoint.
+	 */
+	private static String handleOctalEscape(final String seq) {
+		try {
+			final int codepoint = Integer.parseInt(seq, 8);
+
+			if (codepoint > 255) {
+				final String msg = String
+						.format("'%d' is outside the range of octal escapes', codepoint");
+
+				throw new IllegalArgumentException(msg);
+			}
+
+			return new String(Character.toChars(codepoint));
+		} catch (final IllegalArgumentException iaex) {
+			final String msg = String.format("'%s' is not a valid octal escape sequence'", seq);
+
+			final IllegalArgumentException reiaex = new IllegalArgumentException(msg);
+
+			reiaex.initCause(iaex);
+
+			throw reiaex;
+		}
+	}
+
+	/**
+	 * Check if a given string would be successfully converted to a double
+	 * by {@link Double#parseDouble(String)}.
+	 *
+	 * @param inp
+	 *                The string to check.
+	 * @return Whether the string is a valid double or not.
+	 */
+	public static boolean isDouble(final String inp) {
+		return DoubleMatcher.doubleLiteral.matcher(inp).matches();
+	}
+
+	/**
+	 * Check if a given string would be successfully converted to a integer
+	 * by {@link Integer#parseInt(String)}.
+	 *
+	 * NOTE: This only checks syntax. Using values out of the range of
+	 * integers will still cause errors.
+	 *
+	 * @param inp
+	 *                The input to check.
+	 * @return Whether the string is a valid integer or not.
+	 */
+	public static boolean isInt(final String inp) {
+		try {
+			Integer.parseInt(inp);
+			return true;
+		} catch (NumberFormatException nfex) {
+			return false;
+		}
+	}
+}
-- 
cgit v1.2.3