Refactor StringUtils

Moved a bunch of token-oriented stuff from StringUtils to a new TokenUtils class.
author: bjculkin <bjculkin@mix.wvu.edu> 2017-03-17 08:33:37 -0400
committer: bjculkin <bjculkin@mix.wvu.edu> 2017-03-17 08:33:37 -0400
commit: a63c30f5fe9ee302e73bb30e35095d789adb1a80 (patch)
tree: 8bb952e6c4f61172597e945f58d8244c24ea88b0 /BJC-Utils2/src/main/java/bjc/utils/parserutils
parent: 897c15c70a6b11463686293893518bd9b4d5c29c (diff)
2 files changed, 251 insertions, 0 deletions
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/DoubleMatcher.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/DoubleMatcher.java
new file mode 100644
index 0000000..63eabca
--- /dev/null
+++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/DoubleMatcher.java
@@ -0,0 +1,71 @@
+package bjc.utils.parserutils;
+
+import java.util.regex.Pattern;
+
+/**
+ * Checks if a string would pass {@link Double#parseDouble(String)}.
+ *
+ * The regex is assembled piece by piece from the one given in the javadoc
+ * for {@link Double#valueOf(String)}.
+ */
+class DoubleMatcher {
+	/* One or more decimal digits, as a capturing group. */
+	private static final String DEC_DIGITS = "(\\p{Digit}+)";
+
+	/* One or more hexadecimal digits, as a capturing group. */
+	private static final String HEX_DIGITS = "(\\p{XDigit}+)";
+
+	/* Optional leading or trailing "whitespace". */
+	private static final String PADDING = "[\\x00-\\x20]*";
+
+	/*
+	 * A decimal exponent is 'e' or 'E' followed by an optionally signed
+	 * decimal integer.
+	 */
+	private static final String DEC_EXPONENT = "[eE][+-]?" + DEC_DIGITS;
+
+	/*
+	 * A binary exponent is 'p' or 'P' followed by an optionally signed
+	 * decimal integer.
+	 */
+	private static final String BIN_EXPONENT = "[pP][+-]?" + DEC_DIGITS;
+
+	/*
+	 * Digits ._opt Digits_opt ExponentPart_opt
+	 *
+	 * Integer-only strings are accepted in addition to floating-point
+	 * literals, so this and the next sub-pattern are simplifications of the
+	 * grammar productions from section 3.10.2 of The Java Language
+	 * Specification.
+	 */
+	private static final String DEC_WITH_INT_PART =
+		"(" + DEC_DIGITS + "(\\.)?(" + DEC_DIGITS + "?)(" + DEC_EXPONENT + ")?)";
+
+	/* . Digits ExponentPart_opt */
+	private static final String DEC_FRACTION_ONLY =
+		"(\\.(" + DEC_DIGITS + ")(" + DEC_EXPONENT + ")?)";
+
+	/* 0[xX] HexDigits ._opt */
+	private static final String HEX_WITH_INT_PART = "(0[xX]" + HEX_DIGITS + "(\\.)?)";
+
+	/* 0[xX] HexDigits_opt . HexDigits */
+	private static final String HEX_WITH_FRACTION = "(0[xX]" + HEX_DIGITS + "?(\\.)" + HEX_DIGITS + ")";
+
+	/* Either hexadecimal significand followed by a binary exponent. */
+	private static final String HEX_LITERAL =
+		"((" + HEX_WITH_INT_PART + "|" + HEX_WITH_FRACTION + ")" + BIN_EXPONENT + ")";
+
+	/* Any of the numeric forms above, then FloatTypeSuffix_opt. */
+	private static final String NUMERIC_LITERAL =
+		"((" + DEC_WITH_INT_PART + "|" + DEC_FRACTION_ONLY + "|" + HEX_LITERAL + ")[fFdD]?)";
+
+	/*
+	 * The complete expression: "whitespace", an optional sign character,
+	 * then "NaN", "Infinity" or a numeric literal, then "whitespace" again.
+	 */
+	private static final String FP_REGEX =
+		PADDING + "[+-]?(NaN|Infinity|" + NUMERIC_LITERAL + ")" + PADDING;
+
+	/* The complete expression wrapped between the \A and \Z anchors. */
+	public static final Pattern floatingLiteral = Pattern.compile("\\A" + FP_REGEX + "\\Z");
+}
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java
new file mode 100644
index 0000000..8224928
--- /dev/null
+++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java
@@ -0,0 +1,180 @@
+package bjc.utils.parserutils;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * Utilities useful for operating on PL tokens.
+ *
+ * @author EVE
+ *
+ */
+public class TokenUtils {
+
+	/**
+	 * Checks if the given expression contains the specified operator in a
+	 * situation that indicates its use as an infix operator: the operator
+	 * must occur exactly once and the expression must not start with it.
+	 *
+	 * @param expression
+	 *                The expression to check
+	 * @param operator
+	 *                The operator to see if it is contained
+	 * @return Whether or not the given expression contains the specified
+	 *         operator as an infix operator
+	 */
+	public static boolean containsInfixOperator(String expression, String operator) {
+		return StringUtils.countMatches(expression, operator) == 1 && !expression.equalsIgnoreCase(operator)
+				&& !expression.startsWith(operator);
+	}
+
+	/*
+	 * This regex matches java-style string escapes. Compiled on its own,
+	 * group 1 is the escape without its leading backslash.
+	 */
+	private static final String ESCAPE_REGEX = "\\\\([btnfr\"'\\\\]" // Match shortform escape sequences like \t or \"
+			+ "|[0-3]?[0-7]{1,2}"  // Match octal escape sequences
+			+ "|u[0-9a-fA-F]{4})"; // Match unicode escape sequences
+
+	private static final Pattern ESCAPE_PATTERN = Pattern.compile(ESCAPE_REGEX);
+
+	/*
+	 * This regular expression matches java style double quoted strings;
+	 * group 1 is the whole string including both quotes.
+	 *
+	 * The run of ordinary characters is possessive ("++"). A greedy '+'
+	 * nested in the outer '*' would let a backtracking engine retry every
+	 * split of the run when the closing quote is missing, and giving
+	 * characters back can never help: none is a quote or a backslash.
+	 */
+	private static final Pattern DOUBLE_QUOTE_PATTERN = Pattern.compile("(\"(" + "[^\\\\\"]++" // Match a run of characters that aren't quotes or slashes
+			+ "|" + ESCAPE_REGEX + ")" // Match escape sequences
+			+ "*\")"); // Match all of those things zero or more times, followed by a closing quote
+
+	/**
+	 * Remove double quoted strings from a string.
+	 *
+	 * Splits a string around instances of java-style double-quoted strings.
+	 *
+	 * @param inp
+	 *                The string to split.
+	 *
+	 * @return A list containing alternating bits of the string and the
+	 *         embedded double-quoted strings that separated them.
+	 */
+	public static List<String> removeDQuotedStrings(String inp) {
+		List<String> res = new LinkedList<>();
+		Matcher mt = DOUBLE_QUOTE_PATTERN.matcher(inp);
+
+		// Index just past the previous quoted string.
+		int prevEnd = 0;
+		while(mt.find()) {
+			res.add(inp.substring(prevEnd, mt.start()));
+			res.add(mt.group(1));
+			prevEnd = mt.end();
+		}
+
+		res.add(inp.substring(prevEnd));
+		return res;
+	}
+
+	/**
+	 * Replace escape characters with their actual equivalents.
+	 *
+	 * @param inp
+	 *                The string to replace escape sequences in.
+	 *
+	 * @return The string with escape sequences replaced by their equivalent
+	 *         characters; all other text is copied through unchanged.
+	 */
+	public static String descapeString(String inp) {
+		StringBuilder work = new StringBuilder(inp.length());
+		Matcher escapeFinder = ESCAPE_PATTERN.matcher(inp);
+
+		int prevEnd = 0;
+		while(escapeFinder.find()) {
+			// Group 1 is the escape without its backslash, so the 'u' test
+			// and the number parsing below only ever see the escape body.
+			String escapeBody = escapeFinder.group(1);
+			String escapeRep;
+			switch(escapeBody) {
+			case "b":
+				escapeRep = "\b";
+				break;
+			case "t":
+				escapeRep = "\t";
+				break;
+			case "n":
+				escapeRep = "\n";
+				break;
+			case "f":
+				escapeRep = "\f";
+				break;
+			case "r":
+				escapeRep = "\r";
+				break;
+			case "\"":
+				escapeRep = "\"";
+				break;
+			case "'":
+				escapeRep = "'";
+				break;
+			case "\\":
+				escapeRep = "\\";
+				break;
+			default:
+				if(escapeBody.startsWith("u")) {
+					escapeRep = decodeNumericEscape(escapeBody.substring(1), 16);
+				} else {
+					escapeRep = decodeNumericEscape(escapeBody, 8);
+				}
+			}
+
+			// Copy by position rather than Matcher.appendReplacement, which
+			// would interpret '\' and '$' in the replacement text.
+			work.append(inp, prevEnd, escapeFinder.start()).append(escapeRep);
+			prevEnd = escapeFinder.end();
+		}
+
+		work.append(inp, prevEnd, inp.length());
+		return work.toString();
+	}
+
+	/* Parses the digits of a numeric escape in the given radix and returns that codepoint as a string. */
+	private static String decodeNumericEscape(String digits, int radix) {
+		return new String(Character.toChars(Integer.parseInt(digits, radix)));
+	}
+
+	/**
+	 * Check if a given string would be successfully converted to a double
+	 * by {@link Double#parseDouble(String)}.
+	 *
+	 * @param inp
+	 *                The string to check.
+	 * @return Whether the string is a valid double or not.
+	 */
+	public static boolean isDouble(String inp) {
+		return DoubleMatcher.floatingLiteral.matcher(inp).matches();
+	}
+
+	private static final Pattern INT_PATTERN = Pattern.compile("\\A[+\\-]?\\d+\\Z");
+
+	/**
+	 * Check if a given string would be successfully converted to an integer
+	 * by {@link Integer#parseInt(String)}.
+	 *
+	 * NOTE: This only checks syntax. Using values out of the range of
+	 * integers will still cause errors.
+	 *
+	 * @param inp
+	 *                The input to check.
+	 * @return Whether the string is a valid integer or not.
+	 */
+	public static boolean isInt(String inp) {
+		return INT_PATTERN.matcher(inp).matches();
+	}
+}
author	bjculkin <bjculkin@mix.wvu.edu>	2017-03-17 08:33:37 -0400
committer	bjculkin <bjculkin@mix.wvu.edu>	2017-03-17 08:33:37 -0400
commit	a63c30f5fe9ee302e73bb30e35095d789adb1a80 (patch)
tree	8bb952e6c4f61172597e945f58d8244c24ea88b0 /BJC-Utils2/src/main/java/bjc/utils/parserutils
parent	897c15c70a6b11463686293893518bd9b4d5c29c (diff)