Start work on testing.

author: bjculkin <bjculkin@mix.wvu.edu> 2017-03-20 08:47:55 -0400
committer: bjculkin <bjculkin@mix.wvu.edu> 2017-03-20 08:47:55 -0400
commit: a901f454f9ca1409bc3baa30cde9ae37098872e2 (patch)
tree: ae278fb4a0d4a3615fafcea918ab2512577acf93 /BJC-Utils2/src
parent: 76f83e963cf43023741f2c201d791a832c5b6bad (diff)
2 files changed, 298 insertions, 181 deletions
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java
index ce975f1..ad30f4c 100644
--- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java
+++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java
@@ -1,181 +1,209 @@
-package bjc.utils.parserutils;
-
-import java.util.LinkedList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.commons.lang3.StringUtils;
-
-/**
- * Utilities useful for operating on PL tokens.
- *
- * @author EVE
- *
- */
-public class TokenUtils {
-
-	/**
-	 * Checks if the given expression contains the specified operator in a
-	 * situation that indicates its use as an infix operator.
-	 *
-	 * @param expression
-	 *                The expression to check.
-	 * @param operator
-	 *                The operator to see if it is contained.
-	 *                
-	 * @return Whether or not the given expression contains the specified
-	 *         operator as a infix operator.
-	 */
-	public static boolean containsInfixOperator(String expression, String operator) {
-		return StringUtils.countMatches(expression, operator) == 1 && !expression.equalsIgnoreCase(operator)
-				&& !expression.startsWith(operator);
-	}
-
-	/*
-	 * This regex matches java-style string escapes
-	 */
-	private static String escapeString = "\\\\([btnfr\"'\\\\]" // Match shortform escape sequences like \t or \"
-			+ "|[0-3]?[0-7]{1,2}"  // Match octal escape sequences
-			+ "|u[0-9a-fA-F]{4})"; // Match unicode escape sequences
-
-	private static Pattern escapePatt = Pattern.compile(escapeString);
-
-	/*
-	 * This regular expression matches java style double quoted strings
-	 */
-	private static Pattern doubleQuotePatt = Pattern.compile("(\"(" + "[^\\\\\"]+" // Match one or more characters that aren't quotes or slashes
-			+ "|" + escapeString + ")" // Match escape sequences
-			+ "*\")"); // Match all of those things zero or more times, followed by a closing quote
-
-	/**
-	 * Remove double quoted strings from a string.
-	 *
-	 * Splits a string around instances of java-style double-quoted strings.
-	 *
-	 * @param inp
-	 *                The string to split.
-	 *
-	 * @return An list containing alternating bits of the string and the
-	 *         embedded double-quoted strings that separated them.
-	 */
-	public static List<String> removeDQuotedStrings(String inp) {
-		StringBuffer work = new StringBuffer();
-		List<String> res = new LinkedList<>();
-
-		Matcher mt = doubleQuotePatt.matcher(inp);
-
-		while(mt.find()) {
-			mt.appendReplacement(work, "");
-
-			res.add(work.toString());
-			res.add(mt.group(1));
-
-			work = new StringBuffer();
-		}
-
-		mt.appendTail(work);
-		res.add(work.toString());
-
-		return res;
-	}
-
-	/**
-	 * Replace escape characters with their actual equivalents.
-	 *
-	 * @param inp
-	 *                The string to replace escape sequences in.
-	 *
-	 * @return The string with escape sequences replaced by their equivalent
-	 *         characters.
-	 */
-	public static String descapeString(String inp) {
-		StringBuffer work = new StringBuffer();
-
-		Matcher escapeFinder = escapePatt.matcher(inp);
-		while(escapeFinder.find()) {
-			String escapeSeq = escapeFinder.group();
-
-			String escapeRep = "";
-			switch(escapeSeq) {
-			case "\\b":
-				escapeRep = "\b";
-				break;
-			case "\\t":
-				escapeRep = "\t";
-				break;
-			case "\\n":
-				escapeRep = "\n";
-				break;
-			case "\\f":
-				escapeRep = "\f";
-				break;
-			case "\\r":
-				escapeRep = "\r";
-				break;
-			case "\\\"":
-				escapeRep = "\"";
-				break;
-			case "\\'":
-				escapeRep = "'";
-				break;
-			case "\\\\":
-				escapeRep = "\\";
-				break;
-			default:
-				if(escapeSeq.startsWith("u")) {
-					escapeRep = handleUnicodeEscape(escapeSeq.substring(1));
-				} else {
-					escapeRep = handleOctalEscape(escapeSeq);
-				}
-			}
-
-			escapeFinder.appendReplacement(work, escapeRep);
-		}
-
-		escapeFinder.appendTail(work);
-
-		return work.toString();
-	}
-
-	private static String handleUnicodeEscape(String seq) {
-		int codepoint = Integer.parseInt(seq, 16);
-
-		return new String(Character.toChars(codepoint));
-	}
-
-	private static String handleOctalEscape(String seq) {
-		int codepoint = Integer.parseInt(seq, 8);
-
-		return new String(Character.toChars(codepoint));
-	}
-
-	/**
-	 * Check if a given string would be successfully converted to a double
-	 * by {@link Double#parseDouble(String)}.
-	 * 
-	 * @param inp
-	 *                The string to check.
-	 * @return Whether the string is a valid double or not.
-	 */
-	public static boolean isDouble(String inp) {
-		return DoubleMatcher.floatingLiteral.matcher(inp).matches();
-	}
-
-	private static Pattern intLitPattern = Pattern.compile("\\A[+\\-]?\\d+\\Z");
-
-	/**
-	 * Check if a given string would be successfully converted to a integer
-	 * by {@link Integer#parseInt(String)}.
-	 * 
-	 * NOTE: This only checks syntax. Using values out of the range of
-	 * integers will still cause errors.
-	 * 
-	 * @param inp
-	 *                The input to check.
-	 * @return Whether the string is a valid double or not.
-	 */
-	public static boolean isInt(String inp) {
-		return intLitPattern.matcher(inp).matches();
-	}
-}
+package bjc.utils.parserutils;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * Utilities useful for operating on PL tokens.
+ *
+ * @author EVE
+ *
+ */
+public class TokenUtils {
+	/*
+	 * Matches anything that looks like an escape sequence: a backslash
+	 * followed by any one character other than a line terminator.
+	 */
+	private static Pattern possibleEscape = Pattern.compile("\\\\.");
+	/*
+	 * Regex source for java-style string escapes. Everything after the
+	 * leading backslash is captured as a group.
+	 */
+	private static String escapeString = "\\\\([btnfr\"'\\\\]" // Match shortform escape sequences like \t or \"
+			+ "|[0-3]?[0-7]{1,2}"  // Match octal escape sequences
+			+ "|u[0-9a-fA-F]{4})"; // Match unicode escape sequences
+
+	private static Pattern escapePatt = Pattern.compile(escapeString);
+
+	/*
+	 * Matches java-style double-quoted strings, including both quotes.
+	 */
+	private static Pattern doubleQuotePatt = Pattern.compile("(\"(" + "[^\\\\\"]+" // Match one or more characters that aren't quotes or backslashes
+			+ "|" + escapeString + ")" // Match escape sequences
+			+ "*\")"); // Match all of those things zero or more times, followed by a closing quote
+
+	/**
+	 * Split a string around the java-style double-quoted strings it
+	 * contains.
+	 *
+	 * The result always starts and ends with a (possibly empty) piece of
+	 * unquoted text, with each quoted string, quotes included, sitting
+	 * between the two pieces of text that surrounded it.
+	 *
+	 * @param inp
+	 *                The string to split. Must not be null.
+	 *
+	 * @return A list alternating between unquoted text and the
+	 *         double-quoted strings that separated it.
+	 */
+	public static List<String> removeDQuotedStrings(String inp) {
+		if(inp == null) {
+			throw new NullPointerException("inp must not be null");
+		}
+
+		List<String> pieces = new LinkedList<>();
+		Matcher quoteFinder = doubleQuotePatt.matcher(inp);
+
+		/*
+		 * Index of the first character not yet copied into a piece.
+		 */
+		int unconsumed = 0;
+
+		while(quoteFinder.find()) {
+			pieces.add(inp.substring(unconsumed, quoteFinder.start()));
+			pieces.add(quoteFinder.group(1));
+
+			unconsumed = quoteFinder.end();
+		}
+
+		pieces.add(inp.substring(unconsumed));
+
+		return pieces;
+	}
+
+	/**
+	 * Replace java-style escape sequences with the characters they stand
+	 * for.
+	 *
+	 * Recognized escapes are the single character escapes (b, t, n, f, r,
+	 * double quote, single quote and backslash), octal escapes of one to
+	 * three digits, and unicode escapes (a 'u' followed by four hex
+	 * digits).
+	 *
+	 * A backslash that ends the input, or that is directly followed by a
+	 * line terminator, is not treated as an escape and is copied as is.
+	 *
+	 * @param inp
+	 *                The string to replace escape sequences in. Must not
+	 *                be null.
+	 *
+	 * @return The string with escape sequences replaced by their equivalent
+	 *         characters.
+	 *
+	 * @throws IllegalArgumentException
+	 *                 If a backslash is followed by a character that
+	 *                 does not start a recognized escape sequence.
+	 */
+	public static String descapeString(String inp) {
+		if(inp == null) {
+			throw new NullPointerException("inp must not be null");
+		}
+
+		StringBuilder work = new StringBuilder();
+
+		Matcher possibleEscapeFinder = possibleEscape.matcher(inp);
+		Matcher escapeFinder = escapePatt.matcher(inp);
+
+		/*
+		 * Text is copied by hand instead of with appendReplacement,
+		 * which gives '\' and '$' in the replacement a special meaning.
+		 */
+		int lastEnd = 0;
+
+		while(possibleEscapeFinder.find(lastEnd)) {
+			int escapeStart = possibleEscapeFinder.start();
+
+			/*
+			 * A real escape has to start at this exact backslash.
+			 * Anchoring the second matcher here keeps the two
+			 * matchers from drifting apart.
+			 */
+			escapeFinder.region(escapeStart, inp.length());
+			if(!escapeFinder.lookingAt()) {
+				throw new IllegalArgumentException(
+						"Illegal escape sequence " + possibleEscapeFinder.group());
+			}
+
+			/*
+			 * Group 1 is the escape without its leading backslash.
+			 */
+			String escapeSeq = escapeFinder.group(1);
+			String escapeRep;
+
+			switch(escapeSeq) {
+			case "b":
+				escapeRep = "\b";
+				break;
+			case "t":
+				escapeRep = "\t";
+				break;
+			case "n":
+				escapeRep = "\n";
+				break;
+			case "f":
+				escapeRep = "\f";
+				break;
+			case "r":
+				escapeRep = "\r";
+				break;
+			case "\"":
+			case "'":
+			case "\\":
+				/*
+				 * These escapes stand for the escaped character
+				 * itself.
+				 */
+				escapeRep = escapeSeq;
+				break;
+			default:
+				if(escapeSeq.startsWith("u")) {
+					escapeRep = handleUnicodeEscape(escapeSeq.substring(1));
+				} else {
+					escapeRep = handleOctalEscape(escapeSeq);
+				}
+			}
+
+			work.append(inp, lastEnd, escapeStart);
+			work.append(escapeRep);
+
+			/*
+			 * Resume scanning after the whole escape, which may be
+			 * longer than the two characters matched above.
+			 */
+			lastEnd = escapeFinder.end();
+		}
+
+		work.append(inp, lastEnd, inp.length());
+
+		return work.toString();
+	}
+
+	/*
+	 * Convert the hex digits of a unicode escape into the character they
+	 * name. Any failure is reported as an IllegalArgumentException that
+	 * carries the original failure as its cause.
+	 */
+	private static String handleUnicodeEscape(String seq) {
+		char[] decoded;
+
+		try {
+			decoded = Character.toChars(Integer.parseInt(seq, 16));
+		} catch(IllegalArgumentException cause) {
+			String msg = String.format("'%s' is not a valid Unicode escape sequence'", seq);
+
+			throw new IllegalArgumentException(msg, cause);
+		}
+
+		return String.valueOf(decoded);
+	}
+
+	/*
+	 * Convert the digits of an octal escape into the character they name.
+	 *
+	 * Only values from 0 to 255 (octal 377) are accepted; anything else,
+	 * or a string that isn't octal digits, is reported as an
+	 * IllegalArgumentException.
+	 */
+	private static String handleOctalEscape(String seq) {
+		int codepoint;
+
+		try {
+			codepoint = Integer.parseInt(seq, 8);
+		} catch(NumberFormatException nfex) {
+			throw new IllegalArgumentException(
+					String.format("'%s' is not a valid octal escape sequence'", seq), nfex);
+		}
+
+		/*
+		 * Checked outside the try block so that the range message is
+		 * what the caller sees, instead of being caught and rewrapped.
+		 */
+		if(codepoint < 0 || codepoint > 255) {
+			throw new IllegalArgumentException(
+					String.format("'%d' is outside the range of octal escapes", codepoint));
+		}
+
+		return new String(Character.toChars(codepoint));
+	}
+
+	/**
+	 * Check if a given string would be successfully converted to a double
+	 * by {@link Double#parseDouble(String)}.
+	 *
+	 * The check is delegated to the matcher produced by
+	 * DoubleMatcher.floatingLiteral; no parsing is attempted here.
+	 *
+	 * @param inp
+	 *                The string to check.
+	 * @return Whether the string is a valid double or not.
+	 */
+	public static boolean isDouble(String inp) {
+		return DoubleMatcher.floatingLiteral.matcher(inp).matches();
+	}
+
+	/*
+	 * Matches an optional sign followed by one or more digits.
+	 */
+	private static Pattern intLitPattern = Pattern.compile("\\A[+\\-]?\\d+\\Z");
+
+	/**
+	 * Check if a given string would be successfully converted to a integer
+	 * by {@link Integer#parseInt(String)}.
+	 *
+	 * NOTE: This only checks syntax. Using values out of the range of
+	 * integers will still cause errors.
+	 *
+	 * @param inp
+	 *                The input to check.
+	 * @return Whether the string is a valid integer or not.
+	 */
+	public static boolean isInt(String inp) {
+		return intLitPattern.matcher(inp).matches();
+	}
+}
diff --git a/BJC-Utils2/src/test/java/bjc/utils/test/parserutils/TokenUtilsTest.java b/BJC-Utils2/src/test/java/bjc/utils/test/parserutils/TokenUtilsTest.java
new file mode 100644
index 0000000..125811d
--- /dev/null
+++ b/BJC-Utils2/src/test/java/bjc/utils/test/parserutils/TokenUtilsTest.java
@@ -0,0 +1,89 @@
+/**
+ * 
+ */
+package bjc.utils.test.parserutils;
+
+import static org.junit.Assert.*;
+
+import bjc.utils.parserutils.TokenUtils;
+
+import java.util.List;
+
+import static org.hamcrest.CoreMatchers.*;
+
+import org.junit.Test;
+
+/**
+ * Tests on token utils.
+ *
+ * Only removeDQuotedStrings is covered so far; the remaining tests are
+ * placeholders that fail until they are written.
+ *
+ * @author EVE
+ *
+ */
+public class TokenUtilsTest {
+
+	/**
+	 * Test method for
+	 * {@link bjc.utils.parserutils.TokenUtils#removeDQuotedStrings(java.lang.String)}.
+	 */
+	@Test
+	public void testRemoveDQuotedStrings() {
+		/*
+		 * Check handling of empty strings.
+		 */
+		List<String> onEmptyString = TokenUtils.removeDQuotedStrings("");
+		assertThat(onEmptyString.size(), is(1));
+		assertThat(onEmptyString.get(0), is(""));
+
+		/*
+		 * Check handling of strings without embedded strings.
+		 */
+		List<String> onNonmatchingString = TokenUtils.removeDQuotedStrings("hello");
+		assertThat(onNonmatchingString.size(), is(1));
+		assertThat(onNonmatchingString.get(0), is("hello"));
+
+		/*
+		 * Check handling of strings with a single embedded string.
+		 *
+		 * The text after the last quoted string is always part of the
+		 * result, even when it is empty, so three pieces are expected.
+		 */
+		List<String> onSingleMatchString = TokenUtils.removeDQuotedStrings("hello\"there\"");
+		assertThat(onSingleMatchString.size(), is(3));
+		assertThat(onSingleMatchString.get(0), is("hello"));
+		assertThat(onSingleMatchString.get(1), is("\"there\""));
+		assertThat(onSingleMatchString.get(2), is(""));
+
+		/*
+		 * Check handling a string with mismatched quotes.
+		 *
+		 * TODO is this the right behavior, or should we fail instead?
+		 */
+		List<String> onMismatchString = TokenUtils.removeDQuotedStrings("hello\"there");
+		assertThat(onMismatchString.size(), is(1));
+		assertThat(onMismatchString.get(0), is("hello\"there"));
+	}
+
+	/**
+	 * Test method for
+	 * {@link bjc.utils.parserutils.TokenUtils#descapeString(java.lang.String)}.
+	 */
+	@Test
+	public void testDescapeString() {
+		fail("Not yet implemented"); // TODO
+	}
+
+	/**
+	 * Test method for
+	 * {@link bjc.utils.parserutils.TokenUtils#isDouble(java.lang.String)}.
+	 */
+	@Test
+	public void testIsDouble() {
+		fail("Not yet implemented"); // TODO
+	}
+
+	/**
+	 * Test method for
+	 * {@link bjc.utils.parserutils.TokenUtils#isInt(java.lang.String)}.
+	 */
+	@Test
+	public void testIsInt() {
+		fail("Not yet implemented"); // TODO
+	}
+
+}
author	bjculkin <bjculkin@mix.wvu.edu>	2017-03-20 08:47:55 -0400
committer	bjculkin <bjculkin@mix.wvu.edu>	2017-03-20 08:47:55 -0400
commit	a901f454f9ca1409bc3baa30cde9ae37098872e2 (patch)
tree	ae278fb4a0d4a3615fafcea918ab2512577acf93 /BJC-Utils2/src
parent	76f83e963cf43023741f2c201d791a832c5b6bad (diff)