diff options
| author | bjculkin <bjculkin@mix.wvu.edu> | 2017-03-20 08:47:55 -0400 |
|---|---|---|
| committer | bjculkin <bjculkin@mix.wvu.edu> | 2017-03-20 08:47:55 -0400 |
| commit | a901f454f9ca1409bc3baa30cde9ae37098872e2 (patch) | |
| tree | ae278fb4a0d4a3615fafcea918ab2512577acf93 /BJC-Utils2/src | |
| parent | 76f83e963cf43023741f2c201d791a832c5b6bad (diff) | |
Start work on testing.
Diffstat (limited to 'BJC-Utils2/src')
| -rw-r--r-- | BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java | 390 | ||||
| -rw-r--r-- | BJC-Utils2/src/test/java/bjc/utils/test/parserutils/TokenUtilsTest.java | 89 |
2 files changed, 298 insertions, 181 deletions
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java index ce975f1..ad30f4c 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java @@ -1,181 +1,209 @@ -package bjc.utils.parserutils; - -import java.util.LinkedList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.apache.commons.lang3.StringUtils; - -/** - * Utilities useful for operating on PL tokens. - * - * @author EVE - * - */ -public class TokenUtils { - - /** - * Checks if the given expression contains the specified operator in a - * situation that indicates its use as an infix operator. - * - * @param expression - * The expression to check. - * @param operator - * The operator to see if it is contained. - * - * @return Whether or not the given expression contains the specified - * operator as a infix operator. 
- */ - public static boolean containsInfixOperator(String expression, String operator) { - return StringUtils.countMatches(expression, operator) == 1 && !expression.equalsIgnoreCase(operator) - && !expression.startsWith(operator); - } - - /* - * This regex matches java-style string escapes - */ - private static String escapeString = "\\\\([btnfr\"'\\\\]" // Match shortform escape sequences like \t or \" - + "|[0-3]?[0-7]{1,2}" // Match octal escape sequences - + "|u[0-9a-fA-F]{4})"; // Match unicode escape sequences - - private static Pattern escapePatt = Pattern.compile(escapeString); - - /* - * This regular expression matches java style double quoted strings - */ - private static Pattern doubleQuotePatt = Pattern.compile("(\"(" + "[^\\\\\"]+" // Match one or more characters that aren't quotes or slashes - + "|" + escapeString + ")" // Match escape sequences - + "*\")"); // Match all of those things zero or more times, followed by a closing quote - - /** - * Remove double quoted strings from a string. - * - * Splits a string around instances of java-style double-quoted strings. - * - * @param inp - * The string to split. - * - * @return An list containing alternating bits of the string and the - * embedded double-quoted strings that separated them. - */ - public static List<String> removeDQuotedStrings(String inp) { - StringBuffer work = new StringBuffer(); - List<String> res = new LinkedList<>(); - - Matcher mt = doubleQuotePatt.matcher(inp); - - while(mt.find()) { - mt.appendReplacement(work, ""); - - res.add(work.toString()); - res.add(mt.group(1)); - - work = new StringBuffer(); - } - - mt.appendTail(work); - res.add(work.toString()); - - return res; - } - - /** - * Replace escape characters with their actual equivalents. - * - * @param inp - * The string to replace escape sequences in. - * - * @return The string with escape sequences replaced by their equivalent - * characters. 
- */ - public static String descapeString(String inp) { - StringBuffer work = new StringBuffer(); - - Matcher escapeFinder = escapePatt.matcher(inp); - while(escapeFinder.find()) { - String escapeSeq = escapeFinder.group(); - - String escapeRep = ""; - switch(escapeSeq) { - case "\\b": - escapeRep = "\b"; - break; - case "\\t": - escapeRep = "\t"; - break; - case "\\n": - escapeRep = "\n"; - break; - case "\\f": - escapeRep = "\f"; - break; - case "\\r": - escapeRep = "\r"; - break; - case "\\\"": - escapeRep = "\""; - break; - case "\\'": - escapeRep = "'"; - break; - case "\\\\": - escapeRep = "\\"; - break; - default: - if(escapeSeq.startsWith("u")) { - escapeRep = handleUnicodeEscape(escapeSeq.substring(1)); - } else { - escapeRep = handleOctalEscape(escapeSeq); - } - } - - escapeFinder.appendReplacement(work, escapeRep); - } - - escapeFinder.appendTail(work); - - return work.toString(); - } - - private static String handleUnicodeEscape(String seq) { - int codepoint = Integer.parseInt(seq, 16); - - return new String(Character.toChars(codepoint)); - } - - private static String handleOctalEscape(String seq) { - int codepoint = Integer.parseInt(seq, 8); - - return new String(Character.toChars(codepoint)); - } - - /** - * Check if a given string would be successfully converted to a double - * by {@link Double#parseDouble(String)}. - * - * @param inp - * The string to check. - * @return Whether the string is a valid double or not. - */ - public static boolean isDouble(String inp) { - return DoubleMatcher.floatingLiteral.matcher(inp).matches(); - } - - private static Pattern intLitPattern = Pattern.compile("\\A[+\\-]?\\d+\\Z"); - - /** - * Check if a given string would be successfully converted to a integer - * by {@link Integer#parseInt(String)}. - * - * NOTE: This only checks syntax. Using values out of the range of - * integers will still cause errors. - * - * @param inp - * The input to check. - * @return Whether the string is a valid double or not. 
- */ - public static boolean isInt(String inp) { - return intLitPattern.matcher(inp).matches(); - } -} +package bjc.utils.parserutils;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * Utilities useful for operating on PL tokens.
+ *
+ * @author EVE
+ *
+ */
+public class TokenUtils {
+ /*
+ * This regex matches potential single character escape sequences.
+ */
+ private static Pattern possibleEscape = Pattern.compile("\\\\.");
+ /*
+ * This regex matches java-style string escapes
+ */
+ private static String escapeString = "\\\\([btnfr\"'\\\\]" // Match shortform escape sequences like \t or \"
+ + "|[0-3]?[0-7]{1,2}" // Match octal escape sequences
+ + "|u[0-9a-fA-F]{4})"; // Match unicode escape sequences
+
+ private static Pattern escapePatt = Pattern.compile(escapeString);
+
+ /*
+ * This regular expression matches java style double quoted strings
+ */
+ private static Pattern doubleQuotePatt = Pattern.compile("(\"(" + "[^\\\\\"]+" // Match one or more characters that aren't quotes or slashes
+ + "|" + escapeString + ")" // Match escape sequences
+ + "*\")"); // Match all of those things zero or more times, followed by a closing quote
+
+ /**
+ * Remove double quoted strings from a string.
+ *
+ * Splits a string around instances of java-style double-quoted strings.
+ *
+ * @param inp
+ * The string to split.
+ *
+ * @return An list containing alternating bits of the string and the
+ * embedded double-quoted strings that separated them.
+ */
+ public static List<String> removeDQuotedStrings(String inp) {
+ if(inp == null) {
+ throw new NullPointerException("inp must not be null");
+ }
+
+ StringBuffer work = new StringBuffer();
+ List<String> res = new LinkedList<>();
+
+ Matcher mt = doubleQuotePatt.matcher(inp);
+
+ while(mt.find()) {
+ mt.appendReplacement(work, "");
+
+ res.add(work.toString());
+ res.add(mt.group(1));
+
+ work = new StringBuffer();
+ }
+
+ mt.appendTail(work);
+ res.add(work.toString());
+
+ return res;
+ }
+
+ /**
+ * Replace escape characters with their actual equivalents.
+ *
+ * @param inp
+ * The string to replace escape sequences in.
+ *
+ * @return The string with escape sequences replaced by their equivalent
+ * characters.
+ */
+ public static String descapeString(String inp) {
+ if(inp == null) {
+ throw new NullPointerException("inp must not be null");
+ }
+
+ StringBuffer work = new StringBuffer();
+
+ Matcher possibleEscapeFinder = possibleEscape.matcher(inp);
+ Matcher escapeFinder = escapePatt.matcher(inp);
+
+ while(possibleEscapeFinder.find()) {
+ if(!escapeFinder.find()) {
+ throw new IllegalArgumentException(
+ "Illegal escape sequence " + possibleEscapeFinder.group());
+ }
+
+ String escapeSeq = escapeFinder.group();
+
+ String escapeRep = "";
+ switch(escapeSeq) {
+ case "\\b":
+ escapeRep = "\b";
+ break;
+ case "\\t":
+ escapeRep = "\t";
+ break;
+ case "\\n":
+ escapeRep = "\n";
+ break;
+ case "\\f":
+ escapeRep = "\f";
+ break;
+ case "\\r":
+ escapeRep = "\r";
+ break;
+ case "\\\"":
+ escapeRep = "\"";
+ break;
+ case "\\'":
+ escapeRep = "'";
+ break;
+ case "\\\\":
+ /*
+ * Skip past the second slash.
+ */
+ possibleEscapeFinder.find();
+ escapeRep = "\\";
+ break;
+ default:
+ if(escapeSeq.startsWith("u")) {
+ escapeRep = handleUnicodeEscape(escapeSeq.substring(1));
+ } else {
+ escapeRep = handleOctalEscape(escapeSeq);
+ }
+ }
+
+ escapeFinder.appendReplacement(work, escapeRep);
+ }
+
+ escapeFinder.appendTail(work);
+
+ return work.toString();
+ }
+
+ /*
+  * Convert the hex digits of a unicode escape into the character they
+  * name. Any failure to parse or convert the digits is reported as an
+  * IllegalArgumentException that carries the original failure as its cause.
+  */
+ private static String handleUnicodeEscape(String seq) {
+     try {
+         char[] units = Character.toChars(Integer.parseInt(seq, 16));
+
+         return String.valueOf(units);
+     } catch(IllegalArgumentException cause) {
+         String message = String.format("'%s' is not a valid Unicode escape sequence'", seq);
+
+         throw new IllegalArgumentException(message, cause);
+     }
+ }
+
+ /*
+  * Convert the digits of an octal escape into the character they name.
+  *
+  * Throws IllegalArgumentException if the digits are not valid octal or
+  * if the value lies outside 0 to 255, the range of an octal escape.
+  */
+ private static String handleOctalEscape(String seq) {
+     int codepoint;
+
+     try {
+         codepoint = Integer.parseInt(seq, 8);
+     } catch(NumberFormatException nfex) {
+         throw new IllegalArgumentException(
+                 String.format("'%s' is not a valid octal escape sequence'", seq), nfex);
+     }
+
+     /*
+      * Checked outside the try block so that the range error reaches the
+      * caller as is, instead of being caught and wrapped again.
+      */
+     if(codepoint < 0 || codepoint > 255) {
+         throw new IllegalArgumentException(
+                 String.format("'%d' is outside the range of octal escapes", codepoint));
+     }
+
+     return String.valueOf((char) codepoint);
+ }
+
+ /**
+  * Check if a given string would be successfully converted to a double
+  * by {@link Double#parseDouble(String)}.
+  *
+  * The check is delegated to {@code DoubleMatcher.floatingLiteral}, which
+  * is declared outside this file, so the exact grammar accepted is
+  * whatever that matcher defines. NOTE(review): presumably it follows the
+  * grammar documented for {@link Double#valueOf(String)}; confirm against
+  * DoubleMatcher. Unlike the other methods of this class there is no
+  * explicit null check here.
+  *
+  * @param inp
+  *         The string to check.
+  * @return Whether the string is a valid double or not.
+  */
+ public static boolean isDouble(String inp) {
+ return DoubleMatcher.floatingLiteral.matcher(inp).matches();
+ }
+
+ private static Pattern intLitPattern = Pattern.compile("\\A[+\\-]?\\d+\\Z");
+
+ /**
+ * Check if a given string would be successfully converted to a integer
+ * by {@link Integer#parseInt(String)}.
+ *
+ * NOTE: This only checks syntax. Using values out of the range of
+ * integers will still cause errors.
+ *
+ * @param inp
+ * The input to check.
+ * @return Whether the string is a valid double or not.
+ */
+ public static boolean isInt(String inp) {
+ return intLitPattern.matcher(inp).matches();
+ }
+}
diff --git a/BJC-Utils2/src/test/java/bjc/utils/test/parserutils/TokenUtilsTest.java b/BJC-Utils2/src/test/java/bjc/utils/test/parserutils/TokenUtilsTest.java new file mode 100644 index 0000000..125811d --- /dev/null +++ b/BJC-Utils2/src/test/java/bjc/utils/test/parserutils/TokenUtilsTest.java @@ -0,0 +1,89 @@ +/**
+ *
+ */
+package bjc.utils.test.parserutils;
+
+import static org.junit.Assert.*;
+
+import bjc.utils.parserutils.TokenUtils;
+
+import java.util.List;
+
+import static org.hamcrest.CoreMatchers.*;
+
+import org.junit.Test;
+
+/**
+ * Tests on token utils.
+ *
+ * @author EVE
+ *
+ */
+public class TokenUtilsTest {
+
+ /**
+  * Test method for
+  * {@link bjc.utils.parserutils.TokenUtils#removeDQuotedStrings(java.lang.String)}.
+  *
+  * The result always ends with the text that follows the last quoted
+  * string, even when that text is empty.
+  */
+ @Test
+ public void testRemoveDQuotedStrings() {
+     /*
+      * Check handling of empty strings.
+      */
+     List<String> onEmptyString = TokenUtils.removeDQuotedStrings("");
+     assertThat(onEmptyString.size(), is(1));
+     assertThat(onEmptyString.get(0), is(""));
+
+     /*
+      * Check handling of strings without embedded strings.
+      */
+     List<String> onNonmatchingString = TokenUtils.removeDQuotedStrings("hello");
+     assertThat(onNonmatchingString.size(), is(1));
+     assertThat(onNonmatchingString.get(0), is("hello"));
+
+     /*
+      * Check handling of strings with a single embedded string.
+      *
+      * Nothing follows the embedded string, so the last element is the
+      * empty remainder.
+      */
+     List<String> onSingleMatchString = TokenUtils.removeDQuotedStrings("hello\"there\"");
+     assertThat(onSingleMatchString.size(), is(3));
+     assertThat(onSingleMatchString.get(0), is("hello"));
+     assertThat(onSingleMatchString.get(1), is("\"there\""));
+     assertThat(onSingleMatchString.get(2), is(""));
+
+     /*
+      * Check handling of an embedded string with text on both sides.
+      */
+     List<String> onSurroundedString = TokenUtils.removeDQuotedStrings("a\"b\"c");
+     assertThat(onSurroundedString.size(), is(3));
+     assertThat(onSurroundedString.get(0), is("a"));
+     assertThat(onSurroundedString.get(1), is("\"b\""));
+     assertThat(onSurroundedString.get(2), is("c"));
+
+     /*
+      * Check handling a string with mismatched quotes.
+      *
+      * TODO is this the right behavior, or should we fail instead?
+      */
+     List<String> onMismatchString = TokenUtils.removeDQuotedStrings("hello\"there");
+     assertThat(onMismatchString.size(), is(1));
+     assertThat(onMismatchString.get(0), is("hello\"there"));
+ }
+
+ /**
+  * Test method for
+  * {@link bjc.utils.parserutils.TokenUtils#descapeString(java.lang.String)}.
+  *
+  * Covers pass-through of text without escapes, short-form escapes, and
+  * rejection of null and of unknown escapes.
+  *
+  * TODO add cases for octal, unicode and backslash escapes.
+  */
+ @Test
+ public void testDescapeString() {
+     /*
+      * Check that text without escapes is returned unchanged.
+      */
+     assertThat(TokenUtils.descapeString(""), is(""));
+     assertThat(TokenUtils.descapeString("hello"), is("hello"));
+
+     /*
+      * Check handling of short-form escapes.
+      */
+     assertThat(TokenUtils.descapeString("\\n"), is("\n"));
+     assertThat(TokenUtils.descapeString("a\\tb"), is("a\tb"));
+     assertThat(TokenUtils.descapeString("say \\\"hi\\\""), is("say \"hi\""));
+     assertThat(TokenUtils.descapeString("it\\'s"), is("it's"));
+
+     /*
+      * Check that an unknown escape is rejected.
+      */
+     try {
+         TokenUtils.descapeString("\\q");
+
+         fail("Expected an IllegalArgumentException for an unknown escape");
+     } catch(IllegalArgumentException expected) {
+         /*
+          * This is the behavior under test.
+          */
+     }
+
+     /*
+      * Check that null input is rejected.
+      */
+     try {
+         TokenUtils.descapeString(null);
+
+         fail("Expected a NullPointerException for null input");
+     } catch(NullPointerException expected) {
+         /*
+          * This is the behavior under test.
+          */
+     }
+ }
+
+ /**
+  * Test method for
+  * {@link bjc.utils.parserutils.TokenUtils#isDouble(java.lang.String)}.
+  *
+  * Placeholder: the body fails unconditionally until real assertions are
+  * written.
+  */
+ @Test
+ public void testIsDouble() {
+ fail("Not yet implemented"); // TODO write assertions for isDouble
+ }
+
+ /**
+  * Test method for
+  * {@link bjc.utils.parserutils.TokenUtils#isInt(java.lang.String)}.
+  *
+  * Only syntax is checked: an optional sign followed by one or more
+  * digits.
+  */
+ @Test
+ public void testIsInt() {
+     /*
+      * Check handling of well-formed integers, with and without a sign.
+      */
+     assertThat(TokenUtils.isInt("0"), is(true));
+     assertThat(TokenUtils.isInt("12345"), is(true));
+     assertThat(TokenUtils.isInt("+7"), is(true));
+     assertThat(TokenUtils.isInt("-42"), is(true));
+
+     /*
+      * Check handling of strings that are not integers.
+      */
+     assertThat(TokenUtils.isInt(""), is(false));
+     assertThat(TokenUtils.isInt("+"), is(false));
+     assertThat(TokenUtils.isInt("--1"), is(false));
+     assertThat(TokenUtils.isInt("abc"), is(false));
+     assertThat(TokenUtils.isInt("12a"), is(false));
+     assertThat(TokenUtils.isInt("1.5"), is(false));
+     assertThat(TokenUtils.isInt(" 1"), is(false));
+
+     /*
+      * Check that only syntax is validated: a value too large for an int
+      * is still accepted.
+      */
+     assertThat(TokenUtils.isInt("99999999999999999999"), is(true));
+ }
+
+}
|
