From ad2a312a3cff9aced3e56ef1440c9d30d981fea0 Mon Sep 17 00:00:00 2001 From: student Date: Mon, 20 Mar 2017 10:51:27 -0400 Subject: Test removeDQuotedStrings --- .../java/bjc/utils/parserutils/TokenUtils.java | 119 ++++++++++++++++----- .../bjc/utils/test/parserutils/TokenUtilsTest.java | 89 --------------- .../parserutils/TokenUtilsTest_removeDQuoted.java | 74 +++++++++++++ 3 files changed, 164 insertions(+), 118 deletions(-) delete mode 100644 BJC-Utils2/src/test/java/bjc/utils/test/parserutils/TokenUtilsTest.java create mode 100644 BJC-Utils2/src/test/java/bjc/utils/test/parserutils/TokenUtilsTest_removeDQuoted.java (limited to 'BJC-Utils2/src') diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java index ad30f4c..4e2bc22 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java @@ -21,8 +21,12 @@ public class TokenUtils { /* * This regex matches java-style string escapes */ - private static String escapeString = "\\\\([btnfr\"'\\\\]" // Match shortform escape sequences like \t or \" - + "|[0-3]?[0-7]{1,2}" // Match octal escape sequences + private static String escapeString = "\\\\([btnfr\"'\\\\]" // Match + // shortform + // escape + // sequences + // like \t or \" + + "|[0-3]?[0-7]{1,2}" // Match octal escape sequences + "|u[0-9a-fA-F]{4})"; // Match unicode escape sequences private static Pattern escapePatt = Pattern.compile(escapeString); @@ -30,9 +34,24 @@ public class TokenUtils { /* * This regular expression matches java style double quoted strings */ - private static Pattern doubleQuotePatt = Pattern.compile("(\"(" + "[^\\\\\"]+" // Match one or more characters that aren't quotes or slashes + private static Pattern doubleQuotePatt = Pattern.compile("(\"(" + "[^\\\\\"]+" // Match + // one + // or + // more + // characters + // that + // aren't + // quotes + // or + // slashes + "|" + escapeString + ")" // Match escape sequences - + "*\")"); // Match all of those things zero or more times, followed by a closing quote + + "*\")"); // Match all of those things zero or more times, followed + // by a closing quote + + /* + * This regular expression matches non-escaped quotes. + */ + private static Pattern quotePatt = Pattern.compile("(? removeDQuotedStrings(String inp) { - if(inp == null) { + if (inp == null) { throw new NullPointerException("inp must not be null"); } + /* + * What we need for piece-by-piece string building + */ StringBuffer work = new StringBuffer(); List res = new LinkedList<>(); + /* + * Matcher for proper strings and single quotes. + */ Matcher mt = doubleQuotePatt.matcher(inp); + Matcher corr = quotePatt.matcher(inp); + + if (corr.find() && !corr.find()) { + /* + * There's a unmatched opening quote with no strings. + */ + throw new IllegalArgumentException(String + .format("Unclosed string literal '%s'. Opening quote was at position %d", inp, inp.indexOf("\""))); + } - while(mt.find()) { + while (mt.find()) { + /* + * Remove the string until the quoted string. + */ mt.appendReplacement(work, ""); + /* + * Add the string preceeeding the double-quoted string and the + * double-quoted string to the list. + */ res.add(work.toString()); res.add(mt.group(1)); + /* + * Renew the buffer. + */ work = new StringBuffer(); } + /* + * Grab the remainder of the string. + */ mt.appendTail(work); - res.add(work.toString()); + String tail = work.toString(); + + if (tail.contains("\"")) { + /* + * There's a unmatched opening quote with at least one string. + */ + throw new IllegalArgumentException(String.format( + "Unclosed string literal '%s'. Opening quote was at position %d", inp, inp.lastIndexOf("\""))); + } + + /* + * Only add an empty tail if the string was empty. + */ + if (!tail.equals("") || res.isEmpty()) { + res.add(tail); + } return res; } @@ -74,13 +136,13 @@ public class TokenUtils { * Replace escape characters with their actual equivalents. * * @param inp - * The string to replace escape sequences in. + * The string to replace escape sequences in. * * @return The string with escape sequences replaced by their equivalent * characters. */ public static String descapeString(String inp) { - if(inp == null) { + if (inp == null) { throw new NullPointerException("inp must not be null"); } @@ -89,16 +151,15 @@ public class TokenUtils { Matcher possibleEscapeFinder = possibleEscape.matcher(inp); Matcher escapeFinder = escapePatt.matcher(inp); - while(possibleEscapeFinder.find()) { - if(!escapeFinder.find()) { - throw new IllegalArgumentException( - "Illegal escape sequence " + possibleEscapeFinder.group()); + while (possibleEscapeFinder.find()) { + if (!escapeFinder.find()) { + throw new IllegalArgumentException("Illegal escape sequence " + possibleEscapeFinder.group()); } String escapeSeq = escapeFinder.group(); String escapeRep = ""; - switch(escapeSeq) { + switch (escapeSeq) { case "\\b": escapeRep = "\b"; break; @@ -128,7 +189,7 @@ public class TokenUtils { escapeRep = "\\"; break; default: - if(escapeSeq.startsWith("u")) { + if (escapeSeq.startsWith("u")) { escapeRep = handleUnicodeEscape(escapeSeq.substring(1)); } else { escapeRep = handleOctalEscape(escapeSeq); @@ -148,7 +209,7 @@ public class TokenUtils { int codepoint = Integer.parseInt(seq, 16); return new String(Character.toChars(codepoint)); - } catch(IllegalArgumentException iaex) { + } catch (IllegalArgumentException iaex) { IllegalArgumentException reiaex = new IllegalArgumentException( String.format("'%s' is not a valid Unicode escape sequence'", seq)); @@ -162,13 +223,13 @@ public class TokenUtils { try { int codepoint = Integer.parseInt(seq, 8); - if(codepoint > 255) { - throw new IllegalArgumentException(String - .format("'%d' is outside the range of octal escapes', codepoint")); + if (codepoint > 255) { + throw new IllegalArgumentException( + String.format("'%d' is outside the range of octal escapes', codepoint")); } return new String(Character.toChars(codepoint)); - } catch(IllegalArgumentException iaex) { + } catch (IllegalArgumentException iaex) { IllegalArgumentException reiaex = new IllegalArgumentException( String.format("'%s' is not a valid octal escape sequence'", seq)); @@ -179,11 +240,11 @@ public class TokenUtils { } /** - * Check if a given string would be successfully converted to a double - * by {@link Double#parseDouble(String)}. + * Check if a given string would be successfully converted to a double by + * {@link Double#parseDouble(String)}. * * @param inp - * The string to check. + * The string to check. * @return Whether the string is a valid double or not. */ public static boolean isDouble(String inp) { @@ -193,14 +254,14 @@ public class TokenUtils { private static Pattern intLitPattern = Pattern.compile("\\A[+\\-]?\\d+\\Z"); /** - * Check if a given string would be successfully converted to a integer - * by {@link Integer#parseInt(String)}. + * Check if a given string would be successfully converted to a integer by + * {@link Integer#parseInt(String)}. * - * NOTE: This only checks syntax. Using values out of the range of - * integers will still cause errors. + * NOTE: This only checks syntax. Using values out of the range of integers + * will still cause errors. * * @param inp - * The input to check. + * The input to check. * @return Whether the string is a valid double or not. */ public static boolean isInt(String inp) { diff --git a/BJC-Utils2/src/test/java/bjc/utils/test/parserutils/TokenUtilsTest.java b/BJC-Utils2/src/test/java/bjc/utils/test/parserutils/TokenUtilsTest.java deleted file mode 100644 index 125811d..0000000 --- a/BJC-Utils2/src/test/java/bjc/utils/test/parserutils/TokenUtilsTest.java +++ /dev/null @@ -1,89 +0,0 @@ -/** - * - */ -package bjc.utils.test.parserutils; - -import static org.junit.Assert.*; - -import bjc.utils.parserutils.TokenUtils; - -import java.util.List; - -import static org.hamcrest.CoreMatchers.*; - -import org.junit.Test; - -/** - * Tests on token utils. - * - * @author EVE - * - */ -public class TokenUtilsTest { - - /** - * Test method for - * {@link bjc.utils.parserutils.TokenUtils#removeDQuotedStrings(java.lang.String)}. - */ - @Test - public void testRemoveDQuotedStrings() { - /* - * Check handling of empty strings. - */ - List onEmptyString = TokenUtils.removeDQuotedStrings(""); - assertThat(onEmptyString.size(), is(1)); - assertThat(onEmptyString.get(0), is("")); - - /* - * Check handling of strings without embedded strings. - */ - List onNonmatchingString = TokenUtils.removeDQuotedStrings("hello"); - assertThat(onNonmatchingString.size(), is(1)); - assertThat(onNonmatchingString.get(0), is("hello")); - - /* - * Check handling of strings with a single embedded string. - */ - List onSingleMatchString = TokenUtils.removeDQuotedStrings("hello\"there\""); - assertThat(onSingleMatchString.size(), is(2)); - assertThat(onSingleMatchString.get(0), is("hello")); - assertThat(onSingleMatchString.get(1), is("\"there\"")); - - /* - * Check handling a string with mismatched quotes. - * - * TODO is this the right behavior, or should we fail instead? - */ - List onMismatchString = TokenUtils.removeDQuotedStrings("hello\"there"); - assertThat(onMismatchString.size(), is(1)); - assertThat(onMismatchString.get(0), is("hello\"there")); - } - - /** - * Test method for - * {@link bjc.utils.parserutils.TokenUtils#descapeString(java.lang.String)}. - */ - @Test - public void testDescapeString() { - fail("Not yet implemented"); // TODO - } - - /** - * Test method for - * {@link bjc.utils.parserutils.TokenUtils#isDouble(java.lang.String)}. - */ - @Test - public void testIsDouble() { - fail("Not yet implemented"); // TODO - } - - /** - * Test method for - * {@link bjc.utils.parserutils.TokenUtils#isInt(java.lang.String)}. - */ - @Test - public void testIsInt() { - fail("Not yet implemented"); // TODO - } - -} diff --git a/BJC-Utils2/src/test/java/bjc/utils/test/parserutils/TokenUtilsTest_removeDQuoted.java b/BJC-Utils2/src/test/java/bjc/utils/test/parserutils/TokenUtilsTest_removeDQuoted.java new file mode 100644 index 0000000..371a50a --- /dev/null +++ b/BJC-Utils2/src/test/java/bjc/utils/test/parserutils/TokenUtilsTest_removeDQuoted.java @@ -0,0 +1,74 @@ +package bjc.utils.test.parserutils; + +import static org.junit.Assert.*; + +import java.util.List; + +import static org.hamcrest.CoreMatchers.*; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import static bjc.utils.parserutils.TokenUtils.*; + +public class TokenUtilsTest_removeDQuoted { + @Rule + public ExpectedException exp; + + public TokenUtilsTest_removeDQuoted(ExpectedException exp) { + this.exp = exp; + } + + /* + * Check handling of mismatched strings with no matching strings. + */ + @Test + public void testRemoveDQuoted_MismatchedStringNoMatch() throws IllegalArgumentException { + exp.expect(IllegalArgumentException.class); + exp.expectMessage(containsString("Opening quote was at position 0")); + + removeDQuotedStrings("\"hello"); + } + + /* + * Check handling of mismatched strings with a matching string. + */ + @Test + public void testRemoveDQuoted_MismatchedStringMatch() throws IllegalArgumentException { + exp.expect(IllegalArgumentException.class); + exp.expectMessage(containsString("Opening quote was at position 7")); + + removeDQuotedStrings("\"hello\"\""); + } + + /* + * Check handling of strings with a single embedded string. + */ + @Test + public void testRemoveDQuoted_SingleString() { + List onSingleMatchString = removeDQuotedStrings("hello\"there\""); + + assertThat(onSingleMatchString, hasItems("hello", "\"there\"")); + } + + /* + * Check handling of strings without embedded strings. + */ + @Test + public void testRemoveDQuote_NoString() { + List onNonmatchingString = removeDQuotedStrings("hello"); + + assertThat(onNonmatchingString, hasItems("hello")); + } + + /* + * Check handling of empty strings. + */ + @Test + public void testRemoveDQuote_EmptyString() { + List onEmptyString = removeDQuotedStrings(""); + + assertThat(onEmptyString, hasItems("")); + } +} \ No newline at end of file -- cgit v1.2.3