From ad2a312a3cff9aced3e56ef1440c9d30d981fea0 Mon Sep 17 00:00:00 2001 From: student Date: Mon, 20 Mar 2017 10:51:27 -0400 Subject: Test removeDQuotedStrings --- .../java/bjc/utils/parserutils/TokenUtils.java | 119 ++++++++++++++++----- 1 file changed, 90 insertions(+), 29 deletions(-) (limited to 'BJC-Utils2/src/main/java/bjc') diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java index ad30f4c..4e2bc22 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java @@ -21,8 +21,12 @@ public class TokenUtils { /* * This regex matches java-style string escapes */ - private static String escapeString = "\\\\([btnfr\"'\\\\]" // Match shortform escape sequences like \t or \" - + "|[0-3]?[0-7]{1,2}" // Match octal escape sequences + private static String escapeString = "\\\\([btnfr\"'\\\\]" // Match + // shortform + // escape + // sequences + // like \t or \" + + "|[0-3]?[0-7]{1,2}" // Match octal escape sequences + "|u[0-9a-fA-F]{4})"; // Match unicode escape sequences private static Pattern escapePatt = Pattern.compile(escapeString); @@ -30,9 +34,24 @@ public class TokenUtils { /* * This regular expression matches java style double quoted strings */ - private static Pattern doubleQuotePatt = Pattern.compile("(\"(" + "[^\\\\\"]+" // Match one or more characters that aren't quotes or slashes + private static Pattern doubleQuotePatt = Pattern.compile("(\"(" + "[^\\\\\"]+" // Match + // one + // or + // more + // characters + // that + // aren't + // quotes + // or + // slashes + "|" + escapeString + ")" // Match escape sequences - + "*\")"); // Match all of those things zero or more times, followed by a closing quote + + "*\")"); // Match all of those things zero or more times, followed + // by a closing quote + + /* + * This regular expression matches non-escaped quotes. + */ + private static Pattern quotePatt = Pattern.compile("(? removeDQuotedStrings(String inp) { - if(inp == null) { + if (inp == null) { throw new NullPointerException("inp must not be null"); } + /* + * What we need for piece-by-piece string building + */ StringBuffer work = new StringBuffer(); List res = new LinkedList<>(); + /* + * Matcher for proper strings and single quotes. + */ Matcher mt = doubleQuotePatt.matcher(inp); + Matcher corr = quotePatt.matcher(inp); + + if (corr.find() && !corr.find()) { + /* + * There's a unmatched opening quote with no strings. + */ + throw new IllegalArgumentException(String + .format("Unclosed string literal '%s'. Opening quote was at position %d", inp, inp.indexOf("\""))); + } - while(mt.find()) { + while (mt.find()) { + /* + * Remove the string until the quoted string. + */ mt.appendReplacement(work, ""); + /* + * Add the string preceeeding the double-quoted string and the + * double-quoted string to the list. + */ res.add(work.toString()); res.add(mt.group(1)); + /* + * Renew the buffer. + */ work = new StringBuffer(); } + /* + * Grab the remainder of the string. + */ mt.appendTail(work); - res.add(work.toString()); + String tail = work.toString(); + + if (tail.contains("\"")) { + /* + * There's a unmatched opening quote with at least one string. + */ + throw new IllegalArgumentException(String.format( + "Unclosed string literal '%s'. Opening quote was at position %d", inp, inp.lastIndexOf("\""))); + } + + /* + * Only add an empty tail if the string was empty. + */ + if (!tail.equals("") || res.isEmpty()) { + res.add(tail); + } return res; } @@ -74,13 +136,13 @@ public class TokenUtils { * Replace escape characters with their actual equivalents. * * @param inp - * The string to replace escape sequences in. + * The string to replace escape sequences in. * * @return The string with escape sequences replaced by their equivalent * characters. */ public static String descapeString(String inp) { - if(inp == null) { + if (inp == null) { throw new NullPointerException("inp must not be null"); } @@ -89,16 +151,15 @@ public class TokenUtils { Matcher possibleEscapeFinder = possibleEscape.matcher(inp); Matcher escapeFinder = escapePatt.matcher(inp); - while(possibleEscapeFinder.find()) { - if(!escapeFinder.find()) { - throw new IllegalArgumentException( - "Illegal escape sequence " + possibleEscapeFinder.group()); + while (possibleEscapeFinder.find()) { + if (!escapeFinder.find()) { + throw new IllegalArgumentException("Illegal escape sequence " + possibleEscapeFinder.group()); } String escapeSeq = escapeFinder.group(); String escapeRep = ""; - switch(escapeSeq) { + switch (escapeSeq) { case "\\b": escapeRep = "\b"; break; @@ -128,7 +189,7 @@ public class TokenUtils { escapeRep = "\\"; break; default: - if(escapeSeq.startsWith("u")) { + if (escapeSeq.startsWith("u")) { escapeRep = handleUnicodeEscape(escapeSeq.substring(1)); } else { escapeRep = handleOctalEscape(escapeSeq); @@ -148,7 +209,7 @@ public class TokenUtils { int codepoint = Integer.parseInt(seq, 16); return new String(Character.toChars(codepoint)); - } catch(IllegalArgumentException iaex) { + } catch (IllegalArgumentException iaex) { IllegalArgumentException reiaex = new IllegalArgumentException( String.format("'%s' is not a valid Unicode escape sequence'", seq)); @@ -162,13 +223,13 @@ public class TokenUtils { try { int codepoint = Integer.parseInt(seq, 8); - if(codepoint > 255) { - throw new IllegalArgumentException(String - .format("'%d' is outside the range of octal escapes', codepoint")); + if (codepoint > 255) { + throw new IllegalArgumentException( + String.format("'%d' is outside the range of octal escapes', codepoint")); } return new String(Character.toChars(codepoint)); - } catch(IllegalArgumentException iaex) { + } catch (IllegalArgumentException iaex) { IllegalArgumentException reiaex = new IllegalArgumentException( String.format("'%s' is not a valid octal escape sequence'", seq)); @@ -179,11 +240,11 @@ public class TokenUtils { } /** - * Check if a given string would be successfully converted to a double - * by {@link Double#parseDouble(String)}. + * Check if a given string would be successfully converted to a double by + * {@link Double#parseDouble(String)}. * * @param inp - * The string to check. + * The string to check. * @return Whether the string is a valid double or not. */ public static boolean isDouble(String inp) { @@ -193,14 +254,14 @@ public class TokenUtils { private static Pattern intLitPattern = Pattern.compile("\\A[+\\-]?\\d+\\Z"); /** - * Check if a given string would be successfully converted to a integer - * by {@link Integer#parseInt(String)}. + * Check if a given string would be successfully converted to a integer by + * {@link Integer#parseInt(String)}. * - * NOTE: This only checks syntax. Using values out of the range of - * integers will still cause errors. + * NOTE: This only checks syntax. Using values out of the range of integers + * will still cause errors. * * @param inp - * The input to check. + * The input to check. * @return Whether the string is a valid double or not. */ public static boolean isInt(String inp) { -- cgit v1.2.3