diff options
Diffstat (limited to 'BJC-Utils2/src/main/java/bjc/utils')
| -rw-r--r-- | BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java | 119 |
1 files changed, 90 insertions, 29 deletions
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java index ad30f4c..4e2bc22 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java @@ -21,8 +21,12 @@ public class TokenUtils { /*
* This regex matches java-style string escapes
*/
- private static String escapeString = "\\\\([btnfr\"'\\\\]" // Match shortform escape sequences like \t or \"
- + "|[0-3]?[0-7]{1,2}" // Match octal escape sequences
+ private static String escapeString = "\\\\([btnfr\"'\\\\]" // Match
+ // shortform
+ // escape
+ // sequences
+ // like \t or \"
+ + "|[0-3]?[0-7]{1,2}" // Match octal escape sequences
+ "|u[0-9a-fA-F]{4})"; // Match unicode escape sequences
private static Pattern escapePatt = Pattern.compile(escapeString);
@@ -30,9 +34,24 @@ public class TokenUtils { /*
* This regular expression matches java style double quoted strings
*/
- private static Pattern doubleQuotePatt = Pattern.compile("(\"(" + "[^\\\\\"]+" // Match one or more characters that aren't quotes or slashes
+ private static Pattern doubleQuotePatt = Pattern.compile("(\"(" + "[^\\\\\"]+" // Match
+ // one
+ // or
+ // more
+ // characters
+ // that
+ // aren't
+ // quotes
+ // or
+ // slashes
+ "|" + escapeString + ")" // Match escape sequences
- + "*\")"); // Match all of those things zero or more times, followed by a closing quote
+ + "*\")"); // Match all of those things zero or more times, followed
+ // by a closing quote
+
+ /*
+ * This regular expression matches non-escaped quotes.
+ */
+ private static Pattern quotePatt = Pattern.compile("(?<!\\\\)\"");
/**
* Remove double quoted strings from a string.
@@ -40,32 +59,75 @@ public class TokenUtils { * Splits a string around instances of java-style double-quoted strings.
*
* @param inp
- * The string to split.
+ * The string to split.
*
* @return A list containing alternating bits of the string and the
* embedded double-quoted strings that separated them.
*/
public static List<String> removeDQuotedStrings(String inp) {
- if(inp == null) {
+ if (inp == null) {
throw new NullPointerException("inp must not be null");
}
+ /*
+ * What we need for piece-by-piece string building
+ */
StringBuffer work = new StringBuffer();
List<String> res = new LinkedList<>();
+ /*
+ * Matchers for proper strings and for lone (non-escaped) double quotes.
+ */
Matcher mt = doubleQuotePatt.matcher(inp);
+ Matcher corr = quotePatt.matcher(inp);
+
+ if (corr.find() && !corr.find()) {
+ /*
+ * There's an unmatched opening quote with no strings.
+ */
+ throw new IllegalArgumentException(String
+ .format("Unclosed string literal '%s'. Opening quote was at position %d", inp, inp.indexOf("\"")));
+ }
- while(mt.find()) {
+ while (mt.find()) {
+ /*
+ * Copy the text preceding the quoted string into the buffer, dropping the quoted string itself.
+ */
mt.appendReplacement(work, "");
+ /*
+ * Add the string preceding the double-quoted string and the
+ * double-quoted string to the list.
+ */
res.add(work.toString());
res.add(mt.group(1));
+ /*
+ * Renew the buffer.
+ */
work = new StringBuffer();
}
+ /*
+ * Grab the remainder of the string.
+ */
mt.appendTail(work);
- res.add(work.toString());
+ String tail = work.toString();
+
+ if (tail.contains("\"")) {
+ /*
+ * There's an unmatched opening quote with at least one string.
+ */
+ throw new IllegalArgumentException(String.format(
+ "Unclosed string literal '%s'. Opening quote was at position %d", inp, inp.lastIndexOf("\"")));
+ }
+
+ /*
+ * Only add an empty tail if the string was empty.
+ */
+ if (!tail.equals("") || res.isEmpty()) {
+ res.add(tail);
+ }
return res;
}
@@ -74,13 +136,13 @@ public class TokenUtils { * Replace escape characters with their actual equivalents.
*
* @param inp
- * The string to replace escape sequences in.
+ * The string to replace escape sequences in.
*
* @return The string with escape sequences replaced by their equivalent
* characters.
*/
public static String descapeString(String inp) {
- if(inp == null) {
+ if (inp == null) {
throw new NullPointerException("inp must not be null");
}
@@ -89,16 +151,15 @@ public class TokenUtils { Matcher possibleEscapeFinder = possibleEscape.matcher(inp);
Matcher escapeFinder = escapePatt.matcher(inp);
- while(possibleEscapeFinder.find()) {
- if(!escapeFinder.find()) {
- throw new IllegalArgumentException(
- "Illegal escape sequence " + possibleEscapeFinder.group());
+ while (possibleEscapeFinder.find()) {
+ if (!escapeFinder.find()) {
+ throw new IllegalArgumentException("Illegal escape sequence " + possibleEscapeFinder.group());
}
String escapeSeq = escapeFinder.group();
String escapeRep = "";
- switch(escapeSeq) {
+ switch (escapeSeq) {
case "\\b":
escapeRep = "\b";
break;
@@ -128,7 +189,7 @@ public class TokenUtils { escapeRep = "\\";
break;
default:
- if(escapeSeq.startsWith("u")) {
+ if (escapeSeq.startsWith("u")) {
escapeRep = handleUnicodeEscape(escapeSeq.substring(1));
} else {
escapeRep = handleOctalEscape(escapeSeq);
@@ -148,7 +209,7 @@ public class TokenUtils { int codepoint = Integer.parseInt(seq, 16);
return new String(Character.toChars(codepoint));
- } catch(IllegalArgumentException iaex) {
+ } catch (IllegalArgumentException iaex) {
IllegalArgumentException reiaex = new IllegalArgumentException(
String.format("'%s' is not a valid Unicode escape sequence'", seq));
@@ -162,13 +223,13 @@ public class TokenUtils { try {
int codepoint = Integer.parseInt(seq, 8);
- if(codepoint > 255) {
- throw new IllegalArgumentException(String
- .format("'%d' is outside the range of octal escapes', codepoint"));
+ if (codepoint > 255) {
+ throw new IllegalArgumentException(
+ String.format("'%d' is outside the range of octal escapes', codepoint"));
}
return new String(Character.toChars(codepoint));
- } catch(IllegalArgumentException iaex) {
+ } catch (IllegalArgumentException iaex) {
IllegalArgumentException reiaex = new IllegalArgumentException(
String.format("'%s' is not a valid octal escape sequence'", seq));
@@ -179,11 +240,11 @@ public class TokenUtils { }
/**
- * Check if a given string would be successfully converted to a double
- * by {@link Double#parseDouble(String)}.
+ * Check if a given string would be successfully converted to a double by
+ * {@link Double#parseDouble(String)}.
*
* @param inp
- * The string to check.
+ * The string to check.
* @return Whether the string is a valid double or not.
*/
public static boolean isDouble(String inp) {
@@ -193,14 +254,14 @@ public class TokenUtils { private static Pattern intLitPattern = Pattern.compile("\\A[+\\-]?\\d+\\Z");
/**
- * Check if a given string would be successfully converted to a integer
- * by {@link Integer#parseInt(String)}.
+ * Check if a given string would be successfully converted to an integer by
+ * {@link Integer#parseInt(String)}.
*
- * NOTE: This only checks syntax. Using values out of the range of
- * integers will still cause errors.
+ * NOTE: This only checks syntax. Using values out of the range of integers
+ * will still cause errors.
*
* @param inp
- * The input to check.
+ * The input to check.
* @return Whether the string is a valid integer or not.
*/
public static boolean isInt(String inp) {
|
