From 848dc739becfa41193aff9a07c918aed91e5ef79 Mon Sep 17 00:00:00 2001 From: bjculkin Date: Fri, 7 Apr 2017 08:56:27 -0400 Subject: Cleanup --- .../java/bjc/utils/parserutils/TokenUtils.java | 522 +++++++++++---------- 1 file changed, 266 insertions(+), 256 deletions(-) (limited to 'BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java') diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java index 0ec00ee..52eba1d 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java @@ -1,257 +1,267 @@ -package bjc.utils.parserutils; - -import java.util.LinkedList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import static bjc.utils.PropertyDB.getRegex; -import static bjc.utils.PropertyDB.getCompiledRegex; -import static bjc.utils.PropertyDB.applyFormat; - -/** - * Utilities useful for operating on PL tokens. - * - * @author EVE - * - */ -public class TokenUtils { - private static String possibleEscapeString = getRegex("possibleStringEscape"); - - private static Pattern possibleEscapePatt = Pattern.compile(possibleEscapeString); - - private static String shortEscape = getRegex("shortFormStringEscape"); - private static String octalEscape = getRegex("octalStringEscape"); - private static String unicodeEscape = getRegex("unicodeStringEscape"); - - private static String escapeString = applyFormat("stringEscape", shortEscape, octalEscape, unicodeEscape); - - private static Pattern escapePatt = Pattern.compile(escapeString); - - private static String doubleQuoteString = applyFormat("doubleQuotes", getRegex("nonEscape"), - possibleEscapeString); - - private static Pattern doubleQuotePatt = Pattern.compile(doubleQuoteString); - - private static Pattern quotePatt = getCompiledRegex("unescapedQuote"); - - /** - * Remove double quoted strings from a string. - * - * Splits a string around instances of java-style double-quoted strings. - * - * @param inp - * The string to split. - * - * @return An list containing alternating bits of the string and the - * embedded double-quoted strings that separated them. - */ - public static List removeDQuotedStrings(String inp) { - if(inp == null) { - throw new NullPointerException("inp must not be null"); - } - - /* - * What we need for piece-by-piece string building - */ - StringBuffer work = new StringBuffer(); - List res = new LinkedList<>(); - - /* - * Matcher for proper strings and single quotes. - */ - Matcher mt = doubleQuotePatt.matcher(inp); - Matcher corr = quotePatt.matcher(inp); - - if(corr.find() && !corr.find()) { - /* - * There's a unmatched opening quote with no strings. - */ - throw new IllegalArgumentException( - String.format("Unclosed string literal '%s'. Opening quote was at position %d", - inp, inp.indexOf("\""))); - } - - while(mt.find()) { - /* - * Remove the string until the quoted string. - */ - mt.appendReplacement(work, ""); - - /* - * Add the string preceding the double-quoted string and - * the double-quoted string to the list. - */ - res.add(work.toString()); - res.add(mt.group(1)); - - /* - * Renew the buffer. - */ - work = new StringBuffer(); - } - - /* - * Grab the remainder of the string. - */ - mt.appendTail(work); - String tail = work.toString(); - - if(tail.contains("\"")) { - /* - * There's a unmatched opening quote with at least one - * string. - */ - throw new IllegalArgumentException( - String.format("Unclosed string literal '%s'. Opening quote was at position %d", - inp, inp.lastIndexOf("\""))); - } - - /* - * Only add an empty tail if the string was empty. - */ - if(!tail.equals("") || res.isEmpty()) { - res.add(tail); - } - - return res; - } - - /** - * Replace escape characters with their actual equivalents. - * - * @param inp - * The string to replace escape sequences in. - * - * @return The string with escape sequences replaced by their equivalent - * characters. - */ - public static String descapeString(String inp) { - if(inp == null) { - throw new NullPointerException("inp must not be null"); - } - - StringBuffer work = new StringBuffer(); - - Matcher possibleEscapeFinder = possibleEscapePatt.matcher(inp); - Matcher escapeFinder = escapePatt.matcher(inp); - - while(possibleEscapeFinder.find()) { - if(!escapeFinder.find()) { - throw new IllegalArgumentException(String.format( - "Illegal escape sequence '%s' at position %d", - possibleEscapeFinder.group(), possibleEscapeFinder.start())); - } - - String escapeSeq = escapeFinder.group(); - - String escapeRep = ""; - switch(escapeSeq) { - case "\\b": - escapeRep = "\b"; - break; - case "\\t": - escapeRep = "\t"; - break; - case "\\n": - escapeRep = "\n"; - break; - case "\\f": - escapeRep = "\f"; - break; - case "\\r": - escapeRep = "\r"; - break; - case "\\\"": - escapeRep = "\""; - break; - case "\\'": - escapeRep = "'"; - break; - case "\\\\": - /* - * Skip past the second slash. - */ - possibleEscapeFinder.find(); - escapeRep = "\\"; - break; - default: - if(escapeSeq.startsWith("u")) { - escapeRep = handleUnicodeEscape(escapeSeq.substring(1)); - } else { - escapeRep = handleOctalEscape(escapeSeq); - } - } - - escapeFinder.appendReplacement(work, escapeRep); - } - - escapeFinder.appendTail(work); - - return work.toString(); - } - - private static String handleUnicodeEscape(String seq) { - try { - int codepoint = Integer.parseInt(seq, 16); - - return new String(Character.toChars(codepoint)); - } catch(IllegalArgumentException iaex) { - IllegalArgumentException reiaex = new IllegalArgumentException( - String.format("'%s' is not a valid Unicode escape sequence'", seq)); - - reiaex.initCause(iaex); - - throw reiaex; - } - } - - private static String handleOctalEscape(String seq) { - try { - int codepoint = Integer.parseInt(seq, 8); - - if(codepoint > 255) { - throw new IllegalArgumentException(String - .format("'%d' is outside the range of octal escapes', codepoint")); - } - - return new String(Character.toChars(codepoint)); - } catch(IllegalArgumentException iaex) { - IllegalArgumentException reiaex = new IllegalArgumentException( - String.format("'%s' is not a valid octal escape sequence'", seq)); - - reiaex.initCause(iaex); - - throw reiaex; - } - } - - /** - * Check if a given string would be successfully converted to a double - * by {@link Double#parseDouble(String)}. - * - * @param inp - * The string to check. - * @return Whether the string is a valid double or not. - */ - public static boolean isDouble(String inp) { - return DoubleMatcher.floatingLiteral.matcher(inp).matches(); - } - - private static Pattern intLitPattern = getCompiledRegex("intLiteral"); - - /** - * Check if a given string would be successfully converted to a integer - * by {@link Integer#parseInt(String)}. - * - * NOTE: This only checks syntax. Using values out of the range of - * integers will still cause errors. - * - * @param inp - * The input to check. - * @return Whether the string is a valid double or not. - */ - public static boolean isInt(String inp) { - return intLitPattern.matcher(inp).matches(); - } +package bjc.utils.parserutils; + +import java.util.LinkedList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static bjc.utils.PropertyDB.getRegex; +import static bjc.utils.PropertyDB.getCompiledRegex; +import static bjc.utils.PropertyDB.applyFormat; + +/** + * Utilities useful for operating on PL tokens. + * + * @author EVE + * + * TODO add support for user defined escapes. + */ +public class TokenUtils { + /* + * Patterns and pattern parts. + */ + private static String rPossibleEscapeString = getRegex("possibleStringEscape"); + + private static Pattern possibleEscapePatt = Pattern.compile(rPossibleEscapeString); + + private static String rShortEscape = getRegex("shortFormStringEscape"); + private static String rOctalEscape = getRegex("octalStringEscape"); + private static String rUnicodeEscape = getRegex("unicodeStringEscape"); + + private static String rEscapeString = applyFormat("stringEscape", rShortEscape, rOctalEscape, rUnicodeEscape); + + private static Pattern escapePatt = Pattern.compile(rEscapeString); + + private static String rDoubleQuoteString = applyFormat("doubleQuotes", getRegex("nonEscape"), + rPossibleEscapeString); + + private static Pattern doubleQuotePatt = Pattern.compile(rDoubleQuoteString); + + private static Pattern quotePatt = getCompiledRegex("unescapedQuote"); + + private static Pattern intLitPattern = getCompiledRegex("intLiteral"); + + /** + * Remove double quoted strings from a string. + * + * Splits a string around instances of java-style double-quoted strings. + * + * @param inp + * The string to split. + * + * @return An list containing alternating bits of the string and the + * embedded double-quoted strings that separated them. + */ + public static List removeDQuotedStrings(String inp) { + if(inp == null) { + throw new NullPointerException("inp must not be null"); + } + + /* + * What we need for piece-by-piece string building + */ + StringBuffer work = new StringBuffer(); + List res = new LinkedList<>(); + + /* + * Matcher for proper strings and single quotes. + */ + Matcher mt = doubleQuotePatt.matcher(inp); + Matcher corr = quotePatt.matcher(inp); + + if(corr.find() && !corr.find()) { + /* + * There's a unmatched opening quote with no strings. + */ + String msg = String.format("Unclosed string literal '%s'. Opening quote was at position %d", + inp, inp.indexOf("\"")); + + throw new IllegalArgumentException(msg); + } + + while(mt.find()) { + /* + * Remove the string until the quoted string. + */ + mt.appendReplacement(work, ""); + + /* + * Add the string preceding the double-quoted string and + * the double-quoted string to the list. + */ + res.add(work.toString()); + res.add(mt.group(1)); + + /* + * Renew the buffer. + */ + work = new StringBuffer(); + } + + /* + * Grab the remainder of the string. + */ + mt.appendTail(work); + String tail = work.toString(); + + if(tail.contains("\"")) { + /* + * There's a unmatched opening quote with at least one + * string. + */ + String msg = String.format("Unclosed string literal '%s'. Opening quote was at position %d", + inp, inp.lastIndexOf("\"")); + + throw new IllegalArgumentException(msg); + } + + /* + * Only add an empty tail if the string was empty. + */ + if(!tail.equals("") || res.isEmpty()) { + res.add(tail); + } + + return res; + } + + /** + * Replace escape characters with their actual equivalents. + * + * @param inp + * The string to replace escape sequences in. + * + * @return The string with escape sequences replaced by their equivalent + * characters. + */ + public static String descapeString(String inp) { + if(inp == null) { + throw new NullPointerException("inp must not be null"); + } + + StringBuffer work = new StringBuffer(); + + Matcher possibleEscapeFinder = possibleEscapePatt.matcher(inp); + Matcher escapeFinder = escapePatt.matcher(inp); + + while(possibleEscapeFinder.find()) { + if(!escapeFinder.find()) { + String msg = String.format("Illegal escape sequence '%s' at position %d", + possibleEscapeFinder.group(), possibleEscapeFinder.start()); + + throw new IllegalArgumentException(msg); + } + + String escapeSeq = escapeFinder.group(); + + String escapeRep = ""; + switch(escapeSeq) { + case "\\b": + escapeRep = "\b"; + break; + case "\\t": + escapeRep = "\t"; + break; + case "\\n": + escapeRep = "\n"; + break; + case "\\f": + escapeRep = "\f"; + break; + case "\\r": + escapeRep = "\r"; + break; + case "\\\"": + escapeRep = "\""; + break; + case "\\'": + escapeRep = "'"; + break; + case "\\\\": + /* + * Skip past the second slash. + */ + possibleEscapeFinder.find(); + escapeRep = "\\"; + break; + default: + if(escapeSeq.startsWith("u")) { + escapeRep = handleUnicodeEscape(escapeSeq.substring(1)); + } else { + escapeRep = handleOctalEscape(escapeSeq); + } + } + + escapeFinder.appendReplacement(work, escapeRep); + } + + escapeFinder.appendTail(work); + + return work.toString(); + } + + private static String handleUnicodeEscape(String seq) { + try { + int codepoint = Integer.parseInt(seq, 16); + + return new String(Character.toChars(codepoint)); + } catch(IllegalArgumentException iaex) { + String msg = String.format("'%s' is not a valid Unicode escape sequence'", seq); + + IllegalArgumentException reiaex = new IllegalArgumentException(msg); + + reiaex.initCause(iaex); + + throw reiaex; + } + } + + private static String handleOctalEscape(String seq) { + try { + int codepoint = Integer.parseInt(seq, 8); + + if(codepoint > 255) { + String msg = String.format("'%d' is outside the range of octal escapes', codepoint"); + + throw new IllegalArgumentException(msg); + } + + return new String(Character.toChars(codepoint)); + } catch(IllegalArgumentException iaex) { + String msg = String.format("'%s' is not a valid octal escape sequence'", seq); + + IllegalArgumentException reiaex = new IllegalArgumentException(msg); + + reiaex.initCause(iaex); + + throw reiaex; + } + } + + /** + * Check if a given string would be successfully converted to a double + * by {@link Double#parseDouble(String)}. + * + * @param inp + * The string to check. + * @return Whether the string is a valid double or not. + */ + public static boolean isDouble(String inp) { + return DoubleMatcher.doubleLiteral.matcher(inp).matches(); + } + + /** + * Check if a given string would be successfully converted to a integer + * by {@link Integer#parseInt(String)}. + * + * NOTE: This only checks syntax. Using values out of the range of + * integers will still cause errors. + * + * @param inp + * The input to check. + * @return Whether the string is a valid double or not. + */ + public static boolean isInt(String inp) { + return intLitPattern.matcher(inp).matches(); + } } \ No newline at end of file -- cgit v1.2.3