From c82e3b3b2de0633317ec8fc85925e91422820597 Mon Sep 17 00:00:00 2001 From: "Benjamin J. Culkin" Date: Sun, 8 Oct 2017 22:39:59 -0300 Subject: Start splitting into maven modules --- .../java/bjc/utils/parserutils/TokenUtils.java | 303 --------------------- 1 file changed, 303 deletions(-) delete mode 100644 BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java (limited to 'BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java') diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java deleted file mode 100644 index 67c1e5a..0000000 --- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java +++ /dev/null @@ -1,303 +0,0 @@ -package bjc.utils.parserutils; - -import static bjc.utils.PropertyDB.applyFormat; -import static bjc.utils.PropertyDB.getCompiledRegex; -import static bjc.utils.PropertyDB.getRegex; - -import java.util.LinkedList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import bjc.utils.funcdata.FunctionalList; -import bjc.utils.funcdata.IList; -import bjc.utils.parserutils.splitter.TokenSplitter; - -/** - * Utilities useful for operating on PL tokens. - * - * @author EVE - * - */ -public class TokenUtils { - /** - * Simple implementation of TokenSplitter for removing double-quoted - * strings. - * - * @author EVE - * - */ - public static class StringTokenSplitter implements TokenSplitter { - @Override - public IList split(final String input) { - return new FunctionalList<>(TokenUtils.removeDQuotedStrings(input)); - } - } - - /* - * Patterns and pattern parts. - */ - private static String rPossibleEscapeString = getRegex("possibleStringEscape"); - - private static Pattern possibleEscapePatt = Pattern.compile(rPossibleEscapeString); - - private static String rShortEscape = getRegex("shortFormStringEscape"); - private static String rOctalEscape = getRegex("octalStringEscape"); - private static String rUnicodeEscape = getRegex("unicodeStringEscape"); - - private static String rEscapeString = applyFormat("stringEscape", rShortEscape, rOctalEscape, rUnicodeEscape); - - private static Pattern escapePatt = Pattern.compile(rEscapeString); - - private static String rDoubleQuoteString = applyFormat("doubleQuotes", getRegex("nonStringEscape"), - rPossibleEscapeString); - - private static Pattern doubleQuotePatt = Pattern.compile(rDoubleQuoteString); - - private static Pattern quotePatt = getCompiledRegex("unescapedQuote"); - - private static Pattern intLitPattern = getCompiledRegex("intLiteral"); - - /** - * Remove double quoted strings from a string. - * - * Splits a string around instances of java-style double-quoted strings. - * - * @param inp - * The string to split. - * - * @return An list containing alternating bits of the string and the - * embedded double-quoted strings that separated them. - */ - public static List removeDQuotedStrings(final String inp) { - if (inp == null) throw new NullPointerException("inp must not be null"); - - /* - * What we need for piece-by-piece string building - */ - StringBuffer work = new StringBuffer(); - final List res = new LinkedList<>(); - - /* - * Matcher for proper strings and single quotes. - */ - final Matcher mt = doubleQuotePatt.matcher(inp); - final Matcher corr = quotePatt.matcher(inp); - - if (corr.find() && !corr.find()) { - /* - * There's a unmatched opening quote with no strings. - */ - final String msg = String.format( - "Unclosed string literal '%s'. Opening quote was at position %d", inp, - inp.indexOf("\"")); - - throw new IllegalArgumentException(msg); - } - - while (mt.find()) { - /* - * Remove the string until the quoted string. - */ - mt.appendReplacement(work, ""); - - /* - * Add the string preceding the double-quoted string and - * the double-quoted string to the list. - */ - res.add(work.toString()); - res.add(mt.group(1)); - - /* - * Renew the buffer. - */ - work = new StringBuffer(); - } - - /* - * Grab the remainder of the string. - */ - mt.appendTail(work); - final String tail = work.toString(); - - if (tail.contains("\"")) { - /* - * There's a unmatched opening quote with at least one - * string. - */ - final String msg = String.format( - "Unclosed string literal '%s'. Opening quote was at position %d", inp, - inp.lastIndexOf("\"")); - - throw new IllegalArgumentException(msg); - } - - /* - * Only add an empty tail if the string was empty. - */ - if (!tail.equals("") || res.isEmpty()) { - res.add(tail); - } - - return res; - } - - /** - * Replace escape characters with their actual equivalents. - * - * @param inp - * The string to replace escape sequences in. - * - * @return The string with escape sequences replaced by their equivalent - * characters. - */ - public static String descapeString(final String inp) { - if (inp == null) throw new NullPointerException("inp must not be null"); - - /* - * Prepare the buffer and escape finder. - */ - final StringBuffer work = new StringBuffer(); - final Matcher possibleEscapeFinder = possibleEscapePatt.matcher(inp); - final Matcher escapeFinder = escapePatt.matcher(inp); - - while (possibleEscapeFinder.find()) { - if (!escapeFinder.find()) { - /* - * Found a possible escape that isn't actually an - * escape. - */ - final String msg = String.format("Illegal escape sequence '%s' at position %d", - possibleEscapeFinder.group(), possibleEscapeFinder.start()); - - throw new IllegalArgumentException(msg); - } - - final String escapeSeq = escapeFinder.group(); - - /* - * Convert the escape to a string. - */ - String escapeRep = ""; - switch (escapeSeq) { - case "\\b": - escapeRep = "\b"; - break; - case "\\t": - escapeRep = "\t"; - break; - case "\\n": - escapeRep = "\n"; - break; - case "\\f": - escapeRep = "\f"; - break; - case "\\r": - escapeRep = "\r"; - break; - case "\\\"": - escapeRep = "\""; - break; - case "\\'": - escapeRep = "'"; - break; - case "\\\\": - /* - * Skip past the second slash. - */ - possibleEscapeFinder.find(); - escapeRep = "\\"; - break; - default: - if (escapeSeq.startsWith("u")) { - escapeRep = handleUnicodeEscape(escapeSeq.substring(1)); - } else { - escapeRep = handleOctalEscape(escapeSeq); - } - } - - escapeFinder.appendReplacement(work, escapeRep); - } - - escapeFinder.appendTail(work); - - return work.toString(); - } - - /* - * Handle a unicode codepoint. - */ - private static String handleUnicodeEscape(final String seq) { - try { - final int codepoint = Integer.parseInt(seq, 16); - - return new String(Character.toChars(codepoint)); - } catch (final IllegalArgumentException iaex) { - final String msg = String.format("'%s' is not a valid Unicode escape sequence'", seq); - - final IllegalArgumentException reiaex = new IllegalArgumentException(msg); - - reiaex.initCause(iaex); - - throw reiaex; - } - } - - /* - * Handle a octal codepoint. - */ - private static String handleOctalEscape(final String seq) { - try { - final int codepoint = Integer.parseInt(seq, 8); - - if (codepoint > 255) { - final String msg = String - .format("'%d' is outside the range of octal escapes', codepoint"); - - throw new IllegalArgumentException(msg); - } - - return new String(Character.toChars(codepoint)); - } catch (final IllegalArgumentException iaex) { - final String msg = String.format("'%s' is not a valid octal escape sequence'", seq); - - final IllegalArgumentException reiaex = new IllegalArgumentException(msg); - - reiaex.initCause(iaex); - - throw reiaex; - } - } - - /** - * Check if a given string would be successfully converted to a double - * by {@link Double#parseDouble(String)}. - * - * @param inp - * The string to check. - * @return Whether the string is a valid double or not. - */ - public static boolean isDouble(final String inp) { - return DoubleMatcher.doubleLiteral.matcher(inp).matches(); - } - - /** - * Check if a given string would be successfully converted to a integer - * by {@link Integer#parseInt(String)}. - * - * NOTE: This only checks syntax. Using values out of the range of - * integers will still cause errors. - * - * @param inp - * The input to check. - * @return Whether the string is a valid integer or not. - */ - public static boolean isInt(final String inp) { - try { - Integer.parseInt(inp); - return true; - } catch (NumberFormatException nfex) { - return false; - } - } -} -- cgit v1.2.3