package bjc.utils.parserutils; import java.util.LinkedList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import static bjc.utils.PropertyDB.getRegex; import static bjc.utils.PropertyDB.getCompiledRegex; import static bjc.utils.PropertyDB.applyFormat; /** * Utilities useful for operating on PL tokens. * * @author EVE * */ public class TokenUtils { private static String possibleEscapeString = getRegex("possibleStringEscape"); private static Pattern possibleEscapePatt = Pattern.compile(possibleEscapeString); private static String shortEscape = getRegex("shortFormStringEscape"); private static String octalEscape = getRegex("octalStringEscape"); private static String unicodeEscape = getRegex("unicodeStringEscape"); private static String escapeString = applyFormat("stringEscape", shortEscape, octalEscape, unicodeEscape); private static Pattern escapePatt = Pattern.compile(escapeString); private static String doubleQuoteString = applyFormat("doubleQuotes", getRegex("nonEscape"), possibleEscapeString); private static Pattern doubleQuotePatt = Pattern.compile(doubleQuoteString); private static Pattern quotePatt = getCompiledRegex("unescapedQuote"); /** * Remove double quoted strings from a string. * * Splits a string around instances of java-style double-quoted strings. * * @param inp * The string to split. * * @return An list containing alternating bits of the string and the * embedded double-quoted strings that separated them. */ public static List removeDQuotedStrings(String inp) { if(inp == null) { throw new NullPointerException("inp must not be null"); } /* * What we need for piece-by-piece string building */ StringBuffer work = new StringBuffer(); List res = new LinkedList<>(); /* * Matcher for proper strings and single quotes. */ Matcher mt = doubleQuotePatt.matcher(inp); Matcher corr = quotePatt.matcher(inp); if(corr.find() && !corr.find()) { /* * There's a unmatched opening quote with no strings. */ throw new IllegalArgumentException( String.format("Unclosed string literal '%s'. Opening quote was at position %d", inp, inp.indexOf("\""))); } while(mt.find()) { /* * Remove the string until the quoted string. */ mt.appendReplacement(work, ""); /* * Add the string preceding the double-quoted string and * the double-quoted string to the list. */ res.add(work.toString()); res.add(mt.group(1)); /* * Renew the buffer. */ work = new StringBuffer(); } /* * Grab the remainder of the string. */ mt.appendTail(work); String tail = work.toString(); if(tail.contains("\"")) { /* * There's a unmatched opening quote with at least one * string. */ throw new IllegalArgumentException( String.format("Unclosed string literal '%s'. Opening quote was at position %d", inp, inp.lastIndexOf("\""))); } /* * Only add an empty tail if the string was empty. */ if(!tail.equals("") || res.isEmpty()) { res.add(tail); } return res; } /** * Replace escape characters with their actual equivalents. * * @param inp * The string to replace escape sequences in. * * @return The string with escape sequences replaced by their equivalent * characters. */ public static String descapeString(String inp) { if(inp == null) { throw new NullPointerException("inp must not be null"); } StringBuffer work = new StringBuffer(); Matcher possibleEscapeFinder = possibleEscapePatt.matcher(inp); Matcher escapeFinder = escapePatt.matcher(inp); while(possibleEscapeFinder.find()) { if(!escapeFinder.find()) { throw new IllegalArgumentException(String.format( "Illegal escape sequence '%s' at position %d", possibleEscapeFinder.group(), possibleEscapeFinder.start())); } String escapeSeq = escapeFinder.group(); String escapeRep = ""; switch(escapeSeq) { case "\\b": escapeRep = "\b"; break; case "\\t": escapeRep = "\t"; break; case "\\n": escapeRep = "\n"; break; case "\\f": escapeRep = "\f"; break; case "\\r": escapeRep = "\r"; break; case "\\\"": escapeRep = "\""; break; case "\\'": escapeRep = "'"; break; case "\\\\": /* * Skip past the second slash. */ possibleEscapeFinder.find(); escapeRep = "\\"; break; default: if(escapeSeq.startsWith("u")) { escapeRep = handleUnicodeEscape(escapeSeq.substring(1)); } else { escapeRep = handleOctalEscape(escapeSeq); } } escapeFinder.appendReplacement(work, escapeRep); } escapeFinder.appendTail(work); return work.toString(); } private static String handleUnicodeEscape(String seq) { try { int codepoint = Integer.parseInt(seq, 16); return new String(Character.toChars(codepoint)); } catch(IllegalArgumentException iaex) { IllegalArgumentException reiaex = new IllegalArgumentException( String.format("'%s' is not a valid Unicode escape sequence'", seq)); reiaex.initCause(iaex); throw reiaex; } } private static String handleOctalEscape(String seq) { try { int codepoint = Integer.parseInt(seq, 8); if(codepoint > 255) { throw new IllegalArgumentException(String .format("'%d' is outside the range of octal escapes', codepoint")); } return new String(Character.toChars(codepoint)); } catch(IllegalArgumentException iaex) { IllegalArgumentException reiaex = new IllegalArgumentException( String.format("'%s' is not a valid octal escape sequence'", seq)); reiaex.initCause(iaex); throw reiaex; } } /** * Check if a given string would be successfully converted to a double * by {@link Double#parseDouble(String)}. * * @param inp * The string to check. * @return Whether the string is a valid double or not. */ public static boolean isDouble(String inp) { return DoubleMatcher.floatingLiteral.matcher(inp).matches(); } private static Pattern intLitPattern = getCompiledRegex("intLiteral"); /** * Check if a given string would be successfully converted to a integer * by {@link Integer#parseInt(String)}. * * NOTE: This only checks syntax. Using values out of the range of * integers will still cause errors. * * @param inp * The input to check. * @return Whether the string is a valid double or not. */ public static boolean isInt(String inp) { return intLitPattern.matcher(inp).matches(); } }