/*
 * Reconstructed from a git patch (rendered by cgit v1.2.3).
 *
 * Commit:  a63c30f5fe9ee302e73bb30e35095d789adb1a80
 * Author:  bjculkin
 * Date:    Fri, 17 Mar 2017 08:33:37 -0400
 * Subject: Refactor StringUtils
 *
 * Moved a bunch of token-oriented stuff from StringUtils to a new TokenUtils
 * class.
 *
 * Path: BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java
 */
package bjc.utils.parserutils;

import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;

/**
 * Utilities useful for operating on PL tokens.
 *
 * @author EVE
 *
 */
public class TokenUtils {

    /*
     * This regex matches java-style string escapes. Capture group 1 of the
     * compiled pattern is the escape body, i.e. everything after the leading
     * backslash.
     */
    private static final String ESCAPE_REGEX = "\\\\([btnfr\"'\\\\]" // Match shortform escape sequences like \t or \"
            + "|[0-3]?[0-7]{1,2}" // Match octal escape sequences
            + "|u[0-9a-fA-F]{4})"; // Match unicode escape sequences

    private static final Pattern ESCAPE_PATTERN = Pattern.compile(ESCAPE_REGEX);

    /*
     * This regular expression matches java style double quoted strings.
     * Capture group 1 is the whole quoted string, including both quotes.
     *
     * The run of ordinary characters uses a possessive quantifier (++). The
     * characters it consumes can never start an escape sequence or be the
     * closing quote, so giving them back cannot produce a match; forbidding
     * that backtracking keeps unterminated strings from taking exponential
     * time without changing what is matched.
     */
    private static final Pattern DOUBLE_QUOTE_PATTERN = Pattern.compile("(\"("
            + "[^\\\\\"]++" // Match one or more characters that aren't quotes or slashes
            + "|" + ESCAPE_REGEX + ")" // Match escape sequences
            + "*\")"); // Match all of those things zero or more times, followed by a closing quote

    private static final Pattern INT_LITERAL_PATTERN = Pattern.compile("\\A[+\\-]?\\d+\\Z");

    /**
     * Checks if the given expression contains the specified operator in a
     * situation that indicates its use as an infix operator.
     *
     * The check is purely textual: the operator must occur exactly once in
     * the expression, and the expression must not start with it. A trailing
     * occurrence is not rejected.
     *
     * @param expression
     *                The expression to check
     * @param operator
     *                The operator to see if it is contained
     * @return Whether or not the given expression contains the specified
     *         operator as an infix operator
     */
    public static boolean containsInfixOperator(String expression, String operator) {
        /*
         * A single occurrence that is not at the start also rules out the
         * expression being just the operator, so no separate equality check
         * is needed.
         */
        return StringUtils.countMatches(expression, operator) == 1 && !expression.startsWith(operator);
    }

    /**
     * Remove double quoted strings from a string.
     *
     * Splits a string around instances of java-style double-quoted strings.
     *
     * @param inp
     *                The string to split.
     *
     * @return A list containing alternating bits of the string and the
     *         embedded double-quoted strings that separated them. The list
     *         always starts and ends with a (possibly empty) unquoted bit.
     */
    public static List<String> removeDQuotedStrings(String inp) {
        List<String> pieces = new LinkedList<>();
        StringBuffer unquoted = new StringBuffer();

        Matcher quoteFinder = DOUBLE_QUOTE_PATTERN.matcher(inp);

        while (quoteFinder.find()) {
            /*
             * An empty replacement copies only the text between the
             * previous match and this one into the buffer.
             */
            quoteFinder.appendReplacement(unquoted, "");

            pieces.add(unquoted.toString());
            pieces.add(quoteFinder.group(1));

            unquoted = new StringBuffer();
        }

        quoteFinder.appendTail(unquoted);
        pieces.add(unquoted.toString());

        return pieces;
    }

    /**
     * Replace escape characters with their actual equivalents.
     *
     * Handles the shortform escapes (\b, \t, \n, \f, \r, \", \', \\), octal
     * escapes (\0 to \377) and four-digit unicode escapes (\\uXXXX). Text
     * that is not a recognized escape sequence is copied through unchanged.
     *
     * @param inp
     *                The string to replace escape sequences in.
     *
     * @return The string with escape sequences replaced by their equivalent
     *         characters.
     */
    public static String descapeString(String inp) {
        StringBuffer work = new StringBuffer();

        Matcher escapeFinder = ESCAPE_PATTERN.matcher(inp);
        while (escapeFinder.find()) {
            /*
             * Group 1 is the escape without its leading backslash, which
             * lets the numeric forms be parsed directly.
             */
            String escapeBody = escapeFinder.group(1);

            String escapeRep;
            switch (escapeBody) {
            case "b":
                escapeRep = "\b";
                break;
            case "t":
                escapeRep = "\t";
                break;
            case "n":
                escapeRep = "\n";
                break;
            case "f":
                escapeRep = "\f";
                break;
            case "r":
                escapeRep = "\r";
                break;
            case "\"":
                escapeRep = "\"";
                break;
            case "'":
                escapeRep = "'";
                break;
            case "\\":
                escapeRep = "\\";
                break;
            default:
                if (escapeBody.startsWith("u")) {
                    escapeRep = decodeNumericEscape(escapeBody.substring(1), 16);
                } else {
                    escapeRep = decodeNumericEscape(escapeBody, 8);
                }
            }

            /*
             * appendReplacement gives '\' and '$' special meaning in the
             * replacement text, so the decoded character must be quoted
             * to be inserted literally.
             */
            escapeFinder.appendReplacement(work, Matcher.quoteReplacement(escapeRep));
        }

        escapeFinder.appendTail(work);

        return work.toString();
    }

    /*
     * Convert the digits of an octal (radix 8) or unicode (radix 16) escape
     * into the character they denote.
     */
    private static String decodeNumericEscape(String digits, int radix) {
        int codepoint = Integer.parseInt(digits, radix);

        return new String(Character.toChars(codepoint));
    }

    /**
     * Check if a given string would be successfully converted to a double
     * by {@link Double#parseDouble(String)}.
     *
     * The decision is delegated to the pattern
     * {@code DoubleMatcher.floatingLiteral}, which must match the entire
     * string.
     *
     * @param inp
     *                The string to check.
     * @return Whether the string is a valid double or not.
     */
    public static boolean isDouble(String inp) {
        return DoubleMatcher.floatingLiteral.matcher(inp).matches();
    }

    /**
     * Check if a given string would be successfully converted to an integer
     * by {@link Integer#parseInt(String)}.
     *
     * NOTE: This only checks syntax (an optional sign followed by one or
     * more digits). Using values out of the range of integers will still
     * cause errors.
     *
     * @param inp
     *                The input to check.
     * @return Whether the string is a valid integer or not.
     */
    public static boolean isInt(String inp) {
        return INT_LITERAL_PATTERN.matcher(inp).matches();
    }
}