From a63c30f5fe9ee302e73bb30e35095d789adb1a80 Mon Sep 17 00:00:00 2001 From: bjculkin Date: Fri, 17 Mar 2017 08:33:37 -0400 Subject: Refactor StringUtils Moved a bunch of token-oriented stuff from StringUtils to a new TokenUtils class. --- .../java/bjc/utils/funcutils/DoubleMatcher.java | 71 -------- .../main/java/bjc/utils/funcutils/StringUtils.java | 172 -------------------- .../java/bjc/utils/parserutils/DoubleMatcher.java | 71 ++++++++ .../java/bjc/utils/parserutils/TokenUtils.java | 180 +++++++++++++++++++++ 4 files changed, 251 insertions(+), 243 deletions(-) delete mode 100644 BJC-Utils2/src/main/java/bjc/utils/funcutils/DoubleMatcher.java create mode 100644 BJC-Utils2/src/main/java/bjc/utils/parserutils/DoubleMatcher.java create mode 100644 BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java (limited to 'BJC-Utils2/src/main/java/bjc') diff --git a/BJC-Utils2/src/main/java/bjc/utils/funcutils/DoubleMatcher.java b/BJC-Utils2/src/main/java/bjc/utils/funcutils/DoubleMatcher.java deleted file mode 100644 index 03227ed..0000000 --- a/BJC-Utils2/src/main/java/bjc/utils/funcutils/DoubleMatcher.java +++ /dev/null @@ -1,71 +0,0 @@ -package bjc.utils.funcutils; - -import java.util.regex.Pattern; - -/* - * Checks if a string would pass Double.parseDouble. - * - * Uses a regex from the javadoc for Double.valueOf() - */ -class DoubleMatcher { - private static final String Digits = "(\\p{Digit}+)"; - private static final String HexDigits = "(\\p{XDigit}+)"; - - /* - * an exponent is 'e' or 'E' followed by an optionally signed decimal - * integer. - */ - private static final String Exp = "[eE][+-]?" + Digits; - - private static final String fpRegex = - "[\\x00-\\x20]*" // Optional leading "whitespace" - + "[+-]?(" + // Optional sign character - "NaN|" + // "NaN" string - "Infinity|" + // "Infinity" string - - /* - * A decimal floating-point string representing a finite - * positive number without a leading sign has at most - * five basic pieces: Digits . 
Digits ExponentPart - * FloatTypeSuffix - * - * Since this method allows integer-only strings as - * input in addition to strings of floating-point - * literals, the two sub-patterns below are - * simplifications of the grammar productions from - * section 3.10.2 of The Java™ Language Specification. - */ - - /* - * Digits ._opt Digits_opt ExponentPart_opt - * FloatTypeSuffix_opt - */ - "(((" + Digits + "(\\.)?(" + Digits + "?)(" + Exp + ")?)|" + - - /* - * . Digits ExponentPart_opt FloatTypeSuffix_opt - */ - "(\\.(" + Digits + ")(" + Exp + ")?)|" + - - /* - * Hexadecimal strings - */ - "((" + - /* - * 0[xX] HexDigits ._opt BinaryExponent - * FloatTypeSuffix_opt - */ - "(0[xX]" + HexDigits + "(\\.)?)|" + - - /* - * 0[xX] HexDigits_opt . HexDigits BinaryExponent - * FloatTypeSuffix_opt - */ - "(0[xX]" + HexDigits + "?(\\.)" + HexDigits + ")" + - - ")[pP][+-]?" + Digits + "))" + "[fFdD]?))" + "[\\x00-\\x20]*"; // Optional - // trailing - // "whitespace" - - public static final Pattern floatingLiteral = Pattern.compile("\\A" + fpRegex + "\\Z"); -} diff --git a/BJC-Utils2/src/main/java/bjc/utils/funcutils/StringUtils.java b/BJC-Utils2/src/main/java/bjc/utils/funcutils/StringUtils.java index 77fec7e..a7b4436 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/funcutils/StringUtils.java +++ b/BJC-Utils2/src/main/java/bjc/utils/funcutils/StringUtils.java @@ -1,10 +1,6 @@ package bjc.utils.funcutils; import java.util.Deque; -import java.util.LinkedList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** * Utility methods for operations on strings @@ -13,25 +9,6 @@ import java.util.regex.Pattern; * */ public class StringUtils { - /** - * Checks if the given expression contains the specified operator in a - * situation that indicates its use as an infix operator. 
- * - * @param expression - * The expression to check - * @param operator - * The operator to see if it is contained - * @return Whether or not the given expression contains the specified - * operator as a infix operator - */ - public static boolean containsInfixOperator(String expression, String operator) { - // Bit annoying to have to use a full class name, but what are - // you - // going to do? - return org.apache.commons.lang3.StringUtils.countMatches(expression, operator) == 1 - && !expression.equalsIgnoreCase(operator) && !expression.startsWith(operator); - } - /** * Check if a string consists only of one or more matches of a regular * expression @@ -89,155 +66,6 @@ public class StringUtils { return queue.isEmpty() ? "(none)" : queue.toString(); } - /* - * This regex matches java-style string escapes - */ - private static String escapeString = "\\\\([btnfr\"'\\\\]" // Match - // shortform - // escape - // sequences - // like - // \t - // or - // \" - + "|[0-3]?[0-7]{1,2}" // Match - // octal - // escape - // sequences - + "|u[0-9a-fA-F]{4})"; // Match - // unicode - // escape - // sequences - private static Pattern escapePatt = Pattern.compile(escapeString); - - /* - * This regular expression matches java style double quoted strings - */ - private static Pattern doubleQuotePatt = Pattern.compile("(\"(" + "[^\\\\\"]+" // Match - // one - // or - // more - // characters - // that - // aren't - // quotes - // or - // slashes - + "|" + escapeString + ")" // Match escape sequences - + "*\")"); // Match all of those things zero or more - // times, followed by a closing quote - - /** - * Remove double quoted strings from a string. - * - * Splits a string around instances of java-style double-quoted strings. - * - * @param inp - * The string to split. - * - * @return An list containing alternating bits of the string and the - * embedded double-quoted strings that seperated them. 
- */ - public static List removeDQuotedStrings(String inp) { - StringBuffer work = new StringBuffer(); - List res = new LinkedList<>(); - - Matcher mt = doubleQuotePatt.matcher(inp); - - while(mt.find()) { - mt.appendReplacement(work, ""); - - res.add(work.toString()); - res.add(mt.group(1)); - - work = new StringBuffer(); - } - mt.appendTail(work); - res.add(work.toString()); - - return res; - } - - /** - * Replace escape characters with their actual equivalents. - * - * @param inp - * The string to replace escape sequences in. - * - * @return The string with escape sequences replaced by their equivalent - * characters. - */ - public static String descapeString(String inp) { - StringBuffer work = new StringBuffer(); - - Matcher escapeFinder = escapePatt.matcher(inp); - while(escapeFinder.find()) { - String escapeSeq = escapeFinder.group(); - - String escapeRep = ""; - switch(escapeSeq) { - case "\\b": - escapeRep = "\b"; - break; - case "\\t": - escapeRep = "\t"; - break; - case "\\n": - escapeRep = "\n"; - break; - case "\\f": - escapeRep = "\f"; - break; - case "\\r": - escapeRep = "\r"; - break; - case "\\\"": - escapeRep = "\""; - break; - case "\\'": - escapeRep = "'"; - break; - case "\\\\": - escapeRep = "\\"; - break; - default: - if(escapeSeq.startsWith("u")) { - escapeRep = handleUnicodeEscape(escapeSeq.substring(1)); - } else { - escapeRep = handleOctalEscape(escapeSeq); - } - } - - escapeFinder.appendReplacement(work, escapeRep); - } - - escapeFinder.appendTail(work); - - return work.toString(); - } - - private static String handleUnicodeEscape(String seq) { - int codepoint = Integer.parseInt(seq, 16); - - return new String(Character.toChars(codepoint)); - } - - private static String handleOctalEscape(String seq) { - int codepoint = Integer.parseInt(seq, 8); - - return new String(Character.toChars(codepoint)); - } - - public static boolean isDouble(String inp) { - return DoubleMatcher.floatingLiteral.matcher(inp).matches(); - } - - private static 
Pattern intLitPattern = Pattern.compile("\\A[+\\-]?\\d+\\Z"); - - public static boolean isInt(String inp) { - return intLitPattern.matcher(inp).matches(); - } - /** * Converts a sequence to an English list. * diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/DoubleMatcher.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/DoubleMatcher.java new file mode 100644 index 0000000..63eabca --- /dev/null +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/DoubleMatcher.java @@ -0,0 +1,71 @@ +package bjc.utils.parserutils; + +import java.util.regex.Pattern; + +/* + * Checks if a string would pass Double.parseDouble. + * + * Uses a regex from the javadoc for Double.valueOf() + */ +class DoubleMatcher { + private static final String Digits = "(\\p{Digit}+)"; + private static final String HexDigits = "(\\p{XDigit}+)"; + + /* + * an exponent is 'e' or 'E' followed by an optionally signed decimal + * integer. + */ + private static final String Exp = "[eE][+-]?" + Digits; + + private static final String fpRegex = + "[\\x00-\\x20]*" // Optional leading "whitespace" + + "[+-]?(" + // Optional sign character + "NaN|" + // "NaN" string + "Infinity|" + // "Infinity" string + + /* + * A decimal floating-point string representing a finite + * positive number without a leading sign has at most + * five basic pieces: Digits . Digits ExponentPart + * FloatTypeSuffix + * + * Since this method allows integer-only strings as + * input in addition to strings of floating-point + * literals, the two sub-patterns below are + * simplifications of the grammar productions from + * section 3.10.2 of The Java™ Language Specification. + */ + + /* + * Digits ._opt Digits_opt ExponentPart_opt + * FloatTypeSuffix_opt + */ + "(((" + Digits + "(\\.)?(" + Digits + "?)(" + Exp + ")?)|" + + + /* + * . 
Digits ExponentPart_opt FloatTypeSuffix_opt + */ + "(\\.(" + Digits + ")(" + Exp + ")?)|" + + + /* + * Hexadecimal strings + */ + "((" + + /* + * 0[xX] HexDigits ._opt BinaryExponent + * FloatTypeSuffix_opt + */ + "(0[xX]" + HexDigits + "(\\.)?)|" + + + /* + * 0[xX] HexDigits_opt . HexDigits BinaryExponent + * FloatTypeSuffix_opt + */ + "(0[xX]" + HexDigits + "?(\\.)" + HexDigits + ")" + + + ")[pP][+-]?" + Digits + "))" + "[fFdD]?))" + "[\\x00-\\x20]*"; // Optional + // trailing + // "whitespace" + + public static final Pattern floatingLiteral = Pattern.compile("\\A" + fpRegex + "\\Z"); +} diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java new file mode 100644 index 0000000..8224928 --- /dev/null +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenUtils.java @@ -0,0 +1,180 @@ +package bjc.utils.parserutils; + +import java.util.LinkedList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.lang3.StringUtils; + +/** + * Utilities useful for operating on PL tokens. + * + * @author EVE + * + */ +public class TokenUtils { + + /** + * Checks if the given expression contains the specified operator in a + * situation that indicates its use as an infix operator. 
+ * + * @param expression + * The expression to check + * @param operator + * The operator to see if it is contained + * @return Whether or not the given expression contains the specified + * operator as an infix operator + */ + public static boolean containsInfixOperator(String expression, String operator) { + return StringUtils.countMatches(expression, operator) == 1 && !expression.equalsIgnoreCase(operator) + && !expression.startsWith(operator); + } + /* + * This regex matches java-style string escapes + */ + private static final String escapeString = "\\\\([btnfr\"'\\\\]" // Match shortform escape sequences like \t or \" + + "|[0-3]?[0-7]{1,2}" // Match octal escape sequences + + "|u[0-9a-fA-F]{4})"; // Match unicode escape sequences + + private static final Pattern escapePatt = Pattern.compile(escapeString); + + /* + * This regular expression matches java style double quoted strings + */ + private static final Pattern doubleQuotePatt = Pattern.compile("(\"(" + "[^\\\\\"]+" // Match one or more characters that aren't quotes or slashes + + "|" + escapeString + ")" // Match escape sequences + + "*\")"); // Match all of those things zero or more times, followed by a closing quote + + /** + * Remove double quoted strings from a string. + * + * Splits a string around instances of java-style double-quoted strings. + * + * @param inp + * The string to split. + * + * @return A list containing alternating bits of the string and the + * embedded double-quoted strings that separated them. + */ + public static List<String> removeDQuotedStrings(String inp) { + StringBuffer work = new StringBuffer(); + List<String> res = new LinkedList<>(); + + Matcher mt = doubleQuotePatt.matcher(inp); + + while(mt.find()) { + mt.appendReplacement(work, ""); // work now holds only the text between the previous match and this one + + res.add(work.toString()); + res.add(mt.group(1)); // The quoted string itself, quotes included + + work = new StringBuffer(); + } + + mt.appendTail(work); + res.add(work.toString()); + + return res; + } + + /** + * Replace escape characters with their actual equivalents. 
+ * + * @param inp + * The string to replace escape sequences in. + * + * @return The string with escape sequences replaced by their equivalent + * characters. + */ + public static String descapeString(String inp) { + StringBuffer work = new StringBuffer(); + + Matcher escapeFinder = escapePatt.matcher(inp); + while(escapeFinder.find()) { + String escapeSeq = escapeFinder.group(1); // Escape body only; the leading backslash is outside group 1 + + String escapeRep = ""; + switch(escapeSeq) { + case "b": + escapeRep = "\b"; + break; + case "t": + escapeRep = "\t"; + break; + case "n": + escapeRep = "\n"; + break; + case "f": + escapeRep = "\f"; + break; + case "r": + escapeRep = "\r"; + break; + case "\"": + escapeRep = "\""; + break; + case "'": + escapeRep = "'"; + break; + case "\\": + escapeRep = "\\"; + break; + default: + if(escapeSeq.startsWith("u")) { + escapeRep = handleUnicodeEscape(escapeSeq.substring(1)); + } else { + escapeRep = handleOctalEscape(escapeSeq); + } + } + + escapeFinder.appendReplacement(work, Matcher.quoteReplacement(escapeRep)); // Quote so a literal \ or $ in the result is not read as replacement syntax + } + + escapeFinder.appendTail(work); + + return work.toString(); + } + + private static String handleUnicodeEscape(String seq) { + int codepoint = Integer.parseInt(seq, 16); + + return new String(Character.toChars(codepoint)); + } + + private static String handleOctalEscape(String seq) { + int codepoint = Integer.parseInt(seq, 8); + + return new String(Character.toChars(codepoint)); + } + + /** + * Check if a given string would be successfully converted to a double + * by {@link Double#parseDouble(String)}. + * + * @param inp + * The string to check. + * @return Whether the string is a valid double or not. + */ + public static boolean isDouble(String inp) { + return DoubleMatcher.floatingLiteral.matcher(inp).matches(); + } + + private static Pattern intLitPattern = Pattern.compile("\\A[+\\-]?\\d+\\Z"); + + /** + * Check if a given string would be successfully converted to an integer + * by {@link Integer#parseInt(String)}. + * + * NOTE: This only checks syntax. 
Using values out of the range of + * integers will still cause errors. + * + * @param inp + * The input to check. + * @return Whether the string is a syntactically valid integer or not. + */ + public static boolean isInt(String inp) { + return intLitPattern.matcher(inp).matches(); + } +} -- cgit v1.2.3