diff options
| author | Benjamin J. Culkin <bjculkin@mix.wvu.edu> | 2017-09-11 16:14:01 -0300 |
|---|---|---|
| committer | Benjamin J. Culkin <bjculkin@mix.wvu.edu> | 2017-09-11 16:14:01 -0300 |
| commit | 1948e4ea9288191d8de405c9c78460e6f0457cb9 (patch) | |
| tree | c320bc883feba9876886d8f829122a75c98792a3 /BJC-Utils2/src | |
| parent | fa2e7f6a7239c58cbd6c1005f9789232087a5eda (diff) | |
Add custom string escaper.
StringDescaper is a class that allows for adding additional \ escapes
that will be processed in a given string.
Diffstat (limited to 'BJC-Utils2/src')
| -rw-r--r-- | BJC-Utils2/src/main/java/bjc/utils/parserutils/StringDescaper.java | 242 |
1 files changed, 242 insertions, 0 deletions
diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/StringDescaper.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/StringDescaper.java new file mode 100644 index 0000000..096656a --- /dev/null +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/StringDescaper.java @@ -0,0 +1,242 @@ +package bjc.utils.parserutils; + +import java.util.HashMap; +import java.util.Map; +import java.util.function.UnaryOperator; +import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import static java.util.Map.Entry; + +import static bjc.utils.PropertyDB.applyFormat; +import static bjc.utils.PropertyDB.getCompiledRegex; +import static bjc.utils.PropertyDB.getRegex; + +public class StringDescaper { + private Logger LOGGER = Logger.getLogger(StringDescaper.class.getName()); + + /* + * Patterns and pattern parts. + */ + private static String rPossibleEscapeString = getRegex("possibleStringEscape"); + private static Pattern possibleEscapePatt = Pattern.compile(rPossibleEscapeString); + + private static String rShortEscape = getRegex("shortFormStringEscape"); + private static String rOctalEscape = getRegex("octalStringEscape"); + private static String rUnicodeEscape = getRegex("unicodeStringEscape"); + + private String rEscapeString; + private Pattern escapePatt; + + private static String rDoubleQuoteString = applyFormat("doubleQuotes", getRegex("nonStringEscape"), rPossibleEscapeString); + private static Pattern doubleQuotePatt = Pattern.compile(rDoubleQuoteString); + + private static Pattern quotePatt = getCompiledRegex("unescapedQuote"); + + private Map<String, String> literalEscapes; + private Map<Pattern, UnaryOperator<String>> specialEscapes; + + public StringDescaper() { + literalEscapes = new HashMap<>(); + specialEscapes = new HashMap<>(); + + rEscapeString = String.format("\\\\(%1$s|%2$s|%3$s)"); + escapePatt = Pattern.compile(rEscapeString); + } + + public void addLiteralEscape(String escape, String val) { + if(literalEscapes.containsKey(escape)) { + LOGGER.warning(String.format("Shadowing literal escape '%s'\n", escape)); + } + + literalEscapes.put(escape, val); + } + + public void addSpecialEscape(String escape, UnaryOperator<String> val) { + if(specialEscapes.containsKey(escape)) { + LOGGER.warning(String.format("Shadowing special escape '%s'\n", escape)); + } + + /* + * Make sure this special escape is a valid regex. + */ + + Pattern patt = null; + try { + patt = Pattern.compile(escape); + } catch (PatternSyntaxException psex) { + String msg = String.format("Invalid special escape '%s'", escape); + + IllegalArgumentException iaex = new IllegalArgumentException(msg); + iaex.initCause(psex); + + throw psex; + } + + specialEscapes.put(patt, val); + } + + public void compileEscapes() { + StringBuilder work = new StringBuilder(); + + for(String litEscape : literalEscapes.keySet()) { + work.append("|(?:"); + work.append(Pattern.quote(litEscape)); + work.append(")"); + } + + for(Pattern specEscape : specialEscapes.keySet()) { + work.append("|(?:"); + work.append(specEscape.toString()); + work.append(")"); + } + + /* + * Convert user-defined escapes to a regex for matching. + * We don't need a bar before %4 because the string has it. + */ + rEscapeString = String.format("\\(%1$s|%2$s|%3$s%4$s)", rShortEscape, rOctalEscape, rUnicodeEscape, work.toString()); + escapePatt = Pattern.compile(rEscapeString); + } + + /** + * Replace escape characters with their actual equivalents. + * + * @param inp + * The string to replace escape sequences in. + * + * @return The string with escape sequences replaced by their equivalent + * characters. + */ + public String descapeString(final String inp) { + if (inp == null) { + throw new NullPointerException("Input to descapeString must not be null"); + } + + /* + * Prepare the buffer and escape finder. + */ + final StringBuffer work = new StringBuffer(); + final Matcher possibleEscapeFinder = possibleEscapePatt.matcher(inp); + final Matcher escapeFinder = escapePatt.matcher(inp); + + while (possibleEscapeFinder.find()) { + if (!escapeFinder.find()) { + /* + * Found a possible escape that isn't actually an + * escape. + */ + final String msg = String.format("Illegal escape sequence '%s' at position %d of string '%s'", + possibleEscapeFinder.group(), possibleEscapeFinder.start(), inp); + throw new IllegalArgumentException(msg); + } + + final String escapeSeq = escapeFinder.group(); + + /* + * Convert the escape to a string. + */ + String escapeRep = ""; + switch (escapeSeq) { + case "\\b": + escapeRep = "\b"; + break; + case "\\t": + escapeRep = "\t"; + break; + case "\\n": + escapeRep = "\n"; + break; + case "\\f": + escapeRep = "\f"; + break; + case "\\r": + escapeRep = "\r"; + break; + case "\\\"": + escapeRep = "\""; + break; + case "\\'": + escapeRep = "'"; + break; + case "\\\\": + /* + * Skip past the second slash. + */ + possibleEscapeFinder.find(); + escapeRep = "\\"; + break; + default: + if (escapeSeq.startsWith("u")) { + escapeRep = handleUnicodeEscape(escapeSeq.substring(1)); + } else if(escapeSeq.startsWith("O")) { + escapeRep = handleOctalEscape(escapeSeq.substring(1)); + } else if(literalEscapes.containsKey(escapeSeq)) { + escapeRep = literalEscapes.get(escapeSeq); + } else { + for(Entry<Pattern, UnaryOperator<String>> ent : specialEscapes.entrySet()) { + Pattern pat = ent.getKey(); + + Matcher mat = pat.matcher(escapeSeq); + if(mat.matches()) { + escapeRep = ent.getValue().apply(escapeSeq); + break; + } + } + } + } + + escapeFinder.appendReplacement(work, escapeRep); + } + + escapeFinder.appendTail(work); + + return work.toString(); + } + + /* + * Handle a unicode codepoint. + */ + private static String handleUnicodeEscape(final String seq) { + try { + final int codepoint = Integer.parseInt(seq, 16); + + return new String(Character.toChars(codepoint)); + } catch (final IllegalArgumentException iaex) { + final String msg = String.format("'%s' is not a valid Unicode escape sequence'", seq); + + final IllegalArgumentException reiaex = new IllegalArgumentException(msg); + + reiaex.initCause(iaex); + + throw reiaex; + } + } + + /* + * Handle a octal codepoint. + */ + private static String handleOctalEscape(final String seq) { + try { + final int codepoint = Integer.parseInt(seq, 8); + + if (codepoint > 255) { + final String msg = String.format("'%d' is outside the range of octal escapes', codepoint"); + + throw new IllegalArgumentException(msg); + } + + return new String(Character.toChars(codepoint)); + } catch (final IllegalArgumentException iaex) { + final String msg = String.format("'%s' is not a valid octal escape sequence'", seq); + + final IllegalArgumentException reiaex = new IllegalArgumentException(msg); + + reiaex.initCause(iaex); + + throw reiaex; + } + } +} |
