From 415f5689fe900a04bf64d41878cfa225905b6617 Mon Sep 17 00:00:00 2001 From: bjculkin Date: Sat, 18 Mar 2017 19:58:22 -0400 Subject: Attempt to get subgroups working --- .../java/bjc/utils/examples/DelimSplitterTest.java | 438 +++++++++++++++++++-- BJC-Utils2/src/main/java/bjc/utils/data/Tree.java | 4 +- .../bjc/utils/parserutils/SequenceDelimiter.java | 70 +++- .../java/bjc/utils/parserutils/TokenSplitter.java | 141 +++++-- 4 files changed, 574 insertions(+), 79 deletions(-) diff --git a/BJC-Utils2/src/examples/java/bjc/utils/examples/DelimSplitterTest.java b/BJC-Utils2/src/examples/java/bjc/utils/examples/DelimSplitterTest.java index 521c521..4cd939e 100644 --- a/BJC-Utils2/src/examples/java/bjc/utils/examples/DelimSplitterTest.java +++ b/BJC-Utils2/src/examples/java/bjc/utils/examples/DelimSplitterTest.java @@ -1,12 +1,25 @@ package bjc.utils.examples; import bjc.utils.data.ITree; +import bjc.utils.data.TopDownTransformResult; +import bjc.utils.funcdata.bst.TreeLinearizationMethod; +import bjc.utils.funcutils.StringUtils; import bjc.utils.parserutils.SequenceDelimiter; import bjc.utils.parserutils.StringDelimiter; import bjc.utils.parserutils.TokenSplitter; + import bjc.utils.parserutils.SequenceDelimiter.DelimiterException; +import bjc.utils.parserutils.SequenceDelimiter.DelimiterGroup; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; import java.util.Scanner; /** @@ -16,51 +29,416 @@ import java.util.Scanner; * */ public class DelimSplitterTest { - /** - * Main method - * - * @param args - * Unused CLI args. + private TokenSplitter split; + + private StringDelimiter dlm; + + private Map mirrored; + + private Map> groups; + + /* + * Create a new tester. */ - public static void main(String[] args) { - Scanner scn = new Scanner(System.in); + private DelimSplitterTest() { + loadMirrorDB(); - TokenSplitter split = new TokenSplitter(); - split.addDelimiter("(", ")"); - split.addDelimiter("[", "]"); - split.addDelimiter("{", "}"); - split.addDelimiter("+", "-", "*", "/"); - split.compile(); + groups = new HashMap<>(); - StringDelimiter dlm = new StringDelimiter(); - dlm.addGroup(new String[] { "(" }, "parens", ")"); - dlm.addGroup(new String[] { "[" }, "brackets", "]"); - dlm.addGroup(new String[] { "{" }, "braces", "}"); + split = new TokenSplitter(); - System.out.print("Enter a sequence to delimit (blank line to quit): "); - String inp = scn.nextLine(); - System.out.println(); + dlm = new StringDelimiter(); + } - while(!inp.equals("")) { - String[] strings = split.split(inp); + private void loadMirrorDB() { + mirrored = new HashMap<>(); + + InputStream stream = getClass().getResourceAsStream("/BidiMirrorDB.txt"); + + try(Scanner scn = new Scanner(stream)) { + String ln = ""; + + while(scn.hasNextLine()) { + ln = scn.nextLine(); - System.out.println("Split tokens: " + Arrays.deepToString(strings)); + if(ln.equals("")) continue; + if(ln.startsWith("#")) continue; - try { - ITree delim = dlm.delimitSequence(strings); + int cp1 = Integer.parseInt(ln.substring(0, 4), 16); + int cp2 = Integer.parseInt(ln.substring(6, 10), 16); - System.out.println("Delimited tokens:\n" + delim.toString()); - } catch(DelimiterException dex) { - System.out.println("Expression isn't properly delimited."); - System.out.println("Cause: " + dex.getMessage()); + char[] cpa1 = Character.toChars(cp1); + char[] cpa2 = Character.toChars(cp2); + + String cps1 = new String(cpa1); + String cps2 = new String(cpa2); + + mirrored.put(cps1, cps2); } + } + } + + /* + * Run the tester interface. + */ + private void runLoop() { + Scanner scn = new Scanner(System.in); + + System.out.print("Enter a command (blank line to quit): "); + String inp = scn.nextLine().trim(); + System.out.println(); + + while(!inp.equals("")) { + handleCommand(inp, scn, true); System.out.println(); - System.out.print("Enter a sequence to delimit (blank line to quit): "); + + System.out.print("Enter a command (blank line to quit): "); inp = scn.nextLine(); + System.out.println(); } scn.close(); } + + /* + * Handle a input command. + */ + private void handleCommand(String inp, Scanner scn, boolean isInteractive) { + if(inp.equals("")) { + return; + } + + int idx = inp.indexOf(' '); + + if(idx == -1) { + idx = inp.length(); + } + + String command = inp.substring(0, idx); + + String args = inp.substring(idx).trim(); + String[] argArray = args.split(" "); + + switch(command) { + case "test": + handleTest(args, false); + break; + case "test-ws": + handleTest(args, true); + break; + case "splitter-split": + handleSplit(argArray); + break; + case "splitter-compile": + split.compile(); + System.out.println("Compiled splitter"); + break; + case "splitter-add": + split.addDelimiter(argArray); + System.out.println("Added delimiters " + StringUtils.toEnglishList(argArray, true)); + break; + case "splitter-addmulti": + split.addMultiDelimiter(argArray); + System.out.println("Added multi-delimiters " + StringUtils.toEnglishList(argArray, true)); + break; + case "splitter-addnon": + split.addNonMatcher(argArray); + System.out.println("Added non-splitters " + StringUtils.toEnglishList(argArray, true)); + break; + case "splitter-addmatch": + for(String arg : argArray) { + split.addDelimiter(arg, mirrored.get(arg)); + } + System.out.println("Added matched delimiters " + StringUtils.toEnglishList(argArray, true)); + break; + case "splitter-debug": + System.out.println(split.toString()); + break; + case "splitter-reset": + split = new TokenSplitter(); + System.out.println("Reset splitter"); + break; + case "delims-addopen": + dlm.addOpener(argArray[0], argArray[1]); + System.out.printf("Added opener '%s' for group '%s'\n", argArray[0], argArray[1]); + break; + case "delims-addgroup": + for(String arg : argArray) { + dlm.addGroup(groups.get(arg)); + } + System.out.println("Added groups " + StringUtils.toEnglishList(argArray, true)); + break; + case "delims-debug": + System.out.println(dlm.toString()); + break; + case "delims-test": + handleDelim(args); + break; + case "delims-reset": + dlm = new StringDelimiter(); + System.out.println("Reset delimiter"); + break; + case "delimgroups-new": + for(String arg : argArray) { + groups.put(arg, new DelimiterGroup<>(arg)); + } + System.out.println("Created groups " + StringUtils.toEnglishList(argArray, true)); + break; + case "delimgroups-edit": + for(String arg : argArray) { + handleEditGroup(arg, scn, isInteractive); + } + break; + case "delimgroups-debug": + for(DelimiterGroup group : groups.values()) { + System.out.println(group.toString()); + } + break; + case "delimgroups-reset": + dlm = new StringDelimiter(); + groups = new HashMap<>(); + System.out.println("Reset delimiter groups + delimiter"); + break; + case "load-file": + handleLoadFile(args); + break; + default: + System.out.println("Unknown command "); + } + + } + + /* + * Load script commands from a file. + */ + private void handleLoadFile(String args) { + try(FileInputStream fis = new FileInputStream(args)) { + Scanner scn = new Scanner(fis); + + while(scn.hasNextLine()) { + String ln = scn.nextLine().trim(); + + if(ln.equals("")) continue; + if(ln.startsWith("#")) continue; + + System.out.println("\nRead command '" + ln + "' from file\n"); + handleCommand(ln, scn, false); + } + + scn.close(); + } catch(FileNotFoundException fnfex) { + System.out.println("Couldn't find file '" + args + "'"); + } catch(IOException ioex) { + System.out.println("I/O error with file '" + args + "'\nCause: " + ioex.getMessage()); + } + } + + /* + * Handle editing a group. + */ + private void handleEditGroup(String arg, Scanner scn, boolean isInteractive) { + if(!groups.containsKey(arg)) { + System.out.println("No group named '" + arg + "'"); + return; + } + + DelimiterGroup group = groups.get(arg); + + System.out.println("Editing group '" + arg + "'"); + + if(isInteractive) { + System.out.println("Enter command (blank line to stop editing): "); + } + + String ln = scn.nextLine().trim(); + + while(!ln.equals("")) { + int idx = ln.indexOf(' '); + + if(idx == -1) { + idx = ln.length(); + } + + String command = ln.substring(0, idx); + + String args = ln.substring(idx).trim(); + String[] argArray = args.split(" "); + + switch(command) { + case "add-closing": + group.addClosing(argArray); + System.out.println("Added closers " + StringUtils.toEnglishList(argArray, true)); + break; + case "add-tlexclude": + group.addTopLevelForbid(argArray); + System.out.println("Added top-level exclusions " + + StringUtils.toEnglishList(argArray, true)); + break; + case "add-exclude": + group.addTopLevelForbid(argArray); + System.out.println( + "Added nested exclusions " + StringUtils.toEnglishList(argArray, true)); + break; + case "add-subgroup": + group.addSubgroup(argArray[0], Arrays.copyOfRange(argArray, 1, argArray.length)); + System.out.println("Added subgroups"); + break; + case "debug": + System.out.println(group.toString()); + break; + default: + System.out.println("Unknown command " + command); + } + + if(isInteractive) { + System.out.println("Enter command (blank line to stop editing): "); + } + + ln = scn.nextLine().trim(); + } + + System.out.println("Finished editing group '" + arg + "'"); + } + + private void handleDelim(String args) { + try { + ITree res = dlm.delimitSequence(args.split(" ")); + + printDelimSeq(res); + } catch(DelimiterException dex) { + System.out.println("Expression 'args' isn't properly delimited.\n\tCause: " + dex.getMessage()); + } + } + + private void handleSplit(String[] argArray) { + for(int i = 0; i < argArray.length; i++) { + String arg = argArray[i]; + + String[] res = split.split(arg); + + System.out.printf("%d '%s' %s\n", i, arg, Arrays.deepToString(res)); + } + } + + private void handleTest(String inp, boolean splitWS) { + String[] strings; + + try { + strings = split.split(inp); + } catch(IllegalStateException isex) { + System.out.println("Splitter must be compiled at least once before use."); + return; + } + + System.out.println("Split tokens: " + Arrays.deepToString(strings)); + + if(splitWS) { + List tks = new LinkedList<>(); + + for(String strang : strings) { + tks.addAll(Arrays.asList(strang.split(" "))); + } + + strings = tks.toArray(new String[0]); + } + try { + ITree delim = dlm.delimitSequence(strings); + + printDelimSeq(delim); + } catch(DelimiterException dex) { + System.out.println("Expression isn't properly delimited."); + System.out.println("Cause: " + dex.getMessage()); + } + } + + private void printDelimSeq(ITree delim) { + System.out.println("Delimited tokens:\n" + delim.toString()); + System.out.print("Delimited expr:\n"); + printDelimTree(delim); + System.out.println(); + + ITree transform = delim.topDownTransform(this::pickNode, this::transformNode); + System.out.println("Transformed tree: " + transform); + System.out.println(); + + System.out.print("Transformed expr:\n"); + printDelimTree(transform); + System.out.println(); + } + + private void printDelimTree(ITree tree) { + StringBuilder sb = new StringBuilder(); + + intPrintDelimTree(tree, sb); + + System.out.println(sb.toString().replaceAll("\\s+", " ")); + } + + private void intPrintDelimTree(ITree tree, StringBuilder sb) { + tree.doForChildren((child) -> { + intPrintDelimNode(child, sb); + }); + } + + private void intPrintDelimNode(ITree tree, StringBuilder sb) { + if(tree.getHead().equals("contents")) { + intPrintDelimTree(tree, sb); + return; + } + + switch(tree.getChildrenCount()) { + case 0: + sb.append(tree.getHead()); + sb.append(" "); + + break; + case 1: + intPrintDelimTree(tree.getChild(0), sb); + + break; + case 2: + intPrintDelimTree(tree.getChild(0).getChild(0), sb); + intPrintDelimNode(tree.getChild(1), sb); + + break; + case 3: + intPrintDelimNode(tree.getChild(0), sb); + + ITree contents = tree.getChild(1); + + intPrintDelimTree(contents.getChild(0), sb); + intPrintDelimNode(tree.getChild(2), sb); + + break; + } + } + + private TopDownTransformResult pickNode(String node) { + if(groups.containsKey(node) || node.equals("subgroup")) + return TopDownTransformResult.PUSHDOWN; + else + return TopDownTransformResult.PASSTHROUGH; + } + + private ITree transformNode(ITree tree) { + if(groups.containsKey(tree.getHead())) { + + } + + return tree; + } + + /** + * Main method + * + * @param args + * Unused CLI args. + */ + public static void main(String[] args) { + DelimSplitterTest tst = new DelimSplitterTest(); + + tst.runLoop(); + } } diff --git a/BJC-Utils2/src/main/java/bjc/utils/data/Tree.java b/BJC-Utils2/src/main/java/bjc/utils/data/Tree.java index 86ea884..34e074b 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/data/Tree.java +++ b/BJC-Utils2/src/main/java/bjc/utils/data/Tree.java @@ -104,7 +104,9 @@ public class Tree implements ITree { @Override public void doForChildren(Consumer> action) { - children.forEach(action); + if(childCount > 0) { + children.forEach(action); + } } @Override diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/SequenceDelimiter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/SequenceDelimiter.java index 96a6c65..af6ba81 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/SequenceDelimiter.java +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/SequenceDelimiter.java @@ -76,6 +76,7 @@ public class SequenceDelimiter { closingDelimiters = new HashSet<>(); topLevelExclusions = new HashSet<>(); groupExclusions = new HashSet<>(); + subgroups = new HashMap<>(); } /** @@ -473,60 +474,60 @@ public class SequenceDelimiter { whoForbid.remove(excludedGroup); } - } else if(!groupStack.empty() && groupStack.top().subgroups.containsKey(tok)){ + } else if(!groupStack.empty() && groupStack.top().subgroups.containsKey(tok)) { /* * Parse a sub-group. */ - + /* * The set of enclosed groups. */ Set enclosed = groupStack.top().subgroups.get(tok); - + /* * The current contents of this group. */ ITree contentTree = trees.pop(); - + /* - * Find the first element to enclose in the subgroup. + * Find the first element to enclose in the + * subgroup. */ int ind = contentTree.revFind((chd) -> { - if(chd.getHead().equals(subgroup)) { - return !enclosed.contains(chd.getChild(1)); - } else { - return false; - } + return checkChild(subgroup, enclosed, chd); }); - + + if(ind == -1) ind = 0; + ITree newContentTree = new Tree<>(contentTree.getHead()); ITree subgroupContents = new Tree<>(contents); - + /* - * Split content tree into an untouched tree, and the subgroup. + * Split content tree into an untouched tree, + * and the subgroup. */ for(int j = 0; j < contentTree.getChildrenCount(); j++) { ITree child = contentTree.getChild(j); - + if(j < ind) { newContentTree.addChild(child); } else { subgroupContents.addChild(child); } } - + /* * Construct the subgroup. */ ITree subgroupTree = new Tree<>(subgroup); subgroupTree.addChild(subgroupContents); subgroupTree.addChild(new Tree<>(tok)); - + /* * Add the subgroup to the group. */ newContentTree.addChild(subgroupTree); - + /* * Add the group contents. */ @@ -560,6 +561,19 @@ public class SequenceDelimiter { return res; } + private boolean checkChild(T subgroup, Set enclosed, ITree chd) { + System.out.println("Checking child '" + chd.getHead() + "' for subgroups."); + + if(chd.getHead().equals(subgroup)) { + System.out.println("Checking if '" + chd.getChild(1) + "' is a subordinate group."); + boolean contains = enclosed.contains(chd.getChild(1)); + System.out.println("It " + (contains ? "was" : "wasn't")); + return contains; + } else { + return false; + } + } + private boolean isForbidden(Stack> groupStack, Multiset forbiddenDelimiters, T groupName) { boolean localForbid; if(groupStack.empty()) @@ -627,4 +641,26 @@ public class SequenceDelimiter { addOpener(open, groupName); } } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + + builder.append("SequenceDelimiter ["); + + if(openDelimiters != null) { + builder.append("openDelimiters="); + builder.append(openDelimiters); + builder.append(", "); + } + + if(groups != null) { + builder.append("groups="); + builder.append(groups); + } + + builder.append("]"); + + return builder.toString(); + } } diff --git a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java index e6191b9..ec69ade 100644 --- a/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java +++ b/BJC-Utils2/src/main/java/bjc/utils/parserutils/TokenSplitter.java @@ -1,5 +1,7 @@ package bjc.utils.parserutils; +import java.util.HashSet; +import java.util.Set; import java.util.regex.Pattern; /** @@ -13,10 +15,9 @@ public class TokenSplitter { * * It does two things: * - *
    - *
  1. Match to the left of the provided delimiter by positive lookahead
  2. - *
  3. Match to the right of the provided delimiter by positive lookbehind
  4. - *
+ *
  1. Match to the left of the provided delimiter by positive + * lookahead
  2. Match to the right of the provided delimiter by + * positive lookbehind
* * Thus, it will only match in places where the delimiter is, but won't * actually match the delimiter, leaving split to put it into the stream @@ -47,10 +48,20 @@ public class TokenSplitter { private Pattern compPatt; private Pattern exclusionPatt; + /* + * These represent info for debugging. + */ + private Set delimSet; + private Set multidelimSet; + private Set exclusionSet; + /** * Create a new token splitter. */ public TokenSplitter() { + delimSet = new HashSet<>(); + multidelimSet = new HashSet<>(); + exclusionSet = new HashSet<>(); } /** @@ -73,7 +84,8 @@ public class TokenSplitter { if(compPatt == null) throw new IllegalStateException("Token splitter has not been compiled yet"); /* - * Don't split something that we should exclude from being split. + * Don't split something that we should exclude from being + * split. */ if(exclusionPatt.matcher(inp).matches()) return new String[] { inp }; @@ -93,18 +105,20 @@ public class TokenSplitter { public void addDelimiter(String... delims) { for(String delim : delims) { String quoteDelim = Pattern.quote(delim); - String delimPat = String.format(WITH_DELIM, quoteDelim); - + String delimPat = String.format(WITH_DELIM, quoteDelim); + if(currPatt == null) { - currPatt = new StringBuilder(); + currPatt = new StringBuilder(); currExclusionPatt = new StringBuilder(); - + currPatt.append("(?:" + delimPat + ")"); currExclusionPatt.append("(?:" + quoteDelim + ")"); } else { currPatt.append("|(?:" + delimPat + ")"); currExclusionPatt.append("|(?:" + quoteDelim + ")"); } + + delimSet.add(delim); } } @@ -114,48 +128,113 @@ public class TokenSplitter { * The provided string should be a pattern to match one or more * occurances of. * - * @param delim + * @param delims * The delimiter to split on. */ - public void addMultiDelimiter(String delim) { - String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")"); + public void addMultiDelimiter(String... delims) { + for(String delim : delims) { + String delimPat = String.format(WITH_MULTI_DELIM, "(?:" + delim + ")"); + + if(currPatt == null) { + currPatt = new StringBuilder(); + currExclusionPatt = new StringBuilder(); - if(currPatt == null) { - currPatt = new StringBuilder(); - currExclusionPatt = new StringBuilder(); + currPatt.append("(?:" + delimPat + ")"); + currExclusionPatt.append("(?:(?:" + delim + ")+)"); - currPatt.append("(?:" + delimPat + ")"); - currExclusionPatt.append("(?:(?:" + delim + ")+)"); + } else { + currPatt.append("|(?:" + delimPat + ")"); + currExclusionPatt.append("|(?:(?:" + delim + ")+)"); + } - } else { - currPatt.append("|(?:" + delimPat + ")"); - currExclusionPatt.append("|(?:(?:" + delim + ")+)"); + multidelimSet.add(delim); } } /** * Marks strings matching the pattern delim as non-splittable. * - * @param delim - * The regex to not splitting matching strings. + * @param delimSet + * The regex to not splitting matching strings. */ - public void addNonMatcher(String delim) { - if(currPatt == null) { - currPatt = new StringBuilder(); - currExclusionPatt = new StringBuilder(); - - currExclusionPatt.append("(?:" + delim + ")"); - } else { - currExclusionPatt.append("|(?:" + delim + ")"); + public void addNonMatcher(String... delims) { + for(String delim : delims) { + if(currPatt == null) { + currPatt = new StringBuilder(); + currExclusionPatt = new StringBuilder(); + + currExclusionPatt.append("(?:" + delim + ")"); + } else { + currExclusionPatt.append("|(?:" + delim + ")"); + } + + exclusionSet.add(delim); } } + /** * Compiles the current set of delimiters to a pattern. * * Makes this splitter ready to use. */ public void compile() { - compPatt = Pattern.compile(currPatt.toString()); + compPatt = Pattern.compile(currPatt.toString()); exclusionPatt = Pattern.compile(currExclusionPatt.toString()); } + + /* + * (non-Javadoc) + * + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + + builder.append("TokenSplitter ["); + + if(currPatt != null) { + builder.append("currPatt="); + builder.append(currPatt); + builder.append("\n\t, "); + } + + if(currExclusionPatt != null) { + builder.append("currExclusionPatt="); + builder.append(currExclusionPatt); + builder.append("\n\t, "); + } + + if(compPatt != null) { + builder.append("compPatt="); + builder.append(compPatt); + builder.append("\n\t, "); + } + + if(exclusionPatt != null) { + builder.append("exclusionPatt="); + builder.append(exclusionPatt); + builder.append("\n\t, "); + } + + if(delimSet != null) { + builder.append("delimSet="); + builder.append(delimSet); + builder.append("\n\t, "); + } + + if(multidelimSet != null) { + builder.append("multidelimSet="); + builder.append(multidelimSet); + builder.append("\n\t, "); + } + + if(exclusionSet != null) { + builder.append("exclusionSet="); + builder.append(exclusionSet); + } + + builder.append("]"); + return builder.toString(); + } } -- cgit v1.2.3