From 7b56beefe4df24acd8437bf42262e7cd30d43970 Mon Sep 17 00:00:00 2001 From: bjculkin Date: Tue, 4 Apr 2017 18:51:52 -0400 Subject: Add a test Plus, more plural fixes --- .../java/bjc/inflexion/InflexionTester.java | 178 ++++++++++++++++++++- 1 file changed, 176 insertions(+), 2 deletions(-) (limited to 'src/examples/java') diff --git a/src/examples/java/bjc/inflexion/InflexionTester.java b/src/examples/java/bjc/inflexion/InflexionTester.java index 5f95de7..a11d168 100644 --- a/src/examples/java/bjc/inflexion/InflexionTester.java +++ b/src/examples/java/bjc/inflexion/InflexionTester.java @@ -19,7 +19,19 @@ import bjc.inflexion.v2.Noun; import bjc.inflexion.v2.Nouns; import bjc.inflexion.v2.Prepositions; +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.List; import java.util.Scanner; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; /** * Test inflecting words. @@ -46,7 +58,9 @@ public class InflexionTester { Scanner scn = new Scanner(System.in); - System.out.print("Enter a noun to inflect (blank line to quit): "); + wikitest(scn, nounDB); + + /*System.out.print("Enter a noun to inflect (blank line to quit): "); String ln = scn.nextLine().trim(); while(!ln.equals("")) { @@ -63,8 +77,168 @@ public class InflexionTester { System.out.print("Enter a noun to inflect (blank line to quit): "); ln = scn.nextLine().trim(); - } + }*/ scn.close(); } + + @SuppressWarnings("unused") + private static void wikitest(Scanner scn, Nouns nounDB) { + System.out.print("Enter name of dump file: "); + + String fname = scn.nextLine().trim(); + + try(InputStream compressedStream = new FileInputStream(fname)) { + InputStream stream = new BZip2CompressorInputStream(compressedStream); + BufferedReader reader = new BufferedReader(new InputStreamReader(stream)); + + /* + * Pattern find word name + */ + Pattern titlePattern = Pattern.compile("([^<]+)"); + /* + * Pattern to find beginning of wiki text + */ + Pattern textPattern = Pattern.compile(" plurals = new ArrayList<>(); + + boolean uncountable = false; + boolean noPlural = false; + for(String rule : rules) { + if(rule.isEmpty()) { + continue; + } + if("-".equals(rule)) { + plurals.add(word); + uncountable = true; + } else if("s".equals(rule)) { + plurals.add(word + "s"); + } else if("es".equals(rule)) { + plurals.add(word + "es"); + } else if("!".equals(rule)) { + plurals.add("plural not attested"); + uncountable = true; + } else if("?".equals(rule)) { + plurals.add("unknown"); + noPlural = true; + } else { + Matcher matcher = wordPattern.matcher(rule); + if(matcher.matches()) { + plurals.add(rule); + } + } + } + if(plurals.isEmpty()) { + plurals.add(word + "s"); + } + + String calculatedPlural = nounDB.getNoun(word).plural(); + boolean ok = false; + for(String plural : plurals) { + if(plural.equals(calculatedPlural)) { + ok = true; + break; + } + } + + if(!ok) { + wrong++; + if(uncountable) { + wrongUncountable++; + } else if(noPlural) { + wrongNoPlural++; + } + if(basicWord) { + System.out.println("basic word: " + word + " got: " + + calculatedPlural + ", but expected " + + enNounMatcher.group(1)); + basicWrong++; + } else { + System.out.println(word + " got: " + calculatedPlural + + ", but expected " + enNounMatcher.group(1)); + } + } + } + } + reader.close(); + compressedStream.close(); + + float correct = (count - wrong) * 100 / (float) count; + float basicCorrect = (basicCount - basicWrong) * 100 / (float) basicCount; + float wrongUncountablePercent = wrongUncountable * 100 / (float) count; + float wrongNoPluralPercent = wrongNoPlural * 100 / (float) count; + int justPlainWrong = wrong - wrongUncountable - wrongNoPlural; + float justPlainWrongPercent = justPlainWrong * 100 / (float) count; + System.out.println("Words checked: " + count + " (" + basicCount + " basic words)"); + System.out.println("Correct: " + correct + "% (" + basicCorrect + "% basic words)"); + System.out.println("Errors: "); + System.out.println( + " Uncountable: " + wrongUncountable + " (" + wrongUncountablePercent + "%)"); + System.out.println(" No plural form specified: " + wrongNoPlural + " (" + + wrongNoPluralPercent + "%)"); + System.out.println(" Incorrect answer: " + justPlainWrong + " (" + justPlainWrongPercent + + "%)"); + } catch(FileNotFoundException fnfex) { + fnfex.printStackTrace(); + } catch(IOException ioex) { + ioex.printStackTrace(); + } + } } \ No newline at end of file -- cgit v1.2.3