diff options
Diffstat (limited to 'src/share/classes/com/sun/org/apache/regexp/internal/RETest.java')
-rw-r--r-- | src/share/classes/com/sun/org/apache/regexp/internal/RETest.java | 883 |
1 files changed, 883 insertions, 0 deletions
diff --git a/src/share/classes/com/sun/org/apache/regexp/internal/RETest.java b/src/share/classes/com/sun/org/apache/regexp/internal/RETest.java new file mode 100644 index 0000000..463776d --- /dev/null +++ b/src/share/classes/com/sun/org/apache/regexp/internal/RETest.java @@ -0,0 +1,883 @@ +/* + * reserved comment block + * DO NOT REMOVE OR ALTER! + */ +/* + * Copyright 1999-2004 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.sun.org.apache.regexp.internal; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.InputStreamReader; +import java.io.PrintWriter; +import java.io.File; +import java.io.ByteArrayOutputStream; +import java.io.ObjectOutputStream; +import java.io.ByteArrayInputStream; +import java.io.ObjectInputStream; +import java.io.StringBufferInputStream; +import java.io.StringReader; +import java.io.IOException; + +/** + * Data driven (and optionally interactive) testing harness to exercise regular + * expression compiler and matching engine. + * + * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a> + * @author <a href="mailto:jon@latchkey.com">Jon S. Stevens</a> + * @author <a href="mailto:gholam@xtra.co.nz">Michael McCallum</a> + */ +public class RETest +{ + // True if we want to see output from success cases + static final boolean showSuccesses = false; + + // A new line character. + static final String NEW_LINE = System.getProperty( "line.separator" ); + + // Construct a debug compiler + REDebugCompiler compiler = new REDebugCompiler(); + + /** + * Main program entrypoint. If an argument is given, it will be compiled + * and interactive matching will ensue. If no argument is given, the + * file RETest.txt will be used as automated testing input. + * @param args Command line arguments (optional regular expression) + */ + public static void main(String[] args) + { + try + { + if (!test( args )) { + System.exit(1); + } + } + catch (Exception e) + { + e.printStackTrace(); + System.exit(1); + } + } + + /** + * Testing entrypoint. + * @param args Command line arguments + * @exception Exception thrown in case of error + */ + public static boolean test( String[] args ) throws Exception + { + RETest test = new RETest(); + // Run interactive tests against a single regexp + if (args.length == 2) + { + test.runInteractiveTests(args[1]); + } + else if (args.length == 1) + { + // Run automated tests + test.runAutomatedTests(args[0]); + } + else + { + System.out.println( "Usage: RETest ([-i] [regex]) ([/path/to/testfile.txt])" ); + System.out.println( "By Default will run automated tests from file 'docs/RETest.txt' ..." ); + System.out.println(); + test.runAutomatedTests("docs/RETest.txt"); + } + return test.failures == 0; + } + + /** + * Constructor + */ + public RETest() + { + } + + /** + * Compile and test matching against a single expression + * @param expr Expression to compile and test + */ + void runInteractiveTests(String expr) + { + RE r = new RE(); + try + { + // Compile expression + r.setProgram(compiler.compile(expr)); + + // Show expression + say("" + NEW_LINE + "" + expr + "" + NEW_LINE + ""); + + // Show program for compiled expression + PrintWriter writer = new PrintWriter( System.out ); + compiler.dumpProgram( writer ); + writer.flush(); + + boolean running = true; + // Test matching against compiled expression + while ( running ) + { + // Read from keyboard + BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); + System.out.print("> "); + System.out.flush(); + String match = br.readLine(); + + if ( match != null ) + { + // Try a match against the keyboard input + if (r.match(match)) + { + say("Match successful."); + } + else + { + say("Match failed."); + } + + // Show subparen registers + showParens(r); + } + else + { + running = false; + System.out.println(); + } + } + } + catch (Exception e) + { + say("Error: " + e.toString()); + e.printStackTrace(); + } + } + + /** + * Exit with a fatal error. + * @param s Last famous words before exiting + */ + void die(String s) + { + say("FATAL ERROR: " + s); + System.exit(-1); + } + + /** + * Fail with an error. Will print a big failure message to System.out. + * + * @param log Output before failure + * @param s Failure description + */ + void fail(StringBuffer log, String s) + { + System.out.print(log.toString()); + fail(s); + } + + /** + * Fail with an error. Will print a big failure message to System.out. + * + * @param s Failure description + */ + void fail(String s) + { + failures++; + say("" + NEW_LINE + ""); + say("*******************************************************"); + say("********************* FAILURE! **********************"); + say("*******************************************************"); + say("" + NEW_LINE + ""); + say(s); + say(""); + // make sure the writer gets flushed. + if (compiler != null) { + PrintWriter writer = new PrintWriter( System.out ); + compiler.dumpProgram( writer ); + writer.flush(); + say("" + NEW_LINE + ""); + } + } + + /** + * Say something to standard out + * @param s What to say + */ + void say(String s) + { + System.out.println(s); + } + + /** + * Dump parenthesized subexpressions found by a regular expression matcher object + * @param r Matcher object with results to show + */ + void showParens(RE r) + { + // Loop through each paren + for (int i = 0; i < r.getParenCount(); i++) + { + // Show paren register + say("$" + i + " = " + r.getParen(i)); + } + } + + /* + * number in automated test + */ + int testCount = 0; + + /* + * Count of failures in automated test + */ + int failures = 0; + + /** + * Run automated tests in RETest.txt file (from Perl 4.0 test battery) + * @exception Exception thrown in case of error + */ + void runAutomatedTests(String testDocument) throws Exception + { + long ms = System.currentTimeMillis(); + + // Some unit tests + testPrecompiledRE(); + testSplitAndGrep(); + testSubst(); + testOther(); + + // Test from script file + File testInput = new File(testDocument); + if (! testInput.exists()) { + throw new Exception ("Could not find: " + testDocument); + } + + BufferedReader br = new BufferedReader(new FileReader(testInput)); + try + { + // While input is available, parse lines + while (br.ready()) + { + RETestCase testcase = getNextTestCase(br); + if (testcase != null) { + testcase.runTest(); + } + } + } + finally + { + br.close(); + } + + // Show match time + say(NEW_LINE + NEW_LINE + "Match time = " + (System.currentTimeMillis() - ms) + " ms."); + + // Print final results + if (failures > 0) { + say("*************** THERE ARE FAILURES! *******************"); + } + say("Tests complete. " + testCount + " tests, " + failures + " failure(s)."); + } + + /** + * Run automated unit test + * @exception Exception thrown in case of error + */ + void testOther() throws Exception + { + // Serialization test 1: Compile regexp and serialize/deserialize it + RE r = new RE("(a*)b"); + say("Serialized/deserialized (a*)b"); + ByteArrayOutputStream out = new ByteArrayOutputStream(128); + new ObjectOutputStream(out).writeObject(r); + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + r = (RE)new ObjectInputStream(in).readObject(); + if (!r.match("aaab")) + { + fail("Did not match 'aaab' with deserialized RE."); + } else { + say("aaaab = true"); + showParens(r); + } + + // Serialization test 2: serialize/deserialize used regexp + out.reset(); + say("Deserialized (a*)b"); + new ObjectOutputStream(out).writeObject(r); + in = new ByteArrayInputStream(out.toByteArray()); + r = (RE)new ObjectInputStream(in).readObject(); + if (r.getParenCount() != 0) + { + fail("Has parens after deserialization."); + } + if (!r.match("aaab")) + { + fail("Did not match 'aaab' with deserialized RE."); + } else { + say("aaaab = true"); + showParens(r); + } + + // Test MATCH_CASEINDEPENDENT + r = new RE("abc(\\w*)"); + say("MATCH_CASEINDEPENDENT abc(\\w*)"); + r.setMatchFlags(RE.MATCH_CASEINDEPENDENT); + say("abc(d*)"); + if (!r.match("abcddd")) + { + fail("Did not match 'abcddd'."); + } else { + say("abcddd = true"); + showParens(r); + } + + if (!r.match("aBcDDdd")) + { + fail("Did not match 'aBcDDdd'."); + } else { + say("aBcDDdd = true"); + showParens(r); + } + + if (!r.match("ABCDDDDD")) + { + fail("Did not match 'ABCDDDDD'."); + } else { + say("ABCDDDDD = true"); + showParens(r); + } + + r = new RE("(A*)b\\1"); + r.setMatchFlags(RE.MATCH_CASEINDEPENDENT); + if (!r.match("AaAaaaBAAAAAA")) + { + fail("Did not match 'AaAaaaBAAAAAA'."); + } else { + say("AaAaaaBAAAAAA = true"); + showParens(r); + } + + r = new RE("[A-Z]*"); + r.setMatchFlags(RE.MATCH_CASEINDEPENDENT); + if (!r.match("CaBgDe12")) + { + fail("Did not match 'CaBgDe12'."); + } else { + say("CaBgDe12 = true"); + showParens(r); + } + + // Test MATCH_MULTILINE. Test for eol/bol symbols. + r = new RE("^abc$", RE.MATCH_MULTILINE); + if (!r.match("\nabc")) { + fail("\"\\nabc\" doesn't match \"^abc$\""); + } + if (!r.match("\rabc")) { + fail("\"\\rabc\" doesn't match \"^abc$\""); + } + if (!r.match("\r\nabc")) { + fail("\"\\r\\nabc\" doesn't match \"^abc$\""); + } + if (!r.match("\u0085abc")) { + fail("\"\\u0085abc\" doesn't match \"^abc$\""); + } + if (!r.match("\u2028abc")) { + fail("\"\\u2028abc\" doesn't match \"^abc$\""); + } + if (!r.match("\u2029abc")) { + fail("\"\\u2029abc\" doesn't match \"^abc$\""); + } + + // Test MATCH_MULTILINE. Test that '.' does not matches new line. + r = new RE("^a.*b$", RE.MATCH_MULTILINE); + if (r.match("a\nb")) { + fail("\"a\\nb\" matches \"^a.*b$\""); + } + if (r.match("a\rb")) { + fail("\"a\\rb\" matches \"^a.*b$\""); + } + if (r.match("a\r\nb")) { + fail("\"a\\r\\nb\" matches \"^a.*b$\""); + } + if (r.match("a\u0085b")) { + fail("\"a\\u0085b\" matches \"^a.*b$\""); + } + if (r.match("a\u2028b")) { + fail("\"a\\u2028b\" matches \"^a.*b$\""); + } + if (r.match("a\u2029b")) { + fail("\"a\\u2029b\" matches \"^a.*b$\""); + } + } + + private void testPrecompiledRE() + { + // Pre-compiled regular expression "a*b" + char[] re1Instructions = + { + 0x007c, 0x0000, 0x001a, 0x007c, 0x0000, 0x000d, 0x0041, + 0x0001, 0x0004, 0x0061, 0x007c, 0x0000, 0x0003, 0x0047, + 0x0000, 0xfff6, 0x007c, 0x0000, 0x0003, 0x004e, 0x0000, + 0x0003, 0x0041, 0x0001, 0x0004, 0x0062, 0x0045, 0x0000, + 0x0000, + }; + + REProgram re1 = new REProgram(re1Instructions); + + // Simple test of pre-compiled regular expressions + RE r = new RE(re1); + say("a*b"); + boolean result = r.match("aaab"); + say("aaab = " + result); + showParens(r); + if (!result) { + fail("\"aaab\" doesn't match to precompiled \"a*b\""); + } + + result = r.match("b"); + say("b = " + result); + showParens(r); + if (!result) { + fail("\"b\" doesn't match to precompiled \"a*b\""); + } + + result = r.match("c"); + say("c = " + result); + showParens(r); + if (result) { + fail("\"c\" matches to precompiled \"a*b\""); + } + + result = r.match("ccccaaaaab"); + say("ccccaaaaab = " + result); + showParens(r); + if (!result) { + fail("\"ccccaaaaab\" doesn't match to precompiled \"a*b\""); + } + } + + private void testSplitAndGrep() + { + String[] expected = {"xxxx", "xxxx", "yyyy", "zzz"}; + RE r = new RE("a*b"); + String[] s = r.split("xxxxaabxxxxbyyyyaaabzzz"); + for (int i = 0; i < expected.length && i < s.length; i++) { + assertEquals("Wrong splitted part", expected[i], s[i]); + } + assertEquals("Wrong number of splitted parts", expected.length, + s.length); + + r = new RE("x+"); + expected = new String[] {"xxxx", "xxxx"}; + s = r.grep(s); + for (int i = 0; i < s.length; i++) + { + say("s[" + i + "] = " + s[i]); + assertEquals("Grep fails", expected[i], s[i]); + } + assertEquals("Wrong number of string found by grep", expected.length, + s.length); + } + + private void testSubst() + { + RE r = new RE("a*b"); + String expected = "-foo-garply-wacky-"; + String actual = r.subst("aaaabfooaaabgarplyaaabwackyb", "-"); + assertEquals("Wrong result of substitution in \"a*b\"", expected, actual); + + // Test subst() with backreferences + r = new RE("http://[\\.\\w\\-\\?/~_@&=%]+"); + actual = r.subst("visit us: http://www.apache.org!", + "1234<a href=\"$0\">$0</a>", RE.REPLACE_BACKREFERENCES); + assertEquals("Wrong subst() result", "visit us: 1234<a href=\"http://www.apache.org\">http://www.apache.org</a>!", actual); + + // Test subst() with backreferences without leading characters + // before first backreference + r = new RE("(.*?)=(.*)"); + actual = r.subst("variable=value", + "$1_test_$212", RE.REPLACE_BACKREFERENCES); + assertEquals("Wrong subst() result", "variable_test_value12", actual); + + // Test subst() with NO backreferences + r = new RE("^a$"); + actual = r.subst("a", + "b", RE.REPLACE_BACKREFERENCES); + assertEquals("Wrong subst() result", "b", actual); + + // Test subst() with NO backreferences + r = new RE("^a$", RE.MATCH_MULTILINE); + actual = r.subst("\r\na\r\n", + "b", RE.REPLACE_BACKREFERENCES); + assertEquals("Wrong subst() result", "\r\nb\r\n", actual); + } + + public void assertEquals(String message, String expected, String actual) + { + if (expected != null && !expected.equals(actual) + || actual != null && !actual.equals(expected)) + { + fail(message + " (expected \"" + expected + + "\", actual \"" + actual + "\")"); + } + } + + public void assertEquals(String message, int expected, int actual) + { + if (expected != actual) { + fail(message + " (expected \"" + expected + + "\", actual \"" + actual + "\")"); + } + } + + /** + * Converts yesno string to boolean. + * @param yesno string representation of expected result + * @return true if yesno is "YES", false if yesno is "NO" + * stops program otherwise. + */ + private boolean getExpectedResult(String yesno) + { + if ("NO".equals(yesno)) + { + return false; + } + else if ("YES".equals(yesno)) + { + return true; + } + else + { + // Bad test script + die("Test script error!"); + return false; //to please javac + } + } + + /** + * Finds next test description in a given script. + * @param br <code>BufferedReader</code> for a script file + * @return strign tag for next test description + * @exception IOException if some io problems occured + */ + private String findNextTest(BufferedReader br) throws IOException + { + String number = ""; + + while (br.ready()) + { + number = br.readLine(); + if (number == null) + { + break; + } + number = number.trim(); + if (number.startsWith("#")) + { + break; + } + if (!number.equals("")) + { + say("Script error. Line = " + number); + System.exit(-1); + } + } + return number; + } + + /** + * Creates testcase for the next test description in the script file. + * @param br <code>BufferedReader</code> for script file. + * @return a new tescase or null. + * @exception IOException if some io problems occured + */ + private RETestCase getNextTestCase(BufferedReader br) throws IOException + { + // Find next re test case + final String tag = findNextTest(br); + + // Are we done? + if (!br.ready()) + { + return null; + } + + // Get expression + final String expr = br.readLine(); + + // Get test information + final String matchAgainst = br.readLine(); + final boolean badPattern = "ERR".equals(matchAgainst); + boolean shouldMatch = false; + int expectedParenCount = 0; + String[] expectedParens = null; + + if (!badPattern) { + shouldMatch = getExpectedResult(br.readLine().trim()); + if (shouldMatch) { + expectedParenCount = Integer.parseInt(br.readLine().trim()); + expectedParens = new String[expectedParenCount]; + for (int i = 0; i < expectedParenCount; i++) { + expectedParens[i] = br.readLine(); + } + } + } + + return new RETestCase(this, tag, expr, matchAgainst, badPattern, + shouldMatch, expectedParens); + } +} + +final class RETestCase +{ + final private StringBuffer log = new StringBuffer(); + final private int number; + final private String tag; // number from script file + final private String pattern; + final private String toMatch; + final private boolean badPattern; + final private boolean shouldMatch; + final private String[] parens; + final private RETest test; + private RE regexp; + + public RETestCase(RETest test, String tag, String pattern, + String toMatch, boolean badPattern, + boolean shouldMatch, String[] parens) + { + this.number = ++test.testCount; + this.test = test; + this.tag = tag; + this.pattern = pattern; + this.toMatch = toMatch; + this.badPattern = badPattern; + this.shouldMatch = shouldMatch; + if (parens != null) { + this.parens = new String[parens.length]; + for (int i = 0; i < parens.length; i++) { + this.parens[i] = parens[i]; + } + } else { + this.parens = null; + } + } + + public void runTest() + { + test.say(tag + "(" + number + "): " + pattern); + if (testCreation()) { + testMatch(); + } + } + + boolean testCreation() + { + try + { + // Compile it + regexp = new RE(); + regexp.setProgram(test.compiler.compile(pattern)); + // Expression didn't cause an expected error + if (badPattern) + { + test.fail(log, "Was expected to be an error, but wasn't."); + return false; + } + + return true; + } + // Some expressions *should* cause exceptions to be thrown + catch (Exception e) + { + // If it was supposed to be an error, report success and continue + if (badPattern) + { + log.append(" Match: ERR\n"); + success("Produces an error (" + e.toString() + "), as expected."); + return false; + } + + // Wasn't supposed to be an error + String message = (e.getMessage() == null) ? e.toString() : e.getMessage(); + test.fail(log, "Produces an unexpected exception \"" + message + "\""); + e.printStackTrace(); + } + catch (Error e) + { + // Internal error happened + test.fail(log, "Compiler threw fatal error \"" + e.getMessage() + "\""); + e.printStackTrace(); + } + + return false; + } + + private void testMatch() + { + log.append(" Match against: '" + toMatch + "'\n"); + // Try regular matching + try + { + // Match against the string + boolean result = regexp.match(toMatch); + log.append(" Matched: " + (result ? "YES" : "NO") + "\n"); + + // Check result, parens, and iterators + if (checkResult(result) && (!shouldMatch || checkParens())) + { + // test match(CharacterIterator, int) + // for every CharacterIterator implementation. + log.append(" Match using StringCharacterIterator\n"); + if (!tryMatchUsingCI(new StringCharacterIterator(toMatch))) + return; + + log.append(" Match using CharacterArrayCharacterIterator\n"); + if (!tryMatchUsingCI(new CharacterArrayCharacterIterator(toMatch.toCharArray(), 0, toMatch.length()))) + return; + + log.append(" Match using StreamCharacterIterator\n"); + if (!tryMatchUsingCI(new StreamCharacterIterator(new StringBufferInputStream(toMatch)))) + return; + + log.append(" Match using ReaderCharacterIterator\n"); + if (!tryMatchUsingCI(new ReaderCharacterIterator(new StringReader(toMatch)))) + return; + } + } + // Matcher blew it + catch(Exception e) + { + test.fail(log, "Matcher threw exception: " + e.toString()); + e.printStackTrace(); + } + // Internal error + catch(Error e) + { + test.fail(log, "Matcher threw fatal error \"" + e.getMessage() + "\""); + e.printStackTrace(); + } + } + + private boolean checkResult(boolean result) + { + // Write status + if (result == shouldMatch) { + success((shouldMatch ? "Matched" : "Did not match") + + " \"" + toMatch + "\", as expected:"); + return true; + } else { + if (shouldMatch) { + test.fail(log, "Did not match \"" + toMatch + "\", when expected to."); + } else { + test.fail(log, "Matched \"" + toMatch + "\", when not expected to."); + } + return false; + } + } + + private boolean checkParens() + { + // Show subexpression registers + if (RETest.showSuccesses) + { + test.showParens(regexp); + } + + log.append(" Paren count: " + regexp.getParenCount() + "\n"); + if (!assertEquals(log, "Wrong number of parens", parens.length, regexp.getParenCount())) + { + return false; + } + + // Check registers against expected contents + for (int p = 0; p < regexp.getParenCount(); p++) + { + log.append(" Paren " + p + ": " + regexp.getParen(p) + "\n"); + + // Compare expected result with actual + if ("null".equals(parens[p]) && regexp.getParen(p) == null) + { + // Consider "null" in test file equal to null + continue; + } + if (!assertEquals(log, "Wrong register " + p, parens[p], regexp.getParen(p))) + { + return false; + } + } + + return true; + } + + boolean tryMatchUsingCI(CharacterIterator matchAgainst) + { + try { + boolean result = regexp.match(matchAgainst, 0); + log.append(" Match: " + (result ? "YES" : "NO") + "\n"); + return checkResult(result) && (!shouldMatch || checkParens()); + } + // Matcher blew it + catch(Exception e) + { + test.fail(log, "Matcher threw exception: " + e.toString()); + e.printStackTrace(); + } + // Internal error + catch(Error e) + { + test.fail(log, "Matcher threw fatal error \"" + e.getMessage() + "\""); + e.printStackTrace(); + } + return false; + } + + public boolean assertEquals(StringBuffer log, String message, String expected, String actual) + { + if (expected != null && !expected.equals(actual) + || actual != null && !actual.equals(expected)) + { + test.fail(log, message + " (expected \"" + expected + + "\", actual \"" + actual + "\")"); + return false; + } + return true; + } + + public boolean assertEquals(StringBuffer log, String message, int expected, int actual) + { + if (expected != actual) { + test.fail(log, message + " (expected \"" + expected + + "\", actual \"" + actual + "\")"); + return false; + } + return true; + } + + /** + * Show a success + * @param s Success story + */ + void success(String s) + { + if (RETest.showSuccesses) + { + test.say("" + RETest.NEW_LINE + "-----------------------" + RETest.NEW_LINE + ""); + test.say("Expression #" + (number) + " \"" + pattern + "\" "); + test.say("Success: " + s); + } + } +} |