Bug 1252361 - Produce UNEXPECTED-PASS results for fuzzy tests that are overfuzzed. r=dbaron

This catches a common problem where somebody adds a fuzzy annotation on a test to work around some minor differences. Later the differences go away, but since the test harness doesn't catch that, nobody is the wiser. Subsequently a "real" regression can reintroduce differences which are hidden by the stale fuzzy annotations. With this patch, if the annotations are set up properly, the test harness will flag tests as "UNEXPECTED-PASS" when the differences go away. This will require the patch author to reduce the allowed fuzziness parameters, and will make it easier to catch subsequent regressions. MozReview-Commit-ID: B3rGPFLXkCu
2017-06-02 09:28:33 -04:00 · 2017-06-02 09:28:33 -04:00 · 748457c51a
--- a/layout/reftests/reftest-sanity/reftest.list
+++ b/layout/reftests/reftest-sanity/reftest.list
@ -147,11 +147,21 @@ fails test-pref(font.size.variable.x-western,"foo") == font-default.html font-si
 ref-pref(font.size.variable.x-western,16) test-pref(font.size.variable.x-western,24) skip-if(styloVsGecko) != font-default.html font-default.html
 ref-pref(font.size.variable.x-western,24) test-pref(font.size.variable.x-western,16) skip-if(styloVsGecko) != font-default.html font-default.html
 ref-pref(font.size.variable.x-western,24) test-pref(font.size.variable.x-western,24) skip-if(styloVsGecko) == font-default.html font-default.html
+
 # reftest syntax: fuzzy(maxPixelDifference,maxNumberDifferingPixels)
 fuzzy(1,250000) == fuzzy.html fuzzy-ref.html
 fuzzy(1,250000) != too-fuzzy.html fuzzy-ref.html
 fuzzy-if(true,1,250000) == fuzzy.html fuzzy-ref.html
 fuzzy-if(false,2,1) == fuzzy-ref.html fuzzy-ref.html
+# test some ranged fuzzes
+fuzzy(1-10,1-250000) fuzzy-if(false,5-10,250000) skip-if(styloVsGecko) == fuzzy.html fuzzy-ref.html
+fuzzy(0-0,250000) skip-if(styloVsGecko) != fuzzy.html fuzzy-ref.html
+fuzzy(1,0-2) skip-if(styloVsGecko) != fuzzy.html fuzzy-ref.html
+# If enabled, the following two should result in UNEXPECTED-PASS because
+# they are both overfuzzed
+# fuzzy(3-4,250000) == fuzzy.html fuzzy-ref.html
+# fuzzy(1,250001-250002) == fuzzy.html fuzzy-ref.html
+#
 # When using 565 fuzzy.html and fuzzy-ref.html will compare as equal
 fails-if(!styloVsGecko) fuzzy-if(false,2,1) random-if(Android) == fuzzy.html fuzzy-ref.html

--- a/layout/tools/reftest/reftest.jsm
+++ b/layout/tools/reftest/reftest.jsm
@ -1638,7 +1638,8 @@ function RecordResult(testRunTime, errorMsg, scriptResults)
        true:  {s: ["PASS", "PASS"], n: "Random"},
        false: {s: ["FAIL", "FAIL"], n: "Random"}
    };
-    outputs[EXPECTED_FUZZY] = outputs[EXPECTED_PASS];
+    // for EXPECTED_FUZZY we need special handling because we can have
+    // Pass, UnexpectedPass, or UnexpectedFail

    var output;
    var extra;
@ -1740,23 +1741,30 @@ function RecordResult(testRunTime, errorMsg, scriptResults)
            // whether the two renderings match:
            var equal;
            var maxDifference = {};
+            // whether the allowed fuzziness from the annotations is exceeded
+            // by the actual comparison results
+            var fuzz_exceeded = false;

            differences = gWindowUtils.compareCanvases(gCanvas1, gCanvas2, maxDifference);
            equal = (differences == 0);

+            if (maxDifference.value > 0 && equal) {
+                throw "Inconsistent result from compareCanvases.";
+            }
+
            // what is expected on this platform (PASS, FAIL, or RANDOM)
            var expected = gURLs[0].expected;

-            if (maxDifference.value > 0 &&
+            if (expected == EXPECTED_FUZZY) {
+                logger.info(`REFTEST fuzzy test ` +
+                            `(${gURLs[0].fuzzyMinDelta}, ${gURLs[0].fuzzyMinPixels}) <= ` +
+                            `(${maxDifference.value}, ${differences}) <= ` +
+                            `(${gURLs[0].fuzzyMaxDelta}, ${gURLs[0].fuzzyMaxPixels})`);
+                fuzz_exceeded = maxDifference.value > gURLs[0].fuzzyMaxDelta ||
+                                differences > gURLs[0].fuzzyMaxPixels;
+                equal = !fuzz_exceeded &&
                        maxDifference.value >= gURLs[0].fuzzyMinDelta &&
-                maxDifference.value <= gURLs[0].fuzzyMaxDelta &&
-                differences >= gURLs[0].fuzzyMinPixels &&
-                differences <= gURLs[0].fuzzyMaxPixels) {
-                if (equal) {
-                    throw "Inconsistent result from compareCanvases.";
-                }
-                equal = expected == EXPECTED_FUZZY;
-                logger.info(`REFTEST fuzzy match (${maxDifference.value}, ${differences}) <= (${gURLs[0].fuzzyMaxDelta}, ${gURLs[0].fuzzyMaxPixels})`);
+                        differences >= gURLs[0].fuzzyMinPixels;
            }

            var failedExtraCheck = gFailedNoPaint || gFailedOpaqueLayer || gFailedAssignedLayer;
@ -1764,7 +1772,27 @@ function RecordResult(testRunTime, errorMsg, scriptResults)
            // whether the comparison result matches what is in the manifest
            var test_passed = (equal == (gURLs[0].type == TYPE_REFTEST_EQUAL)) && !failedExtraCheck;

+            if (expected != EXPECTED_FUZZY) {
                output = outputs[expected][test_passed];
+            } else if (test_passed) {
+                output = {s: ["PASS", "PASS"], n: "Pass"};
+            } else if (gURLs[0].type == TYPE_REFTEST_EQUAL &&
+                       !failedExtraCheck &&
+                       !fuzz_exceeded) {
+                // If we get here, that means we had an '==' type test where
+                // at least one of the actual difference values was below the
+                // allowed range, but nothing else was wrong. So let's produce
+                // UNEXPECTED-PASS in this scenario. Also, if we enter this
+                // branch, 'equal' must be false so let's assert that to guard
+                // against logic errors.
+                if (equal) {
+                    throw "Logic error in reftest.jsm fuzzy test handling!";
+                }
+                output = {s: ["PASS", "FAIL"], n: "UnexpectedPass"};
+            } else {
+                // In all other cases we fail the test
+                output = {s: ["FAIL", "PASS"], n: "UnexpectedFail"};
+            }
            extra = { status_msg: output.n };

            ++gTestResults[output.n];