Gold Tests: Fix flaky test handling.

Instead of just retrying the batch, retry individual tests.
This should correctly handle the Intel flake pattern.
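
A minimal sketch of the retry pattern this change introduces (run_batch and its return value are illustrative stand-ins, not the runner's actual API): after each attempt, only the traces that failed are carried into the next attempt, so one flaky trace no longer forces the whole batch to re-run.

    import logging

    def run_with_retries(batch, flaky_retries, run_batch):
        # run_batch(traces) -> list of traces that failed; a hypothetical
        # stand-in for one filtered invocation of the test binary.
        for iteration in range(flaky_retries + 1):
            if not batch:
                break  # every trace has passed; nothing left to retry
            if iteration > 0:
                logging.info('Running retry #%d for %d traces', iteration, len(batch))
            # Only the traces that failed last time go into the next run.
            batch = run_batch(batch)
        return batch  # traces still failing after all retries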

Bug: angleproject:5415
Change-Id: I029c2514ff8e7a45184c5c105bf2d0350171da29
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2940049
Commit-Queue: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Jonah Ryan-Davis <jonahr@google.com>
Reviewed-by: Yuly Novikov <ynovikov@chromium.org>
Jamie Madill 2021-06-07 12:48:01 -04:00, committed by Angle LUCI CQ
Parent f03f7bdb72
Commit d99864c0fa
4 changed files with 63 additions and 36 deletions

View file

@@ -610,7 +610,8 @@
   "args": [
     "--test-machine-name",
     "${buildername}",
-    "--git-revision=${got_angle_revision}"
+    "--git-revision=${got_angle_revision}",
+    "--flaky-retries=1"
   ],
   "isolate_name": "angle_restricted_trace_gold_tests",
   "merge": {
@@ -1966,7 +1967,8 @@
   "args": [
     "--test-machine-name",
     "${buildername}",
-    "--git-revision=${got_angle_revision}"
+    "--git-revision=${got_angle_revision}",
+    "--flaky-retries=1"
   ],
   "isolate_name": "angle_restricted_trace_gold_tests",
   "merge": {

View file

@@ -26,6 +26,20 @@
       },
     },
   },
+  'angle_restricted_trace_gold_tests': {
+    'modifications': {
+      'linux-intel': {
+        'args': [
+          '--flaky-retries=1',
+        ],
+      },
+      'win10-x64-intel': {
+        'args': [
+          '--flaky-retries=1',
+        ],
+      },
+    },
+  },
   'angle_white_box_tests': {
     'modifications': {
       # anglebug.com/5328 suspecting blue screen caused by multiprocess

View file

@@ -1,12 +1,12 @@
 {
   "infra/specs/angle.json":
-    "ead3cabcb0132402f7c722c072f1ae55",
+    "da36986392a628a408b2ce19318ac0e5",
   "infra/specs/generate_test_spec_json.py":
     "162566b21bca4ef0b815e411920c9f2d",
   "infra/specs/mixins.pyl":
     "937e107ab606846d61eec617d09e50d0",
   "infra/specs/test_suite_exceptions.pyl":
-    "aad1a4aed801277cc531733deab221b5",
+    "723460da84a90884a9668c07a0893390",
   "infra/specs/test_suites.pyl":
     "687b407a1fd7d83583817b9570ad983e",
   "infra/specs/variants.pyl":

View file

@@ -358,42 +358,53 @@ def _run_tests(args, tests, extra_flags, env, screenshot_dir, results, test_results):
         batches = _get_batches(traces, args.batch_size)
 
         for batch in batches:
-            with common.temporary_file() as tempfile_path:
-                gtest_filter = _get_gtest_filter_for_batch(batch)
-                cmd = [
-                    args.test_suite,
-                    gtest_filter,
-                    '--render-test-output-dir=%s' % screenshot_dir,
-                    '--one-frame-only',
-                    '--verbose-logging',
-                ] + extra_flags
+            for iteration in range(0, args.flaky_retries + 1):
+                with common.temporary_file() as tempfile_path:
+                    # This is how we signal early exit
+                    if not batch:
+                        logging.debug('All tests in batch completed.')
+                        break
+                    if iteration > 0:
+                        logging.info('Test run failed, running retry #%d...' % iteration)
 
-                batch_result = None
-                for iteration in range(0, args.flaky_retries + 1):
-                    if batch_result != PASS:
-                        if iteration > 0:
-                            logging.info('Test run failed, running retry #%d...' % (iteration + 1))
-                        batch_result = PASS if run_wrapper(args, cmd, env,
-                                                           tempfile_path) == 0 else FAIL
+                    gtest_filter = _get_gtest_filter_for_batch(batch)
+                    cmd = [
+                        args.test_suite,
+                        gtest_filter,
+                        '--render-test-output-dir=%s' % screenshot_dir,
+                        '--one-frame-only',
+                        '--verbose-logging',
+                    ] + extra_flags
+                    batch_result = PASS if run_wrapper(args, cmd, env,
+                                                       tempfile_path) == 0 else FAIL
 
-                for trace in batch:
-                    artifacts = {}
+                    next_batch = []
+                    for trace in batch:
+                        artifacts = {}
 
-                    if batch_result == PASS:
-                        logging.debug('upload test result: %s' % trace)
-                        result = upload_test_result_to_skia_gold(args, gold_session_manager,
-                                                                 gold_session, gold_properties,
-                                                                 screenshot_dir, trace, artifacts)
-                    else:
-                        result = batch_result
+                        if batch_result == PASS:
+                            logging.debug('upload test result: %s' % trace)
+                            result = upload_test_result_to_skia_gold(args, gold_session_manager,
+                                                                     gold_session, gold_properties,
+                                                                     screenshot_dir, trace,
+                                                                     artifacts)
+                        else:
+                            result = batch_result
 
-                    expected_result = SKIP if result == SKIP else PASS
-                    test_results[trace] = {'expected': expected_result, 'actual': result}
-                    if result == FAIL:
-                        test_results[trace]['is_unexpected'] = True
-                    if len(artifacts) > 0:
-                        test_results[trace]['artifacts'] = artifacts
-                    results['num_failures_by_type'][result] += 1
+                        expected_result = SKIP if result == SKIP else PASS
+                        test_results[trace] = {'expected': expected_result, 'actual': result}
+                        if len(artifacts) > 0:
+                            test_results[trace]['artifacts'] = artifacts
+                        if result == FAIL:
+                            next_batch.append(trace)
+                    batch = next_batch
+
+        # These properties are recorded after iteration to ensure they only happen once.
+        for _, trace_results in test_results.items():
+            result = trace_results['actual']
+            results['num_failures_by_type'][result] += 1
+            if result == FAIL:
+                trace_results['is_unexpected'] = True
 
         return results['num_failures_by_type'][FAIL] == 0