[replay] Improve compatibility with unstable builds
tysmith committed May 27, 2024
1 parent 0378e21 commit be570c9
Showing 9 changed files with 129 additions and 105 deletions.
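In short, the commit threads a new "unstable" flag (a crash or hang seen before the initial test case is served) from the runner through Report and into the FuzzManager metadata. A condensed sketch of the flow, pieced together from the hunks below; "target", "result", "runner", and "reporter" are stand-ins for the objects used in session.py, replay.py, and reporter.py, and this is not verbatim code from any single file:

# a result observed before the first test case is served marks the Report as unstable
report = target.create_report(is_hang=result.timeout, unstable=runner.startup_failure)
# when the report is submitted, the flag is surfaced as FuzzManager metadata
if report.unstable:
    reporter.add_extra_metadata("unstable_build", True)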
4 changes: 4 additions & 0 deletions grizzly/common/report.py
@@ -52,6 +52,7 @@ class Report:
"path",
"prefix",
"stack",
"unstable",
)

def __init__(
@@ -60,6 +61,7 @@ def __init__(
target_binary: Path,
is_hang: bool = False,
size_limit: int = MAX_LOG_SIZE,
unstable: bool = False,
) -> None:
self._crash_info: Optional[CrashInfo] = None
self._logs = self._select_logs(log_path)
@@ -69,6 +71,8 @@ def __init__(
self._target_binary = target_binary
self.is_hang = is_hang
self.path = log_path
# if a build crashes before the initial testcase is served it is unstable
self.unstable = unstable
# tail files in log_path if needed
if size_limit < 1:
LOG.warning("No limit set on report log size!")
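For illustration, a minimal sketch of the new Report argument; the log directory and binary name are placeholders, and log_path must already contain saved browser logs as elsewhere in the diff:

from pathlib import Path
from grizzly.common.report import Report

# unstable=True records that the build failed before the initial test case was served
report = Report(Path("/path/to/logs"), Path("firefox"), is_hang=False, unstable=True)
assert report.unstable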
3 changes: 3 additions & 0 deletions grizzly/common/reporter.py
@@ -259,6 +259,9 @@ def _submit_report(
if report.is_hang:
self.add_extra_metadata("is_hang", True)

if report.unstable:
self.add_extra_metadata("unstable_build", True)

# dump test cases and the contained files to working directory
test_case_meta = []
for test_number, test_case in enumerate(reversed(test_cases)):
4 changes: 3 additions & 1 deletion grizzly/common/test_reporter.py
@@ -177,7 +177,9 @@ def test_fuzzmanager_reporter_01(mocker, tmp_path, tests, frequent, force, sig_c
test_cases.append(fake_test)
reporter = FuzzManagerReporter("fake-tool")
reporter.submit(
test_cases, Report(log_path, Path("bin"), is_hang=True), force=force
test_cases,
Report(log_path, Path("bin"), is_hang=True, unstable=True),
force=force,
)
assert not log_path.is_dir()
assert fake_collector.call_args == ({"tool": "fake-tool"},)
124 changes: 56 additions & 68 deletions grizzly/replay/replay.py
@@ -424,12 +424,11 @@ def harness_fn(_: str) -> bytes:  # pragma: no cover
assert run_result is not None
if not run_result.attempted:
LOG.warning("Test case was not served")
if runner.initial:
if run_result.status == Result.FOUND:
# TODO: what is the best action to take in this case?
LOG.warning("Delayed startup failure detected")
else:
LOG.warning("Timeout too short? System too busy?")
if run_result.timeout:
LOG.warning("Browser hung? Timeout too short? System too busy?")
elif runner.initial:
# TODO: what is the best action to take in this case?
LOG.warning("Delayed startup failure detected")
# process run results
if run_result.status == Result.FOUND:
report: Optional[Report] = None
@@ -440,70 +439,61 @@ def harness_fn(_: str) -> bytes:  # pragma: no cover
log_path = Path(mkdtemp(prefix="logs_", dir=grz_tmp("logs")))
self.target.save_logs(log_path)
report = Report(
log_path, self.target.binary, is_hang=run_result.timeout
log_path,
self.target.binary,
is_hang=run_result.timeout,
unstable=runner.startup_failure,
)
# set active signature
if (
not runner.startup_failure
and not self._any_crash
and not run_result.timeout
and not sig_set
):
if not self._any_crash and not run_result.timeout and not sig_set:
assert not expect_hang
assert self._signature is None
LOG.debug(
"no signature given, using short sig %r",
report.short_signature,
)
if runner.startup_failure:
LOG.warning(
"Using signature from startup failure! "
"Provide a signature to avoid this."
)
self._signature = report.crash_signature
sig_set = True
if self._signature is not None:
assert not sig_hash, "sig_hash should only be set once"
sig_hash = Report.calc_hash(self._signature)

# bucket result
if not runner.startup_failure and (
self._any_crash
or self.check_match(
self._signature, report, expect_hang, sig_set
)
):
# look for existing buckets (signature match)
expected = self._any_crash or self.check_match(
self._signature, report, expect_hang, sig_set
)
if expected:
if sig_hash is not None:
LOG.debug("using signature hash (%s) to bucket", sig_hash)
bucket_hash = sig_hash
else:
bucket_hash = report.crash_hash
self.status.results.count(bucket_hash, report.short_signature)
LOG.info(
"Result: %s (%s:%s)",
report.short_signature,
report.major[:8],
report.minor[:8],
)
if bucket_hash not in reports:
reports[bucket_hash] = ReplayResult(report, durations, True)
LOG.debug("now tracking %s", bucket_hash)
report = None # don't remove report
else:
reports[bucket_hash].count += 1
LOG.debug("already tracking %s", bucket_hash)
else:
LOG.info(
"Result: Different signature: %s (%s:%s)",
report.short_signature,
report.major[:8],
report.minor[:8],
)
bucket_hash = report.crash_hash
self.status.ignored += 1
if report.crash_hash not in reports:
reports[report.crash_hash] = ReplayResult(
report, durations, False
)
LOG.debug("now tracking %s", report.crash_hash)
report = None # don't remove report
else:
reports[report.crash_hash].count += 1
LOG.debug("already tracking %s", report.crash_hash)
LOG.info(
"%s: %s (%s:%s)",
"Result" if expected else "Result: Different signature",
report.short_signature,
report.major[:8],
report.minor[:8],
)
# bucket result
if bucket_hash not in reports:
reports[bucket_hash] = ReplayResult(report, durations, expected)
LOG.debug("now tracking %s", bucket_hash)
report = None # don't remove report
else:
reports[bucket_hash].count += 1
if report.unstable and not reports[bucket_hash].report.unstable:
LOG.debug("updating report to unstable")
reports[bucket_hash].report.unstable = True
LOG.debug("already tracking %s", bucket_hash)
# purge untracked report
if report is not None:
report.cleanup()
@@ -555,17 +545,10 @@ def harness_fn(_: str) -> bytes:  # pragma: no cover

# process results
if self._any_crash:
# add all results if min_results was reached
if sum(x.count for x in reports.values() if x.expected) >= min_results:
results: List[ReplayResult] = list(reports.values())
else:
# add only unexpected results since min_results was not reached
results = []
for result in reports.values():
if result.expected:
result.report.cleanup()
else:
results.append(result)
# all reports should be expected when self._any_crash=True
assert all(x.expected for x in reports.values())
success = sum(x.count for x in reports.values()) >= min_results
if not success:
LOG.debug(
"%d (any_crash) less than minimum %d",
self.status.results.total,
Expand All @@ -574,19 +557,24 @@ def harness_fn(_: str) -> bytes: # pragma: no cover
else:
# there should be at most one expected bucket
assert sum(x.expected for x in reports.values()) <= 1
# filter out unreliable expected results
results = []
for crash_hash, result in reports.items():
if result.expected and result.count < min_results:
success = any(
x.count >= min_results for x in reports.values() if x.expected
)
results: List[ReplayResult] = []
for crash_hash, result in reports.items():
# if min_results not met (success=False) cleanup expected reports
if not success and result.expected:
if not self._any_crash:
LOG.debug(
"%r less than minimum (%d/%d)",
crash_hash,
result.count,
min_results,
)
result.report.cleanup()
continue
results.append(result)
result.report.cleanup()
continue
results.append(result)

# this should only be displayed when both conditions are met:
# 1) runner does not close target (no delay was given before shutdown)
# 2) result has not been successfully reproduced
@@ -602,7 +590,7 @@ def harness_fn(_: str) -> bytes:  # pragma: no cover
return results

finally:
# we don't want to clean up but we are not checking results
# we don't want to cleanup but we are not checking results
self.target.close(force_close=True)
# remove unprocessed reports
for result in reports.values():
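Restated as a simplified sketch (names follow the hunks above; status counters, logging, and cleanup of untracked reports are omitted), the reworked result handling now buckets every result, including those from startup failures, and keeps a bucket marked unstable once any of its reports is unstable:

# simplified view of the new bucketing flow in ReplayManager.run()
expected = self._any_crash or self.check_match(self._signature, report, expect_hang, sig_set)
bucket_hash = sig_hash if (expected and sig_hash is not None) else report.crash_hash
if bucket_hash not in reports:
    # first result in this bucket: start tracking the report
    reports[bucket_hash] = ReplayResult(report, durations, expected)
else:
    # already tracked: bump the count and make the unstable flag "sticky"
    reports[bucket_hash].count += 1
    if report.unstable and not reports[bucket_hash].report.unstable:
        reports[bucket_hash].report.unstable = True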
75 changes: 48 additions & 27 deletions grizzly/replay/test_replay.py
@@ -164,21 +164,6 @@ def test_replay_05(mocker, server):
# target.close() called once in runner and once by ReplayManager.run()
assert target.close.call_count == 2
target.reset_mock()
# test target crashed
target.check_result.return_value = Result.FOUND
target.save_logs = _fake_save_logs
with ReplayManager([], server, target, use_harness=False) as replay:
results = replay.run(tests, 10, repeat=1)
assert replay.status
assert replay.status.ignored == 1
assert replay.status.iteration == 1
assert replay.status.results.total == 0
assert replay._signature is None
# target.close() called once in runner and once by ReplayManager.run()
assert target.close.call_count == 2
assert len(results) == 1
assert results[0].count == 1
assert not results[0].expected


def test_replay_06(mocker, server):
@@ -408,23 +393,59 @@ def test_replay_12(mocker, server):
assert report_2.cleanup.call_count == 1


def test_replay_13(mocker, server):
"""test ReplayManager.run() - any crash - startup failure"""
server.serve_path.return_value = (Served.NONE, {})
@mark.parametrize(
"to_serve, sig_value, expected, unexpected",
[
# No signature provided
(((Served.NONE, {}), (Served.ALL, {"a.html": "/fake/path"})), None, 2, 0),
(((Served.ALL, {"a.html": "/fake/path"}), (Served.NONE, {})), None, 2, 0),
(((Served.NONE, {}), (Served.NONE, {})), None, 2, 0),
# Signature provided (signatures match)
(((Served.NONE, {}), (Served.ALL, {"a.html": "/fake/path"})), "STDERR", 2, 0),
(((Served.ALL, {"a.html": "/fake/path"}), (Served.NONE, {})), "STDERR", 2, 0),
(((Served.NONE, {}), (Served.NONE, {})), "STDERR", 2, 0),
# Signature provided (signatures don't match)
(((Served.NONE, {}), (Served.ALL, {"a.html": "/fake/path"})), "miss", 0, 1),
(((Served.ALL, {"a.html": "/fake/path"}), (Served.NONE, {})), "miss", 0, 1),
(((Served.NONE, {}), (Served.NONE, {})), "miss", 0, 1),
# Hang after launch, before first test
(((Served.TIMEOUT, {}),), None, 0, 0),
],
)
def test_replay_13(mocker, server, tmp_path, to_serve, sig_value, expected, unexpected):
"""test ReplayManager.run() - result after launch, before running test"""
server.serve_path.side_effect = to_serve

# prepare signature
if sig_value is not None:
sig_file = tmp_path / "sig.json"
sig_file.write_text(
"{\n"
' "symptoms": [\n'
" {\n"
' "src": "stderr",\n'
' "type": "output",\n'
f' "value": "/{sig_value}/"\n'
" }\n"
" ]\n"
"}\n"
)
sig = CrashSignature.fromFile(str(sig_file))
else:
sig = None

target = mocker.Mock(spec_set=Target, binary=Path("bin"), launch_timeout=30)
target.check_result.return_value = Result.FOUND
target.save_logs = _fake_save_logs
target.monitor.is_healthy.return_value = False
tests = [mocker.MagicMock(spec_set=TestCase, entry_point="a.html")]
with ReplayManager([], server, target, any_crash=True, use_harness=False) as replay:
results = replay.run(tests, 10, repeat=1, min_results=1)
assert results
assert not any(x.expected for x in results)
assert target.close.call_count == 2
assert replay.status
assert replay.status.iteration == 1
assert replay.status.results.total == 0
assert replay.status.ignored == 1
with ReplayManager(set(), server, target, any_crash=False, signature=sig) as replay:
assert replay._relaunch == 1, "test is broken!"
results = replay.run(tests, 10, repeat=2, min_results=2)
assert sum(x.count for x in results if x.expected) == expected
assert sum(x.count for x in results if not x.expected) == unexpected
# make sure the serve flag is updated
assert any(x.report.unstable for x in results) or expected == 0


def test_replay_14(mocker, server):
11 changes: 7 additions & 4 deletions grizzly/session.py
@@ -247,21 +247,24 @@ def run(
if current_test.entry_point not in current_test:
LOG.error("Check adapter, test case is missing entry point")
raise SessionError("Test case is missing entry point")
if runner.initial and result.status != Result.NONE:
if result.timeout:
LOG.warning("Browser hung? Timeout too short? System too busy?")
elif runner.initial:
# since this is the first iteration since the Target launched
# something is likely wrong with the Target or Adapter
LOG.warning(
"Failure detected before running a test case, "
"browser build is potentially unstable"
)
if result.timeout:
LOG.warning("Browser hung? Timeout too short? System too busy?")
else:
self.iomanager.commit()
# process results
if result.status == Result.FOUND:
LOG.debug("result detected")
report = self.target.create_report(is_hang=result.timeout)
report = self.target.create_report(
is_hang=result.timeout,
unstable=runner.startup_failure,
)
seen, initial = self.status.results.count(
report.crash_hash, report.short_signature
)
8 changes: 5 additions & 3 deletions grizzly/target/puppet_target.py
@@ -153,10 +153,10 @@ def close(self, force_close: bool = False) -> None:
def closed(self) -> bool:
return self._puppet.reason is not None

def create_report(self, is_hang: bool = False) -> Report:
def create_report(self, is_hang: bool = False, unstable: bool = False) -> Report:
logs = Path(mkdtemp(prefix="logs_", dir=grz_tmp("logs")))
self.save_logs(logs)
return Report(logs, self.binary, is_hang=is_hang)
return Report(logs, self.binary, is_hang=is_hang, unstable=unstable)

def filtered_environ(self) -> Dict[str, str]:
# remove context specific entries from environment
@@ -362,7 +362,9 @@ def launch(self, location: str) -> None:
self.close()
if isinstance(exc, BrowserTimeoutError):
raise TargetLaunchTimeout(str(exc)) from None
raise TargetLaunchError(str(exc), self.create_report()) from None
raise TargetLaunchError(
str(exc), self.create_report(unstable=True)
) from None

def log_size(self) -> int:
total = 0
3 changes: 2 additions & 1 deletion grizzly/target/target.py
@@ -179,11 +179,12 @@ def closed(self) -> bool:
"""

@abstractmethod
def create_report(self, is_hang: bool = False) -> Report:
def create_report(self, is_hang: bool = False, unstable: bool = False) -> Report:
"""Process logs and create a Report.
Args:
is_hang: Indicate whether the results is due to a hang/timeout.
unstable: Indicate whether build is unstable.
Returns:
Report object.