From 7c36e1c941ff72b5a1beb2e4169ba586b7311e4b Mon Sep 17 00:00:00 2001
From: Tyson Smith
Date: Fri, 8 Sep 2023 17:49:36 -0700
Subject: [PATCH] Always submit initial results

When a result is detected for the first time it is always submitted,
even if submission would otherwise be blocked by the FuzzManager
frequent flag. Submission is forced when the result has only been found
once (including by parallel instances). For subsequent submissions we
check whether the result is frequent, taking results found by parallel
instances into account, and report accordingly.
---
 grizzly/common/reporter.py      | 16 ++++++++--------
 grizzly/common/status.py        | 31 ++++++++++++++++++-------------
 grizzly/common/test_reporter.py |  7 ++++---
 grizzly/common/test_status.py   | 14 +++++++-------
 grizzly/reduce/core.py          |  4 +---
 grizzly/reduce/test_reduce.py   |  3 ++-
 grizzly/replay/replay.py        |  3 +--
 grizzly/session.py              |  6 +++---
 8 files changed, 44 insertions(+), 40 deletions(-)

diff --git a/grizzly/common/reporter.py b/grizzly/common/reporter.py
index 6cbd01d9..ef95337c 100644
--- a/grizzly/common/reporter.py
+++ b/grizzly/common/reporter.py
@@ -68,10 +68,10 @@ def _pre_submit(self, report):
         pass
 
     @abstractmethod
-    def _submit_report(self, report, test_cases):
+    def _submit_report(self, report, test_cases, force):
         pass
 
-    def submit(self, test_cases, report):
+    def submit(self, test_cases, report, force=False):
         """Submit report containing results.
 
         Args:
@@ -79,6 +79,7 @@ def submit(self, test_cases, report):
                 the newest being the mostly likely to trigger the result (crash,
                 assert... etc).
             report (Report): Report to submit.
+            force (bool): Ignore any limits.
 
         Returns:
             *: implementation specific result indicating where the report was created
@@ -99,7 +100,7 @@ def submit(self, test_cases, report):
         else:
             LOG.info("=== BEGIN REPORT ===\nBrowser hang detected")
         LOG.info("=== END REPORT ===")
-        result = self._submit_report(report, test_cases)
+        result = self._submit_report(report, test_cases, force)
         if report is not None:
             report.cleanup()
         self._post_submit()
@@ -124,7 +125,7 @@ def _pre_submit(self, report):
     def _post_submit(self):
         pass
 
-    def _submit_report(self, report, test_cases):
+    def _submit_report(self, report, test_cases, force):
        # create major bucket directory in working directory if needed
         if self.major_bucket:
             dest = self.report_path / report.major[:16]
@@ -167,13 +168,12 @@ def _post_submit(self):
 class FuzzManagerReporter(Reporter):
     FM_CONFIG = Path.home() / ".fuzzmanagerconf"
 
-    __slots__ = ("_extra_metadata", "force_report", "quality", "tool")
+    __slots__ = ("_extra_metadata", "quality", "tool")
 
     def __init__(self, tool):
         super().__init__()
         assert isinstance(tool, str)
         self._extra_metadata = {}
-        self.force_report = False
         self.quality = Quality.UNREDUCED
         # remove whitespace and use only lowercase
         self.tool = "-".join(tool.lower().split())
@@ -245,10 +245,10 @@ def _ignored(report):
         # ignore Valgrind crashes
         return log_data.startswith("VEX temporary storage exhausted.")
 
-    def _submit_report(self, report, test_cases):
+    def _submit_report(self, report, test_cases, force):
         collector = Collector(tool=self.tool)
 
-        if not self.force_report:
+        if not force:
             if collector.sigCacheDir and Path(collector.sigCacheDir).is_dir():
                 # search for a cached signature match
                 with InterProcessLock(str(grz_tmp() / "fm_sigcache.lock")):
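Note on the Reporter hook change above: the force flag is now threaded from submit() into every _submit_report() implementation instead of living on the FuzzManagerReporter instance. A minimal sketch of a custom subclass under the new signature; LoggingReporter and its print-based behaviour are hypothetical, only the three hook names and the force parameter come from reporter.py:

from grizzly.common.reporter import Reporter


class LoggingReporter(Reporter):
    """Hypothetical reporter, shown only to illustrate the updated hooks."""

    def _pre_submit(self, report):
        pass

    def _post_submit(self):
        pass

    def _submit_report(self, report, test_cases, force):
        # force is passed straight through from submit(..., force=...) and asks
        # the reporter to skip any rate limiting it would normally apply
        print(f"submitting {report!r} ({len(test_cases)} tests, force={force})")

Callers switch from setting the removed force_report attribute to passing the keyword argument, e.g. reporter.submit(tests, report, force=True).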
diff --git a/grizzly/common/status.py b/grizzly/common/status.py
index f04ed65b..078af495 100644
--- a/grizzly/common/status.py
+++ b/grizzly/common/status.py
@@ -616,9 +616,11 @@ class ResultCounter(SimpleResultCounter):
 
     def __init__(self, pid, db_file, life_time=RESULTS_EXPIRE, report_limit=0):
         super().__init__(pid)
+        assert db_file
         assert report_limit >= 0
         self._db_file = db_file
         self._frequent = set()
+        # use zero to disable report limit
         self._limit = report_limit
         self.last_found = 0
         self._init_db(db_file, pid, life_time)
@@ -667,10 +669,12 @@ def count(self, result_id, desc):
             desc (str): User friendly description.
 
         Returns:
-            int: Current count for given result_id.
+            tuple (int, bool): Local count and initial report (includes
+                parallel instances) for given result_id.
         """
         super().count(result_id, desc)
         timestamp = time()
+        initial = False
         with closing(connect(self._db_file, timeout=DB_TIMEOUT)) as con:
             cur = con.cursor()
             with con:
@@ -683,6 +687,11 @@ def count(self, result_id, desc):
                     (timestamp, self._count[result_id], self.pid, result_id),
                 )
                 if cur.rowcount < 1:
+                    cur.execute(
+                        """SELECT pid FROM results WHERE result_id = ?;""",
+                        (result_id,),
+                    )
+                    initial = cur.fetchone() is None
                     cur.execute(
                         """INSERT INTO results(
                                pid,
                                result_id,
                                desc,
                                timestamp,
                                count)
                            VALUES (?, ?, ?, ?, ?);""",
-                        (
-                            self.pid,
-                            result_id,
-                            desc,
-                            timestamp,
-                            self._count[result_id],
-                        ),
+                        (self.pid, result_id, desc, timestamp, self._count[result_id]),
                     )
         self.last_found = timestamp
-        return self._count[result_id]
+        return self._count[result_id], initial
 
     def is_frequent(self, result_id):
         """Scan all results including results from other running instances
@@ -723,16 +726,18 @@
         # only check the db for parallel results if
         # - result has been found locally more than once
         # - limit has not been exceeded locally
-        # - a db file is given
-        if self._limit >= total > 1 and self._db_file:
+        if self._limit >= total > 1:
             with closing(connect(self._db_file, timeout=DB_TIMEOUT)) as con:
                 cur = con.cursor()
                 # look up total count from all processes
                 cur.execute(
-                    """SELECT SUM(count) FROM results WHERE result_id = ?;""",
+                    """SELECT COALESCE(SUM(count), 0)
+                       FROM results WHERE result_id = ?;""",
                     (result_id,),
                 )
-                total = cur.fetchone()[0] or 0
+                global_total = cur.fetchone()[0]
+                assert global_total >= total
+                total = global_total
                 if total > self._limit:
                     self._frequent.add(result_id)
                     return True
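The key behavioural change in ResultCounter.count() above is the new initial flag: a result is only initial if no instance (pid) has a row for that result_id in the shared results table. A standalone sketch of that check, using the column layout from the INSERT statement above; the is_initial() helper, the in-memory database, and the sample values are illustrative only and not part of grizzly:

from sqlite3 import connect


def is_initial(con, result_id):
    # mirrors the query added above: SELECT pid FROM results WHERE result_id = ?
    cur = con.execute("SELECT pid FROM results WHERE result_id = ?;", (result_id,))
    return cur.fetchone() is None


con = connect(":memory:")
con.execute(
    'CREATE TABLE results (pid INT, result_id TEXT, "desc" TEXT, timestamp INT, count INT);'
)
assert is_initial(con, "sig-a")  # nothing recorded by any instance yet
con.execute("INSERT INTO results VALUES (123, 'sig-a', 'crash', 0, 1);")
assert not is_initial(con, "sig-a")  # a row from any pid means it is not initial

Because the check runs against the shared database, a result first seen by a parallel instance is not treated as initial locally, which is exactly what the test updates below exercise.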
diff --git a/grizzly/common/test_reporter.py b/grizzly/common/test_reporter.py
index f2d60092..a6623d72 100644
--- a/grizzly/common/test_reporter.py
+++ b/grizzly/common/test_reporter.py
@@ -47,7 +47,7 @@ def _pre_submit(self, report):
         def _post_submit(self):
             pass
 
-        def _submit_report(self, report, test_cases):
+        def _submit_report(self, report, test_cases, force):
             pass
 
     (tmp_path / "log_stderr.txt").write_bytes(b"log msg")
@@ -208,8 +208,9 @@ def test_fuzzmanager_reporter_02(
     if tests:
         test_cases.append(fake_test)
     reporter = FuzzManagerReporter("fake-tool")
-    reporter.force_report = force
-    reporter.submit(test_cases, Report(log_path, Path("bin"), is_hang=True))
+    reporter.submit(
+        test_cases, Report(log_path, Path("bin"), is_hang=True), force=force
+    )
     assert not log_path.is_dir()
     assert fake_collector.call_args == ({"tool": "fake-tool"},)
     if (frequent and not force) or ignored:
diff --git a/grizzly/common/test_status.py b/grizzly/common/test_status.py
index 0bcdfb89..6ef65c62 100644
--- a/grizzly/common/test_status.py
+++ b/grizzly/common/test_status.py
@@ -576,7 +576,7 @@ def test_report_counter_01(tmp_path, keys, counts, limit):
         assert not counter.is_frequent(report_id)
         # call count() with report_id 'counted' times
         for current in range(1, counted + 1):
-            assert counter.count(report_id, "desc") == current
+            assert counter.count(report_id, "desc") == (current, (current == 1))
         # test get()
         if sum(counts) > 0:
             assert counter.get(report_id) == (report_id, counted, "desc")
@@ -610,31 +610,31 @@ def test_report_counter_02(mocker, tmp_path):
     assert not counter_b.is_frequent("a")
     assert not counter_c.is_frequent("a")
     # local (counter_a, bucket a) count is 1, global (all counters) count is 1
-    assert counter_a.count("a", "desc") == 1
+    assert counter_a.count("a", "desc") == (1, True)
     assert not counter_a.is_frequent("a")
     assert not counter_b.is_frequent("a")
     assert not counter_c.is_frequent("a")
     # local (counter_b, bucket a) count is 1, global (all counters) count is 2
-    assert counter_b.count("a", "desc") == 1
+    assert counter_b.count("a", "desc") == (1, False)
     assert not counter_a.is_frequent("a")
     assert not counter_b.is_frequent("a")
     assert not counter_c.is_frequent("a")
     # local (counter_b, bucket a) count is 2, global (all counters) count is 3
     # locally exceeded
-    assert counter_b.count("a", "desc") == 2
+    assert counter_b.count("a", "desc") == (2, False)
     assert counter_b.is_frequent("a")
     # local (counter_c, bucket a) count is 1, global (all counters) count is 4
-    assert counter_c.count("a", "desc") == 1
+    assert counter_c.count("a", "desc") == (1, False)
     assert not counter_a.is_frequent("a")
     assert counter_b.is_frequent("a")
     assert not counter_c.is_frequent("a")
     # local (counter_a, bucket a) count is 2, global (all counters) count is 5
     # no limit
-    assert counter_a.count("a", "desc") == 2
+    assert counter_a.count("a", "desc") == (2, False)
     assert not counter_a.is_frequent("a")
     # local (counter_c, bucket a) count is 2, global (all counters) count is 6
     # locally not exceeded, globally exceeded
-    assert counter_c.count("a", "desc") == 2
+    assert counter_c.count("a", "desc") == (2, False)
     assert counter_c.is_frequent("a")
     # local (counter_a, bucket x) count is 0, global (all counters) count is 0
     assert not counter_a.is_frequent("x")
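The expectations in test_report_counter_02 above can be summarised by a condensed, stateless restatement of the frequent-result rule. This is an illustration, not the ResultCounter implementation (the real class also remembers results it has already flagged as frequent); the function, its parameters, and the report limit of 2 for counter_b and counter_c are inferred from the test expectations:

def is_frequent(local_count, global_count, report_limit):
    # a report_limit of zero disables frequent-result tracking entirely
    if report_limit == 0:
        return False
    # a result only becomes frequent once it has been seen locally more than
    # once and the combined count across all instances exceeds the limit
    return local_count > 1 and global_count > report_limit


assert not is_frequent(local_count=1, global_count=2, report_limit=2)  # counter_b, first hit
assert is_frequent(local_count=2, global_count=3, report_limit=2)      # counter_b, locally exceeded
assert not is_frequent(local_count=1, global_count=4, report_limit=2)  # counter_c, first hit
assert is_frequent(local_count=2, global_count=6, report_limit=2)      # counter_c, globally exceeded
assert not is_frequent(local_count=2, global_count=5, report_limit=0)  # counter_a, limit disabled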
diff --git a/grizzly/reduce/core.py b/grizzly/reduce/core.py
index cba746e1..c8164420 100644
--- a/grizzly/reduce/core.py
+++ b/grizzly/reduce/core.py
@@ -721,8 +721,6 @@ def report(self, results, testcases, update_status=False):
         for result in results:
             if self._report_to_fuzzmanager:
                 reporter = FuzzManagerReporter(self._report_tool)
-                if result.expected:
-                    reporter.force_report = True
             else:
                 report_dir = "reports" if result.expected else "other_reports"
                 reporter = FilesystemReporter(
@@ -742,7 +740,7 @@ def report(self, results, testcases, update_status=False):
             if result.served is not None:
                 for clone, served in zip(clones, result.served):
                     clone.purge_optional(served)
-            result = reporter.submit(clones, result.report)
+            result = reporter.submit(clones, result.report, force=result.expected)
             if result is not None:
                 if isinstance(result, Path):
                     result = str(result)
diff --git a/grizzly/reduce/test_reduce.py b/grizzly/reduce/test_reduce.py
index 704a909e..ae1bccd7 100644
--- a/grizzly/reduce/test_reduce.py
+++ b/grizzly/reduce/test_reduce.py
@@ -843,7 +843,8 @@ def replay_run(testcases, _time_limit, **kw):
 
     reporter = mocker.patch("grizzly.reduce.core.FilesystemReporter", autospec=True)
 
-    def submit(test_cases, report):
+    # pylint: disable=unused-argument
+    def submit(test_cases, report, force=False):
         assert test_cases
         assert isinstance(report, Report)
         for test in test_cases:
diff --git a/grizzly/replay/replay.py b/grizzly/replay/replay.py
index 666dcf7a..4d699978 100644
--- a/grizzly/replay/replay.py
+++ b/grizzly/replay/replay.py
@@ -246,8 +246,7 @@ def report_to_fuzzmanager(results, tests, tool=None):
         for result in results:
             # always report expected results
             # avoid reporting unexpected frequent results
-            reporter.force_report = result.expected
-            reporter.submit(tests, result.report)
+            reporter.submit(tests, result.report, force=result.expected)
 
     def run(
         self,
diff --git a/grizzly/session.py b/grizzly/session.py
index 694c9b72..40080c19 100644
--- a/grizzly/session.py
+++ b/grizzly/session.py
@@ -258,7 +258,7 @@ def run(
                 else:
                     # FM crash signature creation failed
                     short_sig = "Signature creation failed"
-                seen = self.status.results.count(bucket_hash, short_sig)
+                seen, initial = self.status.results.count(bucket_hash, short_sig)
                 LOG.info(
                     "Result: %s (%s:%s) - %d",
                     short_sig,
@@ -266,8 +266,8 @@ def run(
                     report.minor[:8],
                     seen,
                 )
-                if not self.status.results.is_frequent(bucket_hash):
-                    self.reporter.submit(self.iomanager.tests, report)
+                if initial or not self.status.results.is_frequent(bucket_hash):
+                    self.reporter.submit(self.iomanager.tests, report, force=initial)
                 else:
                     # we should always submit the first instance of a result
                     assert seen > 1
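Taken together, the session.py change above implements the policy described in the commit message. A pure-function restatement for illustration; should_submit() is not part of grizzly, it simply mirrors the branch added to the session run loop:

def should_submit(initial, frequent):
    """Return (submit, force) for a detected result.

    initial:  first sighting of this bucket across all parallel instances
    frequent: ResultCounter.is_frequent() for the bucket
    """
    if initial:
        # the first sighting is always submitted and force=True skips the
        # signature-cache / frequent checks guarded by "if not force:" in
        # FuzzManagerReporter._submit_report()
        return True, True
    if not frequent:
        # later sightings are submitted normally; frequent checks still apply
        return True, False
    # known-frequent result that has been seen before: skip submission
    return False, False


assert should_submit(initial=True, frequent=False) == (True, True)
assert should_submit(initial=False, frequent=False) == (True, False)
assert should_submit(initial=False, frequent=True) == (False, False)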