[detectors] Add new flash suppression method (#53)

Add new FlashFilter to scenedetect.scene_detector. Integrate it with ContentDetector and turn it on by default. Add a placeholder for the config option and update the changelog.
Breakthrough · Apr 20, 2024 · e1472bd · e1472bd
1 parent 8913d92
commit e1472bd
Show file tree

Hide file tree

Showing 6 changed files with 101 additions and 33 deletions.
diff --git a/scenedetect.cfg b/scenedetect.cfg
@@ -39,13 +39,19 @@
 # Method to use for downscaling (nearest, linear, cubic, area, lanczos4).
 #downscale-method = linear
 
-# Minimum length of a given scene (shorter scenes will be merged).
+# Minimum length of a given scene. See filter-mode to control how this is enforced.
 #min-scene-len = 0.6s
 
-# Merge last scene if it is shorter than min-scene-len (yes/no)
+# Mode to use when filtering out scenes (merge or suppress):
+#   merge: Consecutive scenes shorter than min-scene-len are combined.
+#   suppress: No new scenes can be generated until min-scene-len passes.
+#filter-mode = merge
+
+# Merge last scene if it is shorter than min-scene-len (yes/no). This can occur
+# when a cut is detected just before the video ends.
 #merge-last-scene = no
 
-# Drop scenes shorter than min-scene-len instead of merging (yes/no)
+# Drop scenes shorter than min-scene-len instead of merging (yes/no).
 #drop-short-scenes = no
 
 # Verbosity of console output (debug, info, warning, error, or none).

diff --git a/scenedetect/__init__.py b/scenedetect/__init__.py
@@ -47,7 +47,7 @@
 
 # Used for module identification and when printing version & about info
 # (e.g. calling `scenedetect version` or `scenedetect about`).
-__version__ = '0.6.4-dev0'
+__version__ = '0.7-dev0'
 
 init_logger()
 logger = getLogger('pyscenedetect')

diff --git a/scenedetect/detectors/content_detector.py b/scenedetect/detectors/content_detector.py
@@ -22,7 +22,7 @@
 import numpy
 import cv2
 
-from scenedetect.scene_detector import SceneDetector
+from scenedetect.scene_detector import SceneDetector, FlashFilter
 
 
 def _mean_pixel_distance(left: numpy.ndarray, right: numpy.ndarray) -> float:
@@ -105,6 +105,7 @@ def __init__(
         weights: 'ContentDetector.Components' = DEFAULT_COMPONENT_WEIGHTS,
         luma_only: bool = False,
         kernel_size: Optional[int] = None,
+        filter_mode: FlashFilter.Mode = FlashFilter.Mode.MERGE,
     ):
         """
         Arguments:
@@ -118,11 +119,12 @@ def __init__(
                 Overrides `weights` if both are set.
             kernel_size: Size of kernel for expanding detected edges. Must be odd integer
                 greater than or equal to 3. If None, automatically set using video resolution.
+            filter_mode: Mode to use when filtering cuts to meet `min_scene_len`.
         """
         super().__init__()
         self._threshold: float = threshold
         self._min_scene_len: int = min_scene_len
-        self._last_scene_cut: Optional[int] = None
+        self._last_above_threshold: Optional[int] = None
         self._last_frame: Optional[ContentDetector._FrameData] = None
         self._weights: ContentDetector.Components = weights
         if luma_only:
@@ -134,6 +136,7 @@ def __init__(
                 raise ValueError('kernel_size must be odd integer >= 3')
             self._kernel = numpy.ones((kernel_size, kernel_size), numpy.uint8)
         self._frame_score: Optional[float] = None
+        self._flash_filter = FlashFilter(mode=filter_mode, length=min_scene_len)
 
     def get_metrics(self):
         return ContentDetector.METRIC_KEYS
@@ -195,22 +198,12 @@ def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> List[int]:
             List[int]: List of frames where scene cuts have been detected. There may be 0
             or more frames in the list, and not necessarily the same as frame_num.
         """
-        # Initialize last scene cut point at the beginning of the frames of interest.
-        if self._last_scene_cut is None:
-            self._last_scene_cut = frame_num
-
         self._frame_score = self._calculate_frame_score(frame_num, frame_img)
         if self._frame_score is None:
             return []
 
-        # We consider any frame over the threshold a new scene, but only if
-        # the minimum scene length has been reached (otherwise it is ignored).
-        min_length_met: bool = (frame_num - self._last_scene_cut) >= self._min_scene_len
-        if self._frame_score >= self._threshold and min_length_met:
-            self._last_scene_cut = frame_num
-            return [frame_num]
-
-        return []
+        above_threshold: bool = self._frame_score >= self._threshold
+        return self._flash_filter.filter(frame_num=frame_num, above_threshold=above_threshold)
 
     def _detect_edges(self, lum: numpy.ndarray) -> numpy.ndarray:
         """Detect edges using the luma channel of a frame.

diff --git a/scenedetect/scene_detector.py b/scenedetect/scene_detector.py
@@ -25,7 +25,8 @@
     event (in, out, cut, etc...).
 """
 
-from typing import List, Optional, Tuple
+from enum import Enum
+import typing as ty
 
 import numpy
 
@@ -46,7 +47,7 @@ class SceneDetector:
     """
     # TODO(v0.7): Make this a proper abstract base class.
 
-    stats_manager: Optional[StatsManager] = None
+    stats_manager: ty.Optional[StatsManager] = None
     """Optional :class:`StatsManager <scenedetect.stats_manager.StatsManager>` to
     use for caching frame metrics to and from."""
 
@@ -77,7 +78,7 @@ def stats_manager_required(self) -> bool:
         """
         return False
 
-    def get_metrics(self) -> List[str]:
+    def get_metrics(self) -> ty.List[str]:
         """Get Metrics:  Get a list of all metric names/keys used by the detector.
 
         Returns:
@@ -86,7 +87,7 @@ def get_metrics(self) -> List[str]:
         """
         return []
 
-    def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> List[int]:
+    def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> ty.List[int]:
         """Process the next frame. `frame_num` is assumed to be sequential.
 
         Args:
@@ -103,7 +104,7 @@ def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> List[int]:
         """
         return []
 
-    def post_process(self, frame_num: int) -> List[int]:
+    def post_process(self, frame_num: int) -> ty.List[int]:
         """Post Process: Performs any processing after the last frame has been read.
 
         Prototype method, no actual detection.
@@ -132,7 +133,8 @@ class SparseSceneDetector(SceneDetector):
     An example of a SparseSceneDetector is the MotionDetector.
     """
 
-    def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> List[Tuple[int, int]]:
+    def process_frame(self, frame_num: int,
+                      frame_img: numpy.ndarray) -> ty.List[ty.Tuple[int, int]]:
         """Process Frame: Computes/stores metrics and detects any scene changes.
 
         Prototype method, no actual detection.
@@ -143,7 +145,7 @@ def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> List[Tuple[
         """
         return []
 
-    def post_process(self, frame_num: int) -> List[Tuple[int, int]]:
+    def post_process(self, frame_num: int) -> ty.List[ty.Tuple[int, int]]:
         """Post Process: Performs any processing after the last frame has been read.
 
         Prototype method, no actual detection.
@@ -153,3 +155,66 @@ def post_process(self, frame_num: int) -> List[Tuple[int, int]]:
             to be added to the output scene list directly.
         """
         return []
+
+
+class FlashFilter:
+    """Filter per-frame threshold results so emitted cuts respect a minimum length.
+
+    Call :meth:`filter` once per frame with whether that frame's score was above the
+    detection threshold. It returns the cut(s), if any, to emit for that call. How
+    above-threshold frames that occur closer together than the filter length are
+    handled depends on the :class:`Mode` given at construction.
+    """
+
+    class Mode(Enum):
+        """Strategy used for cuts that are closer together than the filter length."""
+        MERGE = 0
+        """Merge consecutive cuts shorter than filter length."""
+        SUPPRESS = 1
+        """Suppress consecutive cuts until the filter length has passed."""
+
+    def __init__(self, mode: Mode, length: int):
+        """
+        Arguments:
+            mode: Filtering strategy to apply (see :class:`Mode`).
+            length: Filter length, in frames. If not greater than zero, filtering is
+                disabled and every above-threshold frame is reported as a cut.
+        """
+        self._mode = mode
+        self._filter_length = length  # Number of frames to use for activating the filter.
+        # Reference frame for the length check. Initialized to the first frame passed to
+        # `filter`. In merge mode it tracks the last frame above the threshold; in suppress
+        # mode it only advances when a cut is emitted.
+        self._last_above: ty.Optional[int] = None
+        self._merge_enabled = False   # Used to disable merging until at least one cut was found.
+        self._merge_triggered = False # True when the merge filter is active.
+        self._merge_start: ty.Optional[int] = None  # Frame where we started the merge filter.
+
+    def filter(self, frame_num: int, above_threshold: bool) -> ty.List[int]:
+        """Process the result for one frame and return any cuts to emit.
+
+        Arguments:
+            frame_num: Number of the frame that was just processed.
+            above_threshold: True if the frame's score met the detection threshold.
+
+        Returns:
+            List of frame numbers where cuts should be placed. May be empty, and in merge
+            mode the returned frame can be earlier than `frame_num`.
+
+        Raises:
+            RuntimeError: If the filter was constructed with an unsupported mode.
+        """
+        # A non-positive length disables filtering entirely (pass-through).
+        if not self._filter_length > 0:
+            return [frame_num] if above_threshold else []
+        # Measure the first minimum-length window from the first frame we see.
+        if self._last_above is None:
+            self._last_above = frame_num
+        if self._mode == FlashFilter.Mode.MERGE:
+            return self._filter_merge(frame_num=frame_num, above_threshold=above_threshold)
+        if self._mode == FlashFilter.Mode.SUPPRESS:
+            return self._filter_suppress(frame_num=frame_num, above_threshold=above_threshold)
+        # Fail loudly rather than implicitly returning None, which would violate the declared
+        # return type and only surface later in the caller.
+        raise RuntimeError("Unhandled FlashFilter mode.")
+
+    def _filter_suppress(self, frame_num: int, above_threshold: bool) -> ty.List[int]:
+        """Emit a cut only when the frame is above threshold and the filter length has passed.
+
+        Above-threshold frames that arrive before the filter length has elapsed since the
+        reference frame are dropped without moving the reference frame.
+        """
+        min_length_met: bool = (frame_num - self._last_above) >= self._filter_length
+        if not (above_threshold and min_length_met):
+            return []
+        # Both length and threshold requirements were satisfied. Emit the cut, and wait until both
+        # requirements are met again.
+        self._last_above = frame_num
+        return [frame_num]
+
+    def _filter_merge(self, frame_num: int, above_threshold: bool) -> ty.List[int]:
+        """Combine above-threshold frames that occur closer together than the filter length.
+
+        Once at least one cut has been emitted, an above-threshold frame arriving too soon
+        starts a merge. While merging nothing is emitted; when the filter length has passed
+        with no frame above the threshold, and the merged span is at least the filter length,
+        a single cut is emitted at the last above-threshold frame.
+        """
+        # Computed against the previous above-threshold frame, before it is updated below.
+        min_length_met: bool = (frame_num - self._last_above) >= self._filter_length
+        # Ensure last frame is always advanced to the most recent one that was above the threshold.
+        if above_threshold:
+            self._last_above = frame_num
+        if self._merge_triggered:
+            # This frame was under the threshold, see if enough frames passed to disable the filter.
+            num_merged_frames = self._last_above - self._merge_start
+            if min_length_met and not above_threshold and num_merged_frames >= self._filter_length:
+                self._merge_triggered = False
+                return [self._last_above]
+            # Keep merging until enough frames pass below the threshold.
+            return []
+        # Wait for next frame above the threshold.
+        if not above_threshold:
+            return []
+        # If we met the minimum length requirement, no merging is necessary.
+        if min_length_met:
+            # Only allow the merge filter once the first cut is emitted.
+            self._merge_enabled = True
+            return [frame_num]
+        # Start merging cuts until the length requirement is met.
+        if self._merge_enabled:
+            self._merge_triggered = True
+            self._merge_start = frame_num
+        return []
diff --git a/tests/test_detectors.py b/tests/test_detectors.py
@@ -165,7 +165,8 @@ def get_fade_in_out_test_cases():
 def test_detect_fast_cuts(test_case: TestCase):
     scene_list = test_case.detect()
     start_frames = [timecode.get_frames() for timecode, _ in scene_list]
-    assert test_case.scene_boundaries == start_frames
+
+    assert start_frames == test_case.scene_boundaries
     assert scene_list[0][0] == test_case.start_time
     assert scene_list[-1][1] == test_case.end_time
 
@@ -174,7 +175,7 @@ def test_detect_fast_cuts(test_case: TestCase):
 def test_detect_fades(test_case: TestCase):
     scene_list = test_case.detect()
     start_frames = [timecode.get_frames() for timecode, _ in scene_list]
-    assert test_case.scene_boundaries == start_frames
+    assert start_frames == test_case.scene_boundaries
     assert scene_list[0][0] == test_case.start_time
     assert scene_list[-1][1] == test_case.end_time
 
@@ -191,14 +192,12 @@ def test_detectors_with_stats(test_video_file):
         end_time = FrameTimecode('00:00:08', video.frame_rate)
         scene_manager.detect_scenes(video=video, end_time=end_time)
         initial_scene_len = len(scene_manager.get_scene_list())
-        assert initial_scene_len > 0 # test case must have at least one scene!
-                                     # Re-analyze using existing stats manager.
+        assert initial_scene_len > 0, "Test case must have at least one scene."
+        # Re-analyze using existing stats manager.
         scene_manager = SceneManager(stats_manager=stats)
         scene_manager.add_detector(detector())
-
         video.reset()
         scene_manager.auto_downscale = True
-
         scene_manager.detect_scenes(video=video, end_time=end_time)
         scene_list = scene_manager.get_scene_list()
         assert len(scene_list) == initial_scene_len
diff --git a/website/pages/changelog.md b/website/pages/changelog.md
@@ -2,13 +2,18 @@
 Releases
 ==========================================================
 
-## PySceneDetect 0.6
+## PySceneDetect 0.7
 
-### 0.6.4 (In Development)
+### 0.7 (In Development)
 
  - [feature] New detector: `detect-hist` / `HistogramDetector`, [thanks @wjs018](https://github.com/Breakthrough/PySceneDetect/pull/295) [#53](https://github.com/Breakthrough/PySceneDetect/issues/53)
+ - [feature] Add new flash suppression filter with `filter-mode` config option, reduces number of cuts generated during strobing/flashing effects [#35](https://github.com/Breakthrough/PySceneDetect/issues/35) [#53](https://github.com/Breakthrough/PySceneDetect/issues/53)
+    - `filter-mode = merge`, the new default mode, merges consecutive scenes shorter than `min-scene-len`
+    - `filter-mode = suppress`, the previous behavior, disables generating new scenes until `min-scene-len` has passed
  - [bugfix] Remove extraneous console output when using `--drop-short-scenes`
 
+## PySceneDetect 0.6
+
 ### 0.6.3 (March 9, 2024)
 
 #### Release Notes