diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py index ce7fa999..80e8c5d8 100644 --- a/faster_whisper/transcribe.py +++ b/faster_whisper/transcribe.py @@ -370,16 +370,25 @@ def transcribe( or language_detection_segments < 1 ): language_detection_segments = 1 - seek = 0 + if isinstance(clip_timestamps, str): + start_timestamp = float(clip_timestamps.split(",")[0]) + else: + start_timestamp = clip_timestamps[0] detected_language_info = {} + seek = int(start_timestamp * self.frames_per_second) content_frames = ( features.shape[-1] - self.feature_extractor.nb_max_frames ) - while ( - seek <= content_frames - and seek - < self.feature_extractor.nb_max_frames * language_detection_segments - ): + # If seek is beyond the full frames, set it to the last segment + if seek > features.shape[-1]: + seek = content_frames + end_frames = min( + seek + + self.feature_extractor.nb_max_frames + * language_detection_segments, + features.shape[-1], + ) + while seek < end_frames: segment = features[ :, seek : seek + self.feature_extractor.nb_max_frames ]