diff --git a/llmfoundry/data/finetuning/tasks.py b/llmfoundry/data/finetuning/tasks.py
index 08a336129a..3e94f56713 100644
--- a/llmfoundry/data/finetuning/tasks.py
+++ b/llmfoundry/data/finetuning/tasks.py
@@ -834,6 +834,8 @@ def dataset_mapper(example: Dict):
                 desc='Tokenizing dataset',
             )
 
+            log.info('Finished tokenizing dataset.')
+
             # Use multiprocessing to introduce a custom timeout for this operation.
             # A temporary workaround to avoid indefinite hangs observed occasionally due to
             # filter_dataset not properly exiting and the dist.barrier() below not timing out.