diff --git a/llmfoundry/data/text_data.py b/llmfoundry/data/text_data.py index f529feef00..42f1422ec6 100644 --- a/llmfoundry/data/text_data.py +++ b/llmfoundry/data/text_data.py @@ -247,6 +247,7 @@ def build_text_dataloader( dataset = StreamingTextDataset( tokenizer=tokenizer, streams=streams, + batch_size=device_batch_size, **cfg.dataset, )