Skip to content

Commit

Permalink
just because omega starts with OMMMM does not mean it's zen
Browse files (browse the repository at this point in the history)
  • Loading branch information
milocress committed Apr 23, 2024
1 parent 08814e1 commit 80acfb3
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 6 deletions.
3 changes: 1 addition & 2 deletions llmfoundry/data/text_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import transformers
from composer.core.data_spec import DataSpec
from composer.core.types import Batch
- from omegaconf import OmegaConf as om
from streaming import Stream, StreamingDataset
from torch.utils.data import DataLoader
from transformers import PreTrainedTokenizerBase
Expand Down Expand Up @@ -274,6 +273,7 @@ def build_text_dataloader(
persistent_workers: bool = True,
timeout: int = 0,
) -> DataSpec:

dataset_cfg = dataset

# get kwargs
Expand Down Expand Up @@ -450,7 +450,6 @@ def get_num_samples_in_batch(batch: Batch) -> int:
'drop_last': False,
'num_workers': 4,
}
- cfg = om.create(cfg)
device_batch_size = 2

tokenizer_name = args.tokenizer
Expand Down
6 changes: 2 additions & 4 deletions tests/data/test_dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1119,7 +1119,7 @@ def test_token_counting_func_dataloader_setting(
device_batch_size=batch_size,
**cfg)
elif dataloader_type == 'text':
- cfg = DictConfig({
+ cfg = {
'name': 'text',
'dataset': {
'local': 'dummy-path',
Expand All @@ -1130,7 +1130,7 @@ def test_token_counting_func_dataloader_setting(
'shuffle_seed': 0,
},
**common_args
- })
+ }
ds_mock = MagicMock()
ds_mock.tokenizer = gptt
monkeypatch.setattr('llmfoundry.data.text_data.StreamingTextDataset',
Expand All @@ -1142,8 +1142,6 @@ def test_token_counting_func_dataloader_setting(
else:
raise NotImplementedError()

- cfg = om.create(cfg)

batch_collated = dl.dataloader.collate_fn(batch_tokenized) # type: ignore
actual_token_count = dl.get_num_tokens_in_batch(batch_collated)

Expand Down

0 comments on commit 80acfb3

Please sign in to comment.