From 9558a40083b49cc198b749484c28c1045c550b19 Mon Sep 17 00:00:00 2001 From: Cody Blakeney Date: Wed, 6 Sep 2023 13:27:12 -0400 Subject: [PATCH] typecast shuffle_block_size because of issue (#581) --- llmfoundry/data/text_data.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llmfoundry/data/text_data.py b/llmfoundry/data/text_data.py index 42f1422ec6..253f90cd9f 100644 --- a/llmfoundry/data/text_data.py +++ b/llmfoundry/data/text_data.py @@ -112,6 +112,10 @@ def __init__(self, f'local directory {local} does not contain split {split}' ) + # TODO: discover where yamls are being converted incorrectly; this cast is a temporary workaround + if isinstance(shuffle_block_size, float): + shuffle_block_size = int(shuffle_block_size) + # Build Dataset super().__init__( streams=streams,