diff --git a/daft/table/table_io.py b/daft/table/table_io.py index 9e51b7669f..bdad3ebef8 100644 --- a/daft/table/table_io.py +++ b/daft/table/table_io.py @@ -6,6 +6,7 @@ import random import time from collections.abc import Callable, Generator +from functools import partial from typing import IO, TYPE_CHECKING, Any, Union from uuid import uuid4 @@ -682,14 +683,14 @@ def write_deltalake( data_files: list[AddAction] = [] - # added to get_file_stats_from_metadata in deltalake v0.17.4: non-optional "num_indexed_cols" argument + # added to get_file_stats_from_metadata in deltalake v0.17.4: non-optional "num_indexed_cols" and "columns_to_collect_stats" arguments # https://github.com/delta-io/delta-rs/blob/353e08be0202c45334dcdceee65a8679f35de710/python/deltalake/writer.py#L725 if parse(deltalake.__version__) < parse("0.17.4"): get_file_stats_from_metadata = deltalake.writer.get_file_stats_from_metadata else: - - def get_file_stats_from_metadata(metadata): - deltalake.writer.get_file_stats_from_metadata(metadata, -1) + get_file_stats_from_metadata = partial( + deltalake.writer.get_file_stats_from_metadata, num_indexed_cols=-1, columns_to_collect_stats=None + ) def file_visitor(written_file: Any) -> None: path, partition_values = get_partitions_from_path(written_file.path) diff --git a/requirements-dev.txt b/requirements-dev.txt index 623fdefd08..3a1520253f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -48,7 +48,7 @@ tenacity==8.2.3; python_version >= '3.8' # Delta Lake deltalake==0.5.8; platform_system == "Windows" -deltalake==0.15.3; platform_system != "Windows" and python_version >= '3.8' +deltalake==0.18.2; platform_system != "Windows" and python_version >= '3.8' # Databricks databricks-sdk==0.12.0