From bdba3c38fb4d5ae01902ac4db93cdaa2d6a3d65c Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 16 Aug 2024 11:42:22 -0400 Subject: [PATCH] [c++] Corner-case bug in extend-enumeration logic (#2897) * Add Python unit-test coverage * run `make format` * [c++] Corner-case bug in extend-enumeration logic * lint --- apis/python/tests/test_dataframe.py | 9 +++------ libtiledbsoma/src/soma/soma_array.cc | 11 +++++++++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/apis/python/tests/test_dataframe.py b/apis/python/tests/test_dataframe.py index 76e7f8c52b..514503f1fa 100644 --- a/apis/python/tests/test_dataframe.py +++ b/apis/python/tests/test_dataframe.py @@ -1642,10 +1642,7 @@ def test_timestamped_schema_evolve(tmp_path): atbl = pa.Table.from_pydict( { "soma_joinid": [3, 4], - "myenum": pd.Series(["b", "c"], dtype="category"), - # TODO https://github.com/single-cell-data/TileDB-SOMA/issues/2896 - # "myenum": pd.Series(['b', 'b'], dtype='category'), - # Perhaps leave this t=3 as-is, and add a write of ['c', 'c'] at t=4. + "myenum": pd.Series(["b", "b"], dtype="category"), } ) with soma.DataFrame.open(uri, "w", tiledb_timestamp=3) as sdf: @@ -1661,8 +1658,8 @@ def test_timestamped_schema_evolve(tmp_path): with soma.DataFrame.open(uri, tiledb_timestamp=3) as sdf: table = sdf.read().concat() - assert table["myenum"].to_pylist() == ["a", "b", "a", "b", "c"] + assert table["myenum"].to_pylist() == ["a", "b", "a", "b", "b"] with soma.DataFrame.open(uri) as sdf: table = sdf.read().concat() - assert table["myenum"].to_pylist() == ["a", "b", "a", "b", "c"] + assert table["myenum"].to_pylist() == ["a", "b", "a", "b", "b"] diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc index af323c99cf..ccda688494 100644 --- a/libtiledbsoma/src/soma/soma_array.cc +++ b/libtiledbsoma/src/soma/soma_array.cc @@ -431,6 +431,17 @@ bool SOMAArray::_extend_and_evolve_schema_str( index_schema->format = ArrowAdapter::to_arrow_format(disk_index_type) .data(); return true; + } else { + // Example: + // + // * Already on storage/schema there are values a,b,c with indices + // 0,1,2. + // * User appends values b,c which, within the Arrow data coming in + // from the user, have indices 0,1. + // * We need to remap those to 1,2. + + SOMAArray::_remap_indexes( + column_name, enmr, enums_in_write, index_schema, index_array); } return false; }