Skip to content

Commit

Permalink
Enrich data source / asset association (#584)
Browse files Browse the repository at this point in the history
* Add columns to Asset <-> DataSource Assoc. Tests fail.

* Tweak construction

* WIP

* Mostly works. Parameter names need adjustment.

* Add dedicated filepath-based constructors.

* Sort assets by (parameter, num).

* Test registering TIFF seq out of alphanumeric order.

* Test HDF5 virtual dataset.

* copyedit docstring

Co-authored-by: Padraic Shafer <[email protected]>

* Copy edit docstring

Co-authored-by: Padraic Shafer <[email protected]>

* Test for parameter, num uniqueness constraints.

* Create trigger. Caught one case but not the other.

* Trigger works for all cases, on SQLite.

* Tests pass on PostgreSQL also

* Adapters expect URIs

* Catalog and writing tests pass.

* Add docstring with examples

* Directory walker tests pass

* Revert confused change

* TIFF tests pass

* Zarr handles init (no structure) and access (structure).

* Update XDI to use URI.

* Change some custom mimetypes and refactor default creation.

* WIP: Write migration

* Generate TIFF sequences in example data.

* fixes

* Migration is tested on SQLite

* Fix usage

* Make PG trigger conditional same as SQLite

* Add missing constraint for PG.

* Include SQLite test data in CI.

* Run database migrations against example data in CI.

* Target correct database for upgrade

* Rename SQL function

* Explain overly clever test.

* Update docstring for API change.

* Give trigger better name.

* Comment on handling of HDF5 virtual data sets.

---------

Co-authored-by: Padraic Shafer <[email protected]>
  • Loading branch information
danielballan and padraic-shafer authored Jan 23, 2024
1 parent 391696b commit 97ad75b
Show file tree
Hide file tree
Showing 23 changed files with 985 additions and 233 deletions.
15 changes: 15 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,25 @@ jobs:
shell: bash -l {0}
run: source continuous_integration/scripts/start_LDAP.sh

- name: Download SQLite example data.
shell: bash -l {0}
run: source continuous_integration/scripts/download_sqlite_data.sh

- name: Start PostgreSQL service in container.
shell: bash -l {0}
run: source continuous_integration/scripts/start_postgres.sh


- name: Ensure example data is migrated to current catalog database schema.
# The example data is expected to be kept up to date to the latest Tiled
# release, but this CI run may include some unreleased schema changes,
# so we run a migration here.
shell: bash -l {0}
run: |
set -vxeuo pipefail
tiled catalog upgrade-database sqlite+aiosqlite:///tiled_test_db_sqlite.db
tiled catalog upgrade-database postgresql+asyncpg://postgres:secret@localhost:5432/tiled-example-data
- name: Test with pytest
shell: bash -l {0}
run: |
Expand Down
130 changes: 122 additions & 8 deletions tiled/_tests/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import pandas.testing
import pytest
import pytest_asyncio
import sqlalchemy.dialects.postgresql.asyncpg
import sqlalchemy.exc
import tifffile
import xarray

Expand All @@ -17,12 +19,13 @@
from ..catalog import in_memory
from ..catalog.adapter import WouldDeleteData
from ..catalog.explain import record_explanations
from ..catalog.register import create_node_safe
from ..catalog.utils import ensure_uri
from ..client import Context, from_context
from ..client.xarray import write_xarray_dataset
from ..queries import Eq, Key
from ..server.app import build_app, build_app_from_config
from ..server.schemas import Asset, DataSource
from ..server.schemas import Asset, DataSource, Management
from ..structures.core import StructureFamily
from .utils import enter_password

Expand Down Expand Up @@ -197,9 +200,10 @@ async def test_metadata_index_is_used(example_data_adapter):
@pytest.mark.asyncio
async def test_write_array_external(a, tmpdir):
arr = numpy.ones((5, 3))
filepath = tmpdir / "file.tiff"
tifffile.imwrite(str(filepath), arr)
ad = TiffAdapter(str(filepath))
filepath = str(tmpdir / "file.tiff")
data_uri = ensure_uri(filepath)
tifffile.imwrite(filepath, arr)
ad = TiffAdapter(data_uri)
structure = asdict(ad.structure())
await a.create_node(
key="x",
Expand All @@ -211,7 +215,14 @@ async def test_write_array_external(a, tmpdir):
structure=structure,
parameters={},
management="external",
assets=[Asset(data_uri=str(ensure_uri(filepath)), is_directory=False)],
assets=[
Asset(
parameter="data_uri",
num=None,
data_uri=str(data_uri),
is_directory=False,
)
],
)
],
)
Expand All @@ -222,9 +233,10 @@ async def test_write_array_external(a, tmpdir):
@pytest.mark.asyncio
async def test_write_dataframe_external_direct(a, tmpdir):
df = pandas.DataFrame(numpy.ones((5, 3)), columns=list("abc"))
filepath = tmpdir / "file.csv"
filepath = str(tmpdir / "file.csv")
data_uri = ensure_uri(filepath)
df.to_csv(filepath, index=False)
dfa = read_csv(filepath)
dfa = read_csv(data_uri)
structure = asdict(dfa.structure())
await a.create_node(
key="x",
Expand All @@ -236,7 +248,14 @@ async def test_write_dataframe_external_direct(a, tmpdir):
structure=structure,
parameters={},
management="external",
assets=[Asset(data_uri=str(ensure_uri(filepath)), is_directory=False)],
assets=[
Asset(
parameter="data_uri",
num=None,
data_uri=data_uri,
is_directory=False,
)
],
)
],
)
Expand Down Expand Up @@ -411,3 +430,98 @@ async def test_access_control(tmpdir):
public_client["outer_z"]["inner"].read()
with pytest.raises(KeyError):
public_client["outer_x"]


def _filepath_asset(suffix, num):
    """Build a 'filepath'-parameter Asset pointing at file://localhost/test{suffix}."""
    return Asset(
        data_uri=f"file://localhost/test{suffix}",
        is_directory=False,
        parameter="filepath",
        num=num,
    )


@pytest.mark.parametrize(
    "assets",
    [
        # Each case pairs two assets whose (parameter, num) combination
        # should violate the database's uniqueness/consistency constraints.
        [_filepath_asset(1, None), _filepath_asset(2, 1)],
        [_filepath_asset(1, 1), _filepath_asset(2, None)],
        [_filepath_asset(1, None), _filepath_asset(2, None)],
        [_filepath_asset(1, 1), _filepath_asset(2, 1)],
    ],
    ids=[
        "null-then-int",
        "int-then-null",
        "duplicate-null",
        "duplicate-int",
    ],
)
@pytest.mark.asyncio
async def test_constraints_on_parameter_and_num(a, assets):
    """Test constraints enforced by database on 'parameter' and 'num'."""
    arr_adapter = ArrayAdapter.from_array([1, 2, 3])
    # SQLite surfaces the violation as IntegrityError; PostgreSQL (asyncpg)
    # wraps it in DBAPIError.
    expected_errors = (
        sqlalchemy.exc.IntegrityError,  # SQLite
        sqlalchemy.exc.DBAPIError,  # PostgreSQL
    )
    with pytest.raises(expected_errors):
        await create_node_safe(
            a,
            key="test",
            structure_family=arr_adapter.structure_family,
            metadata=dict(arr_adapter.metadata()),
            specs=arr_adapter.specs,
            data_sources=[
                DataSource(
                    mimetype="application/x-test",
                    structure=asdict(arr_adapter.structure()),
                    parameters={},
                    management=Management.external,
                    assets=assets,
                )
            ],
        )
Loading

0 comments on commit 97ad75b

Please sign in to comment.