diff --git a/Cargo.lock b/Cargo.lock index 1b735294..ef21d9e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1371,7 +1371,7 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "letsql" -version = "0.1.3" +version = "0.1.4" dependencies = [ "arrow", "arrow-ord", diff --git a/Cargo.toml b/Cargo.toml index c9a4e903..bcf2f176 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "letsql" -version = "0.1.3" +version = "0.1.4" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/examples/penguins_example.py b/examples/penguins_example.py new file mode 100644 index 00000000..83c38663 --- /dev/null +++ b/examples/penguins_example.py @@ -0,0 +1,11 @@ +import letsql as ls +from letsql.common.caching import ParquetCacheStorage +from pathlib import Path + +t = ls.examples.penguins.fetch() + +con = t.op().source + +t.filter([t.species == "Adelie"]).cache( + storage=ParquetCacheStorage(source=con, path=Path.cwd()) +).execute() diff --git a/poetry.lock b/poetry.lock index 9f9f4801..758ea7f0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1497,6 +1497,7 @@ optional = false python-versions = ">=3.9" files = [ {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"}, + {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"}, {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"}, {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"}, {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"}, @@ -1517,6 +1518,7 @@ files = [ {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"}, {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"}, {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"}, {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"}, {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"}, {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"}, diff --git a/pyproject.toml b/pyproject.toml index 6af60bf8..cd17344d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,27 @@ build-backend = "maturin" [project] name = "letsql" dynamic = ["version"] +dependencies = [ + "ibis-framework==9.0.0 ; python_version >= '3.10' and python_version < '4.0'", + "dask==2023.12.1 ; python_version >= '3.10' and python_version < '4.0'", + "attrs==23.2.0 ; python_version >= '3.10' and python_version < '4.0'", + "connectorx==0.3.2 ; python_version >= '3.10' and python_version < '4.0'", + "psycopg2-binary==2.9.9 ; python_version >= '3.10' and python_version < '4.0'", + "sqlalchemy==2.0.29 ; python_version >= '3.10' and python_version < '4.0'", + "pyarrow==13.0.0 ; python_version >= '3.10' and python_version < '4.0'", + "palmerpenguins==0.1.4 ; python_version >= '3.10' and python_version < '4.0'", + "structlog==24.2.0 ; python_version >= '3.10' and python_version < '4.0'", + "pytest-mock==3.14.0 ; python_version >= '3.10' and python_version < '4.0'", +] requires-python = ">=3.7" +authors = [ + { name = "Hussain Sultan", email = "hussain@letsql.com" }, +] +maintainers = [ + { email = "Dan Lovell " }, + { email = "Daniel Mesejo " }, +] +description = "Data processing library built on top of Ibis and DataFusion to write multi-engine data workflows." readme = "README.md" license = { file = "LICENSE" } classifiers = [ @@ -25,6 +45,18 @@ Repository = "https://github.com/letsql/letsql.git" Issues = "https://github.com/letsql/letsql/issues" Changelog = "https://github.com/letsql/letsql/blob/main/CHANGELOG.md" +[project.optional-dependencies] +duckb = [ + "duckdb==0.10.3 ; python_version >= '3.10' and python_version < '4.0'" +] +datafusion = [ + "datafusion==34.0.0 ; python_version >= '3.10' and python_version < '4.0'" +] +snowflake = [ + "snowflake-connector-python==3.10.1 ; python_version >= '3.10' and python_version < '4.0'" +] + + [tool.maturin] module-name = "letsql._internal" python-source = "python" diff --git a/python/letsql/backends/let/__init__.py b/python/letsql/backends/let/__init__.py index 83afd323..54d5626a 100644 --- a/python/letsql/backends/let/__init__.py +++ b/python/letsql/backends/let/__init__.py @@ -9,7 +9,6 @@ from ibis import BaseBackend from ibis.expr import types as ir from ibis.expr.schema import SchemaLike -from ibis.backends.datafusion import Backend as IbisDataFusionBackend from sqlglot import exp, parse_one import letsql.backends.let.hotfix # noqa: F401 @@ -64,7 +63,10 @@ def register( table_or_expr = self._sources.get_table_or_op(table_or_expr) backend = self._sources.get_backend(table_or_expr) - if isinstance(backend, (DataFusionBackend, IbisDataFusionBackend)): + if ( + isinstance(backend, DataFusionBackend) + or getattr(backend, "name", "") == DataFusionBackend.name + ): source = _get_datafusion_dataframe(backend, source) registered_table = super().register(source, table_name=table_name, **kwargs) diff --git a/python/letsql/common/utils/dask_normalize_expr.py b/python/letsql/common/utils/dask_normalize_expr.py index be90fce2..a2cc97b0 100644 --- a/python/letsql/common/utils/dask_normalize_expr.py +++ b/python/letsql/common/utils/dask_normalize_expr.py @@ -5,6 +5,7 @@ import ibis.expr.operations.relations as ir import sqlglot as sg +import letsql from letsql.expr.relations import ( make_native_op, ) @@ -18,15 +19,12 @@ def expr_is_bound(expr): def unbound_expr_to_default_sql(expr): if expr_is_bound(expr): raise ValueError - default_sql = ibis.to_sql( - expr, - dialect=ibis.options.sql.default_dialect, - ) + default_sql = letsql.to_sql(expr) return str(default_sql) def normalize_memory_databasetable(dt): - if dt.source.name not in ("pandas", "datafusion", "duckdb"): + if dt.source.name not in ("pandas", "let", "datafusion", "duckdb"): raise ValueError return dask.base._normalize_seq_func( ( @@ -175,7 +173,7 @@ def normalize_backend(con): con_details = {k: con_dct[k] for k in ("host", "port", "dbname")} elif name == "pandas": con_details = id(con.dictionary) - elif name in ("datafusion", "duckdb"): + elif name in ("datafusion", "duckdb", "let"): con_details = id(con.con) else: raise ValueError diff --git a/python/letsql/config.py b/python/letsql/config.py index c3f8d68c..d11c7e7f 100644 --- a/python/letsql/config.py +++ b/python/letsql/config.py @@ -84,6 +84,19 @@ class Repr(Config): interactive: Interactive = Interactive() +class SQL(Config): + """SQL-related options. + + Attributes + ---------- + dialect : str + Dialect to use for printing SQL when the backend cannot be determined. + + """ + + dialect: str = "datafusion" + + class Options(Config): """LETSQL configuration options @@ -100,6 +113,7 @@ class Options(Config): cache: Cache = Cache() backend: Optional[Any] = None repr: Repr = Repr() + sql: SQL = SQL() @property def interactive(self) -> bool: diff --git a/python/letsql/expr/api.py b/python/letsql/expr/api.py index 2c8d4702..1219393d 100644 --- a/python/letsql/expr/api.py +++ b/python/letsql/expr/api.py @@ -13,8 +13,10 @@ import ibis.expr.schema as sch import ibis.expr.types as ir from ibis import api +from ibis.backends.sql.dialects import DataFusion from ibis.common.deferred import Deferred, _, deferrable from ibis.expr.schema import Schema +from ibis.expr.sql import SQLString from ibis.expr.types import ( Column, DateValue, @@ -90,6 +92,7 @@ "table", "time", "today", + "to_sql", "timestamp", "union", "uuid", @@ -1515,3 +1518,27 @@ def interval( microseconds=microseconds, nanoseconds=nanoseconds, ) + + +def to_sql(expr: ir.Expr, pretty: bool = True) -> SQLString: + """Return the formatted SQL string for an expression. + + Parameters + ---------- + expr + Ibis expression. + pretty + Whether to use pretty formatting. + + Returns + ------- + str + Formatted SQL string + + """ + from letsql.config import _backend_init + + con = _backend_init() + sg_expr = con._to_sqlglot(expr.unbind()) + sql = sg_expr.sql(dialect=DataFusion, pretty=pretty) + return SQLString(sql)