diff --git a/airbyte/__init__.py b/airbyte/__init__.py index 8e143693..6d6f1f7b 100644 --- a/airbyte/__init__.py +++ b/airbyte/__init__.py @@ -1,8 +1,5 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. -"""PyAirbyte brings Airbyte ELT to every Python developer. - -PyAirbyte brings the power of Airbyte to every Python developer. PyAirbyte provides a set of -utilities to use Airbyte connectors in Python. +"""***PyAirbyte brings the power of Airbyte to every Python developer.*** [![PyPI version](https://badge.fury.io/py/airbyte.svg)](https://badge.fury.io/py/airbyte) [![PyPI - Downloads](https://img.shields.io/pypi/dm/airbyte)](https://pypi.org/project/airbyte/) @@ -121,7 +118,7 @@ ---------------------- -""" +""" # noqa: D415 from __future__ import annotations diff --git a/airbyte/_connector_base.py b/airbyte/_connector_base.py index ad687eef..f14a3924 100644 --- a/airbyte/_connector_base.py +++ b/airbyte/_connector_base.py @@ -230,7 +230,6 @@ def _yaml_spec(self) -> str: @property def docs_url(self) -> str: """Get the URL to the connector's documentation.""" - # TODO: Replace with docs URL from metadata when available return ( f"https://docs.airbyte.com/integrations/{self.connector_type}s/" + self.name.lower().replace(f"{self.connector_type}-", "") diff --git a/airbyte/_executors/declarative.py b/airbyte/_executors/declarative.py index 537924f1..e7fff317 100644 --- a/airbyte/_executors/declarative.py +++ b/airbyte/_executors/declarative.py @@ -40,7 +40,7 @@ class DeclarativeExecutor(Executor): def __init__( self, - manifest: str | dict | Path, + manifest: dict | Path, ) -> None: """Initialize a declarative executor. @@ -53,10 +53,6 @@ def __init__( if isinstance(manifest, Path): self._manifest_dict = cast(dict, json.loads(manifest.read_text())) - elif isinstance(manifest, str): - # TODO: Implement HTTP path parsing - raise NotImplementedError("HTTP path parsing is not yet implemented.") - elif isinstance(manifest, dict): self._manifest_dict = manifest @@ -64,7 +60,10 @@ def __init__( raise PyAirbyteInternalError(message="Manifest must be a dict.") self.declarative_source = ManifestDeclarativeSource(source_config=self._manifest_dict) - self.reported_version: str | None = None # TODO: Consider adding version detection + + # TODO: Consider adding version detection + # https://github.com/airbytehq/airbyte/issues/318 + self.reported_version: str | None = None @property def _cli(self) -> list[str]: diff --git a/airbyte/_executors/local.py b/airbyte/_executors/local.py index bb5311ea..786b690e 100644 --- a/airbyte/_executors/local.py +++ b/airbyte/_executors/local.py @@ -41,9 +41,6 @@ def ensure_installation( try: self.execute(["spec"]) except Exception as e: - # TODO: Improve error handling. We should try to distinguish between - # a connector that is not installed and a connector that is not - # working properly. raise exc.AirbyteConnectorExecutableNotFoundError( connector_name=self.name, ) from e diff --git a/airbyte/_executors/python.py b/airbyte/_executors/python.py index 09d9339b..8e198f53 100644 --- a/airbyte/_executors/python.py +++ b/airbyte/_executors/python.py @@ -98,7 +98,6 @@ def uninstall(self) -> None: @property def docs_url(self) -> str: """Get the URL to the connector's documentation.""" - # TODO: Refactor installation so that this can just live in the Source class. return "https://docs.airbyte.com/integrations/sources/" + self.name.lower().replace( "source-", "" ) diff --git a/airbyte/_future_cdk/sql_processor.py b/airbyte/_future_cdk/sql_processor.py index d945317e..925ab5e3 100644 --- a/airbyte/_future_cdk/sql_processor.py +++ b/airbyte/_future_cdk/sql_processor.py @@ -206,9 +206,6 @@ def get_sql_table_name( ) -> str: """Return the name of the SQL table for the given stream.""" table_prefix = self.sql_config.table_prefix - - # TODO: Add default prefix based on the source name. - return self.normalizer.normalize( f"{table_prefix}{stream_name}", ) @@ -433,6 +430,7 @@ def _ensure_compatible_table_schema( input stream. """ # TODO: Expand this to check for column types and sizes. + # https://github.com/airbytehq/pyairbyte/issues/321 self._add_missing_columns_to_table( stream_name=stream_name, table_name=table_name, diff --git a/airbyte/_processors/sql/postgres.py b/airbyte/_processors/sql/postgres.py index 77afe502..49da07f5 100644 --- a/airbyte/_processors/sql/postgres.py +++ b/airbyte/_processors/sql/postgres.py @@ -48,6 +48,6 @@ class PostgresSqlProcessor(SqlProcessorBase): or another import method. (Relatively low priority, since for now it works fine as-is.) """ - supports_merge_insert = False # TODO: Add native implementation for merge insert + supports_merge_insert = False file_writer_class = JsonlWriter sql_config: PostgresConfig diff --git a/airbyte/_util/api_util.py b/airbyte/_util/api_util.py index 6cc5411a..4bdc0b11 100644 --- a/airbyte/_util/api_util.py +++ b/airbyte/_util/api_util.py @@ -254,7 +254,7 @@ def create_source( models.SourceCreateRequest( name=name, workspace_id=workspace_id, - configuration=config, # TODO: wrap in a proper configuration object + configuration=config, definition_id=None, # Not used alternative to config.sourceType. secret_id=None, # For OAuth, not yet supported ), @@ -337,7 +337,7 @@ def create_destination( models.DestinationCreateRequest( name=name, workspace_id=workspace_id, - configuration=config, # TODO: wrap in a proper configuration object + configuration=config, ), ) if status_ok(response.status_code) and response.destination_response: @@ -368,6 +368,7 @@ def get_destination( if status_ok(response.status_code): # TODO: This is a temporary workaround to resolve an issue where # the destination API response is of the wrong type. + # https://github.com/airbytehq/pyairbyte/issues/320 raw_response: dict[str, Any] = json.loads(response.raw_response.text) raw_configuration: dict[str, Any] = raw_response["configuration"] destination_type = raw_response.get("destinationType") @@ -538,10 +539,6 @@ def delete_connection( # api_key: str, # workspace_id: str | None = None, # ) -> api.SourceCheckResponse: -# """Check a source. - -# # TODO: Need to use legacy Configuration API for this: -# # https://airbyte-public-api-docs.s3.us-east-2.amazonaws.com/rapidoc-api-docs.html#post-/v1/sources/check_connection -# """ +# """Check a source.""" # _ = source_id, workspace_id, api_root, api_key # raise NotImplementedError diff --git a/airbyte/_util/document_rendering.py b/airbyte/_util/document_rendering.py index 9d69717f..072d3a1c 100644 --- a/airbyte/_util/document_rendering.py +++ b/airbyte/_util/document_rendering.py @@ -43,6 +43,7 @@ class DocumentRenderer(BaseModel): render_metadata: bool = False # TODO: Add primary key and cursor key support: + # https://github.com/airbytehq/pyairbyte/issues/319 # primary_key_properties: list[str] # cursor_property: str | None @@ -72,13 +73,6 @@ def render_document(self, record: dict[str, Any]) -> Document: content += yaml.dump({key: record[key] for key in self.metadata_properties}) content += "```\n" - # TODO: Add support for primary key and doc ID generation: - # doc_id: str = ( - # "-".join(str(record[key]) for key in self.primary_key_properties) - # if self.primary_key_properties - # else str(hash(record)) - # ) - if not self.content_properties: pass elif len(self.content_properties) == 1: diff --git a/airbyte/caches/motherduck.py b/airbyte/caches/motherduck.py index 7332ec16..762d1b65 100644 --- a/airbyte/caches/motherduck.py +++ b/airbyte/caches/motherduck.py @@ -48,7 +48,8 @@ def get_sql_alchemy_url(self) -> SecretString: return SecretString( f"duckdb:///md:{self.database}?motherduck_token={self.api_key}" - # f"&schema={self.schema_name}" # TODO: Debug why this doesn't work + # Not sure why this doesn't work. We have to override later in the flow. + # f"&schema={self.schema_name}" ) @overrides diff --git a/airbyte/cloud/workspaces.py b/airbyte/cloud/workspaces.py index 889b5f02..dd53906c 100644 --- a/airbyte/cloud/workspaces.py +++ b/airbyte/cloud/workspaces.py @@ -68,6 +68,7 @@ def connect(self) -> None: # Deploy and delete sources # TODO: Make this a public API + # https://github.com/airbytehq/pyairbyte/issues/228 def _deploy_source( self, source: Source, @@ -123,6 +124,7 @@ def _permanently_delete_source( # Deploy and delete destinations # TODO: Make this a public API + # https://github.com/airbytehq/pyairbyte/issues/228 def _deploy_cache_as_destination( self, cache: CacheBase, @@ -183,6 +185,7 @@ def _permanently_delete_destination( # Deploy and delete connections # TODO: Make this a public API + # https://github.com/airbytehq/pyairbyte/issues/228 def _deploy_connection( self, source: Source | str, diff --git a/airbyte/destinations/base.py b/airbyte/destinations/base.py index a61522c1..ab52c3d3 100644 --- a/airbyte/destinations/base.py +++ b/airbyte/destinations/base.py @@ -256,8 +256,6 @@ def _write_airbyte_message_stream( progress_tracker: ProgressTracker, ) -> None: """Read from the connector and write to the cache.""" - _ = progress_tracker # TODO: Implement progress tracking - # Run optional validation step if not skip_validation: self.validate_config() @@ -293,10 +291,7 @@ def _write_airbyte_message_stream( ) ): if destination_message.type is Type.STATE: - tmp = state_writer.known_stream_names state_writer.write_state(state_message=destination_message.state) - # TODO: DELETEME - assert tmp.issubset(state_writer.known_stream_names) except exc.AirbyteConnectorFailedError as ex: raise exc.AirbyteConnectorWriteError( diff --git a/airbyte/exceptions.py b/airbyte/exceptions.py index 58009879..7208038d 100644 --- a/airbyte/exceptions.py +++ b/airbyte/exceptions.py @@ -51,7 +51,7 @@ NEW_ISSUE_URL = "https://github.com/airbytehq/airbyte/issues/new/choose" -DOCS_URL = "https://docs.airbyte.io/" +DOCS_URL = "https://airbytehq.github.io/PyAirbyte/airbyte.html" # Base error class @@ -158,9 +158,8 @@ class PyAirbyteInputError(PyAirbyteError, ValueError): ValueError in the PyAirbyte API. """ - # TODO: Consider adding a help_url that links to the auto-generated API reference. - guidance = "Please check the provided value and try again." + help_url = DOCS_URL input_value: str | None = None diff --git a/airbyte/progress.py b/airbyte/progress.py index 3cb1bd47..7660c328 100644 --- a/airbyte/progress.py +++ b/airbyte/progress.py @@ -414,7 +414,6 @@ def reset_progress_style( elif meta.is_ci(): # Some CI environments support Rich, but Dagger does not. - # TODO: Consider updating this to check for Dagger specifically. self.style = ProgressStyle.PLAIN else: @@ -590,11 +589,7 @@ def _update_display(self, *, force_refresh: bool = False) -> None: elif self.style == ProgressStyle.RICH and self._rich_view is not None: self._rich_view.update(RichMarkdown(status_message)) - elif self.style == ProgressStyle.PLAIN: - # TODO: Add a plain text progress print option that isn't too noisy. - pass - - elif self.style == ProgressStyle.NONE: + elif self.style in {ProgressStyle.PLAIN, ProgressStyle.NONE}: pass self._last_update_time = time.time() diff --git a/airbyte/sources/base.py b/airbyte/sources/base.py index eb17f477..8b34b46c 100644 --- a/airbyte/sources/base.py +++ b/airbyte/sources/base.py @@ -312,7 +312,6 @@ def _yaml_spec(self) -> str: @property def docs_url(self) -> str: """Get the URL to the connector's documentation.""" - # TODO: Replace with docs URL from metadata when available return "https://docs.airbyte.com/integrations/sources/" + self.name.lower().replace( "source-", "" ) @@ -377,8 +376,6 @@ def get_configured_catalog( stream=stream, destination_sync_mode=DestinationSyncMode.overwrite, primary_key=stream.source_defined_primary_key, - # TODO: The below assumes all sources can coalesce from incremental sync to - # full_table as needed. CDK supports this, so it might be safe: sync_mode=SyncMode.incremental, ) for stream in self.discovered_catalog.streams diff --git a/airbyte/validate.py b/airbyte/validate.py index 475c8151..ee58fd00 100644 --- a/airbyte/validate.py +++ b/airbyte/validate.py @@ -62,7 +62,6 @@ def full_tests(connector_name: str, sample_config: str) -> None: """Run full tests on the connector.""" print("Creating source and validating spec and version...") source = ab.get_source( - # TODO: FIXME: noqa: SIM115, PTH123 connector_name, config=json.loads(Path(sample_config).read_text(encoding="utf-8")), # , install_if_missing=False, @@ -123,7 +122,6 @@ def validate(connector_dir: str, sample_config: str, *, validate_install_only: b metadata_path = Path(connector_dir) / "metadata.yaml" metadata = yaml.safe_load(Path(metadata_path).read_text(encoding="utf-8"))["data"] - # TODO: Use remoteRegistries.pypi.packageName once set for connectors connector_name = metadata["dockerRepository"].replace("airbyte/", "") # create a venv and install the connector diff --git a/pyproject.toml b/pyproject.toml index 804deb77..29c9a556 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -197,7 +197,7 @@ ignore = [ "FIX002", # Allow "TODO:" until release (then switch to requiring links via TDO003) "PLW0603", # Using the global statement to update _cache is discouraged "PLW0108", # Lambda may be unnecessary; consider inlining inner function - "TD003", # Require links for TODOs # TODO: Re-enable when we disable FIX002 + # "TD003", # Require links for TODOs (now enabled) "UP007", # Allow legacy `Union[a, b]` and `Optional[a]` for Pydantic, until we drop Python 3.9 (Pydantic doesn't like it) ]