From 328be4b5650e9001f46d7c5d899b61c1e652f880 Mon Sep 17 00:00:00 2001
From: Artem Inzhyyants <36314070+artem1205@users.noreply.github.com>
Date: Mon, 29 Jul 2024 12:36:32 +0200
Subject: [PATCH] fix(airbyte-cdk): fix declarative schema refs (#42844)

Signed-off-by: Artem Inzhyyants
---
 .../declarative_component_schema.yaml         |  2 +-
 .../models/declarative_component_schema.py    |  6 +--
 .../test_manifest_declarative_source.py       | 41 +++++++++++++++++++
 3 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml
index ab1e2fec0aa4..decb6c41c914 100644
--- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml
+++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml
@@ -387,7 +387,7 @@ definitions:
       decoder:
         title: Decoder
         description: Component decoding the response so records can be extracted.
-        "$ref": "#/definitions/Decoder"
+        "$ref": "#/definitions/JsonDecoder"
       $parameters:
         type: object
         additionalProperties: true
diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py
index 0a8288b34aa4..ec7d65d4c255 100644
--- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py
+++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py
@@ -928,10 +928,6 @@ class WaitUntilTimeFromHeader(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters')
 
 
-class Decoder(BaseModel):
-    __root__: Any
-
-
 class AddedFieldDefinition(BaseModel):
     type: Literal['AddedFieldDefinition']
     path: List[str] = Field(
@@ -1040,7 +1036,7 @@ class CursorPagination(BaseModel):
         ],
         title='Stop Condition',
     )
-    decoder: Optional[Decoder] = Field(
+    decoder: Optional[JsonDecoder] = Field(
         None,
         description='Component decoding the response so records can be extracted.',
         title='Decoder',
diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_manifest_declarative_source.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_manifest_declarative_source.py
index ec138ccf4ef5..2d318e0a85a4 100644
--- a/airbyte-cdk/python/unit_tests/sources/declarative/test_manifest_declarative_source.py
+++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_manifest_declarative_source.py
@@ -7,6 +7,7 @@ import os
 import sys
 from copy import deepcopy
+from pathlib import Path
 from typing import Any, List, Mapping
 from unittest.mock import call, patch
 
@@ -1261,3 +1262,43 @@ def _run_read(manifest: Mapping[str, Any], stream_name: str) -> List[AirbyteMess
         ]
     )
     return list(source.read(logger, {}, catalog, {}))
+
+
+def test_declarative_component_schema_valid_ref_links():
+    def load_yaml(file_path) -> Mapping[str, Any]:
+        with open(file_path, 'r') as file:
+            return yaml.safe_load(file)
+
+    def extract_refs(data, base_path='#') -> List[str]:
+        refs = []
+        if isinstance(data, dict):
+            for key, value in data.items():
+                if key == '$ref' and isinstance(value, str) and value.startswith('#'):
+                    ref_path = value
+                    refs.append(ref_path)
+                else:
+                    refs.extend(extract_refs(value, base_path))
+        elif isinstance(data, list):
+            for item in data:
+                refs.extend(extract_refs(item, base_path))
+        return refs
+
+    def resolve_pointer(data: Mapping[str, Any], pointer: str) -> bool:
+        parts = pointer.split('/')[1:]  # Skip the first empty part due to leading '#/'
+        current = data
+        try:
+            for part in parts:
+                part = part.replace('~1', '/').replace('~0', '~')  # Unescape JSON Pointer
+                current = current[part]
+            return True
+        except (KeyError, TypeError):
+            return False
+
+    def validate_refs(yaml_file: str) -> List[str]:
+        data = load_yaml(yaml_file)
+        refs = extract_refs(data)
+        invalid_refs = [ref for ref in refs if not resolve_pointer(data, ref.replace('#', ''))]
+        return invalid_refs
+
+    yaml_file_path = Path(__file__).resolve().parent.parent.parent.parent / 'airbyte_cdk/sources/declarative/declarative_component_schema.yaml'
+    assert not validate_refs(yaml_file_path)