Skip to content

Commit

Permalink
metadata-service: validate that the dockerImageTag is not decremented (
Browse files Browse the repository at this point in the history
  • Loading branch information
alafanechere authored Aug 2, 2024
1 parent 24cf967 commit cc003ed
Show file tree
Hide file tree
Showing 8 changed files with 238 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ def validate(metadata_file_path: pathlib.Path, docs_path: pathlib.Path):
metadata_file_path = metadata_file_path if not metadata_file_path.is_dir() else metadata_file_path / METADATA_FILE_NAME

click.echo(f"Validating {metadata_file_path}...")

metadata, error = validate_and_load(metadata_file_path, PRE_UPLOAD_VALIDATORS, ValidatorOptions(docs_path=str(docs_path)))
if metadata:
click.echo(f"{metadata_file_path} is a valid ConnectorMetadataDefinitionV0 YAML file.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import os
import time
from typing import Optional
from typing import Dict, Optional

import requests

Expand All @@ -27,6 +27,79 @@ def get_docker_hub_auth_token() -> str:
return token


def get_docker_hub_headers() -> Dict | None:
if "DOCKER_HUB_USERNAME" not in os.environ or "DOCKER_HUB_PASSWORD" not in os.environ:
# If the Docker Hub credentials are not provided, we can only anonymously call the Docker Hub API.
# This will only work for public images and lead to a lower rate limit.
return {}
else:
token = get_docker_hub_auth_token()
return {"Authorization": f"JWT {token}"} if token else {}


def get_docker_hub_tags_and_digests(
    image_name: str,
    retries: int = 0,
    wait_sec: int = 30,
    next_page_url: str | None = None,
    tags_and_digests: Dict[str, str] | None = None,
    paginate: bool = True,
) -> Dict[str, str]:
    """Find all released tags and digests for an image.

    Args:
        image_name (str): The image name to get tags and digests for.
        retries (int, optional): The number of times to retry a failed request. Defaults to 0.
        wait_sec (int, optional): The number of seconds to wait between retries. Defaults to 30.
        next_page_url (str | None, optional): The DockerHub page to start from. Defaults to None,
            in which case the first page of the image's tags is requested.
        tags_and_digests (Dict[str, str] | None, optional): Previously accumulated tags and digests
            to extend. Defaults to None.
        paginate (bool, optional): Whether to follow the "next" links returned by DockerHub.
            Defaults to True.

    Returns:
        Dict[str, str]: Mapping of image tag to digest. If a page returns a 404, whatever was
        accumulated so far is returned.

    Raises:
        requests.HTTPError: If a page still fails with a non-404 error once all attempts are used.
    """
    # Build the headers once and reuse them for every page.
    headers = get_docker_hub_headers()
    tags_and_digests = tags_and_digests or {}
    tags_url = next_page_url or f"https://registry.hub.docker.com/v2/repositories/{image_name}/tags"
    # Always perform at least one request, even if a negative retry count is passed.
    attempts = max(retries, 0) + 1

    while tags_url:
        # Allow for retries as the DockerHub API is not always reliable with returning the latest publish.
        for attempt in range(attempts):
            response = requests.get(tags_url, headers=headers)
            if response.ok:
                break

            # This is to handle the case when a connector has not ever been released yet.
            if response.status_code == 404:
                print(f"{tags_url} returned a 404. The connector might not be released yet.")
                print(response)
                return tags_and_digests

            # Only wait when another attempt follows; sleeping after the last one just delays the error.
            if attempt < attempts - 1:
                time.sleep(wait_sec)

        response.raise_for_status()
        json_response = response.json()
        tags_and_digests.update({result["name"]: result.get("digest") for result in json_response.get("results", [])})
        # Follow the pagination link iteratively instead of recursing once per page.
        tags_url = json_response.get("next") if paginate else None

    return tags_and_digests


def get_latest_version_on_dockerhub(image_name: str) -> str | None:
    """Resolve the tag that the ``latest`` tag of an image points to.

    Args:
        image_name (str): The image name to look up on DockerHub.

    Returns:
        str | None: The first tag other than ``latest`` whose digest equals the digest of
        ``latest``, or None when there is no ``latest`` digest or no other tag shares it.
    """
    tags_and_digests = get_docker_hub_tags_and_digests(image_name, retries=3, wait_sec=30)
    latest_digest = tags_and_digests.get("latest")
    if not latest_digest:
        return None

    matching_tags = (tag for tag, digest in tags_and_digests.items() if digest == latest_digest and tag != "latest")
    return next(matching_tags, None)


def is_image_on_docker_hub(image_name: str, version: str, digest: Optional[str] = None, retries: int = 0, wait_sec: int = 30) -> bool:
"""Check if a given image and version exists on Docker Hub.
Expand All @@ -40,13 +113,7 @@ def is_image_on_docker_hub(image_name: str, version: str, digest: Optional[str]
bool: True if the image and version exists on Docker Hub, False otherwise.
"""

if "DOCKER_HUB_USERNAME" not in os.environ or "DOCKER_HUB_PASSWORD" not in os.environ:
# If the Docker Hub credentials are not provided, we can only anonymously call the Docker Hub API.
# This will only work for public images and lead to a lower rate limit.
headers = {}
else:
token = get_docker_hub_auth_token()
headers = {"Authorization": f"JWT {token}"} if token else {}
headers = get_docker_hub_headers()

tag_url = f"https://registry.hub.docker.com/v2/repositories/{image_name}/tags/{version}"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import semver
import yaml
from metadata_service.docker_hub import is_image_on_docker_hub
from metadata_service.docker_hub import get_latest_version_on_dockerhub, is_image_on_docker_hub
from metadata_service.models.generated.ConnectorMetadataDefinitionV0 import ConnectorMetadataDefinitionV0
from pydantic import ValidationError
from pydash.objects import get
Expand Down Expand Up @@ -175,13 +175,36 @@ def validate_pypi_only_for_python(
return True, None


def validate_docker_image_tag_is_not_decremented(
    metadata_definition: ConnectorMetadataDefinitionV0, _validator_opts: ValidatorOptions
) -> ValidationResult:
    """Check that the metadata's dockerImageTag is not lower than the latest version on DockerHub.

    Args:
        metadata_definition: The metadata definition whose ``data.dockerRepository`` and
            ``data.dockerImageTag`` are checked.
        _validator_opts: Unused by this validator.

    Returns:
        A ``(success, error_message)`` pair; ``error_message`` is None on success.
    """
    repository = get(metadata_definition, "data.dockerRepository")
    if not repository:
        return False, "The dockerRepository field is not set"

    candidate_tag = get(metadata_definition, "data.dockerImageTag")
    if not candidate_tag:
        return False, "The dockerImageTag field is not set."

    released_tag = get_latest_version_on_dockerhub(repository)
    # No released version means the connector has never been released to DockerHub;
    # an identical tag needs no semver comparison either.
    if not released_tag or candidate_tag == released_tag:
        return True, None

    is_decremented = semver.Version.parse(candidate_tag) < semver.Version.parse(released_tag)
    if is_decremented:
        return False, f"The dockerImageTag value can't be decremented: it should be equal to or above {released_tag}."
    return True, None


# Validators that the `validate` command passes to validate_and_load.
# NOTE(review): validate_docker_image_tag_is_not_decremented calls get_latest_version_on_dockerhub,
# so running this list presumably requires network access to DockerHub - confirm.
PRE_UPLOAD_VALIDATORS = [
validate_all_tags_are_keyvalue_pairs,
validate_at_least_one_language_tag,
validate_major_version_bump_has_breaking_change_entry,
validate_docs_path_exists,
validate_metadata_base_images_in_dockerhub,
validate_pypi_only_for_python,
validate_docker_image_tag_is_not_decremented,
]

POST_UPLOAD_VALIDATORS = PRE_UPLOAD_VALIDATORS + [
Expand Down
2 changes: 1 addition & 1 deletion airbyte-ci/connectors/metadata_service/lib/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metadata-service"
version = "0.9.0"
version = "0.10.0"
description = ""
authors = ["Ben Church <[email protected]>"]
readme = "README.md"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,24 @@
from click.testing import CliRunner
from metadata_service import commands
from metadata_service.gcs_upload import MetadataUploadInfo, UploadedFile
from metadata_service.validators.metadata_validator import ValidatorOptions
from metadata_service.validators.metadata_validator import ValidatorOptions, validate_docker_image_tag_is_not_decremented
from pydantic import BaseModel, ValidationError, error_wrappers
from test_gcs_upload import stub_is_image_on_docker_hub

# Validators left out of the validator list used by the command tests in this module.
NOT_TEST_VALIDATORS = [
# Not testing validate_docker_image_tag_is_not_decremented here as it's tested independently in test_validators
validate_docker_image_tag_is_not_decremented
]

# commands.PRE_UPLOAD_VALIDATORS without the excluded validators; the tests below patch
# metadata_service.commands.PRE_UPLOAD_VALIDATORS with this list.
PATCHED_VALIDATORS = [v for v in commands.PRE_UPLOAD_VALIDATORS if v not in NOT_TEST_VALIDATORS]

# TEST VALIDATE COMMAND
def test_valid_metadata_yaml_files(mocker, valid_metadata_yaml_files, tmp_path):
runner = CliRunner()

# Mock dockerhub for base image checks
mocker.patch("metadata_service.validators.metadata_validator.is_image_on_docker_hub", side_effect=stub_is_image_on_docker_hub)

mocker.patch("metadata_service.commands.PRE_UPLOAD_VALIDATORS", PATCHED_VALIDATORS)
assert len(valid_metadata_yaml_files) > 0, "No files found"

for file_path in valid_metadata_yaml_files:
Expand All @@ -31,6 +37,7 @@ def test_invalid_metadata_yaml_files(mocker, invalid_metadata_yaml_files, tmp_pa
runner = CliRunner()

mocker.patch("metadata_service.validators.metadata_validator.is_image_on_docker_hub", side_effect=stub_is_image_on_docker_hub)
mocker.patch("metadata_service.commands.PRE_UPLOAD_VALIDATORS", PATCHED_VALIDATORS)

assert len(invalid_metadata_yaml_files) > 0, "No files found"

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


import warnings

import pytest
from metadata_service import docker_hub


@pytest.fixture
def image_name():
    """Name of the DockerHub image queried by the tests in this module."""
    repository = "airbyte/source-faker"
    return repository


def test_get_docker_hub_tags_and_digests(image_name):
    """Fetch the tags of the image and check the expected tags are present and more than 10 are returned."""
    warnings.warn(f"This test can be flaky as its results depends on the current state of {image_name} dockerhub image.", UserWarning)
    fetched = docker_hub.get_docker_hub_tags_and_digests(image_name)
    assert isinstance(fetched, dict)
    for expected_tag, failure_message in (
        ("latest", "The latest tag is not in the returned dict"),
        ("0.1.0", f"The first {image_name} version is not in the returned dict"),
    ):
        assert expected_tag in fetched, failure_message
    tag_count = len(fetched)
    assert tag_count > 10, f"Pagination is likely not working as we expect more than 10 version of {image_name} to be released"


def test_get_latest_version_on_dockerhub(image_name):
    """Check that a latest version can be resolved for the image."""
    warnings.warn(f"This test can be flaky as its results depends on the current state of {image_name} dockerhub image.", UserWarning)
    latest_version = docker_hub.get_latest_version_on_dockerhub(image_name)
    assert latest_version is not None, f"No latest version found for {image_name}. We expect one to exist."
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.


import pytest
import requests
import semver
import yaml
from metadata_service.models.generated.ConnectorMetadataDefinitionV0 import ConnectorMetadataDefinitionV0
from metadata_service.validators import metadata_validator


@pytest.fixture
def metadata_definition():
    """Download the source-faker metadata.yaml from the master branch and parse it into a ConnectorMetadataDefinitionV0."""
    response = requests.get(
        "https://raw.githubusercontent.com/airbytehq/airbyte/master/airbyte-integrations/connectors/source-faker/metadata.yaml"
    )
    # Fail the fixture right away if the file could not be downloaded.
    response.raise_for_status()
    return ConnectorMetadataDefinitionV0.parse_obj(yaml.safe_load(response.text))


@pytest.mark.parametrize(
    "latest_version, current_version,should_pass_validation",
    [("1.0.0", "0.1.0", False), ("1.0.0", "1.0.0", True), ("1.0.0", "1.1.0", True)],
)
def test_validate_docker_image_tag_is_not_decremented(mocker, metadata_definition, latest_version, current_version, should_pass_validation):
    """Run the validator against a patched latest released version and compare the outcome with the expectation."""
    metadata_definition.data.dockerImageTag = current_version
    # Replace the DockerHub lookup so the validator compares against the parametrized latest version.
    mocker.patch.object(metadata_validator, "get_latest_version_on_dockerhub", return_value=latest_version)
    outcome = metadata_validator.validate_docker_image_tag_is_not_decremented(metadata_definition, None)
    assert outcome[0] == should_pass_validation


@pytest.fixture
def current_version(metadata_definition):
    """The dockerImageTag declared by the loaded metadata definition."""
    declared_tag = metadata_definition.data.dockerImageTag
    return declared_tag


@pytest.fixture
def decremented_version(current_version):
    """Return a version string lower than ``current_version``.

    The first non-zero component among major, minor and patch (in that order) is decreased by one.

    Raises:
        ValueError: If major, minor and patch are all zero, as nothing can be decreased.
    """
    version_info = semver.VersionInfo.parse(current_version)
    if version_info.major > 0:
        patched_version_info = version_info.replace(major=version_info.major - 1)
    elif version_info.minor > 0:
        # Decrease the minor component itself: writing it into `major` would produce a
        # higher version whenever minor - 1 is above the current major (which is 0 here).
        patched_version_info = version_info.replace(minor=version_info.minor - 1)
    elif version_info.patch > 0:
        patched_version_info = version_info.replace(patch=version_info.patch - 1)
    else:
        raise ValueError(f"Version {version_info} can't be decremented to prepare our test")
    return str(patched_version_info)


@pytest.fixture
def incremented_version(current_version):
    """Return a version string higher than ``current_version``.

    The first non-zero component among major and minor (in that order) is increased by one;
    when both are zero the patch component is increased instead, so every version,
    including 0.0.0, can be incremented.
    """
    version_info = semver.VersionInfo.parse(current_version)
    if version_info.major > 0:
        patched_version_info = version_info.replace(major=version_info.major + 1)
    elif version_info.minor > 0:
        # Increase the minor component itself rather than writing minor + 1 into `major`.
        patched_version_info = version_info.replace(minor=version_info.minor + 1)
    else:
        # Covers both patch > 0 and 0.0.0: bumping the patch always yields a higher version.
        patched_version_info = version_info.replace(patch=version_info.patch + 1)
    return str(patched_version_info)


def test_validation_fail_on_docker_image_tag_decrement(metadata_definition, decremented_version):
    """The validator must reject a dockerImageTag lower than the one declared in the fetched metadata."""
    original_tag = metadata_definition.data.dockerImageTag
    expected_message = f"The dockerImageTag value can't be decremented: it should be equal to or above {original_tag}."

    metadata_definition.data.dockerImageTag = decremented_version
    result = metadata_validator.validate_docker_image_tag_is_not_decremented(metadata_definition, None)
    success, error_message = result
    assert not success
    assert error_message == expected_message


def test_validation_pass_on_docker_image_tag_increment(metadata_definition, incremented_version):
    """The validator must accept a dockerImageTag higher than the one declared in the fetched metadata."""
    metadata_definition.data.dockerImageTag = incremented_version
    result = metadata_validator.validate_docker_image_tag_is_not_decremented(metadata_definition, None)
    success, error_message = result
    assert success
    assert error_message is None


def test_validation_pass_on_same_docker_image_tag(metadata_definition):
    """The validator must accept the fetched metadata definition when its dockerImageTag is left unchanged."""
    result = metadata_validator.validate_docker_image_tag_is_not_decremented(metadata_definition, None)
    success, error_message = result
    assert success
    assert error_message is None


def test_validation_pass_on_docker_image_no_latest(capsys, metadata_definition):
    """The validator must pass, and the 404 notice must be printed, for the airbyte/unreleased repository."""
    expected_notice = (
        "https://registry.hub.docker.com/v2/repositories/airbyte/unreleased/tags returned a 404. The connector might not be released yet."
    )
    metadata_definition.data.dockerRepository = "airbyte/unreleased"
    success, error_message = metadata_validator.validate_docker_image_tag_is_not_decremented(metadata_definition, None)
    printed_output = capsys.readouterr().out
    assert expected_notice in printed_output
    assert success
    assert error_message is None

0 comments on commit cc003ed

Please sign in to comment.