diff --git a/airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/__init__.py b/airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/__init__.py index 4754c2183383..761da4c7b092 100644 --- a/airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/__init__.py +++ b/airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/__init__.py @@ -5,7 +5,13 @@ from dagster_slack import SlackResource from orchestrator.resources.gcp import gcp_gcs_client, gcs_directory_blobs, gcs_file_blob, gcs_file_manager -from orchestrator.resources.github import github_client, github_connector_repo, github_connectors_directory, github_workflow_runs +from orchestrator.resources.github import ( + github_client, + github_connector_repo, + github_connectors_directory, + github_workflow_runs, + github_connectors_metadata_files, +) from orchestrator.assets import ( connector_test_report, @@ -25,6 +31,7 @@ add_new_metadata_partitions, ) from orchestrator.jobs.connector_test_report import generate_nightly_reports, generate_connector_test_summary_reports +from orchestrator.jobs.metadata import generate_stale_gcs_latest_metadata_file from orchestrator.sensors.registry import registry_updated_sensor from orchestrator.sensors.gcs import new_gcs_blobs_sensor from orchestrator.logging.sentry import setup_dagster_sentry @@ -64,6 +71,7 @@ "github_client": github_client.configured({"github_token": {"env": "GITHUB_METADATA_SERVICE_TOKEN"}}), "github_connector_repo": github_connector_repo.configured({"connector_repo_name": CONNECTOR_REPO_NAME}), "github_connectors_directory": github_connectors_directory.configured({"connectors_path": CONNECTORS_PATH}), + "github_connectors_metadata_files": github_connectors_metadata_files.configured({"connectors_path": CONNECTORS_PATH}), "github_connector_nightly_workflow_successes": github_workflow_runs.configured( { "workflow_id": NIGHTLY_GHA_WORKFLOW_ID, @@ -168,6 +176,11 @@ SCHEDULES = [ 
def _get_md5_of_github_file(context: OpExecutionContext, github_connector_repo: Repository, path: str) -> str:
    """
    Return the base64-encoded md5 digest of a file in the github repo.

    The digest is base64 encoded (rather than hex encoded) because it is later
    compared against the `md5_hash` attribute of the GCS blobs.

    Args:
        context: Dagster context, only used for logging.
        github_connector_repo: Repository object the file is fetched from.
        path: Path of the file inside the repository.

    Returns:
        The base64 representation of the md5 digest of the file contents.
    """
    context.log.debug(f"retrieving contents of {path}")
    file_contents = github_connector_repo.get_contents(path)

    # calculate the md5 hash of the file contents
    context.log.debug(f"calculating md5 hash of {path}")
    md5_digest = hashlib.md5(file_contents.decoded_content).digest()
    return base64.b64encode(md5_digest).decode("utf8")


@asset(required_resource_keys={"github_connector_repo", "github_connectors_metadata_files"}, group_name=GROUP_NAME)
def github_metadata_file_md5s(context):
    """
    Return the md5 and last modified value of every metadata file in the github repo.

    The output value is a dict keyed by the metadata file path; each entry is a
    dict with the keys "md5" and "last_modified".
    """
    github_connector_repo = context.resources.github_connector_repo
    github_connectors_metadata_files = context.resources.github_connectors_metadata_files

    metadata_file_paths = {
        metadata_file["path"]: {
            "md5": _get_md5_of_github_file(context, github_connector_repo, metadata_file["path"]),
            "last_modified": metadata_file["last_modified"],
        }
        for metadata_file in github_connectors_metadata_files
    }

    return Output(metadata_file_paths, metadata={"preview": metadata_file_paths})


def _should_publish_have_ran(datetime_string: str) -> bool:
    """
    Return true if the datetime is more than 2 hours old.

    Args:
        datetime_string: A timestamp in any format understood by dateutil.

    Returns:
        True when the timestamp lies more than two hours in the past.
    """
    # Import the submodule explicitly: a bare `import dateutil` does not
    # guarantee that `dateutil.parser` has been loaded.
    import dateutil.parser

    dt = dateutil.parser.parse(datetime_string)
    if dt.tzinfo is None:
        # A naive datetime cannot be compared with the aware "now" below.
        # NOTE(review): naive timestamps are assumed to be UTC - confirm against the producer.
        dt = dt.replace(tzinfo=datetime.timezone.utc)

    now = datetime.datetime.now(datetime.timezone.utc)
    two_hours_ago = now - datetime.timedelta(hours=2)
    return dt < two_hours_ago


def _to_time_ago(datetime_string: str) -> str:
    """
    Return how long ago the datetime was in a human readable format, e.g. "10 minutes ago".
    """
    # See _should_publish_have_ran for why the submodule is imported explicitly.
    import dateutil.parser

    dt = dateutil.parser.parse(datetime_string)
    return humanize.naturaltime(dt)


def _is_stale(github_file_info: dict, latest_gcs_metadata_md5s: dict) -> bool:
    """
    Return true if the github file is missing from GCS although a publish should already have run.

    Args:
        github_file_info: Dict with the keys "md5" and "last_modified" of the github file.
        latest_gcs_metadata_md5s: Lookup of the latest GCS metadata blobs keyed by md5.
    """
    not_in_gcs = latest_gcs_metadata_md5s.get(github_file_info["md5"]) is None
    return not_in_gcs and _should_publish_have_ran(github_file_info["last_modified"])


@asset(required_resource_keys={"slack", "latest_metadata_file_blobs"}, group_name=GROUP_NAME)
def stale_gcs_latest_metadata_file(context, github_metadata_file_md5s: dict) -> OutputDataFrame:
    """
    Return a list of all metadata files in the github repo and denote whether they are stale or not.

    Stale means that the file in the github repo is not in the latest metadata file blobs.
    When the STALE_REPORT_CHANNEL environment variable is set and at least one file is
    stale, the stale rows are also sent to that slack channel.
    """
    human_readable_stale_bools = {True: "🚨 YES!!!", False: "No"}
    # Declaring the columns up front keeps the sort below working when there are no metadata files at all.
    report_columns = ["stale", "github_path", "github_md5", "github_last_modified", "gcs_md5", "gcs_path"]

    latest_gcs_metadata_file_blobs = context.resources.latest_metadata_file_blobs
    latest_gcs_metadata_md5s = {blob.md5_hash: blob.name for blob in latest_gcs_metadata_file_blobs}

    stale_report = []
    for github_path, github_file_info in github_metadata_file_md5s.items():
        github_md5 = github_file_info["md5"]
        gcs_path = latest_gcs_metadata_md5s.get(github_md5)
        stale_report.append(
            {
                "stale": _is_stale(github_file_info, latest_gcs_metadata_md5s),
                "github_path": github_path,
                "github_md5": github_md5,
                "github_last_modified": _to_time_ago(github_file_info["last_modified"]),
                # The lookup is keyed by md5, so a hit means the GCS blob carries exactly this md5.
                "gcs_md5": github_md5 if gcs_path is not None else None,
                "gcs_path": gcs_path,
            }
        )

    stale_metadata_files_df = pd.DataFrame(stale_report, columns=report_columns)

    # sort by stale true to false, then by github_path
    stale_metadata_files_df = stale_metadata_files_df.sort_values(
        by=["stale", "github_path"],
        ascending=[False, True],
    )

    # If any stale files exist, report to slack
    channel = os.getenv("STALE_REPORT_CHANNEL")
    any_stale = stale_metadata_files_df["stale"].any()
    if channel and any_stale:
        only_stale_df = stale_metadata_files_df[stale_metadata_files_df["stale"].astype(bool)]
        pretty_stale_df = only_stale_df.replace(human_readable_stale_bools)
        stale_report_md = pretty_stale_df.to_markdown(index=False)
        send_slack_message(context, channel, stale_report_md, enable_code_block_wrapping=True)

    stale_metadata_files_df.replace(human_readable_stale_bools, inplace=True)
    return output_dataframe(stale_metadata_files_df)
def _valid_metadata_file_path(path: str, connectors_path: str = CONNECTORS_PATH) -> bool:
    """
    Ensure that the path is a metadata file and not a scaffold file.

    Args:
        path: Repository relative path of the file to check.
        connectors_path: Directory fragment the path must contain. Defaults to CONNECTORS_PATH.

    Returns:
        True when the path contains the metadata file name and the connectors
        path and does not contain "-scaffold-".
    """
    return METADATA_FILE_NAME in path and connectors_path in path and "-scaffold-" not in path


@resource(
    required_resource_keys={"github_connector_repo"},
    config_schema={"connectors_path": StringSource},
)
def github_connectors_metadata_files(resource_context: InitResourceContext) -> List[dict]:
    """
    Return the path, sha and last modified value of every connector metadata file on master.

    The whole git tree of the "master" branch is listed recursively and filtered down to
    the metadata files located under the configured `connectors_path`.
    """
    resource_context.log.info("retrieving github metadata files")

    github_connector_repo = resource_context.resources.github_connector_repo
    # Honour the declared resource config instead of silently falling back to the module constant.
    connectors_path = resource_context.resource_config["connectors_path"]

    repo_file_tree = github_connector_repo.get_git_tree("master", recursive=True).tree
    metadata_file_paths = [
        {
            "path": github_file.path,
            "sha": github_file.sha,
            # NOTE(review): presumably this reflects the HTTP response the tree element was built from
            # rather than the last commit touching the file - confirm against PyGithub.
            "last_modified": github_file.last_modified,
        }
        for github_file in repo_file_tree
        if _valid_metadata_file_path(github_file.path, connectors_path)
    ]

    resource_context.log.info("finished retrieving github metadata files")
    return metadata_file_paths
optional = false python-versions = ">=3.7" files = [ - {file = "alembic-1.11.1-py3-none-any.whl", hash = "sha256:dc871798a601fab38332e38d6ddb38d5e734f60034baeb8e2db5b642fccd8ab8"}, - {file = "alembic-1.11.1.tar.gz", hash = "sha256:6a810a6b012c88b33458fceb869aef09ac75d6ace5291915ba7fae44de372c01"}, + {file = "alembic-1.11.2-py3-none-any.whl", hash = "sha256:7981ab0c4fad4fe1be0cf183aae17689fe394ff874fd2464adb774396faf0796"}, + {file = "alembic-1.11.2.tar.gz", hash = "sha256:678f662130dc540dac12de0ea73de9f89caea9dbea138f60ef6263149bf84657"}, ] [package.dependencies] @@ -466,31 +466,31 @@ test-randomorder = ["pytest-randomly"] [[package]] name = "dagit" -version = "1.4.3" +version = "1.4.4" description = "Web UI for dagster." optional = false python-versions = "*" files = [ - {file = "dagit-1.4.3-py3-none-any.whl", hash = "sha256:110dc66d81478cf0ffcbab0ad1f2829bae685c32008e2e7ef156ee987816c08e"}, - {file = "dagit-1.4.3.tar.gz", hash = "sha256:552dc3abdaec71b90d0fba22495d50c25597ba2dba2c373b426698559993e1c5"}, + {file = "dagit-1.4.4-py3-none-any.whl", hash = "sha256:cf10a16546c6e81618af9cc6cbe8a1914c8e60df191c1fdd38c3ce8e874f64a5"}, + {file = "dagit-1.4.4.tar.gz", hash = "sha256:83778973f07b97ae415ecc67c86ee502395e7d882d474827a4e914766122dbf3"}, ] [package.dependencies] -dagster-webserver = "1.4.3" +dagster-webserver = "1.4.4" [package.extras] -notebook = ["dagster-webserver[notebook] (==1.4.3)"] -test = ["dagster-webserver[test] (==1.4.3)"] +notebook = ["dagster-webserver[notebook] (==1.4.4)"] +test = ["dagster-webserver[test] (==1.4.4)"] [[package]] name = "dagster" -version = "1.4.3" +version = "1.4.4" description = "The data orchestration platform built for productivity." 
optional = false python-versions = "*" files = [ - {file = "dagster-1.4.3-py3-none-any.whl", hash = "sha256:d16c46d27d91ed10e37c35f406bb5a6a349b7b6e2d92443d09d97a78cd079c52"}, - {file = "dagster-1.4.3.tar.gz", hash = "sha256:eb0c9870c3f2e072688c4423d4dfb2dac670870cd1c5e39806f1384f71336c9c"}, + {file = "dagster-1.4.4-py3-none-any.whl", hash = "sha256:8790005fef7d21e65bdf206908706b486181365b908242edf6d0d06a97901a75"}, + {file = "dagster-1.4.4.tar.gz", hash = "sha256:4e4d07609489b3499ab4d3f0b24796f860c57f35d5234d73bc6869f1dda39d47"}, ] [package.dependencies] @@ -520,7 +520,7 @@ tomli = "*" toposort = ">=1.0" tqdm = "*" typing-extensions = ">=4.4.0" -universal-pathlib = "*" +universal-pathlib = "<0.1.0" watchdog = ">=0.8.3" [package.extras] @@ -533,41 +533,41 @@ test = ["buildkite-test-collector", "docker", "grpcio-tools (>=1.44.0)", "mock ( [[package]] name = "dagster-cloud" -version = "1.4.3" +version = "1.4.4" description = "" optional = false python-versions = "*" files = [ - {file = "dagster_cloud-1.4.3-py3-none-any.whl", hash = "sha256:5f55ffb61ee232fc442f0c1c2050034790e0d9b04cd3601b9e4b48cfaddcf126"}, - {file = "dagster_cloud-1.4.3.tar.gz", hash = "sha256:2b87d3ea5f5f52ec4af4fef4c33440554bd1eaa381f634f51f28b42fedc5a2ad"}, + {file = "dagster_cloud-1.4.4-py3-none-any.whl", hash = "sha256:fe0c1a098530d33cdb440dc29d6ae55fdcc02eb1e7ce3a6ea4582342881a6842"}, + {file = "dagster_cloud-1.4.4.tar.gz", hash = "sha256:047cf1dacac012311252cfb505f1229e912e3e175a9cbe0549ae6b3facfd5417"}, ] [package.dependencies] -dagster = "1.4.3" -dagster-cloud-cli = "1.4.3" +dagster = "1.4.4" +dagster-cloud-cli = "1.4.4" pex = "*" questionary = "*" requests = "*" typer = {version = "*", extras = ["all"]} [package.extras] -docker = ["dagster-docker (==0.20.3)", "docker"] -ecs = ["boto3", "dagster-aws (==0.20.3)"] -kubernetes = ["dagster-k8s (==0.20.3)", "kubernetes"] +docker = ["dagster-docker (==0.20.4)", "docker"] +ecs = ["boto3", "dagster-aws (==0.20.4)"] +kubernetes = ["dagster-k8s 
(==0.20.4)", "kubernetes"] pex = ["boto3"] sandbox = ["supervisor"] serverless = ["boto3"] -tests = ["black", "dagster-cloud-test-infra", "dagster-k8s (==0.20.3)", "docker", "httpretty", "isort", "kubernetes", "moto[all]", "mypy", "paramiko", "pylint", "pytest", "types-PyYAML", "types-requests"] +tests = ["black", "dagster-cloud-test-infra", "dagster-k8s (==0.20.4)", "docker", "httpretty", "isort", "kubernetes", "moto[all]", "mypy", "paramiko", "pylint", "pytest", "types-PyYAML", "types-requests"] [[package]] name = "dagster-cloud-cli" -version = "1.4.3" +version = "1.4.4" description = "" optional = false python-versions = "*" files = [ - {file = "dagster_cloud_cli-1.4.3-py3-none-any.whl", hash = "sha256:1fb92d4a1fe4d2582ccf51230e2039338139dd22b58022ba454cbd12e4ebf6fd"}, - {file = "dagster_cloud_cli-1.4.3.tar.gz", hash = "sha256:0536d99cdf9b56ffc3bec26a7dac814e700d12e9e96e180ec8d62d92c52296b7"}, + {file = "dagster_cloud_cli-1.4.4-py3-none-any.whl", hash = "sha256:f38f230bb21a4535765762f92b5d06438a507da7bab57fe7db91c27cc70fe60f"}, + {file = "dagster_cloud_cli-1.4.4.tar.gz", hash = "sha256:6ae9f5bd1b9235108c6131551752953a88613e71c20d9b4086597c8a9966f2a4"}, ] [package.dependencies] @@ -583,18 +583,18 @@ tests = ["freezegun"] [[package]] name = "dagster-gcp" -version = "0.20.3" +version = "0.20.4" description = "Package for GCP-specific Dagster framework op and resource components." 
optional = false python-versions = "*" files = [ - {file = "dagster-gcp-0.20.3.tar.gz", hash = "sha256:b0ec46a1e01933dfc7d73592ba2243a7b7923c79f9da809fd38580b2ccb2ef0d"}, - {file = "dagster_gcp-0.20.3-py3-none-any.whl", hash = "sha256:861bcfafd739f61689dbb031b037760231ed4604f4c3b2b79b607781fa61e2fb"}, + {file = "dagster-gcp-0.20.4.tar.gz", hash = "sha256:b3c76ea8398a41016e58374cd9699514ae1903e503b426347dea17adca0ea758"}, + {file = "dagster_gcp-0.20.4-py3-none-any.whl", hash = "sha256:2cb241f47e98cfbc3f3c2af64e7260923c6ba717929f672f4a039ec988b0de61"}, ] [package.dependencies] -dagster = "1.4.3" -dagster-pandas = "0.20.3" +dagster = "1.4.4" +dagster-pandas = "0.20.4" db-dtypes = "*" google-api-python-client = "*" google-cloud-bigquery = "*" @@ -606,17 +606,17 @@ pyarrow = ["pyarrow"] [[package]] name = "dagster-graphql" -version = "1.4.3" +version = "1.4.4" description = "The GraphQL frontend to python dagster." optional = false python-versions = "*" files = [ - {file = "dagster-graphql-1.4.3.tar.gz", hash = "sha256:75d745774ce66d800654428ddba7e80a16917e3440b32606c8643de09f4b9363"}, - {file = "dagster_graphql-1.4.3-py3-none-any.whl", hash = "sha256:637c32584429b1bd81a753048b26a874c13a863cd89d968efbc9c9e4528e2f46"}, + {file = "dagster-graphql-1.4.4.tar.gz", hash = "sha256:7ca85756393aa6a4d0c2a43044e3a0d3e3a61bffb527fa82c936126296bfb5c6"}, + {file = "dagster_graphql-1.4.4-py3-none-any.whl", hash = "sha256:f919459f1edb8be2e1d02a28fa3600869a27be5d52d66eb253902e155d1a5a04"}, ] [package.dependencies] -dagster = "1.4.3" +dagster = "1.4.4" gql = {version = ">=3.0.0", extras = ["requests"]} graphene = ">=3" requests = "*" @@ -625,49 +625,49 @@ urllib3 = "<2.0.0" [[package]] name = "dagster-pandas" -version = "0.20.3" +version = "0.20.4" description = "Utilities and examples for working with pandas and dagster, an opinionated framework for expressing data pipelines" optional = false python-versions = "*" files = [ - {file = "dagster-pandas-0.20.3.tar.gz", hash = 
"sha256:4ee73b13ee6b70fb5d2fc0131ba2ee1dd22ec9feeb23f95b71930a16b868aa51"}, - {file = "dagster_pandas-0.20.3-py3-none-any.whl", hash = "sha256:18b61825475b1b1e5be110dcf035d1ceb8292e94b54335a9caf920ded8697284"}, + {file = "dagster-pandas-0.20.4.tar.gz", hash = "sha256:954055ce711017e151f3a3f0466d99d55ffc16bf4554e357777d7a02e3413993"}, + {file = "dagster_pandas-0.20.4-py3-none-any.whl", hash = "sha256:f5e37ad885cd44e79f06eae412792b6284f9a0568f4ba606f895fe467cccaf74"}, ] [package.dependencies] -dagster = "1.4.3" +dagster = "1.4.4" pandas = "*" [[package]] name = "dagster-slack" -version = "0.20.3" +version = "0.20.4" description = "A Slack client resource for posting to Slack" optional = false python-versions = "*" files = [ - {file = "dagster-slack-0.20.3.tar.gz", hash = "sha256:0a8fa894a596ff6398d4043c832199e4392d315a189e0ccd6dbdf7213ba6fe14"}, - {file = "dagster_slack-0.20.3-py3-none-any.whl", hash = "sha256:558b3627193f30aa26be5326b357b9e1382c2c31e946c25ab206f03797ce71ae"}, + {file = "dagster-slack-0.20.4.tar.gz", hash = "sha256:c0a8dcedd722f4d0f15eb4322d6a0160f0360e24e1bfffc612624f967b99e3d2"}, + {file = "dagster_slack-0.20.4-py3-none-any.whl", hash = "sha256:4e418012bd94fda8303044282aedaec1d11ce697f7495161f23b745885223914"}, ] [package.dependencies] -dagster = "1.4.3" +dagster = "1.4.4" slack-sdk = "*" [[package]] name = "dagster-webserver" -version = "1.4.3" +version = "1.4.4" description = "Web UI for dagster." 
optional = false python-versions = "*" files = [ - {file = "dagster_webserver-1.4.3-py3-none-any.whl", hash = "sha256:70c9cb633d6b782ef4b93a950ce05393c3b84dc567af49bf740eb870cc7ab0a6"}, - {file = "dagster_webserver-1.4.3.tar.gz", hash = "sha256:c046a8798e929474063f9bae211f35a44a157d9ac91222dabd0c42284d814be0"}, + {file = "dagster_webserver-1.4.4-py3-none-any.whl", hash = "sha256:80ebb430617a1949c7d3019fd2cc29178467d1d6b8136bd09b64fb13ba09103a"}, + {file = "dagster_webserver-1.4.4.tar.gz", hash = "sha256:3b1b0316d5937478f8ff734c2de10e2f5ae3da500fbdea47948947496fc60646"}, ] [package.dependencies] click = ">=7.0,<9.0" -dagster = "1.4.3" -dagster-graphql = "1.4.3" +dagster = "1.4.4" +dagster-graphql = "1.4.4" starlette = "*" uvicorn = {version = "*", extras = ["standard"]} @@ -1683,6 +1683,20 @@ files = [ [package.dependencies] pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_version >= \"3.8\""} +[[package]] +name = "humanize" +version = "4.7.0" +description = "Python humanize utilities" +optional = false +python-versions = ">=3.8" +files = [ + {file = "humanize-4.7.0-py3-none-any.whl", hash = "sha256:df7c429c2d27372b249d3f26eb53b07b166b661326e0325793e0a988082e3889"}, + {file = "humanize-4.7.0.tar.gz", hash = "sha256:7ca0e43e870981fa684acb5b062deb307218193bca1a01f2b2676479df849b3a"}, +] + +[package.extras] +tests = ["freezegun", "pytest", "pytest-cov"] + [[package]] name = "idna" version = "3.4" @@ -2022,13 +2036,13 @@ url = "../lib" [[package]] name = "more-itertools" -version = "10.0.0" +version = "10.1.0" description = "More routines for operating on iterables, beyond itertools" optional = false python-versions = ">=3.8" files = [ - {file = "more-itertools-10.0.0.tar.gz", hash = "sha256:cd65437d7c4b615ab81c0640c0480bc29a550ea032891977681efd28344d51e1"}, - {file = "more_itertools-10.0.0-py3-none-any.whl", hash = "sha256:928d514ffd22b5b0a8fce326d57f423a55d2ff783b093bab217eda71e732330f"}, + {file = "more-itertools-10.1.0.tar.gz", hash = 
"sha256:626c369fa0eb37bac0291bce8259b332fd59ac792fa5497b59837309cd5b114a"}, + {file = "more_itertools-10.1.0-py3-none-any.whl", hash = "sha256:64e0735fcfdc6f3464ea133afe8ea4483b1c5fe3a3d69852e6503b43a0b222e6"}, ] [[package]] @@ -3209,13 +3223,13 @@ full = ["numpy"] [[package]] name = "referencing" -version = "0.30.0" +version = "0.30.1" description = "JSON Referencing + Python" optional = false python-versions = ">=3.8" files = [ - {file = "referencing-0.30.0-py3-none-any.whl", hash = "sha256:c257b08a399b6c2f5a3510a50d28ab5dbc7bbde049bcaf954d43c446f83ab548"}, - {file = "referencing-0.30.0.tar.gz", hash = "sha256:47237742e990457f7512c7d27486394a9aadaf876cbfaa4be65b27b4f4d47c6b"}, + {file = "referencing-0.30.1-py3-none-any.whl", hash = "sha256:185d4a29f001c6e8ae4dad3861e61282a81cb01b9f0ef70a15450c45c6513a0d"}, + {file = "referencing-0.30.1.tar.gz", hash = "sha256:9370c77ceefd39510d70948bbe7375ce2d0125b9c11fd380671d4de959a8e3ce"}, ] [package.dependencies] @@ -3749,20 +3763,20 @@ files = [ [[package]] name = "universal-pathlib" -version = "0.1.0" +version = "0.0.24" description = "pathlib api extended to use fsspec backends" optional = false python-versions = ">=3.8" files = [ - {file = "universal_pathlib-0.1.0-py3-none-any.whl", hash = "sha256:307cf3963eb2396728aca76c3c886e3e73d6569bd4dfa399c954b617a972dd4d"}, - {file = "universal_pathlib-0.1.0.tar.gz", hash = "sha256:2eace58c8654661f331ef73206a14705bba7a4955816993a99fb9eb151b2a238"}, + {file = "universal_pathlib-0.0.24-py3-none-any.whl", hash = "sha256:a2e907b11b1b3f6e982275e5ac0c58a4d34dba2b9e703ecbe2040afa572c741b"}, + {file = "universal_pathlib-0.0.24.tar.gz", hash = "sha256:fcbffb95e4bc69f704af5dde4f9a624b2269f251a38c81ab8bec19dfeaad830f"}, ] [package.dependencies] fsspec = "*" [package.extras] -dev = ["adlfs", "aiohttp", "cheroot", "gcsfs", "hadoop-test-cluster", "moto[s3,server]", "mypy (==1.3.0)", "pyarrow", "pydantic", "pydantic-settings", "pylint (==2.17.4)", "pytest (==7.3.2)", "pytest-cov (==4.1.0)", 
"pytest-mock (==3.11.1)", "pytest-sugar (==0.9.6)", "requests", "s3fs", "webdav4[fsspec]", "wsgidav"] +dev = ["adlfs", "aiohttp", "cheroot", "gcsfs", "hadoop-test-cluster", "moto[s3,server]", "mypy (==1.3.0)", "pyarrow", "pylint (==2.17.4)", "pytest (==7.3.2)", "pytest-cov (==4.1.0)", "pytest-mock (==3.11.1)", "pytest-sugar (==0.9.6)", "requests", "s3fs", "webdav4[fsspec]", "wsgidav"] tests = ["mypy (==1.3.0)", "pylint (==2.17.4)", "pytest (==7.3.2)", "pytest-cov (==4.1.0)", "pytest-mock (==3.11.1)", "pytest-sugar (==0.9.6)"] [[package]] @@ -4339,4 +4353,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "006df8f9463d714cceaa9041f0a2f542bbb12beb5681fb21fb5f850a93eacd04" +content-hash = "8c6fa8dc9750af9e32ac39bfb45a960721098d735bd81f5baf8134921127f16d" diff --git a/airbyte-ci/connectors/metadata_service/orchestrator/pyproject.toml b/airbyte-ci/connectors/metadata_service/orchestrator/pyproject.toml index 837c9b74569b..36eb30b42eff 100644 --- a/airbyte-ci/connectors/metadata_service/orchestrator/pyproject.toml +++ b/airbyte-ci/connectors/metadata_service/orchestrator/pyproject.toml @@ -28,6 +28,8 @@ pydantic = "^1.10.6" dagster-slack = "^0.20.2" sentry-sdk = "^1.28.1" semver = "^3.0.1" +python-dateutil = "^2.8.2" +humanize = "^4.7.0" [tool.poetry.group.dev.dependencies] diff --git a/airbyte-ci/connectors/metadata_service/orchestrator/tests/test_debug.py b/airbyte-ci/connectors/metadata_service/orchestrator/tests/test_debug.py index 3cfc0bc7bbfa..bf46d5321099 100644 --- a/airbyte-ci/connectors/metadata_service/orchestrator/tests/test_debug.py +++ b/airbyte-ci/connectors/metadata_service/orchestrator/tests/test_debug.py @@ -4,8 +4,9 @@ from orchestrator.assets.connector_test_report import generate_nightly_report, persist_connectors_test_summary_files from orchestrator.assets.registry_entry import registry_entry, metadata_entry from orchestrator.assets.registry 
def debug_github_folders():
    """
    Debug entry point: materialize the github metadata md5s and the stale metadata report.

    Builds an op context from the github and metadata resource trees, computes the
    md5s of the github metadata files and feeds them into the stale report asset.
    """
    combined_resources = {}
    combined_resources.update(GITHUB_RESOURCE_TREE)
    combined_resources.update(METADATA_RESOURCE_TREE)
    op_context = build_op_context(resources=combined_resources)

    md5s_by_path = github_metadata_file_md5s(op_context).value
    stale_gcs_latest_metadata_file(op_context, md5s_by_path).value