diff --git a/docker/airflow/Dockerfile b/docker/airflow/Dockerfile index 4eeacc5e6..a1b5a95fe 100644 --- a/docker/airflow/Dockerfile +++ b/docker/airflow/Dockerfile @@ -43,7 +43,7 @@ RUN useradd -m -d ${AIRFLOW_HOME} airflow && \ chown airflow:airflow ${OUTPUT_DIR} USER airflow -ARG AIRFLOW_VERSION=2.3.3 +ARG AIRFLOW_VERSION=2.3.4 WORKDIR ${AIRFLOW_HOME} # Always add the prod req because the dev reqs depend on it for deduplication COPY ${REQUIREMENTS_FILE} requirements_prod.txt ${AIRFLOW_HOME}/ diff --git a/openverse_catalog/dags/commoncrawl/commoncrawl_etl.py b/openverse_catalog/dags/commoncrawl/commoncrawl_etl.py index 5ea710428..fdc78e2fb 100644 --- a/openverse_catalog/dags/commoncrawl/commoncrawl_etl.py +++ b/openverse_catalog/dags/commoncrawl/commoncrawl_etl.py @@ -8,7 +8,7 @@ EmrTerminateJobFlowOperator, ) from airflow.providers.amazon.aws.sensors.emr import EmrJobFlowSensor -from airflow.providers.amazon.aws.sensors.s3 import S3KeySensor, S3PrefixSensor +from airflow.providers.amazon.aws.sensors.s3 import S3KeySensor from airflow.utils.trigger_rule import TriggerRule from common.constants import DAG_DEFAULT_ARGS from commoncrawl.commoncrawl_utils import get_load_s3_task_id, load_file_to_s3 @@ -169,12 +169,13 @@ ) with dag: - check_for_cc_index = S3PrefixSensor( + check_for_cc_index = S3KeySensor( task_id="check_for_cc_index", retries=0, aws_conn_id=AWS_CONN_ID, bucket_name=COMMONCRAWL_BUCKET, - prefix=f"crawl-data/{CC_INDEX_TEMPLATE}", + bucket_key=f"crawl-data/{CC_INDEX_TEMPLATE}*", + wildcard_match=True, poke_interval=60, timeout=60 * 60 * 24 * 3, soft_fail=True, diff --git a/requirements_prod.txt b/requirements_prod.txt index f753eec98..3aec4d73f 100644 --- a/requirements_prod.txt +++ b/requirements_prod.txt @@ -1,5 +1,5 @@ # PYTHON=3.10 -apache-airflow[amazon,postgres,http]==2.3.3 +apache-airflow[amazon,postgres,http]==2.3.4 lxml psycopg2-binary requests-file==1.5.1 diff --git a/tests/dags/common/sensors/test_single_run_external_dags_sensor.py b/tests/dags/common/sensors/test_single_run_external_dags_sensor.py index 6f5d206c8..fd8969594 100644 --- a/tests/dags/common/sensors/test_single_run_external_dags_sensor.py +++ b/tests/dags/common/sensors/test_single_run_external_dags_sensor.py @@ -111,7 +111,7 @@ def test_fails_if_external_dag_missing_sensor_task(self): # TODO: warnings for...(drumroll) Airflow operators 🙃 hopefully this is # TODO: fixed in 2.2.5 or something. # TODO: Update: 2022-05-06/v2.3.0, still an issue! - # TODO: Update: 2022-08-15/v2.3.3, Still an issue (MJSB) + # TODO: Update: 2022-09-19/v2.3.4, Still an issue (MJSB) warnings.simplefilter("ignore", category=DeprecationWarning) dagbag = DagBag(dag_folder=DEV_NULL, include_examples=True) bash_dag = dagbag.dags["example_bash_operator"]