Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(dora): use api v2 #106

Merged
merged 14 commits into from
Jun 26, 2023
3 changes: 2 additions & 1 deletion .template.env
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ AIRFLOW_CONN_S3_SOURCES=
BAN_API_URL=https://api-adresse.data.gouv.fr
CD35_FILE_URL=https://data.ille-et-vilaine.fr/dataset/8d5ec0f0-ebe1-442d-9d99-655b37d5ad07/resource/665776ae-fa25-46ab-9bfd-c4241866f03f/download/annuaire_sociale_fixe.csv
CD72_FILE_URL=
DORA_API_URL=https://api.dora.fabrique.social.gouv.fr/api/v1/
DORA_API_TOKEN=
DORA_API_URL=https://api.dora.inclusion.beta.gouv.fr/api/v2/
EMPLOIS_API_TOKEN=
EMPLOIS_API_URL=https://emplois.inclusion.beta.gouv.fr/api/v1/structures/
ETAB_PUB_FILE_URL=https://www.data.gouv.fr/fr/datasets/r/73302880-e4df-4d4c-8676-1a61bb997f3d
Expand Down
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ x-airflow-common:
AIRFLOW_VAR_DATAGOUV_API_URL: ${DATAGOUV_API_URL}
AIRFLOW_VAR_DATAGOUV_DI_DATASET_ID: ${DATAGOUV_DI_DATASET_ID}
AIRFLOW_VAR_DATAGOUV_DI_RESOURCE_IDS: ${DATAGOUV_DI_RESOURCE_IDS}
AIRFLOW_VAR_DORA_API_TOKEN: ${DORA_API_TOKEN}
AIRFLOW_VAR_DORA_API_URL: ${DORA_API_URL}
AIRFLOW_VAR_EMPLOIS_API_TOKEN: ${EMPLOIS_API_TOKEN}
AIRFLOW_VAR_EMPLOIS_API_URL: ${EMPLOIS_API_URL}
Expand Down
3 changes: 1 addition & 2 deletions pipeline/dags/dags/notifications.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ def notify_webhook(context: Context, conn_id: str, format_fn):

try:
http_hook = HttpHook(http_conn_id=conn_id)
http_hook.run(json={"text": format_fn(context)})
except exceptions.AirflowNotFoundException:
logger.warning("Webhook notifier disabled.")
return

http_hook.run(json={"text": format_fn(context)})
2 changes: 2 additions & 0 deletions pipeline/dags/dags/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
"id": "structures",
"filename": "structures.json",
"url": Variable.get("DORA_API_URL", None),
"token": Variable.get("DORA_API_TOKEN", None),
},
{
"id": "services",
"filename": "services.json",
"url": Variable.get("DORA_API_URL", None),
"token": Variable.get("DORA_API_TOKEN", None),
},
],
},
Expand Down
4 changes: 3 additions & 1 deletion pipeline/dags/import_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@

default_args = {
"on_failure_callback": lambda context: notify_webhook(
context, "mattermost", format_failure
context,
conn_id="mattermost",
format_fn=format_failure,
)
}

Expand Down
77 changes: 77 additions & 0 deletions pipeline/dbt/models/_sources.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,84 @@ sources:
schema: dora
tables:
- name: structures
columns:
- name: data
tests:
- expect_column_value_paths_to_exist:
config:
severity: error
path_list:
- '$.antenne'
- '$.date_maj'
- '$.labels_autres'
- '$.labels_nationaux'
- '$.latitude'
- '$.longitude'
- '$.thematiques'
- '$.accessibilite'
- '$.adresse'
- '$.code_insee'
- '$.code_postal'
- '$.commune'
- '$.complement_adresse'
- '$.courriel'
- '$.horaires_ouverture'
- '$.id'
- '$.lien_source'
- '$.nom'
- '$.presentation_detail'
- '$.presentation_resume'
- '$.rna'
- '$.siret'
- '$.site_web'
- '$.source'
- '$.telephone'
- '$.typologie'
- name: services
columns:
- name: data
tests:
- expect_column_value_paths_to_exist:
config:
severity: warn
path_list:
- '$.contact_public'
- '$.cumulable'
- '$.date_creation'
- '$.date_maj'
- '$.date_suspension'
- '$.justificatifs'
- '$.latitude'
- '$.longitude'
- '$.modes_accueil'
- '$.pre_requis'
- '$.profils'
- '$.thematiques'
- '$.types'
- '$.adresse'
- '$.code_insee'
- '$.code_postal'
- '$.commune'
- '$.complement_adresse'
- '$.contact_nom'
- '$.contact_prenom'
- '$.courriel'
- '$.formulaire_en_ligne'
- '$.frais_autres'
- '$.frais'
- '$.id'
- '$.lien_source'
- '$.nom'
- '$.presentation_resume'
- '$.presentation_detail'
- '$.prise_rdv'
- '$.recurrence'
- '$.source'
- '$.structure_id'
- '$.telephone'
- '$.zone_diffusion_code'
- '$.zone_diffusion_nom'
- '$.zone_diffusion_type'

- name: finess
schema: finess
Expand Down
36 changes: 18 additions & 18 deletions pipeline/dbt/models/intermediate/dora/int_dora__adresses.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,29 @@ services AS (

structure_adresses AS (
SELECT
id AS "id",
longitude AS "longitude",
latitude AS "latitude",
_di_source_id AS "source",
NULLIF(address_2, '') AS "complement_adresse",
NULLIF(city, '') AS "commune",
NULLIF(address_1, '') AS "adresse",
NULLIF(postal_code, '') AS "code_postal",
NULLIF(city_code, '') AS "code_insee"
id AS "id",
longitude AS "longitude",
latitude AS "latitude",
_di_source_id AS "source",
complement_adresse AS "complement_adresse",
commune AS "commune",
adresse AS "adresse",
code_postal AS "code_postal",
code_insee AS "code_insee"
FROM structures
),

service_adresses AS (
SELECT
id AS "id",
longitude AS "longitude",
latitude AS "latitude",
_di_source_id AS "source",
NULLIF(address_2, '') AS "complement_adresse",
NULLIF(city, '') AS "commune",
NULLIF(address_1, '') AS "adresse",
NULLIF(postal_code, '') AS "code_postal",
NULLIF(city_code, '') AS "code_insee"
id AS "id",
longitude AS "longitude",
latitude AS "latitude",
_di_source_id AS "source",
complement_adresse AS "complement_adresse",
commune AS "commune",
adresse AS "adresse",
code_postal AS "code_postal",
code_insee AS "code_insee"
FROM services
),

Expand Down
77 changes: 31 additions & 46 deletions pipeline/dbt/models/intermediate/dora/int_dora__services.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,54 +2,39 @@ WITH services AS (
SELECT * FROM {{ ref('stg_dora__services') }}
),

di_frais_by_dora_fee_condition AS (
SELECT x.*
FROM (
VALUES
('1', 'gratuit'),
('2', 'gratuit-sous-conditions'),
('3', 'payant'),
('4', 'adhesion'),
('5', 'pass-numerique')
) AS x (fee_condition, frais)
),


final AS (
SELECT
id AS "id",
id AS "adresse_id",
_di_source_id AS "source",
name AS "nom",
short_desc AS "presentation_resume",
kinds AS "types",
online_form AS "prise_rdv",
ARRAY(
SELECT di_frais_by_dora_fee_condition.frais
FROM di_frais_by_dora_fee_condition
WHERE services.fee_condition = di_frais_by_dora_fee_condition.fee_condition
)::TEXT [] AS "frais",
fee_details AS "frais_autres",
NULL::TEXT [] AS "profils",
NULL AS "pre_requis",
NULL AS "cumulable",
NULL AS "justificatifs",
NULL AS "date_creation",
NULL AS "date_suspension",
NULL AS "lien_source",
NULL AS "telephone",
NULL AS "courriel",
NULL AS "contact_public",
NULL AS "date_maj",
NULL AS "zone_diffusion_type",
NULL AS "zone_diffusion_code",
NULL AS "zone_diffusion_nom",
NULLIF(full_desc, '') AS "presentation_detail",
NULLIF(online_form, '') AS "formulaire_en_ligne",
NULLIF(recurrence, '') AS "recurrence",
SPLIT_PART(TRIM('/' FROM structure), '/structures/', 2) AS "structure_id",
(categories || subcategories) AS "thematiques",
NULLIF(location_kinds, '{}') AS "modes_accueil"
id AS "adresse_id",
contact_nom AS "contact_nom", -- ignored for now
contact_prenom AS "contact_prenom", -- ignored for now
contact_public AS "contact_public",
NULL AS "courriel", -- ignored for now
cumulable AS "cumulable",
date_creation AS "date_creation",
date_maj AS "date_maj",
date_suspension AS "date_suspension",
formulaire_en_ligne AS "formulaire_en_ligne",
frais_autres AS "frais_autres",
frais AS "frais",
id AS "id",
justificatifs AS "justificatifs",
lien_source AS "lien_source",
modes_accueil AS "modes_accueil",
nom AS "nom",
presentation_resume AS "presentation_resume",
presentation_detail AS "presentation_detail",
prise_rdv AS "prise_rdv",
profils AS "profils",
recurrence AS "recurrence",
_di_source_id AS "source",
structure_id AS "structure_id",
telephone AS "telephone", -- ignored for now
thematiques AS "thematiques",
types AS "types",
zone_diffusion_code AS "zone_diffusion_code",
zone_diffusion_nom AS "zone_diffusion_nom",
zone_diffusion_type AS "zone_diffusion_type",
ARRAY_TO_STRING(pre_requis, ',') AS "pre_requis"
FROM services
)

Expand Down
40 changes: 20 additions & 20 deletions pipeline/dbt/models/intermediate/dora/int_dora__structures.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,26 @@ WITH structures AS (

final AS (
SELECT
id AS "id",
id AS "adresse_id",
NULL::BOOLEAN AS "antenne",
NULL AS "rna",
_di_source_id AS "source",
NULL AS "horaires_ouverture",
NULL AS "accessibilite",
NULL::TEXT [] AS "labels_nationaux",
NULL::TEXT [] AS "labels_autres",
NULL::TEXT [] AS "thematiques",
typology AS "typologie",
modification_date AS "date_maj",
NULLIF(siret, '') AS "siret",
NULLIF(name, '') AS "nom",
NULLIF(link_on_source, '') AS "lien_source",
NULLIF(short_desc, '') AS "presentation_resume",
NULLIF(full_desc, '') AS "presentation_detail",
NULLIF(phone, '') AS "telephone",
NULLIF(url, '') AS "site_web",
NULLIF(email, '') AS "courriel"
accessibilite AS "accessibilite",
id AS "adresse_id",
antenne AS "antenne",
courriel AS "courriel",
date_maj AS "date_maj",
horaires_ouverture AS "horaires_ouverture",
id AS "id",
labels_autres AS "labels_autres",
labels_nationaux AS "labels_nationaux",
lien_source AS "lien_source",
nom AS "nom",
presentation_detail AS "presentation_detail",
presentation_resume AS "presentation_resume",
rna AS "rna",
siret AS "siret",
site_web AS "site_web",
_di_source_id AS "source",
telephone AS "telephone",
NULL::TEXT [] AS "thematiques",
typologie AS "typologie"
FROM structures
)

Expand Down
Loading