Skip to content

Commit

Permalink
feat(dora): use api v2 (#106)
Browse files Browse the repository at this point in the history
  • Loading branch information
vmttn authored Jun 26, 2023
1 parent 615bf6f commit 27f4170
Show file tree
Hide file tree
Showing 13 changed files with 524 additions and 165 deletions.
3 changes: 2 additions & 1 deletion .template.env
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ AIRFLOW_CONN_S3_SOURCES=
BAN_API_URL=https://api-adresse.data.gouv.fr
CD35_FILE_URL=https://data.ille-et-vilaine.fr/dataset/8d5ec0f0-ebe1-442d-9d99-655b37d5ad07/resource/665776ae-fa25-46ab-9bfd-c4241866f03f/download/annuaire_sociale_fixe.csv
CD72_FILE_URL=
DORA_API_URL=https://api.dora.fabrique.social.gouv.fr/api/v1/
DORA_API_TOKEN=
DORA_API_URL=https://api.dora.inclusion.beta.gouv.fr/api/v2/
EMPLOIS_API_TOKEN=
EMPLOIS_API_URL=https://emplois.inclusion.beta.gouv.fr/api/v1/structures/
ETAB_PUB_FILE_URL=https://www.data.gouv.fr/fr/datasets/r/73302880-e4df-4d4c-8676-1a61bb997f3d
Expand Down
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ x-airflow-common:
AIRFLOW_VAR_DATAGOUV_API_URL: ${DATAGOUV_API_URL}
AIRFLOW_VAR_DATAGOUV_DI_DATASET_ID: ${DATAGOUV_DI_DATASET_ID}
AIRFLOW_VAR_DATAGOUV_DI_RESOURCE_IDS: ${DATAGOUV_DI_RESOURCE_IDS}
AIRFLOW_VAR_DORA_API_TOKEN: ${DORA_API_TOKEN}
AIRFLOW_VAR_DORA_API_URL: ${DORA_API_URL}
AIRFLOW_VAR_EMPLOIS_API_TOKEN: ${EMPLOIS_API_TOKEN}
AIRFLOW_VAR_EMPLOIS_API_URL: ${EMPLOIS_API_URL}
Expand Down
3 changes: 1 addition & 2 deletions pipeline/dags/dags/notifications.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ def notify_webhook(context: Context, conn_id: str, format_fn):

try:
http_hook = HttpHook(http_conn_id=conn_id)
http_hook.run(json={"text": format_fn(context)})
except exceptions.AirflowNotFoundException:
logger.warning("Webhook notifier disabled.")
return

http_hook.run(json={"text": format_fn(context)})
2 changes: 2 additions & 0 deletions pipeline/dags/dags/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
"id": "structures",
"filename": "structures.json",
"url": Variable.get("DORA_API_URL", None),
"token": Variable.get("DORA_API_TOKEN", None),
},
{
"id": "services",
"filename": "services.json",
"url": Variable.get("DORA_API_URL", None),
"token": Variable.get("DORA_API_TOKEN", None),
},
],
},
Expand Down
4 changes: 3 additions & 1 deletion pipeline/dags/import_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@

default_args = {
"on_failure_callback": lambda context: notify_webhook(
context, "mattermost", format_failure
context,
conn_id="mattermost",
format_fn=format_failure,
)
}

Expand Down
77 changes: 77 additions & 0 deletions pipeline/dbt/models/_sources.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,84 @@ sources:
schema: dora
tables:
- name: structures
columns:
- name: data
tests:
- expect_column_value_paths_to_exist:
config:
severity: error
path_list:
- '$.antenne'
- '$.date_maj'
- '$.labels_autres'
- '$.labels_nationaux'
- '$.latitude'
- '$.longitude'
- '$.thematiques'
- '$.accessibilite'
- '$.adresse'
- '$.code_insee'
- '$.code_postal'
- '$.commune'
- '$.complement_adresse'
- '$.courriel'
- '$.horaires_ouverture'
- '$.id'
- '$.lien_source'
- '$.nom'
- '$.presentation_detail'
- '$.presentation_resume'
- '$.rna'
- '$.siret'
- '$.site_web'
- '$.source'
- '$.telephone'
- '$.typologie'
- name: services
columns:
- name: data
tests:
- expect_column_value_paths_to_exist:
config:
severity: warn
path_list:
- '$.contact_public'
- '$.cumulable'
- '$.date_creation'
- '$.date_maj'
- '$.date_suspension'
- '$.justificatifs'
- '$.latitude'
- '$.longitude'
- '$.modes_accueil'
- '$.pre_requis'
- '$.profils'
- '$.thematiques'
- '$.types'
- '$.adresse'
- '$.code_insee'
- '$.code_postal'
- '$.commune'
- '$.complement_adresse'
- '$.contact_nom'
- '$.contact_prenom'
- '$.courriel'
- '$.formulaire_en_ligne'
- '$.frais_autres'
- '$.frais'
- '$.id'
- '$.lien_source'
- '$.nom'
- '$.presentation_resume'
- '$.presentation_detail'
- '$.prise_rdv'
- '$.recurrence'
- '$.source'
- '$.structure_id'
- '$.telephone'
- '$.zone_diffusion_code'
- '$.zone_diffusion_nom'
- '$.zone_diffusion_type'

- name: finess
schema: finess
Expand Down
36 changes: 18 additions & 18 deletions pipeline/dbt/models/intermediate/dora/int_dora__adresses.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,29 @@ services AS (

structure_adresses AS (
SELECT
id AS "id",
longitude AS "longitude",
latitude AS "latitude",
_di_source_id AS "source",
NULLIF(address_2, '') AS "complement_adresse",
NULLIF(city, '') AS "commune",
NULLIF(address_1, '') AS "adresse",
NULLIF(postal_code, '') AS "code_postal",
NULLIF(city_code, '') AS "code_insee"
id AS "id",
longitude AS "longitude",
latitude AS "latitude",
_di_source_id AS "source",
complement_adresse AS "complement_adresse",
commune AS "commune",
adresse AS "adresse",
code_postal AS "code_postal",
code_insee AS "code_insee"
FROM structures
),

service_adresses AS (
SELECT
id AS "id",
longitude AS "longitude",
latitude AS "latitude",
_di_source_id AS "source",
NULLIF(address_2, '') AS "complement_adresse",
NULLIF(city, '') AS "commune",
NULLIF(address_1, '') AS "adresse",
NULLIF(postal_code, '') AS "code_postal",
NULLIF(city_code, '') AS "code_insee"
id AS "id",
longitude AS "longitude",
latitude AS "latitude",
_di_source_id AS "source",
complement_adresse AS "complement_adresse",
commune AS "commune",
adresse AS "adresse",
code_postal AS "code_postal",
code_insee AS "code_insee"
FROM services
),

Expand Down
77 changes: 31 additions & 46 deletions pipeline/dbt/models/intermediate/dora/int_dora__services.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,54 +2,39 @@ WITH services AS (
SELECT * FROM {{ ref('stg_dora__services') }}
),

di_frais_by_dora_fee_condition AS (
SELECT x.*
FROM (
VALUES
('1', 'gratuit'),
('2', 'gratuit-sous-conditions'),
('3', 'payant'),
('4', 'adhesion'),
('5', 'pass-numerique')
) AS x (fee_condition, frais)
),


final AS (
SELECT
id AS "id",
id AS "adresse_id",
_di_source_id AS "source",
name AS "nom",
short_desc AS "presentation_resume",
kinds AS "types",
online_form AS "prise_rdv",
ARRAY(
SELECT di_frais_by_dora_fee_condition.frais
FROM di_frais_by_dora_fee_condition
WHERE services.fee_condition = di_frais_by_dora_fee_condition.fee_condition
)::TEXT [] AS "frais",
fee_details AS "frais_autres",
NULL::TEXT [] AS "profils",
NULL AS "pre_requis",
NULL AS "cumulable",
NULL AS "justificatifs",
NULL AS "date_creation",
NULL AS "date_suspension",
NULL AS "lien_source",
NULL AS "telephone",
NULL AS "courriel",
NULL AS "contact_public",
NULL AS "date_maj",
NULL AS "zone_diffusion_type",
NULL AS "zone_diffusion_code",
NULL AS "zone_diffusion_nom",
NULLIF(full_desc, '') AS "presentation_detail",
NULLIF(online_form, '') AS "formulaire_en_ligne",
NULLIF(recurrence, '') AS "recurrence",
SPLIT_PART(TRIM('/' FROM structure), '/structures/', 2) AS "structure_id",
(categories || subcategories) AS "thematiques",
NULLIF(location_kinds, '{}') AS "modes_accueil"
id AS "adresse_id",
contact_nom AS "contact_nom", -- ignored for now
contact_prenom AS "contact_prenom", -- ignored for now
contact_public AS "contact_public",
NULL AS "courriel", -- ignored for now
cumulable AS "cumulable",
date_creation AS "date_creation",
date_maj AS "date_maj",
date_suspension AS "date_suspension",
formulaire_en_ligne AS "formulaire_en_ligne",
frais_autres AS "frais_autres",
frais AS "frais",
id AS "id",
justificatifs AS "justificatifs",
lien_source AS "lien_source",
modes_accueil AS "modes_accueil",
nom AS "nom",
presentation_resume AS "presentation_resume",
presentation_detail AS "presentation_detail",
prise_rdv AS "prise_rdv",
profils AS "profils",
recurrence AS "recurrence",
_di_source_id AS "source",
structure_id AS "structure_id",
telephone AS "telephone", -- ignored for now
thematiques AS "thematiques",
types AS "types",
zone_diffusion_code AS "zone_diffusion_code",
zone_diffusion_nom AS "zone_diffusion_nom",
zone_diffusion_type AS "zone_diffusion_type",
ARRAY_TO_STRING(pre_requis, ',') AS "pre_requis"
FROM services
)

Expand Down
40 changes: 20 additions & 20 deletions pipeline/dbt/models/intermediate/dora/int_dora__structures.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,26 @@ WITH structures AS (

final AS (
SELECT
id AS "id",
id AS "adresse_id",
NULL::BOOLEAN AS "antenne",
NULL AS "rna",
_di_source_id AS "source",
NULL AS "horaires_ouverture",
NULL AS "accessibilite",
NULL::TEXT [] AS "labels_nationaux",
NULL::TEXT [] AS "labels_autres",
NULL::TEXT [] AS "thematiques",
typology AS "typologie",
modification_date AS "date_maj",
NULLIF(siret, '') AS "siret",
NULLIF(name, '') AS "nom",
NULLIF(link_on_source, '') AS "lien_source",
NULLIF(short_desc, '') AS "presentation_resume",
NULLIF(full_desc, '') AS "presentation_detail",
NULLIF(phone, '') AS "telephone",
NULLIF(url, '') AS "site_web",
NULLIF(email, '') AS "courriel"
accessibilite AS "accessibilite",
id AS "adresse_id",
antenne AS "antenne",
courriel AS "courriel",
date_maj AS "date_maj",
horaires_ouverture AS "horaires_ouverture",
id AS "id",
labels_autres AS "labels_autres",
labels_nationaux AS "labels_nationaux",
lien_source AS "lien_source",
nom AS "nom",
presentation_detail AS "presentation_detail",
presentation_resume AS "presentation_resume",
rna AS "rna",
siret AS "siret",
site_web AS "site_web",
_di_source_id AS "source",
telephone AS "telephone",
NULL::TEXT [] AS "thematiques",
typologie AS "typologie"
FROM structures
)

Expand Down
Loading

0 comments on commit 27f4170

Please sign in to comment.