Skip to content

Commit

Permalink
feat(cd35): update schema
Browse files Browse the repository at this point in the history
  • Loading branch information
vmttn committed Jun 26, 2023
1 parent 8bff9b2 commit 0cfa95c
Show file tree
Hide file tree
Showing 6 changed files with 165 additions and 50 deletions.
2 changes: 1 addition & 1 deletion pipeline/dags/dags/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
"streams": [
{
"id": "organisations",
"filename": "organisations.xlsx",
"filename": "organisations.csv",
"url": Variable.get("CD35_FILE_URL", None),
},
],
Expand Down
34 changes: 34 additions & 0 deletions pipeline/dbt/models/_sources.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,40 @@ sources:
schema: cd35
tables:
# Source table `organisations`: tests on the JSON paths found in its `data` column.
# NOTE(review): indentation is not visible in this view; nesting is assumed to be
# table > columns > column > tests > test > config/path_list — TODO confirm.
- name: organisations
columns:
- name: data
tests:
# Paths checked with severity "error".
# NOTE(review): presumably each listed JSON path must exist in `data`; the test
# itself is defined outside this view — confirm its exact semantics.
- expect_column_value_paths_to_exist:
config:
severity: error
path_list:
- '$.ADRESSE'
- '$.CODE_INSEE'
- '$.CODE_POSTAL'
- '$.COMMUNE'
- '$.COMPLEMENT_ADRESSE'
- '$.COURRIEL'
- '$.DATE_CREATION'
- '$.DATE_MAJ'
- '$.HORAIRES_OUVERTURES'
- '$.ID'
- '$.LATITUDE'
- '$.LIEN_SOURCE'
- '$.LONGITUDE'
- '$.NOM'
- '$.PRESENTATION_DETAIL'
- '$.PROFILS'
- '$.SIGLE'
- '$.SITE_WEB'
- '$.TELEPHONE'
- '$.THEMATIQUES'
# Same test with severity "warn" for the three ASCOLL* paths.
- expect_column_value_paths_to_exist:
config:
severity: warn
path_list:
- '$.ASCOLLMODIFICATION'
- '$.ASCOLLNOM'
- '$.ASCOLLSIRET'

- name: cd72
schema: cd72
Expand Down
18 changes: 9 additions & 9 deletions pipeline/dbt/models/intermediate/cd35/int_cd35__adresses.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ WITH organisations AS (

-- Projects every row of `organisations` onto the address columns:
-- id, commune, code_postal, code_insee, adresse, complement_adresse,
-- longitude, latitude and source.
final AS (
SELECT
-- NOTE(review): the two column lists below look like the removed and the added
-- side of the diff (9 deletions / 9 additions) rendered without +/- markers, so
-- only one of them can exist in the real model — TODO confirm against the repository.
-- First list: reads the org_* column names; code_insee and complement_adresse
-- are hard-coded to NULL.
id AS "id",
org_ville AS "commune",
org_cp AS "code_postal",
NULL AS "code_insee",
org_adres AS "adresse",
NULL AS "complement_adresse",
org_longitude AS "longitude",
org_latitude AS "latitude",
_di_source_id AS "source"
-- Second list: reads columns that already carry the output names, and fills
-- code_insee and complement_adresse from the input instead of NULL.
id AS "id",
commune AS "commune",
code_postal AS "code_postal",
code_insee AS "code_insee",
adresse AS "adresse",
complement_adresse AS "complement_adresse",
longitude AS "longitude",
latitude AS "latitude",
_di_source_id AS "source"
FROM organisations
)

Expand Down
44 changes: 22 additions & 22 deletions pipeline/dbt/models/intermediate/cd35/int_cd35__structures.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,34 @@ WITH organisations AS (

-- Maps each row of `organisations` onto the structure output columns.
-- Only the post-commit column list is kept; it reads the renamed staging columns
-- (nom, telephone, courriel, ...) instead of the former org_* names.
-- NOTE(review): assumes `organisations` exposes the staging model's columns
-- unchanged — its definition is outside this view, confirm.
final AS (
    SELECT
        id                  AS "id",
        -- siret, antenne, rna, accessibilite, labels and thematiques are emitted as NULL.
        NULL                AS "siret",
        NULL::BOOLEAN       AS "antenne",
        NULL                AS "rna",
        nom                 AS "nom",
        telephone           AS "telephone",
        courriel            AS "courriel",
        site_web            AS "site_web",
        _di_source_id       AS "source",
        lien_source         AS "lien_source",
        -- The staging column is plural ("horaires_ouvertures"); only the output
        -- alias is singular.
        horaires_ouvertures AS "horaires_ouverture",
        NULL                AS "accessibilite",
        NULL::TEXT []       AS "labels_nationaux",
        NULL::TEXT []       AS "labels_autres",
        NULL::TEXT []       AS "thematiques",
        date_maj            AS "date_maj",
        -- Any sigle other than the two listed values yields NULL (no ELSE branch).
        CASE sigle
            WHEN 'CCAS' THEN 'CCAS'
            WHEN 'MAIRIE' THEN 'MUNI'
        END                 AS "typologie",
        -- Texts of at most 280 characters are used as the summary as-is; longer
        -- texts are cut to 279 characters plus a one-character ellipsis.
        -- A NULL text matches neither branch and stays NULL.
        -- NOTE(review): the appended literal was empty in the reviewed text, which
        -- made the concatenation a no-op; an ellipsis is assumed — confirm.
        CASE
            WHEN LENGTH(presentation_detail) <= 280 THEN presentation_detail
            WHEN LENGTH(presentation_detail) > 280 THEN LEFT(presentation_detail, 279) || '…'
        END                 AS "presentation_resume",
        -- The full text is only kept when it did not fit in the summary.
        CASE
            WHEN LENGTH(presentation_detail) > 280 THEN presentation_detail
        END                 AS "presentation_detail"
    FROM organisations
)

Expand Down
78 changes: 78 additions & 0 deletions pipeline/dbt/models/staging/cd35/_cd35__models.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,83 @@ version: 2

# Column-level tests for the stg_cd35__organisations staging model.
# NOTE(review): indentation is not visible in this view; nesting is assumed to be
# models > model > columns > column > tests, with `config` on the model — TODO confirm.
# dbt_utils.at_least_one: the column must hold at least one non-null value.
# dbt_utils.not_empty_string: no value in the column may be the empty string.
models:
- name: stg_cd35__organisations
columns:
- name: adresse
tests:
- dbt_utils.at_least_one
- dbt_utils.not_empty_string
- name: code_insee
tests:
- dbt_utils.at_least_one
- dbt_utils.not_empty_string
- name: code_postal
tests:
- dbt_utils.at_least_one
- dbt_utils.not_empty_string
- name: commune
tests:
- dbt_utils.at_least_one
- dbt_utils.not_empty_string
- name: complement_adresse
tests:
- dbt_utils.at_least_one
- dbt_utils.not_empty_string
- name: courriel
tests:
- dbt_utils.at_least_one
- dbt_utils.not_empty_string
- name: date_creation
tests:
- dbt_utils.at_least_one
- name: date_maj
tests:
- dbt_utils.at_least_one
# Column name is plural here; the structures model's output alias is the
# singular "horaires_ouverture".
- name: horaires_ouvertures
tests:
- dbt_utils.at_least_one
# Empty strings in this column only raise a warning.
- dbt_utils.not_empty_string:
config:
severity: warn
# id must be unique, non-null and non-empty on every row.
- name: id
tests:
- unique
- not_null
- dbt_utils.not_empty_string
- name: latitude
tests:
- dbt_utils.at_least_one
- name: lien_source
tests:
- dbt_utils.at_least_one
- dbt_utils.not_empty_string
- name: longitude
tests:
- dbt_utils.at_least_one
# nom must be present and non-empty on every row.
- name: nom
tests:
- not_null
- dbt_utils.not_empty_string
- name: presentation_detail
tests:
- dbt_utils.at_least_one
- dbt_utils.not_empty_string
- name: profils
tests:
- dbt_utils.at_least_one
- name: sigle
tests:
- dbt_utils.at_least_one
- dbt_utils.not_empty_string
- name: site_web
tests:
- dbt_utils.at_least_one
- dbt_utils.not_empty_string
- name: telephone
tests:
- dbt_utils.at_least_one
- dbt_utils.not_empty_string
- name: thematiques
tests:
- dbt_utils.at_least_one
config:
tags: cd35
39 changes: 21 additions & 18 deletions pipeline/dbt/models/staging/cd35/stg_cd35__organisations.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,27 @@ WITH source AS (

-- Flattens the JSON `data` column of `source` into one typed column per field.
final AS (
SELECT
-- NOTE(review): the two column lists below look like the removed and the added
-- side of the diff (18 deletions / 21 additions) rendered without +/- markers, so
-- only one of them can exist in the real model — TODO confirm against the repository.
-- First list: reads the ORG_* keys. '|' characters are stripped from both ends
-- of the coordinates before the FLOAT cast, dates are parsed as 'DD-MM-YYYY',
-- and ORG_ID is exposed twice (as "id" and as "org_id").
_di_source_id AS "_di_source_id",
(TRIM('|' FROM data ->> 'ORG_LONGITUDE'))::FLOAT AS "org_longitude",
(TRIM('|' FROM data ->> 'ORG_LATITUDE'))::FLOAT AS "org_latitude",
TO_DATE(data ->> 'ORG_DATEMAJ', 'DD-MM-YYYY') AS "org_datemaj",
TO_DATE(data ->> 'ORG_DATECREA', 'DD-MM-YYYY') AS "org_datecrea",
data ->> 'ORG_ID' AS "id",
data ->> 'ORG_ID' AS "org_id",
data ->> 'ORG_NOM' AS "org_nom",
data ->> 'ORG_VILLE' AS "org_ville",
data ->> 'ORG_CP' AS "org_cp",
data ->> 'ORG_ADRES' AS "org_adres",
data ->> 'ORG_SIGLE' AS "org_sigle",
data ->> 'ORG_TEL' AS "org_tel",
data ->> 'ORG_MAIL' AS "org_mail",
data ->> 'ORG_WEB' AS "org_web",
data ->> 'ORG_DESC' AS "org_desc",
data ->> 'URL' AS "url",
data ->> 'ORG_HORAIRE' AS "org_horaire"
-- Second list: reads the renamed keys and names each column after its key in
-- lower case. Coordinates are cast to FLOAT with no '|' trimming, and dates
-- keep the 'DD-MM-YYYY' format.
_di_source_id AS "_di_source_id",
(data ->> 'LATITUDE')::FLOAT AS "latitude",
(data ->> 'LONGITUDE')::FLOAT AS "longitude",
data ->> 'ADRESSE' AS "adresse",
data ->> 'CODE_INSEE' AS "code_insee",
data ->> 'CODE_POSTAL' AS "code_postal",
data ->> 'COMMUNE' AS "commune",
data ->> 'COMPLEMENT_ADRESSE' AS "complement_adresse",
data ->> 'COURRIEL' AS "courriel",
TO_DATE(data ->> 'DATE_CREATION', 'DD-MM-YYYY') AS "date_creation",
TO_DATE(data ->> 'DATE_MAJ', 'DD-MM-YYYY') AS "date_maj",
data ->> 'HORAIRES_OUVERTURES' AS "horaires_ouvertures",
data ->> 'ID' AS "id",
data ->> 'LIEN_SOURCE' AS "lien_source",
data ->> 'NOM' AS "nom",
data ->> 'PRESENTATION_DETAIL' AS "presentation_detail",
-- PROFILS is split on ',' and each piece is trimmed before being collected into an array.
(SELECT ARRAY_AGG(TRIM(p)) FROM UNNEST(STRING_TO_ARRAY(data ->> 'PROFILS', ',')) AS "p") AS "profils",
data ->> 'SIGLE' AS "sigle",
data ->> 'SITE_WEB' AS "site_web",
data ->> 'TELEPHONE' AS "telephone",
-- THEMATIQUES gets the same split-and-trim treatment as PROFILS.
(SELECT ARRAY_AGG(TRIM(t)) FROM UNNEST(STRING_TO_ARRAY(data ->> 'THEMATIQUES', ',')) AS "t") AS "thematiques"
FROM source
)

Expand Down

0 comments on commit 0cfa95c

Please sign in to comment.