Skip to content
This repository has been archived by the owner on Jul 13, 2023. It is now read-only.

🆕 DBT for Pluto #413

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,9 @@ __pycache__/
!example.env
!version.env
pluto_build/pts/
pluto_build/.library/
pluto_build/.library/
pluto_build/.venv/
pluto_build/.postgres/
pluto_build/.target/
pluto_build/dbt_packages/
pluto_build/logs/
1 change: 1 addition & 0 deletions pluto_build/.python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.8.12
20 changes: 20 additions & 0 deletions pluto_build/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
name: pluto
config-version: 2
version: '0.1'

profile: nycplanning

model-paths: ["models"]
target-path: ".target"

models:
+persist_docs:
relation: true
columns: true
pluto:
staging:
+materialized: table
schema: staging
calculation:
+materialized: table
schema: calculation
47 changes: 47 additions & 0 deletions pluto_build/instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Instructions

## Create local Postgres-Postgis database
0. make sure you have docker installed
1. ```
docker run -d \
-p 5432:5432 \
-e POSTGRES_PASSWORD=postgres \
-v $(pwd)/.postgres:/var/lib/postgresql/data \
postgis/postgis:15-3.3-alpine
```
> this will create a postgres/postgis docker container locally for development purposes

## Install DBT
0. navigate to `pluto_build`
1. create virtualenv `python -m virtualenv .venv`
2. activate virtualenv `source .venv/bin/activate`
3. install dependencies `pip install -r requirements.txt`
4. install dbt packages `dbt deps`

## Set up DBT profile
0. first time setup -> `touch ~/.dbt/profiles.yml`
1. add the following to the `profiles.yml`
```yml
nycplanning:
target: pluto-dev
outputs:
pluto-dev:
type: postgres
host: localhost
user: postgres
password: postgres
port: 5432
dbname: postgres
schema: public
threads: 4
```

## Example Commands
1. `dbt run -s staging.stg_pts` to run a specific model
2. `dbt compile` to compile sql to `target` without executing sql
3. `dbt docs generate` to generate documentation
4. ` dbt docs serve --port 8000` to open docs in browser (`http://localhost:8000`)

## Execution Order
1. you will still need to run `bash 01_dataloading.sh` to import source data
2. once dataloading is complete, you can then proceed to run dbt commands
23 changes: 23 additions & 0 deletions pluto_build/models/calculation/calc_allocated.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
SELECT
DISTINCT
bbl.primebbl,
rpad.bldgcl as bldgclass,
rpad.story as numfloors,
rpad.lfft as lotfront,
rpad.ldft as lotdepth,
rpad.bfft as bldgfront,
rpad.bdft as bldgdepth,
rpad.ext as ext,
rpad.condo_number as condono,
rpad.land_area as lotarea,
rpad.gross_sqft as bldgarea,
rpad.yrbuilt as yearbuilt,
rpad.yralt1 as yearalter1,
rpad.yralt2 as yearalter2,
rpad.owner as ownername,
rpad.irreg as irrlotcode,
concat(rpad.housenum_lo,' ',rpad.street_name) as address,
ap_datef as appdate
FROM {{ ref('stg_bbl') }} as bbl
LEFT JOIN {{ ref('stg_rpad') }} as rpad ON bbl.primebbl = rpad.bbl
WHERE LENGTH(bbl.bbl) = 10
47 changes: 47 additions & 0 deletions pluto_build/models/calculation/calc_areas.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
SELECT
COALESCE(rpad.primebbl, cama.primebbl) as primebbl,
COALESCE(
NULLIF(rpad.commercial_area, 0),
NULLIF(cama.commercial_area, 0)
) as commercial_area,
COALESCE(
NULLIF(rpad.residential_area, 0),
NULLIF(cama.residential_area, 0)
) as residential_area,
COALESCE(
NULLIF(rpad.office_area, 0),
NULLIF(cama.office_area, 0)
) as office_area,
COALESCE(
NULLIF(rpad.retail_area, 0),
NULLIF(cama.retail_area, 0)
) as retail_area,
COALESCE(
NULLIF(rpad.garage_area, 0),
NULLIF(cama.garage_area, 0)
) as garage_area,
COALESCE(
NULLIF(rpad.storage_area, 0),
NULLIF(cama.storage_area, 0)
) as storage_area,
COALESCE(
NULLIF(rpad.factory_area, 0),
NULLIF(cama.factory_area, 0)
) as factory_area,
COALESCE(
NULLIF(rpad.other_area, 0),
NULLIF(cama.other_area, 0)
) as other_area,
COALESCE(
NULLIF(rpad.gross_area, 0),
NULLIF(cama.gross_area, 0),
rpad._gross_area
) as bldg_area,
COALESCE(
CASE WHEN NULLIF(rpad.gross_area, 0) IS NOT NULL THEN '2' END,
CASE WHEN NULLIF(cama.gross_area, 0) IS NOT NULL THEN '7' END,
CASE WHEN NULLIF(rpad._gross_area, 0) IS NOT NULL THEN '5' END
) as area_source
FROM {{ ref('calc_areas_rpad') }} rpad
FULL OUTER JOIN {{ ref('calc_areas_cama') }} cama
ON rpad.primebbl = cama.primebbl
14 changes: 14 additions & 0 deletions pluto_build/models/calculation/calc_areas_cama.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
SELECT
cama.primebbl,
SUM(commercialarea) as commercial_area,
SUM(residarea) as residential_area,
SUM(officearea) as office_area,
SUM(retailarea) as retail_area,
SUM(garagearea) as garage_area,
SUM(storagearea) as storage_area,
SUM(factoryarea) as factory_area,
SUM(otherarea) as other_area,
SUM(grossarea) as gross_area
FROM {{ ref('stg_cama') }} as cama
WHERE bldgnum = '1'
GROUP BY cama.primebbl
21 changes: 21 additions & 0 deletions pluto_build/models/calculation/calc_areas_rpad.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
SELECT
bbl as primebbl,
(
rpad.officearea +
rpad.retailarea +
rpad.garagearea +
rpad.storagearea +
rpad.factoryarea +
rpad.otherarea
) as commercial_area,
rpad.residarea as residential_area,
rpad.officearea as office_area,
rpad.retailarea as retail_area,
rpad.garagearea as garage_area,
rpad.storagearea as storage_area,
rpad.factoryarea as factory_area,
rpad.otherarea as other_area,
rpad.gross_sqft as gross_area,
rpad.lfft * rpad.ldft * rpad.story as _gross_area
FROM {{ ref('stg_rpad') }} as rpad
WHERE RIGHT(rpad.bbl, 4) NOT LIKE '75%'
15 changes: 15 additions & 0 deletions pluto_build/models/calculation/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
version: 2

models:
- name: calc_allocated
columns:
- name: primebbl
tests:
- unique
- not_null

- name: calc_areas_cama
description: |
building area related fields comming from cama
> **Note:** that we only take building number (bldgnum) 1
as the main source of building area
47 changes: 47 additions & 0 deletions pluto_build/models/sources.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
version: 2

sources:
- name: public
tables:
- name: dcp_cb2010_wi
- name: dcp_cb2020_wi
- name: dcp_cdboundaries_wi
- name: dcp_colp
- name: dcp_commercialoverlay
- name: dcp_councildistricts_wi
- name: dcp_ct2010_wi
- name: dcp_ct2020_wi
- name: dcp_edesignation
- name: dcp_firecompanies
- name: dcp_healthareas
- name: dcp_healthcenters
- name: dcp_limitedheight
- name: dcp_policeprecincts
- name: dcp_school_districts
- name: dcp_specialpurpose
- name: dcp_specialpurposesubdistricts
- name: dcp_zoning_maxfar
- name: dcp_zoningdistricts
- name: dcp_zoningmapamendments
- name: dcp_zoningmapindex
- name: dof_condo
- name: dof_dtm
- name: dof_shoreline
- name: doitt_zipcodeboundaries
- name: dpr_greenthumb
- name: dsny_frequencies
- name: fema_firms2007_100yr
- name: fema_pfirms2015_100yr
- name: lpc_historic_districts
- name: lpc_landmarks
- name: pluto_corrections
- name: pluto_input_bsmtcode
- name: pluto_input_cama_dof
- name: pluto_input_condo_bldgclass
- name: pluto_input_condolot_descriptiveattributes
- name: pluto_input_geocodes
- name: pluto_input_landuse_bldgclass
- name: pluto_input_numbldgs
- name: pluto_input_research
- name: pluto_pts
- name: source_data_versions
54 changes: 54 additions & 0 deletions pluto_build/models/staging/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
version: 2

models:
- name: stg_pts
description: cleaned and casted table for PTS

- name: stg_cama
description: cleaned and casted table for CAMA

- name: stg_rpad
description: Convert PTS to RPAD (legacy format)
columns:
- name: bbl
tests:
- unique
- not_null

- name: stg_geocodes
description: cleaned table for geocodes
columns:
- name: bbl
tests:
- unique

- name: borough
tests:
- not_null
- accepted_values:
values: [1, 2, 3, 4, 5]

- name: stg_bbl
description: cleaned table for BBL
columns:
- name: boro
- name: tb
description: Tax Block
- name: tl
description: Tax Lot
- name: bbl
description: >-
Borough Block Lot (BBL) from stg_pts,
not that this field can contain letter starting from E (H, I, J, K ...)
tests:
- unique
- not_null

- name: billingbbl
description: >-
Billing BBL from stg_geocodes, this is coming from geosupport

- name: primebbl
description: >-
Prime BBL, used for calculation purposes usually.
multiple lots can shared the same prime bbl (condo lots)
14 changes: 14 additions & 0 deletions pluto_build/models/staging/stg_bbl.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
SELECT
a.boro,
a.tb,
a.tl,
a.bbl,
NULLIF(b.billingbbl, '0000000000') as billingbbl,
(CASE
WHEN NULLIF(b.billingbbl, '0000000000') IS NOT NULL
THEN b.billingbbl
ELSE a.boro||a.tb||a.tl
END) AS primebbl
FROM {{ ref('stg_pts') }} a
LEFT JOIN {{ ref('stg_geocodes') }} b
ON a.boro||a.tb||a.tl = b.bbl
44 changes: 44 additions & 0 deletions pluto_build/models/staging/stg_cama.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
SELECT
a.bbl,
parcelcard,
bldgnum,
bldgclass,
primaryusecode,
NULLIF(developmentname, '') AS developmentname,
streettype,
lottype,
residarea::integer,
officearea::integer,
retailarea::integer,
garagearea::integer,
storagearea::integer,
factoryarea::integer,
otherarea::integer,
grossarea::integer,
ownerarea::integer,
grossvolume,
commercialarea::integer,
proxcode,
bsmnt_type,
bsmntgradient,
bsmntarea::integer,
firstfloorarea::integer,
secondfloorarea::integer,
upperfloorarea::integer,
partresfloorarea::integer,
unfinishedfloorarea::integer,
finishedfloorarea::integer,
nonresidfloorarea::integer,
NULLIF(residconstrtype, '') AS residconstrtype,
NULLIF(commercialconstrtype, '') AS commercialconstrtype,
NULLIF(condomainconstrtype, '') AS condomainconstrtype,
NULLIF(condounitsconstrtype, '') AS condounitsconstrtype,
b.billingbbl,
(CASE
WHEN b.billingbbl IS NOT NULL AND b.billingbbl <> '0000000000'
THEN b.billingbbl
ELSE LEFT(a.bbl,10)
END) AS primebbl
FROM {{ source('public', 'pluto_input_cama_dof') }} a
LEFT JOIN {{ source('public', 'pluto_input_geocodes') }} b
ON LEFT(a.bbl,10)=b.borough||lpad(b.block,5,'0')||lpad(b.lot,4,'0')
Loading