def _dbt(args: list[str]) -> None:
    """Run a dbt subcommand from the product directory.

    The final command line is ``dbt <args...> --quiet --warn-error-options
    '{"error": ["NoNodesForSelectionCriteria"]}'`` and it is executed with
    ``PRODUCT_PATH`` as the working directory.

    Args:
        args: The dbt subcommand and its own arguments, e.g. ``["seed"]`` or
            ``["run", "--select", "some_model"]``.

    Raises:
        subprocess.CalledProcessError: If dbt exits with a non-zero status.
    """
    # Handed verbatim to --warn-error-options. Per the dbt docs this asks dbt
    # to treat the NoNodesForSelectionCriteria warning as an error, so a
    # selector that matches nothing fails instead of passing silently.
    warn_error_options = '{"error": ["NoNodesForSelectionCriteria"]}'

    command = ["dbt"]
    command.extend(args)
    command += ["--quiet", "--warn-error-options", warn_error_options]

    subprocess.check_call(command, cwd=PRODUCT_PATH)
a/products/facilities/facdb/sql/_create_facdb_classification.sql b/products/facilities/facdb/sql/_create_facdb_classification.sql deleted file mode 100644 index ad385f22e2..0000000000 --- a/products/facilities/facdb/sql/_create_facdb_classification.sql +++ /dev/null @@ -1,11 +0,0 @@ -DROP TABLE IF EXISTS facdb_classification; -SELECT - a.uid, - a.facsubgrp, - b.facgroup, - b.facdomain, - b.servarea -INTO facdb_classification -FROM facdb_base AS a -INNER JOIN lookup_classification AS b - ON UPPER(a.facsubgrp) = UPPER(b.facsubgrp); diff --git a/products/facilities/facdb/sql/_create_reference_tables.sql b/products/facilities/facdb/sql/_create_reference_tables.sql index 6f732d7619..baaed46e61 100644 --- a/products/facilities/facdb/sql/_create_reference_tables.sql +++ b/products/facilities/facdb/sql/_create_reference_tables.sql @@ -6,27 +6,6 @@ CREATE TABLE lookup_boro ( ); \COPY lookup_boro FROM 'facdb/data/lookup_boro.csv' DELIMITER ',' CSV HEADER; - -DROP TABLE IF EXISTS lookup_classification; -CREATE TABLE lookup_classification ( - facsubgrp TEXT, - facgroup TEXT, - facdomain TEXT, - servarea TEXT -); -\COPY lookup_classification FROM 'facdb/data/lookup_classification.csv' DELIMITER ',' CSV HEADER; - - -DROP TABLE IF EXISTS lookup_agency; -CREATE TABLE lookup_agency ( - agencyabbrev TEXT, - overagency TEXT, - overlevel TEXT, - optype TEXT -); -\COPY lookup_agency FROM 'facdb/data/lookup_agency.csv' DELIMITER ',' CSV HEADER; - - DROP TABLE IF EXISTS manual_corrections; CREATE TABLE manual_corrections ( uid TEXT, diff --git a/products/facilities/models/intermediate/_intermediate.yml b/products/facilities/models/intermediate/_intermediate.yml new file mode 100644 index 0000000000..023b6bf549 --- /dev/null +++ b/products/facilities/models/intermediate/_intermediate.yml @@ -0,0 +1,50 @@ +version: 2 + +models: + - name: facdb_classification + description: > + Enriches each facility record with its classification hierarchy + (facgroup, facdomain, servarea) by joining 
-- facdb_agency: attaches agency attributes to every row of facdb_base.
--
-- lookup_agency is joined twice:
--   * on opabbrev   to pick up the operator's optype
--   * on overabbrev to pick up the oversight agency's overagency / overlevel
--
-- Both joins are LEFT joins, so rows whose abbreviation has no match in the
-- lookup are kept with NULLs in the looked-up columns.
-- NOTE(review): assumes agencyabbrev is unique in lookup_agency; duplicates
-- would fan out rows per uid. Confirm against the seed.
SELECT
    fac.uid,
    fac.opname,
    fac.opabbrev,
    op_agency.optype,
    fac.overabbrev,
    over_agency.overagency,
    over_agency.overlevel
FROM {{ source('facdb', 'facdb_base') }} AS fac
LEFT JOIN {{ ref('lookup_agency') }} AS op_agency ON fac.opabbrev = op_agency.agencyabbrev
LEFT JOIN {{ ref('lookup_agency') }} AS over_agency ON fac.overabbrev = over_agency.agencyabbrev
ref('lookup_classification') }} as cls + on upper(base.facsubgrp) = upper(cls.facsubgrp) diff --git a/products/facilities/models/staging/sources.yml b/products/facilities/models/staging/sources.yml new file mode 100644 index 0000000000..5548d23d6f --- /dev/null +++ b/products/facilities/models/staging/sources.yml @@ -0,0 +1,29 @@ +version: 2 + +sources: + - name: facdb + schema: "{{ env_var('BUILD_ENGINE_SCHEMA') }}" + description: > + Tables produced by the FacDB Python pipeline (geocoding, preprocessing, + and per-dataset SQL scripts). These are the handoff point into dbt. + tables: + - name: facdb_base + description: > + Staging table populated by the 50+ dataset-specific pipeline scripts. + Each row is a facility record with factype and facsubgrp assigned at + the source level. Downstream dbt models join lookup seeds against this + table to build the classification and agency enrichment layers. + columns: + - name: uid + tests: + - not_null + - unique + - name: source + tests: + - not_null + - name: factype + tests: + - not_null + - name: facsubgrp + tests: + - not_null diff --git a/products/facilities/facdb/data/lookup_agency.csv b/products/facilities/seeds/lookup_agency.csv similarity index 100% rename from products/facilities/facdb/data/lookup_agency.csv rename to products/facilities/seeds/lookup_agency.csv diff --git a/products/facilities/facdb/data/lookup_classification.csv b/products/facilities/seeds/lookup_classification.csv similarity index 98% rename from products/facilities/facdb/data/lookup_classification.csv rename to products/facilities/seeds/lookup_classification.csv index 63159e289a..be90ed029e 100644 --- a/products/facilities/facdb/data/lookup_classification.csv +++ b/products/facilities/seeds/lookup_classification.csv @@ -1,4 +1,4 @@ -facsubgrp, facgroup,facdomain,servarea +facsubgrp,facgroup,facdomain,servarea Camps,Camps,"Education, Child Welfare, and Youth",Local Day Care,Day Care and Pre-Kindergarten,"Education, Child Welfare, and 
Youth",Local DOE Universal Pre-Kindergarten,Day Care and Pre-Kindergarten,"Education, Child Welfare, and Youth",Local diff --git a/products/facilities/seeds/lookup_factype.csv b/products/facilities/seeds/lookup_factype.csv new file mode 100644 index 0000000000..97787ebdab --- /dev/null +++ b/products/facilities/seeds/lookup_factype.csv @@ -0,0 +1,82 @@ +factype,facsubgrp +Adult Day Care,Other Health Care +Asphalt Plant,Material Supplies +Bridge House,Other Transportation +Camp - All Age,Camps +Camp - Preschool Age,Camps +Camp - School Age,Camps +Cemetery,Cemeteries +Charter School,Charter K-12 Schools +Compost,DSNY Drop-off Facility +Community Park,Parks +Construction and Demolition Processing,Solid Waste Processing +Composting,Solid Waste Processing +Correctional Facility,Detention and Correctional +Courthouse,Courthouses and Judicial +Cruise Terminal,Ports and Ferry Landings +Day Care,Day Care +Detention Center,Detention and Correctional +DOE Universal Pre-K,DOE Universal Pre-Kindergarten +DOE Universal Pre-K - Charter,DOE Universal Pre-Kindergarten +DSNY Garage,Solid Waste Transfer and Carting +Early Education Program,DOE Universal Pre-Kindergarten +Electronics,DSNY Drop-off Facility +Elementary School - Non-public,Non-Public K-12 Schools +Emergency Medical Station,Other Emergency Services +Ferry Landing,Ports and Ferry Landings +Ferry Terminal,Ports and Ferry Landings +Firehouse,Fire Services +Flagship Park,Parks +Food Pantry,Soup Kitchens and Food Pantries +Garden,Gardens +Group Day Care - Age Unspecified,Day Care +High School - Non-public,Non-Public K-12 Schools +Historic House Park,Historical Sites +Home Delivered Meals,Senior Services +Jobs and Service Center,Workforce Development +Leaf,DSNY Drop-off Facility +Lot,City Agency Parking +"Maintenance, Management, and Operations",Other Transportation +Mall,"Streetscapes, Plazas, and Malls" +Manned Transportation Facility,Other Transportation +Medicaid Office,Financial Assistance and Social Services +Middle 
School - Non-public,Non-Public K-12 Schools +Natural Resource Area,Preserves and Conservation Areas +Nature Area,Preserves and Conservation Areas +Neighborhood Park,Parks +NORC Services,Senior Services +Nursing Home,Residential Health Care +NYCHA Community Center,Community Centers and Community Programs +NYCHA Police Service,Police Services +Other School - Non-public,Non-Public K-12 Schools +Other Solid Waste Processing,Solid Waste Processing +Parkway,"Streetscapes, Plazas, and Malls" +Pedestrian Plaza,"Streetscapes, Plazas, and Malls" +Police Station,Police Services +Port or Marine Terminal,Ports and Ferry Landings +Pre-K Center,DOE Universal Pre-Kindergarten +Privately Owned Public Space,Privately Owned Public Space +Programs for People with Disabilities,Programs for People with Disabilities +Public Library,Public Libraries +Public Parking,Parking Lots and Garages +Recyclables Handling and Recovery,Solid Waste Processing +Regulated Medical Waste,Solid Waste Processing +School Based Child Care - Age Unspecified,Day Care +School Based Child Care - Infants/Toddlers,Day Care +School Based Child Care - Preschool,Day Care +School Bus Depot,Bus Depots and Terminals +Scrap Metal Processing,Solid Waste Processing +Senior Center,Senior Services +Senior Services,Senior Services +SNAP Center,Financial Assistance and Social Services +Soup Kitchen,Soup Kitchens and Food Pantries +Special Waste,DSNY Drop-off Facility +State Historic Place,Historical Sites +Strip,"Streetscapes, Plazas, and Malls" +Textiles,DSNY Drop-off Facility +Tow Truck Company,Parking Lots and Garages +Tracking,"Streetscapes, Plazas, and Malls" +Transfer Station,Solid Waste Transfer and Carting +Triangle/Plaza,"Streetscapes, Plazas, and Malls" +Undeveloped,Undeveloped +Wastewater Treatment Plant,Wastewater and Pollution Control