Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GeoJSON Internal [Major] #53

Merged
merged 27 commits into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
2023-09-06 v8.3.0
- Update to next major alpine version
- Remove mock library in favour of unittest.mock
- Update all python deps to latest versions

2023-09-05 v8.2.4
- Update TippeCanoe to latest version
- Make GeoJSON+LD output a default
Expand Down
8 changes: 4 additions & 4 deletions openaddr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from .conform import (
ConformResult,
DecompressionTask,
ConvertToCsvTask,
ConvertToGeojsonTask,
elaborate_filenames,
ADDRESSES_SCHEMA,
BUILDINGS_SCHEMA,
Expand Down Expand Up @@ -134,7 +134,7 @@ def conform(source_config, destdir, extras):
decompressed_paths = task2.decompress(downloaded_path, workdir, names)
_L.info("Decompressed to %d files", len(decompressed_paths))

task4 = ConvertToCsvTask()
task4 = ConvertToGeojsonTask()
try:
csv_path, feat_count = task4.convert(source_config, decompressed_paths, workdir)
if feat_count > 0:
Expand All @@ -148,8 +148,8 @@ def conform(source_config, destdir, extras):

out_path = None
if csv_path is not None and exists(csv_path):
ingalls marked this conversation as resolved.
Show resolved Hide resolved
move(csv_path, join(destdir, 'out.csv'))
out_path = realpath(join(destdir, 'out.csv'))
move(csv_path, join(destdir, 'out.geojson'))
out_path = realpath(join(destdir, 'out.geojson'))

rmtree(workdir)

Expand Down
59 changes: 35 additions & 24 deletions openaddr/conform.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@

from .geojson import stream_geojson

from shapely.wkt import loads as wkt_loads
from shapely.geometry import mapping

from zipfile import ZipFile
from locale import getpreferredencoding
from os.path import splitext
Expand Down Expand Up @@ -365,7 +368,7 @@ def find_source_path(data_source, source_paths):
_L.warning("Unknown source conform format %s", format_string)
return None

class ConvertToCsvTask(object):
class ConvertToGeojsonTask(object):
known_types = ('.shp', '.json', '.csv', '.kml', '.gdb')

def convert(self, source_config, source_paths, workdir):
Expand All @@ -381,7 +384,7 @@ def convert(self, source_config, source_paths, workdir):
source_path = find_source_path(source_config.data_source, source_paths)
if source_path is not None:
basename, ext = os.path.splitext(os.path.basename(source_path))
dest_path = os.path.join(convert_path, basename + ".csv")
dest_path = os.path.join(convert_path, basename + ".geojson")
rc = conform_cli(source_config, source_path, dest_path)
if rc == 0:
with open(dest_path) as file:
Expand Down Expand Up @@ -832,14 +835,17 @@ def row_transform_and_convert(source_config, row):
# Make up a random fingerprint if none exists
cache_fingerprint = source_config.data_source.get('fingerprint', str(uuid4()))

row = row_convert_to_out(source_config, row)

if source_config.layer == "addresses":
row = row_canonicalize_unit_and_number(source_config.data_source, row)
row = row_round_lat_lon(source_config.data_source, row)

row = row_calculate_hash(cache_fingerprint, row)
return row

feat = row_convert_to_out(source_config, row)

if source_config.layer == "addresses":
feat = feat_canonicalize_unit_and_number(source_config.data_source, feat)

return feat

def fxn_smash_case(fxn):
if "field" in fxn:
Expand Down Expand Up @@ -1041,14 +1047,14 @@ def row_fxn_constant(sc, row, key, fxn):

return row

def row_canonicalize_unit_and_number(sc, row):
def feat_canonicalize_unit_and_number(sc, feat):
"Canonicalize address unit and number"
row["UNIT"] = (row["UNIT"] or '').strip()
row["NUMBER"] = (row["NUMBER"] or '').strip()
if row["NUMBER"].endswith(".0"):
row["NUMBER"] = row["NUMBER"][:-2]
row["STREET"] = (row["STREET"] or '').strip()
return row
feat["properties"]["unit"] = (feat["properties"]["unit"] or '').strip()
feat["properties"]["number"] = (feat["properties"]["number"] or '').strip()
if feat["properties"]["number"].endswith(".0"):
feat["properties"]["number"] = feat["properties"]["number"][:-2]
feat["properties"]["street"] = (feat["properties"]["street"] or '').strip()
return feat

def _round_wgs84_to_7(n):
"Round a WGS84 coordinate to 7 decimal points. Input and output both strings."
Expand Down Expand Up @@ -1086,21 +1092,28 @@ def row_convert_to_out(source_config, row):
"Convert a row from the source schema to OpenAddresses output schema"

output = {
"GEOM": row.get(GEOM_FIELDNAME.lower(), None),
"type": "Feature",
"properties": {},
"geometry": row.get(GEOM_FIELDNAME.lower(), None),
}

if output["geometry"] is not None:
wkt_parsed = wkt_loads(output["geometry"])
output["geometry"] = mapping(wkt_parsed)


for field in source_config.SCHEMA:
if row.get('oa:{}'.format(field.lower())) is not None:
# If there is an OA prefix, it is not a native field and was compiled
# via an attrib function or concatenation
output[field] = row.get('oa:{}'.format(field.lower()))
output["properties"][field.lower()] = row.get('oa:{}'.format(field.lower()))
else:
# Get a native field as specified in the conform object
cfield = source_config.data_source['conform'].get(field.lower())
if cfield:
output[field] = row.get(cfield.lower())
output["properties"][field.lower()] = row.get(cfield.lower())
else:
output[field] = ''
output["properties"][field.lower()] = ''

return output

Expand Down Expand Up @@ -1134,8 +1147,8 @@ def extract_to_source_csv(source_config, source_path, extract_path):
else:
raise Exception("Unsupported source format %s" % format_string)

def transform_to_out_csv(source_config, extract_path, dest_path):
''' Transform an extracted source CSV to the OpenAddresses output CSV by applying conform rules.
def transform_to_out_geojson(source_config, extract_path, dest_path):
''' Transform an extracted source CSV to the OpenAddresses output GeoJSON by applying conform rules.

source_config: description of the source, containing the conform object
extract_path: extracted CSV file to process
Expand All @@ -1147,14 +1160,12 @@ def transform_to_out_csv(source_config, extract_path, dest_path):
# Read through the extract CSV
with open(extract_path, 'r', encoding='utf-8') as extract_fp:
reader = csv.DictReader(extract_fp)
# Write to the destination CSV
# Write to the destination GeoJSON
with open(dest_path, 'w', encoding='utf-8') as dest_fp:
writer = csv.DictWriter(dest_fp, ['GEOM', 'HASH', *source_config.SCHEMA])
writer.writeheader()
# For every row in the extract
for extract_row in reader:
out_row = row_transform_and_convert(source_config, extract_row)
writer.writerow(out_row)
dest_fp.write(json.dumps(out_row) + '\n')

def conform_cli(source_config, source_path, dest_path):
"Command line entry point for conforming a downloaded source to an output CSV."
Expand All @@ -1176,7 +1187,7 @@ def conform_cli(source_config, source_path, dest_path):

try:
extract_to_source_csv(source_config, source_path, extract_path)
transform_to_out_csv(source_config, extract_path, dest_path)
transform_to_out_geojson(source_config, extract_path, dest_path)
finally:
os.remove(extract_path)

Expand Down
61 changes: 12 additions & 49 deletions openaddr/preview.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
from __future__ import division
import logging; _L = logging.getLogger('openaddr.preview')

from zipfile import ZipFile
from io import TextIOWrapper
from csv import DictReader
from tempfile import mkstemp
from math import pow, sqrt, pi, log
from argparse import ArgumentParser
from urllib.parse import urlparse
import json, itertools, os, struct

import requests, uritemplate, mapbox_vector_tile
Expand All @@ -30,10 +25,9 @@
# Web Mercator, https://trac.osgeo.org/openlayers/wiki/SphericalMercator
EPSG900913 = '+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +no_defs'

def render(filename_or_url, png_filename, width, resolution, mapbox_key):
def render(src_filename, png_filename, width, resolution, mapbox_key):
'''
'''
src_filename = get_local_filename(filename_or_url)
_, points_filename = mkstemp(prefix='points-', suffix='.bin')

try:
Expand Down Expand Up @@ -90,50 +84,19 @@ def render(filename_or_url, png_filename, width, resolution, mapbox_key):
os.remove(points_filename)
surface.write_to_png(png_filename)

def get_local_filename(filename_or_url):
    ''' Return a filename usable for reading the given path or URL.

        Values with no scheme or a "file" scheme are returned unchanged.
        "http" and "https" URLs are downloaded into a new temporary file
        (prefix "Preview-", same extension as the URL path) and that
        file's name is returned. Any other scheme raises ValueError.
    '''
    parsed = urlparse(filename_or_url)

    if parsed.scheme in ('', 'file'):
        return filename_or_url

    if parsed.scheme not in ('http', 'https'):
        raise ValueError('Unknown URL type: {}'.format(filename_or_url))

    # Keep the remote extension so later code can still dispatch on suffix.
    suffix = os.path.splitext(parsed.path)[1]

    _L.info('Downloading {}...'.format(filename_or_url))

    got = requests.get(filename_or_url)
    handle, filename = mkstemp(prefix='Preview-', suffix=suffix)

    # mkstemp() returns an already-open OS-level descriptor. Write through
    # it so it is closed when the block exits, instead of discarding it and
    # re-opening the path, which leaked one descriptor per download.
    with os.fdopen(handle, 'wb') as file:
        file.write(got.content)
    _L.debug('Saved to {}'.format(filename))

    return filename

def iterate_file_lonlats(filename):
''' Stream (lon, lat) coordinates from an input .csv or .zip file.
''' Stream (lon, lat) coordinates from an input GeoJSON
'''
suffix = os.path.splitext(filename)[1].lower()

if suffix == '.csv':
open_file = open(filename, 'r')
elif suffix == '.zip':
open_file = open(filename, 'rb')

with open_file as file:
if suffix == '.csv':
csv_file = file
elif suffix == '.zip':
zip = ZipFile(file)
csv_names = [name for name in zip.namelist() if name.endswith('.csv')]
csv_file = TextIOWrapper(zip.open(csv_names[0]))

for row in DictReader(csv_file):

print('FILENAME', filename)
with open(filename, 'r') as file:
print('FILE', file);
for line in file:
try:
lon, lat, x = ogr.CreateGeometryFromWkt(row['GEOM']).PointOnSurface().GetPoint()
line = json.loads(line)
lon, lat, x = ogr.CreateGeometryFromJson(json.dumps(line['geometry'])).PointOnSurface().GetPoint()
except Exception as e:
print('ERROR', e)
except:
continue

Expand Down Expand Up @@ -412,7 +375,7 @@ def draw_line(ctx, start, points):

parser = ArgumentParser(description='Draw a map of a single source preview.')

parser.add_argument('src_filename', help='Input Zip or CSV filename or URL.')
parser.add_argument('src_filename', help='Input GeoJSON')
parser.add_argument('png_filename', help='Output PNG filename.')

parser.set_defaults(resolution=1, width=668)
Expand Down
Loading
Loading