Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: overhaul overture street network preparation #267

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
25 changes: 6 additions & 19 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,17 @@
"version": "0.2.0",
"configurations": [
{
"name": "Kart: Command line",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/src/collection/kart/prepare_kart.py",
"console": "integratedTerminal",
"args": [
"--repo_url", "https://github.com/goat-community/pois",
"--maintainer", "rds",
"--table_name", "poi"
],
"cwd": "${workspaceFolder}"
},
{
"name": "CLI: Command line",
"type": "python",
"name": "Data Preparation CLI: manage.py",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/manage.py",
"console": "integratedTerminal",
"args": [
"--actions", "preparation",
"--region", "de",
"--datasets", "poi_osm_overture_fusion"
"--actions", "",
"--datasets", "",
"--region", "",
],
"cwd": "${workspaceFolder}"
}
]
}
}
9 changes: 6 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ RUN apt update
RUN apt install -y s3fs

# Install kart
RUN apt-get install -y libtinfo5
RUN wget "https://goat-db-schemas.s3.eu-central-1.amazonaws.com/kart.deb"
RUN dpkg -i kart.deb
# RUN apt-get install -y libtinfo5
# RUN wget "https://goat-db-schemas.s3.eu-central-1.amazonaws.com/kart.deb"
# RUN dpkg -i kart.deb

# Install postgresql-client
RUN sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
Expand All @@ -40,6 +40,9 @@ RUN apt-get install -y postgresql-client-15
RUN apt install -y nano
RUN git config --global core.editor "nano"

# Install zip
RUN apt install -y zip

# Install Java 11
RUN echo 'deb http://deb.debian.org/debian bullseye main' > /etc/apt/sources.list
RUN apt-get update && \
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ Preparation
- poi_overture
- network
- network_pt
- network_overture
- overture_street_network
- gtfs
- gtfs_stops
- gtfs_stations
Expand Down
13 changes: 7 additions & 6 deletions manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,24 @@
from src.collection.gtfs import collect_gtfs
from src.collection.landuse import collect_landuse
from src.collection.network import collect_network
from src.collection.network_pt import collect_network_pt
from src.collection.osm_pt_lines import collect_osm_pt_lines
from src.collection.overture import collect_overture
from src.collection.poi import collect_poi
from src.core.config import settings
from src.db.db import Database
from src.export.gtfs import export_gtfs
from src.fusion.poi_osm_overture import fusion_poi_osm_overture
from src.migration.gtfs import migrate_gtfs
from src.preparation.building import prepare_building
from src.preparation.gtfs import export_gtfs, prepare_gtfs
from src.preparation.gtfs import prepare_gtfs
from src.preparation.gtfs_stations import prepare_gtfs_stations
from src.preparation.gtfs_stops import prepare_gtfs_stops
from src.preparation.network import export_network, prepare_network
from src.preparation.network_overture import prepare_overture_network
from src.preparation.network_pt import prepare_network_pt
from src.preparation.osm_pt_lines import prepare_osm_pt_lines
from src.preparation.overture_division_area import prepare_overture_division_area
from src.preparation.overture_place import prepare_overture_place
from src.preparation.overture_street_network import prepare_overture_street_network
from src.preparation.poi import export_poi, prepare_poi
from src.preparation.poi_overture import prepare_poi_overture
from src.preparation.population import prepare_population
Expand All @@ -40,7 +41,6 @@
"poi": collect_poi,
"landuse": collect_landuse,
"network": collect_network,
"network_pt": collect_network_pt,
"gtfs": collect_gtfs,
"overture": collect_overture,
"osm_pt_lines": collect_osm_pt_lines,
Expand All @@ -53,19 +53,20 @@
"building": prepare_building,
"population": prepare_population,
"gtfs": prepare_gtfs,
"network_overture": prepare_overture_network,
"overture_street_network": prepare_overture_street_network,
"overture": prepare_overture_division_area,
"gtfs_stops": prepare_gtfs_stops,
"gtfs_stations": prepare_gtfs_stations,
"osm_pt_lines": prepare_osm_pt_lines,
"overture_place": prepare_overture_place,
},
"fusion":{
"poi_osm_overture": fusion_poi_osm_overture,
},
"export": {
"poi": export_poi,
"network": export_network,
"gtfs": export_gtfs
"gtfs": export_gtfs,
},
"migration": {
"gtfs": migrate_gtfs
Expand Down
4 changes: 2 additions & 2 deletions src/collection/gtfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def import_file(self, input_dir: str, table: str, header: list, table_columns: l
sql_copy = f"""
INSERT INTO {self.schema}.{table} ({output_cols_formatted}, geom, h3_3)
SELECT {output_cols_formatted}, ST_SetSRID(ST_MakePoint(shape_pt_lon, shape_pt_lat), 4326) AS geom,
public.to_short_h3_3(h3_lat_lng_to_cell(ST_SetSRID(ST_MakePoint(shape_pt_lon, shape_pt_lat), 4326)::point, 3)::bigint) AS h3_3
basic.to_short_h3_3(h3_lat_lng_to_cell(ST_SetSRID(ST_MakePoint(shape_pt_lon, shape_pt_lat), 4326)::point, 3)::bigint) AS h3_3
FROM {self.schema}.{table}_temp;
"""
elif table == "stops":
Expand All @@ -196,7 +196,7 @@ def import_file(self, input_dir: str, table: str, header: list, table_columns: l
);

UPDATE {self.schema}.{table}_temp
SET h3_3 = to_short_h3_3(
SET h3_3 = basic.to_short_h3_3(
h3_lat_lng_to_cell(ST_SetSRID(ST_MakePoint(stop_lon, stop_lat), 4326)::point, 3)::bigint
)
WHERE h3_3 IS NULL;
Expand Down
121 changes: 0 additions & 121 deletions src/collection/network_pt.py

This file was deleted.

13 changes: 7 additions & 6 deletions src/collection/overture.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pyspark.sql.types as pyspark_types
from pyspark.sql import SparkSession
from pyspark.sql.functions import expr, to_json
from sedona.spark import SedonaContext

Expand Down Expand Up @@ -48,8 +49,8 @@ def validate_config(self):
if not self.data_config_collection.get("local_result_table"):
raise ValueError("Local result table not specified.")

def initialize_sedona_context(self):
"""Initialze Sedona context with required dependencies, AWS credentials provider and resource allocations."""
def initialize_spark_session(self) -> SparkSession:
"""Initialize SparkSession with required dependencies, AWS credentials provider and resource allocations."""

config = SedonaContext.builder() \
.config('spark.jars.packages',
Expand Down Expand Up @@ -86,11 +87,11 @@ def build_overture_s3_uri(self, version, theme, type):

return f"s3a://overturemaps-us-west-2/release/{version}/theme={theme}/type={type}"

def initialize_data_source(self, sedona: SedonaContext):
def initialize_data_source(self, spark: SparkSession):
"""Initialize Overture geoparquet file source and Spark data frames."""

# Load Overture geoparquet data into Spark DataFrames
self.data_frame = sedona.read.format("geoparquet").load(
self.data_frame = spark.read.format("geoparquet").load(
path=self.build_overture_s3_uri(
version=self.data_config_collection["version"],
theme=self.data_config_collection["theme"],
Expand Down Expand Up @@ -174,9 +175,9 @@ def run(self):
self.validate_config()

# Initialize Overture data source
sedona = self.initialize_sedona_context()
spark = self.initialize_spark_session()
self.initialize_jdbc_properties()
self.initialize_data_source(sedona)
self.initialize_data_source(spark)

# Process data frame and filter by region bounds
bbox_coords = get_region_bbox_coords(
Expand Down
2 changes: 1 addition & 1 deletion src/collection/poi.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def poi_collection(self):
osm_filter += tag + " "

if osm_filter:
'--keep="' + osm_filter + '"'
osm_filter = '--keep="' + osm_filter + '"'

# Remove not needed osm feature categories
if self.data_config.collection["nodes"] == False:
Expand Down
4 changes: 2 additions & 2 deletions src/config/config.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import os
import subprocess

import yaml

from src.config.osm_dict import OSM_germany, OSM_tags
from src.core.config import settings
from src.utils.utils import download_link, print_info

import subprocess


class Config:
"""Reads the config file and returns the config variables.
Expand All @@ -28,6 +27,7 @@ def __init__(self, name: str, region: str):
self.name = name
self.collection = self.config.get("collection")
self.preparation = self.config.get("preparation")
self.export = self.config.get("export")
self.subscription = self.config.get("subscription")
self.analysis = self.config.get("analysis")
self.pbf_data = self.config.get("region_pbf")
Expand Down
7 changes: 5 additions & 2 deletions src/config/data_variables/gtfs/gtfs_de.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
preparation:
start_date: "2025-02-04" # Must be a Tuesday for stop_times_optimized to be correct!
num_weeks: 26
network_dir: "de"
target_schema: "gtfs_de"
network_dir: "gtfs_de_20250203"
target_schema: "gtfs_de_20250203"
regions_query: "SELECT nuts_id, nuts_name FROM public.nuts WHERE cntr_code IN ('DE') AND levl_code = '3';"

export:
local_gtfs_schema: "gtfs_de_20250106"
28 changes: 28 additions & 0 deletions src/config/data_variables/network_pt/network_pt_de.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
preparation:
# A SQL query which returns the region's bounding geometry (omit terminating semicolon)
region: "SELECT ST_Union(geom) AS geom FROM public.germany_states"

# File in the src/data/input/network_pt directory where OSM data is stored
local_osm_file: "germany-latest.osm.pbf"

# Schema in local database where GTFS data was collected
local_gtfs_schema: "gtfs_de_20250203"

# Table in local database where sub-regions will be written
local_sub_region_table: "temporal.sub_region_de"

# Weekday dates that will be used to optimize the GTFS data
# Leave empty if no optimization is desired
weekday_tuesday: "2025-02-18"
weekday_saturday: "2025-02-22"
weekday_sunday: "2025-02-23"

# Buffer distance in meters to add to the sub-region's geometry
sub_region_buffer_dist: 80000

# Number of sub-regions to divide the region into
sub_region_count: 4

export:
# Configure whether the script should delete old regions and bundles from R5 automatically
delete_old_regions: false
4 changes: 0 additions & 4 deletions src/config/data_variables/network_pt/network_pt_eu.yaml

This file was deleted.

Loading