Skip to content

Commit 10ae1d2

Browse files
committed
refactor: Encode context & docs into types, misc simplifying
- Logical flow is `Feature` -> `CapitolFeature` -> `StateCapitol`
- Made `Feature` generic to promote reuse (e.g. #667)
- Ensure an exit code is produced when `get_state_capitols` fails
  - Previously would print to console, but wouldn't block a task runner/CI
- Move the territory filter into the query
  - Previously requested more than we wanted
- Added references to things I needed context for (new to spatial data)
1 parent 9249a47 commit 10ae1d2

File tree

3 files changed

+150
-130
lines changed

3 files changed

+150
-130
lines changed

Diff for: datapackage.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
}
2121
],
2222
"version": "2.11.0",
23-
"created": "2025-01-23T03:23:37.709555+00:00",
23+
"created": "2025-01-23T14:30:56.038438+00:00",
2424
"resources": [
2525
{
2626
"name": "7zip.png",
@@ -3072,7 +3072,7 @@
30723072
"mediatype": "text/json",
30733073
"encoding": "utf-8",
30743074
"hash": "sha1:32b4d3a13918b0aa85e62c09495eccf842fffb31",
3075-
"bytes": 4048,
3075+
"bytes": 4100,
30763076
"dialect": {
30773077
"json": {
30783078
"keyed": true

Diff for: datapackage.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# vega-datasets
2-
`2.11.0` | [GitHub](http://github.com/vega/vega-datasets.git) | 2025-01-23 03:23:37 [UTC]
2+
`2.11.0` | [GitHub](http://github.com/vega/vega-datasets.git) | 2025-01-23 14:30:56 [UTC]
33

44
Common repository for example datasets used by Vega related projects.
55
BSD-3-Clause license applies only to package code and infrastructure. Users should verify their use of datasets

Diff for: scripts/us-state-capitals.py

+147-127
Original file line numberDiff line numberDiff line change
@@ -8,161 +8,181 @@
88
It relies on a local JSON file `_data/us-state-codes.json` for mapping state abbreviations to full names.
99
"""
1010

11+
from __future__ import annotations
12+
1113
import json
14+
import typing
15+
import warnings
16+
from functools import partial
1217
from operator import itemgetter
1318
from pathlib import Path
14-
from typing import Any
19+
from typing import TYPE_CHECKING, Literal, TypedDict
1520

1621
import niquests
1722

23+
if TYPE_CHECKING:
24+
import sys
25+
from collections.abc import Iterator, Mapping, Sequence
26+
from typing import Any, LiteralString
1827

19-
def load_state_codes(script_dir: Path) -> dict:
20-
"""
21-
Loads state/territory code mappings from `_data/us-state-codes.json`.
22-
23-
Required to:
24-
1. convert API state abbreviations to full names (e.g., 'CA' to 'California').
25-
2. filter out U.S. territory locations from the API data. (Current script scope: U.S. states).
26-
27-
Example `us-state-codes.json`:
28-
```json
29-
{
30-
"states": {
31-
"AL": "Alabama",
32-
"WY": "Wyoming"
33-
},
34-
"territories": {}
35-
}
36-
```
28+
if sys.version_info >= (3, 13):
29+
from typing import TypeIs
30+
else:
31+
from typing_extensions import TypeIs
3732

38-
Args:
39-
script_dir: Script directory (for locating `_data/us-state-codes.json`).
33+
type Features = Sequence[Feature[Any, Any, Any]]
34+
"""Represents the ``features`` property of capitol building data, before validation."""
4035

41-
Returns
42-
-------
43-
Dictionary: State abbreviation to full name mappings (from JSON "states"),
44-
used for name lookup and territory filtering.
45-
"""
46-
data_dir = script_dir.parent / "_data"
47-
state_codes_path = data_dir / "us-state-codes.json"
36+
type FieldName = Literal["NAME", "STATE", "CITY"]
4837

49-
with state_codes_path.open() as f:
50-
return json.load(f)
38+
REPO_ROOT: Path = Path(__file__).parent.parent
39+
INPUT_DIR: Path = REPO_ROOT / "_data"
40+
OUTPUT_DIR: Path = REPO_ROOT / "data"
5141

42+
INPUT_FILE: Path = INPUT_DIR / "us-state-codes.json"
43+
"""
44+
State abbreviation to full name mappings (from JSON "states").
45+
46+
Used for name lookup and territory filtering.
47+
48+
Example:
49+
50+
{"states": {"AL": "Alabama", "WY": "Wyoming"}, "territories": {}}
51+
"""
5252

53-
def get_state_capitols() -> dict | None:
53+
OUTPUT_FILE: Path = OUTPUT_DIR / "us-state-capitals.json"
54+
# Base address of the National Map ArcGIS server.
URL_ARCGIS = "https://carto.nationalmap.gov/arcgis/"
# The query endpoint lives under ``rest/services``; without the ``rest/``
# segment the composed URL differs from the endpoint this script used before
# the refactor (``.../arcgis/rest/services/structures/MapServer/6/query``).
URL_MAP_SERVER = f"{URL_ARCGIS}rest/services/structures/MapServer/"
# Query operation on layer 6 of the structures map service.
URL_STATE_CAPITOLS = f"{URL_MAP_SERVER}6/query"
57+
FEATURE_STATE_CAPITOLS = "FCODE = 83006"
58+
TERRITORIES = "STATE IN ('AS', 'GU', 'MP', 'PR', 'VI')"
59+
WHERE_CLAUSE = f"{FEATURE_STATE_CAPITOLS} AND NOT ({TERRITORIES})"
60+
WKID_WGS84: Literal[4326] = 4326
61+
"""
62+
`Well-known ID`_ for `WGS 84`_, used as a `spatial reference`_.
63+
64+
.. _Well-known ID: https://support.esri.com/en-us/gis-dictionary/wkid
65+
.. _WGS 84: https://en.wikipedia.org/wiki/World_Geodetic_System#WGS_84
66+
.. _spatial reference: https://carto.nationalmap.gov/arcgis/help/en/rest/services-reference/enterprise/geometry-objects/#spatial-reference
67+
"""
68+
69+
70+
class MapServiceLayerResponse(TypedDict, total=False):
5471
"""
55-
Fetches state capitol building coordinates from the National Map Structures Database.
72+
Response from `National Map Structures Database`_.
5673
57-
Returns
58-
-------
59-
JSON response containing capitol building data, or None if request fails
74+
.. _National Map Structures Database: https://carto.nationalmap.gov/arcgis/help/en/rest/services-reference/enterprise/query-map-service-layer/
6075
"""
61-
url = "https://carto.nationalmap.gov/arcgis/rest/services/structures/MapServer/6/query"
62-
params = {
63-
"f": "json",
64-
"where": "FCODE=83006", # Feature code for state capitol buildings
65-
"outFields": "NAME,STATE,CITY,SHAPE",
66-
"geometryPrecision": 7,
67-
"outSR": 4326, # WGS84 coordinate system
68-
"returnGeometry": True,
69-
}
7076

71-
try:
72-
response = niquests.get(url, params=params)
73-
response.raise_for_status()
74-
return response.json()
75-
except niquests.exceptions.RequestException as e:
76-
print(f"Error fetching data: {e}")
77-
return None
77+
features: Features
7878

7979

80-
def format_capitols_data(
81-
capitols_data: dict[str, Any] | None, state_data: dict
82-
) -> list:
83-
"""
84-
Processes raw capitol data into a clean format with full state names.
80+
class Point(TypedDict):
81+
x: float
82+
y: float
8583

86-
Args:
87-
capitols_data: Raw JSON response from the WMS query
88-
state_data: Dictionary with 'states' and 'territories' mappings
8984

90-
Returns
91-
-------
92-
List of dictionaries containing formatted capitol data
85+
class Feature[A_KT: LiteralString, A_VT: str | float | bool, G: Mapping[str, Any]](
86+
TypedDict
87+
):
9388
"""
94-
formatted_data = []
95-
if capitols_data and "features" in capitols_data:
96-
for feature in capitols_data["features"]:
97-
attributes = feature.get("attributes", {})
98-
geometry = feature.get("geometry", {})
99-
100-
state_code = attributes.get("STATE")
101-
city_name = attributes.get("CITY")
102-
lon = geometry.get("x")
103-
lat = geometry.get("y")
104-
105-
# Check if it's a state and we have all required data
106-
if (
107-
state_code
108-
and state_code in state_data["states"] # Check in states dictionary
109-
and city_name
110-
and lon is not None
111-
and lat is not None
112-
):
113-
formatted_data.append({
114-
"lon": lon,
115-
"lat": lat,
116-
"state": state_data["states"][
117-
state_code
118-
], # Get name from states dictionary
119-
"city": city_name,
120-
})
121-
return formatted_data
122-
123-
124-
def save_json_output(data: list[dict], output_path: Path) -> None:
125-
"""
126-
Saves formatted capitol data to a JSON file with consistent formatting.
89+
A generic `GeoJSON feature object`_.
12790
128-
Args:
129-
data: List of formatted capitol dictionaries
130-
output_path: Path where JSON file should be saved
91+
.. _GeoJSON feature object: https://carto.nationalmap.gov/arcgis/help/en/rest/services-reference/enterprise/feature-object/
13192
"""
132-
sorted_data = sorted(data, key=itemgetter("state"))
13393

134-
with output_path.open("w") as f:
135-
f.write("[\n")
136-
for i, capital_data in enumerate(sorted_data):
137-
ordered_data = {
138-
"lon": capital_data["lon"],
139-
"lat": capital_data["lat"],
140-
"state": capital_data["state"],
141-
"city": capital_data["city"],
142-
}
143-
json_str = json.dumps(ordered_data, separators=(", ", ":"))
94+
attributes: Mapping[A_KT, A_VT]
95+
geometry: G
96+
97+
98+
class CapitolFeature(Feature[FieldName, str, Point]):
99+
"""Validated state capitol feature, **prior** to any processing."""
100+
101+
102+
class StateCapitol(TypedDict):
103+
"""State capitol feature, **after** processing."""
104+
105+
lon: float
106+
lat: float
107+
state: str
108+
city: str
144109

145-
f.write(" " + json_str + ("," if i < len(sorted_data) - 1 else "") + "\n")
146-
f.write("]\n")
110+
111+
def read_json(source: str | Path, /) -> Any:
    """Read the UTF-8 encoded file at ``source`` and return its decoded JSON content."""
    text = Path(source).read_text(encoding="utf-8")
    return json.loads(text)
113+
114+
115+
def get_state_capitols() -> Features:
    """
    Fetch state capitol building features from the National Map Structures Database.

    Returns
    -------
    The non-empty ``features`` entry of the JSON response.

    Raises
    ------
    TypeError
        If the response has no ``features`` entry, or the entry is empty.
        The full response is included in the message.

    HTTP error statuses are raised by ``response.raise_for_status()``.
    """
    # Request exactly the attribute fields named by ``FieldName``, plus the shape.
    out_fields = ",".join((*_get_args(FieldName), "SHAPE"))
    query = {
        "f": "json",
        "where": WHERE_CLAUSE,
        "outFields": out_fields,
        "geometryPrecision": 7,
        "outSR": WKID_WGS84,
        "returnGeometry": True,
    }
    response = niquests.get(URL_STATE_CAPITOLS, params=query)
    response.raise_for_status()
    content: MapServiceLayerResponse = response.json()
    features = content.get("features")
    if not features:
        msg = f"Expected a features mapping but got:\n\n{content!r}"
        raise TypeError(msg)
    return features
132+
133+
134+
def is_capitol_feature(feat: Feature, states: dict[str, str]) -> TypeIs[CapitolFeature]:
    """
    Ensure feature describes only capitols of states and not territories.

    A feature is accepted only when all of the following hold:

    - it has non-empty ``attributes`` and ``geometry`` members
    - ``attributes["STATE"]`` is a key of ``states``
    - ``attributes["CITY"]`` is present and non-empty
    - ``geometry`` has exactly the keys ``x`` and ``y``, neither of them ``None``

    Args:
        feat: Raw feature to validate.
        states: Mapping of state abbreviation to full state name.

    Returns
    -------
    ``True`` when ``feat`` can safely be handled as a ``CapitolFeature``.
    """
    attrs = feat.get("attributes")
    geom = feat.get("geometry")
    if not attrs or not geom:
        return False
    return bool(
        attrs.get("STATE") in states
        # A key that is present but null/empty must not pass: downstream code
        # copies this value straight into the output record as ``city``.
        and attrs.get("CITY")
        and geom.keys() == {"x", "y"}
        # Same reasoning for the coordinates, which become ``lon``/``lat``.
        and geom["x"] is not None
        and geom["y"] is not None
    )
143+
144+
145+
def into_state_capitol(feat: CapitolFeature, states: dict[str, str]) -> StateCapitol:
    """
    Build a ``StateCapitol`` record from a validated feature.

    The state abbreviation in ``feat`` is replaced by its full name, looked up
    in ``states``; the geometry ``x``/``y`` become ``lon``/``lat``.
    """
    point = feat["geometry"]
    fields = feat["attributes"]
    return StateCapitol(
        lon=point["x"],
        lat=point["y"],
        state=states[fields["STATE"]],
        city=fields["CITY"],
    )
151+
152+
153+
def iter_state_capitols(
    features: Features, states: dict[str, str]
) -> Iterator[StateCapitol]:
    """
    Yield a cleaned ``StateCapitol`` record for every feature that validates.

    Features rejected by ``is_capitol_feature`` are not yielded; a warning
    containing the feature's ``repr`` is emitted for each one instead.
    """
    for feature in features:
        if not is_capitol_feature(feature, states):
            msg = f"Unexpected territory:\n{feature!r}"
            warnings.warn(msg, stacklevel=2)
            continue
        yield into_state_capitol(feature, states)
162+
163+
164+
def write_json(data: Sequence[StateCapitol], output: Path) -> None:
    """
    Saves ``data`` to ``output`` with consistent formatting.

    The file holds a JSON array with one record per line: each record is
    indented, records are separated by a trailing comma, and the closing
    bracket sits on its own line.

    An empty ``data`` produces a valid empty array (an opening and a closing
    bracket on separate lines) instead of failing while indexing the last
    record.

    Args:
        data: Records to serialize, in the order they should appear.
        output: Destination file; it is overwritten.
    """
    INDENT, OB, CB, NL = " ", "[", "]", "\n"
    to_str = partial(json.dumps, separators=(", ", ":"))
    # Serialize everything up front so a non-serializable record fails before
    # the destination file is opened (and truncated).
    records = [f"{INDENT}{to_str(record)}" for record in data]
    with output.open("w", encoding="utf-8") as f:
        f.write(f"{OB}{NL}")
        if records:
            # Joining places the comma between records only, never after the last.
            f.write(f",{NL}".join(records) + NL)
        f.write(f"{CB}{NL}")
173+
174+
175+
def _get_args(tp: Any, /) -> tuple[Any, ...]:
176+
return typing.get_args(getattr(tp, "__value__", tp))
147177

148178

149179
def main() -> None:
150-
script_dir = Path(__file__).parent
151-
state_codes = load_state_codes(script_dir)
152-
153-
capitols_response = get_state_capitols()
154-
if not capitols_response:
155-
print("Error: Failed to retrieve state capitals data")
156-
return
157-
158-
formatted_data = format_capitols_data(capitols_response, state_codes)
159-
print(f"Found {len(formatted_data)} state capitals")
160-
161-
data_dir = script_dir.parent / "data"
162-
output_path = data_dir / "us-state-capitals.json"
163-
output_path.touch()
164-
save_json_output(formatted_data, output_path)
165-
print(f"Data written to {output_path}")
180+
it = iter_state_capitols(get_state_capitols(), read_json(INPUT_FILE)["states"])
181+
by_state = sorted(it, key=itemgetter("state"))
182+
print(f"Found {len(by_state)} state capitals")
183+
OUTPUT_FILE.touch()
184+
write_json(by_state, OUTPUT_FILE)
185+
print(f"Data written to {OUTPUT_FILE}")
166186

167187

168188
if __name__ == "__main__":

0 commit comments

Comments
 (0)