|
8 | 8 | It relies on a local JSON file `_data/us-state-codes.json` for mapping state abbreviations to full names.
|
9 | 9 | """
|
10 | 10 |
|
| 11 | +from __future__ import annotations |
| 12 | + |
11 | 13 | import json
|
| 14 | +import typing |
| 15 | +import warnings |
| 16 | +from functools import partial |
12 | 17 | from operator import itemgetter
|
13 | 18 | from pathlib import Path
|
14 |
| -from typing import Any |
| 19 | +from typing import TYPE_CHECKING, Literal, TypedDict |
15 | 20 |
|
16 | 21 | import niquests
|
17 | 22 |
|
| 23 | +if TYPE_CHECKING: |
| 24 | + import sys |
| 25 | + from collections.abc import Iterator, Mapping, Sequence |
| 26 | + from typing import Any, LiteralString |
18 | 27 |
|
19 |
| -def load_state_codes(script_dir: Path) -> dict: |
20 |
| - """ |
21 |
| - Loads state/territory code mappings from `_data/us-state-codes.json`. |
22 |
| -
|
23 |
| - Required to: |
24 |
| - 1. convert API state abbreviations to full names (e.g., 'CA' to 'California'). |
25 |
| - 2. filter out U.S. territory locations from the API data. (Current script scope: U.S. states). |
26 |
| -
|
27 |
| - Example `us-state-codes.json`: |
28 |
| - ```json |
29 |
| - { |
30 |
| - "states": { |
31 |
| - "AL": "Alabama", |
32 |
| - "WY": "Wyoming" |
33 |
| - }, |
34 |
| - "territories": {} |
35 |
| - } |
36 |
| - ``` |
| 28 | + if sys.version_info >= (3, 13): |
| 29 | + from typing import TypeIs |
| 30 | + else: |
| 31 | + from typing_extensions import TypeIs |
37 | 32 |
|
38 |
| - Args: |
39 |
| - script_dir: Script directory (for locating `_data/us-state-codes.json`). |
# Return type of ``get_state_capitols`` and the input of ``iter_state_capitols``.
type Features = Sequence[Feature[Any, Any, Any]]
"""Represents the ``features`` property of capitol building data, before validation."""
40 | 35 |
|
41 |
| - Returns |
42 |
| - ------- |
43 |
| - Dictionary: State abbreviation to full name mappings (from JSON "states"), |
44 |
| - used for name lookup and territory filtering. |
45 |
| - """ |
46 |
| - data_dir = script_dir.parent / "_data" |
47 |
| - state_codes_path = data_dir / "us-state-codes.json" |
# Attribute fields requested from the service: ``get_state_capitols`` joins these
# names into ``outFields``, and ``CapitolFeature`` uses them as its attribute keys.
type FieldName = Literal["NAME", "STATE", "CITY"]
48 | 37 |
|
49 |
| - with state_codes_path.open() as f: |
50 |
| - return json.load(f) |
# Parent of the directory that contains this script.
REPO_ROOT: Path = Path(__file__).parent.parent
# Directory holding the lookup data this script reads.
INPUT_DIR: Path = REPO_ROOT / "_data"
# Directory the generated dataset is written to.
OUTPUT_DIR: Path = REPO_ROOT / "data"

INPUT_FILE: Path = INPUT_DIR / "us-state-codes.json"
"""
State abbreviation to full name mappings (from JSON "states").

Used for name lookup and territory filtering.

Example:

    {"states": {"AL": "Alabama", "WY": "Wyoming"}, "territories": {}}
"""

# Destination of the formatted state capitol records (see ``main``).
OUTPUT_FILE: Path = OUTPUT_DIR / "us-state-capitals.json"
# Base URL of The National Map's ArcGIS Server deployment.
URL_ARCGIS = "https://carto.nationalmap.gov/arcgis/"
# The query API is served from the ``rest/services`` tree; without the ``rest/``
# segment the request does not reach the REST endpoint this script relies on.
URL_MAP_SERVER = f"{URL_ARCGIS}rest/services/structures/MapServer/"
# Layer 6 of the structures map service, queried for state capitol buildings.
URL_STATE_CAPITOLS = f"{URL_MAP_SERVER}6/query"
# Feature code for state capitol buildings.
FEATURE_STATE_CAPITOLS = "FCODE = 83006"
# Codes excluded server-side so that only U.S. states are returned.
TERRITORIES = "STATE IN ('AS', 'GU', 'MP', 'PR', 'VI')"
WHERE_CLAUSE = f"{FEATURE_STATE_CAPITOLS} AND NOT ({TERRITORIES})"
WKID_WGS84: Literal[4326] = 4326
"""
`Well-known ID`_ for `WGS 84`_, used as a `spatial reference`_.

.. _Well-known ID: https://support.esri.com/en-us/gis-dictionary/wkid
.. _WGS 84: https://en.wikipedia.org/wiki/World_Geodetic_System#WGS_84
.. _spatial reference: https://carto.nationalmap.gov/arcgis/help/en/rest/services-reference/enterprise/geometry-objects/#spatial-reference
"""
| 68 | + |
| 69 | + |
class MapServiceLayerResponse(TypedDict, total=False):
    """
    Response from `National Map Structures Database`_.

    Declared with ``total=False``, so every key is optional; readers must not
    assume that ``features`` is present.

    .. _National Map Structures Database: https://carto.nationalmap.gov/arcgis/help/en/rest/services-reference/enterprise/query-map-service-layer/
    """

    # Feature records returned by the layer query, prior to any validation.
    features: Features
78 | 78 |
|
79 | 79 |
|
80 |
| -def format_capitols_data( |
81 |
| - capitols_data: dict[str, Any] | None, state_data: dict |
82 |
| -) -> list: |
83 |
| - """ |
84 |
| - Processes raw capitol data into a clean format with full state names. |
class Point(TypedDict):
    """Point geometry of a feature, holding exactly the keys ``x`` and ``y``."""

    # Read as the longitude (``lon``) by ``into_state_capitol``.
    x: float
    # Read as the latitude (``lat``) by ``into_state_capitol``.
    y: float
85 | 83 |
|
86 |
| - Args: |
87 |
| - capitols_data: Raw JSON response from the WMS query |
88 |
| - state_data: Dictionary with 'states' and 'territories' mappings |
89 | 84 |
|
90 |
| - Returns |
91 |
| - ------- |
92 |
| - List of dictionaries containing formatted capitol data |
class Feature[A_KT: LiteralString, A_VT: str | float | bool, G: Mapping[str, Any]](
    TypedDict
):
    """
    A generic ArcGIS REST `feature object`_.

    Type parameters:
        A_KT: Type of the attribute (field) names.
        A_VT: Type of the attribute values.
        G: Type of the geometry mapping.

    .. _feature object: https://carto.nationalmap.gov/arcgis/help/en/rest/services-reference/enterprise/feature-object/
    """

    # Field name to field value.
    attributes: Mapping[A_KT, A_VT]
    # Geometry of the feature, shaped according to ``G``.
    geometry: G
| 96 | + |
| 97 | + |
class CapitolFeature(Feature[FieldName, str, Point]):
    """
    Validated state capitol feature, **prior** to any processing.

    Specializes ``Feature`` to ``FieldName`` keys, ``str`` values and ``Point``
    geometry; ``is_capitol_feature`` narrows a raw feature to this type.
    """
| 100 | + |
| 101 | + |
class StateCapitol(TypedDict):
    """State capitol feature, **after** processing."""

    # Taken from the feature geometry's ``x``.
    lon: float
    # Taken from the feature geometry's ``y``.
    lat: float
    # Full state name, looked up from the feature's ``STATE`` abbreviation.
    state: str
    # Taken from the feature's ``CITY`` attribute.
    city: str
144 | 109 |
|
145 |
| - f.write(" " + json_str + ("," if i < len(sorted_data) - 1 else "") + "\n") |
146 |
| - f.write("]\n") |
| 110 | + |
def read_json(source: str | Path, /) -> Any:
    """Return the decoded contents of the UTF-8 encoded JSON file at ``source``."""
    with Path(source).open(encoding="utf-8") as handle:
        return json.load(handle)
| 113 | + |
| 114 | + |
def get_state_capitols() -> Features:
    """
    Fetch state capitol building features from the National Map Structures Database.

    Returns the non-empty ``features`` entry of the JSON response.

    Raises ``TypeError`` when the response carries no ``features`` (or an empty
    one); errors raised by ``raise_for_status`` propagate unchanged.
    """
    out_fields = [*_get_args(FieldName), "SHAPE"]
    query = dict(
        f="json",
        where=WHERE_CLAUSE,
        outFields=",".join(out_fields),
        geometryPrecision=7,
        outSR=WKID_WGS84,
        returnGeometry=True,
    )
    response = niquests.get(URL_STATE_CAPITOLS, params=query)
    response.raise_for_status()
    payload: MapServiceLayerResponse = response.json()
    features = payload.get("features")
    if not features:
        # Echo the whole payload so the cause is visible in the traceback.
        msg = f"Expected a features mapping but got:\n\n{payload!r}"
        raise TypeError(msg)
    return features
| 132 | + |
| 133 | + |
def is_capitol_feature(feat: Feature, states: dict[str, str]) -> TypeIs[CapitolFeature]:
    """
    Ensure feature describes only capitols of states and not territories.

    A feature is accepted only when all of the following hold:

    - ``attributes`` is present and its ``STATE`` is a key of ``states``.
    - ``CITY`` is a non-empty string (a null or blank city is rejected).
    - ``geometry`` has exactly the keys ``x`` and ``y``, both numeric.

    Checking the values, not just the keys, keeps the ``CapitolFeature``
    narrowing truthful: null entries would otherwise flow into the output.
    """
    attrs = feat.get("attributes")
    if not attrs or attrs.get("STATE") not in states:
        return False
    city = attrs.get("CITY")
    if not (isinstance(city, str) and city):
        return False
    geom = feat.get("geometry")
    if not geom or geom.keys() != {"x", "y"}:
        return False
    return all(isinstance(coord, (int, float)) for coord in geom.values())
| 143 | + |
| 144 | + |
def into_state_capitol(feat: CapitolFeature, states: dict[str, str]) -> StateCapitol:
    """
    Build the output record for a validated capitol feature.

    The ``STATE`` abbreviation is replaced by its full name from ``states``;
    the geometry's ``x``/``y`` become ``lon``/``lat``.
    """
    point = feat["geometry"]
    attributes = feat["attributes"]
    return StateCapitol(
        lon=point["x"],
        lat=point["y"],
        state=states[attributes["STATE"]],
        city=attributes["CITY"],
    )
| 151 | + |
| 152 | + |
def iter_state_capitols(
    features: Features, states: dict[str, str]
) -> Iterator[StateCapitol]:
    """
    Yield a cleaned record for every feature that passes ``is_capitol_feature``.

    Features that fail the check are not yielded; a warning containing the
    feature's ``repr`` is emitted for each of them instead.
    """
    for candidate in features:
        if not is_capitol_feature(candidate, states):
            warnings.warn(f"Unexpected territory:\n{candidate!r}", stacklevel=2)
            continue
        yield into_state_capitol(candidate, states)
| 162 | + |
| 163 | + |
def write_json(data: Sequence[StateCapitol], output: Path) -> None:
    """
    Saves ``data`` to ``output`` with consistent formatting.

    The file is a JSON array with one record per line. Each record is indented,
    and every record except the last is followed by a comma.

    Empty ``data`` produces an empty array (``[`` and ``]`` on separate lines)
    rather than raising ``IndexError``.
    """
    INDENT, OB, CB, NL = " ", "[", "]", "\n"
    to_str = partial(json.dumps, separators=(", ", ":"))
    # Joining puts the separator only *between* records, so neither the final
    # record nor an empty sequence needs special-casing.
    body = f",{NL}".join(f"{INDENT}{to_str(record)}" for record in data)
    with output.open("w", encoding="utf-8") as f:
        f.write(f"{OB}{NL}")
        if body:
            f.write(f"{body}{NL}")
        f.write(f"{CB}{NL}")
| 173 | + |
| 174 | + |
def _get_args(tp: Any, /) -> tuple[Any, ...]:
    """
    Return the type arguments of ``tp``.

    When ``tp`` exposes a ``__value__`` attribute, the arguments are taken from
    that value instead of from ``tp`` itself.
    """
    try:
        target = tp.__value__
    except AttributeError:
        target = tp
    return typing.get_args(target)
147 | 177 |
|
148 | 178 |
|
def main() -> None:
    """Fetch the capitol features, keep the state ones, and write them sorted by state."""
    features = get_state_capitols()
    states = read_json(INPUT_FILE)["states"]
    by_state = list(iter_state_capitols(features, states))
    by_state.sort(key=itemgetter("state"))
    print(f"Found {len(by_state)} state capitals")
    OUTPUT_FILE.touch()
    write_json(by_state, OUTPUT_FILE)
    print(f"Data written to {OUTPUT_FILE}")
166 | 186 |
|
167 | 187 |
|
168 | 188 | if __name__ == "__main__":
|
|
0 commit comments