Skip to content

Commit

Permalink
feat: add generation script for us-state-capitals.json (#668)
Browse files Browse the repository at this point in the history
  • Loading branch information
dsmedia authored Jan 24, 2025
1 parent 0048d85 commit dd43f29
Show file tree
Hide file tree
Showing 5 changed files with 304 additions and 54 deletions.
61 changes: 61 additions & 0 deletions _data/us-state-codes.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
{
"states": {
"AL": "Alabama",
"AK": "Alaska",
"AZ": "Arizona",
"AR": "Arkansas",
"CA": "California",
"CO": "Colorado",
"CT": "Connecticut",
"DE": "Delaware",
"FL": "Florida",
"GA": "Georgia",
"HI": "Hawaii",
"ID": "Idaho",
"IL": "Illinois",
"IN": "Indiana",
"IA": "Iowa",
"KS": "Kansas",
"KY": "Kentucky",
"LA": "Louisiana",
"ME": "Maine",
"MD": "Maryland",
"MA": "Massachusetts",
"MI": "Michigan",
"MN": "Minnesota",
"MS": "Mississippi",
"MO": "Missouri",
"MT": "Montana",
"NE": "Nebraska",
"NV": "Nevada",
"NH": "New Hampshire",
"NJ": "New Jersey",
"NM": "New Mexico",
"NY": "New York",
"NC": "North Carolina",
"ND": "North Dakota",
"OH": "Ohio",
"OK": "Oklahoma",
"OR": "Oregon",
"PA": "Pennsylvania",
"RI": "Rhode Island",
"SC": "South Carolina",
"SD": "South Dakota",
"TN": "Tennessee",
"TX": "Texas",
"UT": "Utah",
"VT": "Vermont",
"VA": "Virginia",
"WA": "Washington",
"WV": "West Virginia",
"WI": "Wisconsin",
"WY": "Wyoming"
},
"territories": {
"AS": "American Samoa",
"GU": "Guam",
"MP": "Northern Mariana Islands",
"PR": "Puerto Rico",
"VI": "U.S. Virgin Islands"
}
}
100 changes: 50 additions & 50 deletions data/us-state-capitals.json
Original file line number Diff line number Diff line change
@@ -1,52 +1,52 @@
[
{"lon":-86.279118, "lat":32.361538, "state":"Alabama", "city":"Montgomery"},
{"lon":-134.41974, "lat":58.301935, "state":"Alaska", "city":"Juneau"},
{"lon":-112.073844, "lat":33.448457, "state":"Arizona", "city":"Phoenix"},
{"lon":-92.331122, "lat":34.736009, "state":"Arkansas", "city":"Little Rock"},
{"lon":-121.468926, "lat":38.555605, "state":"California", "city":"Sacramento"},
{"lon":-104.984167, "lat":39.7391667, "state":"Colorado", "city":"Denver"},
{"lon":-72.677, "lat":41.767, "state":"Connecticut", "city":"Hartford"},
{"lon":-75.526755, "lat":39.161921, "state":"Delaware", "city":"Dover"},
{"lon":-84.27277, "lat":30.4518, "state":"Florida", "city":"Tallahassee"},
{"lon":-84.39, "lat":33.76, "state":"Georgia", "city":"Atlanta"},
{"lon":-157.826182, "lat":21.30895, "state":"Hawaii", "city":"Honolulu"},
{"lon":-116.237651, "lat":43.613739, "state":"Idaho", "city":"Boise"},
{"lon":-89.650373, "lat":39.78325, "state":"Illinois", "city":"Springfield"},
{"lon":-86.147685, "lat":39.790942, "state":"Indiana", "city":"Indianapolis"},
{"lon":-93.620866, "lat":41.590939, "state":"Iowa", "city":"Des Moines"},
{"lon":-95.69, "lat":39.04, "state":"Kansas", "city":"Topeka"},
{"lon":-84.86311, "lat":38.197274, "state":"Kentucky", "city":"Frankfort"},
{"lon":-91.140229, "lat":30.45809, "state":"Louisiana", "city":"Baton Rouge"},
{"lon":-69.765261, "lat":44.323535, "state":"Maine", "city":"Augusta"},
{"lon":-76.501157, "lat":38.972945, "state":"Maryland", "city":"Annapolis"},
{"lon":-71.0275, "lat":42.2352, "state":"Massachusetts", "city":"Boston"},
{"lon":-84.5467, "lat":42.7335, "state":"Michigan", "city":"Lansing"},
{"lon":-93.094, "lat":44.95, "state":"Minnesota", "city":"Saint Paul"},
{"lon":-90.207, "lat":32.32, "state":"Mississippi", "city":"Jackson"},
{"lon":-92.189283, "lat":38.572954, "state":"Missouri", "city":"Jefferson City"},
{"lon":-112.027031, "lat":46.595805, "state":"Montana", "city":"Helena"},
{"lon":-96.675345, "lat":40.809868, "state":"Nebraska", "city":"Lincoln"},
{"lon":-119.753877, "lat":39.160949, "state":"Nevada", "city":"Carson City"},
{"lon":-71.549127, "lat":43.220093, "state":"New Hampshire", "city":"Concord"},
{"lon":-74.756138, "lat":40.221741, "state":"New Jersey", "city":"Trenton"},
{"lon":-105.964575, "lat":35.667231, "state":"New Mexico", "city":"Santa Fe"},
{"lon":-73.781339, "lat":42.659829, "state":"New York", "city":"Albany"},
{"lon":-78.638, "lat":35.771, "state":"North Carolina", "city":"Raleigh"},
{"lon":-100.782868, "lat":46.819173, "state":"North Dakota", "city":"Bismarck"},
{"lon":-83.000647, "lat":39.962245, "state":"Ohio", "city":"Columbus"},
{"lon":-97.534994, "lat":35.482309, "state":"Oklahoma", "city":"Oklahoma City"},
{"lon":-123.029159, "lat":44.931109, "state":"Oregon", "city":"Salem"},
{"lon":-76.875613, "lat":40.269789, "state":"Pennsylvania", "city":"Harrisburg"},
{"lon":-71.422132, "lat":41.82355, "state":"Rhode Island", "city":"Providence"},
{"lon":-81.035, "lat":34, "state":"South Carolina", "city":"Columbia"},
{"lon":-100.336378, "lat":44.367966, "state":"South Dakota", "city":"Pierre"},
{"lon":-86.784, "lat":36.165, "state":"Tennessee", "city":"Nashville"},
{"lon":-97.75, "lat":30.266667, "state":"Texas", "city":"Austin"},
{"lon":-111.892622, "lat":40.7547, "state":"Utah", "city":"Salt Lake City"},
{"lon":-72.57194, "lat":44.26639, "state":"Vermont", "city":"Montpelier"},
{"lon":-77.46, "lat":37.54, "state":"Virginia", "city":"Richmond"},
{"lon":-122.893077, "lat":47.042418, "state":"Washington", "city":"Olympia"},
{"lon":-81.633294, "lat":38.349497, "state":"West Virginia", "city":"Charleston"},
{"lon":-89.384444, "lat":43.074722, "state":"Wisconsin", "city":"Madison"},
{"lon":-104.802042, "lat":41.145548, "state":"Wyoming", "city":"Cheyenne"}
{"lon":-86.3005639, "lat":32.3777298, "state":"Alabama", "city":"Montgomery"},
{"lon":-134.4104388, "lat":58.3020694, "state":"Alaska", "city":"Juneau"},
{"lon":-112.097065, "lat":33.4482497, "state":"Arizona", "city":"Phoenix"},
{"lon":-92.2892284, "lat":34.746745, "state":"Arkansas", "city":"Little Rock"},
{"lon":-121.4935591, "lat":38.5765854, "state":"California", "city":"Sacramento"},
{"lon":-104.9849779, "lat":39.7392198, "state":"Colorado", "city":"Denver"},
{"lon":-72.6823164, "lat":41.7642752, "state":"Connecticut", "city":"Hartford"},
{"lon":-75.5195811, "lat":39.1572815, "state":"Delaware", "city":"Dover"},
{"lon":-84.2821265, "lat":30.4381047, "state":"Florida", "city":"Tallahassee"},
{"lon":-84.3879614, "lat":33.7490287, "state":"Georgia", "city":"Atlanta"},
{"lon":-157.8573111, "lat":21.3073439, "state":"Hawaii", "city":"Honolulu"},
{"lon":-116.1998483, "lat":43.6177948, "state":"Idaho", "city":"Boise"},
{"lon":-89.6547203, "lat":39.7983912, "state":"Illinois", "city":"Springfield"},
{"lon":-86.1627697, "lat":39.7683841, "state":"Indiana", "city":"Indianapolis"},
{"lon":-93.6038358, "lat":41.5911079, "state":"Iowa", "city":"Des Moines"},
{"lon":-95.6780057, "lat":39.0482389, "state":"Kansas", "city":"Topeka"},
{"lon":-84.8753598, "lat":38.1866989, "state":"Kentucky", "city":"Frankfort"},
{"lon":-91.1873935, "lat":30.457024, "state":"Louisiana", "city":"Baton Rouge"},
{"lon":-69.7816228, "lat":44.307213, "state":"Maine", "city":"Augusta"},
{"lon":-76.491037, "lat":38.9788927, "state":"Maryland", "city":"Annapolis"},
{"lon":-71.0640129, "lat":42.3587532, "state":"Massachusetts", "city":"Boston"},
{"lon":-84.5555605, "lat":42.7336193, "state":"Michigan", "city":"Lansing"},
{"lon":-93.1021034, "lat":44.9551063, "state":"Minnesota", "city":"Saint Paul"},
{"lon":-90.1820382, "lat":32.303763, "state":"Mississippi", "city":"Jackson"},
{"lon":-92.1728432, "lat":38.5791852, "state":"Missouri", "city":"Jefferson City"},
{"lon":-112.0183427, "lat":46.5857742, "state":"Montana", "city":"Helena"},
{"lon":-96.6997467, "lat":40.8080641, "state":"Nebraska", "city":"Lincoln"},
{"lon":-119.7663053, "lat":39.1640815, "state":"Nevada", "city":"Carson City"},
{"lon":-71.5382718, "lat":43.2069054, "state":"New Hampshire", "city":"Concord"},
{"lon":-74.7699552, "lat":40.2203572, "state":"New Jersey", "city":"Trenton"},
{"lon":-105.9396043, "lat":35.6823747, "state":"New Mexico", "city":"Santa Fe"},
{"lon":-73.7575015, "lat":42.6525086, "state":"New York", "city":"Albany"},
{"lon":-78.6391225, "lat":35.7803724, "state":"North Carolina", "city":"Raleigh"},
{"lon":-100.7827194, "lat":46.8207637, "state":"North Dakota", "city":"Bismarck"},
{"lon":-82.9987984, "lat":39.961461, "state":"Ohio", "city":"Columbus"},
{"lon":-97.5033801, "lat":35.4922882, "state":"Oklahoma", "city":"Oklahoma City"},
{"lon":-123.0301147, "lat":44.938743, "state":"Oregon", "city":"Salem"},
{"lon":-76.8837835, "lat":40.2644747, "state":"Pennsylvania", "city":"Harrisburg"},
{"lon":-71.414855, "lat":41.8308218, "state":"Rhode Island", "city":"Providence"},
{"lon":-81.0331509, "lat":34.0004393, "state":"South Carolina", "city":"Columbia"},
{"lon":-100.3462286, "lat":44.3671094, "state":"South Dakota", "city":"Pierre"},
{"lon":-86.7841708, "lat":36.1658985, "state":"Tennessee", "city":"Nashville"},
{"lon":-97.7403271, "lat":30.2746658, "state":"Texas", "city":"Austin"},
{"lon":-111.888132, "lat":40.7773586, "state":"Utah", "city":"Salt Lake City"},
{"lon":-72.5804725, "lat":44.2624522, "state":"Vermont", "city":"Montpelier"},
{"lon":-77.4335963, "lat":37.5387651, "state":"Virginia", "city":"Richmond"},
{"lon":-122.9049162, "lat":47.0357595, "state":"Washington", "city":"Olympia"},
{"lon":-81.6120072, "lat":38.3364019, "state":"West Virginia", "city":"Charleston"},
{"lon":-89.3841797, "lat":43.0746533, "state":"Wisconsin", "city":"Madison"},
{"lon":-104.8203092, "lat":41.140301, "state":"Wyoming", "city":"Cheyenne"}
]
6 changes: 3 additions & 3 deletions datapackage.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
}
],
"version": "2.11.0",
"created": "2025-01-21T15:05:39.767318+00:00",
"created": "2025-01-23T15:50:44.713255+00:00",
"resources": [
{
"name": "7zip.png",
Expand Down Expand Up @@ -3071,8 +3071,8 @@
"format": "json",
"mediatype": "text/json",
"encoding": "utf-8",
"hash": "sha1:9c3211c5058c899412c30f5992a77c54a1b80066",
"bytes": 3869,
"hash": "sha1:32b4d3a13918b0aa85e62c09495eccf842fffb31",
"bytes": 4048,
"dialect": {
"json": {
"keyed": true
Expand Down
2 changes: 1 addition & 1 deletion datapackage.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# vega-datasets
`2.11.0` | [GitHub](http://github.com/vega/vega-datasets.git) | 2025-01-21 15:05:39 [UTC]
`2.11.0` | [GitHub](http://github.com/vega/vega-datasets.git) | 2025-01-23 15:50:44 [UTC]

Common repository for example datasets used by Vega related projects.
BSD-3-Clause license applies only to package code and infrastructure. Users should verify their use of datasets
Expand Down
189 changes: 189 additions & 0 deletions scripts/us-state-capitals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
"""
Retrieves and saves U.S. state capital locations with their coordinates from the National Map API.
This script fetches data from the USGS National Map Structures Database API to generate a JSON file
containing the latitude, longitude, state, and city of U.S. state capitals. State capitol building
locations are used as a practical representation of state capital city points.
It relies on a local JSON file `_data/us-state-codes.json` for mapping state abbreviations to full names.
"""

from __future__ import annotations

import json
import typing
import warnings
from functools import partial
from operator import itemgetter
from pathlib import Path
from typing import TYPE_CHECKING, Literal, TypedDict

import niquests

if TYPE_CHECKING:
import sys
from collections.abc import Iterator, Mapping, Sequence
from typing import Any, LiteralString

if sys.version_info >= (3, 13):
from typing import TypeIs
else:
from typing_extensions import TypeIs

type Features = Sequence[Feature[Any, Any, Any]]
"""Represents the ``features`` property of capitol building data, before validation."""

type FieldName = Literal["NAME", "STATE", "CITY"]

# Filesystem layout: this script lives in ``scripts/``, so two ``.parent`` hops
# from ``__file__`` land on the repository root.
REPO_ROOT: Path = Path(__file__).parent.parent
INPUT_DIR: Path = REPO_ROOT / "_data"
OUTPUT_DIR: Path = REPO_ROOT / "data"

INPUT_FILE: Path = INPUT_DIR / "us-state-codes.json"
"""
State abbreviation to full name mappings (from JSON "states").
Used for name lookup and territory filtering.
Example:
{"states": {"AL": "Alabama", "WY": "Wyoming"}, "territories": {}}
"""

# Destination of the generated dataset.
OUTPUT_FILE: Path = OUTPUT_DIR / "us-state-capitals.json"
# REST endpoints, each built on the previous one: ArcGIS REST root ->
# ``structures`` map service -> ``query`` operation on layer 6.
URL_ARCGIS = "https://carto.nationalmap.gov/arcgis/rest/"
URL_MAP_SERVER = f"{URL_ARCGIS}services/structures/MapServer/"
URL_STATE_CAPITOLS = f"{URL_MAP_SERVER}6/query"
# Fragments of the ``where`` filter sent with the query.
# NOTE(review): FCODE 83006 is presumably the feature code for state capitol
# buildings (going by the constant's name) - confirm against the USGS schema.
FEATURE_STATE_CAPITOLS = "FCODE = 83006"
# Two-letter codes excluded from the query result.
TERRITORIES = "STATE IN ('AS', 'GU', 'MP', 'PR', 'VI')"
# Final filter: capitol features whose STATE is not one of the codes above.
WHERE_CLAUSE = f"{FEATURE_STATE_CAPITOLS} AND NOT ({TERRITORIES})"
# Sent as ``outSR`` so that returned geometry is expressed in this reference.
WKID_WGS84: Literal[4326] = 4326
"""
`Well-known ID`_ for `WGS 84`_, used as a `spatial reference`_.
.. _Well-known ID: https://support.esri.com/en-us/gis-dictionary/wkid
.. _WGS 84: https://en.wikipedia.org/wiki/World_Geodetic_System#WGS_84
.. _spatial reference: https://carto.nationalmap.gov/arcgis/help/en/rest/services-reference/enterprise/geometry-objects/#spatial-reference
"""


class MapServiceLayerResponse(TypedDict, total=False):
    """
    Payload returned by a query against the `National Map Structures Database`_.

    Declared with ``total=False``, so the ``features`` key may be absent.

    .. _National Map Structures Database: https://carto.nationalmap.gov/arcgis/help/en/rest/services-reference/enterprise/query-map-service-layer/
    """

    # Raw feature records; nothing about their contents has been checked yet.
    features: Features


class Point(TypedDict):
    """Point geometry of a feature, given as a pair of coordinates."""

    # ``into_state_capitol`` copies ``x`` into ``lon`` and ``y`` into ``lat``.
    x: float
    y: float


class Feature[A_KT: LiteralString, A_VT: str | float | bool, G: Mapping[str, Any]](
TypedDict
):
"""
A generic `GeoJSON feature object`_.
.. _GeoJSON feature object: https://carto.nationalmap.gov/arcgis/help/en/rest/services-reference/enterprise/feature-object/
"""

attributes: Mapping[A_KT, A_VT]
geometry: G


class CapitolFeature(Feature[FieldName, str, Point]):
    """
    A state capitol feature that passed validation but is **not yet** processed.

    Attribute keys are restricted to ``FieldName``, attribute values are
    strings, and the geometry is a ``Point``.
    """


class StateCapitol(TypedDict):
    """One output record: a state capitol feature **after** processing."""

    # Coordinates, taken from the feature geometry (``x`` -> lon, ``y`` -> lat).
    lon: float
    lat: float
    # Full state name, looked up from the state abbreviation.
    state: str
    # Name of the capital city.
    city: str


def read_json(source: str | Path, /) -> Any:
    """
    Load and decode the JSON document stored at ``source``.

    Parameters
    ----------
    source
        Location of a UTF-8 encoded JSON file (positional-only).

    Returns
    -------
    The decoded JSON value.
    """
    with Path(source).open(encoding="utf-8") as stream:
        return json.load(stream)


def get_state_capitols() -> Features:
    """
    Query the National Map Structures Database for state capitol buildings.

    Returns
    -------
    The non-empty ``features`` entry of the JSON response.

    Raises
    ------
    TypeError
        If the response has no ``features`` entry, or that entry is empty.

    Notes
    -----
    HTTP error statuses surface through ``response.raise_for_status()``.
    """
    # Request every declared field name, plus the geometry column.
    out_fields = ",".join([*_get_args(FieldName), "SHAPE"])
    query = {
        "f": "json",
        "where": WHERE_CLAUSE,
        "outFields": out_fields,
        "geometryPrecision": 7,
        "outSR": WKID_WGS84,
        "returnGeometry": True,
    }
    response = niquests.get(URL_STATE_CAPITOLS, params=query)
    response.raise_for_status()
    content: MapServiceLayerResponse = response.json()
    features = content.get("features")
    if not features:
        # Include the whole payload so the failure can be diagnosed from the message.
        msg = f"Expected a features mapping but got:\n\n{content!r}"
        raise TypeError(msg)
    return features


def is_capitol_feature(feat: Feature, states: dict[str, str]) -> TypeIs[CapitolFeature]:
    """
    Tell whether ``feat`` is a usable capitol record for one of ``states``.

    Parameters
    ----------
    feat
        Feature record to inspect.
    states
        Mapping keyed by state abbreviation; only its keys are consulted.

    Returns
    -------
    ``True`` only when the feature has non-empty attributes whose ``STATE`` is
    a key of ``states``, carries a ``CITY`` attribute, and has a geometry
    consisting of exactly the keys ``x`` and ``y``.
    """
    attrs = feat.get("attributes")
    if not attrs:
        return False
    if attrs.get("STATE") not in states or "CITY" not in attrs:
        return False
    geom = feat.get("geometry")
    # Exact key match: a geometry with extra or missing members is rejected.
    return bool(geom and geom.keys() == {"x", "y"})


def into_state_capitol(feat: CapitolFeature, states: dict[str, str]) -> StateCapitol:
    """
    Build the output record for a validated capitol feature.

    Parameters
    ----------
    feat
        Feature providing ``geometry`` (``x``/``y``) and ``attributes``
        (``STATE``/``CITY``).
    states
        Mapping of state abbreviation to full state name.

    Returns
    -------
    A record whose ``state`` is the full name looked up in ``states``.
    """
    point = feat["geometry"]
    attrs = feat["attributes"]
    full_name = states[attrs["STATE"]]
    return StateCapitol(
        lon=point["x"],
        lat=point["y"],
        state=full_name,
        city=attrs["CITY"],
    )


def iter_state_capitols(
    features: Features, states: dict[str, str]
) -> Iterator[StateCapitol]:
    """
    Lazily convert ``features`` into output records.

    Features rejected by ``is_capitol_feature`` are not yielded; each one
    triggers a warning that includes the feature's ``repr``.

    Parameters
    ----------
    features
        Feature records to convert.
    states
        Mapping of state abbreviation to full state name.

    Yields
    ------
    One ``StateCapitol`` per accepted feature, in input order.
    """
    for candidate in features:
        if not is_capitol_feature(candidate, states):
            warnings.warn(f"Unexpected territory:\n{candidate!r}", stacklevel=2)
            continue
        yield into_state_capitol(candidate, states)


def write_json(data: Sequence[StateCapitol], output: Path) -> None:
    """
    Save ``data`` to ``output`` as a JSON array with one record per line.

    Each record is serialised compactly on its own indented line, records are
    separated by ``,`` + newline, and the file always ends with a newline.
    Line endings are forced to ``\\n`` regardless of platform.

    Parameters
    ----------
    data
        Records to write. May be empty, in which case an empty JSON array
        (``[`` and ``]`` on separate lines) is written.
    output
        Destination file; created or overwritten.
    """
    INDENT, OB, CB, NL = " ", "[", "]", "\n"
    to_str = partial(json.dumps, separators=(", ", ":"))
    # Serialise everything before touching the file, so that a failure here
    # cannot leave a truncated, invalid document behind.
    rows = f",{NL}".join(f"{INDENT}{to_str(record)}" for record in data)
    # Only emit the record section (and its trailing newline) when there is one;
    # indexing ``data[-1]`` on empty input used to raise ``IndexError``.
    body = f"{rows}{NL}" if rows else ""
    with output.open("w", encoding="utf-8", newline="\n") as f:
        f.write(f"{OB}{NL}{body}{CB}{NL}")


def _get_args(tp: Any, /) -> tuple[Any, ...]:
return typing.get_args(getattr(tp, "__value__", tp))


def main() -> None:
    """
    Regenerate ``OUTPUT_FILE`` from the remote capitol data.

    Fetches the features, keeps those belonging to a state listed under
    ``"states"`` in ``INPUT_FILE``, orders them by state name, and writes the
    result, reporting progress on stdout.
    """
    # Fetch first, then load the lookup table, matching the original call order.
    features = get_state_capitols()
    states = read_json(INPUT_FILE)["states"]
    by_state = list(iter_state_capitols(features, states))
    by_state.sort(key=itemgetter("state"))
    print(f"Found {len(by_state)} state capitals")
    OUTPUT_FILE.touch()
    write_json(by_state, OUTPUT_FILE)
    print(f"Data written to {OUTPUT_FILE}")


if __name__ == "__main__":
main()

0 comments on commit dd43f29

Please sign in to comment.