Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add generation script for us-state-capitals.json #668

Merged
merged 12 commits into from
Jan 24, 2025
61 changes: 61 additions & 0 deletions _data/us-state-codes.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
{
"states": {
"AL": "Alabama",
"AK": "Alaska",
"AZ": "Arizona",
"AR": "Arkansas",
"CA": "California",
"CO": "Colorado",
"CT": "Connecticut",
"DE": "Delaware",
"FL": "Florida",
"GA": "Georgia",
"HI": "Hawaii",
"ID": "Idaho",
"IL": "Illinois",
"IN": "Indiana",
"IA": "Iowa",
"KS": "Kansas",
"KY": "Kentucky",
"LA": "Louisiana",
"ME": "Maine",
"MD": "Maryland",
"MA": "Massachusetts",
"MI": "Michigan",
"MN": "Minnesota",
"MS": "Mississippi",
"MO": "Missouri",
"MT": "Montana",
"NE": "Nebraska",
"NV": "Nevada",
"NH": "New Hampshire",
"NJ": "New Jersey",
"NM": "New Mexico",
"NY": "New York",
"NC": "North Carolina",
"ND": "North Dakota",
"OH": "Ohio",
"OK": "Oklahoma",
"OR": "Oregon",
"PA": "Pennsylvania",
"RI": "Rhode Island",
"SC": "South Carolina",
"SD": "South Dakota",
"TN": "Tennessee",
"TX": "Texas",
"UT": "Utah",
"VT": "Vermont",
"VA": "Virginia",
"WA": "Washington",
"WV": "West Virginia",
"WI": "Wisconsin",
"WY": "Wyoming"
},
"territories": {
"AS": "American Samoa",
"GU": "Guam",
"MP": "Northern Mariana Islands",
"PR": "Puerto Rico",
"VI": "U.S. Virgin Islands"
}
}
100 changes: 50 additions & 50 deletions data/us-state-capitals.json
Original file line number Diff line number Diff line change
@@ -1,52 +1,52 @@
[
{"lon":-86.279118, "lat":32.361538, "state":"Alabama", "city":"Montgomery"},
{"lon":-134.41974, "lat":58.301935, "state":"Alaska", "city":"Juneau"},
{"lon":-112.073844, "lat":33.448457, "state":"Arizona", "city":"Phoenix"},
{"lon":-92.331122, "lat":34.736009, "state":"Arkansas", "city":"Little Rock"},
{"lon":-121.468926, "lat":38.555605, "state":"California", "city":"Sacramento"},
{"lon":-104.984167, "lat":39.7391667, "state":"Colorado", "city":"Denver"},
{"lon":-72.677, "lat":41.767, "state":"Connecticut", "city":"Hartford"},
{"lon":-75.526755, "lat":39.161921, "state":"Delaware", "city":"Dover"},
{"lon":-84.27277, "lat":30.4518, "state":"Florida", "city":"Tallahassee"},
{"lon":-84.39, "lat":33.76, "state":"Georgia", "city":"Atlanta"},
{"lon":-157.826182, "lat":21.30895, "state":"Hawaii", "city":"Honolulu"},
{"lon":-116.237651, "lat":43.613739, "state":"Idaho", "city":"Boise"},
{"lon":-89.650373, "lat":39.78325, "state":"Illinois", "city":"Springfield"},
{"lon":-86.147685, "lat":39.790942, "state":"Indiana", "city":"Indianapolis"},
{"lon":-93.620866, "lat":41.590939, "state":"Iowa", "city":"Des Moines"},
{"lon":-95.69, "lat":39.04, "state":"Kansas", "city":"Topeka"},
{"lon":-84.86311, "lat":38.197274, "state":"Kentucky", "city":"Frankfort"},
{"lon":-91.140229, "lat":30.45809, "state":"Louisiana", "city":"Baton Rouge"},
{"lon":-69.765261, "lat":44.323535, "state":"Maine", "city":"Augusta"},
{"lon":-76.501157, "lat":38.972945, "state":"Maryland", "city":"Annapolis"},
{"lon":-71.0275, "lat":42.2352, "state":"Massachusetts", "city":"Boston"},
{"lon":-84.5467, "lat":42.7335, "state":"Michigan", "city":"Lansing"},
{"lon":-93.094, "lat":44.95, "state":"Minnesota", "city":"Saint Paul"},
{"lon":-90.207, "lat":32.32, "state":"Mississippi", "city":"Jackson"},
{"lon":-92.189283, "lat":38.572954, "state":"Missouri", "city":"Jefferson City"},
{"lon":-112.027031, "lat":46.595805, "state":"Montana", "city":"Helena"},
{"lon":-96.675345, "lat":40.809868, "state":"Nebraska", "city":"Lincoln"},
{"lon":-119.753877, "lat":39.160949, "state":"Nevada", "city":"Carson City"},
{"lon":-71.549127, "lat":43.220093, "state":"New Hampshire", "city":"Concord"},
{"lon":-74.756138, "lat":40.221741, "state":"New Jersey", "city":"Trenton"},
{"lon":-105.964575, "lat":35.667231, "state":"New Mexico", "city":"Santa Fe"},
{"lon":-73.781339, "lat":42.659829, "state":"New York", "city":"Albany"},
{"lon":-78.638, "lat":35.771, "state":"North Carolina", "city":"Raleigh"},
{"lon":-100.782868, "lat":46.819173, "state":"North Dakota", "city":"Bismarck"},
{"lon":-83.000647, "lat":39.962245, "state":"Ohio", "city":"Columbus"},
{"lon":-97.534994, "lat":35.482309, "state":"Oklahoma", "city":"Oklahoma City"},
{"lon":-123.029159, "lat":44.931109, "state":"Oregon", "city":"Salem"},
{"lon":-76.875613, "lat":40.269789, "state":"Pennsylvania", "city":"Harrisburg"},
{"lon":-71.422132, "lat":41.82355, "state":"Rhode Island", "city":"Providence"},
{"lon":-81.035, "lat":34, "state":"South Carolina", "city":"Columbia"},
{"lon":-100.336378, "lat":44.367966, "state":"South Dakota", "city":"Pierre"},
{"lon":-86.784, "lat":36.165, "state":"Tennessee", "city":"Nashville"},
{"lon":-97.75, "lat":30.266667, "state":"Texas", "city":"Austin"},
{"lon":-111.892622, "lat":40.7547, "state":"Utah", "city":"Salt Lake City"},
{"lon":-72.57194, "lat":44.26639, "state":"Vermont", "city":"Montpelier"},
{"lon":-77.46, "lat":37.54, "state":"Virginia", "city":"Richmond"},
{"lon":-122.893077, "lat":47.042418, "state":"Washington", "city":"Olympia"},
{"lon":-81.633294, "lat":38.349497, "state":"West Virginia", "city":"Charleston"},
{"lon":-89.384444, "lat":43.074722, "state":"Wisconsin", "city":"Madison"},
{"lon":-104.802042, "lat":41.145548, "state":"Wyoming", "city":"Cheyenne"}
{"lon":-86.3005639, "lat":32.3777298, "state":"Alabama", "city":"Montgomery"},
{"lon":-134.4104388, "lat":58.3020694, "state":"Alaska", "city":"Juneau"},
{"lon":-112.097065, "lat":33.4482497, "state":"Arizona", "city":"Phoenix"},
{"lon":-92.2892284, "lat":34.746745, "state":"Arkansas", "city":"Little Rock"},
{"lon":-121.4935591, "lat":38.5765854, "state":"California", "city":"Sacramento"},
{"lon":-104.9849779, "lat":39.7392198, "state":"Colorado", "city":"Denver"},
{"lon":-72.6823164, "lat":41.7642752, "state":"Connecticut", "city":"Hartford"},
{"lon":-75.5195811, "lat":39.1572815, "state":"Delaware", "city":"Dover"},
{"lon":-84.2821265, "lat":30.4381047, "state":"Florida", "city":"Tallahassee"},
{"lon":-84.3879614, "lat":33.7490287, "state":"Georgia", "city":"Atlanta"},
{"lon":-157.8573111, "lat":21.3073439, "state":"Hawaii", "city":"Honolulu"},
{"lon":-116.1998483, "lat":43.6177948, "state":"Idaho", "city":"Boise"},
{"lon":-89.6547203, "lat":39.7983912, "state":"Illinois", "city":"Springfield"},
{"lon":-86.1627697, "lat":39.7683841, "state":"Indiana", "city":"Indianapolis"},
{"lon":-93.6038358, "lat":41.5911079, "state":"Iowa", "city":"Des Moines"},
{"lon":-95.6780057, "lat":39.0482389, "state":"Kansas", "city":"Topeka"},
{"lon":-84.8753598, "lat":38.1866989, "state":"Kentucky", "city":"Frankfort"},
{"lon":-91.1873935, "lat":30.457024, "state":"Louisiana", "city":"Baton Rouge"},
{"lon":-69.7816228, "lat":44.307213, "state":"Maine", "city":"Augusta"},
{"lon":-76.491037, "lat":38.9788927, "state":"Maryland", "city":"Annapolis"},
{"lon":-71.0640129, "lat":42.3587532, "state":"Massachusetts", "city":"Boston"},
{"lon":-84.5555605, "lat":42.7336193, "state":"Michigan", "city":"Lansing"},
{"lon":-93.1021034, "lat":44.9551063, "state":"Minnesota", "city":"Saint Paul"},
{"lon":-90.1820382, "lat":32.303763, "state":"Mississippi", "city":"Jackson"},
{"lon":-92.1728432, "lat":38.5791852, "state":"Missouri", "city":"Jefferson City"},
{"lon":-112.0183427, "lat":46.5857742, "state":"Montana", "city":"Helena"},
{"lon":-96.6997467, "lat":40.8080641, "state":"Nebraska", "city":"Lincoln"},
{"lon":-119.7663053, "lat":39.1640815, "state":"Nevada", "city":"Carson City"},
{"lon":-71.5382718, "lat":43.2069054, "state":"New Hampshire", "city":"Concord"},
{"lon":-74.7699552, "lat":40.2203572, "state":"New Jersey", "city":"Trenton"},
{"lon":-105.9396043, "lat":35.6823747, "state":"New Mexico", "city":"Santa Fe"},
{"lon":-73.7575015, "lat":42.6525086, "state":"New York", "city":"Albany"},
{"lon":-78.6391225, "lat":35.7803724, "state":"North Carolina", "city":"Raleigh"},
{"lon":-100.7827194, "lat":46.8207637, "state":"North Dakota", "city":"Bismarck"},
{"lon":-82.9987984, "lat":39.961461, "state":"Ohio", "city":"Columbus"},
{"lon":-97.5033801, "lat":35.4922882, "state":"Oklahoma", "city":"Oklahoma City"},
{"lon":-123.0301147, "lat":44.938743, "state":"Oregon", "city":"Salem"},
{"lon":-76.8837835, "lat":40.2644747, "state":"Pennsylvania", "city":"Harrisburg"},
{"lon":-71.414855, "lat":41.8308218, "state":"Rhode Island", "city":"Providence"},
{"lon":-81.0331509, "lat":34.0004393, "state":"South Carolina", "city":"Columbia"},
{"lon":-100.3462286, "lat":44.3671094, "state":"South Dakota", "city":"Pierre"},
{"lon":-86.7841708, "lat":36.1658985, "state":"Tennessee", "city":"Nashville"},
{"lon":-97.7403271, "lat":30.2746658, "state":"Texas", "city":"Austin"},
{"lon":-111.888132, "lat":40.7773586, "state":"Utah", "city":"Salt Lake City"},
{"lon":-72.5804725, "lat":44.2624522, "state":"Vermont", "city":"Montpelier"},
{"lon":-77.4335963, "lat":37.5387651, "state":"Virginia", "city":"Richmond"},
{"lon":-122.9049162, "lat":47.0357595, "state":"Washington", "city":"Olympia"},
{"lon":-81.6120072, "lat":38.3364019, "state":"West Virginia", "city":"Charleston"},
{"lon":-89.3841797, "lat":43.0746533, "state":"Wisconsin", "city":"Madison"},
{"lon":-104.8203092, "lat":41.140301, "state":"Wyoming", "city":"Cheyenne"}
]
6 changes: 3 additions & 3 deletions datapackage.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
}
],
"version": "2.11.0",
"created": "2025-01-21T15:05:39.767318+00:00",
"created": "2025-01-23T15:50:44.713255+00:00",
"resources": [
{
"name": "7zip.png",
Expand Down Expand Up @@ -3071,8 +3071,8 @@
"format": "json",
"mediatype": "text/json",
"encoding": "utf-8",
"hash": "sha1:9c3211c5058c899412c30f5992a77c54a1b80066",
"bytes": 3869,
"hash": "sha1:32b4d3a13918b0aa85e62c09495eccf842fffb31",
"bytes": 4048,
"dialect": {
"json": {
"keyed": true
Expand Down
2 changes: 1 addition & 1 deletion datapackage.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# vega-datasets
`2.11.0` | [GitHub](http://github.com/vega/vega-datasets.git) | 2025-01-21 15:05:39 [UTC]
`2.11.0` | [GitHub](http://github.com/vega/vega-datasets.git) | 2025-01-23 15:50:44 [UTC]

Common repository for example datasets used by Vega related projects.
BSD-3-Clause license applies only to package code and infrastructure. Users should verify their use of datasets
Expand Down
189 changes: 189 additions & 0 deletions scripts/us-state-capitals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
"""
Retrieves and saves U.S. state capital locations with their coordinates from the National Map API.

This script fetches data from the USGS National Map Structures Database API to generate a JSON file
containing the latitude, longitude, state, and city of U.S. state capitals. State capitol building
locations are used as a practical representation of state capital city points.

It relies on a local JSON file `_data/us-state-codes.json` for mapping state abbreviations to full names.
"""

from __future__ import annotations

import json
import typing
import warnings
from functools import partial
from operator import itemgetter
from pathlib import Path
from typing import TYPE_CHECKING, Literal, TypedDict

import niquests

if TYPE_CHECKING:
import sys
from collections.abc import Iterator, Mapping, Sequence
from typing import Any, LiteralString

if sys.version_info >= (3, 13):
from typing import TypeIs
else:
from typing_extensions import TypeIs

type Features = Sequence[Feature[Any, Any, Any]]
"""Represents the ``features`` property of capitol building data, before validation."""

# Attribute fields requested from the service; ``get_state_capitols`` joins these
# literals (plus "SHAPE") into the ``outFields`` query parameter.
type FieldName = Literal["NAME", "STATE", "CITY"]

# Paths are resolved relative to this script: the repository root is the parent
# of the directory that contains this file.
REPO_ROOT: Path = Path(__file__).parent.parent
INPUT_DIR: Path = REPO_ROOT / "_data"
OUTPUT_DIR: Path = REPO_ROOT / "data"

INPUT_FILE: Path = INPUT_DIR / "us-state-codes.json"
"""
State abbreviation to full name mappings (from JSON "states").

Used for name lookup and territory filtering.

Example:

{"states": {"AL": "Alabama", "WY": "Wyoming"}, "territories": {}}
"""

# Destination of the generated capitals list.
OUTPUT_FILE: Path = OUTPUT_DIR / "us-state-capitals.json"
# REST endpoints, each built from the previous one; the final URL is the query
# operation of layer 6 of the structures map service.
URL_ARCGIS = "https://carto.nationalmap.gov/arcgis/rest/"
URL_MAP_SERVER = f"{URL_ARCGIS}services/structures/MapServer/"
URL_STATE_CAPITOLS = f"{URL_MAP_SERVER}6/query"
# NOTE(review): 83006 is presumably the feature code for state capitol
# buildings -- confirm against the layer's metadata.
FEATURE_STATE_CAPITOLS = "FCODE = 83006"
# Same abbreviations as the "territories" object in `_data/us-state-codes.json`.
TERRITORIES = "STATE IN ('AS', 'GU', 'MP', 'PR', 'VI')"
# Server-side filter: capitol features only, with the territories excluded.
WHERE_CLAUSE = f"{FEATURE_STATE_CAPITOLS} AND NOT ({TERRITORIES})"
WKID_WGS84: Literal[4326] = 4326
"""
`Well-known ID`_ for `WGS 84`_, used as a `spatial reference`_.

.. _Well-known ID: https://support.esri.com/en-us/gis-dictionary/wkid
.. _WGS 84: https://en.wikipedia.org/wiki/World_Geodetic_System#WGS_84
.. _spatial reference: https://carto.nationalmap.gov/arcgis/help/en/rest/services-reference/enterprise/geometry-objects/#spatial-reference
"""


class MapServiceLayerResponse(TypedDict, total=False):
    """
    Subset of the JSON payload returned by a `National Map Structures Database`_ layer query.

    Declared with ``total=False`` so that a payload without ``features`` still
    matches the type; read the key with ``.get`` rather than indexing.

    .. _National Map Structures Database: https://carto.nationalmap.gov/arcgis/help/en/rest/services-reference/enterprise/query-map-service-layer/
    """

    # Raw feature records, not yet validated.
    features: Features


class Point(TypedDict):
    """Point geometry of a feature; this script reads ``x`` as longitude and ``y`` as latitude."""

    x: float
    y: float


class Feature[A_KT: LiteralString, A_VT: str | float | bool, G: Mapping[str, Any]](
TypedDict
):
"""
A generic `GeoJSON feature object`_.

.. _GeoJSON feature object: https://carto.nationalmap.gov/arcgis/help/en/rest/services-reference/enterprise/feature-object/
"""

attributes: Mapping[A_KT, A_VT]
geometry: G


class CapitolFeature(Feature[FieldName, str, Point]):
    """A state capitol feature that has passed validation but has **not** been processed yet."""


class StateCapitol(TypedDict):
    """Output record for one state capitol, produced **after** processing a feature."""

    # Coordinates of the capitol building.
    lon: float
    lat: float
    # Full state name (not the abbreviation) and the capital city's name.
    state: str
    city: str


def read_json(source: str | Path, /) -> Any:
    """Parse and return the UTF-8 encoded JSON document stored at ``source``."""
    with Path(source).open(encoding="utf-8") as stream:
        return json.load(stream)


def get_state_capitols() -> Features:
    """
    Query the National Map Structures Database for state capitol features.

    Returns the ``features`` entry of the JSON response.

    Raises ``TypeError`` when the response has no ``features`` entry or the
    entry is empty. HTTP error statuses surface through ``raise_for_status``.
    """
    # Request every ``FieldName`` attribute plus the geometry column.
    out_fields = ",".join([*_get_args(FieldName), "SHAPE"])
    query = {
        "f": "json",
        "where": WHERE_CLAUSE,
        "outFields": out_fields,
        "geometryPrecision": 7,
        "outSR": WKID_WGS84,
        "returnGeometry": True,
    }
    reply = niquests.get(URL_STATE_CAPITOLS, params=query)
    reply.raise_for_status()
    payload: MapServiceLayerResponse = reply.json()
    found = payload.get("features")
    if not found:
        # Include the whole payload so a service-side error is visible in the traceback.
        msg = f"Expected a features mapping but got:\n\n{payload!r}"
        raise TypeError(msg)
    return found


def is_capitol_feature(feat: Feature, states: dict[str, str]) -> TypeIs[CapitolFeature]:
    """
    Tell whether ``feat`` is a well-formed capitol record for one of ``states``.

    A feature qualifies when its attributes carry a ``STATE`` abbreviation found
    in ``states`` (territories are absent from that mapping) and a ``CITY``,
    and its geometry consists of exactly the keys ``x`` and ``y``.
    """
    attributes = feat.get("attributes")
    if not attributes:
        return False
    if attributes.get("STATE") not in states or "CITY" not in attributes:
        return False
    geometry = feat.get("geometry")
    if not geometry:
        return False
    return geometry.keys() == {"x", "y"}


def into_state_capitol(feat: CapitolFeature, states: dict[str, str]) -> StateCapitol:
    """Flatten a validated feature into an output record, expanding the state abbreviation via ``states``."""
    point = feat["geometry"]
    attributes = feat["attributes"]
    return StateCapitol(
        lon=point["x"],
        lat=point["y"],
        state=states[attributes["STATE"]],
        city=attributes["CITY"],
    )


def iter_state_capitols(
    features: Features, states: dict[str, str]
) -> Iterator[StateCapitol]:
    """
    Lazily convert ``features`` into output records.

    Features rejected by ``is_capitol_feature`` are not yielded; a warning
    containing the offending feature is emitted for each of them instead.
    """
    for candidate in features:
        if not is_capitol_feature(candidate, states):
            msg = f"Unexpected territory:\n{candidate!r}"
            warnings.warn(msg, stacklevel=2)
            continue
        yield into_state_capitol(candidate, states)


def write_json(data: Sequence[StateCapitol], output: Path) -> None:
    """
    Save ``data`` to ``output`` as a JSON array with one record per line.

    Each record is written on its own indented line, using ``", "`` between
    members and ``":"`` between key and value. The file is UTF-8 encoded and
    always uses ``\\n`` line endings, regardless of platform.

    An empty ``data`` produces an empty array (``[`` and ``]`` on separate
    lines) instead of raising ``IndexError``.
    """
    INDENT, OB, CB, NL = " ", "[", "]", "\n"
    to_str = partial(json.dumps, separators=(", ", ":"))
    # Serialize before opening the file, so a record that cannot be encoded
    # does not leave a truncated file behind.
    rows = f",{NL}".join(f"{INDENT}{to_str(record)}" for record in data)
    body = f"{rows}{NL}" if rows else ""
    with output.open("w", encoding="utf-8", newline="\n") as f:
        f.write(f"{OB}{NL}{body}{CB}{NL}")


def _get_args(tp: Any, /) -> tuple[Any, ...]:
    """Return the type arguments of ``tp``, first unwrapping its ``__value__`` attribute when it has one."""
    # ``type X = ...`` aliases keep the aliased type on ``__value__``.
    target = getattr(tp, "__value__", tp)
    return typing.get_args(target)


def main() -> None:
    """Fetch the capitol features, sort the records by state name and write them to ``OUTPUT_FILE``."""
    features = get_state_capitols()
    states = read_json(INPUT_FILE)["states"]
    capitals = list(iter_state_capitols(features, states))
    capitals.sort(key=itemgetter("state"))
    print(f"Found {len(capitals)} state capitals")
    OUTPUT_FILE.touch()
    write_json(capitals, OUTPUT_FILE)
    print(f"Data written to {OUTPUT_FILE}")


# Run the generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Loading