diff --git a/_data/us-state-codes.json b/_data/us-state-codes.json new file mode 100644 index 00000000..dd1269a6 --- /dev/null +++ b/_data/us-state-codes.json @@ -0,0 +1,61 @@ +{ + "states": { + "AL": "Alabama", + "AK": "Alaska", + "AZ": "Arizona", + "AR": "Arkansas", + "CA": "California", + "CO": "Colorado", + "CT": "Connecticut", + "DE": "Delaware", + "FL": "Florida", + "GA": "Georgia", + "HI": "Hawaii", + "ID": "Idaho", + "IL": "Illinois", + "IN": "Indiana", + "IA": "Iowa", + "KS": "Kansas", + "KY": "Kentucky", + "LA": "Louisiana", + "ME": "Maine", + "MD": "Maryland", + "MA": "Massachusetts", + "MI": "Michigan", + "MN": "Minnesota", + "MS": "Mississippi", + "MO": "Missouri", + "MT": "Montana", + "NE": "Nebraska", + "NV": "Nevada", + "NH": "New Hampshire", + "NJ": "New Jersey", + "NM": "New Mexico", + "NY": "New York", + "NC": "North Carolina", + "ND": "North Dakota", + "OH": "Ohio", + "OK": "Oklahoma", + "OR": "Oregon", + "PA": "Pennsylvania", + "RI": "Rhode Island", + "SC": "South Carolina", + "SD": "South Dakota", + "TN": "Tennessee", + "TX": "Texas", + "UT": "Utah", + "VT": "Vermont", + "VA": "Virginia", + "WA": "Washington", + "WV": "West Virginia", + "WI": "Wisconsin", + "WY": "Wyoming" + }, + "territories": { + "AS": "American Samoa", + "GU": "Guam", + "MP": "Northern Mariana Islands", + "PR": "Puerto Rico", + "VI": "U.S. Virgin Islands" + } +} \ No newline at end of file diff --git a/data/us-state-capitals.json b/data/us-state-capitals.json index 9c3211c5..32b4d3a1 100644 --- a/data/us-state-capitals.json +++ b/data/us-state-capitals.json @@ -1,52 +1,52 @@ [ - {"lon":-86.279118, "lat":32.361538, "state":"Alabama", "city":"Montgomery"}, - {"lon":-134.41974, "lat":58.301935, "state":"Alaska", "city":"Juneau"}, - {"lon":-112.073844, "lat":33.448457, "state":"Arizona", "city":"Phoenix"}, - {"lon":-92.331122, "lat":34.736009, "state":"Arkansas", "city":"Little Rock"}, - {"lon":-121.468926, "lat":38.555605, "state":"California", "city":"Sacramento"}, - {"lon":-104.984167, "lat":39.7391667, "state":"Colorado", "city":"Denver"}, - {"lon":-72.677, "lat":41.767, "state":"Connecticut", "city":"Hartford"}, - {"lon":-75.526755, "lat":39.161921, "state":"Delaware", "city":"Dover"}, - {"lon":-84.27277, "lat":30.4518, "state":"Florida", "city":"Tallahassee"}, - {"lon":-84.39, "lat":33.76, "state":"Georgia", "city":"Atlanta"}, - {"lon":-157.826182, "lat":21.30895, "state":"Hawaii", "city":"Honolulu"}, - {"lon":-116.237651, "lat":43.613739, "state":"Idaho", "city":"Boise"}, - {"lon":-89.650373, "lat":39.78325, "state":"Illinois", "city":"Springfield"}, - {"lon":-86.147685, "lat":39.790942, "state":"Indiana", "city":"Indianapolis"}, - {"lon":-93.620866, "lat":41.590939, "state":"Iowa", "city":"Des Moines"}, - {"lon":-95.69, "lat":39.04, "state":"Kansas", "city":"Topeka"}, - {"lon":-84.86311, "lat":38.197274, "state":"Kentucky", "city":"Frankfort"}, - {"lon":-91.140229, "lat":30.45809, "state":"Louisiana", "city":"Baton Rouge"}, - {"lon":-69.765261, "lat":44.323535, "state":"Maine", "city":"Augusta"}, - {"lon":-76.501157, "lat":38.972945, "state":"Maryland", "city":"Annapolis"}, - {"lon":-71.0275, "lat":42.2352, "state":"Massachusetts", "city":"Boston"}, - {"lon":-84.5467, "lat":42.7335, "state":"Michigan", "city":"Lansing"}, - {"lon":-93.094, "lat":44.95, "state":"Minnesota", "city":"Saint Paul"}, - {"lon":-90.207, "lat":32.32, "state":"Mississippi", "city":"Jackson"}, - {"lon":-92.189283, "lat":38.572954, "state":"Missouri", "city":"Jefferson City"}, - {"lon":-112.027031, "lat":46.595805, "state":"Montana", "city":"Helena"}, - {"lon":-96.675345, "lat":40.809868, "state":"Nebraska", "city":"Lincoln"}, - {"lon":-119.753877, "lat":39.160949, "state":"Nevada", "city":"Carson City"}, - {"lon":-71.549127, "lat":43.220093, "state":"New Hampshire", "city":"Concord"}, - {"lon":-74.756138, "lat":40.221741, "state":"New Jersey", "city":"Trenton"}, - {"lon":-105.964575, "lat":35.667231, "state":"New Mexico", "city":"Santa Fe"}, - {"lon":-73.781339, "lat":42.659829, "state":"New York", "city":"Albany"}, - {"lon":-78.638, "lat":35.771, "state":"North Carolina", "city":"Raleigh"}, - {"lon":-100.782868, "lat":46.819173, "state":"North Dakota", "city":"Bismarck"}, - {"lon":-83.000647, "lat":39.962245, "state":"Ohio", "city":"Columbus"}, - {"lon":-97.534994, "lat":35.482309, "state":"Oklahoma", "city":"Oklahoma City"}, - {"lon":-123.029159, "lat":44.931109, "state":"Oregon", "city":"Salem"}, - {"lon":-76.875613, "lat":40.269789, "state":"Pennsylvania", "city":"Harrisburg"}, - {"lon":-71.422132, "lat":41.82355, "state":"Rhode Island", "city":"Providence"}, - {"lon":-81.035, "lat":34, "state":"South Carolina", "city":"Columbia"}, - {"lon":-100.336378, "lat":44.367966, "state":"South Dakota", "city":"Pierre"}, - {"lon":-86.784, "lat":36.165, "state":"Tennessee", "city":"Nashville"}, - {"lon":-97.75, "lat":30.266667, "state":"Texas", "city":"Austin"}, - {"lon":-111.892622, "lat":40.7547, "state":"Utah", "city":"Salt Lake City"}, - {"lon":-72.57194, "lat":44.26639, "state":"Vermont", "city":"Montpelier"}, - {"lon":-77.46, "lat":37.54, "state":"Virginia", "city":"Richmond"}, - {"lon":-122.893077, "lat":47.042418, "state":"Washington", "city":"Olympia"}, - {"lon":-81.633294, "lat":38.349497, "state":"West Virginia", "city":"Charleston"}, - {"lon":-89.384444, "lat":43.074722, "state":"Wisconsin", "city":"Madison"}, - {"lon":-104.802042, "lat":41.145548, "state":"Wyoming", "city":"Cheyenne"} + {"lon":-86.3005639, "lat":32.3777298, "state":"Alabama", "city":"Montgomery"}, + {"lon":-134.4104388, "lat":58.3020694, "state":"Alaska", "city":"Juneau"}, + {"lon":-112.097065, "lat":33.4482497, "state":"Arizona", "city":"Phoenix"}, + {"lon":-92.2892284, "lat":34.746745, "state":"Arkansas", "city":"Little Rock"}, + {"lon":-121.4935591, "lat":38.5765854, "state":"California", "city":"Sacramento"}, + {"lon":-104.9849779, "lat":39.7392198, "state":"Colorado", "city":"Denver"}, + {"lon":-72.6823164, "lat":41.7642752, "state":"Connecticut", "city":"Hartford"}, + {"lon":-75.5195811, "lat":39.1572815, "state":"Delaware", "city":"Dover"}, + {"lon":-84.2821265, "lat":30.4381047, "state":"Florida", "city":"Tallahassee"}, + {"lon":-84.3879614, "lat":33.7490287, "state":"Georgia", "city":"Atlanta"}, + {"lon":-157.8573111, "lat":21.3073439, "state":"Hawaii", "city":"Honolulu"}, + {"lon":-116.1998483, "lat":43.6177948, "state":"Idaho", "city":"Boise"}, + {"lon":-89.6547203, "lat":39.7983912, "state":"Illinois", "city":"Springfield"}, + {"lon":-86.1627697, "lat":39.7683841, "state":"Indiana", "city":"Indianapolis"}, + {"lon":-93.6038358, "lat":41.5911079, "state":"Iowa", "city":"Des Moines"}, + {"lon":-95.6780057, "lat":39.0482389, "state":"Kansas", "city":"Topeka"}, + {"lon":-84.8753598, "lat":38.1866989, "state":"Kentucky", "city":"Frankfort"}, + {"lon":-91.1873935, "lat":30.457024, "state":"Louisiana", "city":"Baton Rouge"}, + {"lon":-69.7816228, "lat":44.307213, "state":"Maine", "city":"Augusta"}, + {"lon":-76.491037, "lat":38.9788927, "state":"Maryland", "city":"Annapolis"}, + {"lon":-71.0640129, "lat":42.3587532, "state":"Massachusetts", "city":"Boston"}, + {"lon":-84.5555605, "lat":42.7336193, "state":"Michigan", "city":"Lansing"}, + {"lon":-93.1021034, "lat":44.9551063, "state":"Minnesota", "city":"Saint Paul"}, + {"lon":-90.1820382, "lat":32.303763, "state":"Mississippi", "city":"Jackson"}, + {"lon":-92.1728432, "lat":38.5791852, "state":"Missouri", "city":"Jefferson City"}, + {"lon":-112.0183427, "lat":46.5857742, "state":"Montana", "city":"Helena"}, + {"lon":-96.6997467, "lat":40.8080641, "state":"Nebraska", "city":"Lincoln"}, + {"lon":-119.7663053, "lat":39.1640815, "state":"Nevada", "city":"Carson City"}, + {"lon":-71.5382718, "lat":43.2069054, "state":"New Hampshire", "city":"Concord"}, + {"lon":-74.7699552, "lat":40.2203572, "state":"New Jersey", "city":"Trenton"}, + {"lon":-105.9396043, "lat":35.6823747, "state":"New Mexico", "city":"Santa Fe"}, + {"lon":-73.7575015, "lat":42.6525086, "state":"New York", "city":"Albany"}, + {"lon":-78.6391225, "lat":35.7803724, "state":"North Carolina", "city":"Raleigh"}, + {"lon":-100.7827194, "lat":46.8207637, "state":"North Dakota", "city":"Bismarck"}, + {"lon":-82.9987984, "lat":39.961461, "state":"Ohio", "city":"Columbus"}, + {"lon":-97.5033801, "lat":35.4922882, "state":"Oklahoma", "city":"Oklahoma City"}, + {"lon":-123.0301147, "lat":44.938743, "state":"Oregon", "city":"Salem"}, + {"lon":-76.8837835, "lat":40.2644747, "state":"Pennsylvania", "city":"Harrisburg"}, + {"lon":-71.414855, "lat":41.8308218, "state":"Rhode Island", "city":"Providence"}, + {"lon":-81.0331509, "lat":34.0004393, "state":"South Carolina", "city":"Columbia"}, + {"lon":-100.3462286, "lat":44.3671094, "state":"South Dakota", "city":"Pierre"}, + {"lon":-86.7841708, "lat":36.1658985, "state":"Tennessee", "city":"Nashville"}, + {"lon":-97.7403271, "lat":30.2746658, "state":"Texas", "city":"Austin"}, + {"lon":-111.888132, "lat":40.7773586, "state":"Utah", "city":"Salt Lake City"}, + {"lon":-72.5804725, "lat":44.2624522, "state":"Vermont", "city":"Montpelier"}, + {"lon":-77.4335963, "lat":37.5387651, "state":"Virginia", "city":"Richmond"}, + {"lon":-122.9049162, "lat":47.0357595, "state":"Washington", "city":"Olympia"}, + {"lon":-81.6120072, "lat":38.3364019, "state":"West Virginia", "city":"Charleston"}, + {"lon":-89.3841797, "lat":43.0746533, "state":"Wisconsin", "city":"Madison"}, + {"lon":-104.8203092, "lat":41.140301, "state":"Wyoming", "city":"Cheyenne"} ] diff --git a/datapackage.json b/datapackage.json index 2369730c..118676d3 100644 --- a/datapackage.json +++ b/datapackage.json @@ -20,7 +20,7 @@ } ], "version": "2.11.0", - "created": "2025-01-21T15:05:39.767318+00:00", + "created": "2025-01-23T15:50:44.713255+00:00", "resources": [ { "name": "7zip.png", @@ -3071,8 +3071,8 @@ "format": "json", "mediatype": "text/json", "encoding": "utf-8", - "hash": "sha1:9c3211c5058c899412c30f5992a77c54a1b80066", - "bytes": 3869, + "hash": "sha1:32b4d3a13918b0aa85e62c09495eccf842fffb31", + "bytes": 4048, "dialect": { "json": { "keyed": true diff --git a/datapackage.md b/datapackage.md index 88d1ae03..5cf1f4d5 100644 --- a/datapackage.md +++ b/datapackage.md @@ -1,5 +1,5 @@ # vega-datasets -`2.11.0` | [GitHub](http://github.com/vega/vega-datasets.git) | 2025-01-21 15:05:39 [UTC] +`2.11.0` | [GitHub](http://github.com/vega/vega-datasets.git) | 2025-01-23 15:50:44 [UTC] Common repository for example datasets used by Vega related projects. BSD-3-Clause license applies only to package code and infrastructure. Users should verify their use of datasets diff --git a/scripts/us-state-capitals.py b/scripts/us-state-capitals.py new file mode 100644 index 00000000..6fc6a498 --- /dev/null +++ b/scripts/us-state-capitals.py @@ -0,0 +1,189 @@ +""" +Retrieves and saves U.S. state capital locations with their coordinates from the National Map API. + +This script fetches data from the USGS National Map Structures Database API to generate a JSON file +containing the latitude, longitude, state, and city of U.S. state capitals. State capitol building +locations are used as a practical representation of state capital city points. + +It relies on a local JSON file `_data/us-state-codes.json` for mapping state abbreviations to full names. +""" + +from __future__ import annotations + +import json +import typing +import warnings +from functools import partial +from operator import itemgetter +from pathlib import Path +from typing import TYPE_CHECKING, Literal, TypedDict + +import niquests + +if TYPE_CHECKING: + import sys + from collections.abc import Iterator, Mapping, Sequence + from typing import Any, LiteralString + + if sys.version_info >= (3, 13): + from typing import TypeIs + else: + from typing_extensions import TypeIs + +type Features = Sequence[Feature[Any, Any, Any]] +"""Represents the ``features`` property of capitol building data, before validation.""" + +type FieldName = Literal["NAME", "STATE", "CITY"] + +REPO_ROOT: Path = Path(__file__).parent.parent +INPUT_DIR: Path = REPO_ROOT / "_data" +OUTPUT_DIR: Path = REPO_ROOT / "data" + +INPUT_FILE: Path = INPUT_DIR / "us-state-codes.json" +""" +State abbreviation to full name mappings (from JSON "states"). + +Used for name lookup and territory filtering. + +Example: + + {"states": {"AL": "Alabama", "WY": "Wyoming"}, "territories": {}} +""" + +OUTPUT_FILE: Path = OUTPUT_DIR / "us-state-capitals.json" +URL_ARCGIS = "https://carto.nationalmap.gov/arcgis/rest/" +URL_MAP_SERVER = f"{URL_ARCGIS}services/structures/MapServer/" +URL_STATE_CAPITOLS = f"{URL_MAP_SERVER}6/query" +FEATURE_STATE_CAPITOLS = "FCODE = 83006" +TERRITORIES = "STATE IN ('AS', 'GU', 'MP', 'PR', 'VI')" +WHERE_CLAUSE = f"{FEATURE_STATE_CAPITOLS} AND NOT ({TERRITORIES})" +WKID_WGS84: Literal[4326] = 4326 +""" +`Well-known ID`_ for `WGS 84`_, used as a `spatial reference`_. + +.. _Well-known ID: https://support.esri.com/en-us/gis-dictionary/wkid +.. _WGS 84: https://en.wikipedia.org/wiki/World_Geodetic_System#WGS_84 +.. _spatial reference: https://carto.nationalmap.gov/arcgis/help/en/rest/services-reference/enterprise/geometry-objects/#spatial-reference +""" + + +class MapServiceLayerResponse(TypedDict, total=False): + """ + Response from `National Map Structures Database`_. + + .. _National Map Structures Database: https://carto.nationalmap.gov/arcgis/help/en/rest/services-reference/enterprise/query-map-service-layer/ + """ + + features: Features + + +class Point(TypedDict): + x: float + y: float + + +class Feature[A_KT: LiteralString, A_VT: str | float | bool, G: Mapping[str, Any]]( + TypedDict +): + """ + A generic `GeoJSON feature object`_. + + .. _GeoJSON feature object: https://carto.nationalmap.gov/arcgis/help/en/rest/services-reference/enterprise/feature-object/ + """ + + attributes: Mapping[A_KT, A_VT] + geometry: G + + +class CapitolFeature(Feature[FieldName, str, Point]): + """Validated state capitol feature, **prior** to any processing.""" + + +class StateCapitol(TypedDict): + """State capitol feature, **after** processing.""" + + lon: float + lat: float + state: str + city: str + + +def read_json(source: str | Path, /) -> Any: + return json.loads(Path(source).read_text("utf-8")) + + +def get_state_capitols() -> Features: + """Fetches state capitol building coordinates from the National Map Structures Database.""" + params = { + "f": "json", + "where": WHERE_CLAUSE, + "outFields": ",".join((*_get_args(FieldName), "SHAPE")), + "geometryPrecision": 7, + "outSR": WKID_WGS84, + "returnGeometry": True, + } + response = niquests.get(URL_STATE_CAPITOLS, params=params) + response.raise_for_status() + content: MapServiceLayerResponse = response.json() + if features := content.get("features"): + return features + msg = f"Expected a features mapping but got:\n\n{content!r}" + raise TypeError(msg) + + +def is_capitol_feature(feat: Feature, states: dict[str, str]) -> TypeIs[CapitolFeature]: + """Ensure feature describes only capitols of states and not territories.""" + return bool( + (attrs := feat.get("attributes")) + and attrs.get("STATE") in states + and "CITY" in attrs + and (geom := feat.get("geometry")) + and geom.keys() == {"x", "y"} + ) + + +def into_state_capitol(feat: CapitolFeature, states: dict[str, str]) -> StateCapitol: + """Convert feature response into a clean format with full state names.""" + geom, attrs = feat["geometry"], feat["attributes"] + return StateCapitol( + lon=geom["x"], lat=geom["y"], state=states[attrs["STATE"]], city=attrs["CITY"] + ) + + +def iter_state_capitols( + features: Features, states: dict[str, str] +) -> Iterator[StateCapitol]: + for feat in features: + if is_capitol_feature(feat, states): + yield into_state_capitol(feat, states) + else: + msg = f"Unexpected territory:\n{feat!r}" + warnings.warn(msg, stacklevel=2) + + +def write_json(data: Sequence[StateCapitol], output: Path) -> None: + """Saves ``data`` to ``output`` with consistent formatting.""" + INDENT, OB, CB, NL = " ", "[", "]", "\n" + to_str = partial(json.dumps, separators=(", ", ":")) + with output.open("w", encoding="utf-8", newline="\n") as f: + f.write(f"{OB}{NL}") + for record in data[:-1]: + f.write(f"{INDENT}{to_str(record)},{NL}") + f.write(f"{INDENT}{to_str(data[-1])}{NL}{CB}{NL}") + + +def _get_args(tp: Any, /) -> tuple[Any, ...]: + return typing.get_args(getattr(tp, "__value__", tp)) + + +def main() -> None: + it = iter_state_capitols(get_state_capitols(), read_json(INPUT_FILE)["states"]) + by_state = sorted(it, key=itemgetter("state")) + print(f"Found {len(by_state)} state capitals") + OUTPUT_FILE.touch() + write_json(by_state, OUTPUT_FILE) + print(f"Data written to {OUTPUT_FILE}") + + +if __name__ == "__main__": + main()