22
22
from typing_extensions import TypeAlias
23
23
24
24
25
- __all__ = [
26
- "EXTENSION_SUFFIXES" ,
27
- "VERSION_LATEST" ,
28
- "Dataset" ,
29
- "Extension" ,
30
- "Metadata" ,
31
- "Version" ,
32
- "is_ext_read" ,
33
- ]
25
+ __all__ = ["EXTENSION_SUFFIXES" , "Dataset" , "Extension" , "Metadata" , "is_ext_read" ]
34
26
35
27
Dataset : TypeAlias = Literal [
28
+ "7zip" ,
36
29
"airports" ,
37
30
"annual-precip" ,
38
31
"anscombe" ,
42
35
"budgets" ,
43
36
"burtin" ,
44
37
"cars" ,
45
- "climate" ,
46
38
"co2-concentration" ,
47
39
"countries" ,
48
40
"crimea" ,
49
41
"disasters" ,
50
42
"driving" ,
51
43
"earthquakes" ,
44
+ "ffox" ,
52
45
"flare" ,
53
46
"flare-dependencies" ,
54
47
"flights-10k" ,
61
54
"football" ,
62
55
"gapminder" ,
63
56
"gapminder-health-income" ,
57
+ "gimp" ,
64
58
"github" ,
65
59
"global-temp" ,
66
- "graticule" ,
67
60
"income" ,
68
61
"iowa-electricity" ,
69
- "iris" ,
70
62
"jobs" ,
71
63
"la-riots" ,
72
64
"londonBoroughs" ,
86
78
"political-contributions" ,
87
79
"population" ,
88
80
"population_engineers_hurricanes" ,
89
- "seattle-temps" ,
90
81
"seattle-weather" ,
91
82
"seattle-weather-hourly-normals" ,
92
- "sf-temps" ,
93
83
"sp500" ,
94
84
"sp500-2000" ,
95
85
"stocks" ,
102
92
"us-state-capitals" ,
103
93
"volcano" ,
104
94
"weather" ,
105
- "weball26" ,
95
+ "weekly-weather" ,
106
96
"wheat" ,
107
97
"windvectors" ,
108
98
"world-110m" ,
109
99
"zipcodes" ,
110
100
]
111
- Version : TypeAlias = Literal [
112
- "v2.11.0" ,
113
- "v2.10.0" ,
114
- "v2.9.0" ,
115
- "v2.8.1" ,
116
- "v2.8.0" ,
117
- "v2.7.0" ,
118
- "v2.5.4" ,
119
- "v2.5.3" ,
120
- "v2.5.3-next.0" ,
121
- "v2.5.2" ,
122
- "v2.5.2-next.0" ,
123
- "v2.5.1" ,
124
- "v2.5.1-next.0" ,
125
- "v2.5.0" ,
126
- "v2.5.0-next.0" ,
127
- "v2.4.0" ,
128
- "v2.3.1" ,
129
- "v2.3.0" ,
130
- "v2.1.0" ,
131
- "v2.0.0" ,
132
- "v1.31.1" ,
133
- "v1.31.0" ,
134
- "v1.30.4" ,
135
- "v1.30.3" ,
136
- "v1.30.2" ,
137
- "v1.30.1" ,
138
- "v1.29.0" ,
139
- "v1.24.0" ,
140
- "v1.22.0" ,
141
- "v1.21.1" ,
142
- "v1.21.0" ,
143
- "v1.20.0" ,
144
- "v1.19.0" ,
145
- "v1.18.0" ,
146
- "v1.17.0" ,
147
- "v1.16.0" ,
148
- "v1.15.0" ,
149
- "v1.14.0" ,
150
- "v1.12.0" ,
151
- "v1.11.0" ,
152
- "v1.10.0" ,
153
- "v1.8.0" ,
154
- "v1.7.0" ,
155
- "v1.5.0" ,
156
- ]
157
- Extension : TypeAlias = Literal [".csv" , ".json" , ".tsv" , ".arrow" , ".parquet" ]
158
- VERSION_LATEST : Literal ["v2.11.0" ] = "v2.11.0"
101
+ Extension : TypeAlias = Literal [".arrow" , ".csv" , ".json" , ".parquet" , ".tsv" ]
159
102
EXTENSION_SUFFIXES : tuple [
103
+ Literal [".arrow" ],
160
104
Literal [".csv" ],
161
105
Literal [".json" ],
162
- Literal [".tsv" ],
163
- Literal [".arrow" ],
164
106
Literal [".parquet" ],
165
- ] = (".csv" , ".json" , ".tsv" , ".arrow" , ".parquet" )
107
+ Literal [".tsv" ],
108
+ ] = (".arrow" , ".csv" , ".json" , ".parquet" , ".tsv" )
166
109
167
110
168
111
def is_ext_read(suffix: Any) -> TypeIs[Extension]:
    """
    Return ``True`` when ``suffix`` is one of the ``Extension`` literals.

    Parameters
    ----------
    suffix
        Value to test. Anything may be passed; only an exact string match
        (leading dot included, no surrounding whitespace) is accepted.

    Returns
    -------
    bool
        ``True`` for ``".arrow"``, ``".csv"``, ``".json"``, ``".parquet"``
        or ``".tsv"``; ``False`` for every other value.
    """
    # The ``isinstance`` check keeps unhashable inputs (e.g. a ``list``) from
    # raising ``TypeError`` in the set lookup; a type guard should answer
    # ``False`` instead.
    # The literals must match ``Extension``/``EXTENSION_SUFFIXES`` exactly,
    # so they carry no padding.
    return isinstance(suffix, str) and suffix in {
        ".arrow",
        ".csv",
        ".json",
        ".parquet",
        ".tsv",
    }
170
113
171
114
172
115
class Metadata (TypedDict , total = False ):
@@ -177,29 +120,34 @@ class Metadata(TypedDict, total=False):
177
120
----------
178
121
dataset_name
179
122
Name of the dataset/`Path.stem`_.
180
- ext_supported
181
Dataset can be read as tabular data.
123
+ suffix
124
+ File extension/`Path.suffix`_.
182
125
file_name
183
126
Equivalent to `Path.name`_.
184
- name_collision
185
- Dataset is available via multiple formats.
186
-
187
- .. note::
188
- Requires specifying a preference in calls to ``data(name, suffix=...)``
127
+ bytes
128
+ File size in *bytes*.
129
+ is_image
130
+ Boolean flag; presumably set for image files (TODO: confirm and document).
131
+ is_tabular
132
+ Can be read as tabular data.
133
+ is_geo
134
+ Boolean flag; presumably set for GeoJSON data (TODO: confirm and document).
135
+ is_topo
136
+ Boolean flag; presumably set for TopoJSON data (TODO: confirm and document).
137
+ is_spatial
138
+ Boolean flag; presumably set for any geospatial format (TODO: confirm and document).
139
+ is_json
140
+ Boolean flag; presumably set for JSON-encoded files (TODO: confirm and document).
141
+ has_schema
142
+ Data types available for improved ``pandas`` parsing.
189
143
sha
190
144
Unique hash for the dataset.
191
145
192
146
.. note::
193
- If the dataset did *not* change between ``v1.0.0``-``v2.0.0``;
147
+ E.g. if the dataset did *not* change between ``v1.0.0``-``v2.0.0``;
194
148
195
- then all ``tag``(s) in this range would **share** this value.
196
- size
197
- File size (*bytes*).
198
- suffix
199
- File extension/`Path.suffix`_.
200
- tag
201
- Version identifier for a `vega-datasets release`_.
202
- url_npm
149
+ then this value would remain stable.
150
+ url
203
151
Remote url used to access dataset.
204
152
205
153
.. _Path.stem:
@@ -208,13 +156,14 @@ class Metadata(TypedDict, total=False):
208
156
https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.name
209
157
.. _Path.suffix:
210
158
https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.suffix
211
- .. _vega-datasets release:
212
- https://github.com/vega/vega-datasets/releases
159
+
213
160
214
161
Examples
215
162
--------
216
163
``Metadata`` keywords form constraints to filter a table like the below sample:
217
164
165
+ ### FIXME: NEEDS UPDATING TO DATAPACKAGE VERSION
166
+
218
167
```
219
168
shape: (2_879, 9)
220
169
┌───────────┬──────────┬──────────┬──────────┬───┬────────┬─────────┬──────────┐
@@ -249,14 +198,18 @@ class Metadata(TypedDict, total=False):
249
198
"""
250
199
251
200
dataset_name : str
252
- ext_supported : bool
201
+ suffix : str
253
202
file_name : str
254
- name_collision : bool
203
+ bytes : int
204
+ is_image : bool
205
+ is_tabular : bool
206
+ is_geo : bool
207
+ is_topo : bool
208
+ is_spatial : bool
209
+ is_json : bool
210
+ has_schema : bool
255
211
sha : str
256
- size : int
257
- suffix : str
258
- tag : str
259
- url_npm : str
212
+ url : str
260
213
261
214
262
215
FlFieldStr : TypeAlias = Literal [
0 commit comments