Skip to content

Commit ae721b3

Browse files
Add some ipfs_client stuff; Upload to pypi.
1 parent cc1ac29 commit ae721b3

File tree

5 files changed

+344
-14
lines changed

5 files changed

+344
-14
lines changed

dweather_client/http_client.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,12 @@ def get_station_csv(station_id):
7777
all_hashes = get_heads()
7878
dataset_hash = all_hashes["ghcnd"]
7979
dataset_url = GATEWAY_URL + "/ipfs/" + dataset_hash + '/' + station_id + ".csv.gz"
80+
print(dataset_url)
8081
r = requests.get(dataset_url)
82+
print(r)
8183
r.raise_for_status()
8284
with gzip.GzipFile(fileobj=io.BytesIO(r.content)) as zip_data:
85+
print(zip_data)
8386
return zip_data.read().decode("utf-8")
8487

8588

dweather_client/ipfs_client.py

Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,311 @@
1+
import requests, datetime, io, gzip
2+
from dweather_client.ipfs_errors import *
3+
from dweather_client.utils import listify_period
4+
import dweather_client.ipfs_datasets
5+
import ipfshttpclient
6+
import json
7+
8+
MM_TO_INCHES = 0.0393701
9+
RAINFALL_PRECISION = 5
10+
GATEWAY_URL = 'https://gateway.arbolmarket.com'
11+
12+
13+
def get_heads(url=GATEWAY_URL):
    """
    Fetch heads.json, the mapping of dataset revision names to their
    latest IPFS hashes, from an IPFS gateway.
    Args:
        url (str): base url of the IPFS gateway
    Returns (example heads.json):
        {
            'chirps_05-daily': 'Qm...',
            'chirps_05-monthly': 'Qm...',
            'chirps_25-daily': 'Qm...',
            'chirps_25-monthly': 'Qm...',
            'cpc_us-daily': 'Qm...',
            'cpc_us-monthly': 'Qm...'
        }
    Raises:
        requests.exceptions.HTTPError: if the gateway responds with an error status
    """
    response = requests.get(url + "/climate/hashes/heads.json")
    response.raise_for_status()
    return response.json()
32+
33+
34+
def cat_metadata(hash_str, client=None):
    """
    Get the parsed metadata.json file for a given dataset hash.
    Args:
        hash_str (str): the hash of the ipfs dataset
        client: an open ipfshttpclient connection to reuse; if None, a
            temporary connection is opened and closed for this call
    Returns (example metadata.json):
        {
            'date range': ['1981/01/01', '2019/07/31'],
            'entry delimiter': ',',
            'latitude range': [-49.975, 49.975],
            'longitude range': [-179.975, 179.975],
            'name': 'CHIRPS .05 Daily Full Set Uncompressed',
            'period': 'daily',
            'precision': 0.01,
            'resolution': 0.05,
            'unit of measurement': 'mm',
            'year delimiter': '\n'
        }
    """
    # Reuse the caller's connection when given; otherwise open a short-lived
    # one. Both branches previously duplicated the cat/parse logic verbatim.
    if client is not None:
        return json.loads(client.cat(hash_str + "/metadata.json"))
    with ipfshttpclient.connect() as client:
        return json.loads(client.cat(hash_str + "/metadata.json"))
69+
70+
71+
def cat_hash_cell(hash_str, coord_str, client=None):
    """
    Read a grid cell data file stored under a dataset hash.
    Args:
        hash_str (str): the hash of the ipfs dataset
        coord_str (str): the cell file name, e.g. 45.000_-96.000
        client: an open ipfshttpclient connection to reuse; if None, a
            temporary connection is opened for this call
    Returns:
        the file contents as returned by ipfs cat
    """
    path = hash_str + '/' + coord_str
    if client is not None:
        return client.cat(path)
    with ipfshttpclient.connect() as connection:
        return connection.cat(path)
77+
78+
def cat_zipped_hash_cell(url, hash_str, coord_str, client=None):
    """
    Read a text file on the ipfs server compressed with gzip.
    Args:
        url (str): the url of the ipfs server (unused here; kept for
            backward compatibility with existing callers)
        hash_str (str): the hash of the dataset
        coord_str (str): the text file coordinate name e.g. 45.000_-96.000
        client: an open ipfshttpclient connection to reuse; if None, a
            temporary connection is opened for this call
    Returns:
        the contents of the file as a string
    """
    def _cat_and_unzip(conn):
        # Cell files are stored gzip-compressed with a ".gz" suffix.
        cell = conn.cat(hash_str + '/' + coord_str + ".gz")
        with gzip.GzipFile(fileobj=io.BytesIO(cell)) as zip_data:
            return zip_data.read().decode("utf-8")

    # Both branches previously duplicated the cat/decompress logic verbatim.
    if client is not None:
        return _cat_and_unzip(client)
    with ipfshttpclient.connect() as client:
        return _cat_and_unzip(client)
97+
98+
99+
def cat_dataset_cell(lat, lon, dataset_revision, client=None):
    """
    Retrieve the text of a grid cell data file for a given lat lon and dataset.
    Args:
        lat (float): the latitude of the grid cell, to 3 decimals
        lon (float): the longitude of the grid cell, to 3 decimals
        dataset_revision (str): the dataset revision name as listed in heads.json
        client: an open ipfshttpclient connection to reuse, or None to let
            the cat helpers open their own
    Returns:
        A tuple (json, str) of the dataset metadata file and the grid cell data text
    Raises:
        DatasetError: If no matching dataset found on server
        InputOutOfRangeError: If the lat/lon is outside the dataset range in metadata
        CoordinateNotFoundError: If the lat/lon coordinate is not found on server
    """
    # heads.json maps each dataset revision name to its current IPFS hash.
    all_hashes = get_heads()
    if dataset_revision in all_hashes:
        dataset_hash = all_hashes[dataset_revision]
    else:
        raise DatasetError('{} not found on server'.format(dataset_revision))

    metadata = cat_metadata(dataset_hash, client)
    # sorted() tolerates ranges stored either [min, max] or [max, min].
    min_lat, max_lat = sorted(metadata["latitude range"])
    min_lon, max_lon = sorted(metadata["longitude range"])
    if lat < min_lat or lat > max_lat:
        raise InputOutOfRangeError("Latitude {} out of dataset revision range [{:.3f}, {:.3f}] for {}".format(lat, min_lat, max_lat, dataset_revision))
    if lon < min_lon or lon > max_lon:
        raise InputOutOfRangeError("Longitude {} out of dataset revision range [{:.3f}, {:.3f}] for {}".format(lon, min_lon, max_lon, dataset_revision))
    # Cell files are named by their coordinates to 3 decimal places.
    coord_str = "{:.3f}_{:.3f}".format(lat,lon)
    try:
        # The metadata's "compression" key decides which cat helper to use.
        if "compression" in metadata and metadata["compression"] == "gzip":
            text_data = cat_zipped_hash_cell(GATEWAY_URL, dataset_hash, coord_str, client=client)
        else:
            text_data = cat_hash_cell(dataset_hash, coord_str, client=client)
        return metadata, text_data
    # NOTE(review): this only converts requests HTTPErrors. When an
    # ipfshttpclient connection is used, a missing cell presumably raises
    # that library's own exception types, which would NOT be converted to
    # CoordinateNotFoundError here — TODO confirm.
    except requests.exceptions.HTTPError as e:
        raise CoordinateNotFoundError('Coordinate ({}, {}) not found on ipfs in dataset revision {}'.format(lat, lon, dataset_revision))
134+
135+
136+
137+
def cat_rainfall_dict(lat, lon, dataset_revision, return_metadata=False, client=None):
    """
    Build a dict of rainfall data for a given grid cell.
    Args:
        lat (float): the latitude of the grid cell, to 3 decimals
        lon (float): the longitude of the grid cell, to 3 decimals
        dataset_revision (str): the dataset revision name as listed in heads.json
        return_metadata (bool): if True, also return the dataset metadata dict
        client: an open ipfshttpclient connection to reuse, or None
    Returns:
        a dict ({datetime.date: float}) of datetime dates and the corresponding rainfall in mm for that date
        (preceded by the metadata dict when return_metadata is True)
    Raises:
        DatasetError: If no matching dataset found on server
        InputOutOfRangeError: If the lat/lon is outside the dataset range in metadata
        CoordinateNotFoundError: If the lat/lon coordinate is not found on server
        DataMalformedError: If the grid cell file can't be parsed as rainfall data
    """
    metadata, rainfall_text = cat_dataset_cell(lat, lon, dataset_revision, client=client)
    dataset_start_date = datetime.datetime.strptime(metadata['date range'][0], "%Y/%m/%d").date()
    dataset_end_date = datetime.datetime.strptime(metadata['date range'][1], "%Y/%m/%d").date()
    # Both the start and end date are in the dataset, so its the difference + 1.
    days_in_record = (dataset_end_date - dataset_start_date).days + 1
    # ipfs cat may return bytes; decode explicitly rather than via a bare
    # except that could also swallow real decoding errors.
    if isinstance(rainfall_text, bytes):
        rainfall_text = rainfall_text.decode()
    day_strs = rainfall_text.replace(',', ' ').split()
    if len(day_strs) != days_in_record:
        raise DataMalformedError ("Number of days in data file does not match the provided metadata")
    rainfall_dict = {}
    for i in range(days_in_record):
        date_iter = dataset_start_date + datetime.timedelta(days=i)
        # The dataset marks gaps with a sentinel string; store those as None.
        if day_strs[i] == metadata["missing value"]:
            rainfall_dict[date_iter] = None
        else:
            rainfall_dict[date_iter] = float(day_strs[i])
    if return_metadata:
        return metadata, rainfall_dict
    return rainfall_dict
173+
174+
175+
def cat_rev_rainfall_dict(lat, lon, dataset, desired_end_date, latest_rev):
    """
    Build a dictionary of rainfall data, preferring the most accurate, final
    data. Data from each successive (more recent / less accurate) revision of
    the dataset only fills in dates the earlier revisions did not cover,
    until the desired end date is reached or the revisions run out.

    This will not throw an error if there are no revisions with data
    available; it simply returns what is available.
    Args:
        lat (float): the grid cell latitude
        lon (float): the grid cell longitude
        dataset (str): the name of the dataset, e.g., "chirps_05-daily" on hashes.json
        desired_end_date (datetime.date): the last day of data needed.
        latest_rev (str): the least accurate revision of the dataset that is considered final
    Returns:
        tuple:
            a dict ({datetime.date: float}) of datetime dates and the corresponding rainfall in mm for that date
            bool is_final: if all data up to desired end date is final, this will be true
    """
    rainfall = {}
    is_final = True
    with ipfshttpclient.connect() as client:
        # Walk revisions from most accurate to least, filling gaps only.
        for revision in dweather_client.ipfs_datasets.datasets[dataset]:
            for day, amount in cat_rainfall_dict(lat, lon, revision, client=client).items():
                # setdefault keeps values already supplied by a more
                # accurate revision.
                rainfall.setdefault(day, amount)
            # Stop as soon as the requested end date is covered.
            if desired_end_date in rainfall:
                return rainfall, is_final
            # Data is no longer final once we pass the specified revision.
            if revision == latest_rev:
                is_final = False

    # Ran out of revisions before reaching the desired end date.
    return rainfall, is_final
209+
210+
211+
def cat_temperature_dict(lat, lon, dataset_revision, return_metadata=False, client=None):
    """
    Build a dict of temperature data for a given grid cell.
    Args:
        lat (float): the latitude of the grid cell, to 3 decimals
        lon (float): the longitude of the grid cell, to 3 decimals
        dataset_revision (str): the dataset revision name as listed in heads.json
        return_metadata (bool): if True, also return the dataset metadata dict
        client: an open ipfshttpclient connection to reuse, or None
    Returns:
        tuple (highs, lows) of dicts
        highs: dict ({datetime.date: float}) of datetime dates and the corresponding high temperature in degrees F
        lows: dict ({datetime.date: float}) of datetime dates and the corresponding low temperature in degrees F
        (preceded by the metadata dict when return_metadata is True)
    Raises:
        DatasetError: If no matching dataset_revision found on server
        InputOutOfRangeError: If the lat/lon is outside the dataset_revision range in metadata
        CoordinateNotFoundError: If the lat/lon coordinate is not found on server
        DataMalformedError: If the grid cell file can't be parsed as temperature data
    """
    metadata, temp_text = cat_dataset_cell(lat, lon, dataset_revision, client=client)
    dataset_start_date = datetime.datetime.strptime(metadata['date range'][0], "%Y/%m/%d").date()
    dataset_end_date = datetime.datetime.strptime(metadata['date range'][1], "%Y/%m/%d").date()
    # Both the start and end date are in the dataset_revision, so its the difference + 1.
    days_in_record = (dataset_end_date - dataset_start_date).days + 1
    # ipfs cat may return bytes; decode explicitly rather than via a bare
    # except that could also swallow real decoding errors.
    if isinstance(temp_text, bytes):
        temp_text = temp_text.decode()
    day_strs = temp_text.replace(',', ' ').split()
    if len(day_strs) != days_in_record:
        raise DataMalformedError ("Number of days in data file does not match the provided metadata")
    highs = {}
    lows = {}
    for i in range(days_in_record):
        # Each entry is formatted "low/high".
        low, high = map(float, day_strs[i].split('/'))
        date_iter = dataset_start_date + datetime.timedelta(days=i)
        highs[date_iter] = high
        lows[date_iter] = low
    if return_metadata:
        return metadata, highs, lows
    return highs, lows
250+
251+
252+
def cat_rev_temperature_dict(lat, lon, dataset, desired_end_date, latest_rev):
    """
    Build dictionaries of temperature data, including as much final data as
    possible. If the desired end date is not in the final dataset, as much
    preliminary data as needed is appended.
    Args:
        lat (float): the latitude of the grid cell, to 3 decimals
        lon (float): the longitude of the grid cell, to 3 decimals
        dataset (str): the dataset name as on hashes.json
        desired_end_date (datetime.date): don't include prelim data after this point if not needed
        latest_rev (str): The least accurate revision that is still considered 'final'
    Returns:
        tuple (highs, lows) of dicts and a bool
        highs: dict ({datetime.date: float}) of datetime dates and the corresponding high temperature in degrees F
        lows: dict ({datetime.date: float}) of datetime dates and the corresponding low temperature in degrees F
        is_final: True if all data is from final dataset, False if prelim included
    """
    highs = {}
    lows = {}
    is_final = True

    with ipfshttpclient.connect() as client:
        # Walk revisions from most accurate to least, filling gaps only.
        for revision in dweather_client.ipfs_datasets.datasets[dataset]:
            rev_highs, rev_lows = cat_temperature_dict(lat, lon, revision, client=client)
            for day in rev_highs:
                # setdefault keeps values already supplied by a more
                # accurate revision.
                highs.setdefault(day, rev_highs[day])
                lows.setdefault(day, rev_lows[day])
            # Stop as soon as the requested end date is covered.
            if desired_end_date in highs:
                return highs, lows, is_final
            # Data is no longer final once we pass the specified revision.
            if revision == latest_rev:
                is_final = False

    # Ran out of revisions before reaching the desired end date.
    return highs, lows, is_final
289+
290+
291+
def cat_station_csv(station_id, client=None):
    """
    Retrieve the contents of a station data csv file.
    Args:
        station_id (str): the id of the weather station
        client: an open ipfshttpclient connection to reuse; if None, a
            temporary connection is opened for this call
    Returns:
        the contents of the station csv file as a string
    """
    all_hashes = get_heads()
    dataset_hash = all_hashes["ghcnd"]
    csv_hash = dataset_hash + '/' + station_id + ".csv.gz"

    def _cat_and_unzip(conn):
        # Station files are stored gzip-compressed.
        csv = conn.cat(csv_hash)
        # Bug fix: both branches previously decompressed an undefined name
        # `cell` instead of the bytes just fetched into `csv` (NameError).
        with gzip.GzipFile(fileobj=io.BytesIO(csv)) as zip_data:
            return zip_data.read().decode("utf-8")

    if client is not None:
        return _cat_and_unzip(client)
    with ipfshttpclient.connect() as client:
        return _cat_and_unzip(client)
311+
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import ipfs_client
2+
import http_client
3+
import utils
4+
from datetime import date
5+
6+
def test_ipfs_client_rainfall():
    """Smoke test the rainfall path: heads lookup, metadata fetch, and both
    single-revision and multi-revision rainfall dict builders.

    NOTE(review): performs live gateway/IPFS calls and makes no assertions —
    it only verifies the calls complete without raising.
    """
    dataset = 'chirps_05'
    dataset_revision = 'chirps_05-daily'
    heads = http_client.get_heads()
    chirps_metadata = ipfs_client.cat_metadata(heads[dataset_revision])
    # Coordinates presumably chosen to fall inside the CHIRPS coverage
    # range — TODO confirm against the dataset metadata.
    ipfs_client.cat_rainfall_dict(41.175, -75.125, dataset_revision)
    rainfall_rev_dict = ipfs_client.cat_rev_rainfall_dict(41.175, -75.125, dataset, date.today(), dataset_revision)
13+
14+
def test_http_client_temperature():
    """Smoke test the temperature path: coordinate conversion, heads lookup,
    metadata fetch, and both temperature dict builders.

    NOTE(review): performs live gateway/IPFS calls and makes no assertions —
    it only verifies the calls complete without raising.
    """
    dataset = "cpc_temp"
    dataset_revision = 'cpc_temp-daily'
    # Convert conventional lat/lon into the CPC grid's coordinate convention.
    lat, lon = utils.conventional_lat_lon_to_cpc(41.25, -77.75)
    heads = http_client.get_heads()
    cpc_metadata = ipfs_client.cat_metadata(heads[dataset_revision])
    temperature_dict = ipfs_client.cat_temperature_dict(lat, lon, dataset_revision)
    temperature_rev_dict = ipfs_client.cat_rev_temperature_dict(lat, lon, dataset, date.today(), dataset_revision)
22+

dweather_client/tests/test_station.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import http_client
2+
import utils
3+
from datetime import date
4+
5+
def test_station():
    """Smoke test fetching a GHCND station csv over the HTTP gateway.

    NOTE(review): performs a live gateway call and makes no assertions —
    it only verifies the download completes without raising.
    """
    station = http_client.get_station_csv("ACW00011604")

setup.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,12 @@
11
import setuptools
22

3-
try:
4-
# for pip >= 10
5-
from pip._internal.req import parse_requirements
6-
except ImportError:
7-
# for pip <= 9.0.3
8-
from pip.req import parse_requirements
9-
10-
def load_requirements(fname):
11-
reqs = parse_requirements(fname, session="test")
12-
return [str(ir.requirement) for ir in reqs]
13-
143
with open("README.md", "r") as fh:
154
long_description = fh.read()
165

176
setuptools.setup(
18-
name="dweather_client-arbol",
19-
install_requires=load_requirements("requirements.txt"),
7+
name="dweather_client",
208
version="1.0",
21-
author="Ben Andre",
9+
author="Arbol Inc",
2210
author_email="[email protected]",
2311
description="Python client for interacting with weather data on IPFS.",
2412
long_description=long_description,

0 commit comments

Comments
 (0)