-
Notifications
You must be signed in to change notification settings - Fork 32
/
Copy pathload.py
65 lines (50 loc) · 2.04 KB
/
load.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import collections
import csv
import datetime
import sys
import requests
# Command line: <station[,station...]> <start_year>-<end_year>
# argv[1] is a comma-separated station list, argv[2] a dash-separated year pair.
stations = sys.argv[1].split(",")
years = list(map(int, sys.argv[2].split("-")))
start_year, end_year = years[0], years[1]
# URL pattern of one station-year CSV on the NOAA NCEI server;
# filled in with `year` and `station` via str.format.
TEMPLATE_URL = "https://www.ncei.noaa.gov/data/global-hourly/access/{year}/{station}.csv"
# Local file name pattern for one station-year CSV;
# filled in with `station` and `year` via str.format.
TEMPLATE_FILE = "station_{station}_{year}.csv"
def download_data(station, year):
    """Download one station-year CSV and save it in the working directory.

    The URL is built from TEMPLATE_URL and the output file name from
    TEMPLATE_FILE, both filled in with ``station`` and ``year``. When the
    server answers with anything other than HTTP 200, nothing is written and
    the function returns silently, so a missing station-year does not abort
    a bulk download.

    Args:
        station: Station identifier as used in the remote file name.
        year: Year to download.

    Returns:
        None.

    Raises:
        requests.RequestException: On a network failure, including a timeout.
        OSError: If the output file cannot be written.
    """
    my_url = TEMPLATE_URL.format(station=station, year=year)
    # Without a timeout, requests waits indefinitely on a stalled server.
    req = requests.get(my_url, timeout=60)
    if req.status_code != 200:
        return  # not available (e.g. not found): skip this station-year
    # The context manager closes the file even if the write fails.
    with open(TEMPLATE_FILE.format(station=station, year=year), "wt") as w:
        w.write(req.text)
def download_all_data(stations, start_year, end_year):
    """Download every station-year combination in the requested range.

    Args:
        stations: Iterable of station identifiers.
        start_year: First year to download.
        end_year: Last year to download (inclusive).
    """
    for station_id in stations:
        # range() excludes its stop value, hence the + 1 to include end_year.
        for yr in range(start_year, end_year + 1):
            download_data(station_id, yr)
# pandas would be more standard
def get_file_temperatures(file_name):
    """Yield the accepted temperature readings of one CSV file.

    The first row of the file is treated as the header and must contain a
    ``TMP`` column. Each ``TMP`` cell is expected to have the form
    ``"<value>,<status>"``. Rows whose status is not ``"1"`` are skipped; the
    value of every other row is converted with ``int(value) / 10``.

    NOTE(review): the value is presumably in tenths of a degree Celsius and
    status "1" presumably means the reading passed quality control -- confirm
    against the NOAA global-hourly format documentation.

    An empty file yields nothing.

    Args:
        file_name: Path of the CSV file to read.

    Yields:
        float: One temperature per accepted row, in file order.

    Raises:
        FileNotFoundError: If ``file_name`` does not exist (raised when
            iteration starts, because this is a generator).
        ValueError: If the header has no ``TMP`` column, or a ``TMP`` cell is
            not of the form ``value,status`` with an integer value.
        IndexError: If a row is too short to contain the ``TMP`` column.
    """
    # newline="" lets the csv module do its own line-ending handling.
    with open(file_name, "rt", newline="") as f:
        reader = csv.reader(f)
        # A bare next() on an empty file raises StopIteration, which inside a
        # generator surfaces as RuntimeError; use a default instead.
        header = next(reader, None)
        if header is None:
            return
        # Look the column up once instead of once per row.
        tmp_column = header.index("TMP")
        for row in reader:
            raw_value, status = row[tmp_column].split(",")
            if status != "1":
                continue
            yield int(raw_value) / 10
def get_all_temperatures(stations, start_year, end_year):
    """Collect the accepted temperature readings of every station.

    For each station and each year from ``start_year`` to ``end_year``
    (inclusive), reads the local file named by TEMPLATE_FILE. Station-years
    whose file does not exist are skipped: download_data writes nothing when
    the server does not return HTTP 200, so a gap in the data must not abort
    the whole run.

    Args:
        stations: Iterable of station identifiers.
        start_year: First year to read.
        end_year: Last year to read (inclusive).

    Returns:
        A ``collections.defaultdict(list)`` mapping each station to its
        temperatures in station/year/file order. A station with no reading at
        all does not appear as a key.
    """
    temperatures = collections.defaultdict(list)
    for station in stations:
        for year in range(start_year, end_year + 1):
            file_name = TEMPLATE_FILE.format(station=station, year=year)
            try:
                readings = list(get_file_temperatures(file_name))
            except FileNotFoundError:
                # Nothing was saved for this station-year; treat as no data.
                continue
            if readings:
                # Only index the defaultdict when there is data, so a station
                # never ends up with an empty list (min() of [] would raise).
                temperatures[station].extend(readings)
    return temperatures
def get_min_temperatures(all_temperatures):
    """Return the lowest temperature recorded for each station.

    Args:
        all_temperatures: Mapping of station -> non-empty iterable of
            temperatures.

    Returns:
        dict mapping each station to the minimum of its temperatures, in the
        iteration order of ``all_temperatures``.

    Raises:
        ValueError: If a station maps to an empty collection.
    """
    coldest = {}
    for station_id, readings in all_temperatures.items():
        coldest[station_id] = min(readings)
    return coldest
# Script body (runs at module level): download the requested station-year
# files, parse the saved files, then print the minimum temperature per station.
download_all_data(stations, start_year, end_year)
all_temperatures = get_all_temperatures(stations, start_year, end_year)
min_temperatures = get_min_temperatures(all_temperatures)
# Printed as a plain dict: {station: minimum temperature}.
print(min_temperatures)