-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcma_bst.py
84 lines (66 loc) · 2.27 KB
/
cma_bst.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import csv
import logging
from datetime import datetime
from pathlib import Path
from typing import Any, List, Tuple
from scipy.io.matlab import loadmat
from yupi import Trajectory
from utils.utils import download_dataset
# Version number and short name of this dataset module.
VERSION = 0
NAME = "cma_bst"

# Location of the raw track archive passed to ``download_dataset``.
_HUR_TRACKS = "https://tcdata.typhoon.org.cn/data/CMABSTdata/CMABSTdata.rar"

# Human-readable class names, indexed by the integer category code parsed
# from each track record (the label of a storm is LABELS[<largest code>]).
# NOTE(review): positions 7 and 8 are blank, presumably because those codes
# are not used by the data source -- confirm against the dataset description.
LABELS = [
    "Weaker or unknown",      # 0
    "Tropical Depression",    # 1
    "Tropical Storm",         # 2
    "Severe Tropical Storm",  # 3
    "Typhoon",                # 4
    "Severe Typhoon",         # 5
    "Super Typhoon",          # 6
    "",                       # 7
    "",                       # 8
    "Extratropical Cyclone",  # 9
]
def build() -> Tuple[List[Trajectory], List[Any]]:
    """Produce the dataset as a pair ``(trajectories, labels)``.

    The raw-data directory is obtained from ``_fetch_raw_data`` and handed
    straight to ``_yupify``; whatever ``_yupify`` returns is returned
    unchanged.
    """
    return _yupify(_fetch_raw_data())
def _fetch_raw_data() -> Path:
    """Return the directory that contains the raw track files.

    ``download_dataset`` is called with the archive URL and the dataset
    name; the parent directory of the path it returns is the result.
    """
    return download_dataset(_HUR_TRACKS, NAME).parent
def _get_datetime(date_str: str) -> datetime:
return datetime.strptime(date_str.strip(), "%Y%m%d%H")
def _process_huracane(hur_rows: List[str]) -> Tuple[Trajectory, str]:
    """Turn the records of a single storm into a trajectory and its label.

    Each element of ``hur_rows`` is indexed as a sequence of text fields:
    field 0 is the timestamp (parsed by ``_get_datetime``), field 1 the
    integer category code, field 2 the latitude and field 3 the longitude.
    Coordinates are stored exactly as parsed; no unit conversion is applied.

    Returns:
        A ``Trajectory`` with ``x`` = longitudes, ``y`` = latitudes and
        ``t`` = seconds elapsed since the first record, together with the
        ``LABELS`` entry for the largest category code seen.
    """
    longitudes, latitudes, elapsed = [], [], []
    strongest = 0
    origin = None

    for record in hur_rows:
        stamp = _get_datetime(record[0])
        if origin is None:
            # The first record defines time zero of the trajectory.
            origin = stamp
            elapsed.append(0)
        else:
            elapsed.append((stamp - origin).total_seconds())

        category = int(record[1])
        if category > strongest:
            strongest = category

        latitudes.append(float(record[2]))
        longitudes.append(float(record[3]))

    label = LABELS[strongest]
    return Trajectory(x=longitudes, y=latitudes, t=elapsed), label
def _yupify(raw_dir: Path) -> Tuple[List[Trajectory], List[str]]:
    """Load the raw track files in ``raw_dir`` and convert them.

    Every ``*.txt`` file in ``raw_dir`` is read as space-delimited text.
    A row whose first field starts with ``"66666"`` is treated as the
    header of a new storm; the rows that follow it, up to the next header
    or the end of the file, are the records of that storm and are passed
    to ``_process_huracane``.

    Args:
        raw_dir: Directory containing the raw ``*.txt`` track files.

    Returns:
        Two parallel lists: the trajectories and their labels.
    """
    logging.info("Preprocessing Typhoon raw data")

    trajs, labels = [], []

    def _flush(storm_rows):
        # Only storms with more than two records are kept.
        if len(storm_rows) > 2:
            traj, label = _process_huracane(storm_rows)
            trajs.append(traj)
            labels.append(label)

    # Sorted so the output order does not depend on the order in which the
    # filesystem happens to list the files.
    for year_file in sorted(raw_dir.glob("*.txt")):
        with open(year_file, "r", encoding="utf-8") as file:
            hur_rows = []
            for raw_row in csv.reader(file, delimiter=" "):
                # Runs of spaces produce empty fields; drop them.
                row = [item for item in raw_row if item]
                if not row:
                    # Blank or whitespace-only line: nothing to parse.
                    continue
                if row[0].startswith("66666"):
                    _flush(hur_rows)
                    hur_rows = []
                    continue
                hur_rows.append(row)
            # The last storm of a file has no following header to trigger
            # a flush, so emit it explicitly once the file is exhausted.
            _flush(hur_rows)
    return trajs, labels