Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Making attribute names more precise: obsdim->obs_dimname, timedim->time_varname #142

Merged
merged 4 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/example_find_positions_at_obs_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
def gridwaves(tds):
t = tds[['lat', 'lon',
'time']].traj.gridtime(tds['time_waves_imu'].squeeze())
return t.traj.to_2d(obsdim='obs_waves_imu')
return t.traj.to_2d(obs_dimname='obs_waves_imu')


dsw = ds.groupby('trajectory').map(gridwaves)
Expand Down
4 changes: 2 additions & 2 deletions tests/test_read_sfy.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ def test_interpret_sfy(test_data):
ds = xr.open_dataset(test_data / 'bug32.nc')
print(ds)

assert ds.traj.obsdim == 'package'
assert ds.traj.timedim == 'position_time'
assert ds.traj.obs_dimname == 'package'
assert ds.traj.time_varname == 'position_time'

assert ds.traj.is_2d()

Expand Down
48 changes: 24 additions & 24 deletions trajan/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ def detect_tx_dim(ds):
raise ValueError("Could not determine x / lon variable")


def detect_time_dim(ds, obsdim):
logger.debug(f'Detecting time-dimension for "{obsdim}"..')
def detect_time_dim(ds, obs_dimname):
logger.debug(f'Detecting time-dimension for "{obs_dimname}"..')
for v in ds.variables:
if obsdim in ds[v].dims and 'time' in v:
if obs_dimname in ds[v].dims and 'time' in v:
return v

raise ValueError("no time dimension detected")
Expand All @@ -47,8 +47,8 @@ def __new__(cls, ds):
ds = ds.expand_dims({'trajectory': 1})
ds['trajectory'].attrs['cf_role'] = 'trajectory_id'

obsdim = None
timedim = None
obs_dimname = None
time_varname = None

tx = detect_tx_dim(ds)

Expand All @@ -62,7 +62,7 @@ def __new__(cls, ds):
# NOTE: this is probably not standard; something to point to the CF conventions?
# NOTE: for now, there is no discovery of the "index" dim, this is hardcorded; any way to do better?
if "index" in tx.dims:
obsdim = "index"
obs_dimname = "index"

# discover the timecoord variable name #######################
# find all variables with standard_name "time"
Expand Down Expand Up @@ -90,63 +90,63 @@ def __new__(cls, ds):
else:
raise ValueError(f"cannot deduce rowsizevar; we have the following candidates: {with_dim_trajectory = }")
# sanity check
if not np.sum(ds[rowsizevar].to_numpy()) == len(ds[obsdim]):
if not np.sum(ds[rowsizevar].to_numpy()) == len(ds[obs_dimname]):
raise ValueError("mismatch between the index length and the sum of the deduced trajectory lengths")

logger.debug(
f"1D storage dataset; detected: {obsdim = }, {timecoord = }, {trajectorycoord = }, {rowsizevar}"
f"1D storage dataset; detected: {obs_dimname = }, {timecoord = }, {trajectorycoord = }, {rowsizevar}"
)

return ocls(ds, obsdim, timecoord, trajectorycoord, rowsizevar)
return ocls(ds, obs_dimname, timecoord, trajectorycoord, rowsizevar)

else:
logging.warning(f"{ds} has {tx.dims = } which is of dimension 1 but is not index; this is a bit unusual; try to parse with Traj1d or Traj2d")

# we have a ds where 2D arrays are used to store data, this is either Traj1d or Traj2d
# there may also be some slightly unusual cases where these Traj1d and Traj2d classes will be used on data with 1D arrays
if 'obs' in tx.dims:
obsdim = 'obs'
timedim = detect_time_dim(ds, obsdim)
obs_dimname = 'obs'
time_varname = detect_time_dim(ds, obs_dimname)

elif 'index' in tx.dims:
obsdim = 'obs'
timedim = detect_time_dim(ds, obsdim)
obs_dimname = 'obs'
time_varname = detect_time_dim(ds, obs_dimname)

elif 'time' in tx.dims:
obsdim = 'time'
timedim = 'time'
obs_dimname = 'time'
time_varname = 'time'

else:
for d in tx.dims:
if not ds[d].attrs.get(
'cf_role',
None) == 'trajectory_id' and not 'traj' in d:

obsdim = d
timedim = detect_time_dim(ds, obsdim)
obs_dimname = d
time_varname = detect_time_dim(ds, obs_dimname)

break

if obsdim is None:
if obs_dimname is None:
logger.warning('No time or obs dimension detected.')

logger.debug(
f"Detected obs-dim: {obsdim}, detected time-dim: {timedim}.")
f"Detected obs-dim: {obs_dimname}, detected time-dim: {time_varname}.")

if obsdim is None:
if obs_dimname is None:
ocls = Traj1d

elif len(ds[timedim].shape) <= 1:
elif len(ds[time_varname].shape) <= 1:
logger.debug('Detected structured (1D) trajectory dataset')
ocls = Traj1d

elif len(ds[timedim].shape) == 2:
elif len(ds[time_varname].shape) == 2:
logger.debug('Detected un-structured (2D) trajectory dataset')
ocls = Traj2d

else:
raise ValueError(
f'Time dimension has shape greater than 2: {ds["timedim"].shape}'
f'Time dimension has shape greater than 2: {ds["time_varname"].shape}'
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This an example of why old names were not precise enough.
Thus Time dimension has shape greater than 2 should now be changed to the more precise Time variable has more than 2 dimensions

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, also this probably needs to be: ds[self.time_varname] without the internal quotes.

)

return ocls(ds, obsdim, timedim)
return ocls(ds, obs_dimname, time_varname)
22 changes: 11 additions & 11 deletions trajan/ragged.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ class ContiguousRagged(Traj):
trajdim: str
rowvar: str

def __init__(self, ds, obsdim, timedim, trajectorycoord, rowsizevar):
def __init__(self, ds, obs_dimname, time_varname, trajectorycoord, rowsizevar):
self.trajdim = trajectorycoord
self.rowvar = rowsizevar
super().__init__(ds, obsdim, timedim)
super().__init__(ds, obs_dimname, time_varname)

def to_2d(self, obsdim='obs'):
def to_2d(self, obs_dimname='obs'):
"""This actually converts a contiguous ragged xarray Dataset into an xarray Dataset that follows the Traj2d conventions."""
global_attrs = self.ds.attrs

Expand All @@ -45,7 +45,7 @@ def to_2d(self, obsdim='obs'):
self.ds[self.rowvar].to_numpy()):
end_index = start_index + crrt_rowsize
array_time[crrt_index, :crrt_rowsize] = self.ds[
self.timedim][start_index:end_index]
self.time_varname][start_index:end_index]
start_index = end_index

# it seems that we need to build the "backbone" of the Dataset independently first
Expand All @@ -70,7 +70,7 @@ def to_2d(self, obsdim='obs'):

# trajectory vars
'time':
xr.DataArray(dims=["trajectory", obsdim],
xr.DataArray(dims=["trajectory", obs_dimname],
data=array_time,
attrs={
"standard_name": "time",
Expand All @@ -81,7 +81,7 @@ def to_2d(self, obsdim='obs'):

# now add all "normal" variables
# NOTE: for now, we only consider scalar vars; if we want to consider more complex vars (e.g., spectra), this will need updated
# NOTE: such an update would typically need to look at the dims of the variable, and if there are additional dims to obsdim, create a higer dim variable
# NOTE: such an update would typically need to look at the dims of the variable, and if there are additional dims to obs_dimname, create a higer dim variable

for crrt_data_var in self.ds.data_vars:
attrs = self.ds[crrt_data_var].attrs
Expand All @@ -90,9 +90,9 @@ def to_2d(self, obsdim='obs'):
continue

if len(self.ds[crrt_data_var].dims
) != 1 or self.ds[crrt_data_var].dims[0] != self.obsdim:
) != 1 or self.ds[crrt_data_var].dims[0] != self.obs_dimname:
raise ValueError(
f"data_vars element {crrt_data_var} has dims {self.ds[crrt_data_var].dims}, expected {(self.obsdim,)}"
f"data_vars element {crrt_data_var} has dims {self.ds[crrt_data_var].dims}, expected {(self.obs_dimname,)}"
)

crrt_var = np.full((nbr_trajectories, longest_trajectory), np.nan)
Expand All @@ -119,7 +119,7 @@ def to_2d(self, obsdim='obs'):
crrt_data_var = "lat"

ds_converted_to_traj2d[crrt_data_var] = \
xr.DataArray(dims=["trajectory", obsdim],
xr.DataArray(dims=["trajectory", obs_dimname],
data=crrt_var,
attrs=attrs)

Expand All @@ -140,6 +140,6 @@ def plot(self) -> Plot:
def timestep(self, average=np.median):
return self.to_2d().traj.timestep(average)

def gridtime(self, times, timedim=None, round=True):
return self.to_2d().traj.gridtime(times, timedim, round)
def gridtime(self, times, time_varname=None, round=True):
return self.to_2d().traj.gridtime(times, time_varname, round)

58 changes: 27 additions & 31 deletions trajan/traj.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ def detect_tx_dim(ds):
raise ValueError("Could not determine x / lon variable")


def detect_time_dim(ds, obsdim):
logger.debug(f'Detecting time-dimension for "{obsdim}"..')
def detect_time_dim(ds, obs_dimname):
logger.debug(f'Detecting time-dimension for "{obs_dimname}"..')
for v in ds.variables:
if obsdim in ds[v].dims and 'time' in v:
if obs_dimname in ds[v].dims and 'time' in v:
return v

raise ValueError("no time dimension detected")
Expand All @@ -50,28 +50,24 @@ class Traj:

__gcrs__: pyproj.CRS

obsdim: str
"""
Name of the dimension along which observations are taken. Usually either `obs` or `time`.
"""
timedim: str

def __init__(self, ds, obsdim, timedim):
def __init__(self, ds, obs_dimname, time_varname):
self.ds = ds
self.__plot__ = None
self.__animate__ = None
self.__gcrs__ = pyproj.CRS.from_epsg(4326)
self.obsdim = obsdim
self.timedim = timedim
self.obs_dimname = obs_dimname # dimension along which time increases
self.time_varname = time_varname

def __repr__(self):
output = '=======================\n'
output += 'TrajAn info:\n'
output += '------------\n'
output += f'{self.ds.sizes["trajectory"]} trajectories\n'
if 'time' in self.ds.variables:
if self.timedim in self.ds.sizes:
output += f'{self.ds.sizes[self.timedim]} timesteps\n'
if self.time_varname in self.ds.sizes:
output += f'{self.ds.sizes[self.time_varname]} timesteps'
timevar = self.ds[self.time_varname]
output += f' {timevar.name}{list(timevar.sizes)} ({len(timevar.sizes)}D)\n'
try:
timestep = self.timestep()
timestep = timedelta(seconds=int(timestep))
Expand Down Expand Up @@ -532,18 +528,18 @@ def distance_to_next(self):

lon = self.ds.lon
lat = self.ds.lat
lenobs = self.ds.sizes[self.obsdim]
lonfrom = lon.isel({self.obsdim: slice(0, lenobs - 1)})
latfrom = lat.isel({self.obsdim: slice(0, lenobs - 1)})
lonto = lon.isel({self.obsdim: slice(1, lenobs)})
latto = lat.isel({self.obsdim: slice(1, lenobs)})
lenobs = self.ds.sizes[self.obs_dimname]
lonfrom = lon.isel({self.obs_dimname: slice(0, lenobs - 1)})
latfrom = lat.isel({self.obs_dimname: slice(0, lenobs - 1)})
lonto = lon.isel({self.obs_dimname: slice(1, lenobs)})
latto = lat.isel({self.obs_dimname: slice(1, lenobs)})
geod = pyproj.Geod(ellps='WGS84')
azimuth_forward, a2, distance = geod.inv(lonfrom, latfrom, lonto,
latto)

distance = xr.DataArray(distance, coords=lonfrom.coords, dims=lon.dims)
distance = xr.concat((distance, distance.isel({self.obsdim: -1})),
dim=self.obsdim) # repeating last time step to
distance = xr.concat((distance, distance.isel({self.obs_dimname: -1})),
dim=self.obs_dimname) # repeating last time step to
return distance

def azimuth_to_next(self):
Expand All @@ -564,11 +560,11 @@ def azimuth_to_next(self):
# TODO: method is almost duplicate of "distance_to_next" above
lon = self.ds.lon
lat = self.ds.lat
lenobs = self.ds.dims[self.obsdim]
lonfrom = lon.isel({self.obsdim: slice(0, lenobs - 1)})
latfrom = lat.isel({self.obsdim: slice(0, lenobs - 1)})
lonto = lon.isel({self.obsdim: slice(1, lenobs)})
latto = lat.isel({self.obsdim: slice(1, lenobs)})
lenobs = self.ds.dims[self.obs_dimname]
lonfrom = lon.isel({self.obs_dimname: slice(0, lenobs - 1)})
latfrom = lat.isel({self.obs_dimname: slice(0, lenobs - 1)})
lonto = lon.isel({self.obs_dimname: slice(1, lenobs)})
latto = lat.isel({self.obs_dimname: slice(1, lenobs)})
geod = pyproj.Geod(ellps='WGS84')
azimuth_forward, a2, distance = geod.inv(lonfrom, latfrom, lonto,
latto)
Expand All @@ -577,8 +573,8 @@ def azimuth_to_next(self):
coords=lonfrom.coords,
dims=lon.dims)
azimuth_forward = xr.concat(
(azimuth_forward, azimuth_forward.isel({self.obsdim: -1})),
dim=self.obsdim) # repeating last time step to
(azimuth_forward, azimuth_forward.isel({self.obs_dimname: -1})),
dim=self.obs_dimname) # repeating last time step to
return azimuth_forward

def velocity_components(self):
Expand Down Expand Up @@ -679,7 +675,7 @@ def get_area_convex_hull(self):
return np.array(hull.volume) # volume=area for 2D as here

@abstractmethod
def gridtime(self, times, timedim=None) -> xr.Dataset:
def gridtime(self, times, time_varname=None) -> xr.Dataset:
"""Interpolate dataset to a regular time interval or a different grid.

Parameters
Expand All @@ -689,7 +685,7 @@ def gridtime(self, times, timedim=None) -> xr.Dataset:
- an array of times, or
- a string specifying a Pandas daterange (e.g. 'h', '6h, 'D'...) suitable for `pd.date_range`.

timedim : str
time_varname : str
Name of new time dimension. The default is to use the same name as previously.

Returns
Expand Down Expand Up @@ -894,7 +890,7 @@ def condense_obs(self) -> xr.Dataset:
"""

@abstractmethod
def to_2d(self, obsdim='obs') -> xr.Dataset:
def to_2d(self, obs_dimname='obs') -> xr.Dataset:
"""
Convert dataset into a 2D dataset from.
"""
Loading
Loading