Fix pandas warnings about .ravel() and .apply()
Pandas was issuing FutureWarnings about two things: calls to
`Series.ravel`, which are fixed by calling `np.ravel()` on the data
instead, and the groupby `.apply` default of passing the grouping
columns to the applied function. Passing `include_groups=False` opts
in to the future default of excluding them from the calculations
(which is the right thing to do).
leouieda committed Apr 17, 2024
1 parent 07b270b commit 944256e
Showing 4 changed files with 18 additions and 17 deletions.
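
The `ravel` half of the fix is mechanical: `np.ravel()` accepts anything array-like (a pandas `Series`, a NumPy array, or a plain list) and returns a flat 1-D `ndarray`, so it is a drop-in replacement for the deprecated `Series.ravel()`. A minimal sketch of the warning and the fix, assuming pandas >= 2.2; the variable names below are illustrative, not from the diff:

    import numpy as np
    import pandas as pd

    data = pd.Series([1.0, 2.0, 3.0])

    # Deprecated since pandas 2.2 and emits a FutureWarning:
    # flat = data.ravel()

    # Warning-free replacement: np.ravel works on Series, arrays, and
    # lists alike and always returns a flat 1-D numpy array.
    flat = np.ravel(data)
    print(flat)  # [1. 2. 3.]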
3 changes: 2 additions & 1 deletion verde/base/least_squares.py
@@ -9,6 +9,7 @@
 """
 from warnings import warn
 
+import numpy as np
 from sklearn.linear_model import LinearRegression, Ridge
 from sklearn.preprocessing import StandardScaler
 
@@ -65,6 +66,6 @@ def least_squares(jacobian, data, weights, damping=None, copy_jacobian=False):
         regr = LinearRegression(fit_intercept=False)
     else:
         regr = Ridge(alpha=damping, fit_intercept=False)
-    regr.fit(jacobian, data.ravel(), sample_weight=weights)
+    regr.fit(jacobian, np.ravel(data), sample_weight=weights)
     params = regr.coef_ / scaler.scale_
     return params
8 changes: 4 additions & 4 deletions verde/base/utils.py
@@ -54,9 +54,9 @@ def score_estimator(scoring, estimator, coordinates, data, weights=None):
     result = np.mean(
         [
             scorer(
-                DummyEstimator(pred.ravel()),
+                DummyEstimator(np.ravel(pred)),
                 coordinates,
-                data[i].ravel(),
+                np.ravel(data[i]),
                 sample_weight=weights[i],
             )
             for i, pred in enumerate(predicted)
@@ -251,7 +251,7 @@ def check_fit_input(coordinates, data, weights, unpack=True):
             )
         if any(i.size != j.size for i in weights for j in data):
             raise ValueError("Weights must have the same size as the data array.")
-        weights = tuple(i.ravel() for i in weights)
+        weights = tuple(np.ravel(i) for i in weights)
     else:
         weights = tuple([None] * len(data))
     if unpack:
@@ -291,4 +291,4 @@ def n_1d_arrays(arrays, n):
     (array([0, 1, 2, 3]), array([0, 1, 2, 3]))
     """
-    return tuple(np.atleast_1d(i).ravel() for i in arrays[:n])
+    return tuple(np.ravel(np.atleast_1d(i)) for i in arrays[:n])
22 changes: 11 additions & 11 deletions verde/blockreduce.py
@@ -174,11 +174,11 @@ def filter(self, coordinates, data, weights=None):  # noqa: A003
                 "data{}".format(i): attach_weights(self.reduction, w)
                 for i, w in enumerate(weights)
             }
-        columns = {"data{}".format(i): comp.ravel() for i, comp in enumerate(data)}
+        columns = {"data{}".format(i): np.ravel(comp) for i, comp in enumerate(data)}
         columns["block"] = labels
         blocked = pd.DataFrame(columns).groupby("block").aggregate(reduction)
         blocked_data = tuple(
-            blocked["data{}".format(i)].values.ravel() for i, _ in enumerate(data)
+            np.ravel(blocked["data{}".format(i)]) for i, _ in enumerate(data)
         )
         blocked_coords = self._block_coordinates(coordinates, blocks, labels)
         if len(blocked_data) == 1:
@@ -228,7 +228,7 @@ def _block_coordinates(self, coordinates, block_coordinates, labels):
         if self.drop_coords:
             coordinates = coordinates[:2]
         coords = {
-            "coordinate{}".format(i): coord.ravel()
+            "coordinate{}".format(i): np.ravel(coord)
             for i, coord in enumerate(coordinates)
         }
         coords["block"] = labels
@@ -237,7 +237,7 @@ def _block_coordinates(self, coordinates, block_coordinates, labels):
         if self.center_coordinates:
             unique = np.unique(labels)
             for i, block_coord in enumerate(block_coordinates[:2]):
-                grouped["coordinate{}".format(i)] = block_coord[unique].ravel()
+                grouped["coordinate{}".format(i)] = np.ravel(block_coord[unique])
         return tuple(
             grouped["coordinate{}".format(i)].values for i in range(len(coordinates))
         )
@@ -414,23 +414,21 @@ def filter(self, coordinates, data, weights=None):  # noqa: A003
             region=self.region,
         )
         ncomps = len(data)
-        columns = {"data{}".format(i): comp.ravel() for i, comp in enumerate(data)}
+        columns = {"data{}".format(i): np.ravel(comp) for i, comp in enumerate(data)}
         columns["block"] = labels
         if any(w is None for w in weights):
             mean, variance = self._blocked_mean_variance(pd.DataFrame(columns), ncomps)
         else:
             columns.update(
-                {"weight{}".format(i): comp.ravel() for i, comp in enumerate(weights)}
+                {"weight{}".format(i): np.ravel(comp) for i, comp in enumerate(weights)}
             )
             table = pd.DataFrame(columns)
             if self.uncertainty:
                 mean, variance = self._blocked_mean_uncertainty(table, ncomps)
             else:
                 mean, variance = self._blocked_mean_variance_weighted(table, ncomps)
-        blocked_data = tuple(comp.values.ravel() for comp in mean)
-        blocked_weights = tuple(
-            variance_to_weights(var.values.ravel()) for var in variance
-        )
+        blocked_data = tuple(np.ravel(comp) for comp in mean)
+        blocked_weights = tuple(variance_to_weights(np.ravel(var)) for var in variance)
         blocked_coords = self._block_coordinates(coordinates, blocks, labels)
         if ncomps == 1:
             return blocked_coords, blocked_data[0], blocked_weights[0]
@@ -502,7 +500,9 @@ def weighted_average_variance(group):
                 data.reshape((1, data.size)), index=[0], columns=columns
             )
 
-        blocked = table.groupby("block").apply(weighted_average_variance)
+        blocked = table.groupby("block").apply(
+            weighted_average_variance, include_groups=False
+        )
         mean = [blocked[i] for i in columns[:ncomps]]
         variance = [blocked[i] for i in columns[ncomps:]]
         return mean, variance
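
For context on the `include_groups=False` change in the hunk above: since pandas 2.2, `DataFrameGroupBy.apply` warns that passing the grouping columns into the applied function is deprecated, and the keyword opts in to the future behavior of excluding them. A minimal sketch under that assumption; the toy frame and lambda below are illustrative, not the commit's `weighted_average_variance`:

    import pandas as pd

    table = pd.DataFrame({"block": [0, 0, 1], "data0": [1.0, 2.0, 3.0]})

    # Without include_groups=False, pandas 2.2 emits a FutureWarning
    # because the "block" column would also be passed to the function.
    # With it, each group contains only the data columns.
    result = table.groupby("block").apply(
        lambda group: group["data0"].mean(), include_groups=False
    )
    print(result)  # block 0 -> 1.5, block 1 -> 3.0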
2 changes: 1 addition & 1 deletion verde/model_selection.py
@@ -817,4 +817,4 @@ def select(arrays, index):
     """
     if arrays is None or any(i is None for i in arrays):
         return arrays
-    return tuple(i.ravel()[index] for i in arrays)
+    return tuple(np.ravel(i)[index] for i in arrays)
