Fix pandas warnings about .ravel() and .apply()
Pandas was issuing FutureWarnings about two things: calls to
`Series.ravel`, which are fixed by calling `np.ravel()` on the data
instead, and the groupby `.apply` default of passing the grouping
columns to the applied function. Passing `include_groups=False` opts
in to the future default of excluding them from the calculations
(which is the right thing to do).
leouieda committed Apr 17, 2024
1 parent 07b270b commit 944256e
Showing 4 changed files with 18 additions and 17 deletions.
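
The `ravel` half of the fix is mechanical: `np.ravel()` accepts anything array-like (a pandas `Series`, a NumPy array, or a plain list) and returns a flat 1-D `ndarray`, so it is a drop-in replacement for the deprecated `Series.ravel()`. A minimal sketch of the warning and the fix, assuming pandas >= 2.2; the variable names below are illustrative, not from the diff:

    import numpy as np
    import pandas as pd

    data = pd.Series([1.0, 2.0, 3.0])

    # Deprecated since pandas 2.2 and emits a FutureWarning:
    # flat = data.ravel()

    # Warning-free replacement: np.ravel works on Series, arrays, and
    # lists alike and always returns a flat 1-D numpy array.
    flat = np.ravel(data)
    print(flat)  # [1. 2. 3.]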
3 changes: 2 additions & 1 deletion verde/base/least_squares.py
@@ -9,6 +9,7 @@
 """
 from warnings import warn
 
+import numpy as np
 from sklearn.linear_model import LinearRegression, Ridge
 from sklearn.preprocessing import StandardScaler
 
@@ -65,6 +66,6 @@ def least_squares(jacobian, data, weights, damping=None, copy_jacobian=False):
         regr = LinearRegression(fit_intercept=False)
     else:
         regr = Ridge(alpha=damping, fit_intercept=False)
-    regr.fit(jacobian, data.ravel(), sample_weight=weights)
+    regr.fit(jacobian, np.ravel(data), sample_weight=weights)
     params = regr.coef_ / scaler.scale_
     return params
8 changes: 4 additions & 4 deletions verde/base/utils.py
@@ -54,9 +54,9 @@ def score_estimator(scoring, estimator, coordinates, data, weights=None):
     result = np.mean(
         [
             scorer(
-                DummyEstimator(pred.ravel()),
+                DummyEstimator(np.ravel(pred)),
                 coordinates,
-                data[i].ravel(),
+                np.ravel(data[i]),
                 sample_weight=weights[i],
             )
             for i, pred in enumerate(predicted)
@@ -251,7 +251,7 @@ def check_fit_input(coordinates, data, weights, unpack=True):
             )
         if any(i.size != j.size for i in weights for j in data):
             raise ValueError("Weights must have the same size as the data array.")
-        weights = tuple(i.ravel() for i in weights)
+        weights = tuple(np.ravel(i) for i in weights)
     else:
         weights = tuple([None] * len(data))
     if unpack:
@@ -291,4 +291,4 @@ def n_1d_arrays(arrays, n):
     (array([0, 1, 2, 3]), array([0, 1, 2, 3]))
     """
-    return tuple(np.atleast_1d(i).ravel() for i in arrays[:n])
+    return tuple(np.ravel(np.atleast_1d(i)) for i in arrays[:n])
22 changes: 11 additions & 11 deletions verde/blockreduce.py
@@ -174,11 +174,11 @@ def filter(self, coordinates, data, weights=None):  # noqa: A003
                 "data{}".format(i): attach_weights(self.reduction, w)
                 for i, w in enumerate(weights)
             }
-        columns = {"data{}".format(i): comp.ravel() for i, comp in enumerate(data)}
+        columns = {"data{}".format(i): np.ravel(comp) for i, comp in enumerate(data)}
         columns["block"] = labels
         blocked = pd.DataFrame(columns).groupby("block").aggregate(reduction)
         blocked_data = tuple(
-            blocked["data{}".format(i)].values.ravel() for i, _ in enumerate(data)
+            np.ravel(blocked["data{}".format(i)]) for i, _ in enumerate(data)
         )
         blocked_coords = self._block_coordinates(coordinates, blocks, labels)
         if len(blocked_data) == 1:
@@ -228,7 +228,7 @@ def _block_coordinates(self, coordinates, block_coordinates, labels):
         if self.drop_coords:
             coordinates = coordinates[:2]
         coords = {
-            "coordinate{}".format(i): coord.ravel()
+            "coordinate{}".format(i): np.ravel(coord)
             for i, coord in enumerate(coordinates)
         }
         coords["block"] = labels
@@ -237,7 +237,7 @@ def _block_coordinates(self, coordinates, block_coordinates, labels):
         if self.center_coordinates:
             unique = np.unique(labels)
             for i, block_coord in enumerate(block_coordinates[:2]):
-                grouped["coordinate{}".format(i)] = block_coord[unique].ravel()
+                grouped["coordinate{}".format(i)] = np.ravel(block_coord[unique])
         return tuple(
             grouped["coordinate{}".format(i)].values for i in range(len(coordinates))
         )
@@ -414,23 +414,21 @@ def filter(self, coordinates, data, weights=None):  # noqa: A003
             region=self.region,
         )
         ncomps = len(data)
-        columns = {"data{}".format(i): comp.ravel() for i, comp in enumerate(data)}
+        columns = {"data{}".format(i): np.ravel(comp) for i, comp in enumerate(data)}
         columns["block"] = labels
         if any(w is None for w in weights):
             mean, variance = self._blocked_mean_variance(pd.DataFrame(columns), ncomps)
         else:
             columns.update(
-                {"weight{}".format(i): comp.ravel() for i, comp in enumerate(weights)}
+                {"weight{}".format(i): np.ravel(comp) for i, comp in enumerate(weights)}
             )
             table = pd.DataFrame(columns)
             if self.uncertainty:
                 mean, variance = self._blocked_mean_uncertainty(table, ncomps)
             else:
                 mean, variance = self._blocked_mean_variance_weighted(table, ncomps)
-        blocked_data = tuple(comp.values.ravel() for comp in mean)
-        blocked_weights = tuple(
-            variance_to_weights(var.values.ravel()) for var in variance
-        )
+        blocked_data = tuple(np.ravel(comp) for comp in mean)
+        blocked_weights = tuple(variance_to_weights(np.ravel(var)) for var in variance)
         blocked_coords = self._block_coordinates(coordinates, blocks, labels)
         if ncomps == 1:
             return blocked_coords, blocked_data[0], blocked_weights[0]
@@ -502,7 +500,9 @@ def weighted_average_variance(group):
                 data.reshape((1, data.size)), index=[0], columns=columns
             )
 
-        blocked = table.groupby("block").apply(weighted_average_variance)
+        blocked = table.groupby("block").apply(
+            weighted_average_variance, include_groups=False
+        )
         mean = [blocked[i] for i in columns[:ncomps]]
         variance = [blocked[i] for i in columns[ncomps:]]
         return mean, variance
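
For context on the `include_groups=False` change in the hunk above: since pandas 2.2, `DataFrameGroupBy.apply` warns that passing the grouping columns into the applied function is deprecated, and the keyword opts in to the future behavior of excluding them. A minimal sketch under that assumption; the toy frame and lambda below are illustrative, not the commit's `weighted_average_variance`:

    import pandas as pd

    table = pd.DataFrame({"block": [0, 0, 1], "data0": [1.0, 2.0, 3.0]})

    # Without include_groups=False, pandas 2.2 emits a FutureWarning
    # because the "block" column would also be passed to the function.
    # With it, each group contains only the data columns.
    result = table.groupby("block").apply(
        lambda group: group["data0"].mean(), include_groups=False
    )
    print(result)  # block 0 -> 1.5, block 1 -> 3.0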
2 changes: 1 addition & 1 deletion verde/model_selection.py
@@ -817,4 +817,4 @@ def select(arrays, index):
     """
     if arrays is None or any(i is None for i in arrays):
         return arrays
-    return tuple(i.ravel()[index] for i in arrays)
+    return tuple(np.ravel(i)[index] for i in arrays)
