Merge pull request #79 from davidorme/release/0.10.1

davidorme · web-flow · commit ed29b962573a · 2023-07-24T19:55:52.000+01:00
Release 0.10.1
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -140,4 +140,6 @@
           new function is calc_soilmstress_mengoli.
         - The soilmstress argument to PModel is removed and both the Mengoli and Stocker
           approaches are now intended to be applied as penalties to GPP after P Model
-          fitting, allowing the two to be compared from the same P Model outputs.
+          fitting, allowing the two to be compared from the same P Model outputs.
+0.10.1  - Addition of draft extention of subdaily memory_effect function that allows for
+          missing data arising from non-estimable Jmax and Vcmax.
diff --git a/docs/source/users/pmodel/subdaily_details/subdaily_calculations.md b/docs/source/users/pmodel/subdaily_details/subdaily_calculations.md
@@ -20,7 +20,7 @@ steps used in the estimation process in order to show intermediates results but
 practice, as shown in the [worked example](worked_example), most of these calculations
 are handled internally by the model fitting in `pyrealm`.
 
-```{code-cell} ipython3
+```{code-cell}
 :tags: [hide-input]
 
 from importlib import resources
@@ -46,16 +46,15 @@ The code below uses half hourly data from 2014 for the [BE-Vie FluxNET
 site](https://fluxnet.org/doi/FLUXNET2015/BE-Vie), which was also used as a
 demonstration in {cite:t}`mengoli:2022a`.
 
-```{code-cell} ipython3
-
+```{code-cell}
 data_path = resources.files("pyrealm_build_data") / "subdaily_BE_Vie_2014.csv"
 data = np.genfromtxt(
     data_path,
     names=True,
     delimiter=",",
     dtype=None,
     encoding="UTF8",
-    missing_values = "NA",
+    missing_values="NA",
 )
 
 # Extract the key half hourly timestep variables
@@ -74,7 +73,7 @@ This dataset can then be used to calculate the photosynthetic environment at the
 subdaily timescale. The code below also estimates GPP under the standard P Model with no
 slow responses for comparison.
 
-```{code-cell} ipython3
+```{code-cell}
 # Calculate the photosynthetic environment
 subdaily_env = PModelEnvironment(
     tc=temp_subdaily,
@@ -100,7 +99,7 @@ best to sample those conditions. Typically those might be the observed environme
 conditions at the observation closest to noon, or the mean environmental conditions in a
 window around noon.
 
-```{code-cell} ipython3
+```{code-cell}
 # Create the fast slow scaler
 fsscaler = FastSlowScaler(datetime_subdaily)
 
@@ -114,19 +113,18 @@ fsscaler.set_window(
 pmodel_fastslow = FastSlowPModel(
     env=subdaily_env,
     fs_scaler=fsscaler,
+    handle_nan=True,
     ppfd=ppfd_subdaily,
     fapar=fapar_subdaily,
 )
 ```
 
-```{code-cell} ipython3
+```{code-cell}
 :tags: [hide-input]
 
 idx = np.arange(48 * 120, 48 * 130)
 plt.figure(figsize=(10, 4))
-plt.plot(
-    datetime_subdaily[idx], pmodel_subdaily.gpp[idx], label="Instantaneous model"
-)
+plt.plot(datetime_subdaily[idx], pmodel_subdaily.gpp[idx], label="Instantaneous model")
 plt.plot(datetime_subdaily[idx], pmodel_fastslow.gpp[idx], "r-", label="Slow responses")
 plt.ylabel = "GPP"
 plt.legend(frameon=False)
@@ -145,7 +143,7 @@ The daily average conditions during the acclimation window can be sampled and us
 inputs to the standard P Model to calculate the optimal behaviour of plants under those
 conditions.
 
-```{code-cell} ipython3
+```{code-cell}
 # Get the daily acclimation conditions for the forcing variables
 temp_acclim = fsscaler.get_daily_means(temp_subdaily)
 co2_acclim = fsscaler.get_daily_means(co2_subdaily)
@@ -176,7 +174,7 @@ temperatures so $J_{max}$ and $V_{cmax}$ must first be standardised to expected
 at 25°C. This is acheived by multiplying by the reciprocal of the exponential part of
 the Arrhenius equation ($h^{-1}$ in {cite}`mengoli:2022a`).
 
-```{code-cell} ipython3
+```{code-cell}
 # Are these any of the existing values in the constants?
 ha_vcmax25 = 65330
 ha_jmax25 = 43900
@@ -191,18 +189,18 @@ jmax25_acclim = pmodel_acclim.jmax * (1 / calc_ftemp_arrh(tk_acclim, ha_jmax25))
 The memory effect can now be applied to the three parameters with slow
 responses to calculate realised values, here using the default 15 day window.
 
-```{code-cell} ipython3
+```{code-cell}
 # Calculation of memory effect in xi, vcmax25 and jmax25
 xi_real = memory_effect(pmodel_acclim.optchi.xi, alpha=1 / 15)
-vcmax25_real = memory_effect(vcmax25_acclim, alpha=1 / 15)
-jmax25_real = memory_effect(jmax25_acclim, alpha=1 / 15)
+vcmax25_real = memory_effect(vcmax25_acclim, alpha=1 / 15, handle_nan=True)
+jmax25_real = memory_effect(jmax25_acclim, alpha=1 / 15, handle_nan=True)
 ```
 
 The plots below show the instantaneously acclimated values for  $J_{max25}$,
 $V_{cmax25}$ and $\xi$ in grey along with the realised slow reponses.
 applied.
 
-```{code-cell} ipython3
+```{code-cell}
 :tags: [hide-input]
 
 fig, axes = plt.subplots(1, 3, figsize=(16, 5))
@@ -236,7 +234,7 @@ temperature at fast scales:
 * These values are adjusted to the actual half hourly temperatures to give the fast
   responses of $J_{max}$ and $V_{cmax}$.
 
-```{code-cell} ipython3
+```{code-cell}
 tk_subdaily = subdaily_env.tc + pmodel_subdaily.const.k_CtoK
 
 # Fill the realised jmax and vcmax from subdaily to daily
@@ -256,7 +254,7 @@ $\Gamma^\ast$ with the realised slow responses of $\xi$. The original implementa
 $\Gamma^{\ast}$ and $c_a$, interpolated to the subdaily timescale and the actual
 subdaily variation in VPD.
 
-```{code-cell} ipython3
+```{code-cell}
 # Interpolate xi to subdaily scale
 xi_subdaily = fsscaler.fill_daily_to_subdaily(xi_real)
 
@@ -273,7 +271,7 @@ Model, where $c_i$ includes the slow responses of $\xi$ and $V_{cmax}$ and $J_{m
 include the slow responses of $V_{cmax25}$ and $J_{max25}$ and fast responses to
 temperature.
 
-```{code-cell} ipython3
+```{code-cell}
 # Calculate Ac
 Ac_subdaily = (
     vcmax_subdaily
@@ -300,3 +298,7 @@ GPP_subdaily = np.minimum(Ac_subdaily, Aj_subdaily) * PModelConst.k_c_molmass
 diff = GPP_subdaily - pmodel_fastslow.gpp
 print(np.nanmin(diff), np.nanmax(diff))
 ```
+
+```{code-cell}
+
+```
diff --git a/docs/source/users/pmodel/subdaily_details/worked_example.md b/docs/source/users/pmodel/subdaily_details/worked_example.md
@@ -63,7 +63,7 @@ focal_datetime = np.where(datetimes == np.datetime64("2018-06-12 12:00:00"))[0]
 # Plot the temperature data for an example timepoint and show the sites
 focal_temp = ds["Tair"][focal_datetime] - 273.15
 focal_temp.plot()
-plt.plot(sites["lon"], sites["lat"], "xr");
+plt.plot(sites["lon"], sites["lat"], "xr")
 ```
 
 The WFDE data need some conversion for use in the PModel, along with the definition of
@@ -119,7 +119,12 @@ fsscaler.set_window(
     half_width=np.timedelta64(1, "h"),
 )
 fs_pmod = FastSlowPModel(
-    env=pm_env, fs_scaler=fsscaler, fapar=fapar, ppfd=ppfd, alpha=1 / 15
+    env=pm_env,
+    fs_scaler=fsscaler,
+    handle_nan=True,
+    fapar=fapar,
+    ppfd=ppfd,
+    alpha=1 / 15,
 )
 ```
 
@@ -161,7 +166,7 @@ ax2.set_title("Fast Slow")
 # Add a colour bar
 subfig2.colorbar(
     im, ax=[ax1, ax2], shrink=0.55, label=r"GPP ($\mu g C\,m^{-2}\,s^{-1}$)"
-);
+)
 ```
 
 ## Time series predictions
@@ -200,7 +205,3 @@ for ax, st in zip(axes, sites["stid"].values):
 axes[0].legend(loc="lower center", bbox_to_anchor=[0.5, 1], ncols=2, frameon=False)
 plt.tight_layout()
 ```
-
-```{code-cell}
-
-```
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pyrealm"
-version = "0.10.0"
+version = "0.10.1"
 description = "Python implementations of REALM models"
 authors = ["David Orme <d.orme@imperial.ac.uk>"]
 homepage = "https://pyrealm.readthedocs.io/"
diff --git a/pyrealm/pmodel/subdaily.py b/pyrealm/pmodel/subdaily.py
@@ -18,7 +18,9 @@
 )
 
 
-def memory_effect(values: NDArray, alpha: float = 0.067) -> NDArray:
+def memory_effect(
+    values: NDArray, alpha: float = 0.067, handle_nan: bool = False
+) -> NDArray:
     r"""Apply a memory effect to a variable.
 
     Three key photosynthetic parameters (:math:`\xi`, :math:`V_{cmax25}` and
@@ -27,37 +29,91 @@ def memory_effect(values: NDArray, alpha: float = 0.067) -> NDArray:
     average to apply a lagged response to one of these parameters.
 
     The estimation uses the paramater `alpha` (:math:`\alpha`) to control the speed of
-    convergence of the estimated values (:math:`E`) to the calculated optimal values
+    convergence of the realised values (:math:`R`) to the calculated optimal values
     (:math:`O`):
 
     .. math::
 
-        E_{t} = E_{t-1}(1 - \alpha) + O_{t} \alpha
+        R_{t} = R_{t-1}(1 - \alpha) + O_{t} \alpha
 
-    For :math:`t_{0}`, the first value in the optimal values is used so :math:`E_{0} =
+    For :math:`t_{0}`, the first value in the optimal values is used so :math:`R_{0} =
     O_{0}`.
 
     The ``values`` array can have multiple dimensions but the first dimension is always
     assumed to represent time and the memory effect is calculated only along the first
     dimension.
 
+    By default, the ``values`` array must not contain missing values (`numpy.nan`).
+    However, :math:`V_{cmax}` and :math:`J_{max}` are not estimable in some conditions
+    (namely when :math:`m \le c^{\ast}`, see
+    :class:`~pyrealm.pmodel.pmodel.CalcOptimalChi`) and so missing values in P Model
+    predictions can arise even when the forcing data is complete, breaking the recursion
+    shown above. When ``handle_nan=True``, this function fills missing data as follow:
+
+    +-------------------+--------+-------------------------------------------------+
+    |                   |        |   Current optimal (:math:`O_{t}`)               |
+    +-------------------+--------+-----------------+-------------------------------+
+    |                   |        |     NA          |   not NA                      |
+    +-------------------+--------+-----------------+-------------------------------+
+    | Previous          |    NA  |     NA          |     O_{t}                     |
+    | realised          +--------+-----------------+-------------------------------+
+    | (:math:`R_{t-1}`) | not NA | :math:`R_{t-1}` | :math:`R_{t-1}(1-a) + O_{t}a` |
+    +-------------------+--------+-----------------+-------------------------------+
+
+    Initial missing values are kept, and the first observed optimal value is accepted as
+    the first realised value (as with the start of the recursion above). After this, if
+    the current optimal value is missing, then the previous estimate of the realised
+    value is held over until it can next be updated from observed data.
+
     Args:
         values: The values to apply the memory effect to.
-        alpha: The relative weight applied to the most recent observation
+        alpha: The relative weight applied to the most recent observation.
+        handle_nan: Allow missing values to be handled.
 
     Returns:
         An array of the same shape as ``values`` with the memory effect applied.
     """
 
+    # Check for nan and nan handling
+    nan_present = np.any(np.isnan(values))
+    if nan_present and not handle_nan:
+        raise ValueError("Missing values in data passed to memory_effect")
+
     # Initialise the output storage and set the first values to be a slice along the
     # first axis of the input values
     memory_values = np.empty_like(values, dtype=np.float32)
     memory_values[0] = values[0]
 
-    # Loop over the first axis, in each case taking slices through the first axis of the
-    # inputs. This handles arrays of any dimension.
+    # Handle the data if there are no missing data,
+    if not nan_present:
+        # Loop over the first axis, in each case taking slices through the first axis of
+        # the inputs. This handles arrays of any dimension.
+        for idx in range(1, len(memory_values)):
+            memory_values[idx] = (
+                memory_values[idx - 1] * (1 - alpha) + values[idx] * alpha
+            )
+
+        return memory_values
+
+    # Otherwise, do the same thing but handling missing data at each step.
     for idx in range(1, len(memory_values)):
-        memory_values[idx] = memory_values[idx - 1] * (1 - alpha) + values[idx] * alpha
+        # Need to check for nan conditions:
+        # - the previous value might be nan from an initial nan or sequence of nans, in
+        #   which case the current value is accepted without weighting - it could be nan
+        #   itself to extend a chain of initial nan values.
+        # - the current value might be nan, in which case the previous value gets
+        #   held over as the current value.
+        prev_nan = np.isnan(memory_values[idx - 1])
+        curr_nan = np.isnan(values[idx])
+        memory_values[idx] = np.where(
+            prev_nan,
+            values[idx],
+            np.where(
+                curr_nan,
+                memory_values[idx - 1],
+                memory_values[idx - 1] * (1 - alpha) + values[idx] * alpha,
+            ),
+        )
 
     return memory_values
 
@@ -92,7 +148,8 @@ class FastSlowPModel:
     * The :meth:`~pyrealm.pmodel.subdaily.memory_effect` function is then used to
       calculate realised slowly responding values for :math:`\xi`, :math:`V_{cmax25}`
       and :math:`J_{max25}`, given a weight :math:`\alpha \in [0,1]` that sets the speed
-      of acclimation.
+      of acclimation. The ``handle_nan`` argument is passed to this function to set
+      whether missing values in the optimal predictions are permitted and handled.
     * The realised values are then filled back onto the original subdaily timescale,
       with :math:`V_{cmax}` and :math:`J_{max}` then being calculated from the slowly
       responding :math:`V_{cmax25}` and :math:`J_{max25}` and the actual subdaily
@@ -107,6 +164,8 @@ class FastSlowPModel:
         fapar: The :math:`f_{APAR}` for each observation.
         ppfd: The PPDF for each observation.
         alpha: The :math:`\alpha` weight.
+        handle_nan: Should the :func:`~pyrealm.pmodel.subdaily.memory_effect` function
+          be allowe to handle missing values.
         kphio: The quantum yield efficiency of photosynthesis (:math:`\phi_0`, -).
         fill_kind: The approach used to fill daily realised values to the subdaily
           timescale, currently one of 'previous' or 'linear'.
@@ -120,6 +179,7 @@ def __init__(
         fapar: NDArray,
         alpha: float = 1 / 15,
         kphio: float = 1 / 8,
+        handle_nan: bool = False,
         fill_kind: str = "previous",
     ) -> None:
         # Warn about the API
@@ -192,11 +252,17 @@ def __init__(
         )
 
         # Calculate the realised values from the instantaneous optimal values
-        self.xi_real: NDArray = memory_effect(self.pmodel_acclim.optchi.xi, alpha=alpha)
+        self.xi_real: NDArray = memory_effect(
+            self.pmodel_acclim.optchi.xi, alpha=alpha, handle_nan=handle_nan
+        )
         r"""Realised daily slow responses in :math:`\xi`"""
-        self.vcmax25_real: NDArray = memory_effect(self.vcmax25_opt, alpha=alpha)
+        self.vcmax25_real: NDArray = memory_effect(
+            self.vcmax25_opt, alpha=alpha, handle_nan=handle_nan
+        )
         r"""Realised daily slow responses in :math:`V_{cmax25}`"""
-        self.jmax25_real: NDArray = memory_effect(self.jmax25_opt, alpha=alpha)
+        self.jmax25_real: NDArray = memory_effect(
+            self.jmax25_opt, alpha=alpha, handle_nan=handle_nan
+        )
         r"""Realised daily slow responses in :math:`J_{max25}`"""
 
         # Fill the daily realised values onto the subdaily scale
@@ -292,6 +358,8 @@ class FastSlowPModel_JAMES:
         fapar: The :math:`f_{APAR}` for each observation.
         ppfd: The PPDF for each observation.
         alpha: The :math:`\alpha` weight.
+        handle_nan: Should the :func:`~pyrealm.pmodel.subdaily.memory_effect` function
+          be allowe to handle missing values.
         kphio: The quantum yield efficiency of photosynthesis (:math:`\phi_0`, -).
         vpd_scaler: An alternate
           :class:`~pyrealm.pmodel.fast_slow_scaler.FastSlowScaler` instance used to
@@ -310,6 +378,7 @@ def __init__(
         ppfd: NDArray,
         fapar: NDArray,
         alpha: float = 1 / 15,
+        handle_nan: bool = False,
         kphio: float = 1 / 8,
         vpd_scaler: Optional[FastSlowScaler] = None,
         fill_from: Optional[np.timedelta64] = None,
@@ -388,9 +457,13 @@ def __init__(
         )
 
         # Calculate the realised values from the instantaneous optimal values
-        self.vcmax25_real: NDArray = memory_effect(self.vcmax25_opt, alpha=alpha)
+        self.vcmax25_real: NDArray = memory_effect(
+            self.vcmax25_opt, alpha=alpha, handle_nan=handle_nan
+        )
         r"""Realised daily slow responses in :math:`V_{cmax25}`"""
-        self.jmax25_real: NDArray = memory_effect(self.jmax25_opt, alpha=alpha)
+        self.jmax25_real: NDArray = memory_effect(
+            self.jmax25_opt, alpha=alpha, handle_nan=handle_nan
+        )
         r"""Realised daily slow responses in :math:`J_{max25}`"""
 
         # Calculate the daily xi value, which does not have a slow reponse in this
diff --git a/tests/pmodel/test_fast_slow_pmodel.py b/tests/pmodel/test_fast_slow_pmodel.py
diff --git a/tests/pmodel/test_memory_effect.py b/tests/pmodel/test_memory_effect.py