adaptive-machine-learning · hmgomes · Oct 3, 2025 · Sep 25, 2025
diff --git a/invoke.yml b/invoke.yml
@@ -5,7 +5,7 @@ moa_path: "src/capymoa/jar/moa.jar"
 # DROPBOX: When using a dropbox link, ensure that the dl=1 query parameter is 
 # present in the link. This ensures  the file is downloaded directly instead
 # of going to the dropbox page
-moa_url: "https://www.dropbox.com/scl/fi/opuuldhjzlnmizuavi7oz/250912_moa.jar?rlkey=yhphd6z4zugpvkez63wvioad7&dl=1"
+moa_url: "https://www.dropbox.com/scl/fi/yoa2g5ldm0ueyxs1tbvsa/250926_moa.jar?rlkey=kes1hcqj7i4an8f74jgh9xx9x&st=2tk6p0ny&dl=1"
 
 # What notebooks to skip when running them as tests.
 # YOU SHOULD NOT SKIP NOTEBOOKS!

diff --git a/src/capymoa/evaluation/evaluation.py b/src/capymoa/evaluation/evaluation.py
@@ -1096,8 +1096,8 @@ def prequential_evaluation(
         max_instances=max_instances,
         cumulative_evaluator=evaluator_cumulative,
         windowed_evaluator=evaluator_windowed,
-        ground_truth_y=ground_truth_y,
-        predictions=predictions,
+        ground_truth_y=np.array(ground_truth_y) if ground_truth_y else None,
+        predictions=np.array(predictions) if predictions else None,
     )
 
     return results
@@ -1167,6 +1167,8 @@ def prequential_ssl_evaluation(
             delay_length,
             label_probability,
             random_seed,
+            store_y,
+            store_predictions,
         )
 
     # IMPORTANT: delay_length and initial_window_size have not been implemented in python yet
@@ -1269,24 +1271,22 @@ def prequential_ssl_evaluation(
     ):
         evaluator_windowed.result_windows.append(evaluator_windowed.metrics())
 
-    results = PrequentialResults(
+    return PrequentialResults(
         learner=str(learner),
         stream=stream,
         wallclock=elapsed_wallclock_time,
         cpu_time=elapsed_cpu_time,
         max_instances=max_instances,
         cumulative_evaluator=evaluator_cumulative,
         windowed_evaluator=evaluator_windowed,
-        ground_truth_y=ground_truth_y,
-        predictions=predictions,
+        ground_truth_y=np.array(ground_truth_y) if ground_truth_y else None,
+        predictions=np.array(predictions) if predictions else None,
         other_metrics={
             "unlabeled": unlabeled_counter,
             "unlabeled_ratio": unlabeled_counter / i,
         },
     )
 
-    return results
-
 
 def prequential_evaluation_anomaly(
     stream,
@@ -1407,14 +1407,6 @@ def _prequential_evaluation_fast(
     Prequential evaluation fast. This function should not be used directly, users should use prequential_evaluation.
     """
 
-    predictions = None
-    if store_predictions:
-        predictions = []
-
-    ground_truth_y = None
-    if store_y:
-        ground_truth_y = []
-
     if not _is_fast_mode_compilable(stream, learner):
         raise ValueError(
             "`prequential_evaluation_fast` requires the stream object to have a`Stream.moa_stream`"
@@ -1470,19 +1462,6 @@ def _prequential_evaluation_fast(
         start_wallclock_time, start_cpu_time
     )
 
-    if store_y or store_predictions:
-        for i in range(
-            len(
-                moa_results.targets
-                if len(moa_results.targets) != 0
-                else moa_results.predictions
-            )
-        ):
-            if store_y:
-                ground_truth_y.append(moa_results.targets[i])
-            if store_predictions:
-                predictions.append(moa_results.predictions[i])
-
     results = PrequentialResults(
         learner=str(learner),
         stream=stream,
@@ -1491,8 +1470,8 @@ def _prequential_evaluation_fast(
         max_instances=max_instances,
         cumulative_evaluator=basic_evaluator,
         windowed_evaluator=windowed_evaluator,
-        ground_truth_y=ground_truth_y,
-        predictions=predictions,
+        ground_truth_y=np.array(moa_results.targets) if store_y else None,
+        predictions=np.array(moa_results.predictions) if store_predictions else None,
     )
 
     return results
@@ -1501,14 +1480,14 @@ def _prequential_evaluation_fast(
 def _prequential_ssl_evaluation_fast(
     stream,
     learner,
-    max_instances=None,
-    window_size=1000,
-    initial_window_size=0,
-    delay_length=0,
-    label_probability=0.01,
-    random_seed=1,
-    store_y=False,
-    store_predictions=False,
+    max_instances,
+    window_size,
+    initial_window_size,
+    delay_length,
+    label_probability,
+    random_seed,
+    store_y,
+    store_predictions,
 ):
     """
     Prequential SSL evaluation fast.
@@ -1518,14 +1497,6 @@ def _prequential_ssl_evaluation_fast(
             "`prequential_evaluation_fast` requires the stream object to have a`Stream.moa_stream`"
         )
 
-    predictions = None
-    if store_predictions:
-        predictions = []
-
-    ground_truth_y = None
-    if store_y:
-        ground_truth_y = []
-
     if max_instances is None:
         max_instances = -1
 
@@ -1552,8 +1523,8 @@ def _prequential_ssl_evaluation_fast(
         label_probability,
         random_seed,
         True,
-        # store_y,
-        # store_predictions,
+        store_y,
+        store_predictions,
     )
 
     # Reset the windowed_evaluator result_windows
@@ -1570,34 +1541,19 @@ def _prequential_ssl_evaluation_fast(
         start_wallclock_time, start_cpu_time
     )
 
-    if store_y or store_predictions:
-        for i in range(
-            len(
-                moa_results.targets
-                if len(moa_results.targets) != 0
-                else moa_results.predictions
-            )
-        ):
-            if store_y:
-                ground_truth_y.append(moa_results.targets[i])
-            if store_predictions:
-                predictions.append(moa_results.predictions[i])
-
-    results = PrequentialResults(
+    return PrequentialResults(
         learner=str(learner),
         stream=stream,
         wallclock=elapsed_wallclock_time,
         cpu_time=elapsed_cpu_time,
         max_instances=max_instances,
         cumulative_evaluator=basic_evaluator,
         windowed_evaluator=windowed_evaluator,
-        ground_truth_y=ground_truth_y,
-        predictions=predictions,
         other_metrics=dict(moa_results.otherMeasurements),
+        ground_truth_y=np.array(moa_results.targets) if store_y else None,
+        predictions=np.array(moa_results.predictions) if store_predictions else None,
     )
 
-    return results
-
 
 def _prequential_evaluation_anomaly_fast(
     stream,

diff --git a/src/capymoa/evaluation/results.py b/src/capymoa/evaluation/results.py
@@ -4,6 +4,7 @@
 import csv
 import os
 from datetime import datetime
+import numpy as np
 
 
 class PrequentialResults:
@@ -16,9 +17,9 @@ def __init__(
         max_instances: int = None,
         cumulative_evaluator=None,
         windowed_evaluator=None,
-        ground_truth_y=None,
-        predictions=None,
-        other_metrics=None,
+        ground_truth_y: np.ndarray | None = None,
+        predictions: np.ndarray | None = None,
+        other_metrics: dict | None = None,
     ):
         # protected attributes accessible through methods
         self._wallclock = wallclock
@@ -74,10 +75,10 @@ def cpu_time(self):
     def max_instances(self):
         return self._max_instances
 
-    def ground_truth_y(self):
+    def ground_truth_y(self) -> np.ndarray | None:
         return self._ground_truth_y
 
-    def predictions(self):
+    def predictions(self) -> np.ndarray | None:
         return self._predictions
 
     def other_metrics(self):

diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py
@@ -1,8 +1,10 @@
 from contextlib import nullcontext
 from itertools import product
+from capymoa.classifier import NoChange
 from capymoa.evaluation.evaluation import (
     _is_fast_mode_compilable,
     prequential_evaluation_anomaly,
+    PrequentialResults,
 )
 from capymoa.regressor import KNNRegressor
 from capymoa.stream.generator import SEA, HyperPlaneRegression, RandomTreeGenerator
@@ -18,6 +20,8 @@
 from capymoa.anomaly import (
     HalfSpaceTrees,
 )
+from numpy.testing import assert_array_equal
+import numpy as np
 
 
 def test_prequential_evaluation():
@@ -297,3 +301,58 @@ def _take_y(num_instances):
             assert y_remaining == y_stream[5:10]
         else:
             assert y_remaining == y_stream[15:20]
+
+
+@pytest.mark.parametrize("optimise", [False, True])
+@pytest.mark.parametrize("store_y", [True, False])
+@pytest.mark.parametrize("store_predictions", [True, False])
+@pytest.mark.parametrize(
+    "eval_func", [prequential_evaluation, prequential_ssl_evaluation]
+)
+def test_store_y_and_store_predictions(
+    eval_func, optimise: bool, store_y: bool, store_predictions: bool
+):
+    """Test ``store_y``/``store_predictions`` for both prequential evaluators."""
+    n = 10
+    stream = ElectricityTiny()
+    expected_true_y = [stream.next_instance().y_index for _ in range(n)]
+    stream.restart()  # rewind the stream after reading the expected labels
+
+    learner = NoChange(schema=stream.get_schema())
+
+    assert _is_fast_mode_compilable(stream, learner, True) or not optimise, (
+        "Fast mode should be compilable for this test if optimise is True"
+    )
+    results: PrequentialResults = eval_func(
+        stream=stream,
+        learner=learner,
+        window_size=10,
+        max_instances=n,
+        store_predictions=store_predictions,
+        store_y=store_y,
+        optimise=optimise,
+    )
+    true_y = results.ground_truth_y()
+    pred_y = results.predictions()
+
+    if store_y is True:
+        assert true_y is not None
+        assert len(true_y) == n
+        assert isinstance(true_y, np.ndarray)
+        assert_array_equal(true_y, expected_true_y)
+    else:
+        assert true_y is None, "ground truth should not be stored"
+
+    if store_predictions is True:
+        assert pred_y is not None
+        assert len(pred_y) == n
+        assert isinstance(pred_y, np.ndarray) and pred_y.dtype == np.int64
+
+        # TODO: `prequential_ssl_evaluation` sometimes removes labels, so its
+        # predictions cannot be expected to match; only check the other evaluator.
+        if eval_func != prequential_ssl_evaluation:
+            assert_array_equal(
+                pred_y[1:], expected_true_y[:-1]
+            )  # NoChange predicts the previous y, hence the one-step shift
+    else:
+        assert pred_y is None, "predictions should not be stored"
diff --git a/tests/test_moajar.py b/tests/test_moajar.py
@@ -3,7 +3,7 @@
 from hashlib import sha256
 import capymoa
 
-_MOA_JAR_HASH = "66e9ad8a7b7607d68b1477e7ccd675570b940e0f09486b080c07bf2fcecf26c3"
+_MOA_JAR_HASH = "fa5fd4fcc4dad46734149f97b69a309bd2ff57d1f6227879f40cce239cf6be35"
 
 
 def test_imports() -> None: