diff --git a/compiler_gym/spaces/BUILD b/compiler_gym/spaces/BUILD
index 88e74e1a2..33366e934 100644
--- a/compiler_gym/spaces/BUILD
+++ b/compiler_gym/spaces/BUILD
@@ -20,6 +20,7 @@ py_library(
         ":permutation",
         ":reward",
         ":runtime_reward",
+        ":runtime_series_reward",
        ":scalar",
         ":sequence",
         ":space_sequence",
@@ -86,6 +87,16 @@ py_library(
     ],
 )
 
+py_library(
+    name = "runtime_series_reward",
+    srcs = ["runtime_series_reward.py"],
+    deps = [
+        ":reward",
+        "//compiler_gym/errors",
+        "//compiler_gym/util",
+    ],
+)
+
 py_library(
     name = "scalar",
     srcs = ["scalar.py"],
diff --git a/compiler_gym/spaces/CMakeLists.txt b/compiler_gym/spaces/CMakeLists.txt
index e8d3bc69c..6aa41cdd0 100644
--- a/compiler_gym/spaces/CMakeLists.txt
+++ b/compiler_gym/spaces/CMakeLists.txt
@@ -20,6 +20,7 @@ cg_py_library(
     ::permutation
     ::reward
     ::runtime_reward
+    ::runtime_series_reward
     ::scalar
     ::sequence
     ::space_sequence
@@ -90,6 +91,18 @@ cg_py_library(
   PUBLIC
 )
 
+cg_py_library(
+  NAME
+    runtime_series_reward
+  SRCS
+    "runtime_series_reward.py"
+  DEPS
+    ::reward
+    compiler_gym::errors::errors
+    compiler_gym::util::util
+  PUBLIC
+)
+
 cg_py_library(
   NAME
     scalar
diff --git a/compiler_gym/spaces/__init__.py b/compiler_gym/spaces/__init__.py
index f52ca0da2..6fc845ce3 100644
--- a/compiler_gym/spaces/__init__.py
+++ b/compiler_gym/spaces/__init__.py
@@ -10,6 +10,7 @@
 from compiler_gym.spaces.permutation import Permutation
 from compiler_gym.spaces.reward import DefaultRewardFromObservation, Reward
 from compiler_gym.spaces.runtime_reward import RuntimeReward
+from compiler_gym.spaces.runtime_series_reward import RuntimeSeriesReward
 from compiler_gym.spaces.scalar import Scalar
 from compiler_gym.spaces.sequence import Sequence
 from compiler_gym.spaces.space_sequence import SpaceSequence
@@ -26,6 +27,7 @@
     "Permutation",
     "Reward",
     "RuntimeReward",
+    "RuntimeSeriesReward",
     "Scalar",
     "Sequence",
     "SpaceSequence",
diff --git a/compiler_gym/spaces/runtime_series_reward.py b/compiler_gym/spaces/runtime_series_reward.py
new file mode 100644
index 000000000..e01674bed
--- /dev/null
+++ b/compiler_gym/spaces/runtime_series_reward.py
@@ -0,0 +1,85 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Optional
+
+import numpy as np
+import scipy.stats
+
+from compiler_gym.errors import BenchmarkInitError, ServiceError
+from compiler_gym.spaces.reward import Reward
+from compiler_gym.util.gym_type_hints import ActionType, ObservationType
+
+
+class RuntimeSeriesReward(Reward):
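+    """A reward for the change in program runtime between steps.
+
+    At each step, the new series of runtime observations is compared against
+    the series from the previous step. The reward is the difference between
+    the medians of the two series if a Kruskal-Wallis test finds them
+    significantly different, and 0 otherwise.
+    """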
+    def __init__(
+        self,
+        runtime_count: int,
+        warmup_count: int,
+        default_value: int = 0,
+    ):
+        super().__init__(
+            name="runtimeseries",
+            observation_spaces=["Runtime"],
+            default_value=default_value,
+            min=None,
+            max=None,
+            default_negates_returns=True,
+            deterministic=False,
+            platform_dependent=True,
+        )
+        self.runtime_count = runtime_count
+        self.warmup_count = warmup_count
+        self.starting_runtimes: Optional[List[float]] = None
+        self.previous_runtimes: Optional[List[float]] = None
+        self.current_benchmark: Optional[str] = None
+
+    def reset(self, benchmark, observation_view) -> None:
+        # If we are changing the benchmark then check that it is runnable.
+        if benchmark != self.current_benchmark:
+            if not observation_view["IsRunnable"]:
+                raise BenchmarkInitError(f"Benchmark is not runnable: {benchmark}")
+            self.current_benchmark = benchmark
+            self.starting_runtimes = None
+
+        # Compute the initial runtime series.
+        if self.starting_runtimes is None:
+            self.starting_runtimes = observation_view["Runtime"]
+
+        self.previous_runtimes = self.starting_runtimes
+
+    def update(
+        self,
+        actions: List[ActionType],
+        observations: List[ObservationType],
+        observation_view,
+    ) -> float:
+        del actions  # unused
+        del observation_view  # unused
+        runtimes = observations[0]
+        if len(runtimes) != self.runtime_count:
+            raise ServiceError(
+                f"Expected {self.runtime_count} runtimes but received {len(runtimes)}"
+            )
+
+        # Use the Kruskal-Wallis test to determine whether the medians of the
+        # two series of runtimes are equal:
+        # https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance
+        _, pval = scipy.stats.kruskal(runtimes, self.previous_runtimes)
+
+        # A p-value below 0.05 means that the current series of runtimes is
+        # significantly different from the previous series, in which case the
+        # reward is the difference between the medians of the two series.
+        if pval < 0.05:
+            reward = np.median(self.previous_runtimes) - np.median(runtimes)
+        # Otherwise the runtimes are not significantly different and the
+        # reward is 0.
+        else:
+            reward = 0.0
+
+        # Update the previous runtimes for the next step.
+        self.previous_runtimes = runtimes
+        return reward
diff --git a/compiler_gym/wrappers/__init__.py b/compiler_gym/wrappers/__init__.py
index ae181bd28..16d5ce4f7 100644
--- a/compiler_gym/wrappers/__init__.py
+++ b/compiler_gym/wrappers/__init__.py
@@ -48,7 +48,10 @@ from compiler_gym.wrappers.fork import ForkOnStep
 
 if config.enable_llvm_env:
-    from compiler_gym.wrappers.llvm import RuntimePointEstimateReward  # noqa: F401
+    from compiler_gym.wrappers.llvm import (
+        RuntimePointEstimateReward,  # noqa: F401
+        RuntimeSeriesEstimateReward,  # noqa: F401
+    )
     from compiler_gym.wrappers.sqlite_logger import (  # noqa: F401
         SynchronousSqliteLogger,
     )
@@ -76,4 +79,5 @@
 
 if config.enable_llvm_env:
     __all__.append("RuntimePointEstimateReward")
+    __all__.append("RuntimeSeriesEstimateReward")
     __all__.append("SynchronousSqliteLogger")
diff --git a/compiler_gym/wrappers/llvm.py b/compiler_gym/wrappers/llvm.py
index fe4a8a29b..e50881084 100644
--- a/compiler_gym/wrappers/llvm.py
+++ b/compiler_gym/wrappers/llvm.py
@@ -9,6 +9,7 @@
 from compiler_gym.envs.llvm import LlvmEnv
-from compiler_gym.spaces import RuntimeReward
+from compiler_gym.spaces import RuntimeReward, RuntimeSeriesReward
 from compiler_gym.wrappers import CompilerEnvWrapper
 
@@ -67,3 +68,53 @@ def fork(self) -> "RuntimePointEstimateReward":
             warmup_count=self.reward.spaces["runtime"].warmup_count,
             estimator=self.reward.spaces["runtime"].estimator,
         )
+
+
+class RuntimeSeriesEstimateReward(CompilerEnvWrapper):
+    """LLVM wrapper that estimates the runtime of a program from a series of N
+    runtime observations and uses the change between steps as the reward.
+
+    This class wraps an LLVM environment and registers a new runtime reward
+    space. It is similar to RuntimePointEstimateReward, except that the reward
+    is nonzero only when the new series of runtimes differs significantly from
+    the series measured at the previous step.
+
+    See RuntimeSeriesReward for more details.
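+
+    Example usage (an illustrative sketch; assumes the environment's benchmark
+    is runnable):
+
+        >>> import compiler_gym
+        >>> env = RuntimeSeriesEstimateReward(compiler_gym.make("llvm-v0"))
+        >>> env.reset()
+        >>> _, reward, done, info = env.step(env.action_space.sample())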
+ """ + + def __init__( + self, + env: LlvmEnv, + runtime_count: int = 30, + warmup_count: int = 0, + ): + """Constructor. + :param env: The environment to wrap. + :param runtime_count: The number of times to execute the binary when + estimating the runtime. + :param warmup_count: The number of warmup runs of the binary to perform + before measuring the runtime. + """ + super().__init__(env) + + self.env.unwrapped.reward.add_space( + RuntimeSeriesReward( + runtime_count=runtime_count, + warmup_count=warmup_count, + ) + ) + self.env.unwrapped.reward_space = "runtimeseries" + + self.env.unwrapped.runtime_observation_count = runtime_count + self.env.unwrapped.runtime_warmup_runs_count = warmup_count + + def fork(self) -> "RuntimeSeriesEstimateReward": + fkd = self.env.fork() + # Remove the original "runtimeseries" space so that we that new + # RuntimeSeriesEstimateReward wrapper instance does not attempt to + # redefine, raising a warning. + del fkd.unwrapped.reward.spaces["runtimeseries"] + return RuntimeSeriesEstimateReward( + env=fkd, + runtime_count=self.reward.spaces["runtimeseries"].runtime_count, + warmup_count=self.reward.spaces["runtimeseries"].warmup_count, + ) diff --git a/examples/llvm_autotuning/autotuners/nevergrad_.py b/examples/llvm_autotuning/autotuners/nevergrad_.py index bacea33d8..4e1ae0cdb 100644 --- a/examples/llvm_autotuning/autotuners/nevergrad_.py +++ b/examples/llvm_autotuning/autotuners/nevergrad_.py @@ -29,7 +29,10 @@ def nevergrad( https://facebookresearch.github.io/nevergrad/ """ - if optimization_target == OptimizationTarget.RUNTIME: + if ( + optimization_target == OptimizationTarget.RUNTIME or + optimization_target == OptimizationTarget.RUNTIME_SERIES + ): def calculate_negative_reward(actions: Tuple[ActionType]) -> float: env.reset() diff --git a/examples/llvm_autotuning/optimization_target.py b/examples/llvm_autotuning/optimization_target.py index 7baeba1cb..0672cd4c5 100644 --- a/examples/llvm_autotuning/optimization_target.py +++ b/examples/llvm_autotuning/optimization_target.py @@ -15,6 +15,7 @@ from compiler_gym.datasets import Benchmark from compiler_gym.envs import LlvmEnv from compiler_gym.wrappers import RuntimePointEstimateReward +from compiler_gym.wrappers import RuntimeSeriesEstimateReward logger = logging.getLogger(__name__) @@ -25,6 +26,7 @@ class OptimizationTarget(str, Enum): CODESIZE = "codesize" BINSIZE = "binsize" RUNTIME = "runtime" + RUNTIME_SERIES = "runtimeseries" @property def optimization_space_enum_name(self) -> str: @@ -32,6 +34,7 @@ def optimization_space_enum_name(self) -> str: OptimizationTarget.CODESIZE: "IrInstructionCount", OptimizationTarget.BINSIZE: "ObjectTextSizeBytes", OptimizationTarget.RUNTIME: "Runtime", + OptimizationTarget.RUNTIME_SERIES: "RuntimeSeries", }[self.value] def make_env(self, benchmark: Union[str, Benchmark]) -> LlvmEnv: @@ -50,6 +53,8 @@ def make_env(self, benchmark: Union[str, Benchmark]) -> LlvmEnv: env.reward_space = "ObjectTextSizeOz" elif self.value == OptimizationTarget.RUNTIME: env = RuntimePointEstimateReward(env, warmup_count=0, runtime_count=3) + elif self.value == OptimizationTarget.RUNTIME_SERIES: + env = RuntimeSeriesEstimateReward(env, warmup_count=5, runtime_count=30) else: assert False, f"Unknown OptimizationTarget: {self.value}" @@ -89,7 +94,10 @@ def final_reward(self, env: LlvmEnv, runtime_count: int = 30) -> float: env.observation.ObjectTextSizeBytes(), 1 ) - if self.value == OptimizationTarget.RUNTIME: + if ( + self.value == OptimizationTarget.RUNTIME or + self.value == 
diff --git a/examples/llvm_autotuning/optimization_target.py b/examples/llvm_autotuning/optimization_target.py
index 7baeba1cb..0672cd4c5 100644
--- a/examples/llvm_autotuning/optimization_target.py
+++ b/examples/llvm_autotuning/optimization_target.py
@@ -15,6 +15,7 @@
 from compiler_gym.datasets import Benchmark
 from compiler_gym.envs import LlvmEnv
-from compiler_gym.wrappers import RuntimePointEstimateReward
+from compiler_gym.wrappers import (
+    RuntimePointEstimateReward,
+    RuntimeSeriesEstimateReward,
+)
 
 logger = logging.getLogger(__name__)
@@ -25,6 +26,7 @@ class OptimizationTarget(str, Enum):
     CODESIZE = "codesize"
     BINSIZE = "binsize"
     RUNTIME = "runtime"
+    RUNTIME_SERIES = "runtimeseries"
 
     @property
     def optimization_space_enum_name(self) -> str:
@@ -32,6 +34,7 @@ def optimization_space_enum_name(self) -> str:
             OptimizationTarget.CODESIZE: "IrInstructionCount",
             OptimizationTarget.BINSIZE: "ObjectTextSizeBytes",
             OptimizationTarget.RUNTIME: "Runtime",
+            OptimizationTarget.RUNTIME_SERIES: "RuntimeSeries",
         }[self.value]
 
     def make_env(self, benchmark: Union[str, Benchmark]) -> LlvmEnv:
@@ -50,6 +53,8 @@ def make_env(self, benchmark: Union[str, Benchmark]) -> LlvmEnv:
             env.reward_space = "ObjectTextSizeOz"
         elif self.value == OptimizationTarget.RUNTIME:
             env = RuntimePointEstimateReward(env, warmup_count=0, runtime_count=3)
+        elif self.value == OptimizationTarget.RUNTIME_SERIES:
+            env = RuntimeSeriesEstimateReward(env, warmup_count=5, runtime_count=30)
         else:
             assert False, f"Unknown OptimizationTarget: {self.value}"
@@ -89,7 +94,10 @@ def final_reward(self, env: LlvmEnv, runtime_count: int = 30) -> float:
             env.observation.ObjectTextSizeBytes(), 1
         )
 
-        if self.value == OptimizationTarget.RUNTIME:
+        if self.value in (
+            OptimizationTarget.RUNTIME,
+            OptimizationTarget.RUNTIME_SERIES,
+        ):
             with _RUNTIME_LOCK:
                 with compiler_gym.make("llvm-v0", benchmark=env.benchmark) as new_env:
                     new_env.reset()
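
Note on the reward computation: RuntimeSeriesReward.update() implements a
significance-gated median difference. The following minimal standalone sketch
illustrates the logic with two hypothetical runtime series (the values are
illustrative and not taken from this patch):

    import numpy as np
    import scipy.stats

    previous_runtimes = [1.02, 1.01, 1.03, 1.02, 1.01]  # seconds, previous step
    runtimes = [0.91, 0.92, 0.90, 0.93, 0.91]  # seconds, current step

    # Kruskal-Wallis H-test: a small p-value indicates that the two samples
    # are unlikely to be drawn from the same distribution.
    _, pval = scipy.stats.kruskal(runtimes, previous_runtimes)

    if pval < 0.05:
        # Positive reward for a reduction in median runtime.
        reward = np.median(previous_runtimes) - np.median(runtimes)
    else:
        reward = 0.0

    print(round(reward, 2))  # 0.11: a statistically significant speedup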