diff --git a/compiler_gym/spaces/BUILD b/compiler_gym/spaces/BUILD
index 88e74e1a2..33366e934 100644
--- a/compiler_gym/spaces/BUILD
+++ b/compiler_gym/spaces/BUILD
@@ -20,6 +20,7 @@ py_library(
         ":permutation",
         ":reward",
         ":runtime_reward",
+        ":runtime_series_reward",
        ":scalar",
         ":sequence",
         ":space_sequence",
@@ -86,6 +87,16 @@ py_library(
     ],
 )
 
+py_library(
+    name = "runtime_series_reward",
+    srcs = ["runtime_series_reward.py"],
+    deps = [
+        ":reward",
+        "//compiler_gym/errors",
+        "//compiler_gym/util",
+    ],
+)
+
 py_library(
     name = "scalar",
     srcs = ["scalar.py"],
diff --git a/compiler_gym/spaces/CMakeLists.txt b/compiler_gym/spaces/CMakeLists.txt
index e8d3bc69c..6aa41cdd0 100644
--- a/compiler_gym/spaces/CMakeLists.txt
+++ b/compiler_gym/spaces/CMakeLists.txt
@@ -20,6 +20,7 @@ cg_py_library(
     ::permutation
     ::reward
     ::runtime_reward
+    ::runtime_series_reward
     ::scalar
     ::sequence
     ::space_sequence
@@ -90,6 +91,18 @@ cg_py_library(
   PUBLIC
 )
 
+cg_py_library(
+  NAME
+    runtime_series_reward
+  SRCS
+    "runtime_series_reward.py"
+  DEPS
+    ::reward
+    compiler_gym::errors::errors
+    compiler_gym::util::util
+  PUBLIC
+)
+
 cg_py_library(
   NAME
     scalar
diff --git a/compiler_gym/spaces/__init__.py b/compiler_gym/spaces/__init__.py
index f52ca0da2..6fc845ce3 100644
--- a/compiler_gym/spaces/__init__.py
+++ b/compiler_gym/spaces/__init__.py
@@ -10,6 +10,7 @@
 from compiler_gym.spaces.permutation import Permutation
 from compiler_gym.spaces.reward import DefaultRewardFromObservation, Reward
 from compiler_gym.spaces.runtime_reward import RuntimeReward
+from compiler_gym.spaces.runtime_series_reward import RuntimeSeriesReward
 from compiler_gym.spaces.scalar import Scalar
 from compiler_gym.spaces.sequence import Sequence
 from compiler_gym.spaces.space_sequence import SpaceSequence
@@ -26,6 +27,7 @@
     "Permutation",
     "Reward",
     "RuntimeReward",
+    "RuntimeSeriesReward",
     "Scalar",
     "Sequence",
     "SpaceSequence",
diff --git a/compiler_gym/spaces/runtime_series_reward.py b/compiler_gym/spaces/runtime_series_reward.py
new file mode 100644
index 000000000..e01674bed
--- /dev/null
+++ b/compiler_gym/spaces/runtime_series_reward.py
@@ -0,0 +1,85 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Optional
+
+import numpy as np
+import scipy.stats
+
+from compiler_gym.errors import BenchmarkInitError, ServiceError
+from compiler_gym.spaces.reward import Reward
+from compiler_gym.util.gym_type_hints import ActionType, ObservationType
+
+
+class RuntimeSeriesReward(Reward):
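+    """A reward for the change in program runtime between steps.
+
+    At each step, the new series of runtime observations is compared against
+    the series from the previous step. The reward is the difference between
+    the medians of the two series if a Kruskal-Wallis test finds them
+    significantly different, and 0 otherwise.
+    """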
+    def __init__(
+        self,
+        runtime_count: int,
+        warmup_count: int,
+        default_value: int = 0,
+    ):
+        super().__init__(
+            name="runtimeseries",
+            observation_spaces=["Runtime"],
+            default_value=default_value,
+            min=None,
+            max=None,
+            default_negates_returns=True,
+            deterministic=False,
+            platform_dependent=True,
+        )
+        self.runtime_count = runtime_count
+        self.warmup_count = warmup_count
+        self.starting_runtimes: Optional[List[float]] = None
+        self.previous_runtimes: Optional[List[float]] = None
+        self.current_benchmark: Optional[str] = None
+
+    def reset(self, benchmark, observation_view) -> None:
+        # If we are changing the benchmark then check that it is runnable.
+        if benchmark != self.current_benchmark:
+            if not observation_view["IsRunnable"]:
+                raise BenchmarkInitError(f"Benchmark is not runnable: {benchmark}")
+            self.current_benchmark = benchmark
+            self.starting_runtimes = None
+
+        # Compute the initial runtime series.
+        if self.starting_runtimes is None:
+            self.starting_runtimes = observation_view["Runtime"]
+
+        self.previous_runtimes = self.starting_runtimes
+
+    def update(
+        self,
+        actions: List[ActionType],
+        observations: List[ObservationType],
+        observation_view,
+    ) -> float:
+        del actions  # unused
+        del observation_view  # unused
+        runtimes = observations[0]
+        if len(runtimes) != self.runtime_count:
+            raise ServiceError(
+                f"Expected {self.runtime_count} runtimes but received {len(runtimes)}"
+            )
+
+        # Use the Kruskal-Wallis test to determine whether the medians of the
+        # two series of runtimes are equal:
+        # https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance
+        _, pval = scipy.stats.kruskal(runtimes, self.previous_runtimes)
+
+        # A p-value below 0.05 means that the current series of runtimes is
+        # significantly different from the previous series, in which case the
+        # reward is the difference between the medians of the two series.
+        if pval < 0.05:
+            reward = np.median(self.previous_runtimes) - np.median(runtimes)
+        # Otherwise the runtimes are not significantly different and the
+        # reward is 0.
+        else:
+            reward = 0.0
+
+        # Update the previous runtimes for the next step.
+        self.previous_runtimes = runtimes
+        return reward
diff --git a/compiler_gym/wrappers/__init__.py b/compiler_gym/wrappers/__init__.py
index ae181bd28..16d5ce4f7 100644
--- a/compiler_gym/wrappers/__init__.py
+++ b/compiler_gym/wrappers/__init__.py
@@ -48,7 +48,10 @@ from compiler_gym.wrappers.fork import ForkOnStep
 
 if config.enable_llvm_env:
-    from compiler_gym.wrappers.llvm import RuntimePointEstimateReward  # noqa: F401
+    from compiler_gym.wrappers.llvm import (
+        RuntimePointEstimateReward,  # noqa: F401
+        RuntimeSeriesEstimateReward,  # noqa: F401
+    )
     from compiler_gym.wrappers.sqlite_logger import (  # noqa: F401
         SynchronousSqliteLogger,
     )
@@ -76,4 +79,5 @@
 
 if config.enable_llvm_env:
     __all__.append("RuntimePointEstimateReward")
+    __all__.append("RuntimeSeriesEstimateReward")
     __all__.append("SynchronousSqliteLogger")
diff --git a/compiler_gym/wrappers/llvm.py b/compiler_gym/wrappers/llvm.py
index fe4a8a29b..e50881084 100644
--- a/compiler_gym/wrappers/llvm.py
+++ b/compiler_gym/wrappers/llvm.py
@@ -9,6 +9,7 @@
 from compiler_gym.envs.llvm import LlvmEnv
-from compiler_gym.spaces import RuntimeReward
+from compiler_gym.spaces import RuntimeReward, RuntimeSeriesReward
 from compiler_gym.wrappers import CompilerEnvWrapper
 
@@ -67,3 +68,53 @@ def fork(self) -> "RuntimePointEstimateReward":
             warmup_count=self.reward.spaces["runtime"].warmup_count,
             estimator=self.reward.spaces["runtime"].estimator,
         )
+
+
+class RuntimeSeriesEstimateReward(CompilerEnvWrapper):
+    """LLVM wrapper that estimates the runtime of a program from a series of N
+    runtime observations and uses the change between steps as the reward.
+
+    This class wraps an LLVM environment and registers a new runtime reward
+    space. It is similar to RuntimePointEstimateReward, except that the reward
+    is nonzero only when the new series of runtimes differs significantly from
+    the series measured at the previous step.
+
+    See RuntimeSeriesReward for more details.
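+
+    Example usage (an illustrative sketch; assumes the environment's benchmark
+    is runnable):
+
+        >>> import compiler_gym
+        >>> env = RuntimeSeriesEstimateReward(compiler_gym.make("llvm-v0"))
+        >>> env.reset()
+        >>> _, reward, done, info = env.step(env.action_space.sample())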
+ """ + + def __init__( + self, + env: LlvmEnv, + runtime_count: int = 30, + warmup_count: int = 0, + ): + """Constructor. + :param env: The environment to wrap. + :param runtime_count: The number of times to execute the binary when + estimating the runtime. + :param warmup_count: The number of warmup runs of the binary to perform + before measuring the runtime. + """ + super().__init__(env) + + self.env.unwrapped.reward.add_space( + RuntimeSeriesReward( + runtime_count=runtime_count, + warmup_count=warmup_count, + ) + ) + self.env.unwrapped.reward_space = "runtimeseries" + + self.env.unwrapped.runtime_observation_count = runtime_count + self.env.unwrapped.runtime_warmup_runs_count = warmup_count + + def fork(self) -> "RuntimeSeriesEstimateReward": + fkd = self.env.fork() + # Remove the original "runtimeseries" space so that we that new + # RuntimeSeriesEstimateReward wrapper instance does not attempt to + # redefine, raising a warning. + del fkd.unwrapped.reward.spaces["runtimeseries"] + return RuntimeSeriesEstimateReward( + env=fkd, + runtime_count=self.reward.spaces["runtimeseries"].runtime_count, + warmup_count=self.reward.spaces["runtimeseries"].warmup_count, + ) diff --git a/examples/llvm_autotuning/autotuners/nevergrad_.py b/examples/llvm_autotuning/autotuners/nevergrad_.py index bacea33d8..4e1ae0cdb 100644 --- a/examples/llvm_autotuning/autotuners/nevergrad_.py +++ b/examples/llvm_autotuning/autotuners/nevergrad_.py @@ -29,7 +29,10 @@ def nevergrad( https://facebookresearch.github.io/nevergrad/ """ - if optimization_target == OptimizationTarget.RUNTIME: + if ( + optimization_target == OptimizationTarget.RUNTIME or + optimization_target == OptimizationTarget.RUNTIME_SERIES + ): def calculate_negative_reward(actions: Tuple[ActionType]) -> float: env.reset() diff --git a/examples/llvm_autotuning/optimization_target.py b/examples/llvm_autotuning/optimization_target.py index 7baeba1cb..0672cd4c5 100644 --- a/examples/llvm_autotuning/optimization_target.py +++ b/examples/llvm_autotuning/optimization_target.py @@ -15,6 +15,7 @@ from compiler_gym.datasets import Benchmark from compiler_gym.envs import LlvmEnv from compiler_gym.wrappers import RuntimePointEstimateReward +from compiler_gym.wrappers import RuntimeSeriesEstimateReward logger = logging.getLogger(__name__) @@ -25,6 +26,7 @@ class OptimizationTarget(str, Enum): CODESIZE = "codesize" BINSIZE = "binsize" RUNTIME = "runtime" + RUNTIME_SERIES = "runtimeseries" @property def optimization_space_enum_name(self) -> str: @@ -32,6 +34,7 @@ def optimization_space_enum_name(self) -> str: OptimizationTarget.CODESIZE: "IrInstructionCount", OptimizationTarget.BINSIZE: "ObjectTextSizeBytes", OptimizationTarget.RUNTIME: "Runtime", + OptimizationTarget.RUNTIME_SERIES: "RuntimeSeries", }[self.value] def make_env(self, benchmark: Union[str, Benchmark]) -> LlvmEnv: @@ -50,6 +53,8 @@ def make_env(self, benchmark: Union[str, Benchmark]) -> LlvmEnv: env.reward_space = "ObjectTextSizeOz" elif self.value == OptimizationTarget.RUNTIME: env = RuntimePointEstimateReward(env, warmup_count=0, runtime_count=3) + elif self.value == OptimizationTarget.RUNTIME_SERIES: + env = RuntimeSeriesEstimateReward(env, warmup_count=5, runtime_count=30) else: assert False, f"Unknown OptimizationTarget: {self.value}" @@ -89,7 +94,10 @@ def final_reward(self, env: LlvmEnv, runtime_count: int = 30) -> float: env.observation.ObjectTextSizeBytes(), 1 ) - if self.value == OptimizationTarget.RUNTIME: + if ( + self.value == OptimizationTarget.RUNTIME or + self.value == 
diff --git a/examples/llvm_autotuning/optimization_target.py b/examples/llvm_autotuning/optimization_target.py
index 7baeba1cb..0672cd4c5 100644
--- a/examples/llvm_autotuning/optimization_target.py
+++ b/examples/llvm_autotuning/optimization_target.py
@@ -15,6 +15,7 @@
 from compiler_gym.datasets import Benchmark
 from compiler_gym.envs import LlvmEnv
-from compiler_gym.wrappers import RuntimePointEstimateReward
+from compiler_gym.wrappers import (
+    RuntimePointEstimateReward,
+    RuntimeSeriesEstimateReward,
+)
 
 logger = logging.getLogger(__name__)
@@ -25,6 +26,7 @@ class OptimizationTarget(str, Enum):
     CODESIZE = "codesize"
     BINSIZE = "binsize"
     RUNTIME = "runtime"
+    RUNTIME_SERIES = "runtimeseries"
 
     @property
     def optimization_space_enum_name(self) -> str:
@@ -32,6 +34,7 @@ def optimization_space_enum_name(self) -> str:
             OptimizationTarget.CODESIZE: "IrInstructionCount",
             OptimizationTarget.BINSIZE: "ObjectTextSizeBytes",
             OptimizationTarget.RUNTIME: "Runtime",
+            OptimizationTarget.RUNTIME_SERIES: "RuntimeSeries",
         }[self.value]
 
     def make_env(self, benchmark: Union[str, Benchmark]) -> LlvmEnv:
@@ -50,6 +53,8 @@ def make_env(self, benchmark: Union[str, Benchmark]) -> LlvmEnv:
             env.reward_space = "ObjectTextSizeOz"
         elif self.value == OptimizationTarget.RUNTIME:
             env = RuntimePointEstimateReward(env, warmup_count=0, runtime_count=3)
+        elif self.value == OptimizationTarget.RUNTIME_SERIES:
+            env = RuntimeSeriesEstimateReward(env, warmup_count=5, runtime_count=30)
         else:
             assert False, f"Unknown OptimizationTarget: {self.value}"
@@ -89,7 +94,10 @@ def final_reward(self, env: LlvmEnv, runtime_count: int = 30) -> float:
             env.observation.ObjectTextSizeBytes(), 1
         )
 
-        if self.value == OptimizationTarget.RUNTIME:
+        if self.value in (
+            OptimizationTarget.RUNTIME,
+            OptimizationTarget.RUNTIME_SERIES,
+        ):
             with _RUNTIME_LOCK:
                 with compiler_gym.make("llvm-v0", benchmark=env.benchmark) as new_env:
                     new_env.reset()
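
Note on the reward computation: RuntimeSeriesReward.update() implements a
significance-gated median difference. The following minimal standalone sketch
illustrates the logic with two hypothetical runtime series (the values are
illustrative and not taken from this patch):

    import numpy as np
    import scipy.stats

    previous_runtimes = [1.02, 1.01, 1.03, 1.02, 1.01]  # seconds, previous step
    runtimes = [0.91, 0.92, 0.90, 0.93, 0.91]  # seconds, current step

    # Kruskal-Wallis H-test: a small p-value indicates that the two samples
    # are unlikely to be drawn from the same distribution.
    _, pval = scipy.stats.kruskal(runtimes, previous_runtimes)

    if pval < 0.05:
        # Positive reward for a reduction in median runtime.
        reward = np.median(previous_runtimes) - np.median(runtimes)
    else:
        reward = 0.0

    print(round(reward, 2))  # 0.11: a statistically significant speedup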