From 9eaf1c7b80f494abc5192615ac38cbe5254b240b Mon Sep 17 00:00:00 2001 From: MaxwellJryao Date: Wed, 1 Apr 2026 14:10:48 +0800 Subject: [PATCH 1/2] [reward] fix: use ProcessPoolExecutor in math_verify for thread-safe timeout math_verify uses signal.alarm() for timeout, which only works in the main thread. In verl's RewardLoopWorker, compute_score is called from a thread pool (asyncio run_in_executor), causing signal.alarm() to raise ValueError. The previous fix (#5635) disabled timeout entirely (parsing_timeout=None), but this can cause hangs on adversarial inputs. Run verification in a subprocess via ProcessPoolExecutor where signal.alarm() works normally, with an outer future.result(timeout=30) as a fallback. Co-Authored-By: Claude Opus 4.6 (1M context) --- verl/utils/reward_score/math_verify.py | 51 +++++++++++++++++--------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/verl/utils/reward_score/math_verify.py b/verl/utils/reward_score/math_verify.py index 2e53b1d64e1..3a75081a8ac 100644 --- a/verl/utils/reward_score/math_verify.py +++ b/verl/utils/reward_score/math_verify.py @@ -12,34 +12,51 @@ # See the License for the specific language governing permissions and # limitations under the License. +import threading +from concurrent.futures import ProcessPoolExecutor +from concurrent.futures import TimeoutError as FuturesTimeoutError + try: from math_verify.errors import TimeoutException - from math_verify.grader import verify - from math_verify.parser import ExprExtractionConfig, LatexExtractionConfig, parse except ImportError: print("To use Math-Verify, please install it first by running `pip install math-verify`.") -_GOLD_TARGETS = (LatexExtractionConfig(),) -_PRED_TARGETS = (ExprExtractionConfig(), LatexExtractionConfig()) +_pool = None +_pool_lock = threading.Lock() -def compute_score(model_output: str, ground_truth: str, timeout_score: float = 0) -> float: - ret_score = 0.0 +def _get_pool(): + global _pool + if _pool is None: + with _pool_lock: + if _pool is None: + _pool = ProcessPoolExecutor(max_workers=4) + return _pool - # Wrap the ground truth in \boxed{} format for verification + +def _verify_in_subprocess(ground_truth_boxed: str, model_output: str) -> float: + """Run math_verify in a subprocess where signal.alarm() works.""" + from math_verify.grader import verify + from math_verify.parser import ExprExtractionConfig, LatexExtractionConfig, parse + + gold_targets = (LatexExtractionConfig(),) + pred_targets = (ExprExtractionConfig(), LatexExtractionConfig()) + + extracted_gold = parse(ground_truth_boxed, gold_targets) + extracted_pred = parse(model_output, pred_targets) + if extracted_gold and extracted_pred: + return max(1.0 if any(verify(g, p) for g in extracted_gold) else 0.0 for p in extracted_pred) + return 0.0 + + +def compute_score(model_output: str, ground_truth: str, timeout_score: float = 0, timeout: float = 30.0) -> float: + ret_score = 0.0 ground_truth_boxed = "\\boxed{" + ground_truth + "}" try: - # Use parsing_timeout=None and timeout_seconds=None to disable - # signal.alarm() which crashes in non-main threads (Ray workers). - extracted_gold = parse(ground_truth_boxed, _GOLD_TARGETS, parsing_timeout=None) - extracted_pred = parse(model_output, _PRED_TARGETS, parsing_timeout=None) - if extracted_gold and extracted_pred: - ret_score = max( - 1.0 if any(verify(g, p, timeout_seconds=None) for g in extracted_gold) else 0.0 for p in extracted_pred - ) - except TimeoutException: + future = _get_pool().submit(_verify_in_subprocess, ground_truth_boxed, model_output) + ret_score = future.result(timeout=timeout) + except (FuturesTimeoutError, TimeoutException): ret_score = timeout_score except Exception: pass - return ret_score From e320eade44b89f11873e2a06aea6fec03648a115 Mon Sep 17 00:00:00 2001 From: MaxwellJryao Date: Wed, 1 Apr 2026 14:34:13 +0800 Subject: [PATCH 2/2] address review: TimeoutException placeholder, spawn context, log errors - Define TimeoutException placeholder when math_verify is not installed - Use spawn mp_context to avoid fork deadlocks in threaded environments - Log exceptions instead of silently swallowing them --- verl/utils/reward_score/math_verify.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/verl/utils/reward_score/math_verify.py b/verl/utils/reward_score/math_verify.py index 3a75081a8ac..7071dacc02b 100644 --- a/verl/utils/reward_score/math_verify.py +++ b/verl/utils/reward_score/math_verify.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import multiprocessing import threading from concurrent.futures import ProcessPoolExecutor from concurrent.futures import TimeoutError as FuturesTimeoutError @@ -19,6 +20,10 @@ try: from math_verify.errors import TimeoutException except ImportError: + + class TimeoutException(Exception): + pass + print("To use Math-Verify, please install it first by running `pip install math-verify`.") _pool = None @@ -30,7 +35,7 @@ def _get_pool(): if _pool is None: with _pool_lock: if _pool is None: - _pool = ProcessPoolExecutor(max_workers=4) + _pool = ProcessPoolExecutor(max_workers=4, mp_context=multiprocessing.get_context("spawn")) return _pool @@ -57,6 +62,6 @@ def compute_score(model_output: str, ground_truth: str, timeout_score: float = 0 ret_score = future.result(timeout=timeout) except (FuturesTimeoutError, TimeoutException): ret_score = timeout_score - except Exception: - pass + except Exception as e: + print(f"Error in math_verify compute_score: {e}") return ret_score