10 changes: 9 additions & 1 deletion src/llama_prompt_ops/core/prompt_strategies.py
@@ -316,7 +316,7 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:
             max_labeled_demos=self.max_labeled_demos,
             auto=dspy_auto_mode, # Use the mapped value
             num_candidates=self.num_candidates,
-            num_threads=self.num_threads,
+            # num_threads is passed via eval_kwargs in compile() call instead
             max_errors=self.max_errors,
             seed=self.seed,
             init_temperature=self.init_temperature,
@@ -530,10 +530,18 @@ def custom_propose_instructions(self, *args, **kwargs):
         try:
             # Call compile with all parameters
             logging.info("Calling optimizer.compile")
+
+            # Configure eval_kwargs to pass arguments to dspy.evaluate.Evaluate,
+            # which is used internally by the compile method. This is the correct
+            # way to set num_threads for parallel evaluation in MIPROv2.
+            # Note: num_threads should NOT be passed to the MIPROv2 constructor.
+            eval_kwargs = {"num_threads": self.num_threads}
+
             optimized_program = optimizer.compile(
                 program,
                 trainset=self.trainset,
                 valset=self.valset,
+                eval_kwargs=eval_kwargs, # Pass num_threads to internal evaluator
                 num_trials=self.num_trials,
                 minibatch=self.minibatch,
                 minibatch_size=self.minibatch_size,
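Taken together, the two hunks above boil down to the call pattern below. This is a minimal sketch for illustration only: the argument values and surrounding names (program, trainset, valset, metric) are placeholders, only the keyword arguments that appear in this diff are assumed to exist on dspy.MIPROv2 and its compile() method, and whether they match any particular DSPy release is not guaranteed.

import dspy


def compile_with_threads(program, trainset, valset, metric, num_threads=4):
    # Sketch of the corrected pattern: the optimizer is built WITHOUT num_threads...
    optimizer = dspy.MIPROv2(
        metric=metric,       # assumption: the metric is accepted by the constructor
        auto="light",        # placeholder for the mapped dspy_auto_mode value
        num_candidates=5,
        max_labeled_demos=2,
        # num_threads intentionally NOT passed here (this raised the TypeError before the fix)
        max_errors=3,
        seed=9,
        init_temperature=0.5,
    )
    # ...and num_threads reaches dspy.evaluate.Evaluate via eval_kwargs at compile time.
    return optimizer.compile(
        program,
        trainset=trainset,
        valset=valset,
        eval_kwargs={"num_threads": num_threads},
        num_trials=1,
        minibatch=True,
        minibatch_size=25,
    )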
111 changes: 111 additions & 0 deletions tests/integration/test_cli_integration.py
@@ -304,3 +304,114 @@ def test_end_to_end_cli_flow(self, mock_api_key_check, temp_config_file):
# Clean up the temporary output file
if os.path.exists(output_path):
os.unlink(output_path)

def test_cli_migrate_with_num_threads_e2e(self, mock_api_key_check):
"""
End-to-end CLI test for the num_threads parameter-passing bug fix.

This test verifies that the num_threads setting from the config file
is correctly passed through the entire CLI pipeline without causing
the TypeError that was fixed.
"""
import tempfile

import yaml
from click.testing import CliRunner

runner = CliRunner()

# Create a config that specifically includes num_threads to test the fix
test_config = {
"dataset": {
"path": "test_data.json",
"input_field": ["inputs", "question"],
"golden_output_field": ["outputs", "answer"],
},
"model": {"name": "gpt-3.5-turbo", "temperature": 0.7},
"metric": {"class": "llama_prompt_ops.core.metrics.FacilityMetric"},
"optimization": {
"strategy": "basic",
"num_threads": 3, # Specific value to test the fix
"max_bootstrapped_demos": 2,
"num_trials": 1,
},
}

# Create temporary config file
with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
yaml.dump(test_config, f)
config_path = f.name

try:
# Mock all external dependencies but let the CLI process the config
mock_migrator = MagicMock()
mock_optimized = MagicMock()
mock_optimized.signature.instructions = "Test optimized prompt"
mock_migrator.optimize.return_value = mock_optimized
mock_migrator.load_dataset_with_adapter.return_value = ([], [], [])

# Create a strategy that would trigger the original bug
mock_strategy = MagicMock()
mock_strategy.num_threads = 3 # This should match our config

with (
patch(
"llama_prompt_ops.interfaces.cli.PromptMigrator",
return_value=mock_migrator,
),
patch(
"llama_prompt_ops.interfaces.cli.get_dataset_adapter_from_config",
return_value=MagicMock(),
),
patch(
"llama_prompt_ops.interfaces.cli.get_models_from_config",
return_value=(None, None),
),
patch(
"llama_prompt_ops.interfaces.cli.get_metric",
return_value=MagicMock(),
),
# This is the critical patch - ensure strategy gets created with num_threads
patch(
"llama_prompt_ops.interfaces.cli.get_strategy",
return_value=mock_strategy,
),
# Mock the actual strategy execution to verify parameters
patch(
"llama_prompt_ops.core.prompt_strategies.BasicOptimizationStrategy"
) as mock_strategy_class,
):
# Configure the mock strategy class
mock_strategy_instance = MagicMock()
mock_strategy_class.return_value = mock_strategy_instance

# The critical test: CLI should process config with num_threads without error
result = runner.invoke(cli, ["migrate", "--config", config_path])

# Debug output if there's an error
if result.exit_code != 0:
print(f"CLI Error: {result.output}")
if result.exception:
print(f"Exception: {result.exception}")
import traceback

print(
f"Traceback: {''.join(traceback.format_exception(type(result.exception), result.exception, result.exception.__traceback__))}"
)

# The test passes if the CLI completes without crashing
# (the original bug surfaced as a TypeError once num_threads reached the MIPROv2 constructor)
assert (
result.exit_code == 0
), f"CLI should complete successfully, got: {result.output}"

# Verify that our configuration was processed
# (The actual strategy creation may be mocked, but config parsing should work)
print(
"✅ E2E CLI test passed: num_threads config processed without TypeError"
)

finally:
# Clean up temporary config file
if os.path.exists(config_path):
os.unlink(config_path)
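For reference, the in-memory test_config above corresponds to a user-facing YAML file roughly like the one embedded in the sketch below. The import path of the cli group is an assumption inferred from the patch targets used in this test, and the dataset path and model name are the same placeholder values the test uses.

from click.testing import CliRunner

# Assumption: the click group invoked above lives in this module.
from llama_prompt_ops.interfaces.cli import cli

# YAML equivalent of the test_config dict, as a user would write it on disk.
CONFIG_YAML = """\
dataset:
  path: test_data.json
  input_field: [inputs, question]
  golden_output_field: [outputs, answer]
model:
  name: gpt-3.5-turbo
  temperature: 0.7
metric:
  class: llama_prompt_ops.core.metrics.FacilityMetric
optimization:
  strategy: basic
  num_threads: 3              # the setting this regression test exercises
  max_bootstrapped_demos: 2
  num_trials: 1
"""


def invoke_migrate(config_path):
    # Drive the same entry point the test uses: `migrate --config <path>`.
    runner = CliRunner()
    result = runner.invoke(cli, ["migrate", "--config", config_path])
    assert result.exit_code == 0, result.output
    return result.output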
147 changes: 147 additions & 0 deletions tests/integration/test_core_integration.py
@@ -310,3 +310,150 @@ def test_end_to_end_flow_with_mocks(facility_config_path):
# Check results
assert result is not None
assert result.signature.instructions == "Optimized prompt"


@pytest.mark.skipif(
not CORE_COMPONENTS_AVAILABLE,
reason=get_core_skip_reason() or "Core components available",
)
def test_basic_optimization_strategy_num_threads_integration():
"""
Integration test for the MIPROv2 num_threads bug fix.

This test verifies that BasicOptimizationStrategy correctly passes num_threads
to the dspy library without causing parameter errors. This is a regression test
for the bug where num_threads was incorrectly passed to the MIPROv2 constructor.
"""
import json
import tempfile

# Create minimal test data
test_data = [
{
"inputs": {"question": "Test maintenance request"},
"outputs": {
"answer": json.dumps(
{
"categories": {"routine_maintenance_requests": True},
"sentiment": "neutral",
"urgency": "low",
}
)
},
},
{
"inputs": {"question": "Emergency repair needed"},
"outputs": {
"answer": json.dumps(
{
"categories": {"emergency_repair_services": True},
"sentiment": "urgent",
"urgency": "high",
}
)
},
},
]

# Create temporary dataset file
with tempfile.NamedTemporaryFile(mode="w+", suffix=".json", delete=False) as tmp:
json.dump(test_data, tmp)
tmp_path = tmp.name

try:
# Load dataset
adapter = ConfigurableJSONAdapter(
dataset_path=tmp_path,
input_field=["inputs", "question"],
golden_output_field=["outputs", "answer"],
)

dataset = adapter.adapt()

# Create strategy with specific num_threads value to test the fix
strategy = BasicOptimizationStrategy(
num_threads=2, # Specific value to verify correct parameter passing
max_bootstrapped_demos=1, # Minimal for faster testing
max_labeled_demos=1,
num_trials=1, # Single trial for speed
metric=FacilityMetric(),
)

# Set up datasets (minimal size for integration test)
strategy.trainset = dataset[:1] # Use just one example
strategy.valset = dataset[1:2] if len(dataset) > 1 else dataset[:1]

# Mock the models to avoid real API calls but test dspy integration
with patch("dspy.LM") as mock_lm:
# Configure mock to return valid responses
mock_instance = MagicMock()
mock_lm.return_value = mock_instance

# The key test: verify that strategy instantiation and basic setup
# works without TypeError from incorrect num_threads parameter passing
strategy.task_model = mock_instance
strategy.prompt_model = mock_instance

prompt_data = {
"text": "Categorize customer messages",
"inputs": ["question"],
"outputs": ["answer"],
}

# This is the critical test - the strategy should be able to configure
# without throwing a TypeError about num_threads parameter
try:
# We patch the actual dspy.MIPROv2 to verify it's called correctly
with (
patch("dspy.MIPROv2") as mock_mipro,
patch("dspy.ChainOfThought") as mock_cot,
):

mock_optimizer = MagicMock()
mock_mipro.return_value = mock_optimizer
mock_program = MagicMock()
mock_cot.return_value = mock_program
mock_optimizer.compile.return_value = mock_program

# This call should succeed without TypeError
result = strategy.run(prompt_data)

# Verify correct API usage:
# 1. num_threads should NOT be in MIPROv2 constructor
mock_mipro.assert_called_once()
constructor_kwargs = mock_mipro.call_args.kwargs
assert (
"num_threads" not in constructor_kwargs
), "num_threads should not be passed to MIPROv2 constructor"

# 2. num_threads SHOULD be in compile eval_kwargs
mock_optimizer.compile.assert_called_once()
compile_kwargs = mock_optimizer.compile.call_args.kwargs
assert (
"eval_kwargs" in compile_kwargs
), "eval_kwargs should be present in compile call"
assert (
compile_kwargs["eval_kwargs"]["num_threads"] == 2
), "num_threads should be correctly passed via eval_kwargs"

# 3. Strategy should return a result
assert result is not None

print(
"✅ Integration test passed: num_threads correctly handled by dspy"
)

except TypeError as e:
if "num_threads" in str(e):
pytest.fail(
f"Bug regression detected: {e}. "
"The num_threads parameter is being incorrectly passed to MIPROv2 constructor."
)
else:
# Re-raise other TypeErrors as they might be legitimate
raise

finally:
# Clean up temporary file
if os.path.exists(tmp_path):
os.unlink(tmp_path)
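The constructor/compile assertions above could also be factored into a small reusable helper. This is a sketch under the same assumptions the test makes, namely that the strategy builds dspy.MIPROv2 exactly once inside run() and calls its compile() exactly once; the helper name itself is hypothetical.

from unittest.mock import MagicMock, patch


def assert_threads_routed_via_eval_kwargs(strategy, prompt_data, expected_threads):
    # Regression helper: num_threads must bypass the MIPROv2 constructor
    # and arrive at compile() inside eval_kwargs.
    with patch("dspy.MIPROv2") as mock_mipro, patch("dspy.ChainOfThought"):
        mock_optimizer = MagicMock()
        mock_mipro.return_value = mock_optimizer
        mock_optimizer.compile.return_value = MagicMock()

        strategy.run(prompt_data)

        # 1. The constructor must not receive num_threads.
        assert "num_threads" not in mock_mipro.call_args.kwargs
        # 2. compile() must receive it via eval_kwargs.
        compile_kwargs = mock_optimizer.compile.call_args.kwargs
        assert compile_kwargs["eval_kwargs"]["num_threads"] == expected_threads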