10 changes: 9 additions & 1 deletion src/llama_prompt_ops/core/prompt_strategies.py
@@ -316,7 +316,7 @@ def run(self, prompt_data: Dict[str, Any]) -> Any:
             max_labeled_demos=self.max_labeled_demos,
             auto=dspy_auto_mode, # Use the mapped value
             num_candidates=self.num_candidates,
-            num_threads=self.num_threads,
+            # num_threads is passed via eval_kwargs in compile() call instead
             max_errors=self.max_errors,
             seed=self.seed,
             init_temperature=self.init_temperature,
@@ -530,10 +530,18 @@ def custom_propose_instructions(self, *args, **kwargs):
         try:
             # Call compile with all parameters
             logging.info("Calling optimizer.compile")
+
+            # Configure eval_kwargs to pass arguments to dspy.evaluate.Evaluate,
+            # which is used internally by the compile method. This is the correct
+            # way to set num_threads for parallel evaluation in MIPROv2.
+            # Note: num_threads should NOT be passed to the MIPROv2 constructor.
+            eval_kwargs = {"num_threads": self.num_threads}
+
             optimized_program = optimizer.compile(
                 program,
                 trainset=self.trainset,
                 valset=self.valset,
+                eval_kwargs=eval_kwargs, # Pass num_threads to internal evaluator
                 num_trials=self.num_trials,
                 minibatch=self.minibatch,
                 minibatch_size=self.minibatch_size,
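Taken together, the two hunks above boil down to the call pattern below. This is a minimal sketch for illustration only: the argument values and surrounding names (program, trainset, valset, metric) are placeholders, only the keyword arguments that appear in this diff are assumed to exist on dspy.MIPROv2 and its compile() method, and whether they match any particular DSPy release is not guaranteed.

import dspy


def compile_with_threads(program, trainset, valset, metric, num_threads=4):
    # Sketch of the corrected pattern: the optimizer is built WITHOUT num_threads...
    optimizer = dspy.MIPROv2(
        metric=metric,       # assumption: the metric is accepted by the constructor
        auto="light",        # placeholder for the mapped dspy_auto_mode value
        num_candidates=5,
        max_labeled_demos=2,
        # num_threads intentionally NOT passed here (this raised the TypeError before the fix)
        max_errors=3,
        seed=9,
        init_temperature=0.5,
    )
    # ...and num_threads reaches dspy.evaluate.Evaluate via eval_kwargs at compile time.
    return optimizer.compile(
        program,
        trainset=trainset,
        valset=valset,
        eval_kwargs={"num_threads": num_threads},
        num_trials=1,
        minibatch=True,
        minibatch_size=25,
    )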
111 changes: 111 additions & 0 deletions tests/integration/test_cli_integration.py
@@ -304,3 +304,114 @@ def test_end_to_end_cli_flow(self, mock_api_key_check, temp_config_file):
# Clean up the temporary output file
if os.path.exists(output_path):
os.unlink(output_path)

def test_cli_migrate_with_num_threads_e2e(self, mock_api_key_check):
"""
End-to-end CLI test for the num_threads parameter-passing bug fix.

This test verifies that the num_threads setting from the config file
is correctly passed through the entire CLI pipeline without causing
the TypeError that was fixed.
"""
import tempfile

import yaml
from click.testing import CliRunner

runner = CliRunner()

# Create a config that specifically includes num_threads to test the fix
test_config = {
"dataset": {
"path": "test_data.json",
"input_field": ["inputs", "question"],
"golden_output_field": ["outputs", "answer"],
},
"model": {"name": "gpt-3.5-turbo", "temperature": 0.7},
"metric": {"class": "llama_prompt_ops.core.metrics.FacilityMetric"},
"optimization": {
"strategy": "basic",
"num_threads": 3, # Specific value to test the fix
"max_bootstrapped_demos": 2,
"num_trials": 1,
},
}

# Create temporary config file
with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
yaml.dump(test_config, f)
config_path = f.name

try:
# Mock all external dependencies but let the CLI process the config
mock_migrator = MagicMock()
mock_optimized = MagicMock()
mock_optimized.signature.instructions = "Test optimized prompt"
mock_migrator.optimize.return_value = mock_optimized
mock_migrator.load_dataset_with_adapter.return_value = ([], [], [])

# Create a strategy that would trigger the original bug
mock_strategy = MagicMock()
mock_strategy.num_threads = 3 # This should match our config

with (
patch(
"llama_prompt_ops.interfaces.cli.PromptMigrator",
return_value=mock_migrator,
),
patch(
"llama_prompt_ops.interfaces.cli.get_dataset_adapter_from_config",
return_value=MagicMock(),
),
patch(
"llama_prompt_ops.interfaces.cli.get_models_from_config",
return_value=(None, None),
),
patch(
"llama_prompt_ops.interfaces.cli.get_metric",
return_value=MagicMock(),
),
# This is the critical patch - ensure strategy gets created with num_threads
patch(
"llama_prompt_ops.interfaces.cli.get_strategy",
return_value=mock_strategy,
),
# Mock the actual strategy execution to verify parameters
patch(
"llama_prompt_ops.core.prompt_strategies.BasicOptimizationStrategy"
) as mock_strategy_class,
):
# Configure the mock strategy class
mock_strategy_instance = MagicMock()
mock_strategy_class.return_value = mock_strategy_instance

# The critical test: CLI should process config with num_threads without error
result = runner.invoke(cli, ["migrate", "--config", config_path])

# Debug output if there's an error
if result.exit_code != 0:
print(f"CLI Error: {result.output}")
if result.exception:
print(f"Exception: {result.exception}")
import traceback

print(
f"Traceback: {''.join(traceback.format_exception(type(result.exception), result.exception, result.exception.__traceback__))}"
)

# The test passes if the CLI completes without crashing
# (the original bug surfaced as a TypeError once num_threads reached the MIPROv2 constructor)
assert (
result.exit_code == 0
), f"CLI should complete successfully, got: {result.output}"

# Verify that our configuration was processed
# (The actual strategy creation may be mocked, but config parsing should work)
print(
"✅ E2E CLI test passed: num_threads config processed without TypeError"
)

finally:
# Clean up temporary config file
if os.path.exists(config_path):
os.unlink(config_path)
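For reference, the in-memory test_config above corresponds to a user-facing YAML file roughly like the one embedded in the sketch below. The import path of the cli group is an assumption inferred from the patch targets used in this test, and the dataset path and model name are the same placeholder values the test uses.

from click.testing import CliRunner

# Assumption: the click group invoked above lives in this module.
from llama_prompt_ops.interfaces.cli import cli

# YAML equivalent of the test_config dict, as a user would write it on disk.
CONFIG_YAML = """\
dataset:
  path: test_data.json
  input_field: [inputs, question]
  golden_output_field: [outputs, answer]
model:
  name: gpt-3.5-turbo
  temperature: 0.7
metric:
  class: llama_prompt_ops.core.metrics.FacilityMetric
optimization:
  strategy: basic
  num_threads: 3              # the setting this regression test exercises
  max_bootstrapped_demos: 2
  num_trials: 1
"""


def invoke_migrate(config_path):
    # Drive the same entry point the test uses: `migrate --config <path>`.
    runner = CliRunner()
    result = runner.invoke(cli, ["migrate", "--config", config_path])
    assert result.exit_code == 0, result.output
    return result.output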
147 changes: 147 additions & 0 deletions tests/integration/test_core_integration.py
@@ -310,3 +310,150 @@ def test_end_to_end_flow_with_mocks(facility_config_path):
# Check results
assert result is not None
assert result.signature.instructions == "Optimized prompt"


@pytest.mark.skipif(
not CORE_COMPONENTS_AVAILABLE,
reason=get_core_skip_reason() or "Core components available",
)
def test_basic_optimization_strategy_num_threads_integration():
"""
Integration test for the MIPROv2 num_threads bug fix.

This test verifies that BasicOptimizationStrategy correctly passes num_threads
to the dspy library without causing parameter errors. This is a regression test
for the bug where num_threads was incorrectly passed to the MIPROv2 constructor.
"""
import json
import tempfile

# Create minimal test data
test_data = [
{
"inputs": {"question": "Test maintenance request"},
"outputs": {
"answer": json.dumps(
{
"categories": {"routine_maintenance_requests": True},
"sentiment": "neutral",
"urgency": "low",
}
)
},
},
{
"inputs": {"question": "Emergency repair needed"},
"outputs": {
"answer": json.dumps(
{
"categories": {"emergency_repair_services": True},
"sentiment": "urgent",
"urgency": "high",
}
)
},
},
]

# Create temporary dataset file
with tempfile.NamedTemporaryFile(mode="w+", suffix=".json", delete=False) as tmp:
json.dump(test_data, tmp)
tmp_path = tmp.name

try:
# Load dataset
adapter = ConfigurableJSONAdapter(
dataset_path=tmp_path,
input_field=["inputs", "question"],
golden_output_field=["outputs", "answer"],
)

dataset = adapter.adapt()

# Create strategy with specific num_threads value to test the fix
strategy = BasicOptimizationStrategy(
num_threads=2, # Specific value to verify correct parameter passing
max_bootstrapped_demos=1, # Minimal for faster testing
max_labeled_demos=1,
num_trials=1, # Single trial for speed
metric=FacilityMetric(),
)

# Set up datasets (minimal size for integration test)
strategy.trainset = dataset[:1] # Use just one example
strategy.valset = dataset[1:2] if len(dataset) > 1 else dataset[:1]

# Mock the models to avoid real API calls but test dspy integration
with patch("dspy.LM") as mock_lm:
# Configure mock to return valid responses
mock_instance = MagicMock()
mock_lm.return_value = mock_instance

# The key test: verify that strategy instantiation and basic setup
# works without TypeError from incorrect num_threads parameter passing
strategy.task_model = mock_instance
strategy.prompt_model = mock_instance

prompt_data = {
"text": "Categorize customer messages",
"inputs": ["question"],
"outputs": ["answer"],
}

# This is the critical test - the strategy should be able to configure
# without throwing a TypeError about num_threads parameter
try:
# We patch the actual dspy.MIPROv2 to verify it's called correctly
with (
patch("dspy.MIPROv2") as mock_mipro,
patch("dspy.ChainOfThought") as mock_cot,
):

mock_optimizer = MagicMock()
mock_mipro.return_value = mock_optimizer
mock_program = MagicMock()
mock_cot.return_value = mock_program
mock_optimizer.compile.return_value = mock_program

# This call should succeed without TypeError
result = strategy.run(prompt_data)

# Verify correct API usage:
# 1. num_threads should NOT be in MIPROv2 constructor
mock_mipro.assert_called_once()
constructor_kwargs = mock_mipro.call_args.kwargs
assert (
"num_threads" not in constructor_kwargs
), "num_threads should not be passed to MIPROv2 constructor"

# 2. num_threads SHOULD be in compile eval_kwargs
mock_optimizer.compile.assert_called_once()
compile_kwargs = mock_optimizer.compile.call_args.kwargs
assert (
"eval_kwargs" in compile_kwargs
), "eval_kwargs should be present in compile call"
assert (
compile_kwargs["eval_kwargs"]["num_threads"] == 2
), "num_threads should be correctly passed via eval_kwargs"

# 3. Strategy should return a result
assert result is not None

print(
"✅ Integration test passed: num_threads correctly handled by dspy"
)

except TypeError as e:
if "num_threads" in str(e):
pytest.fail(
f"Bug regression detected: {e}. "
"The num_threads parameter is being incorrectly passed to MIPROv2 constructor."
)
else:
# Re-raise other TypeErrors as they might be legitimate
raise

finally:
# Clean up temporary file
if os.path.exists(tmp_path):
os.unlink(tmp_path)
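The constructor/compile assertions above could also be factored into a small reusable helper. This is a sketch under the same assumptions the test makes, namely that the strategy builds dspy.MIPROv2 exactly once inside run() and calls its compile() exactly once; the helper name itself is hypothetical.

from unittest.mock import MagicMock, patch


def assert_threads_routed_via_eval_kwargs(strategy, prompt_data, expected_threads):
    # Regression helper: num_threads must bypass the MIPROv2 constructor
    # and arrive at compile() inside eval_kwargs.
    with patch("dspy.MIPROv2") as mock_mipro, patch("dspy.ChainOfThought"):
        mock_optimizer = MagicMock()
        mock_mipro.return_value = mock_optimizer
        mock_optimizer.compile.return_value = MagicMock()

        strategy.run(prompt_data)

        # 1. The constructor must not receive num_threads.
        assert "num_threads" not in mock_mipro.call_args.kwargs
        # 2. compile() must receive it via eval_kwargs.
        compile_kwargs = mock_optimizer.compile.call_args.kwargs
        assert compile_kwargs["eval_kwargs"]["num_threads"] == expected_threads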