@@ -48,6 +48,61 @@ def test_build_prompt(self):
4848 self .assertIn ("def test(): pass" , prompt ["user" ])
4949 self .assertIn ("score: 0.5" , prompt ["user" ])
5050
def test_metric_minimization_feature(self):
    """Check improvement/regression reporting for plain and "(-)" metrics.

    The fixture pairs every plain metric with a twin whose name ends in
    "(-)". Against both previous programs:

    * "improvement" is higher and "improvement(-)" is lower; both are
      expected in the improvement line.
    * "regression" is lower and "regression(-)" is higher; both are
      expected in the regression line.
    * "mixed" and "mixed(-)" sit between the two previous values and are
      expected to be absent from the output.
    """
    source = "def test(): pass"

    # Key order mirrors the order the names appear in the expected message.
    current_scores = {
        "improvement": 0.3,
        "improvement(-)": 0.1,
        "mixed": 0.3,
        "mixed(-)": 0.3,
        "regression": 0.1,
        "regression(-)": 0.5,
    }

    # Two earlier attempts whose scores bracket or straddle the current ones.
    first_attempt = {
        "id": "prev1",
        "code": "def prev1(): pass",
        "metrics": {
            "improvement": 0.1,
            "improvement(-)": 0.2,
            "mixed": 0.1,
            "mixed(-)": 0.5,
            "regression": 0.5,
            "regression(-)": 0.3,
        },
    }
    second_attempt = {
        "id": "prev2",
        "code": "def prev2(): pass",
        "metrics": {
            "improvement": 0.2,
            "improvement(-)": 0.3,
            "mixed": 0.5,
            "mixed(-)": 0.1,
            "regression": 0.7,
            "regression(-)": 0.2,
        },
    }

    actual = self.prompt_sampler._identify_improvement_areas(
        current_program=source,
        parent_program=source,
        metrics=current_scores,
        previous_programs=[first_attempt, second_attempt],
    )

    improved_line = (
        "- Metrics showing improvement: improvement, improvement(-). Consider continuing with similar changes."
    )
    regressed_line = (
        "- Metrics showing regression: regression, regression(-). Consider reverting or revising recent changes in these areas."
    )
    # NOTE(review): the separator is a newline followed by one space --
    # confirm this matches what _identify_improvement_areas emits.
    wanted = "\n ".join([improved_line, regressed_line])
    self.assertEqual(actual, wanted)
106+
# Run the test suite only when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
0 commit comments