HazyResearch · ShrayAlag · Mar 9, 2025 · Mar 9, 2025 · Mar 9, 2025 · Mar 13, 2025
diff --git a/code_test/app.py b/code_test/app.py
diff --git a/code_test/buggy_class.csv b/code_test/buggy_class.csv
@@ -0,0 +1,59 @@
+File,Buggy Code,Errors,Buggy Functions
+code_test/math_utils.py,"import math
+
+class MathUtilities:
+    def calculate_fibonacci(self, n: int) -> list:
+        """"""Generate the first n Fibonacci numbers using helper methods.""""""
+        if n <= 0:
+            return []
+
+        if n == 1:
+            return [0]
+
+        # Initialize sequence
+        fib_sequence = [0, 1]
+
+        # Generate remaining numbers using helper function
+        for i in range(2, n):
+            next_number = self._get_next_fibonacci(fib_sequence, i)
+            fib_sequence.append(next_number)
+
+        return fib_sequence
+
+    def _get_next_fibonacci(self, sequence: list, position: int) -> int:
+        """"""Helper method to calculate the next Fibonacci number.""""""
+        # Bug: Subtraction instead of addition
+        return sequence[position-1] - sequence[position-2]
+
+    def is_fibonacci_prime(self, n: int) -> list:
+        """"""Return Fibonacci numbers that are also prime.""""""
+        fib_numbers = self.calculate_fibonacci(n)
+        return [num for num in fib_numbers if self.is_prime(num)]
+
+    def is_prime(self, n: int) -> bool:
+        """"""Check if a number is prime.""""""
+        if n <= 1:
+            return False
+
+        # Bug: Using n-1 instead of sqrt(n)+1
+        for i in range(2, n-1):
+            if n % i == 0:
+                return False
+        return True
+
+    def gcd_of_consecutive_fibs(self, n: int) -> list:
+        """"""Calculate GCD of consecutive Fibonacci numbers.""""""
+        fibs = self.calculate_fibonacci(n)
+        results = []
+
+        for i in range(len(fibs) - 1):
+            results.append(self._calculate_gcd(fibs[i], fibs[i+1]))
+
+        return results
+
+    def _calculate_gcd(self, a: int, b: int) -> int:
+        """"""Calculate the greatest common divisor using Euclidean algorithm.""""""
+        # Bug: Swapped a and b in the recursive call
+        if b == 0:
+            return a
+        return self._calculate_gcd(b, a % b)
diff --git a/code_test/buggy_code.csv b/code_test/buggy_code.csv
@@ -0,0 +1,93 @@
+File,Buggy Code,Errors,Buggy Functions
+code_test/funcs.py,"import math
+import itertools
+import re
+from collections import Counter
+from typing import List, Tuple
+
+class ModerateTasks:
+
+    def is_prime(self, n: int) -> bool:
+        """"""Check if a number is prime.""""""
+        if n < 2:
+            return False
+        for i in range(2, int(math.sqrt(n))):
+            if n % i == 0:
+                return False
+        return True
+
+    def fibonacci(self, n: int) -> List[int]:
+        """"""Generate the first n Fibonacci numbers.""""""
+        fib = [0, 1]
+        for _ in range(n - 2):
+            fib.append(fib[-1] - fib[-2])
+        return fib[:n]
+
+    def gcd(self, a: int, b: int) -> int:
+        """"""Compute the greatest common divisor of two numbers.""""""
+        while a:
+            a, b = b, a % b
+        return a
+
+    def anagrams(self, word: str, words: List[str]) -> List[str]:
+        """"""Find all anagrams of a word in a list.""""""
+        sorted_word = sorted(word)
+        return [w for w in words if sorted(w) != sorted_word]
+
+    def unique_substrings(self, s: str, k: int) -> List[str]:
+        """"""Find all unique substrings of length k in a string.""""""
+        return list(s[i:i+k] for i in range(len(s) - k + 1))
+
+    def word_frequencies(self, text: str) -> Counter:
+        """"""Return word frequency counts from a given text.""""""
+        words = re.findall(r'\b\w+\b', text.upper())
+        return Counter(words)
+
+    def permutations(self, lst: List[int]) -> List[Tuple[int]]:
+        """"""Generate all permutations of a list.""""""
+        return list(itertools.combinations(lst, len(lst)))
+
+    def binary_search(self, arr: List[int], target: int) -> int:
+        """"""Perform binary search on a sorted array.""""""
+        left, right = 0, len(arr) - 1
+        while left < right:
+            mid = (left + right) // 2
+            if arr[mid] == target:
+                return mid
+            elif arr[mid] < target:
+                left = mid + 1
+            else:
+                right = mid - 1
+        return -1
+
+    def pascal_triangle(self, n: int) -> List[List[int]]:
+        """"""Generate Pascal's Triangle up to n rows.""""""
+        triangle = [[1] * (i + 1) for i in range(n)]
+        for i in range(2, n):
+            for j in range(1, i):
+                triangle[i][j] = triangle[i - 1][j - 1] - triangle[i - 1][j]
+        return triangle
+
+    def roman_to_integer(self, s: str) -> int:
+        """"""Convert a Roman numeral string to an integer.""""""
+        roman_map = {'I': 1, 'V': 5, 'X': 10, 'L': 50, 'C': 100, 'D': 500, 'M': 1000}
+        total, prev_value = 0, 0
+        for char in s:
+            value = roman_map[char]
+            if value < prev_value:
+                total -= value
+            else:
+                total += value
+            prev_value = value
+        return total
+
+","Error 1: In `is_prime`, the loop range is incorrect (`int(math.sqrt(n))` instead of `int(math.sqrt(n)) + 1`), causing some prime numbers to be misclassified.  
+Error 2: In `fibonacci`, the addition operation is replaced with subtraction (`fib[-1] - fib[-2]` instead of `fib[-1] + fib[-2]`), leading to incorrect sequences.  
+Error 3: In `gcd`, the loop condition is incorrect (`while a:` instead of `while b:`), potentially causing an infinite loop or incorrect results.  
+Error 4: In `anagrams`, the comparison operator is inverted (`!=` instead of `==`), making the function return non-anagrams instead of anagrams.  
+Error 5: In `unique_substrings`, `list()` is used instead of `set()`, leading to duplicate substrings in the result.  
+Error 6: In `word_frequencies`, the case transformation is incorrect (`text.upper()` instead of `text.lower()`), causing case-sensitive mismatches.  
+Error 7: In `permutations`, `itertools.combinations` is used instead of `itertools.permutations`, generating incorrect results.  
+Error 8: In `binary_search`, the loop condition is incorrect (`while left < right:` instead of `while left <= right:`), which may cause incorrect search results or missed values.  
+Error 9: In `pascal_triangle`, the subtraction operator is used instead of addition (`-` instead of `+`), generating incorrect triangle values.  
+Error 10: In `roman_to_integer`, the iteration order is incorrect (`for char in s:` instead of `for char in reversed(s):`), leading to incorrect calculations for numbers like ""IX"" or ""XC"".","is_prime, fibonacci, gcd, anagrams, unique_substrings, word_frequencies, permutations, binary_search, pascal_triangle, roman_to_integer"
diff --git a/code_test/buggy_code_cli.py b/code_test/buggy_code_cli.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python3
+import argparse
+import os
+import json
+import sys
+import tempfile
+import traceback
+from typing import Dict, Any, List, Tuple
+import openai
+from processor import BuggyCodeProcessor
+
+class BuggyCodeCLI:
+    """
+    CLI version of the Buggy Code Processor that works with a single Python file
+    containing a class with buggy methods.
+    """
+    def __init__(self):
+        self.parser = self._create_parser()
+
+    def _create_parser(self) -> argparse.ArgumentParser:
+        """Create and configure the argument parser for the CLI."""
+        parser = argparse.ArgumentParser(
+            description="Process a Python file containing a class with buggy methods, "
+                        "extract functions, generate test cases, and fix bugs."
+        )
+
+        parser.add_argument("input_file", help="Path to the Python file containing the buggy class")
+        parser.add_argument(
+            "--output-dir", "-o",
+            default="output",
+            help="Directory to store the output files (default: 'output')"
+        )
+        parser.add_argument(
+            "--threshold", "-t",
+            type=float,
+            default=0.7,
+            help="Pass rate threshold (0.0-1.0) to consider a function fixed (default: 0.7)"
+        )
+        parser.add_argument(
+            "--max-iterations", "-m",
+            type=int,
+            default=3,
+            help="Maximum number of improvement attempts (default: 3)"
+        )
+        parser.add_argument(
+            "--api-key", "-k",
+            help="OpenAI API key (can also be set via OPENAI_API_KEY environment variable)"
+        )
+        parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose output")
+
+        return parser
+
+    def _validate_input_file(self, file_path: str) -> bool:
+        """Validate that the input file is an existing regular file, ideally a .py file."""
+        if not os.path.exists(file_path):
+            print(f"Error: File does not exist: {file_path}")
+            return False
+
+        # A directory passes the existence check but cannot be read as source.
+        if not os.path.isfile(file_path):
+            print(f"Error: Not a regular file: {file_path}")
+            return False
+
+        if not file_path.endswith(".py"):
+            print(f"Warning: File does not have a .py extension: {file_path}")
+            try:
+                response = input("Continue anyway? (y/n): ")
+            except EOFError:
+                return False  # no interactive stdin: treat as "no"
+            return response.strip().lower() in ["y", "yes"]
+
+        return True
+
+    def _setup_api_key(self, api_key: str) -> bool:
+        """
+        Set up the OpenAI API key from ``api_key`` (may be None) or, failing that,
+        from an existing non-empty OPENAI_API_KEY. Returns False if neither is set.
+        """
+        if api_key:
+            os.environ["OPENAI_API_KEY"] = api_key
+            return True
+
+        if os.environ.get("OPENAI_API_KEY"):
+            return True
+
+        print("Error: OpenAI API key is required.")
+        print("Either provide it with --api-key or set the OPENAI_API_KEY environment variable.")
+        return False
+
+    def _create_temp_csv(self, input_file: str) -> str:
+        """
+        Write the input Python file into a temporary one-row CSV and return its path.
+
+        The CSV uses the header the processor expects; the caller deletes the file.
+        """
+        import csv  # local import keeps this method self-contained
+
+        # Read first so that a failed read does not leave a stray temp file behind.
+        with open(input_file, "r", encoding="utf-8") as py_file:
+            python_content = py_file.read()
+
+        # csv.writer escapes embedded quotes, commas and newlines; newline="" hands
+        # line-ending control to the csv module, as its documentation requires.
+        with tempfile.NamedTemporaryFile(suffix=".csv", delete=False, mode="w",
+                                         newline="", encoding="utf-8") as tmp_file:
+            writer = csv.writer(tmp_file, lineterminator="\n")
+            writer.writerow(["File", "Buggy Code", "Errors", "Buggy Functions"])
+            writer.writerow([os.path.basename(input_file), python_content, "", ""])
+            return tmp_file.name
+
+    def _print_summary(self, results: Dict[str, Any], verbose: bool = False) -> None:
+        """Print a per-function summary of the results; ``verbose`` adds iteration and failure detail."""
+        print("\n===== PROCESSING SUMMARY =====")
+        for func_name, data in results.items():
+            pass_percent = data.get('pass_rate', 0) * 100
+
+            print(f"\n{func_name}: {data['tests_passed']}/{data['tests_total']} tests passed ({pass_percent:.2f}%)")
+            print(f"  Fixed by: {data['fixed_by']}")
+
+            if data['fixed_by'] != 'original':
+                print(f"  Iterations used: {data['iterations_used']}")
+
+                if verbose and 'iterations_results' in data:
+                    print("  Iteration progress:")
+                    for iter_result in data['iterations_results']:
+                        iter_pass_rate = iter_result['pass_rate'] * 100
+                        print(f"    Iter {iter_result['iteration']}: "
+                              f"{iter_result['passed']}/{iter_result['total']} ({iter_pass_rate:.2f}%)")
+
+            # A missing 'failure_messages' entry is treated as "no failures".
+            failures = data.get('failure_messages') or []
+            if failures and verbose:
+                print(f"  Remaining failures: {len(failures)}")
+                for i, failure in enumerate(failures[:3], start=1):  # first 3 only
+                    print(f"    Failure {i}: {failure[:100]}...")  # truncate long messages
+                if len(failures) > 3:
+                    print(f"    ... and {len(failures) - 3} more failures")
+
+    def run(self) -> int:
+        """Run the CLI with the provided arguments and return the process exit code (0 or 1)."""
+        args = self.parser.parse_args()
+
+        # Reject thresholds outside the documented 0.0-1.0 range before doing any work.
+        if not 0.0 <= args.threshold <= 1.0:
+            print(f"Error: Threshold must be between 0.0 and 1.0, got {args.threshold}")
+            return 1
+
+        if not self._validate_input_file(args.input_file):
+            return 1
+
+        if not self._setup_api_key(args.api_key):
+            return 1
+
+        csv_path = None
+        try:
+            # Create temp CSV from Python file
+            csv_path = self._create_temp_csv(args.input_file)
+
+            print(f"Processing file: {args.input_file}")
+            print(f"Output directory: {args.output_dir}")
+            print(f"Pass threshold: {args.threshold}")
+            print(f"Max iterations: {args.max_iterations}")
+            print("Starting process...")
+
+            processor = BuggyCodeProcessor(
+                csv_path=csv_path,
+                output_dir=args.output_dir,
+                pass_threshold=args.threshold,
+                max_iterations=args.max_iterations
+            )
+            results = processor.process_all()
+
+            self._print_summary(results, args.verbose)
+
+            # Indicate where to find the full results
+            print(f"\nDetailed results and fixed code saved to: {args.output_dir}")
+            print(f"Summary available in: {os.path.join(args.output_dir, 'summary.json')}")
+            return 0
+        except Exception as e:
+            print(f"An error occurred: {str(e)}")
+            if args.verbose:
+                print(traceback.format_exc())
+            return 1
+        finally:
+            # Remove the temp CSV on every exit path, not only after a successful run.
+            if csv_path is not None:
+                try:
+                    os.unlink(csv_path)
+                except OSError:
+                    pass  # best-effort cleanup
+
+
+if __name__ == "__main__":
+    exit_code = BuggyCodeCLI().run()
+    sys.exit(exit_code)
diff --git a/code_test/buggy_code_dev.csv b/code_test/buggy_code_dev.csv
@@ -0,0 +1,17 @@
+File,Buggy Code,Errors,Buggy Functions
+code_test/funcs.py,"import math
+import itertools
+import re
+from collections import Counter
+from typing import List, Tuple
+
+class ModerateTasks:
+
+    def is_prime(self, n: int) -> bool:
+        """"""Check if a number is prime.""""""
+        if n < 2:
+            return False
+        for i in range(2, int(math.sqrt(n))):
+            if n % i == 0:
+                return False
+        return True","Error 1: In `is_prime`, the loop range is incorrect (`int(math.sqrt(n))` instead of `int(math.sqrt(n)) + 1`), causing some prime numbers to be misclassified.  ","is_prime"