Skip to content
This repository was archived by the owner on Mar 9, 2026. It is now read-only.

Commit adb4749

Browse files
committed
Enhance file handling and testing in Coder class 📁🔍
Resolves #57. This commit introduces several improvements to the `Coder` class in `aicodebot/coder.py`. We've added functionality to better handle different file types, including binary files and files with unknown types, through the new `get_file_info` and `is_binary_file` methods. We've also updated the handling of new files in the `git_diff_context` method so that the contents of binary files are not included in the diff context. In addition, we've added a new binary file, `assets/robot.png`, to the repository. Finally, we've expanded our test coverage in `tests/test_coder.py` to include tests for the new `get_file_info` method. This ensures our file handling remains robust and reliable. 🧪👍
1 parent 3a86df8 commit adb4749

3 files changed

Lines changed: 76 additions & 4 deletions

File tree

aicodebot/coder.py

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
from openai.api_resources import engine
55
from pathlib import Path
66
from prompt_toolkit.completion import Completer, Completion
7-
import fnmatch, functools, openai, os, re, subprocess, tiktoken
7+
from pygments.lexers import ClassNotFound, get_lexer_for_mimetype, guess_lexer_for_filename
8+
import fnmatch, functools, mimetypes, openai, os, re, subprocess, tiktoken
89

910
DEFAULT_MAX_TOKENS = 512
1011
PRECISE_TEMPERATURE = 0.05
@@ -17,6 +18,8 @@ class Coder:
1718
git, and the local file system.
1819
"""
1920

21+
UNKNOWN_FILE_TYPE = "unknown"
22+
2023
@staticmethod
2124
def clone_repo(repo_url, repo_dir):
2225
"""Clone a git repository to a directory."""
@@ -79,6 +82,31 @@ def generate_directory_structure(cls, path, ignore_patterns=None, use_gitignore=
7982

8083
return structure
8184

85+
@classmethod
def get_file_info(cls, file_path):
    """Return an ``(is_binary, file_type)`` tuple describing a file.

    ``is_binary`` is the result of ``cls.is_binary_file(file_path)``.
    ``file_type`` is the name of the Pygments lexer found for the file, or
    ``cls.UNKNOWN_FILE_TYPE`` when no lexer matches.
    """
    # Only the MIME type is needed; the encoding half of the guess is ignored.
    guessed_mime, _encoding = mimetypes.guess_type(file_path)

    # This opens the file, so a missing path fails here, before any lexer lookup.
    binary = cls.is_binary_file(file_path)

    # Lexer lookups in priority order: by MIME type first, then by file name
    # (with empty text, so only the name is available to the guess).
    lookups = (
        (get_lexer_for_mimetype, (guessed_mime,)),
        (guess_lexer_for_filename, (file_path, "")),
    )
    for lookup, args in lookups:
        try:
            lexer = lookup(*args)
        except ClassNotFound:
            # No match from this strategy; fall through to the next one
            continue
        return binary, lexer.name

    # Neither strategy found a lexer
    return binary, cls.UNKNOWN_FILE_TYPE
109+
82110
@staticmethod
83111
@functools.lru_cache
84112
def get_openai_supported_engines():
@@ -230,9 +258,13 @@ def git_diff_context(commit=None, files=None):
230258
if status_code == "A":
231259
# If the file is new, include the entire file content
232260
file_name = status_parts[1]
233-
contents = Path(file_name).read_text()
234-
diffs.append(f"## New file added: {file_name}")
235-
diffs.append(contents)
261+
if Coder.is_binary_file(file_name):
262+
# Don't include the diff for binary files
263+
diffs.append(f"## New binary file added: {file_name}")
264+
else:
265+
diffs.append(f"## New file added: {file_name}")
266+
contents = Path(file_name).read_text()
267+
diffs.append(contents)
236268
elif status_code == "R":
237269
# If the file is renamed, get the diff and note the old and new names
238270
old_file_name, new_file_name = status_parts[1], status_parts[2]
@@ -258,6 +290,18 @@ def git_staged_files():
258290
def git_unstaged_files():
259291
return exec_and_get_output(["git", "diff", "HEAD", "--name-only"]).splitlines()
260292

293+
@staticmethod
294+
def is_binary_file(file_path):
295+
chunksize = 4000
296+
with Path(file_path).open("rb") as file:
297+
while True:
298+
chunk = file.read(chunksize)
299+
if b"\0" in chunk: # Null byte
300+
return True
301+
if len(chunk) < chunksize:
302+
break # End of file
303+
return False
304+
261305
@staticmethod
262306
def parse_github_url(repo_url):
263307
"""

assets/robot.png

144 KB
Loading

tests/test_coder.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,34 @@ def test_generate_directory_structure(tmp_path):
4444
assert len(file_list) > 10
4545

4646

47+
def test_get_file_info():
    """Check get_file_info against known repository files and a missing path."""
    # Each entry is (path, expected is_binary, expected file_type), checked in order
    expectations = [
        ("tests/test_coder.py", False, "Python"),  # a text file
        ("assets/robot.png", True, Coder.UNKNOWN_FILE_TYPE),  # a binary file
        ("LICENSE", False, Coder.UNKNOWN_FILE_TYPE),
        ("README.md", False, "Markdown"),
        ("pyproject.toml", False, "TOML"),
    ]
    for path, expected_binary, expected_type in expectations:
        is_binary, file_type = Coder.get_file_info(path)
        assert is_binary is expected_binary
        assert file_type == expected_type

    # Test with a non-existent file
    with pytest.raises(FileNotFoundError):
        Coder.get_file_info("non_existent_file.txt")
73+
74+
4775
def test_get_token_length():
4876
text = ""
4977
assert Coder.get_token_length(text) == 0

0 commit comments

Comments
 (0)