-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGitClone.py
43 lines (37 loc) · 1.38 KB
/
GitClone.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import os
import shutil
import git
import tiktoken
from langchain_openai import OpenAIEmbeddings
import time
def clone_repo(repo_url, clone_dir):
    """
    Clone a Git repository into ``clone_dir``, replacing any previous copy.

    If ``clone_dir`` already exists it is deleted first so the clone always
    starts from an empty location.

    Args:
        repo_url: Location of the repository, passed unchanged to
            ``git.Repo.clone_from``.
        clone_dir: Path of the directory to clone into.

    Returns:
        ``clone_dir``, unchanged, so the call can be chained.

    Raises:
        OSError: If the existing ``clone_dir`` cannot be removed even after
            clearing the read-only bit on the offending entry.
        Whatever ``git.Repo.clone_from`` raises when the clone itself fails.
    """
    # Imported locally so this function stays self-contained.
    import stat
    import sys

    def _make_writable_and_retry(func, path, _exc):
        # Git marks object/pack files read-only; on Windows that makes the
        # delete fail. Clear the read-only bit and retry the same operation.
        # A second failure propagates to the caller.
        os.chmod(path, stat.S_IWRITE)
        func(path)

    if os.path.exists(clone_dir):
        # ``onexc`` replaced the deprecated ``onerror`` in Python 3.12; the
        # handler ignores its third argument, so it works with both.
        if sys.version_info >= (3, 12):
            shutil.rmtree(clone_dir, onexc=_make_writable_and_retry)
        else:
            shutil.rmtree(clone_dir, onerror=_make_writable_and_retry)

    # Clone the repository into the (now absent) target directory.
    git.Repo.clone_from(repo_url, clone_dir)
    return clone_dir
def read_files_from_repo(clone_dir):
    """
    Read every file under ``clone_dir`` into memory as text.

    Any directory named ``.git`` is skipped. Each file is decoded as UTF-8
    first; if that fails it is decoded as latin-1. Files that cannot be
    opened or read (for example dangling symlinks or permission errors) are
    reported on stdout and skipped instead of aborting the whole walk.

    Args:
        clone_dir: Root directory to walk.

    Returns:
        A dictionary mapping each file path (``os.path.join`` of the walked
        directory and the file name) to that file's decoded contents.
    """
    # Tried in order. latin-1 maps every possible byte to a character, so it
    # always succeeds and must stay last.
    encodings = ('utf-8', 'latin-1')

    repo_contents = {}
    for root, dirs, files in os.walk(clone_dir):
        # Removing the entry in place stops os.walk from descending into it.
        if '.git' in dirs:
            dirs.remove('.git')

        for file in files:
            file_path = os.path.join(root, file)
            for encoding in encodings:
                try:
                    with open(file_path, 'r', encoding=encoding) as f:
                        repo_contents[file_path] = f.read()
                except UnicodeDecodeError:
                    # Wrong guess; fall through to the next encoding.
                    continue
                except OSError as exc:
                    # Unreadable entry: skip it rather than lose everything
                    # collected so far.
                    print(f"Skipping unreadable file: {file_path} ({exc})")
                break
            else:
                # Only reachable if ``encodings`` no longer ends with an
                # encoding that accepts every byte sequence.
                print(f"Skipping file due to encoding error: {file_path}")
    return repo_contents