-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfileProcessor.js
161 lines (137 loc) · 4.05 KB
/
fileProcessor.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import { execFileSync, execSync } from "child_process";
import { closeSync, openSync, readSync } from "fs";
import fs from "fs/promises";
import path from "path";
import ignore from "ignore";
// Patterns that are always excluded from a scan. processDirectoryOrPaths()
// adds this list to its ignore matcher before any other pattern.
const DEFAULT_IGNORE_PATTERNS = [
// Version-control metadata directories
".git",
".svn",
".hg",
".bzr",
"CVS",
// Git configuration files
".gitignore",
".gitattributes",
".gitmodules",
// Installed dependencies
"node_modules",
// License text
"LICENSE",
];
/**
 * Decide, synchronously, whether a file should be treated as text.
 *
 * Three checks are tried in order and the first positive one wins:
 *   1. the MIME type reported by the external `file` utility,
 *   2. the file extension,
 *   3. a scan of the first 1024 bytes for anything other than printable
 *      ASCII, tab, line feed or carriage return.
 *
 * @param {string} filePath - Path of the file to inspect.
 * @returns {boolean} true when the file looks like text; false when it looks
 *   binary or cannot be inspected (a warning is logged in the latter case).
 */
function isTextFile(filePath) {
  // MIME type prefixes we consider as text
  const textMimeTypes = [
    "text/",
    "application/json",
    "application/xml",
    "application/javascript",
    "application/x-python-code",
    "application/x-empty",
  ];
  // Extensions trusted as text even when the MIME probe disagrees
  const textExtensions = [
    ".js",
    ".py",
    ".json",
    ".ndjson",
    ".md",
    ".txt",
    ".html",
    ".css",
    ".yml",
    ".yaml",
  ];
  const SAMPLE_SIZE = 1024;

  try {
    // Ask the 'file' utility for the MIME type. The path is passed as an
    // argument vector (no shell), so quotes, spaces, `$` or backticks in a
    // file name cannot break or inject into the command. '--' stops a path
    // starting with '-' from being parsed as an option.
    let mimeType = "";
    try {
      mimeType = execFileSync("file", ["-b", "--mime-type", "--", filePath], {
        encoding: "utf-8",
        stdio: ["ignore", "pipe", "ignore"],
      }).trim();
    } catch {
      // 'file' is missing or failed: fall through to the other heuristics
      // instead of declaring every file binary.
      mimeType = "";
    }
    if (textMimeTypes.some((type) => mimeType.startsWith(type))) {
      return true;
    }

    // For files that might be misidentified, check the extension
    if (textExtensions.includes(path.extname(filePath).toLowerCase())) {
      return true;
    }

    // If still not identified as text, sample the start of the file.
    const buffer = Buffer.alloc(SAMPLE_SIZE);
    const fd = openSync(filePath, "r");
    let bytesRead = 0;
    try {
      bytesRead = readSync(fd, buffer, 0, SAMPLE_SIZE, 0);
    } finally {
      // Always release the descriptor, even when the read throws.
      closeSync(fd);
    }

    // Accept only printable ASCII plus tab (9), LF (10) and CR (13)
    for (let i = 0; i < bytesRead; i++) {
      const byte = buffer[i];
      const isPrintable = byte >= 32 && byte <= 126;
      const isWhitespace = byte === 9 || byte === 10 || byte === 13;
      if (!isPrintable && !isWhitespace) {
        return false;
      }
    }
    return true;
  } catch (error) {
    console.warn(
      `Warning: Unable to determine file type for ${filePath}. Assuming it's not a text file.`,
    );
    return false;
  }
}
/**
 * Collect the text files found under a list of paths.
 *
 * Each entry of `paths` may be a directory (walked recursively, honouring the
 * ignore rules below) or a single file (processed directly, without consulting
 * the ignore rules).
 *
 * Ignore rules are applied in this order: DEFAULT_IGNORE_PATTERNS, all dot
 * files/directories, `options.ignorePatterns`, then negations for
 * `options.includeDotFiles`, and finally the `.gitignore` found at the root of
 * the directory being walked.
 *
 * @param {string[]} paths - Files and/or directories to process.
 * @param {object} [options] - Forwarded to traverseDirectory/processFile.
 * @param {string|string[]} [options.ignorePatterns] - Extra ignore patterns.
 * @param {string[]} [options.includeDotFiles] - Dot-file patterns to re-include.
 * @returns {Promise<Array>} The shared `files` array filled in by processFile.
 * @throws Rejects when `fs.stat` fails for one of the given paths.
 */
export async function processDirectoryOrPaths(paths, options = {}) {
  const files = [];

  // Rules shared by every directory: defaults, dot files, user patterns.
  const baseIgnore = ignore();
  baseIgnore.add(DEFAULT_IGNORE_PATTERNS);
  // Ignore all dot files and directories by default
  baseIgnore.add(".*");
  if (options.ignorePatterns) {
    baseIgnore.add(options.ignorePatterns);
  }
  // Negated patterns re-include the dot files the user asked for
  if (options.includeDotFiles) {
    for (const pattern of options.includeDotFiles) {
      baseIgnore.add(`!${pattern}`);
    }
  }

  for (const itemPath of paths) {
    const stat = await fs.stat(itemPath);
    if (stat.isDirectory()) {
      // Use a fresh matcher per directory so that one directory's .gitignore
      // rules do not leak into directories processed afterwards.
      const dirIgnore = ignore().add(baseIgnore);
      try {
        const gitignorePath = path.join(itemPath, ".gitignore");
        const gitignoreContent = await fs.readFile(gitignorePath, "utf-8");
        dirIgnore.add(gitignoreContent);
      } catch (error) {
        // A missing .gitignore is normal; anything else deserves a warning.
        if (error.code !== "ENOENT") {
          console.warn(
            `Warning: Error reading .gitignore file in ${itemPath}: ${error.message}`,
          );
        }
      }
      await traverseDirectory(itemPath, itemPath, files, dirIgnore, options);
    } else if (stat.isFile()) {
      await processFile(itemPath, files, options);
    }
  }
  return files;
}
/**
 * Recursively walk `currentDir`, handing every non-ignored regular file to
 * processFile.
 *
 * @param {string} baseDir - Root of the walk; ignore patterns are matched
 *   against paths relative to it.
 * @param {string} currentDir - Directory being listed in this call.
 * @param {Array} files - Accumulator passed through to processFile.
 * @param {{ignores: function(string): boolean}} ig - Ignore matcher.
 * @param {object} options - Passed through to processFile unchanged.
 * @returns {Promise<void>}
 */
async function traverseDirectory(baseDir, currentDir, files, ig, options) {
  const entries = await fs.readdir(currentDir, { withFileTypes: true });
  for (const entry of entries) {
    const fullPath = path.join(currentDir, entry.name);
    const relativePath = path.relative(baseDir, fullPath);
    const isDirectory = entry.isDirectory();

    // The matcher never stats paths: a directory must be marked with a
    // trailing slash, otherwise directory-only patterns such as "dist/"
    // are never applied and the directory is walked anyway.
    const matchPath = isDirectory ? `${relativePath}/` : relativePath;
    if (ig.ignores(matchPath)) continue;

    if (isDirectory) {
      await traverseDirectory(baseDir, fullPath, files, ig, options);
    } else if (entry.isFile()) {
      await processFile(fullPath, files, options);
    }
  }
}
/**
 * Read one file and append `{ path, content }` to `files`, unless it is
 * filtered out.
 *
 * A file is skipped when `options.extensions` is set and does not list the
 * file's extension (compared without the leading dot), or when isTextFile()
 * rejects it (a message is logged in that case).
 *
 * @param {string} filePath - File to read.
 * @param {Array} files - Accumulator receiving the result.
 * @param {object} options - Only `options.extensions` is consulted here.
 * @returns {Promise<void>}
 */
async function processFile(filePath, files, options) {
  const allowedExtensions = options.extensions;
  if (allowedExtensions) {
    const extension = path.extname(filePath).slice(1);
    const isAllowed = allowedExtensions.includes(extension);
    if (!isAllowed) {
      return;
    }
  }

  const looksLikeText = isTextFile(filePath);
  if (looksLikeText === false) {
    console.log(`Skipping non-text file: ${filePath}`);
    return;
  }

  const content = await fs.readFile(filePath, "utf-8");
  const record = { path: filePath, content };
  files.push(record);
}