-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathlib_util.py
338 lines (303 loc) · 11.2 KB
/
lib_util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
# BuildFox ninja generator
import os
import re
import sys
import shlex
import shutil
# Pre-compiled patterns shared by the helpers below. They operate on regex
# *text* (the strings produced by wildcard_regex or written by the user),
# not on file names.

# group 1: leading run of plain chars, backslash-escaped chars or
# "([^\/]*)...([^\/]*)" recursive-glob groups; group 2: the separator
# ("\/", "/" or "\") that follows it
re_folder_part = re.compile(r"^((?:\(\[\^\\\/\]\*\)(?:\(\?\![\w\|]+\))?\(\[\^\\\/\]\*\)|(?:[^\r\n(\[\"\\]|\\.))+)(\\\/|\/|\\).*$") # match folder part in filename regex
# group 1 is the character that follows a backslash which is not itself escaped
re_non_escaped_char = re.compile(r"(?<!\\)\\(.)") # looking for not escaped \ with char
# group 1 is the optional "p" marker, group 2 is the group number
re_capture_group_ref = re.compile(r"(?<!\\)\\(p?)(\d+)") # match regex capture group reference
# a "/" that is not directly preceded by "[^" (i.e. not the slash inside "[^/]")
re_pattern_split = re.compile(r"(?<!\[\^)\/")
# literal text "([^\/]*)" + optional "(?!...)" lookahead (group 1) + "([^\/]*)" + "\/"
re_recursive_glob = re.compile(r"\(\[\^\\\/\]\*\)(\(\?\![\w\|]+\))?\(\[\^\\\/\]\*\)\\\/")
# same shape with the slash inside the brackets unescaped and no trailing separator
re_recursive_glob_noslash = re.compile(r"\(\[\^\/\]\*\)(\(\?\![\w\|]+\))?\(\[\^\/\]\*\)")
# return relative path to current work dir
def rel_dir(filename):
    """Return the folder of *filename* relative to the current working directory.

    The result always uses forward slashes and ends with "/", except when the
    file lives directly in the working directory, in which case "" is returned.
    """
    folder = os.path.dirname(os.path.abspath(filename))
    relative = os.path.relpath(folder, os.getcwd()).replace("\\", "/") + "/"
    # relpath reports the working directory itself as ".", which callers want as ""
    return "" if relative == "./" else relative
# return regex value in filename for regex or wildcard
# replace_groups replace wildcards with group reference indexes
def wildcard_regex(filename, replace_groups = False, rec_capture_groups = None):
    """Translate a filename (quoted regex or wildcard) into regex text.

    filename           -- r"..." (raw regex, returned without the r" and "),
                          "..." (quotes are stripped) or a bare name
    replace_groups     -- when False every wildcard becomes a capture group;
                          when True every wildcard becomes a "\\N" reference
                          ("\\pN" for "**") to the matching input group
    rec_capture_groups -- set of group indexes that came from "**"; filled in
                          when replace_groups is False and consulted when it
                          is True. A fresh set is used when omitted.

    Returns the regex text, or None when the name has no "!", "*", "?" or "[".

    NOTE: in matching mode the result ends with "\\Z(?ms)". The flags are kept
    at the end because find_files in this file inspects and slices the *front*
    of the returned text; Python 3.11+ only accepts inline global flags at the
    start of a pattern, so anything compiling this text must move them first.
    """
    # a shared mutable default would leak "**" group indexes from one
    # unrelated call into the next one
    if rec_capture_groups is None:
        rec_capture_groups = set()

    if filename.startswith("r\""):
        return filename[2:-1] # strip r" and "

    if filename.startswith("\""):
        filename = filename[1:-1] # strip " and "

    if not ("!" in filename or "*" in filename or "?" in filename or "[" in filename):
        return None

    # based on fnmatch.translate with each wildcard is a capture group
    i, n = 0, len(filename)
    groups = 1
    res = ""
    while i < n:
        c = filename[i]
        i = i + 1
        if c == "*":
            if i < n and filename[i] == "*":
                if replace_groups:
                    # p (path) will mean that it's ok to substitute this group
                    # with string that may contain slashes
                    res += "\\p" + str(groups)
                else:
                    res += "([^\\/]*)([^\\/]*)"
                    rec_capture_groups.add(groups)
                i = i + 1 # consume the second "*"
            else:
                if replace_groups:
                    # if inputs have recursive capture groups and output don't use them
                    # then prepend recursive group to file name and just switch
                    # to next non recursive capture group
                    while groups in rec_capture_groups:
                        res += "\\" + str(groups) + "_"
                        groups += 1
                    res += "\\" + str(groups)
                else:
                    res += "([^\\/]*)"
            groups += 1
        elif c == "?":
            if replace_groups:
                res += "\\" + str(groups)
            else:
                res += "([^\\/])"
            groups += 1
        elif replace_groups:
            # in reference mode everything except "*" and "?" is copied verbatim
            res += c
        elif c == "!":
            # "!(stuff)" becomes a negative lookahead followed by a name group
            j = i
            if j < n and filename[j] == "(":
                j = j + 1
            while j < n and filename[j] != ")":
                j = j + 1
            if j >= n:
                res += "\\!" # no closing ")", treat "!" as a literal
            else:
                stuff = filename[i + 1: j].replace("\\", "\\\\")
                i = j + 1
                res += "(?!%s)([^\\/]*)" % stuff
        elif c == "[":
            # character class, "[!...]" negates like in fnmatch
            j = i
            if j < n and filename[j] == "!":
                j = j + 1
            if j < n and filename[j] == "]":
                j = j + 1
            while j < n and filename[j] != "]":
                j = j + 1
            if j >= n:
                res += "\\[" # no closing "]", treat "[" as a literal
            else:
                stuff = filename[i:j].replace("\\", "\\\\")
                i = j + 1
                if stuff[0] == "!":
                    stuff = "^" + stuff[1:]
                elif stuff[0] == "^":
                    stuff = "\\" + stuff
                res = "%s([%s])" % (res, stuff)
        elif c == "/":
            # re.escape stopped escaping "/" in Python 3.7, but the rest of this
            # module recognises folders and "**/" by the escaped form "\/"
            res += "\\/"
        else:
            res += re.escape(c)

    if replace_groups:
        return res
    return "%s\\Z(?ms)" % res
# return list of folders (always ends with /) that match provided pattern
# please note that some result folders may point into non existing location
# because it's too costly here to check if they exist
def glob_folders(pattern, base_path, generated, excluded_dirs):
    """Expand a folder pattern into candidate folders.

    pattern       -- folder pattern, must end with "/"; a leading "./" is ignored
    base_path     -- folder the pattern is relative to
    generated     -- mapping whose keys are folders of generated files
    excluded_dirs -- folder names that are never descended into

    Returns (real_folders, gen_folders): folders found by walking the file
    system and folders derived from the keys of *generated*. Entries are not
    checked for existence. Raises ValueError when the pattern has no trailing "/".
    """
    if not pattern.endswith("/"): # this shouldn't fail
        raise ValueError("pattern should always end with \"/\", but got \"%s\"" % pattern)

    start = base_path.rstrip("/")
    real_folders = [start]
    gen_folders = [start]

    if pattern.startswith("./"):
        pattern = pattern[2:]

    for part in re_pattern_split.split(pattern):
        recursive = re_recursive_glob_noslash.match(part)
        if not recursive:
            # plain path component, just append it to everything found so far
            real_folders = ["%s/%s" % (parent, part) for parent in real_folders]
            gen_folders = ["%s/%s" % (parent, part) for parent in gen_folders]
            continue

        # optional "(?!...)" lookahead restricting which folder names are entered
        lookahead = recursive.group(1)
        name_filter = re.compile("^%s.*$" % lookahead) if lookahead else None

        # real folders: every folder below each current one
        walked = []
        for top in real_folders:
            walked.append(top)
            for root, dirs, _files in os.walk(top, topdown = True): # TODO this is slow, optimize
                kept = [name for name in dirs if name not in excluded_dirs]
                if name_filter:
                    kept = [name for name in kept if name_filter.match(name)]
                dirs[:] = kept # prune in place so os.walk skips the rest
                walked.extend(os.path.join(root, name).replace("\\", "/") for name in kept)
        real_folders = walked

        # generated folders: walk the known keys in similar fashion with os.walk
        collected = []
        for prefix in gen_folders:
            dotted = prefix.startswith("./")
            if dotted:
                prefix = prefix[2:] # strip ./
            prefix_len = len(prefix)
            for known in generated.keys():
                if not known.startswith(prefix):
                    continue
                current = known[:prefix_len]
                collected.append("./%s" % current if dotted else current)
                remainder = known[prefix_len:].lstrip("/").rstrip("/")
                for name in remainder.split("/"):
                    if name in excluded_dirs:
                        break
                    if name_filter and not name_filter.match(name):
                        break
                    current += "/%s" % name
                    collected.append("./%s" % current if dotted else current)
        gen_folders = list(set(collected))

    return (real_folders, gen_folders)
# input can be string or list of strings
# outputs are always lists
def find_files(inputs, outputs = None, rel_path = "", generated = None, excluded_dirs = None):
    """Expand input wildcards/regexes into files and derive output names.

    inputs        -- a string or list of strings: plain names, wildcards or
                     quoted regexes (see wildcard_regex)
    outputs       -- optional string or list of strings; wildcards in them are
                     replaced with what the corresponding input groups captured
    rel_path      -- prefix added to every result, also the lookup folder
                     ("./" is used for lookup when empty)
    generated     -- optional mapping folder -> names of files that will be
                     generated and so may not exist on disk yet
    excluded_dirs -- folder names skipped by recursive globs

    Returns the list of inputs, or (inputs, outputs) when outputs were given.
    Raises ValueError when a folder part of an input contains a backslash.
    """
    # rename regex back to readable form
    def replace_non_esc(match_group):
        return match_group.group(1)

    # a single name would otherwise be iterated character by character
    if inputs and isinstance(inputs, str):
        inputs = [inputs]
    if outputs and isinstance(outputs, str):
        outputs = [outputs]
    if generated is None:
        generated = {}
    if excluded_dirs is None:
        excluded_dirs = set()

    rec_capture_groups = set()
    matched = [] # capture groups of every matched input, consumed by outputs
    if inputs:
        result = []
        for pattern in inputs:
            regex = wildcard_regex(pattern, False, rec_capture_groups)
            if not regex:
                result.append(rel_path + pattern)
                continue

            # find the folder where to look for files
            base_folder = re_folder_part.match(regex)
            lookup_path = rel_path or "./"
            real_folders = [lookup_path]
            gen_folders = [lookup_path]
            if base_folder:
                base_folder = base_folder.group(1) + base_folder.group(2)
                base_folder = re_non_escaped_char.sub(replace_non_esc, base_folder)
                if "\\" in base_folder:
                    raise ValueError("please only use forward slashes in path \"%s\"" % pattern)
                real_folders, gen_folders = glob_folders(base_folder, lookup_path, generated, excluded_dirs)

            # look for files
            fs_files = set()
            for real_folder in real_folders:
                if os.path.isdir(real_folder):
                    root = real_folder[len(lookup_path):]
                    fs_files.update(root + name for name in os.listdir(real_folder)
                                    if os.path.isfile(real_folder + "/" + name))

            gen_files = set()
            for gen_folder in gen_folders:
                # in case if gen_folder is "./something" then we need to strip ./
                # but if gen_folder is just "./" then we don't need to strip it !
                if len(gen_folder) > 2 and gen_folder.startswith("./"):
                    check_folder = gen_folder[2:]
                else:
                    check_folder = gen_folder
                if check_folder in generated:
                    root = gen_folder[len(lookup_path):]
                    gen_files.update(root + name for name in generated.get(check_folder))

            # we must have stable sort here
            # so output ninja files will be same between runs
            all_files = sorted(fs_files.union(gen_files))

            # while capturing ** we want just to capture */ optionally
            # so we can match files in root folder as well
            # please note that result regex will not have folder ignore semantic
            # we rely on glob_folders to filter all ignored folders
            regex = re_recursive_glob.sub(r"(?:(.*)\/)?", regex)

            # if you want to match something in local folder
            # then you may write wildcard/regex that starts as ./
            if regex.startswith(r"\.\/"):
                regex = regex[4:]

            # Python 3.11+ rejects inline global flags that are not at the very
            # start of the pattern, so turn a trailing "(?ms)" into real flags
            flags = 0
            if regex.endswith("(?ms)"):
                regex = regex[:-len("(?ms)")]
                flags = re.MULTILINE | re.DOTALL

            re_regex = re.compile(regex, flags)
            for name in all_files:
                match = re_regex.match(name)
                if match:
                    result.append(rel_path + name)
                    matched.append(match.groups())
        inputs = result
    else:
        inputs = []

    if outputs:
        result = []
        for output in outputs:
            # we want \number instead of capture groups
            regex = wildcard_regex(output, True, rec_capture_groups)
            if not regex:
                result.append(rel_path + output)
                continue

            for match in matched:
                # replace \number with data
                def replace_group(matchobj):
                    index = int(matchobj.group(2)) - 1
                    if index < 0 or index >= len(match):
                        return ""
                    # a group that did not take part in the match is None
                    captured = match[index] or ""
                    if matchobj.group(1) == "p":
                        return captured # if capture group have p suffix then pass string as is
                    return captured.replace("/", "_")

                name = re_capture_group_ref.sub(replace_group, regex)
                name = re_non_escaped_char.sub(replace_non_esc, name)
                # in case of **/* mask in output, input capture group
                # for ** can be empty, so we get // in output, so just fix it here
                name = name.replace("//", "/").lstrip("/")
                result.append(rel_path + name)

        # normalize results
        result = [os.path.normpath(name).replace("\\", "/") for name in result]

    # normalize inputs
    inputs = [os.path.normpath(name).replace("\\", "/") for name in inputs]

    if outputs:
        return inputs, result
    return inputs
# finds the file in path
def which(cmd, mode = os.F_OK | os.X_OK, path = None):
    """Return the path of the executable *cmd*, or None when it is not found.

    cmd  -- command name, or a path (then only that path is checked)
    mode -- access mode passed to the permission check
    path -- search path string; the PATH environment variable when None

    Delegates to shutil.which on Python 3.3+, otherwise performs the search itself.
    """
    if sys.version_info[0:2] >= (3, 3):
        return shutil.which(cmd, mode, path)

    def _is_usable(candidate, wanted):
        # must exist, grant the requested access and not be a folder
        if not os.path.exists(candidate):
            return False
        if not os.access(candidate, wanted):
            return False
        return not os.path.isdir(candidate)

    # a command that already carries a folder is checked as is
    if os.path.dirname(cmd):
        return cmd if _is_usable(cmd, mode) else None

    search = os.environ.get("PATH", os.defpath) if path is None else path
    if not search:
        return None
    folders = search.split(os.pathsep)

    candidates = [cmd]
    if sys.platform == "win32":
        # the current folder is always searched first on windows
        if os.curdir not in folders:
            folders.insert(0, os.curdir)
        extensions = os.environ.get("PATHEXT", "").split(os.pathsep)
        # only try the PATHEXT variants when cmd has none of those extensions yet
        if not any(cmd.lower().endswith(ext.lower()) for ext in extensions):
            candidates = [cmd + ext for ext in extensions]

    visited = set()
    for folder in folders:
        key = os.path.normcase(folder)
        if key in visited:
            continue
        visited.add(key)
        for candidate in candidates:
            full_name = os.path.join(folder, candidate)
            if _is_usable(full_name, mode):
                return full_name
    return None
# parses string of generic cxx defines and return list of strings
def cxx_defines(defines):
    """Split a string of compiler defines into a list of bare define strings.

    The string is tokenized with shlex; a leading "/D" or "-D" is removed from
    each token and tokens that end up empty are dropped.
    """
    names = []
    for token in shlex.split(defines):
        if token.startswith(("/D", "-D")):
            token = token[2:]
        if token:
            names.append(token)
    return names
# parses string of generic cxx include dirs and return list of strings
def cxx_includedirs(includedirs):
    """Split a string of compiler include dirs into a list of bare folders.

    The string is tokenized with shlex; a leading "/I" or "-I" is removed from
    each token and tokens that end up empty are dropped.
    """
    folders = []
    for token in shlex.split(includedirs):
        if token.startswith(("/I", "-I")):
            token = token[2:]
        if token:
            folders.append(token)
    return folders
# find files of interest in the provided all-files dict
def cxx_findfiles(all_files):
    """Collect the C/C++ related files from a folder -> file names mapping.

    Names are kept when they end (case-insensitively) with one of the known
    source/header extensions; each is prefixed with its folder, except for
    the "./" folder whose names are returned bare.
    """
    wanted_extensions = (".c", ".cpp", ".cxx", ".c++", ".cc", ".h", ".hpp", ".hxx", ".in")
    found = []
    for folder, names in all_files.items():
        prefix = "" if folder == "./" else folder
        for name in names:
            if name.lower().endswith(wanted_extensions):
                found.append("%s%s" % (prefix, name))
    return found