Skip to content

Commit dd4ea02

Browse files
authored
Merge pull request #63 from Remi-Gau/fix/62
[FIX] skip .git and few other files / directory during validation
2 parents 218a233 + b5e595f commit dd4ea02

File tree

1 file changed

+110
-54
lines changed

1 file changed

+110
-54
lines changed

reproschema/validate.py

Lines changed: 110 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,79 +1,120 @@
11
import json
2-
import os
32
from pathlib import Path
43

54
from .jsonldutils import load_file, validate_data
65
from .utils import lgr, start_server, stop_server
76

7+
# Directory names that are never descended into during validation.
DIR_TO_SKIP = [
    ".git",
    ".github",
    "__pycache__",
    "env",
    "venv",
]

# File names that are never validated, whatever their extension.
FILES_TO_SKIP = [
    ".DS_Store",
    ".gitignore",
    ".flake8",
    ".autorc",
    "LICENSE",
    "Makefile",
]

# File suffixes eligible for validation. Entries are compared against
# ``pathlib.Path.suffix``, which includes the leading dot (or is the empty
# string for extension-less files), so every non-empty entry must start
# with ".". The previous "json" / "js" entries could never match.
SUPPORTED_EXTENSIONS = [
    ".jsonld",
    ".json",
    ".js",
    "",
]
828

9-
def validate_dir(
    directory: str,
    started: bool = False,
    http_kwargs: None | dict[str, int] = None,
    stop=None,
):
    """Validate a directory containing JSONLD documents against the ReproSchema pydantic model.

    Recursively goes through the directory tree and validates files with the
    allowed extensions. Directories listed in ``DIR_TO_SKIP`` and files listed
    in ``FILES_TO_SKIP`` are ignored.

    Parameters
    ----------
    directory: str
        Path to directory to walk for validation

    started : bool
        Whether an http server exists or not

    http_kwargs : dict or None
        Keyword arguments for the http server. Valid keywords are: port, path
        and tmpdir

    stop: None or function
        Function to use to stop the HTTP server

    Returns
    -------
    conforms: bool
        Whether the documents are conformant with the shape. Always ``True``
        on return: an exception is raised if any document is non-conformant.

    stop: None or function
        The ``stop`` argument, handed back so recursive calls and the caller
        can keep passing it along.

    Raises
    ------
    Exception
        If ``directory`` is not a directory.
    ValueError
        If a file yields an empty data graph or fails validation.
    json.JSONDecodeError
        If a file cannot be decoded.

    """
    if http_kwargs is None:
        http_kwargs = {}

    directory = Path(directory)

    if not directory.is_dir():
        if stop is not None:
            stop_server(stop)
        raise Exception(f"{str(directory)} is not a directory")

    if directory.name in DIR_TO_SKIP:
        lgr.info(f"Skipping directory {directory}")
        # Same (conforms, stop) shape as the normal exit: every caller
        # unpacks two values, so a bare ``True`` here raised TypeError.
        return True, stop

    lgr.info(f"Validating directory {directory}")

    # List the directory once and reuse it for both files and sub-directories.
    entries = list(directory.iterdir())

    files_to_validate = [
        str(entry)
        for entry in entries
        if entry.is_file()
        and entry.name not in FILES_TO_SKIP
        and entry.suffix in SUPPORTED_EXTENSIONS
    ]

    for name in files_to_validate:
        lgr.debug(f"Validating file {name}")

        try:
            data = load_file(name, started=started, http_kwargs=http_kwargs)
            if len(data) == 0:
                # The handler below stops the server; stopping it here as
                # well would shut it down twice.
                raise ValueError(f"Empty data graph in file {name}")
            conforms, vtext = validate_data(data)
        except (ValueError, json.JSONDecodeError):
            if stop is not None:
                stop_server(stop)
            raise

        if not conforms:
            lgr.critical(f"File {name} has validation errors.")
            if stop is not None:
                stop_server(stop)
            raise ValueError(vtext)

    dirs_to_validate = [
        str(entry)
        for entry in entries
        if entry.is_dir() and entry.name not in DIR_TO_SKIP
    ]

    for sub_directory in dirs_to_validate:
        _, stop = validate_dir(
            sub_directory, started=started, http_kwargs=http_kwargs, stop=stop
        )

    return True, stop
77118

78119

79120
def validate(path):
@@ -91,17 +132,32 @@ def validate(path):
91132
exception.
92133
93134
"""
94-
if os.path.isdir(path):
95-
conforms = validate_dir(path)
135+
if Path(path).is_dir():
136+
137+
lgr.info(f"Validating directory {path}")
138+
139+
stop, port = start_server()
140+
http_kwargs = {"port": port}
141+
started = True
142+
143+
conforms, _ = validate_dir(
144+
path, started=started, http_kwargs=http_kwargs, stop=stop
145+
)
146+
147+
stop_server(stop)
148+
96149
else:
97-
# Skip validation for .DS_Store files
98-
if Path(path).name == ".DS_Store":
99-
lgr.info(f"{path} is a .DS_Store file and is skipped.")
150+
151+
if Path(path).name in FILES_TO_SKIP:
152+
lgr.info(f"Skipping file {path}")
100153
return True
154+
101155
data = load_file(path, started=False)
102156
conforms, vtext = validate_data(data)
103157
if not conforms:
104158
lgr.critical(f"File {path} has validation errors.")
105159
raise ValueError(vtext)
160+
106161
lgr.info(f"{path} conforms.")
162+
107163
return conforms

0 commit comments

Comments
 (0)