1
1
import json
2
- import os
3
2
from pathlib import Path
4
3
5
4
from .jsonldutils import load_file , validate_data
6
5
from .utils import lgr , start_server , stop_server
7
6
7
# Directory names that are never descended into during validation.
DIR_TO_SKIP = [
    ".git",
    ".github",
    "__pycache__",
    "env",
    "venv",
]

# File names that are never validated, whatever their extension.
# Dotfiles such as ".gitignore" have an empty ``Path.suffix`` and would
# otherwise pass the extension filter below.
FILES_TO_SKIP = [
    ".DS_Store",
    ".gitignore",
    ".flake8",
    ".autorc",
    "LICENSE",
    "Makefile",
]

# File suffixes (as returned by ``pathlib.Path.suffix``) that are validated.
# ``Path.suffix`` always includes the leading dot, so every non-empty entry
# must start with "." to ever match; "" covers extension-less files.
SUPPORTED_EXTENSIONS = [
    ".jsonld",
    ".json",
    ".js",
    "",
]
8
28
9
- def validate_dir (directory , started = False , http_kwargs = {}):
29
+
30
+ def validate_dir (
31
+ directory : str ,
32
+ started : bool = False ,
33
+ http_kwargs : None | dict [str , int ] = None ,
34
+ stop = None ,
35
+ ):
10
36
"""Validate a directory containing JSONLD documents against the ReproSchema pydantic model.
11
37
38
+ Recursively goes through the directory tree and validates files with the allowed extensions.
39
+
12
40
Parameters
13
41
----------
14
42
directory: str
15
43
Path to directory to walk for validation
44
+
16
45
started : bool
17
46
Whether an http server exists or not
18
- http_kwargs : dict
47
+
48
+ http_kwargs : dict or None
19
49
Keyword arguments for the http server. Valid keywords are: port, path
20
50
and tmpdir
21
51
52
+ stop: None or function
53
+ Function to use to stop the HTTP server
54
+
22
55
Returns
23
56
-------
24
57
conforms: bool
25
58
Whether the document is conformant with the shape. Raises an exception
26
59
if any document is non-conformant.
27
60
28
61
"""
29
- if not os .path .isdir (directory ):
30
- raise Exception (f"{ directory } is not a directory" )
31
- print (f"Validating directory { directory } " )
32
- stop = None
33
- if not started :
34
- stop , port = start_server (** http_kwargs )
35
- http_kwargs ["port" ] = port
36
- else :
37
- if "port" not in http_kwargs :
38
- raise KeyError ("HTTP server started, but port key is missing" )
39
-
40
- for root , _ , files in os .walk (directory ):
41
- for name in files :
42
- full_file_name = os .path .join (root , name )
43
-
44
- if Path (full_file_name ).suffix not in [
45
- ".jsonld" ,
46
- "json" ,
47
- "js" ,
48
- "" ,
49
- ]:
50
- lgr .info (f"Skipping file { full_file_name } " )
51
- continue
52
-
53
- lgr .debug (f"Validating file { full_file_name } " )
54
- try :
55
- data = load_file (
56
- full_file_name , started = True , http_kwargs = http_kwargs
57
- )
58
- if len (data ) == 0 :
59
- raise ValueError ("Empty data graph" )
60
- print (f"Validating { full_file_name } " )
61
- conforms , vtext = validate_data (data )
62
- except (ValueError , json .JSONDecodeError ):
62
+ if http_kwargs is None :
63
+ http_kwargs = {}
64
+
65
+ directory = Path (directory )
66
+
67
+ if not directory .is_dir ():
68
+ if stop is not None :
69
+ stop_server (stop )
70
+ raise Exception (f"{ str (directory )} is not a directory" )
71
+
72
+ if directory .name in DIR_TO_SKIP :
73
+ lgr .info (f"Skipping directory { directory } " )
74
+ return True
75
+
76
+ lgr .info (f"Validating directory { directory } " )
77
+
78
+ files_to_validate = [
79
+ str (x )
80
+ for x in directory .iterdir ()
81
+ if x .is_file ()
82
+ and x .name not in FILES_TO_SKIP
83
+ and x .suffix in SUPPORTED_EXTENSIONS
84
+ ]
85
+
86
+ for name in files_to_validate :
87
+ lgr .debug (f"Validating file { name } " )
88
+
89
+ try :
90
+ data = load_file (name , started = started , http_kwargs = http_kwargs )
91
+ if len (data ) == 0 :
63
92
if stop is not None :
64
93
stop_server (stop )
65
- raise
66
- else :
67
- if not conforms :
68
- lgr .critical (
69
- f"File { full_file_name } has validation errors."
70
- )
71
- if stop is not None :
72
- stop_server (stop )
73
- raise ValueError (vtext )
74
- if not started :
75
- stop_server (stop )
76
- return True
94
+ raise ValueError (f"Empty data graph in file { name } " )
95
+ conforms , vtext = validate_data (data )
96
+ except (ValueError , json .JSONDecodeError ):
97
+ if stop is not None :
98
+ stop_server (stop )
99
+ raise
100
+ else :
101
+ if not conforms :
102
+ lgr .critical (f"File { name } has validation errors." )
103
+ stop_server (stop )
104
+ raise ValueError (vtext )
105
+
106
+ dirs_to_validate = [
107
+ str (x )
108
+ for x in directory .iterdir ()
109
+ if x .is_dir () and x .name not in DIR_TO_SKIP
110
+ ]
111
+
112
+ for dir in dirs_to_validate :
113
+ conforms , stop = validate_dir (
114
+ dir , started = started , http_kwargs = http_kwargs , stop = stop
115
+ )
116
+
117
+ return True , stop
77
118
78
119
79
120
def validate (path ):
@@ -91,17 +132,32 @@ def validate(path):
91
132
exception.
92
133
93
134
"""
94
- if os .path .isdir (path ):
95
- conforms = validate_dir (path )
135
+ if Path (path ).is_dir ():
136
+
137
+ lgr .info (f"Validating directory { path } " )
138
+
139
+ stop , port = start_server ()
140
+ http_kwargs = {"port" : port }
141
+ started = True
142
+
143
+ conforms , _ = validate_dir (
144
+ path , started = started , http_kwargs = http_kwargs , stop = stop
145
+ )
146
+
147
+ stop_server (stop )
148
+
96
149
else :
97
- # Skip validation for .DS_Store files
98
- if Path (path ).name == ".DS_Store" :
99
- lgr .info (f"{ path } is a .DS_Store file and is skipped. " )
150
+
151
+ if Path (path ).name in FILES_TO_SKIP :
152
+ lgr .info (f"Skipping file { path } " )
100
153
return True
154
+
101
155
data = load_file (path , started = False )
102
156
conforms , vtext = validate_data (data )
103
157
if not conforms :
104
158
lgr .critical (f"File { path } has validation errors." )
105
159
raise ValueError (vtext )
160
+
106
161
lgr .info (f"{ path } conforms." )
162
+
107
163
return conforms
0 commit comments