Skip to content

Commit e64697a

Browse files
PR comments addressed
1 parent 497feba commit e64697a

File tree

2 files changed

+177
-81
lines changed

2 files changed

+177
-81
lines changed

dpytools/validation/json/validation.py

Lines changed: 59 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -6,63 +6,77 @@
66
import jsonschema
77
from jsonschema import ValidationError
88

9-
"""
10-
# data_dict is for passing in a dictionary to be validated
11-
validate_json_schema("/path/to/schema.json", data_dict=some_dictionary)
12-
13-
# msg is to include a helpful context when debugging (i.e "what we're validating")
14-
validate_json_schema("/path/to/schema.json", data_dict=some_dictionary, msg="Some helpful message should this validation fail")
15-
16-
validate_json_schema("/path/to/schema.json", data_path="/path/to/some/json", msg="Some helpful message should this validation fail")
17-
18-
# indent should pretty print the json contents of the error to make it
19-
# more easily parsed by humans
20-
validate_json_schema("/path/to/schema.json", data_dict=some_dictionary, indent=2)
21-
"""
22-
239

2410
def validate_json_schema(
25-
schema_path: str,
26-
data_dict_or_path: Union[Dict, str],
27-
msg: str = "",
28-
indent: int = 2,
29-
) -> Optional[ValidationError]:
11+
schema_path: Union[Path, str],
12+
data_dict: Optional[Dict] = None,
13+
data_path: Union[Path, str, None] = None,
14+
error_msg: Optional[str] = "",
15+
indent: Optional[int] = 2,
16+
):
3017
"""
31-
Validate metadata.json files against schema provided.
18+
Validate metadata.json files against schema.
19+
20+
Exactly one of `data_dict` or `data_path` must be provided, not both.
3221
33-
`schema_path`: file path of schema to validate against
34-
`data_dict_or_path`: file path or dictionary of data to be validated
35-
`msg`: optional string to provide additional information about validation
36-
`indent`: optional integer to be used when indenting json output
22+
`error_msg` and `indent` are used to format the error message if validation fails.
3723
"""
24+
# Confirm that *either* `data_dict` *or* `data_path` has been provided, otherwise raise ValueError
25+
if data_dict and data_path:
26+
raise ValueError(
27+
"Both a dictionary and file path of data have been provided - please specify either one or the other, not both."
28+
)
29+
if data_dict is None and data_path is None:
30+
raise ValueError(
31+
"Please provide either a dictionary or a file path of the data to be validated against the schema."
32+
)
33+
3834
# Load schema as dict
39-
parsed_schema_path = urlparse(schema_path)
40-
if parsed_schema_path.scheme == "http":
41-
# TODO Load schema from URL
42-
raise NotImplementedError("Validation from remote schema not yet supported")
43-
else:
44-
with open(Path(schema_path), "r") as f:
45-
schema_from_path = json.load(f)
35+
if isinstance(schema_path, str):
36+
parsed_schema_path = urlparse(schema_path)
37+
if parsed_schema_path.scheme == "http":
38+
# TODO Load schema from URL
39+
raise NotImplementedError("Validation from remote schema not yet supported")
40+
schema_path = Path(schema_path).absolute()
41+
if not schema_path.exists():
42+
raise ValueError(f"Schema path '{schema_path}' does not exist")
43+
with open(schema_path, "r") as f:
44+
schema_from_path = json.load(f)
4645

47-
# Load data as dict
48-
if isinstance(data_dict_or_path, Dict):
49-
data_to_validate = data_dict_or_path
50-
elif isinstance(data_dict_or_path, str):
51-
with open(Path(data_dict_or_path), "r") as f:
46+
# Load data to be validated as dict
47+
if data_dict:
48+
if not isinstance(data_dict, Dict):
49+
raise ValueError("Invalid data format")
50+
data_to_validate = data_dict
51+
52+
if data_path:
53+
if isinstance(data_path, str):
54+
data_path = Path(data_path).absolute()
55+
if not isinstance(data_path, Path):
56+
raise ValueError("Invalid data format")
57+
if not data_path.exists():
58+
raise ValueError(f"Data path '{data_path}' does not exist")
59+
with open(data_path, "r") as f:
5260
data_to_validate = json.load(f)
53-
else:
54-
raise ValueError("Invalid data format")
5561

5662
# Validate data against schema
57-
print(msg)
5863
try:
5964
jsonschema.validate(data_to_validate, schema_from_path)
6065
except jsonschema.ValidationError as err:
6166
# TODO Handle jsonschema.SchemaError?
62-
print(f"Error when validating data: {err.message}")
63-
# If the error relates to a specific field, print the error's location
67+
# If error is in a specific field, get the JSON path of the error location
6468
if err.json_path != "$":
65-
print(f"Error in data field: {err.json_path}")
66-
# Print the data that failed validation
67-
print(f"Contents of data:\n{json.dumps(data_to_validate, indent=indent)}")
68-
return err
69+
error_location = err.json_path
70+
else:
71+
error_location = "JSON data"
72+
# Create formatted message to be output on ValidationError
73+
if error_msg or indent:
74+
formatted_msg = f"""
75+
Exception: {error_msg}
76+
Error details: {err.message}
77+
Error location: {error_location}
78+
JSON data:
79+
{json.dumps(data_to_validate, indent=indent)}
80+
"""
81+
raise ValidationError(formatted_msg) from err
82+
raise err

tests/test_json_validation.py

Lines changed: 118 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from pathlib import Path
2+
from jsonschema import ValidationError
23

34
import pytest
45
from dpytools.validation.json.validation import validate_json_schema
@@ -12,9 +13,9 @@ def test_validate_json_schema_data_path():
1213
pipeline_config = "tests/test_cases/pipeline_config.json"
1314
assert (
1415
validate_json_schema(
15-
pipeline_config_schema,
16-
pipeline_config,
17-
"Validating pipeline_config.json",
16+
schema_path=pipeline_config_schema,
17+
data_path=pipeline_config,
18+
error_msg="Validating pipeline_config.json",
1819
)
1920
is None
2021
)
@@ -35,22 +36,74 @@ def test_validate_json_schema_data_dict():
3536
}
3637
assert (
3738
validate_json_schema(
38-
pipeline_config_schema,
39-
pipeline_config,
40-
"Validating pipeline_config dict",
39+
schema_path=pipeline_config_schema,
40+
data_dict=pipeline_config,
41+
error_msg="Validating pipeline_config dict",
4142
)
4243
is None
4344
)
4445

4546

46-
def test_validate_json_schema_invalid_data_format():
47+
def test_validate_json_schema_data_dict_and_data_path():
48+
pipeline_config_schema = "tests/test_cases/pipeline_config_schema.json"
49+
pipeline_config_path = "tests/test_cases/pipeline_config.json"
50+
pipeline_config_dict = {
51+
"schema": "airflow.schemas.ingress.sdmx.v1.schema.json",
52+
"required_files": [{"matches": "*.sdmx", "count": 1}],
53+
"supplementary_distributions": [{"matches": "*.sdmx", "count": 1}],
54+
"priority": 1,
55+
"contact": ["[email protected]"],
56+
"pipeline": "default",
57+
}
58+
with pytest.raises(ValueError) as err:
59+
validate_json_schema(
60+
schema_path=pipeline_config_schema,
61+
data_dict=pipeline_config_dict,
62+
data_path=pipeline_config_path,
63+
)
64+
assert (
65+
"Both a dictionary and file path of data have been provided - please specify either one or the other, not both."
66+
in str(err.value)
67+
)
68+
69+
70+
def test_validate_json_schema_no_data_dict_or_data_path():
71+
pipeline_config_schema = "tests/test_cases/pipeline_config_schema.json"
72+
73+
with pytest.raises(ValueError) as err:
74+
validate_json_schema(
75+
schema_path=pipeline_config_schema,
76+
)
77+
assert (
78+
"Please provide either a dictionary or a file path of the data to be validated against the schema."
79+
in str(err.value)
80+
)
81+
82+
83+
def test_validate_json_schema_invalid_data_path_format():
84+
"""
85+
Raise ValueError if data_path is not a file path
86+
"""
87+
pipeline_config_schema = "tests/test_cases/pipeline_config_schema.json"
88+
pipeline_config = ["Invalid", "data", "format"]
89+
with pytest.raises(ValueError) as err:
90+
validate_json_schema(
91+
schema_path=pipeline_config_schema, data_path=pipeline_config
92+
)
93+
assert "Invalid data format" in str(err.value)
94+
95+
96+
def test_validate_json_schema_invalid_data_dict_format():
4797
"""
48-
Raise ValueError if data is not a file path or dictionary
98+
Raise ValueError if data_dict is not a dictionary
4999
"""
50100
pipeline_config_schema = "tests/test_cases/pipeline_config_schema.json"
51101
pipeline_config = ["Invalid", "data", "format"]
52-
with pytest.raises(ValueError):
53-
validate_json_schema(pipeline_config_schema, pipeline_config)
102+
with pytest.raises(ValueError) as err:
103+
validate_json_schema(
104+
schema_path=pipeline_config_schema, data_dict=pipeline_config
105+
)
106+
assert "Invalid data format" in str(err.value)
54107

55108

56109
def test_validate_json_schema_url():
@@ -59,8 +112,33 @@ def test_validate_json_schema_url():
59112
"""
60113
pipeline_config_schema = "http://example.org"
61114
pipeline_config = "tests/test_cases/pipeline_config.json"
62-
with pytest.raises(NotImplementedError):
63-
validate_json_schema(pipeline_config_schema, pipeline_config)
115+
with pytest.raises(NotImplementedError) as err:
116+
validate_json_schema(
117+
schema_path=pipeline_config_schema, data_path=pipeline_config
118+
)
119+
assert "Validation from remote schema not yet supported" in str(err.value)
120+
121+
122+
def test_validate_json_schema_invalid_schema_path():
123+
pipeline_config_schema = "tests/test_cases/does_not_exist.json"
124+
pipeline_config = "tests/test_cases/pipeline_config.json"
125+
schema_path = Path(pipeline_config_schema).absolute()
126+
with pytest.raises(ValueError) as err:
127+
validate_json_schema(
128+
schema_path=pipeline_config_schema, data_path=pipeline_config
129+
)
130+
assert f"Schema path '{schema_path}' does not exist" in str(err.value)
131+
132+
133+
def test_validate_json_schema_invalid_data_path():
134+
pipeline_config_schema = "tests/test_cases/pipeline_config_schema.json"
135+
pipeline_config = "tests/test_cases/does_not_exist.json"
136+
data_path = Path(pipeline_config).absolute()
137+
with pytest.raises(ValueError) as err:
138+
validate_json_schema(
139+
schema_path=pipeline_config_schema, data_path=pipeline_config
140+
)
141+
assert f"Data path '{data_path}' does not exist" in str(err.value)
64142

65143

66144
def test_validate_json_schema_data_path_required_field_missing():
@@ -69,12 +147,13 @@ def test_validate_json_schema_data_path_required_field_missing():
69147
"""
70148
pipeline_config_schema = "tests/test_cases/pipeline_config_schema.json"
71149
pipeline_config = "tests/test_cases/pipeline_config_missing_required_field.json"
72-
err = validate_json_schema(
73-
pipeline_config_schema,
74-
pipeline_config,
75-
"Validating pipeline_config_missing_required_field.json",
76-
)
77-
assert err.message == "'priority' is a required property"
150+
with pytest.raises(ValidationError) as err:
151+
validate_json_schema(
152+
schema_path=pipeline_config_schema,
153+
data_path=pipeline_config,
154+
error_msg="Error validating pipeline_config_missing_required_field.json",
155+
)
156+
assert "'priority' is a required property" in str(err.value)
78157

79158

80159
def test_validate_json_schema_data_path_invalid_data_type():
@@ -83,12 +162,13 @@ def test_validate_json_schema_data_path_invalid_data_type():
83162
"""
84163
pipeline_config_schema = "tests/test_cases/pipeline_config_schema.json"
85164
pipeline_config = "tests/test_cases/pipeline_config_invalid_data_type.json"
86-
err = validate_json_schema(
87-
pipeline_config_schema,
88-
pipeline_config,
89-
"Validating pipeline_config_invalid_data_type.json",
90-
)
91-
assert err.message == "'1' is not of type 'integer'"
165+
with pytest.raises(ValidationError) as err:
166+
validate_json_schema(
167+
schema_path=pipeline_config_schema,
168+
data_path=pipeline_config,
169+
error_msg="Error validating pipeline_config_invalid_data_type.json",
170+
)
171+
assert "'1' is not of type 'integer'" in str(err.value)
92172

93173

94174
def test_validate_json_schema_data_dict_required_field_missing():
@@ -103,12 +183,13 @@ def test_validate_json_schema_data_dict_required_field_missing():
103183
"contact": ["[email protected]"],
104184
"pipeline": "default",
105185
}
106-
err = validate_json_schema(
107-
pipeline_config_schema,
108-
pipeline_config,
109-
"Validating pipeline_config with required field missing",
110-
)
111-
assert err.message == "'priority' is a required property"
186+
with pytest.raises(ValidationError) as err:
187+
validate_json_schema(
188+
schema_path=pipeline_config_schema,
189+
data_dict=pipeline_config,
190+
error_msg="Error validating pipeline_config with required field missing",
191+
)
192+
assert "'priority' is a required property" in str(err.value)
112193

113194

114195
def test_validate_json_schema_data_dict_invalid_data_type():
@@ -124,9 +205,10 @@ def test_validate_json_schema_data_dict_invalid_data_type():
124205
"contact": ["[email protected]"],
125206
"pipeline": "default",
126207
}
127-
err = validate_json_schema(
128-
pipeline_config_schema,
129-
pipeline_config,
130-
f"Validating pipeline_config dict with invalid data type",
131-
)
132-
assert err.message == "'1' is not of type 'integer'"
208+
with pytest.raises(ValidationError) as err:
209+
validate_json_schema(
210+
schema_path=pipeline_config_schema,
211+
data_dict=pipeline_config,
212+
error_msg="Error validating pipeline_config dict with invalid data type",
213+
)
214+
assert "'1' is not of type 'integer'" in str(err.value)

0 commit comments

Comments
 (0)