Skip to content

Commit 7a9b9bd

Browse files
author
Hugo Rialan
authored
Merge pull request #1 from devoteamgcloud/dv
New assertion type
2 parents 1c47146 + 231d3ec commit 7a9b9bd

File tree

8 files changed

+97
-6
lines changed

8 files changed

+97
-6
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Dataform Assertions
22

3-
Unlock advanced data testing capabilities with this Dataform package, offering a comprehensive and common suite of assertions designed for testing various facets of your warehouse data, including data freshness, unique keys, row conditions, and data completeness.
3+
Unlock advanced data testing capabilities with this Dataform package, offering a comprehensive and common suite of assertions designed for testing various facets of your warehouse data, including data freshness, unique keys, row conditions, data completeness and referential integrity.
44

55
Your contributions are highly encouraged – whether you have an innovative assertion idea or wish to enhance the existing ones, feel free to open an issue or submit a pull request to enrich the Dataform community.
66

@@ -72,6 +72,7 @@ This package includes the following types of assertions:
7272
- **Unique key conditions**: Check if a given primary key (can be a set of columns) is not duplicated in a table.
7373
- **Data freshness conditions**: Check if the data in a table is fresh enough given some conditions.
7474
- **Data completeness conditions**: Check if a column has less than a given percentage of null values.
75+
- **Referential integrity conditions**: Check if foreign key relationships are maintained between tables.
7576

7677
## Warning
7778

definitions/example.js

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ const commonAssertionsResult = commonAssertions({
88
"tags": ["assertions"],
99
// Sometimes data quality is not good in some environments,
1010
// assertions can be disabled in those environments.
11+
// Set 'vars.env' in 'dataform.json' (read at compile time as 'dataform.projectConfig.vars.env') for this to work.
1112
// "disabledInEnvs": ["dv", "qa"]
1213
},
1314
rowConditions: {
@@ -44,6 +45,19 @@ const commonAssertionsResult = commonAssertions({
4445
"second_table": {
4546
"id": 30
4647
}
48+
},
49+
referentialIntegrityConditions: {
50+
"first_table": [{
51+
"parentKey": "id",
52+
"childTable": "second_table",
53+
"childKey": "id"
54+
},
55+
{
56+
"parentKey": "id",
57+
"childTable": "third_table",
58+
"childKey": "parent_id"
59+
}
60+
]
4761
}
4862
});
4963

definitions/first_table.sqlx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@ config {
22
type: "table"
33
}
44

5+
SELECT
6+
1 AS id,
7+
CURRENT_DATE() AS updated_date
8+
UNION ALL
59
SELECT
610
1 AS id,
711
CURRENT_DATE() AS updated_date

definitions/third_table.sqlx

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
config {
2+
type: "table"
3+
}
4+
5+
SELECT
6+
1 AS parent_id,
7+
CURRENT_DATE() AS updated_date
8+
UNION ALL
9+
SELECT
10+
2 AS parent_id,
11+
CURRENT_DATE() AS updated_date
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/**
2+
* referential_integrity_assertions.js
3+
*
4+
* This file contains a function to create referential integrity assertions for specific tables in a database.
5+
* The assertions are used to check if the foreign key relationships are maintained between tables.
6+
* The conditions for referential integrity checks are defined in an object format:
7+
* { parentTable: [{ parentKey, childTable, childKey }, ...], ... }
8+
*
9+
* The exported function takes in global parameters and the referential integrity conditions, and returns the list of created assertions.
10+
*/
11+
12+
/**
13+
* @param {Object} globalParams - See index.js for details.
14+
* @param {Object} parentTable - The name of the parent table in the foreign key relationship.
15+
* @param {Object} parentKey - The name of the column in the parent table that is the primary key.
16+
* @param {Object} childTable - The name of the child table in the foreign key relationship.
17+
* @param {Object} childKey - The name of the column in the child table that is the foreign key.
18+
*/
19+
20+
const assertions = [];
21+
22+
const createReferentialIntegrityAssertion = (globalParams, parentTable, parentKey, childTable, childKey) => {
23+
24+
const assertion = assert(`assert_referential_integrity_${parentTable}_${childTable}`)
25+
.database(globalParams.database)
26+
.schema(globalParams.schema)
27+
.description(`Check referential integrity for ${childTable}.${childKey} referencing ${parentTable}.${parentKey}`)
28+
.tags("assert-referential-integrity")
29+
.query(ctx => `
30+
SELECT pt.${parentKey}
31+
FROM ${ctx.ref(parentTable)} AS pt
32+
LEFT JOIN ${ctx.ref(childTable)} AS t ON t.${childKey} = pt.${parentKey}
33+
WHERE t.${childKey} IS NULL
34+
`);
35+
36+
(globalParams.tags && globalParams.tags.forEach((tag) => assertion.tags(tag)));
37+
38+
(globalParams.disabledInEnvs && globalParams.disabledInEnvs.includes(dataform.projectConfig.vars.env)) && assertion.disabled();
39+
40+
assertions.push(assertion);
41+
};
42+
43+
module.exports = (globalParams, referentialIntegrityConditions) => {
44+
for (let parentTable in referentialIntegrityConditions) {
45+
const relationships = referentialIntegrityConditions[parentTable];
46+
47+
relationships.forEach(({
48+
parentKey,
49+
childTable,
50+
childKey
51+
}) => {
52+
createReferentialIntegrityAssertion(globalParams, parentTable, parentKey, childTable, childKey);
53+
})
54+
}
55+
return assertions;
56+
};

index.js

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,14 @@
1717
* @property {Object} uniqueKeyConditions - An object mapping table names to unique key conditions. Format: { tableName: [column1, column2, ...], ... }
1818
* @property {Object} dataFreshnessConditions - An object mapping table names to data freshness conditions. Format: { tableName: { delayCondition, timeUnit, dateColumn }, ... }
1919
* @property {Object} dataCompletenessConditions - An object mapping table names to data completeness conditions. Format: { tableName: { columnName: allowedPercentageNull, ... }, ... }
20+
* @property {Object} referentialIntegrityConditions - An object mapping parent table names to referential integrity conditions. Format: { parentTable: [{ parentKey, childTable, childKey }, ...], ... }
2021
*/
2122

2223
const row_condition_assertions = require("./includes/row_condition_assertions");
2324
const unique_key_assertions = require("./includes/unique_key_assertions");
2425
const data_freshness_assertions = require("./includes/data_freshness_assertions");
2526
const data_completeness_assertions = require("./includes/data_completeness_assertions");
27+
const referential_integrity_assertions = require("./includes/referential_integrity_assertions");
2628

2729
module.exports = ({
2830
globalAssertionsParams = {
@@ -35,17 +37,20 @@ module.exports = ({
3537
rowConditions = {},
3638
uniqueKeyConditions = {},
3739
dataFreshnessConditions = {},
38-
dataCompletenessConditions = {}
40+
dataCompletenessConditions = {},
41+
referentialIntegrityConditions = {}
3942
}) => {
4043
const rowConditionAssertionsResult = row_condition_assertions(globalAssertionsParams, rowConditions);
4144
const uniqueKeyAssertionsResult = unique_key_assertions(globalAssertionsParams, uniqueKeyConditions);
4245
const dataFreshnessAssertionsResult = data_freshness_assertions(globalAssertionsParams, dataFreshnessConditions);
4346
const dataCompletenessAssertionsResult = data_completeness_assertions(globalAssertionsParams, dataCompletenessConditions);
47+
const referentialIntegrityAssertionsResult = referential_integrity_assertions(globalAssertionsParams, referentialIntegrityConditions); // New assertion
4448

4549
return {
4650
rowConditionAssertions: rowConditionAssertionsResult,
4751
uniqueKeyAssertions: uniqueKeyAssertionsResult,
4852
dataFreshnessAssertions: dataFreshnessAssertionsResult,
49-
dataCompletenessAssertions: dataCompletenessAssertionsResult
53+
dataCompletenessAssertions: dataCompletenessAssertionsResult,
54+
referentialIntegrityAssertions: referentialIntegrityAssertionsResult
5055
};
5156
}

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@devoteamgcloud/dataform-assertions",
3-
"version": "1.0.2",
3+
"version": "1.1.0",
44
"repository": {
55
"type": "git",
66
"url": "https://github.com/devoteamgcloud/dataform-assertions.git"

0 commit comments

Comments
 (0)