Skip to content

Commit fc5cdfd

Browse files
author
Hugo Rialan
authored
Merge pull request #11 from tanukifk/sandbox
add config block
2 parents 7eb982a + 23f710e commit fc5cdfd

File tree

7 files changed

+115
-34
lines changed

7 files changed

+115
-34
lines changed

definitions/example.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@ const commonAssertionsResult = commonAssertions({
1111
// Set the 'dataform.projectConfig.vars.env' var in 'dataform.json' for this to work.
1212
// "disabledInEnvs": ["dv", "qa"]
1313
},
14+
config: {
15+
"first_table": {
16+
"where": "updated_date >= CURRENT_DATE() - 7"
17+
},
18+
},
1419
rowConditions: {
1520
"first_table": {
1621
"id_not_null": "id IS NOT NULL",

includes/data_completeness_assertions.js

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,14 @@
1111

1212
/**
1313
* @param {Object} globalParams - See index.js for details.
14+
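* @param {string|boolean} filter - SQL boolean expression placed in the `WHERE` clause to restrict the rows that are checked; `true` (no filtering) when the table has no `where` entry in `config`.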
1415
* @param {string} tableName - The name of the table to check for data completeness.
1516
* @param {Object} columnConditions - An object mapping column names to their allowed percentage of null values. If a value is an object, it should have an `allowedPercentageNull` property.
1617
*/
1718

1819
const assertions = [];
1920

20-
const createDataCompletenessAssertion = (globalParams, tableName, columnConditions) => {
21+
const createDataCompletenessAssertion = (globalParams, filter, tableName, columnConditions) => {
2122

2223
for (let columnName in columnConditions) {
2324
const allowedPercentageNull = columnConditions[columnName];
@@ -27,10 +28,21 @@ const createDataCompletenessAssertion = (globalParams, tableName, columnConditio
2728
.schema(globalParams.schema)
2829
.description(`Check data completeness for ${tableName}.${columnName}, allowed percentage of null values: ${allowedPercentageNull}`)
2930
.tags("assert-data-completeness")
30-
.query(ctx => `SELECT COUNT(*) AS total_rows,
31+
.query(ctx => `
32+
WITH
33+
filtering AS (
34+
SELECT
35+
*
36+
FROM
37+
${ctx.ref(tableName)}
38+
WHERE
39+
${filter}
40+
)
41+
SELECT COUNT(*) AS total_rows,
3142
SUM(CASE WHEN ${columnName} IS NULL THEN 1 ELSE 0 END) AS null_count
32-
FROM ${ctx.ref(tableName)}
33-
HAVING SAFE_DIVIDE(null_count, total_rows) > ${allowedPercentageNull / 100} AND null_count > 0 AND total_rows > 0`);
43+
FROM filtering
44+
HAVING SAFE_DIVIDE(null_count, total_rows) > ${allowedPercentageNull / 100} AND null_count > 0 AND total_rows > 0
45+
`);
3446

3547
(globalParams.tags && globalParams.tags.forEach((tag) => assertion.tags(tag)));
3648

@@ -41,11 +53,12 @@ const createDataCompletenessAssertion = (globalParams, tableName, columnConditio
4153

4254
};
4355

44-
module.exports = (globalParams, dataCompletenessConditions) => {
56+
module.exports = (globalParams, config, dataCompletenessConditions) => {
4557
// Loop through dataCompletenessConditions to create data completeness check assertions.
4658
for (let tableName in dataCompletenessConditions) {
4759
const columnConditions = dataCompletenessConditions[tableName];
48-
createDataCompletenessAssertion(globalParams, tableName, columnConditions);
60+
const filter = config[tableName]?.where ?? true;
61+
createDataCompletenessAssertion(globalParams, filter, tableName, columnConditions);
4962
}
5063

5164
return assertions;

includes/data_freshness_assertions.js

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,29 +9,41 @@
99

1010
/**
1111
* @param {Object} globalParams - See index.js for details.
12+
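* @param {string|boolean} filter - SQL boolean expression placed in the `WHERE` clause to restrict the rows that are checked; `true` (no filtering) when the table has no `where` entry in `config`.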
1213
* @param {string} tableName - The name of the table to check for data freshness.
1314
* @param {number} delayCondition - The maximum allowed delay (in units specified by `timeUnit`) for the data to be considered fresh.
1415
* @param {string} timeUnit - The unit of time to use for the delay condition. 'DAY', 'WEEK', 'MONTH', 'QUARTER' and 'YEAR' are measured with the SQL `DATE_DIFF` function; any other unit (such as 'HOUR' or 'MINUTE') is measured with `TIMESTAMP_DIFF`.
1516
* @param {string} dateColumn - The name of the date column to check for data freshness.
17+
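* @param {string} [timeZone="UTC"] - The time zone passed to `CURRENT_DATE` when the delay is measured with `DATE_DIFF`; it is not used for units measured with `TIMESTAMP_DIFF`.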
1618
*/
1719

1820
const assertions = [];
1921

20-
const createDataFreshnessAssertion = (globalParams, tableName, delayCondition, timeUnit, dateColumn, timeZone = "UTC") => {
22+
const createDataFreshnessAssertion = (globalParams, filter, tableName, delayCondition, timeUnit, dateColumn, timeZone = "UTC") => {
23+
2124
const assertion = assert(`assert_freshness_${tableName}`)
2225
.database(globalParams.database)
2326
.schema(globalParams.schema)
2427
.description(`Assert that data in ${tableName} is fresh with a delay less than ${delayCondition} ${timeUnit}`)
2528
.tags("assert-data-freshness")
2629
.query(ctx => `
2730
WITH
31+
filtering AS (
32+
SELECT
33+
*
34+
FROM
35+
${ctx.ref(tableName)}
36+
WHERE
37+
${filter}
38+
),
39+
2840
freshness AS (
2941
SELECT
3042
${["DAY", "WEEK", "MONTH", "QUARTER", "YEAR"].includes(timeUnit)
3143
? `DATE_DIFF(CURRENT_DATE("${timeZone}"), MAX(${dateColumn}), ${timeUnit})`
3244
: `TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), MAX(${dateColumn}), ${timeUnit})`} AS delay
3345
FROM
34-
${ctx.ref(tableName)}
46+
filtering
3547
)
3648
SELECT
3749
*
@@ -48,7 +60,7 @@ const createDataFreshnessAssertion = (globalParams, tableName, delayCondition, t
4860
assertions.push(assertion);
4961
};
5062

51-
module.exports = (globalParams, freshnessConditions) => {
63+
module.exports = (globalParams, config, freshnessConditions) => {
5264

5365
// Loop through freshnessConditions to create assertions.
5466
for (let tableName in freshnessConditions) {
@@ -58,7 +70,8 @@ module.exports = (globalParams, freshnessConditions) => {
5870
dateColumn,
5971
timeZone
6072
} = freshnessConditions[tableName];
61-
createDataFreshnessAssertion(globalParams, tableName, delayCondition, timeUnit, dateColumn, timeZone);
73+
const filter = config[tableName]?.where ?? true;
74+
createDataFreshnessAssertion(globalParams, filter, tableName, delayCondition, timeUnit, dateColumn, timeZone);
6275
}
6376

6477
return assertions;

includes/referential_integrity_assertions.js

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
/**
1313
* @param {Object} globalParams - See index.js for details.
14+
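* @param {string|boolean} parentFilter - SQL boolean expression placed in the `WHERE` clause to restrict the parent table rows; `true` (no filtering) when the parent table has no `where` entry in `config`.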
15+
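* @param {string|boolean} childFilter - SQL boolean expression placed in the `WHERE` clause to restrict the child table rows; `true` (no filtering) when the child table has no `where` entry in `config`.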
1416
* @param {string} parentTable - The name of the parent table in the foreign key relationship.
1517
* @param {string} parentKey - The name of the column in the parent table that is the primary key.
1618
* @param {string} childTable - The name of the child table in the foreign key relationship.
@@ -19,18 +21,37 @@
1921

2022
const assertions = [];
2123

22-
const createReferentialIntegrityAssertion = (globalParams, parentTable, parentKey, childTable, childKey) => {
24+
const createReferentialIntegrityAssertion = (globalParams, parentFilter, childFilter, parentTable, parentKey, childTable, childKey) => {
2325

2426
const assertion = assert(`assert_referential_integrity_${parentTable}_${childTable}`)
2527
.database(globalParams.database)
2628
.schema(globalParams.schema)
2729
.description(`Check referential integrity for ${childTable}.${childKey} referencing ${parentTable}.${parentKey}`)
2830
.tags("assert-referential-integrity")
2931
.query(ctx => `
30-
SELECT pt.${parentKey}
31-
FROM ${ctx.ref(parentTable)} AS pt
32-
LEFT JOIN ${ctx.ref(childTable)} AS t ON t.${childKey} = pt.${parentKey}
33-
WHERE t.${childKey} IS NULL
32+
WITH
33+
parent_filtering AS (
34+
SELECT
35+
*
36+
FROM
37+
${ctx.ref(parentTable)}
38+
WHERE
39+
${parentFilter}
40+
),
41+
42+
child_filtering AS (
43+
SELECT
44+
*
45+
FROM
46+
${ctx.ref(childTable)}
47+
WHERE
48+
${childFilter}
49+
)
50+
51+
SELECT pt.${parentKey}
52+
FROM parent_filtering AS pt
53+
LEFT JOIN child_filtering AS t ON t.${childKey} = pt.${parentKey}
54+
WHERE t.${childKey} IS NULL
3455
`);
3556

3657
(globalParams.tags && globalParams.tags.forEach((tag) => assertion.tags(tag)));
@@ -40,16 +61,18 @@ const createReferentialIntegrityAssertion = (globalParams, parentTable, parentKe
4061
assertions.push(assertion);
4162
};
4263

43-
module.exports = (globalParams, referentialIntegrityConditions) => {
64+
module.exports = (globalParams, config, referentialIntegrityConditions) => {
4465
for (let parentTable in referentialIntegrityConditions) {
4566
const relationships = referentialIntegrityConditions[parentTable];
67+
const parentFilter = config[parentTable]?.where ?? true;
4668

4769
relationships.forEach(({
4870
parentKey,
4971
childTable,
5072
childKey
5173
}) => {
52-
createReferentialIntegrityAssertion(globalParams, parentTable, parentKey, childTable, childKey);
74+
const childFilter = config[childTable]?.where ?? true;
75+
createReferentialIntegrityAssertion(globalParams, parentFilter, childFilter, parentTable, parentKey, childTable, childKey);
5376
})
5477
}
5578
return assertions;

includes/row_condition_assertions.js

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,22 +11,34 @@
1111

1212
/**
1313
* @param {Object} globalParams - See index.js for details.
14+
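* @param {string|boolean} filter - SQL boolean expression placed in the `WHERE` clause to restrict the rows that are checked; `true` (no filtering) when the table has no `where` entry in `config`.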
1415
* @param {string} tableName - The name of the table to check for row conditions.
1516
* @param {string} conditionName - The name of the condition to check.
1617
* @param {string} conditionQuery - The SQL boolean expression that defines the condition to check; rows matching `NOT (conditionQuery)` are returned as failures.
1718
*/
1819

1920
const assertions = [];
2021

21-
const createRowConditionAssertion = (globalParams, tableName, conditionName, conditionQuery) => {
22+
const createRowConditionAssertion = (globalParams, filter, tableName, conditionName, conditionQuery) => {
2223
const assertion = assert(`assert_${conditionName.replace(/-/g , "_")}_${tableName}`)
2324
.database(globalParams.database)
2425
.schema(globalParams.schema)
2526
.description(`Assert that rows in ${tableName} meet ${conditionName}`)
2627
.tags("assert-row-condition")
27-
.query(ctx => `SELECT "Condition not met: ${conditionQuery}, Table: ${ctx.ref(tableName)}" AS assertion_description
28-
FROM ${ctx.ref(tableName)}
29-
WHERE NOT (${conditionQuery})`);
28+
.query(ctx => `
29+
WITH
30+
filtering AS (
31+
SELECT
32+
*
33+
FROM
34+
${ctx.ref(tableName)}
35+
WHERE
36+
${filter}
37+
)
38+
SELECT "Condition not met: ${conditionQuery}, Table: ${ctx.ref(tableName)}" AS assertion_description
39+
FROM filtering
40+
WHERE NOT (${conditionQuery})
41+
`);
3042

3143
(globalParams.tags && globalParams.tags.forEach((tag) => assertion.tags(tag)));
3244

@@ -35,13 +47,14 @@ const createRowConditionAssertion = (globalParams, tableName, conditionName, con
3547
assertions.push(assertion);
3648
};
3749

38-
module.exports = (globalParams, rowConditions) => {
50+
module.exports = (globalParams, config, rowConditions) => {
3951

4052
// Loop through rowConditions to create assertions.
4153
for (let tableName in rowConditions) {
4254
for (let conditionName in rowConditions[tableName]) {
4355
const conditionQuery = rowConditions[tableName][conditionName];
44-
createRowConditionAssertion(globalParams, tableName, conditionName, conditionQuery);
56+
const filter = config[tableName]?.where ?? true;
57+
createRowConditionAssertion(globalParams, filter, tableName, conditionName, conditionQuery);
4558
}
4659
}
4760

includes/unique_key_assertions.js

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,24 +11,36 @@
1111

1212
/**
1313
* @param {Object} globalParams - See index.js for details.
14+
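* @param {string|boolean} filter - SQL boolean expression placed in the `WHERE` clause to restrict the rows that are checked; `true` (no filtering) when the table has no `where` entry in `config`.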
1415
* @param {string} tableName - The name of the table to check for unique keys.
1516
* @param {Array} columns - An array of column names that should form a unique key.
1617
*/
1718

1819
const assertions = [];
1920

20-
const createUniqueKeyAssertion = (globalParams, tableName, columns) => {
21+
const createUniqueKeyAssertion = (globalParams, filter, tableName, columns) => {
2122
const uniqueColumns = columns.join(', ');
2223

2324
const assertion = assert(`assert_unique_key_${tableName}`)
2425
.database(globalParams.database)
2526
.schema(globalParams.schema)
2627
.description(`Check that values in columns (${uniqueColumns}) in ${tableName} form a unique key`)
2728
.tags("assert-unique-key")
28-
.query(ctx => `SELECT ${uniqueColumns}
29-
FROM ${ctx.ref(tableName)}
29+
.query(ctx => `
30+
WITH
31+
filtering AS (
32+
SELECT
33+
*
34+
FROM
35+
${ctx.ref(tableName)}
36+
WHERE
37+
${filter}
38+
)
39+
SELECT ${uniqueColumns}
40+
FROM filtering
3041
GROUP BY ${uniqueColumns}
31-
HAVING COUNT(*) > 1`);
42+
HAVING COUNT(*) > 1
43+
`);
3244

3345
(globalParams.tags && globalParams.tags.forEach((tag) => assertion.tags(tag)));
3446

@@ -37,12 +49,13 @@ const createUniqueKeyAssertion = (globalParams, tableName, columns) => {
3749
assertions.push(assertion);
3850
};
3951

40-
module.exports = (globalParams, uniqueKeyConditions) => {
52+
module.exports = (globalParams, config, uniqueKeyConditions) => {
4153

4254
// Loop through uniqueKeyConditions to create unique key check assertions.
4355
for (let tableName in uniqueKeyConditions) {
4456
const columns = uniqueKeyConditions[tableName];
45-
createUniqueKeyAssertion(globalParams, tableName, columns);
57+
const filter = config[tableName]?.where ?? true;
58+
createUniqueKeyAssertion(globalParams, filter, tableName, columns);
4659
}
4760

4861
return assertions;

index.js

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,18 @@ module.exports = ({
3434
tags: [],
3535
disabledInEnvs: []
3636
},
37+
config = {},
3738
rowConditions = {},
3839
uniqueKeyConditions = {},
3940
dataFreshnessConditions = {},
4041
dataCompletenessConditions = {},
4142
referentialIntegrityConditions = {}
4243
}) => {
43-
const rowConditionAssertionsResult = row_condition_assertions(globalAssertionsParams, rowConditions);
44-
const uniqueKeyAssertionsResult = unique_key_assertions(globalAssertionsParams, uniqueKeyConditions);
45-
const dataFreshnessAssertionsResult = data_freshness_assertions(globalAssertionsParams, dataFreshnessConditions);
46-
const dataCompletenessAssertionsResult = data_completeness_assertions(globalAssertionsParams, dataCompletenessConditions);
47-
const referentialIntegrityAssertionsResult = referential_integrity_assertions(globalAssertionsParams, referentialIntegrityConditions); // New assertion
44+
const rowConditionAssertionsResult = row_condition_assertions(globalAssertionsParams, config, rowConditions);
45+
const uniqueKeyAssertionsResult = unique_key_assertions(globalAssertionsParams, config, uniqueKeyConditions);
46+
const dataFreshnessAssertionsResult = data_freshness_assertions(globalAssertionsParams, config, dataFreshnessConditions);
47+
const dataCompletenessAssertionsResult = data_completeness_assertions(globalAssertionsParams, config, dataCompletenessConditions);
48+
const referentialIntegrityAssertionsResult = referential_integrity_assertions(globalAssertionsParams, config, referentialIntegrityConditions); // New assertion
4849

4950
return {
5051
rowConditionAssertions: rowConditionAssertionsResult,

0 commit comments

Comments
 (0)