Skip to content

Commit dd0d08d

Browse files
author
Hugo Rialan
authored
Merge pull request #4 from KazuSh1geru/feature/expansion_schema
Fix Issue #3: Add Support for Identifying Identical Table Names Across Different Data Sets
2 parents 331f71a + 4d2f8ab commit dd0d08d

File tree

6 files changed

+156
-105
lines changed

6 files changed

+156
-105
lines changed

definitions/example.js

Lines changed: 57 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -17,57 +17,74 @@ const commonAssertionsResult = commonAssertions({
1717
},
1818
},
1919
rowConditions: {
20-
"first_table": {
21-
"id_not_null": "id IS NOT NULL",
22-
"id_strict_positive": "id > 0"
23-
},
24-
"second_table": {
25-
"id_in_accepted_values": "id IN (1, 2, 3)"
20+
// Format: "schema": { "table": { "conditionName": "conditionQuery", ... }, ... }
21+
"dataform": {
22+
"first_table": {
23+
"id_not_null": "id IS NOT NULL",
24+
"id_strict_positive": "id > 0"
25+
},
26+
"second_table": {
27+
"id_in_accepted_values": "id IN (1, 2, 3)"
28+
}
2629
}
2730
},
2831
uniqueKeyConditions: {
29-
"first_table": ["id"],
30-
"second_table": ["id", "updated_date"]
32+
// Format: "schema": { "table": [column1, column2, ...], ... }
33+
"dataform": {
34+
"first_table": ["id"],
35+
"second_table": ["id", "updated_date"]
36+
}
3137
},
3238
dataFreshnessConditions: {
33-
"first_table": {
34-
"dateColumn": "updated_date",
35-
"timeUnit": "DAY",
36-
"delayCondition": 1,
37-
"timeZone": "America/Los_Angeles"
38-
},
39-
"second_table": {
40-
// If timeUnit is not DAY, WEEK, MONTH, QUARTER, or YEAR, dateColumn should be a TIMESTAMP.
41-
// Check here for valid Date time units: https://cloud.google.com/bigquery/docs/reference/standard-sql/date_functions#date_diff
42-
// Check here for valid Timestamp time units: https://cloud.google.com/bigquery/docs/reference/standard-sql/timestamp_functions#timestamp_diff
43-
"dateColumn": "TIMESTAMP(updated_date)",
44-
"timeUnit": "HOUR",
45-
"delayCondition": 3,
46-
"timeZone": "-08"
39+
// Format: "schema": { "table": { "dateColumn", "timeUnit", "delayCondition" }, ... }
40+
"dataform": {
41+
"first_table": {
42+
"dateColumn": "updated_date",
43+
"timeUnit": "DAY",
44+
"delayCondition": 1,
45+
"timeZone": "America/Los_Angeles"
46+
},
47+
"second_table": {
48+
// If timeUnit is not DAY, WEEK, MONTH, QUARTER, or YEAR, dateColumn should be a TIMESTAMP.
49+
// Check here for valid Date time units: https://cloud.google.com/bigquery/docs/reference/standard-sql/date_functions#date_diff
50+
// Check here for valid Timestamp time units: https://cloud.google.com/bigquery/docs/reference/standard-sql/timestamp_functions#timestamp_diff
51+
"dateColumn": "TIMESTAMP(updated_date)",
52+
"timeUnit": "HOUR",
53+
"delayCondition": 3,
54+
"timeZone": "-08"
55+
}
4756
}
4857
},
4958
dataCompletenessConditions: {
50-
"first_table": {
51-
// Format: "column": allowedPercentageNull
52-
"updated_date": 1, // 1% of null values allowed in the updated_date column
53-
"id": 20
54-
},
55-
"second_table": {
56-
"id": 30
59+
// Format: "schema": { "table": { "column": allowedPercentageNull, ... }, ... }
60+
"dataform": {
61+
"first_table": {
62+
// Format: "column": allowedPercentageNull
63+
"updated_date": 1, // 1% of null values allowed in the updated_date column
64+
"id": 20
65+
},
66+
"second_table": {
67+
"id": 30
68+
}
5769
}
5870
},
5971
referentialIntegrityConditions: {
60-
"first_table": [{
61-
"parentKey": "id",
62-
"childTable": "second_table",
63-
"childKey": "id"
64-
},
65-
{
66-
"parentKey": "id",
67-
"childTable": "third_table",
68-
"childKey": "parent_id"
69-
}
70-
]
72+
// Format: "parentSchema": { "parentTable": [{ parentKey, childSchema, childTable, childKey }, ...], ... }
73+
"dataform": {
74+
"first_table": [{
75+
"parentKey": "id",
76+
"childSchema": "dataform",
77+
"childTable": "second_table",
78+
"childKey": "id"
79+
},
80+
{
81+
"parentKey": "id",
82+
"childSchema": "dataform",
83+
"childTable": "third_table",
84+
"childKey": "parent_id"
85+
}
86+
]
87+
}
7188
}
7289
});
7390

includes/data_completeness_assertions.js

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,30 +11,30 @@
1111

1212
/**
1313
* @param {Object} globalParams - See index.js for details.
14-
* @param {string} filter - The condition to filter the data.
14+
* @param {string} schemaName - The name of the schema containing the table to check for data completeness.
1515
* @param {string} tableName - The name of the table to check for data completeness.
16+
* @param {string} filter - The condition to filter the data.
1617
* @param {Object} columnConditions - An object mapping column names to their allowed percentage of null values. If a value is an object, it should have an `allowedPercentageNull` property.
1718
*/
1819

1920
const assertions = [];
2021

21-
const createDataCompletenessAssertion = (globalParams, filter, tableName, columnConditions) => {
22-
22+
const createDataCompletenessAssertion = (globalParams, schemaName, tableName, filter, columnConditions) => {
2323
for (let columnName in columnConditions) {
2424
const allowedPercentageNull = columnConditions[columnName];
2525

26-
const assertion = assert(`assert_data_completeness_${tableName}_${columnName}`)
26+
const assertion = assert(`assert_data_completeness_${schemaName}_${tableName}_${columnName}`)
2727
.database(globalParams.database)
2828
.schema(globalParams.schema)
29-
.description(`Check data completeness for ${tableName}.${columnName}, allowed percentage of null values: ${allowedPercentageNull}`)
29+
.description(`Check data completeness for ${schemaName}.${tableName}.${columnName}, allowed percentage of null values: ${allowedPercentageNull}`)
3030
.tags("assert-data-completeness")
3131
.query(ctx => `
3232
WITH
3333
filtering AS (
3434
SELECT
3535
*
3636
FROM
37-
${ctx.ref(tableName)}
37+
${ctx.ref(schemaName, tableName)}
3838
WHERE
3939
${filter}
4040
)
@@ -55,11 +55,13 @@ const createDataCompletenessAssertion = (globalParams, filter, tableName, column
5555

5656
module.exports = (globalParams, config, dataCompletenessConditions) => {
5757
// Loop through dataCompletenessConditions to create data completeness check assertions.
58-
for (let tableName in dataCompletenessConditions) {
59-
const columnConditions = dataCompletenessConditions[tableName];
60-
const filter = config[tableName]?.where ?? true;
61-
createDataCompletenessAssertion(globalParams, filter, tableName, columnConditions);
58+
for (let schemaName in dataCompletenessConditions) {
59+
const tableNames = dataCompletenessConditions[schemaName];
60+
for (let tableName in tableNames) {
61+
const columnConditions = tableNames[tableName];
62+
const filter = config[tableName]?.where ?? true;
63+
createDataCompletenessAssertion(globalParams, schemaName, tableName, filter, columnConditions);
64+
}
6265
}
63-
6466
return assertions;
6567
};

includes/data_freshness_assertions.js

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@
99

1010
/**
1111
* @param {Object} globalParams - See index.js for details.
12-
* @param {string} filter - The condition to filter the data.
12+
* @param {string} schemaName - The name of the schema containing the table to check for data freshness.
1313
* @param {string} tableName - The name of the table to check for data freshness.
14+
* @param {string} filter - The condition to filter the data.
1415
* @param {number} delayCondition - The maximum allowed delay (in units specified by `timeUnit`) for the data to be considered fresh.
1516
* @param {string} timeUnit - The unit of time to use for the delay condition. This should be a string that is valid in a SQL `DATE_DIFF` function, such as 'DAY', 'HOUR', etc.
1617
* @param {string} dateColumn - The name of the date column to check for data freshness.
@@ -19,20 +20,19 @@
1920

2021
const assertions = [];
2122

22-
const createDataFreshnessAssertion = (globalParams, filter, tableName, delayCondition, timeUnit, dateColumn, timeZone = "UTC") => {
23-
24-
const assertion = assert(`assert_freshness_${tableName}`)
23+
const createDataFreshnessAssertion = (globalParams, schemaName, tableName, filter, delayCondition, timeUnit, dateColumn) => {
24+
const assertion = assert(`assert_freshness_${schemaName}_${tableName}`)
2525
.database(globalParams.database)
2626
.schema(globalParams.schema)
27-
.description(`Assert that data in ${tableName} is fresh with a delay less than ${delayCondition} ${timeUnit}`)
27+
.description(`Assert that data in ${schemaName}.${tableName} is fresh with a delay less than ${delayCondition} ${timeUnit}`)
2828
.tags("assert-data-freshness")
2929
.query(ctx => `
3030
WITH
3131
filtering AS (
3232
SELECT
3333
*
3434
FROM
35-
${ctx.ref(tableName)}
35+
${ctx.ref(schemaName, tableName)}
3636
WHERE
3737
${filter}
3838
),
@@ -59,18 +59,20 @@ const createDataFreshnessAssertion = (globalParams, filter, tableName, delayCond
5959
assertions.push(assertion);
6060
};
6161

62-
module.exports = (globalParams, config, freshnessConditions) => {
6362

63+
module.exports = (globalParams, config, freshnessConditions) => {
6464
// Loop through freshnessConditions to create assertions.
65-
for (let tableName in freshnessConditions) {
66-
const {
67-
delayCondition,
68-
timeUnit,
69-
dateColumn,
70-
timeZone
71-
} = freshnessConditions[tableName];
72-
const filter = config[tableName]?.where ?? true;
73-
createDataFreshnessAssertion(globalParams, filter, tableName, delayCondition, timeUnit, dateColumn, timeZone);
65+
for (let schemaName in freshnessConditions) {
66+
const tableNames = freshnessConditions[schemaName];
67+
for (let tableName in tableNames) {
68+
const {
69+
delayCondition,
70+
timeUnit,
71+
dateColumn
72+
} = tableNames[tableName];
73+
const filter = config[tableName]?.where ?? true;
74+
createDataFreshnessAssertion(globalParams, schemaName, tableName, delayCondition, timeUnit, dateColumn);
75+
}
7476
}
7577

7678
return assertions;

includes/referential_integrity_assertions.js

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,21 @@
1111

1212
/**
1313
* @param {Object} globalParams - See index.js for details.
14-
* @param {string} parentFilter - The condition to filter the data of parent table.
15-
* @param {string} childFilter - The condition to filter the data of child table.
14+
* @param {string} parentSchema - The name of the schema containing the parent table.
1615
* @param {Object} parentTable - The name of the parent table in the foreign key relationship.
1716
* @param {Object} parentKey - The name of the column in the parent table that is the primary key.
17+
* @param {string} parentFilter - The condition to filter the data of parent table.
18+
* @param {string} childSchema - The name of the schema containing the child table.
1819
* @param {Object} childTable - The name of the child table in the foreign key relationship.
1920
* @param {Object} childKey - The name of the column in the child table that is the foreign key.
21+
* @param {string} childFilter - The condition to filter the data of child table.
2022
*/
2123

2224
const assertions = [];
2325

24-
const createReferentialIntegrityAssertion = (globalParams, parentFilter, childFilter, parentTable, parentKey, childTable, childKey) => {
26+
const createReferentialIntegrityAssertion = (globalParams, parentSchema, parentTable, parentKey, parentFilter, childSchema, childTable, childKey, childFilter) => {
2527

26-
const assertion = assert(`assert_referential_integrity_${parentTable}_${childTable}`)
28+
const assertion = assert(`assert_referential_integrity_${parentSchema}_${parentTable}_${childSchema}_${childTable}`)
2729
.database(globalParams.database)
2830
.schema(globalParams.schema)
2931
.description(`Check referential integrity for ${childTable}.${childKey} referencing ${parentTable}.${parentKey}`)
@@ -34,7 +36,7 @@ const createReferentialIntegrityAssertion = (globalParams, parentFilter, childFi
3436
SELECT
3537
*
3638
FROM
37-
${ctx.ref(parentTable)}
39+
${ctx.ref(parentSchema, parentTable)}
3840
WHERE
3941
${parentFilter}
4042
),
@@ -43,7 +45,7 @@ const createReferentialIntegrityAssertion = (globalParams, parentFilter, childFi
4345
SELECT
4446
*
4547
FROM
46-
${ctx.ref(childTable)}
48+
${ctx.ref(childSchema, childTable)}
4749
WHERE
4850
${childFilter}
4951
)
@@ -62,18 +64,32 @@ const createReferentialIntegrityAssertion = (globalParams, parentFilter, childFi
6264
};
6365

6466
module.exports = (globalParams, config, referentialIntegrityConditions) => {
65-
for (let parentTable in referentialIntegrityConditions) {
66-
const relationships = referentialIntegrityConditions[parentTable];
67-
const parentFilter = config[parentTable]?.where ?? true;
67+
for (let parentSchema in referentialIntegrityConditions) {
68+
const parentTables = referentialIntegrityConditions[parentSchema];
69+
for (let parentTable in parentTables) {
70+
const relationships = parentTables[parentTable];
71+
const parentFilter = config[parentTable]?.where ?? true;
6872

69-
relationships.forEach(({
70-
parentKey,
71-
childTable,
72-
childKey
73-
}) => {
74-
const childFilter = config[childTable]?.where ?? true;
75-
createReferentialIntegrityAssertion(globalParams, parentFilter, childFilter, parentTable, parentKey, childTable, childKey);
76-
})
77-
}
73+
relationships.forEach(({
74+
parentKey,
75+
childSchema,
76+
childTable,
77+
childKey
78+
}) => {
79+
const childFilter = config[childTable]?.where ?? true;
80+
createReferentialIntegrityAssertion(
81+
globalParams,
82+
parentSchema,
83+
parentTable,
84+
parentKey,
85+
parentFilter,
86+
childSchema,
87+
childTable,
88+
childKey,
89+
childFilter
90+
);
91+
})
92+
}
93+
};
7894
return assertions;
7995
};

includes/row_condition_assertions.js

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,27 +11,28 @@
1111

1212
/**
1313
* @param {Object} globalParams - See index.js for details.
14-
* @param {string} filter - The condition to filter the data.
14+
* @param {string} schemaName - The name of the schema containing the table to check for row conditions.
1515
* @param {string} tableName - The name of the table to check for row conditions.
16+
* @param {string} filter - The condition to filter the data.
1617
* @param {string} conditionName - The name of the condition to check.
1718
* @param {string} conditionQuery - The SQL query that defines the condition to check.
1819
*/
1920

2021
const assertions = [];
2122

22-
const createRowConditionAssertion = (globalParams, filter, tableName, conditionName, conditionQuery) => {
23-
const assertion = assert(`assert_${conditionName.replace(/-/g , "_")}_${tableName}`)
23+
const createRowConditionAssertion = (globalParams, schemaName, tableName, filter, conditionName, conditionQuery) => {
24+
const assertion = assert(`assert_${conditionName.replace(/-/g , "_")}${schemaName}_${tableName}`)
2425
.database(globalParams.database)
2526
.schema(globalParams.schema)
26-
.description(`Assert that rows in ${tableName} meet ${conditionName}`)
27+
.description(`Assert that rows in ${schemaName}.${tableName} meet ${conditionName}`)
2728
.tags("assert-row-condition")
2829
.query(ctx => `
2930
WITH
3031
filtering AS (
3132
SELECT
3233
*
3334
FROM
34-
${ctx.ref(tableName)}
35+
${ctx.ref(schemaName, tableName)}
3536
WHERE
3637
${filter}
3738
)
@@ -50,13 +51,22 @@ const createRowConditionAssertion = (globalParams, filter, tableName, conditionN
5051
module.exports = (globalParams, config, rowConditions) => {
5152

5253
// Loop through rowConditions to create assertions.
53-
for (let tableName in rowConditions) {
54-
for (let conditionName in rowConditions[tableName]) {
55-
const conditionQuery = rowConditions[tableName][conditionName];
56-
const filter = config[tableName]?.where ?? true;
57-
createRowConditionAssertion(globalParams, filter, tableName, conditionName, conditionQuery);
54+
for (let schemaName in rowConditions) {
55+
const tableNames = rowConditions[schemaName];
56+
for (let tableName in tableNames) {
57+
for (let conditionName in tableNames[tableName]) {
58+
const conditionQuery = tableNames[tableName][conditionName];
59+
const filter = config[tableName]?.where ?? true;
60+
createRowConditionAssertion(
61+
globalParams,
62+
schemaName,
63+
tableName,
64+
filter,
65+
conditionName,
66+
conditionQuery
67+
);
68+
}
5869
}
5970
}
60-
6171
return assertions;
6272
}

0 commit comments

Comments
 (0)