From 691cb681cf5b38ff85a918a9a95659ee2010b8fe Mon Sep 17 00:00:00 2001
From: James Johnston <james.johnston@thumbtack.com>
Date: Mon, 24 Mar 2025 19:00:12 -0700
Subject: [PATCH] Add model conditions to equality test

This adds the following parameters to generic_tests/equality.sql:

- model_condition
- compare_model_condition

It allows one to filter out rows from either model before comparing the
remaining rows in the models as usual.

The rationale and usage are very similar to those of the relationships_where test.

Note that model_condition is introduced because the normal 'where'
clause can't be used here: with it, the given "model" is no longer a
relation, and so the dbt_utils._is_relation check will fail.
---
 README.md                                           | 12 +++++++++++-
 .../schema_tests/data_test_equality_condition_a.csv |  5 +++++
 .../schema_tests/data_test_equality_condition_b.csv |  5 +++++
 integration_tests/models/generic_tests/schema.yml   |  7 +++++++
 macros/generic_tests/equality.sql                   | 13 ++++++++++---
 5 files changed, 38 insertions(+), 4 deletions(-)
 create mode 100644 integration_tests/data/schema_tests/data_test_equality_condition_a.csv
 create mode 100644 integration_tests/data/schema_tests/data_test_equality_condition_b.csv

diff --git a/README.md b/README.md
index 378c5930..63437606 100644
--- a/README.md
+++ b/README.md
@@ -114,7 +114,7 @@ This test supports the `group_by_columns` parameter; see [Grouping in tests](#gr
 
 ### equality ([source](macros/generic_tests/equality.sql))
 
-Asserts the equality of two relations. Optionally specify a subset of columns to compare or exclude, and a precision to compare numeric columns on.
+Asserts the equality of two relations. Optionally specify a subset of columns to compare or exclude, and a precision to compare numeric columns on. Optional predicates (`model_condition` for the tested model, `compare_model_condition` for `compare_model`) can filter rows out of either relation before the comparison. This is useful for excluding records such as test entities, or rows created in the last X minutes/hours to account for temporary gaps due to ETL limitations.
 
 **Usage:**
 
@@ -145,6 +145,16 @@ models:
           compare_model: ref('other_table_name')
           exclude_columns:
             - third_column
+
+  # only compare some rows in model_name or other_table_name
+  - name: model_name
+    tests:
+      - dbt_utils.equality:
+          compare_model: ref('other_table_name')
+          # Exclude rows from model_name with given identifier
+          model_condition: id <> '4ca448b8-24bf-4b88-96c6-b1609499c38b'
+          # Exclude rows from other_table_name updated on or after the given date
+          compare_model_condition: updated_date < '2020-01-01'
 ```
 
 ### expression_is_true ([source](macros/generic_tests/expression_is_true.sql))
diff --git a/integration_tests/data/schema_tests/data_test_equality_condition_a.csv b/integration_tests/data/schema_tests/data_test_equality_condition_a.csv
new file mode 100644
index 00000000..0861800f
--- /dev/null
+++ b/integration_tests/data/schema_tests/data_test_equality_condition_a.csv
@@ -0,0 +1,5 @@
+col_a,col_b,col_c
+1,1,3
+1,2,1
+2,3,3
+4,5,6
diff --git a/integration_tests/data/schema_tests/data_test_equality_condition_b.csv b/integration_tests/data/schema_tests/data_test_equality_condition_b.csv
new file mode 100644
index 00000000..0765d24e
--- /dev/null
+++ b/integration_tests/data/schema_tests/data_test_equality_condition_b.csv
@@ -0,0 +1,5 @@
+col_a,col_b,col_c
+1,1,3
+1,2,1
+2,3,3
+5,6,7
diff --git a/integration_tests/models/generic_tests/schema.yml b/integration_tests/models/generic_tests/schema.yml
index b12e3c7f..801f29b2 100644
--- a/integration_tests/models/generic_tests/schema.yml
+++ b/integration_tests/models/generic_tests/schema.yml
@@ -164,6 +164,13 @@ seeds:
           exclude_columns:
             - col_c
 
+  - name: data_test_equality_condition_a
+    data_tests:
+      - dbt_utils.equality:
+          compare_model: ref('data_test_equality_condition_b')
+          model_condition: col_a <> 4
+          compare_model_condition: col_a <> 5
+
   - name: data_test_equality_floats_a
     data_tests:
       # test precision only
diff --git a/macros/generic_tests/equality.sql b/macros/generic_tests/equality.sql
index d7d7197c..05395bfc 100644
--- a/macros/generic_tests/equality.sql
+++ b/macros/generic_tests/equality.sql
@@ -1,8 +1,11 @@
-{% test equality(model, compare_model, compare_columns=None, exclude_columns=None, precision = None) %}
-  {{ return(adapter.dispatch('test_equality', 'dbt_utils')(model, compare_model, compare_columns, exclude_columns, precision)) }}
+{% test equality(model, compare_model, compare_columns=None, exclude_columns=None, precision = None, model_condition="1=1", compare_model_condition="1=1") %}
+  {{ return(adapter.dispatch('test_equality', 'dbt_utils')(model, compare_model, compare_columns, exclude_columns, precision, model_condition, compare_model_condition)) }}
 {% endtest %}
 
-{% macro default__test_equality(model, compare_model, compare_columns=None, exclude_columns=None, precision = None) %}
+{% macro default__test_equality(model, compare_model, compare_columns=None, exclude_columns=None, precision = None, model_condition="1=1", compare_model_condition="1=1") %}
+
+{# T-SQL has no boolean data type, so the default conditions use 1=1, which always evaluates to TRUE #}
+{# ref https://stackoverflow.com/a/7170753/3842610 #}
 
 {%- if compare_columns and exclude_columns -%}
     {{ exceptions.raise_compiler_error("Both a compare and an ignore list were provided to the `equality` macro. Only one is allowed") }}
@@ -129,12 +132,16 @@ with a as (
 
     select * from {{ model }}
 
+    where {{ model_condition }}
+
 ),
 
 b as (
 
     select * from {{ compare_model }}
 
+    where {{ compare_model_condition }}
+
 ),
 
 a_minus_b as (