From 3397bf90577e6d2a979aa9ceccd4d598ca19fdc2 Mon Sep 17 00:00:00 2001
From: Chetan Kini <chetan@superconductive.com>
Date: Thu, 12 Dec 2024 13:28:57 -0500
Subject: [PATCH 1/6] Add Spark test for faulty strftime failing the whole suite

---
 .../test_canonical_expectations.py            | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/tests/integration/data_sources_and_expectations/test_canonical_expectations.py b/tests/integration/data_sources_and_expectations/test_canonical_expectations.py
index 96661debf686..ddb9b009a72b 100644
--- a/tests/integration/data_sources_and_expectations/test_canonical_expectations.py
+++ b/tests/integration/data_sources_and_expectations/test_canonical_expectations.py
@@ -4,6 +4,7 @@
 import pandas as pd
 
 import great_expectations.expectations as gxe
+from great_expectations.core.expectation_suite import ExpectationSuite
 from tests.integration.conftest import parameterize_batch_for_data_sources
 from tests.integration.test_utils.data_source_config import (
     BigQueryDatasourceTestConfig,
@@ -154,3 +155,27 @@ def test_expect_column_mean_to_be_between(batch_for_datasource):
     expectation = gxe.ExpectColumnMeanToBeBetween(column="a", min_value=2, max_value=3)
     result = batch_for_datasource.validate(expectation)
     assert result.success
+
+
+@parameterize_batch_for_data_sources(
+    data_source_configs=[SparkFilesystemCsvDatasourceTestConfig()],
+    data=pd.DataFrame(
+        {
+            "names": ["Bob", "Alice", "Charlie"],
+            "emails": ["bob@gmail.com", "alice@gmail.com", "charlie@gmail.com"],
+            "dates": ["0", "1", "2"],
+        }
+    ),
+)
+def test_faulty_strtime_causes_entire_suite_to_fail(batch_for_datasource):
+    suite = ExpectationSuite(
+        name="faulty",
+        expectations=[
+            gxe.ExpectColumnValuesToMatchStrftimeFormat(column="dates", strftime_format="%Y-%m-%d"),
+            gxe.ExpectColumnValuesToNotBeNull(column="names"),
+            gxe.ExpectColumnValuesToMatchRegex(column="emails", regex="@gmail.com"),
+        ],
+    )
+    result = batch_for_datasource.validate(suite)
+    assert not result.success
+    assert all(res.success is False and res.exception_info for res in result.results)

From b86b7c15030b690c47125b7dd08e3b1ef807bf80 Mon Sep 17 00:00:00 2001
From: Chetan Kini <chetan@superconductive.com>
Date: Thu, 12 Dec 2024 13:39:05 -0500
Subject: [PATCH 2/6] Rework test to cover a missing condition_parser

---
 .../test_canonical_expectations.py            | 21 ++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/tests/integration/data_sources_and_expectations/test_canonical_expectations.py b/tests/integration/data_sources_and_expectations/test_canonical_expectations.py
index ddb9b009a72b..c6bdf2eee7a7 100644
--- a/tests/integration/data_sources_and_expectations/test_canonical_expectations.py
+++ b/tests/integration/data_sources_and_expectations/test_canonical_expectations.py
@@ -161,21 +161,28 @@ def test_expect_column_mean_to_be_between(batch_for_datasource):
     data_source_configs=[SparkFilesystemCsvDatasourceTestConfig()],
     data=pd.DataFrame(
         {
-            "names": ["Bob", "Alice", "Charlie"],
-            "emails": ["bob@gmail.com", "alice@gmail.com", "charlie@gmail.com"],
-            "dates": ["0", "1", "2"],
+            "col1": [1, 2, 3, 4, 5],
+            "col2": ["A", "B", "C", "D", None],
+            "col3": [1.1, None, 3.3, 4.4, 5.5],
         }
     ),
 )
-def test_faulty_strtime_causes_entire_suite_to_fail(batch_for_datasource):
+def test_missing_condition_parser_causes_entire_suite_to_fail(batch_for_datasource):
     suite = ExpectationSuite(
         name="faulty",
         expectations=[
-            gxe.ExpectColumnValuesToMatchStrftimeFormat(column="dates", strftime_format="%Y-%m-%d"),
-            gxe.ExpectColumnValuesToNotBeNull(column="names"),
-            gxe.ExpectColumnValuesToMatchRegex(column="emails", regex="@gmail.com"),
+            gxe.ExpectColumnValuesToNotBeNull(column="col1", result_format="COMPLETE"),
+            gxe.ExpectColumnValuesToBeInSet(
+                column="col2",
+                value_set=["A", "B", "C"],
+                row_condition="col3 IS NOT NULL",
+                mostly=0.665,
+                # condition_parser='spark',
+                result_format="COMPLETE",
+            ),
         ],
     )
+
     result = batch_for_datasource.validate(suite)
     assert not result.success
     assert all(res.success is False and res.exception_info for res in result.results)

From 0b7cba6408eb0e23e533d69b67b15dba3f1e7620 Mon Sep 17 00:00:00 2001
From: Chetan Kini <chetan@superconductive.com>
Date: Thu, 12 Dec 2024 13:48:51 -0500
Subject: [PATCH 3/6] Run test on PostgreSQL and assert the propagated error message

---
 .../test_canonical_expectations.py            | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/tests/integration/data_sources_and_expectations/test_canonical_expectations.py b/tests/integration/data_sources_and_expectations/test_canonical_expectations.py
index c6bdf2eee7a7..f8263f860a1c 100644
--- a/tests/integration/data_sources_and_expectations/test_canonical_expectations.py
+++ b/tests/integration/data_sources_and_expectations/test_canonical_expectations.py
@@ -158,7 +158,7 @@ def test_expect_column_mean_to_be_between(batch_for_datasource):
 
 
 @parameterize_batch_for_data_sources(
-    data_source_configs=[SparkFilesystemCsvDatasourceTestConfig()],
+    data_source_configs=[PostgreSQLDatasourceTestConfig()],
     data=pd.DataFrame(
         {
             "col1": [1, 2, 3, 4, 5],
@@ -171,18 +171,27 @@ def test_missing_condition_parser_causes_entire_suite_to_fail(batch_for_datasour
     suite = ExpectationSuite(
         name="faulty",
         expectations=[
-            gxe.ExpectColumnValuesToNotBeNull(column="col1", result_format="COMPLETE"),
+            gxe.ExpectColumnValuesToNotBeNull(column="col1"),
+            gxe.ExpectColumnValuesToNotBeNull(column="col2"),
             gxe.ExpectColumnValuesToBeInSet(
                 column="col2",
                 value_set=["A", "B", "C"],
                 row_condition="col3 IS NOT NULL",
                 mostly=0.665,
-                # condition_parser='spark',
-                result_format="COMPLETE",
+                condition_parser=None,  # Should be specified as 'great_expectations'
             ),
         ],
     )
 
     result = batch_for_datasource.validate(suite)
     assert not result.success
-    assert all(res.success is False and res.exception_info for res in result.results)
+    # Resolution of the 'table.row_count' metric (which is used by all expectations above) fails
+    # because the condition_parser is inaccurate.
+    # BUG - The error message should not permeates across the suite due to the shared metric
+    # dependency.
+    assert all(
+        res.success is False
+        and "SqlAlchemyExecutionEngine only supports the great_expectations condition_parser."
+        in str(res.exception_info)
+        for res in result.results
+    )

From 640759c1aab78ef0021b911385ec1dc04bb803ca Mon Sep 17 00:00:00 2001
From: Chetan Kini <chetan@superconductive.com>
Date: Thu, 12 Dec 2024 14:15:08 -0500
Subject: [PATCH 4/6] Move known-issue tests into test_known_issues.py

---
 .../test_canonical_expectations.py            | 41 ----------
 .../test_known_issues.py                      | 74 +++++++++++++++++++
 2 files changed, 74 insertions(+), 41 deletions(-)
 create mode 100644 tests/integration/data_sources_and_expectations/test_known_issues.py

diff --git a/tests/integration/data_sources_and_expectations/test_canonical_expectations.py b/tests/integration/data_sources_and_expectations/test_canonical_expectations.py
index f8263f860a1c..96661debf686 100644
--- a/tests/integration/data_sources_and_expectations/test_canonical_expectations.py
+++ b/tests/integration/data_sources_and_expectations/test_canonical_expectations.py
@@ -4,7 +4,6 @@
 import pandas as pd
 
 import great_expectations.expectations as gxe
-from great_expectations.core.expectation_suite import ExpectationSuite
 from tests.integration.conftest import parameterize_batch_for_data_sources
 from tests.integration.test_utils.data_source_config import (
     BigQueryDatasourceTestConfig,
@@ -155,43 +154,3 @@ def test_expect_column_mean_to_be_between(batch_for_datasource):
     expectation = gxe.ExpectColumnMeanToBeBetween(column="a", min_value=2, max_value=3)
     result = batch_for_datasource.validate(expectation)
     assert result.success
-
-
-@parameterize_batch_for_data_sources(
-    data_source_configs=[PostgreSQLDatasourceTestConfig()],
-    data=pd.DataFrame(
-        {
-            "col1": [1, 2, 3, 4, 5],
-            "col2": ["A", "B", "C", "D", None],
-            "col3": [1.1, None, 3.3, 4.4, 5.5],
-        }
-    ),
-)
-def test_missing_condition_parser_causes_entire_suite_to_fail(batch_for_datasource):
-    suite = ExpectationSuite(
-        name="faulty",
-        expectations=[
-            gxe.ExpectColumnValuesToNotBeNull(column="col1"),
-            gxe.ExpectColumnValuesToNotBeNull(column="col2"),
-            gxe.ExpectColumnValuesToBeInSet(
-                column="col2",
-                value_set=["A", "B", "C"],
-                row_condition="col3 IS NOT NULL",
-                mostly=0.665,
-                condition_parser=None,  # Should be specified as 'great_expectations'
-            ),
-        ],
-    )
-
-    result = batch_for_datasource.validate(suite)
-    assert not result.success
-    # Resolution of the 'table.row_count' metric (which is used by all expectations above) fails
-    # because the condition_parser is inaccurate.
-    # BUG - The error message should not permeates across the suite due to the shared metric
-    # dependency.
-    assert all(
-        res.success is False
-        and "SqlAlchemyExecutionEngine only supports the great_expectations condition_parser."
-        in str(res.exception_info)
-        for res in result.results
-    )
diff --git a/tests/integration/data_sources_and_expectations/test_known_issues.py b/tests/integration/data_sources_and_expectations/test_known_issues.py
new file mode 100644
index 000000000000..44f74b726048
--- /dev/null
+++ b/tests/integration/data_sources_and_expectations/test_known_issues.py
@@ -0,0 +1,74 @@
+"""
+Tests that highlight known bugs we're working to resolve.
+"""
+
+import pandas as pd
+
+import great_expectations.expectations as gxe
+from great_expectations.core.expectation_suite import ExpectationSuite
+from tests.integration.conftest import parameterize_batch_for_data_sources
+from tests.integration.test_utils.data_source_config.pandas_data_frame import (
+    PandasDataFrameDatasourceTestConfig,
+)
+from tests.integration.test_utils.data_source_config.postgres import PostgreSQLDatasourceTestConfig
+
+
+@parameterize_batch_for_data_sources(
+    data_source_configs=[PostgreSQLDatasourceTestConfig()],
+    data=pd.DataFrame(
+        {
+            "col1": [1, 2, 3, 4, 5],
+            "col2": ["A", "B", "C", "D", None],
+            "col3": [1.1, None, 3.3, 4.4, 5.5],
+        }
+    ),
+)
+def test_missing_condition_parser_causes_entire_suite_to_fail(batch_for_datasource):
+    suite = ExpectationSuite(
+        name="faulty",
+        expectations=[
+            gxe.ExpectColumnValuesToNotBeNull(column="col1"),
+            gxe.ExpectColumnValuesToNotBeNull(column="col2"),
+            gxe.ExpectColumnValuesToBeInSet(
+                column="col2",
+                value_set=["A", "B", "C"],
+                row_condition="col3 IS NOT NULL",
+                mostly=0.665,
+                condition_parser=None,  # Should be specified as 'great_expectations'
+            ),
+        ],
+    )
+
+    result = batch_for_datasource.validate(suite)
+
+    # Resolution of the 'table.row_count' metric (which is used by all expectations above) fails
+    # because the condition_parser is inaccurate.
+    # BUG - The error message should not permeate across the suite due to the shared metric
+    # dependency.
+    assert result.success is False
+    assert all(
+        res.success is False
+        and "SqlAlchemyExecutionEngine only supports the great_expectations condition_parser."
+        in str(res.exception_info)
+        for res in result.results
+    )
+
+
+@parameterize_batch_for_data_sources(
+    data_source_configs=[PandasDataFrameDatasourceTestConfig()],
+    data=pd.DataFrame(
+        {
+            "col1": [1, 2, 3, 4, 5],
+        }
+    ),
+)
+def test_catch_exceptions_is_not_respected(batch_for_datasource):
+    expectation = gxe.ExpectColumnValuesToMatchStrftimeFormat(
+        column="col1", strftime_format="%Y-%m-%d", catch_exceptions=False
+    )
+    result = batch_for_datasource.validate(expectation)
+
+    assert result.success is False
+    assert "please call the expectation before converting from string format" in str(
+        result.exception_info
+    )

From 0fb90231cad2507c5175c7158b0d2cddd938f507 Mon Sep 17 00:00:00 2001
From: Chetan Kini <chetan@superconductive.com>
Date: Thu, 12 Dec 2024 14:18:45 -0500
Subject: [PATCH 5/6] Document known-issue tests with docstrings

---
 .../test_known_issues.py                      | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/tests/integration/data_sources_and_expectations/test_known_issues.py b/tests/integration/data_sources_and_expectations/test_known_issues.py
index 44f74b726048..1148f23718f0 100644
--- a/tests/integration/data_sources_and_expectations/test_known_issues.py
+++ b/tests/integration/data_sources_and_expectations/test_known_issues.py
@@ -24,6 +24,15 @@
     ),
 )
 def test_missing_condition_parser_causes_entire_suite_to_fail(batch_for_datasource):
+    """
+    This test demonstrates the bug where a missing condition_parser causes the entire suite to fail.
+
+    The bug is that the error message from the third expectation is propagated to all subsequent
+    expectations.
+
+    All expectations share a dependency on the 'table.row_count' metric, causing the issue to
+    propagate.
+    """
     suite = ExpectationSuite(
         name="faulty",
         expectations=[
@@ -41,10 +50,6 @@ def test_missing_condition_parser_causes_entire_suite_to_fail(batch_for_datasour
 
     result = batch_for_datasource.validate(suite)
 
-    # Resolution of the 'table.row_count' metric (which is used by all expectations above) fails
-    # because the condition_parser is inaccurate.
-    # BUG - The error message should not permeate across the suite due to the shared metric
-    # dependency.
     assert result.success is False
     assert all(
         res.success is False
@@ -63,6 +68,12 @@ def test_missing_condition_parser_causes_entire_suite_to_fail(batch_for_datasour
     ),
 )
 def test_catch_exceptions_is_not_respected(batch_for_datasource):
+    """
+    This test demonstrates the bug where catch_exceptions is not respected.
+
+    Even with catch_exceptions=False, the error is captured in the result's exception_info
+    instead of being raised: no logic currently lets user configuration override the default True.
+    """
     expectation = gxe.ExpectColumnValuesToMatchStrftimeFormat(
         column="col1", strftime_format="%Y-%m-%d", catch_exceptions=False
     )

From 06cc26c3efab4307097f46f9bd3207ac6b7789de Mon Sep 17 00:00:00 2001
From: Chetan Kini <chetan@superconductive.com>
Date: Thu, 12 Dec 2024 14:22:23 -0500
Subject: [PATCH 6/6] Rename shared-metric test and expand its docstring

---
 .../data_sources_and_expectations/test_known_issues.py    | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/integration/data_sources_and_expectations/test_known_issues.py b/tests/integration/data_sources_and_expectations/test_known_issues.py
index 1148f23718f0..1d4abe836faf 100644
--- a/tests/integration/data_sources_and_expectations/test_known_issues.py
+++ b/tests/integration/data_sources_and_expectations/test_known_issues.py
@@ -23,15 +23,17 @@
         }
     ),
 )
-def test_missing_condition_parser_causes_entire_suite_to_fail(batch_for_datasource):
+def test_shared_metric_dependencies_cause_entire_suite_to_fail(batch_for_datasource):
     """
     This test demonstrates the bug where a missing condition_parser causes the entire suite to fail.
 
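-    The bug is that the error message from the third expectation is propagated to all subsequent
-    expectations.
+    The bug is that the error message from the third expectation is propagated to every other
+    expectation in the suite, including the two listed before it.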
 
-    All expectations share a dependency on the 'table.row_count' metric, causing the issue to
-    propagate.
+    This highlights a larger issue where a failure in a shared metric dependency can propagate
+    across expectations. A missing condition parser is simply one example of this.
+
+    All expectations here share a dependency on the 'table.row_count' metric.
     """
     suite = ExpectationSuite(
         name="faulty",