ydataai · vigneshgr · Aug 24, 2025 · Aug 24, 2025 · Sep 2, 2025 · Sep 2, 2025
diff --git a/setup.py b/setup.py
@@ -36,7 +36,7 @@
       keywords='data science ydata',
       url='https://github.com/ydataai/ydata-quality',
       license="https://github.com/ydataai/ydata-quality/blob/master/LICENSE",
-      python_requires=">=3.7, <3.9",
+      python_requires=">3.9",
       packages=find_namespace_packages('src'),
       package_dir={'':'src'},
       include_package_data=True,

diff --git a/src/ydata_quality/bias_fairness/engine.py b/src/ydata_quality/bias_fairness/engine.py
@@ -49,8 +49,7 @@ def proxy_identification(self, th=0.5):
 
         Non-sensitive features can serve as proxy for protected attributes, exposing the data to a possible
         subsequent bias in the data pipeline. High association values indicate that alternative features can
-        be used in place of the original sensitive attributes.
-        """
+        be used in place of the original sensitive attributes."""
         # TODO: multiple thresholds per association type (num/num, num/cat, cat/cat)
 
         # Compute association measures for sensitive features

diff --git a/src/ydata_quality/utils/correlations.py b/src/ydata_quality/utils/correlations.py
@@ -7,6 +7,7 @@
 from typing import List, Optional
 
 from matplotlib.pyplot import figure as pltfigure, show as pltshow
+import numpy as np
 from numpy import (
     nan,
     fill_diagonal,
@@ -55,8 +56,9 @@ def filter_associations(corrs: DataFrame, th: float,
     Returns
         corrs (Series): map of feature_pair to association metric value, filtered
     """
+    corrs = corrs if isinstance(corrs, DataFrame) else DataFrame(corrs)  # convert to DataFrame if needed
     corrs = corrs.copy()  # keep original
-    fill_diagonal(corrs.values, nan)  # remove the same column pairs
+    np.fill_diagonal(corrs.to_numpy(), nan)  # remove the same column pairs using numpy array
     corrs = corrs[subset] if subset is not None else corrs  # subset features
     corrs = corrs[(corrs > th) | (corrs < -th)].melt(ignore_index=False).reset_index().dropna()  # subset by threshold
     corrs['features'] = ['_'.join(sorted((i.index, i.variable)))

diff --git a/src/ydata_quality/utils/enum.py b/src/ydata_quality/utils/enum.py
@@ -42,26 +42,35 @@ def __lt__(self, other):
 
 
 class StringEnum(Enum):
+    """Enum that allows case-insensitive string lookup."""
 
     @classmethod
     def _missing_(cls, value):
         if isinstance(value, str):
             upper_value = value.upper()
 
-            key = StringEnum._key_from_str_(upper_value)
+            key = cls.find_member(upper_value)
             if key is not None:
                 return key
 
             lower_value = value.lower()
 
-            key = StringEnum._key_from_str_(lower_value)
+            key = cls.find_member(lower_value)
             if key is not None:
                 return key
 
         raise ValueError(f"{value} is not a valid {cls.__name__}")
 
     @classmethod
-    def _key_from_str_(cls, value: str):
+    def find_member(cls, value: str):
+        """Find an enum member by its string value.
+
+        Args:
+            value: The string value to look up
+
+        Returns:
+            The enum member if found, None otherwise
+        """
         if value in cls.__members__:
             return cls(value)
 

diff --git a/tests/engines/test_bias_fairness.py b/tests/engines/test_bias_fairness.py
@@ -0,0 +1,39 @@
+"""
+Unit tests for the bias fairness engine
+"""
+
+import pandas as pd
+
+from src.ydata_quality.bias_fairness.engine import BiasFairness
+
+
+def get_fake_data():
+    """Returns fake data for tests."""
+    return pd.DataFrame({
+        'age': [25, 35, 45, 55],
+        'salary': [30000, 45000, 60000, 75000],
+        'gender': ['M', 'F', 'M', 'F'],
+        'department': ['IT', 'HR', 'IT', 'HR']
+    })
+
+
+class TestBiasFairness:
+    """Test class for BiasFairness."""
+
+    def test_sensitive_features_property(self):
+        """Test sensitive features property returns correct features."""
+        df = get_fake_data()
+        sensitive_features = ['gender', 'age']
+        bf = BiasFairness(df=df, sensitive_features=sensitive_features)
+        assert bf.sensitive_features == sensitive_features
+
+    def test_proxy_identification(self):
+        """Test proxy identification returns expected correlations."""
+        df = get_fake_data()
+        sensitive_features = ['gender']
+        bf = BiasFairness(
+            df=df,
+            sensitive_features=sensitive_features
+        )
+        correlations = bf.proxy_identification(th=0.5)
+        assert len(correlations) >= 0