microsoft · dayesouza · Nov 5, 2024 · Nov 5, 2024 · Nov 5, 2024 · Nov 5, 2024
@@ -19,4 +19,6 @@ coverage.xml
 .mypy_cache
 .pytest_cache
 .ruff_cache
-.venv
+.venv
+.streamlit/app_secrets.toml
+.streamlit/secrets.toml
@@ -31,7 +31,10 @@ jobs:
       - name: Install Dependencies
         run: poetry install
 
-      - name: Tests
-        run: poetry run poe test
+      - name: Unit Tests
+        run: poetry run poe test_unit
+
+      - name: Smoke Tests
+        run: poetry run poe test_smoke
 
 
@@ -70,11 +70,17 @@ stages:
               targetType: "inline"
               script: poetry install
           - task: Bash@3
-            displayName: Run Tests
+            displayName: Run Unit Tests
             inputs:
               workingDirectory: ./
               targetType: "inline"
-              script: poetry run poe test
+              script: poetry run poe test_unit
+          - task: Bash@3
+            displayName: Run Smoke Tests
+            inputs:
+              workingDirectory: ./
+              targetType: "inline"
+              script: poetry run poe test_smoke
 
       - job: buildAndPush
         displayName: BuildAndPushContainer

@@ -121,37 +121,28 @@ Use `cd `+ the path to the folder. For example:
 
 `cd C:\Users\user01\projects\intelligence-toolkit`
 
-
-Build it with your own code:
+Build the container:
 
 `docker build . -t intelligence-toolkit`
 
-Or pull the latest built image:
-
-` docker pull ghcr.io/microsoft/intelligence-toolkit:latest` 
-
+Once the build is finished, run the docker container:
 
-Once the pull/build is fininshed, run the docker container:
+- via terminal:
 
-- via shell:
-
-    With `<container-name>` being `intelligence-toolkit` if you used docker build or `ghcr.io/microsoft/intelligence-toolkit:latest` if you used docker pull. 
-
-    `docker run -d --name intelligence-toolkit -p 80:80 <container-name>`
+    `docker run -d --name intelligence-toolkit -p 80:80 intelligence-toolkit`
 
 Open [localhost:80](http://localhost:80)
 
-## Deploying 
-See [instructions]('./DEPLOYING.md')
-
+  **Note that Docker might go to sleep, in which case you need to start the container again: open Docker Desktop, click Containers in the left menu, and press play on intelligence-toolkit.**
 
 # Lifecycle Scripts
 
 For Lifecycle scripts it utilizes [Poetry](https://python-poetry.org/docs#installation) and [poethepoet](https://pypi.org/project/poethepoet/) to manage build scripts.
 
 Available scripts are:
 
-- `poetry run poe test` - This will execute unit tests.
+- `poetry run poe test_unit` - This will execute the API unit tests.
+- `poetry run poe test_smoke` - This will execute the API smoke tests.
 - `poetry run poe check` - This will perform a suite of static checks across the package, including:
   - formatting
   - documentation formatting

@@ -16,4 +16,4 @@ RUN poetry install --only main
 
 # Run application
 EXPOSE 80
-ENTRYPOINT ["poetry", "run", "poe", "run_streamlit", "--server.port=80"]
+ENTRYPOINT ["poetry", "run", "poe", "run_streamlit", "--server.port=80", "--server.address=0.0.0.0"]
@@ -150,9 +150,9 @@ flowchart TD
 
 ### NGOs
 
-Non-profit organizations can apply for an annual Azure credit grant of up to $3,500, which can be used to set up and run an instance of the intelligence-toolkit app for your organization.
+Non-profit organizations can apply for an annual Azure credit grant of $2,000, which can be used to set up and run an instance of the intelligence-toolkit app for your organization.
 
-[Read more about eligibility and registration here](https://nonprofit.microsoft.com/en-us/getting-started)
+[Read more about eligibility and registration here](https://www.microsoft.com/en-us/nonprofits/azure)
 
 ### Getting started
 
@@ -164,10 +164,38 @@ Non-profit organizations can apply for an annual Azure credit grant of up to $3,
 
 - To pull the latest docker image with the app interface, pull it with docker.
 
-      docker pull ghcr.io/microsoft/intelligence-toolkit:latest
+  Download, install and then open docker app: https://www.docker.com/products/docker-desktop/
+
+  Then, open a terminal:
+  Windows: Search for and open the `Windows PowerShell` app from the Windows Start menu
+
+  Linux and Mac: Open `Terminal`
+
+  For any OS:
+
+  Navigate to the folder where you cloned this repo. 
+
+  Use `cd `+ the path to the folder. For example:
+
+  `cd C:\Users\user01\projects\intelligence-toolkit`
+
+  Build the container:
+
+  `docker build . -t intelligence-toolkit`
+
+  Once the build is finished, run the docker container:
+
+  - via terminal:
+
+      `docker run -d --name intelligence-toolkit -p 80:80 ghcr.io/microsoft/intelligence-toolkit:latest`
+
+  **Note that Docker might go to sleep, in which case you need to start the container again: open Docker Desktop, click Containers in the left menu, and press play on intelligence-toolkit.**
+
+  Open [localhost:80](http://localhost:80)
 
 
 - To start developing, see [DEVELOPING.md](./DEVELOPING.md).
+- For instructions on how to deploy, see [DEPLOYING.md](./DEPLOYING.md).
 - To learn about our contribution guidelines, see [CONTRIBUTING.md](./CONTRIBUTING.md).
 - For license details, see [LICENSE.md](./LICENSE.md).
 

@@ -225,7 +225,6 @@ def single_csv_uploader(
         options += ["Input data"]
     if processed_df_var is not None:
         options += ["Prepared data"]
-    # dfo = st.radio('Select data table', options=options, index=0, horizontal=True, key=f'{workflow}_{upload_label}_data_table_select')
     option_tabs = st.tabs(options)
     for ix, tab in enumerate(option_tabs):
         with tab:

@@ -0,0 +1,113 @@
+# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
+# Licensed under the MIT license. See LICENSE file in the project.
+#
+import shutil
+from functools import wraps
+from pathlib import Path
+from typing import ClassVar
+
+import pandas as pd
+import pytest
+
+from intelligence_toolkit.anonymize_case_data.api import (
+    AnonymizeCaseData,
+    SynthesizabilityStatistics,
+)
+from intelligence_toolkit.anonymize_case_data.visuals import color_schemes
+from intelligence_toolkit.helpers import df_functions
+
+# Folder holding the example outputs read by the tests below. The path is
+# relative ("./"), so it resolves against the directory pytest is run from.
+example_outputs_folder = "./example_outputs/anonymize_case_data"
+
+
+def cleanup(skip: bool = False):
+    """Decorator to cleanup the output and cache folders after each test."""
+
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            try:
+                return func(*args, **kwargs)
+            except AssertionError:
+                raise
+            finally:
+                if not skip:
+                    root = Path(kwargs["input_path"])
+                    shutil.rmtree(root / "anonymize_case_data", ignore_errors=True)
+
+        return wrapper
+
+    return decorator
+
+
+class TestACD:
+    @pytest.fixture()
+    def dataset(self):
+        data_path = f"{example_outputs_folder}/customer_complaints/customer_complaints_prepared.csv"
+        return pd.read_csv(data_path)
+
+    def test_anonymize_case_data(self, dataset):
+        acd = AnonymizeCaseData()
+
+        sensitive_data = df_functions.suppress_boolean_binary(dataset)
+
+        assert not sensitive_data.isin([0.0]).any().any()
+
+        synthesizability_stats = acd.analyze_synthesizability(sensitive_data)
+        assert synthesizability_stats.num_cols == 9
+        assert synthesizability_stats.overall_att_count == 101
+        assert synthesizability_stats.possible_combinations == 27648
+        assert synthesizability_stats.possible_combinations_per_row == 9.2
+        assert synthesizability_stats.mean_vals_per_record == 5.409
+        assert round(synthesizability_stats.max_combinations_per_record, 2) == 42.49
+        assert round(synthesizability_stats.excess_combinations_ratio, 2) == 0.22
+
+        # Anonymize the data
+        acd.anonymize_case_data(
+            df=sensitive_data,
+            epsilon=12.0,
+        )
+
+        assert len(acd.aggregate_df) > 0
+
+        selections = acd.aggregate_df["selections"].to_list()
+        assert "age_range:(30-40]" in selections
+        assert "record_count" in selections
+        assert "quality_issue:True" in selections
+        assert "age_range:(40-50];city:Mountainview;period:2023-H1" in selections
+
+        assert "0.00 %" not in acd.aggregate_error_report["Suppressed %"].to_list()
+
+        count_error = acd.aggregate_error_report["Count +/- Error"].to_list()
+
+        assert "160.66" in count_error[0]
+        assert "23.85" in count_error[1]
+        assert "6.85" in count_error[2]
+        assert "2.85" in count_error[3]
+        assert "6.88" in count_error[4]
+
+        bar_chart, bar_chart_df = acd.get_bar_chart_fig(
+            selection=[],
+            show_attributes=[],
+            unit="Customer",
+            width=700,
+            height=400,
+            scheme=color_schemes["Alphabet"],
+            num_values=10,
+        )
+
+        assert isinstance(bar_chart_df, pd.DataFrame), "Expected a pandas DataFrame"
+        assert len(bar_chart_df) == 10, "Expected 10 rows in the DataFrame"
+        expected_columns = [
+            "Attribute",
+            "Count",
+            "Attribute Value",
+        ]
+        assert all(
+            col in bar_chart_df.columns for col in expected_columns
+        ), f"DataFrame should contain columns: {expected_columns}"
+
+        assert bar_chart.layout.width == 700, "Expected bar chart width of 700"
+        assert bar_chart.layout.height == 400, "Expected bar chart height of 400"
+        assert (
+            len(bar_chart.data) > 0
+        ), "Expected the chart to contain at least one data trace"
@@ -0,0 +1,3 @@
+# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
+# Licensed under the MIT license. See LICENSE file in the project.
+#
@@ -0,0 +1,3 @@
+# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
+# Licensed under the MIT license. See LICENSE file in the project.
+#
@@ -90,7 +90,8 @@ _pyright = "pyright"
 check_format = 'ruff format . --check --preview'
 fix = "ruff  --preview check --fix ."
 fix_unsafe = "ruff check --preview --fix --unsafe-fixes ."
-test = "pytest ${ARGS}"
+test_unit = "pytest ./intelligence_toolkit/tests/unit"
+test_smoke = "pytest ./intelligence_toolkit/tests/smoke"
 
 run_streamlit = "python -m streamlit run app/Home.py ${ARGS}"