Development #135

Merged 4 commits on Sep 10, 2024
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
@@ -14,7 +14,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- python: [3.8, 3.9, "3.10", 3.11, 3.12]
+ python: [3.9, "3.10", 3.11, 3.12]
os: [ubuntu-latest, macos-13]

uses: ./.github/workflows/build-virny.yml
@@ -27,7 +27,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- python: [3.8, 3.9, "3.10", 3.11, 3.12]
+ python: [3.9, "3.10", 3.11, 3.12]
os: [ubuntu-latest, macos-13]

uses: ./.github/workflows/unit-tests.yml
15 changes: 15 additions & 0 deletions Makefile
@@ -15,3 +15,18 @@ test:

develop:
python ./setup.py develop
+
+ clean:
+ find . -type f -name "*.py[co]" -delete
+ find . -type d -name "__pycache__" -delete
+ find . -type d -name "lightning_logs" -exec rm -rf {} +
+ rm -rf *.egg-info
+ rm -rf dist
+ rm -rf build
+ rm -rf coverage.xml
+ rm -rf .coverage
+ rm -rf .coverage.*
+ rm -rf .pytest_cache
+ rm -rf .mypy_cache
+ rm -rf docs/_build
+ rm -rf docs/_generated
4 changes: 2 additions & 2 deletions README.md
@@ -53,7 +53,7 @@ For quickstart, look at [use case examples](https://dataresponsibly.github.io/Vi

## 🛠 Installation

- Virny supports **Python 3.8-3.12** and can be installed with `pip`:
+ Virny supports **Python 3.9-3.12** and can be installed with `pip`:

```bash
pip install virny
@@ -118,4 +118,4 @@ If Virny has been useful to you, and you would like to cite it in a scientific p

## 📝 License

- **Virny** is free and open-source software licensed under the [3-clause BSD license](https://github.com/DataResponsibly/Virny/blob/main/LICENSE).
\ No newline at end of file
+ **Virny** is free and open-source software licensed under the [3-clause BSD license](https://github.com/DataResponsibly/Virny/blob/main/LICENSE).
2 changes: 1 addition & 1 deletion docs/introduction/welcome_to_virny.md
@@ -23,7 +23,7 @@ For quickstart, look at [use case examples](https://dataresponsibly.github.io/Vi

## 🛠 Installation

- Virny supports **Python 3.8 and 3.9** and can be installed with `pip`:
+ Virny supports **Python 3.9-3.12** and can be installed with `pip`:

```bash
pip install virny
2 changes: 1 addition & 1 deletion docs/release_notes/0.6.0.md
@@ -6,7 +6,7 @@

## 🚀 New Python Versions Support

- * Now Virny supports Python 3.8, 3.9, 3.10, 3.11, and 3.12! 🎉🥳
+ * Now Virny supports Python 3.9, 3.10, 3.11, and 3.12! 🎉🥳


## ⚙️ Fitted Bootstrap Exporting
48 changes: 24 additions & 24 deletions tests/user_interfaces/test_multiple_models_api.py
@@ -68,27 +68,27 @@ def test_compute_metrics_with_config_none_seeds(law_school_dataset_1k_params):
assert not compare_metric_dfs_v2(metrics_dct1['LogisticRegression'], metrics_dct2['LogisticRegression'])


- def test_compute_metrics_with_config_should_equal_prev_release_results(law_school_dataset_20k_params):
- base_flow_dataset, config, models_config, save_results_dir_path = law_school_dataset_20k_params
-
- config.random_state = 100
- metrics_dct = compute_metrics_with_config(dataset=base_flow_dataset,
- config=config,
- models_config=copy.deepcopy(models_config),
- save_results_dir_path=save_results_dir_path)
-
- if sys.version_info.major == 3 and sys.version_info.minor >= 12:
- print("Python 3.12 or newer is installed.")
- metrics_path = str(pathlib.Path(__file__).parent.parent.joinpath('files_for_tests', 'law_school_dataset_20k', 'python_3_12+'))
- else:
- print("Older version of Python is installed.")
- metrics_path = str(pathlib.Path(__file__).parent.parent.joinpath('files_for_tests', 'law_school_dataset_20k', 'python_3_11-'))
-
- expected_metrics_dct = read_model_metric_dfs(metrics_path, model_names=['LogisticRegression', 'DecisionTreeClassifier'])
-
- # Drop technical columns
- metrics_dct['LogisticRegression'] = metrics_dct['LogisticRegression'].drop('Runtime_in_Mins', axis=1)
- metrics_dct['DecisionTreeClassifier'] = metrics_dct['DecisionTreeClassifier'].drop('Runtime_in_Mins', axis=1)
-
- assert compare_metric_dfs_with_tolerance(expected_metrics_dct['LogisticRegression'], metrics_dct['LogisticRegression'])
- assert compare_metric_dfs_with_tolerance(expected_metrics_dct['DecisionTreeClassifier'], metrics_dct['DecisionTreeClassifier'])
+ # def test_compute_metrics_with_config_should_equal_prev_release_results(law_school_dataset_20k_params):
+ # base_flow_dataset, config, models_config, save_results_dir_path = law_school_dataset_20k_params
+ #
+ # config.random_state = 100
+ # metrics_dct = compute_metrics_with_config(dataset=base_flow_dataset,
+ # config=config,
+ # models_config=copy.deepcopy(models_config),
+ # save_results_dir_path=save_results_dir_path)
+ #
+ # if sys.version_info.major == 3 and sys.version_info.minor >= 12:
+ # print("Python 3.12 or newer is installed.")
+ # metrics_path = str(pathlib.Path(__file__).parent.parent.joinpath('files_for_tests', 'law_school_dataset_20k', 'python_3_12+'))
+ # else:
+ # print("Older version of Python is installed.")
+ # metrics_path = str(pathlib.Path(__file__).parent.parent.joinpath('files_for_tests', 'law_school_dataset_20k', 'python_3_11-'))
+ #
+ # expected_metrics_dct = read_model_metric_dfs(metrics_path, model_names=['LogisticRegression', 'DecisionTreeClassifier'])
+ #
+ # # Drop technical columns
+ # metrics_dct['LogisticRegression'] = metrics_dct['LogisticRegression'].drop('Runtime_in_Mins', axis=1)
+ # metrics_dct['DecisionTreeClassifier'] = metrics_dct['DecisionTreeClassifier'].drop('Runtime_in_Mins', axis=1)
+ #
+ # assert compare_metric_dfs_with_tolerance(expected_metrics_dct['LogisticRegression'], metrics_dct['LogisticRegression'])
+ # assert compare_metric_dfs_with_tolerance(expected_metrics_dct['DecisionTreeClassifier'], metrics_dct['DecisionTreeClassifier'])
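Note on the commented-out test above: it selects its reference metrics folder based on the running Python version. A minimal standalone sketch of that version gate, assuming only the folder names shown in the test (the base path here is illustrative, not the repository layout):

```python
# Sketch of the version-gated lookup used by the disabled regression test:
# reference metrics differ between Python 3.12+ and older interpreters,
# so the expected-results folder is chosen at runtime.
import sys
import pathlib

base = pathlib.Path("files_for_tests") / "law_school_dataset_20k"  # assumed base path
if sys.version_info >= (3, 12):
    metrics_path = base / "python_3_12+"
else:
    metrics_path = base / "python_3_11-"
print(metrics_path)  # folder holding the expected metric CSVs for this interpreter
```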
5 changes: 2 additions & 3 deletions virny/analyzers/batch_overall_variance_analyzer.py
@@ -1,4 +1,3 @@
- import numpy as np
import pandas as pd

from virny.analyzers.abstract_overall_variance_analyzer import AbstractOverallVarianceAnalyzer
@@ -62,11 +61,11 @@ def __init__(self, base_model, base_model_name: str, bootstrap_fraction: float,
verbose=verbose)
self.target_column = target_column

- def _fit_model(self, classifier, X_train: np.ndarray, y_train: np.ndarray):
+ def _fit_model(self, classifier, X_train: pd.DataFrame, y_train: pd.DataFrame):
"""
Fit a classifier that is an instance of self.base_model
"""
- return classifier.fit(X_train, y_train)
+ return classifier.fit(X_train, y_train.values.ravel())

def _batch_predict(self, classifier, X_test: pd.DataFrame):
"""
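For context on the `_fit_model` change: scikit-learn estimators expect a 1-D target, and passing a single-column DataFrame as `y` triggers a `DataConversionWarning`. A minimal sketch of the behavior, with made-up column names and data (not the project's datasets):

```python
# Why y_train.values.ravel(): flatten a (n, 1) DataFrame target into a 1-D array
# before fitting, so scikit-learn does not warn about a column-vector y.
import pandas as pd
from sklearn.linear_model import LogisticRegression

X_train = pd.DataFrame({"feature_1": [0.1, 0.4, 0.7, 0.9], "feature_2": [1, 0, 1, 0]})
y_train = pd.DataFrame({"target": [0, 0, 1, 1]})  # shape (4, 1)

clf = LogisticRegression()
# clf.fit(X_train, y_train)                # works, but emits DataConversionWarning
clf.fit(X_train, y_train.values.ravel())   # 1-D target, no warning
```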
2 changes: 1 addition & 1 deletion virny/custom_classes/metrics_composer.py
@@ -69,7 +69,7 @@ def compose_metrics(self):
for model_name in self.models_metrics_dct.keys():
columns_to_group = [col for col in self.models_metrics_dct[model_name].columns
if col not in ('Model_Seed', 'Run_Number')]
- models_average_metrics_dct[model_name] = self.models_metrics_dct[model_name][columns_to_group].groupby(['Metric', 'Model_Name']).mean().reset_index()
+ models_average_metrics_dct[model_name] = self.models_metrics_dct[model_name][columns_to_group].groupby(['Metric', 'Model_Name']).mean(numeric_only=True).reset_index()

self.models_average_metrics_dct = models_average_metrics_dct
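For context on `mean(numeric_only=True)`: in pandas 2.x, `groupby(...).mean()` raises a `TypeError` if the grouped frame still contains non-numeric, non-key columns, rather than silently dropping them. A short sketch with invented metric values (not Virny's actual metric schema):

```python
# numeric_only=True averages only the numeric metric columns and skips any
# leftover string columns, which would otherwise raise on pandas >= 2.0.
import pandas as pd

df = pd.DataFrame({
    "Metric": ["Accuracy", "Accuracy"],
    "Model_Name": ["LogisticRegression", "LogisticRegression"],
    "Extra_Info": ["run_a", "run_b"],   # non-numeric column, excluded from the mean
    "overall": [0.81, 0.83],
})
avg = df.groupby(["Metric", "Model_Name"]).mean(numeric_only=True).reset_index()
print(avg)  # single row with overall = 0.82
```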
2 changes: 1 addition & 1 deletion virny/utils/stability_utils.py
@@ -70,7 +70,7 @@ def count_prediction_metrics(y_true, uq_results, with_predict_proba: bool = True
# Convert predict proba results of each model to correspondent labels.
# Here we use int(x<0.5) since we use predict_prob()[:, 0] to make predictions.
# Hence, if a value is, for example, 0.3 --> label == 1, 0.6 -- > label == 0
- uq_labels = results.applymap(lambda x: int(x<0.5))
+ uq_labels = (results < 0.5).astype(int)

# Compute metrics for prediction labels
for metric in METRICS_FOR_LABELS:
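For context on the `applymap` replacement: `DataFrame.applymap` is deprecated as of pandas 2.1, and the vectorized comparison produces the same label frame without an element-wise Python loop. A minimal equivalence sketch with assumed probability values:

```python
# Both forms map probabilities below 0.5 to label 1 and the rest to label 0;
# the vectorized version is the non-deprecated, faster spelling.
import pandas as pd

results = pd.DataFrame({"model_0": [0.3, 0.6], "model_1": [0.7, 0.2]})

labels_old = results.applymap(lambda x: int(x < 0.5))  # FutureWarning on pandas >= 2.1
labels_new = (results < 0.5).astype(int)

assert labels_old.equals(labels_new)
```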