-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathGiskard_test.py
99 lines (82 loc) · 4.88 KB
/
Giskard_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
"""Main module."""
import pandas as pd
import os
import json
import pickle
import warnings
warnings.filterwarnings("ignore")
from giskard import GiskardClient
def passed_ratio(test_result):
    """Return the fraction of entries in *test_result* whose status is 'PASSED'.

    Args:
        test_result: Sequence of mappings, each exposing a ``'status'`` key
            (the shape this script reads from ``execute_test_suite``).

    Returns:
        The passed fraction as a float. An empty *test_result* yields ``0.0``
        so that a run without any test never verifies a model and never
        divides by zero.
    """
    if not test_result:
        return 0.0
    passed_tests_cnt = sum(1 for test in test_result if test['status'] == 'PASSED')
    return passed_tests_cnt / len(test_result)


if __name__ == "__main__":
    # Locate the trained model folder and the artifacts stored inside it.
    trained_model_path = 'trained_model'
    model_dirs = os.listdir(trained_model_path)
    if not model_dirs:
        raise RuntimeError(f"No trained model folder found in '{trained_model_path}'")
    # NOTE(review): os.listdir() returns entries in arbitrary order, so with
    # several model folders the one picked here is not deterministic.
    model_date = model_dirs[0]
    model_dir = os.path.join(trained_model_path, model_date)
    model_filename = os.path.join(model_dir, 'logistic_regression_model.pkl')
    test_data_filename = os.path.join(model_dir, 'test_data.zip')

    # SECURITY: unpickling executes arbitrary code; only load artifacts that
    # were produced by a trusted training pipeline.
    with open(model_filename, 'rb') as model_file:
        clf_logistic_regression = pickle.load(model_file)
    test_data = pd.read_pickle(test_data_filename, compression='zip')

    # Declare the type of each column in the dataset(example: category, numeric, text)
    column_types = {
        'default': "category",
        'account_check_status': "category",
        'duration_in_month': "numeric",
        'credit_history': "category",
        'purpose': "category",
        'credit_amount': "numeric",
        'savings': "category",
        'present_employment_since': "category",
        'installment_as_income_perc': "numeric",
        'sex': "category",
        'personal_status': "category",
        'other_debtors': "category",
        'present_residence_since': "numeric",
        'property': "category",
        'age': "numeric",
        'other_installment_plans': "category",
        'housing': "category",
        'credits_this_bank': "numeric",
        'job': "category",
        'people_under_maintenance': "numeric",
        'telephone': "category",
        'foreign_worker': "category",
    }

    # feature_types is used to declare the features the model is trained on
    # (every declared column except the target).
    feature_types = {i: column_types[i] for i in column_types if i != 'default'}

    url = os.environ['GSK_URL']  # if Giskard is installed locally (for installation, see: https://docs.giskard.ai/start/guides/installation)
    token = os.environ['GSK_TOKEN']  # you can generate your API token in the Admin tab of the Giskard application (for installation, see: https://docs.giskard.ai/start/guides/installation)
    project_key = os.environ['GSK_PROJECT_KEY']
    client = GiskardClient(url, token)

    # Create the project, or fall back to fetching it when creation fails.
    # The exact exception type raised by the client is not visible here, so
    # Exception is caught (but no longer KeyboardInterrupt/SystemExit); a
    # genuine failure resurfaces from get_project below.
    try:
        credit_scoring = client.create_project(
            project_key,
            os.environ['GSK_PROJECT_NAME'],
            os.environ['GSK_PROJECT_DESCRIPTION'],
        )
        print(f"Your project \"{project_key}\" is now created on {url}")
    except Exception:
        credit_scoring = client.get_project(project_key)
        print(f"Your project \"{project_key}\" has already been created on {url}")

    model_id, ds_id = credit_scoring.upload_model_and_df(
        prediction_function=clf_logistic_regression.predict_proba,  # Python function which takes pandas dataframe as input and returns probabilities for classification model OR returns predictions for regression model
        model_type='classification',  # "classification" for classification model OR "regression" for regression model
        df=test_data,  # the dataset you want to use to inspect your model
        column_types=column_types,  # A dictionary with columns names of df as key and types(category, numeric, text) of columns as values
        target='default',  # The column name in df corresponding to the actual target variable (ground truth).
        feature_names=list(feature_types.keys()),  # List of the feature names of prediction_function
        classification_labels=clf_logistic_regression.classes_,  # List of the classification labels of your prediction
        model_name=model_date,  # Name of the model
        dataset_name='test_data'  # Name of the dataset
    )

    # The first test suite of the project is the one executed. Without any
    # suite there is nothing to run, so stop with an actionable error instead
    # of continuing with an undefined test_suite_id.
    try:
        test_suite_id = credit_scoring.list_test_suites()[0]['id']
    except (IndexError, KeyError) as err:
        raise RuntimeError(
            "Before the release of the test-API v2.0, you would need to create a test suite yourself from the "
            f"{url} in the project \"{project_key}\" in order for the deployment to work"
        ) from err
    print("There's already a test suite created")

    test_result = credit_scoring.execute_test_suite(
        test_suite_id=test_suite_id,
        model_id=model_id)

    # Condition upon which the model is successfully deployed
    # In this example, we choose to deploy the model, if 50% of the Giskard tests pass successfully
    ratio = passed_ratio(test_result)
    percentage = ratio * 100.
    PASSED = ratio > 0.5
    if PASSED:
        # Persist the raw test results next to the model artifacts.
        with open(os.path.join(model_dir, 'Giskard_tests.json'), 'w', encoding='utf-8') as f:
            json.dump(test_result, f, ensure_ascii=False, indent=4)
        print(percentage, "> 50% of the tests passed. The model is verified!")
    else:
        raise RuntimeError(f"{percentage} <= 50% of the tests passed. The model is not verified!")