added documentation to detection_utils

IQTLabs · Dec 15, 2020 · b26b0cf · b26b0cf
1 parent d031028
commit b26b0cf
Show file tree

Hide file tree

Showing 3 changed files with 57 additions and 34 deletions.
diff --git a/LICENSE b/LICENSE
@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright [yyyy] [name of copyright owner]
+   Copyright 2020 IQT Labs LLC, All Rights Reserved.
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.

diff --git a/detection_utils.py b/detection_utils.py
@@ -5,7 +5,21 @@
 def MMD_test(source_data,target_data,p_val=0.05,preprocess_kwargs={},chunk_size=100,
     n_permutations=20):
     """
-    Functional wrapper around alibi_detect MMDDrift class, uses gaussian kernel
+    Functional wrapper around alibi_detect MMDDrift class that uses a Gaussian kernel
+    (https://docs.seldon.io/projects/alibi-detect/en/stable/api/alibi_detect.cd.mmd.html)
+    
+
+    Inputs:
+        source_data - numpy.ndarray of shape (number of source samples,embedding dimension),
+            samples from the source distribution
+        target_data - numpy.ndarray of shape (number of target samples,embedding dimension),
+            samples from the target distribution
+        p_val - p-value used for the significance of the permutation test.
+        preprocess_kwargs - Kwargs for a preprocessing function, pass callables under "model" key
+        chunk_size - Chunk size if dask is used to parallelise the computation.
+        n_permutations - Number of permutations used in the permutation test.
+    Outputs:
+        p - float, empirical p-value determined using the permutation test
     """
     source_size,source_dim = np.shape(source_data)
     target_size,target_dim = np.shape(target_data)
@@ -24,8 +38,20 @@ def MMD_test(source_data,target_data,p_val=0.05,preprocess_kwargs={},chunk_size=
 def repeated_MMD_test(source_data,target_data,p_val=0.05,preprocess_kwargs={},chunk_size=100,
     n_permutations=20,n_samples=100,n_splits=5):
     """
-    Repeatedly carry out the MMD test, subsampling the data each time.  Returns mean and standard
-    deviation of the p_values
+    Repeatedly carry out the MMD test, subsampling the data each time.  Returns an array of p-values.
+    Inputs:
+        source_data - numpy.ndarray of shape (number of source samples,embedding dimension),
+            samples from the source distribution
+        target_data - numpy.ndarray of shape (number of target samples,embedding dimension),
+            samples from the target distribution
+        p_val - p-value used for the significance of the permutation test.
+        preprocess_kwargs - Kwargs for a preprocessing function, pass callables under "model" key
+        chunk_size - Chunk size if dask is used to parallelise the computation.
+        n_permutations - Number of permutations used in the permutation test.
+        n_samples - number of samples to use from the source and target data in each subsampling
+        n_splits - number of different subsamplings to carry out
+    Outputs:
+        p_array - np.ndarray of shape (n_splits,), the set of p-values computed
     """
     source_size,source_dim = np.shape(source_data)
     target_size,target_dim = np.shape(target_data)
@@ -38,7 +64,7 @@ def repeated_MMD_test(source_data,target_data,p_val=0.05,preprocess_kwargs={},ch
         preprocess_kwargs=preprocess_kwargs,n_permutations=n_permutations)
         p_list.append(p_temp)
     p_array = np.array(p_list)
-    return np.mean(p_array),np.std(p_array)
+    return p_array
 
 
 

diff --git a/hypothesis_test.ipynb b/hypothesis_test.ipynb