Commit 5af29ea

Merge pull request #56 from splunk/fix-3400660-keras-issues
Update all images to 5.1.2 version of DSDL including fix branch 3400660 with all issues
2 parents: 7e4cf60 + 58b4b49

File tree

78 files changed: +11474 / -4560 lines


README.md

Lines changed: 1 addition & 1 deletion

@@ -73,7 +73,7 @@ There are a number of scripts in this repo which can help in various tasks when
 | --- | --- | --- | --- |
 | `build.sh` | Build a container using a configuration tag found in `tag_mapping.csv` | `./build.sh minimal-cpu splunk/ 5.1.1` | |
 | `bulk_build.sh` | Build all containers in a tag list | `./bulk_build.sh tag_mapping.csv splunk/ 5.1.1` | |
-| `compile_image_python_requirements.sh` | Use a base image and simplified dockerfile to pre-compute the python dependency versions for all libraries listed in the tag's referenced requirements files | `./compile_image_python_requirements.sh minimal-cpu Dockerfile.5.1.1.debian.requirements` | If the Dockerfile for the tag is not specified, the script looks for the tag's Dockerfile plus the `.requirements` extension. If this does not exist, please create a requirements dockerfile or specify an appropriate requirements dockerfile. An example can be found in /dockerfiles/Dockerfile.5.1.1.debian.requirements |
+| `compile_image_python_requirements.sh` | Use a base image and simplified dockerfile to pre-compute the python dependency versions for all libraries listed in the tag's referenced requirements files | `./compile_image_python_requirements.sh minimal-cpu` | If the Dockerfile for the tag is not specified, the script looks for the tag's Dockerfile plus the `.requirements` extension. If this does not exist, please create a requirements dockerfile or specify an appropriate requirements dockerfile. An example can be found in /dockerfiles/Dockerfile.debian.requirements |
 | `bulk_compile.sh` | Attempt to pre-compile python dependency versions for all containers in a tag list | `./bulk_build.sh tag_mapping.csv` | Makes assumptions about dockerfile names as described above. |
 | `scan_container.sh` | Scan a built container for vulnerabilities and produce a report with Trivy | `./scan_container.sh minimal-cpu splunk/ 5.1.1` | Downloads the Trivy container to run the scan. |
 | `test_container.sh` | Run a set of simulated tests using Playwright on a built container. | `./test_container.sh minimal-cpu splunk/ 5.1.1` | Requires the setup of a python virtual environment that can run Playwright. Specific python versions and dependencies may be required at the system level. |
Lines changed: 189 additions & 0 deletions

@@ -0,0 +1,189 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+
+# In[1]:
+
+
+# this definition exposes all python module imports that should be available in all subsequent commands
+import json
+import numpy as np
+import pandas as pd
+import os
+
+# for operationalization of the model we want to use a few other libraries later
+from sklearn.preprocessing import OneHotEncoder
+from sklearn.ensemble import IsolationForest
+
+# global constants
+MODEL_DIRECTORY = "/srv/app/model/data/"
+
+
+# In[3]:
+
+
+# this cell is not executed from MLTK and should only be used for staging data into the notebook environment
+def stage(name):
+    with open("data/" + name + ".csv", 'r') as f:
+        df = pd.read_csv(f)
+    with open("data/" + name + ".json", 'r') as f:
+        param = json.load(f)
+    return df, param
+
+
+# In[12]:
+
+
+# initialize your model
+# available inputs: data and parameters
+# returns the model object which will be used as a reference to call fit, apply and summary subsequently
+def init(df, param):
+    model = {}
+    model['encoder'] = OneHotEncoder(handle_unknown='ignore')
+    model['detector'] = IsolationForest(contamination=0.01)
+    return model
+
+
+# In[26]:
+
+
+# train your model
+# returns a fit info json object and may modify the model object
+def fit(model, df, param):
+    features_to_encode = df[['ComputerName', 'EventCode']]
+    model['encoder'].fit(features_to_encode)
+    encoded_features = model['encoder'].transform(features_to_encode)
+    df_encoded_features = pd.concat([df[['count']], pd.DataFrame(encoded_features.toarray()).add_prefix('f_')], axis=1)
+    model['detector'].fit(df_encoded_features)
+    info = {"message": "model trained"}
+    return info
+
+
+# In[28]:
+
+
+# apply your model
+# returns the calculated results
+def apply(model, df, param):
+    features_to_encode = df[['ComputerName', 'EventCode']]
+    encoded_features = model['encoder'].transform(features_to_encode)
+    df_encoded_features = pd.concat([df[['count']], pd.DataFrame(encoded_features.toarray()).add_prefix('f_')], axis=1)
+    outliers = model['detector'].predict(df_encoded_features)
+    result = pd.DataFrame(outliers, columns=['outlier'])
+    return result
+
+
+# In[30]:
+
+
+# save model to name in expected convention "<algo_name>_<model_name>"
+def save(model, name):
+    # we skip saving and loading in this example, but of course you can build your preferred serialization here
+    #with open(MODEL_DIRECTORY + name + ".json", 'w') as file:
+    #    json.dump(model, file)
+    return model
+
+
+# In[31]:
+
+
+# load model from name in expected convention "<algo_name>_<model_name>"
+def load(name):
+    # we skip saving and loading in this example, but of course you can build your preferred deserialization here
+    model = {}
+    #with open(MODEL_DIRECTORY + name + ".json", 'r') as file:
+    #    model = json.load(file)
+    return model
+
+
+# In[32]:
+
+
+# return a model summary
+def summary(model=None):
+    returns = {"version": {"numpy": np.__version__, "pandas": pd.__version__}}
+    return returns
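The file above implements the standard DSDL/MLTK container model interface (init, fit, apply, save, load, summary). As a rough local smoke test of the same encode-then-detect pipeline, here is a sketch using synthetic data (the hosts, event codes, and counts below are made up, not from the commit) with the column names the file assumes:

```python
# Local smoke test of the encode-then-detect pipeline defined above
# (a sketch; all data values here are synthetic, not from the commit).
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import IsolationForest

# toy event counts with the column names the file assumes
df = pd.DataFrame({
    "ComputerName": ["host-a", "host-b", "host-a", "host-c"] * 25,
    "EventCode":    ["4624", "4625", "4688", "4624"] * 25,
    "count":        [3, 1, 2, 500] * 25,
})

# init(): one-hot encoder for the categorical fields, IsolationForest as detector
model = {"encoder": OneHotEncoder(handle_unknown="ignore"),
         "detector": IsolationForest(contamination=0.01, random_state=0)}

# fit(): encode the categoricals, stack with the numeric count, train the detector
features = df[["ComputerName", "EventCode"]]
encoded = model["encoder"].fit_transform(features)
X = pd.concat([df[["count"]],
               pd.DataFrame(encoded.toarray()).add_prefix("f_")], axis=1)
model["detector"].fit(X)

# apply(): predict() returns 1 for inliers and -1 for outliers
result = pd.DataFrame(model["detector"].predict(X), columns=["outlier"])
print(result["outlier"].value_counts())
```

Inside a DSDL container these stages are not called directly; MLTK drives them through the `| fit` and `| apply` SPL commands.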

app/model/anomaly_detection_ecod.py

Lines changed: 9 additions & 9 deletions

@@ -3,7 +3,7 @@
-# In[ ]:
+# In[1]:
 # this definition exposes all python module imports that should be available in all subsequent commands
@@ -29,7 +29,7 @@
-# In[ ]:
+# In[6]:
 # this cell is not executed from MLTK and should only be used for staging data into the notebook environment
@@ -51,7 +51,7 @@ def stage(name):
-# In[ ]:
+# In[10]:
 # initialize your model
@@ -62,7 +62,7 @@ def init(df,param):
 # parallelization options for ECOD:
 # ECOD(n_jobs=2)
 # most other PyOD models would work similarly, e.g. replace with Isolation Forest:
-#model = IForest()
+# model = IForest()
     return model
@@ -73,7 +73,7 @@ def init(df,param):
-# In[ ]:
+# In[12]:
 # train your model
@@ -95,7 +95,7 @@ def fit(model,df,param):
-# In[ ]:
+# In[14]:
 # apply your model
@@ -117,7 +117,7 @@ def apply(model,df,param):
-# In[ ]:
+# In[16]:
 # save model to name in expected convention "<algo_name>_<model_name>"
@@ -133,7 +133,7 @@ def save(model,name):
-# In[ ]:
+# In[17]:
 # load model from name in expected convention "<algo_name>_<model_name>"
@@ -148,7 +148,7 @@ def load(name):
-# In[ ]:
+# In[18]:
 # return a model summary
