Skip to content

Commit

Permalink
Implementation of LODA (Lightweight On-line Detection of Anomalies)
Browse files Browse the repository at this point in the history
  • Loading branch information
hoanganhngo610 committed Oct 20, 2023
1 parent 424cc38 commit 60521a4
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 0 deletions.
2 changes: 2 additions & 0 deletions river/anomaly/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from .filter import QuantileFilter, ThresholdFilter
from .gaussian import GaussianScorer
from .hst import HalfSpaceTrees
from .loda import LODA
from .lof import LocalOutlierFactor
from .svm import OneClassSVM

Expand All @@ -29,4 +30,5 @@
"QuantileFilter",
"ThresholdFilter",
"LocalOutlierFactor",
"LODA",
]
113 changes: 113 additions & 0 deletions river/anomaly/loda.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from __future__ import annotations

import math

import numpy as np

from river import anomaly, utils

__all__ = ["LODA"]


class LODA(anomaly.base.AnomalyDetector):
"""LODA (Lightweight on-line detector of anomalies)
LODA [^1] comprises a collection of one-dimensional histograms, each approximating
the probability density of the inputed data projected on a single projection vector. Its output
on a sample is the average of the logarithm of probabilities estimated on individual projection vectors.
LODA shows that an ensemble of very weak detections can lead to a very strong anomaly detector with performance
equal to or even better than state-of-the-art methods.
This implementation within `River` is adapted from the versions implemented by the
[PyOD - Python Outlier Detection](https://pyod.readthedocs.io/en/latest/_modules/pyod/models/loda.html) and
[PySAD - Python Streaming Anomaly Detection](https://pysad.readthedocs.io/en/latest/_modules/pysad/models/loda.html)
frameworks.
Parameters
----------
n_bins
Number of bins of the histograms generated by the algorithm.
n_random_cuts
Number of random cuts
References
----------
[^1] Pevný, T. 2015. LODA: Lightweight on-line detector of anomalies. Machine Learning. 102, 2 (2015), 275–304.
Examples
--------
>>> import pandas as pd
>>> from river import anomaly
>>> from river import datasets
>>> cc_df = pd.DataFrame(datasets.CreditCard())
>>> loda = anomaly.LODA(n_bins=10, n_random_cuts=100)
>>> for x, _ in datasets.CreditCard().take(10_000):
... loda.learn_one(x)
>>> loda.n_features
30
>>> loda.score_one(cc_df[0][10_001])
9.091044415623026e-16
"""

def __init__(self, n_bins=10, n_random_cuts=100):
self.n_bins = n_bins
self.n_random_cuts = n_random_cuts

self.weights = []
self.projections_ = []
self.histograms_ = []
self.limits_ = []
self.n_bins_ = []

self.n_features = 0
self.n_zero_features = 0
self.n_nonzero_features = 0

self.init = True

def learn_one(self, x):
x_np = utils.dict2numpy(x)

if self.init:
self.n_features = len(x)
self.n_nonzero_features = math.sqrt(self.n_features)
self.n_zero_features = self.n_features - np.int_(self.n_nonzero_features)

self.weights = np.ones(self.n_random_cuts) / self.n_random_cuts
self.projections_ = np.random.rand(self.n_random_cuts, self.n_features)
self.histograms_ = np.zeros((self.n_random_cuts, self.n_bins))
self.limits_ = np.zeros((self.n_random_cuts, self.n_bins + 1))

self.init = False

x_np = x_np.reshape(1, -1)

for i in range(self.n_random_cuts):
rands = np.random.permutation(self.n_features)[: self.n_zero_features]
self.projections_[i, rands] = 0
projected_data = self.projections_[i, :].dot(x_np.T)
self.histograms_[i, :], self.limits_[i, :] = np.histogram(
projected_data, bins=self.n_bins, density=False
)
self.histograms_[i, :] += 1e-12
self.histograms_[i, :] /= np.sum(self.histograms_[i, :])

def score_one(self, x):
x_np = utils.dict2numpy(x).reshape(1, -1)

pred_scores = np.zeros([x_np.shape[0], 1])
for i in range(self.n_random_cuts):
projected_data = self.projections_[i, :].dot(x_np.T)
inds = np.searchsorted(self.limits_[i, : self.n_bins - 1], projected_data, side="left")
pred_scores[:, 0] += -self.weights[i] * np.log(self.histograms_[i, inds])
pred_scores /= self.n_random_cuts

return pred_scores.ravel().item()

0 comments on commit 60521a4

Please sign in to comment.