Skip to content

Commit fe52807

Browse files
authored
Merge pull request #48 from sysflow-telemetry/ratelimiting
feat(core): add rate limiting filter with time decaying
2 parents e8d6a37 + dbc77ed commit fe52807

File tree

6 files changed

+267
-29
lines changed

6 files changed

+267
-29
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
1717

1818
## [0.4.4] - 2022-08-01
1919

20+
### Added
21+
22+
- Add rate limiting filter with time decaying
23+
2024
### Changed
2125

2226
- Bump UBI to 8.6-855

core/flattener/config.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
//
2+
// Copyright (C) 2022 IBM Corporation.
3+
//
4+
// Authors:
5+
// Frederico Araujo <[email protected]>
6+
// Teryl Taylor <[email protected]>
7+
//
8+
// Licensed under the Apache License, Version 2.0 (the "License");
9+
// you may not use this file except in compliance with the License.
10+
// You may obtain a copy of the License at
11+
//
12+
// http://www.apache.org/licenses/LICENSE-2.0
13+
//
14+
// Unless required by applicable law or agreed to in writing, software
15+
// distributed under the License is distributed on an "AS IS" BASIS,
16+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
// See the License for the specific language governing permissions and
18+
// limitations under the License.
19+
20+
// Package flattener flattens input telemetry in a flattened representation.
21+
package flattener
22+
23+
import (
24+
"strconv"
25+
"time"
26+
)
27+
28+
// Configuration keys accepted in the handler's configuration map.
const (
	// FilterOnOffKey enables/disables the rate limiting filter ("on"/"off").
	FilterOnOffKey string = "filter.enabled"
	// FilterMaxAgeKey sets the filter's time decay, in seconds.
	FilterMaxAgeKey string = "filter.maxage"
)

// Config defines a configuration object for the engine.
type Config struct {
	FilterOnOff  OnOff         // whether the rate limiting filter is enabled (default: Off)
	FilterMaxAge time.Duration // time decay (TTL) of filter entries (default: 24h)
}

// CreateConfig creates a new config object from a config dictionary.
// Missing keys keep their default values. A malformed filter.maxage
// value is reported through the returned error while the default
// duration is retained, so the returned Config is always usable.
func CreateConfig(conf map[string]interface{}) (Config, error) {
	c := Config{FilterOnOff: Off, FilterMaxAge: 24 * time.Hour} // default values
	var err error
	if v, ok := conf[FilterOnOffKey].(string); ok {
		c.FilterOnOff = parseOnOffType(v)
	}
	if v, ok := conf[FilterMaxAgeKey].(string); ok {
		// NOTE(review): the value is interpreted as seconds here, while
		// docs/CONFIG.md describes it as minutes — confirm intended unit.
		var duration int
		duration, err = strconv.Atoi(v)
		if err == nil {
			c.FilterMaxAge = time.Duration(duration) * time.Second
		}
	}
	return c, err
}

// OnOff defines an On-Off state type.
type OnOff int32

// OnOff states.
const (
	Off OnOff = iota
	On
)

// String returns the lowercase string form of the state.
func (s OnOff) String() string {
	return [...]string{"off", "on"}[s]
}

// Enabled reports whether the state is On.
func (s OnOff) Enabled() bool {
	return s == On
}

// parseOnOffType parses s into an OnOff state; any value other than
// "on" maps to Off.
func parseOnOffType(s string) OnOff {
	if s == On.String() {
		return On
	}
	return Off
}

core/flattener/filter.go

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
//
2+
// Copyright (C) 2022 IBM Corporation.
3+
//
4+
// Authors:
5+
// Frederico Araujo <[email protected]>
6+
// Teryl Taylor <[email protected]>
7+
//
8+
// Licensed under the Apache License, Version 2.0 (the "License");
9+
// you may not use this file except in compliance with the License.
10+
// You may obtain a copy of the License at
11+
//
12+
// http://www.apache.org/licenses/LICENSE-2.0
13+
//
14+
// Unless required by applicable law or agreed to in writing, software
15+
// distributed under the License is distributed on an "AS IS" BASIS,
16+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
// See the License for the specific language governing permissions and
18+
// limitations under the License.
19+
20+
// Package flattener flattens input telemetry in a flattened representation.
21+
package flattener
22+
23+
import (
24+
"container/list"
25+
"encoding/binary"
26+
"time"
27+
28+
"github.com/cespare/xxhash/v2"
29+
"github.com/sysflow-telemetry/sf-apis/go/sfgo"
30+
)
31+
32+
// byteInt64 is a package-level scratch buffer used by semanticHash.
// NOTE(review): shared mutable package state — any function writing to
// this buffer is not safe for concurrent use; confirm the flattener
// handler is driven by a single goroutine.
var byteInt64 []byte = make([]byte, 8)

// Filter is a time decaying filter with a TTL per entry.
// It is not safe for concurrent use.
type Filter struct {
	m   map[uint64]int64 // hash -> number of times seen within the TTL window
	q   *list.List       // entries ordered by first-seen time (oldest at front)
	ttl time.Duration    // time after which an entry is evicted
}

// Entry encodes a hash value with the time it was first added to the filter.
type Entry struct {
	h         uint64
	firstSeen time.Time
}

// NewFilter creates a new time decaying filter that evicts entries that
// have been seen longer than t duration.
func NewFilter(t time.Duration) *Filter {
	return &Filter{m: make(map[uint64]int64), q: list.New(), ttl: t}
}

// Test tests if hash h has been seen since maximum ttl.
func (f *Filter) Test(h uint64) bool {
	f.evictAgedEntries()
	_, ok := f.m[h]
	return ok
}

// TestAndAdd tests if hash h has been seen since maximum ttl, and adds
// or increments the element in the filter cache.
func (f *Filter) TestAndAdd(h uint64) bool {
	f.evictAgedEntries()
	_, ok := f.m[h]
	f.Add(h)
	return ok
}

// Count returns how many times hash h has been seen during its ttl time.
func (f *Filter) Count(h uint64) int64 {
	f.evictAgedEntries()
	if count, ok := f.m[h]; ok {
		return count
	}
	return 0
}

// Add adds hash h to the filter, incrementing its count if already present.
// The entry's TTL is measured from the first time h was added.
func (f *Filter) Add(h uint64) {
	if v, ok := f.m[h]; !ok {
		f.m[h] = 1
		f.q.PushBack(Entry{h: h, firstSeen: time.Now()})
	} else {
		f.m[h] = v + 1
	}
}

// evictAgedEntries removes all entries first seen longer than ttl ago.
// The queue is ordered by first-seen time, so eviction can stop at the
// first entry still within the ttl.
func (f *Filter) evictAgedEntries() {
	for f.q.Len() > 0 {
		e := f.q.Front()
		entry := e.Value.(Entry)
		if time.Since(entry.firstSeen) < f.ttl {
			break
		}
		f.q.Remove(e)
		delete(f.m, entry.h)
	}
}
97+
98+
// semanticHash computes a hash value over record attributes denoting the semantics of the record (used in the time decay filter).
99+
func semanticHash(fr *sfgo.FlatRecord) uint64 {
100+
h := xxhash.New()
101+
h.Write([]byte(fr.Strs[sfgo.SYSFLOW_SRC][sfgo.PROC_EXE_STR]))
102+
h.Write([]byte(fr.Strs[sfgo.SYSFLOW_SRC][sfgo.PROC_EXEARGS_STR]))
103+
binary.LittleEndian.PutUint64(byteInt64, uint64(fr.Ints[sfgo.SYSFLOW_SRC][sfgo.PROC_UID_INT]))
104+
h.Write(byteInt64)
105+
binary.LittleEndian.PutUint64(byteInt64, uint64(fr.Ints[sfgo.SYSFLOW_SRC][sfgo.PROC_GID_INT]))
106+
h.Write(byteInt64)
107+
binary.LittleEndian.PutUint64(byteInt64, uint64(fr.Ints[sfgo.SYSFLOW_SRC][sfgo.OPFLAGS_INT]))
108+
h.Write(byteInt64)
109+
binary.LittleEndian.PutUint64(byteInt64, uint64(fr.Ints[sfgo.SYSFLOW_SRC][sfgo.PROC_TTY_INT]))
110+
h.Write(byteInt64)
111+
sfType := fr.Ints[sfgo.SYSFLOW_IDX][sfgo.SF_REC_TYPE]
112+
if sfType == sfgo.NET_FLOW {
113+
binary.LittleEndian.PutUint64(byteInt64, uint64(fr.Ints[sfgo.SYSFLOW_SRC][sfgo.FL_NETW_SIP_INT]))
114+
h.Write(byteInt64)
115+
binary.LittleEndian.PutUint64(byteInt64, uint64(fr.Ints[sfgo.SYSFLOW_SRC][sfgo.FL_NETW_DIP_INT]))
116+
h.Write(byteInt64)
117+
binary.LittleEndian.PutUint64(byteInt64, uint64(fr.Ints[sfgo.SYSFLOW_SRC][sfgo.FL_NETW_DPORT_INT]))
118+
h.Write(byteInt64)
119+
binary.LittleEndian.PutUint64(byteInt64, uint64(fr.Ints[sfgo.SYSFLOW_SRC][sfgo.FL_NETW_PROTO_INT]))
120+
h.Write(byteInt64)
121+
}
122+
if sfType == sfgo.FILE_FLOW || sfType == sfgo.FILE_EVT {
123+
h.Write([]byte(fr.Strs[sfgo.SYSFLOW_SRC][sfgo.FILE_PATH_STR]))
124+
}
125+
return h.Sum64()
126+
}

core/flattener/flattener.go

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@ func NewFlattenerChan(size int) interface{} {
4646

4747
// Flattener defines the main class for the flatterner plugin.
4848
type Flattener struct {
49-
outCh []chan *sfgo.FlatRecord
49+
config Config
50+
filter *Filter
51+
outCh []chan *sfgo.FlatRecord
5052
}
5153

5254
// NewFlattener creates a new Flattener instance.
@@ -66,6 +68,11 @@ func (s *Flattener) RegisterHandler(hc plugins.SFHandlerCache) {
6668

6769
// Init initializes the handler with a configuration map.
6870
func (s *Flattener) Init(conf map[string]interface{}) error {
71+
s.config, _ = CreateConfig(conf) // no err check, assuming defaults
72+
if s.config.FilterOnOff.Enabled() {
73+
s.filter = NewFilter(s.config.FilterMaxAge)
74+
logger.Info.Printf("Initialized rate limiter with %s time decay", s.config.FilterMaxAge)
75+
}
6976
return nil
7077
}
7178

@@ -81,6 +88,16 @@ func (s *Flattener) SetOutChan(chObj []interface{}) {
8188
}
8289
}
8390

91+
// out sends a record to every output channel in the plugin.
92+
func (s *Flattener) out(fr *sfgo.FlatRecord) {
93+
if s.config.FilterOnOff.Enabled() && s.filter != nil && s.filter.TestAndAdd(semanticHash(fr)) {
94+
return
95+
}
96+
for _, c := range s.outCh {
97+
c <- fr
98+
}
99+
}
100+
84101
// Cleanup tears down resources.
85102
func (s *Flattener) Cleanup() {
86103
logger.Trace.Println("Calling Cleanup on Flattener channel")
@@ -132,9 +149,7 @@ func (s *Flattener) HandleNetFlow(sf *plugins.CtxSysFlow, nf *sfgo.NetworkFlow)
132149
fr.Ints[sfgo.SYSFLOW_IDX][sfgo.FL_NETW_NUMWSENDBYTES_INT] = nf.NumWSendBytes
133150
fr.Ptree = sf.PTree
134151
fr.GraphletID = sf.GraphletID
135-
for _, ch := range s.outCh {
136-
ch <- fr
137-
}
152+
s.out(fr)
138153
return nil
139154
}
140155

@@ -155,9 +170,7 @@ func (s *Flattener) HandleFileFlow(sf *plugins.CtxSysFlow, ff *sfgo.FileFlow) er
155170
fr.Ints[sfgo.SYSFLOW_IDX][sfgo.FL_FILE_NUMWSENDBYTES_INT] = ff.NumWSendBytes
156171
fr.Ptree = sf.PTree
157172
fr.GraphletID = sf.GraphletID
158-
for _, ch := range s.outCh {
159-
ch <- fr
160-
}
173+
s.out(fr)
161174
return nil
162175
}
163176

@@ -191,9 +204,7 @@ func (s *Flattener) HandleFileEvt(sf *plugins.CtxSysFlow, fe *sfgo.FileEvent) er
191204
fr.Ints[sfgo.SYSFLOW_IDX][sfgo.EV_FILE_RET_INT] = int64(fe.Ret)
192205
fr.Ptree = sf.PTree
193206
fr.GraphletID = sf.GraphletID
194-
for _, ch := range s.outCh {
195-
ch <- fr
196-
}
207+
s.out(fr)
197208
return nil
198209
}
199210

@@ -218,9 +229,7 @@ func (s *Flattener) HandleProcEvt(sf *plugins.CtxSysFlow, pe *sfgo.ProcessEvent)
218229
fr.Ints[sfgo.SYSFLOW_IDX][sfgo.EV_PROC_RET_INT] = int64(pe.Ret)
219230
fr.Ptree = sf.PTree
220231
fr.GraphletID = sf.GraphletID
221-
for _, ch := range s.outCh {
222-
ch <- fr
223-
}
232+
s.out(fr)
224233
return nil
225234
}
226235

docs/CONFIG.md

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
## Configuration
22

3-
The pipeline configuration below shows how to configure a pipeline that will read a sysflow stream and push records to the policy engine, which will trigger alerts using a set of runtime policies stored in a `yaml` file. An example pipeline with this configuration looks as follows:
3+
The pipeline configuration below shows how to configure a pipeline that will read a sysflow stream and push records to the policy engine, which will trigger alerts using a set of runtime policies stored in a `yaml` file. An example pipeline with this configuration looks as follows:
44

55
```json
66
{
@@ -38,7 +38,7 @@ The pipeline configuration below shows how to configure a pipeline that will rea
3838
This pipeline specifies three built-in plugins:
3939

4040
- [sysflowreader](https://github.com/sysflow-telemetry/sf-processor/blob/master/core/processor/processor.go): is a generic reader plugin that ingests sysflow from the driver, caches entities, and presents sysflow objects to a handler object (i.e., an object that implements the [handler interface](https://github.com/sysflow-telemetry/sf-apis/blob/master/go/plugins/handler.go)) for processing. In this case, we are using the [flattener](https://github.com/sysflow-telemetry/sf-processor/blob/master/core/flattener/flattener.go) handler, but custom handlers are possible.
41-
- [policyengine](https://github.com/sysflow-telemetry/sf-processor/blob/master/core/policyengine/policyengine.go): is the policy engine, which takes [flattened](https://github.com/sysflow-telemetry/sf-apis/blob/master/go/sfgo/flatrecord.go) (row-oriented) SysFlow records as input and outputs [records](https://github.com/sysflow-telemetry/sf-processor/blob/master/core/policyengine/engine/types.go), which represent alerts, or filtered sysflow records depending on the policy engine's _mode_ (more on this later).
41+
- [policyengine](https://github.com/sysflow-telemetry/sf-processor/blob/master/core/policyengine/policyengine.go): is the policy engine, which takes [flattened](https://github.com/sysflow-telemetry/sf-apis/blob/master/go/sfgo/flatrecord.go) (row-oriented) SysFlow records as input and outputs [records](https://github.com/sysflow-telemetry/sf-processor/blob/master/core/policyengine/engine/types.go), which represent alerts, or filtered sysflow records depending on the policy engine's _mode_ (more on this later).
4242
- [exporter](https://github.com/sysflow-telemetry/sf-processor/blob/master/core/exporter/exporter.go): takes records from the policy engine, and exports them to ElasticSearch, syslog, file, or terminal, in a JSON format or in Elastic Common Schema (ECS) format. Note that custom export plugins can be created to export to other serialization formats and transport protocols.
4343

4444
Each plugin has a set of general attributes that are present in all plugins, and a set of attributes that are custom to the specific plugins. For more details on the specific attributes in this example, see the pipeline configuration [template](https://github.com/sysflow-telemetry/sf-processor/blob/master/driver/pipeline.template.json)
@@ -65,7 +65,7 @@ The policy engine (`"processor": "policyengine"`) plugin is driven by a set of r
6565
- _mode_ (optional): The mode of the policy engine. Allowed values are:
6666
- `alert` (default): the policy engine generates rule-based alerts; `alert` is a blocking mode that drops all records that do not match any given rule. If no mode is specified, the policy engine runs in `alert` mode by default.
6767
- `enrich` for enriching records with additional context from the rule. In contrast to `alert`, this is a non-blocking mode which applies tagging and action enrichments to matching records as defined in the policy file. Non-matching records are passed on "as is".
68-
68+
6969
- _monitor_ (optional): Specifies if changes to the policy file(s) should be monitored and updated in the policy engine.
7070
- `none` (default): no monitor is used.
7171
- `local`: the processor will monitor for changes in the policies path and update its rule set if changes are detected.
@@ -115,7 +115,7 @@ Data export is done via bulk ingestion. The ingestion can be controlled by some
115115
- _es.username_ (required): The ES username.
116116
- _es.password_ (required): The password for the specified ES user.
117117
- _buffer_ (optional) The bulk size as the number of records to be ingested at once. Default is `0` but value of `0` indicates record-by-record ingestion which may be highly inefficient.
118-
- _es.bulk.numWorkers_ (optional): The number of ingestion workers used in parallel. Default is `0` which means that the exporter uses as many workers as there are cores in the machine.
118+
- _es.bulk.numWorkers_ (optional): The number of ingestion workers used in parallel. Default is `0` which means that the exporter uses as many workers as there are cores in the machine.
119119
- _es.bulk.flashBuffer_ (optional): The size in bytes of the flush buffer for ingestion. It should be large enough to hold one bulk (the number of records specified in _buffer_), otherwise the bulk is broken into smaller chunks. Default is `5e+6`.
120120
- _es.bulk.flushTimeout_ (optional): The flush buffer time threshold. Valid values are golang duration strings. Default is `30s`.
121121

@@ -134,7 +134,7 @@ Export to IBM Findings API allows adding custom findings to the IBM Cloud Securi
134134
- _findings.sqlquerycrn_ (required):
135135
- _findings.s3region_ (required):
136136
- _findings.s3bucket_ (required):
137-
- _findings.path_ (required):
137+
- _findings.path_ (required):
138138
- _findings.pool.capacity_ (optional): The capacity of the findings pool, Default is `250`.
139139
- _findings.pool.maxage_ (optional): The maximum age of the security findings in the pool in minutes. Default is `1440`.
140140

@@ -145,7 +145,7 @@ For more information about inserting custom findings into IBM SCC, refer to [Cus
145145
It is possible to override any of the custom attributes of a plugin using an environment variable. This is especially useful when operating the processor as a container, where you may have to deploy the processor to multiple nodes, and have attributes that change per node. If an environment variable is set, it overrides the setting inside the config file. The environment variables must follow the following structure:
146146

147147
- Environment variables must follow the naming schema `<PLUGIN NAME>_<CONFIG ATTRIBUTE NAME>`
148-
- The plugin name inside the pipeline configuration file must be all lower case.
148+
- The plugin name inside the pipeline configuration file must be all lower case.
149149

150150
For example, to set the alert mode inside the policy engine, the following environment variable is set:
151151

@@ -174,3 +174,18 @@ docker run
174174
-e EXPORTER_PORT=514
175175
...
176176
```
177+
178+
### Rate limiter configuration (experimental)
179+
180+
The `flattener` handler has a built-in time decay filter that can be enabled to reduce event rates in the processor. The filter uses a time-decaying cache keyed by a semantic hash of records. This means that the filter should only forward one record matching a semantic hash per time decay period. The semantic hash takes into consideration process, flow and event attributes. To enable rate limiting, modify the `sysflowreader` processor as follows:
181+
182+
```json
183+
{
184+
"processor": "sysflowreader",
185+
"handler": "flattener",
186+
"in": "sysflow sysflowchan",
187+
"out": "flat flattenerchan",
188+
"filter.enabled": "on|off (default: off)",
189+
    "filter.maxage": "time decay in seconds (default: 24h)"
190+
}
191+
```

0 commit comments

Comments
 (0)