DataDog · andrewqian2001datadog · Sep 9, 2024 · Sep 10, 2024 · Sep 10, 2024 · Oct 28, 2024
@@ -4,7 +4,13 @@
     GaugeMetric,
     SetMetric,
 )
+from datadog.dogstatsd.buffered_metrics import (
+    HistogramMetric,
+    DistributionMetric,
+    TimingMetric
+)
 from datadog.dogstatsd.metric_types import MetricType
+from datadog.dogstatsd.buffered_metrics_context import BufferedMetricContexts
 
 
 class Aggregator(object):
@@ -14,10 +20,18 @@ def __init__(self):
             MetricType.GAUGE: {},
             MetricType.SET: {},
         }
+        self.buffered_metrics_map = {
+            MetricType.HISTOGRAM: BufferedMetricContexts(HistogramMetric),
+            MetricType.DISTRIBUTION: BufferedMetricContexts(DistributionMetric),
+            MetricType.TIMING: BufferedMetricContexts(TimingMetric)
+        }
         self._locks = {
             MetricType.COUNT: threading.RLock(),
             MetricType.GAUGE: threading.RLock(),
             MetricType.SET: threading.RLock(),
+            MetricType.HISTOGRAM: threading.RLock(),
+            MetricType.DISTRIBUTION: threading.RLock(),
+            MetricType.TIMING: threading.RLock(),
         }
 
     def flush_aggregated_metrics(self):
@@ -28,6 +42,23 @@ def flush_aggregated_metrics(self):
                 self.metrics_map[metric_type] = {}
             for metric in current_metrics.values():
                 metrics.extend(metric.get_data() if isinstance(metric, SetMetric) else [metric])
+
+        for metric_type in self.buffered_metrics_map.keys():
+            with self._locks[metric_type]:
+                metric_context = self.buffered_metrics_map[metric_type]
+                self.buffered_metrics_map[metric_type] = {}
+            for metricList in metric_context.flush():
+                metrics.extend(metricList)
+        return metrics
+
+    def flush_aggregated_buffered_metrics(self):
+        """Flush every buffered metric context and return the flushed metrics.
+
+        Each entry of ``self.buffered_metrics_map`` is flushed through its own
+        ``flush()`` method, which yields one list of metrics per context; those
+        lists are concatenated into a single flat list.
+
+        Returns:
+            list: everything produced by the contexts' ``flush()`` calls.
+        """
+        metrics = []
+        for metric_type, metric_context in self.buffered_metrics_map.items():
+            # The context object must stay in the map: replacing it with a
+            # plain dict would break every later ``sample()`` call for this
+            # metric type. ``flush()`` already empties the context.
+            with self._locks[metric_type]:
+                flushed = metric_context.flush()
+            for metric_list in flushed:
+                metrics.extend(metric_list)
         return metrics
 
     def get_context(self, name, tags):
@@ -60,3 +91,27 @@ def add_metric(
                 self.metrics_map[metric_type][context] = metric_class(
                     name, value, tags, rate, timestamp
                 )
+
+    def histogram(self, name, value, tags, rate):
+        """Buffer one histogram sample through ``add_buffered_metric``.
+
+        Returns whatever ``add_buffered_metric`` returns.
+        """
+        metric_type = MetricType.HISTOGRAM
+        return self.add_buffered_metric(metric_type, name, value, tags, rate)
+
+    def distribution(self, name, value, tags, rate):
+        """Buffer one distribution sample through ``add_buffered_metric``.
+
+        Returns whatever ``add_buffered_metric`` returns.
+        """
+        metric_type = MetricType.DISTRIBUTION
+        return self.add_buffered_metric(metric_type, name, value, tags, rate)
+
+    def timing(self, name, value, tags, rate):
+        """Buffer one timing sample through ``add_buffered_metric``.
+
+        Returns whatever ``add_buffered_metric`` returns.
+        """
+        metric_type = MetricType.TIMING
+        return self.add_buffered_metric(metric_type, name, value, tags, rate)
+
+    def add_buffered_metric(self, metric_type, name, value, tags, rate):
+        """Hand one sample to the buffered context registered for ``metric_type``.
+
+        The context key is derived from ``name`` and ``tags`` with
+        ``get_context`` and passed along with the sample. The return value is
+        whatever the context's ``sample`` method returns.
+        """
+        key = self.get_context(name, tags)
+        contexts = self.buffered_metrics_map[metric_type]
+        result = contexts.sample(name, value, tags, rate, key)
+        return result
+
+
@@ -561,6 +561,8 @@ def _flush_thread_loop(self, flush_interval):
                 time.sleep(flush_interval)
                 if not self._disable_aggregation:
                     self.flush_aggregated_metrics()
+                    # Histograms, Distribution and Timing metrics are not aggregated
+                    self.flush_buffered_metrics()
                 if not self._disable_buffering:
                     self.flush_buffered_metrics()
         self._flush_thread = threading.Thread(
@@ -1127,7 +1129,10 @@ def _report(self, metric, metric_type, value, tags, sample_rate, timestamp=0):
         )
 
         # Send it
-        self._send(payload)
+        if metric_type == MetricType.DISTRIBUTION or metric_type == MetricType.HISTOGRAM or metric_type == MetricType.TIMING:
+            self._send_to_buffer(payload)
+        else:
+            self._send(payload)
 
     def _reset_telemetry(self):
         self.metrics_count = 0

diff --git a/datadog/dogstatsd/buffered_metrics.py b/datadog/dogstatsd/buffered_metrics.py
@@ -0,0 +1,68 @@
+import random
+from datadog.dogstatsd.metric_types import MetricType
+from datadog.dogstatsd.metrics import MetricAggregator
+
+
+class BufferedMetric(object):
+    """Raw sample values collected for one metric context between flushes.
+
+    Args:
+        name: metric name.
+        tags: tags attached to every flushed sample.
+        metric_type: metric type passed through to ``MetricAggregator``.
+        specified_rate: sample rate requested by the caller; when it is not
+            ``1.0`` it is reported as-is on flush.
+        max_metric_samples: upper bound on the number of values kept; ``0``
+            (or a negative value) keeps every sample.
+    """
+
+    def __init__(self, name, tags, metric_type, specified_rate=1.0, max_metric_samples=0):
+        self.name = name
+        self.tags = tags
+        self.metric_type = metric_type
+        self.max_metric_samples = max_metric_samples
+        self.specified_rate = specified_rate
+        self.data = []
+        # Both counters start at zero so that ``stored_metric_samples`` always
+        # equals ``len(self.data)``; starting at one made the replacement
+        # index below run past the end of ``data``.
+        self.stored_metric_samples = 0
+        self.total_metric_samples = 0
+
+    def sample(self, value):
+        """Store ``value`` unconditionally and count it as seen and stored."""
+        self.data.append(value)
+        self.stored_metric_samples += 1
+        self.total_metric_samples += 1
+
+    def maybe_keep_sample(self, value):
+        """Store ``value``, honouring ``max_metric_samples`` when it is set.
+
+        Without a limit the value is always stored. With a limit, values are
+        appended until the limit is reached; after that each new value
+        replaces a random stored one with probability
+        ``max_metric_samples / total_metric_samples``.
+        """
+        if self.max_metric_samples <= 0:
+            self.sample(value)
+            return
+
+        # Count this value before drawing so the draw covers every value seen.
+        self.total_metric_samples += 1
+        if self.stored_metric_samples < self.max_metric_samples:
+            self.data.append(value)
+            self.stored_metric_samples += 1
+            return
+
+        # randint is inclusive on both ends, hence the ``- 1``.
+        i = random.randint(0, self.total_metric_samples - 1)
+        if i < self.max_metric_samples:
+            self.data[i] = value
+
+    def skip_sample(self):
+        """Count a value that was seen but deliberately not stored."""
+        self.total_metric_samples += 1
+
+    def flush(self):
+        """Return one ``MetricAggregator`` per stored value.
+
+        The reported rate is ``specified_rate`` when it differs from ``1.0``;
+        otherwise it is the stored/seen ratio (``1.0`` when nothing was seen).
+        """
+        if self.specified_rate != 1.0:
+            rate = self.specified_rate
+        elif self.total_metric_samples > 0:
+            # float() keeps this a true division on Python 2 as well.
+            rate = float(self.stored_metric_samples) / self.total_metric_samples
+        else:
+            rate = 1.0
+
+        return [
+            MetricAggregator(self.name, self.tags, rate, self.metric_type, value)
+            for value in self.data
+        ]
+
+
+class HistogramMetric(BufferedMetric):
+    """``BufferedMetric`` preset to ``MetricType.HISTOGRAM``."""
+
+    def __init__(self, name, tags, rate=1.0, max_metric_samples=0):
+        super(HistogramMetric, self).__init__(
+            name,
+            tags,
+            metric_type=MetricType.HISTOGRAM,
+            specified_rate=rate,
+            max_metric_samples=max_metric_samples,
+        )
+
+
+class DistributionMetric(BufferedMetric):
+    """``BufferedMetric`` preset to ``MetricType.DISTRIBUTION``."""
+
+    def __init__(self, name, tags, rate=1.0, max_metric_samples=0):
+        super(DistributionMetric, self).__init__(
+            name,
+            tags,
+            metric_type=MetricType.DISTRIBUTION,
+            specified_rate=rate,
+            max_metric_samples=max_metric_samples,
+        )
+
+
+class TimingMetric(BufferedMetric):
+    """``BufferedMetric`` preset to ``MetricType.TIMING``."""
+
+    def __init__(self, name, tags, rate=1.0, max_metric_samples=0):
+        super(TimingMetric, self).__init__(
+            name,
+            tags,
+            metric_type=MetricType.TIMING,
+            specified_rate=rate,
+            max_metric_samples=max_metric_samples,
+        )
diff --git a/datadog/dogstatsd/buffered_metrics_context.py b/datadog/dogstatsd/buffered_metrics_context.py
@@ -0,0 +1,46 @@
+from threading import Lock
+import random
+
+
+class BufferedMetricContexts(object):
+    """Registry of buffered metrics keyed by context, guarded by one lock.
+
+    Args:
+        buffered_metric_type: callable invoked as ``(name, tags, rate)`` to
+            create the metric object for a context seen for the first time.
+    """
+
+    def __init__(self, buffered_metric_type):
+        self.nb_context = 0
+        self.lock = Lock()
+        self.values = {}
+        self.buffered_metric_type = buffered_metric_type
+
+    def flush(self):
+        """Flush the metrics and reset the stored values.
+
+        Returns:
+            list: one entry per context, each being the result of that
+            metric's own ``flush()``.
+        """
+        with self.lock:
+            values = dict(self.values)
+            self.values.clear()
+            # Updated under the lock so concurrent flushes cannot lose counts.
+            self.nb_context += len(values)
+
+        # The detached metrics are no longer reachable through ``self.values``,
+        # so they can be flushed without holding the lock.
+        return [metric.flush() for metric in values.values()]
+
+    def sample(self, name, value, tags, rate, context_key):
+        """Sample a metric and store it if it meets the criteria."""
+        keeping_sample = self.should_sample(rate)
+        # The metric is mutated while the lock is held so that a concurrent
+        # ``flush`` cannot detach it between the lookup and the update.
+        with self.lock:
+            metric = self.values.get(context_key)
+            if metric is None:
+                # Create a new metric if it doesn't exist
+                metric = self.buffered_metric_type(name, tags, rate)
+                self.values[context_key] = metric
+            if keeping_sample:
+                metric.maybe_keep_sample(value)
+            else:
+                metric.skip_sample()
+
+    def should_sample(self, rate):
+        """Determine if a sample should be kept based on the specified rate."""
+        if rate >= 1:
+            return True
+        return random.random() < rate
+
+    def get_nb_context(self):
+        """Return the number of contexts flushed so far."""
+        return self.nb_context
@@ -2,3 +2,6 @@ class MetricType:
     COUNT = "c"
     GAUGE = "g"
     SET = "s"
+    HISTOGRAM = "h"
+    DISTRIBUTION = "d"
+    TIMING = "ms"
@@ -9,30 +9,30 @@ def setUp(self):
 
     def test_aggregator_sample(self):
         tags = ["tag1", "tag2"]
+        for _ in range(2):
+            self.aggregator.gauge("gaugeTest", 21, tags, 1)
+            self.assertEqual(len(self.aggregator.metrics_map[MetricType.GAUGE]), 1)
+            self.assertIn("gaugeTest:tag1,tag2", self.aggregator.metrics_map[MetricType.GAUGE])
 
-        self.aggregator.gauge("gaugeTest", 21, tags, 1)
-        self.assertEqual(len(self.aggregator.metrics_map[MetricType.GAUGE]), 1)
-        self.assertIn("gaugeTest:tag1,tag2", self.aggregator.metrics_map[MetricType.GAUGE])
+            self.aggregator.count("countTest", 21, tags, 1)
+            self.assertEqual(len(self.aggregator.metrics_map[MetricType.COUNT]), 1)
+            self.assertIn("countTest:tag1,tag2", self.aggregator.metrics_map[MetricType.COUNT])
 
-        self.aggregator.count("countTest", 21, tags, 1)
-        self.assertEqual(len(self.aggregator.metrics_map[MetricType.COUNT]), 1)
-        self.assertIn("countTest:tag1,tag2", self.aggregator.metrics_map[MetricType.COUNT])
+            self.aggregator.set("setTest", "value1", tags, 1)
+            self.assertEqual(len(self.aggregator.metrics_map[MetricType.SET]), 1)
+            self.assertIn("setTest:tag1,tag2", self.aggregator.metrics_map[MetricType.SET])
 
-        self.aggregator.set("setTest", "value1", tags, 1)
-        self.assertEqual(len(self.aggregator.metrics_map[MetricType.SET]), 1)
-        self.assertIn("setTest:tag1,tag2", self.aggregator.metrics_map[MetricType.SET])
+            self.aggregator.histogram("histogramTest", 21, tags, 1)
+            self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.HISTOGRAM].values), 1)
+            self.assertIn("histogramTest:tag1,tag2", self.aggregator.buffered_metrics_map[MetricType.HISTOGRAM].values)
 
-        self.aggregator.gauge("gaugeTest", 123, tags, 1)
-        self.assertEqual(len(self.aggregator.metrics_map[MetricType.GAUGE]), 1)
-        self.assertIn("gaugeTest:tag1,tag2", self.aggregator.metrics_map[MetricType.GAUGE])
+            self.aggregator.distribution("distributionTest", 21, tags, 1)
+            self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.DISTRIBUTION].values), 1)
+            self.assertIn("distributionTest:tag1,tag2", self.aggregator.buffered_metrics_map[MetricType.DISTRIBUTION].values)
 
-        self.aggregator.count("countTest", 10, tags, 1)
-        self.assertEqual(len(self.aggregator.metrics_map[MetricType.COUNT]), 1)
-        self.assertIn("countTest:tag1,tag2", self.aggregator.metrics_map[MetricType.COUNT])
-
-        self.aggregator.set("setTest", "value1", tags, 1)
-        self.assertEqual(len(self.aggregator.metrics_map[MetricType.SET]), 1)
-        self.assertIn("setTest:tag1,tag2", self.aggregator.metrics_map[MetricType.SET])
+            self.aggregator.timing("timingTest", 21, tags, 1)
+            self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.TIMING].values), 1)
+            self.assertIn("timingTest:tag1,tag2", self.aggregator.buffered_metrics_map[MetricType.TIMING].values)
 
     def test_aggregator_flush(self):
         tags = ["tag1", "tag2"]
@@ -50,29 +50,52 @@ def test_aggregator_flush(self):
         self.aggregator.set("setTest1", "value2", tags, 1)
         self.aggregator.set("setTest2", "value1", tags, 1)
 
+        self.aggregator.histogram("histogramTest1", 21, tags, 1)
+        self.aggregator.histogram("histogramTest1", 22, tags, 1)
+        self.aggregator.histogram("histogramTest2", 23, tags, 1)
+
+        self.aggregator.distribution("distributionTest1", 21, tags, 1)
+        self.aggregator.distribution("distributionTest1", 22, tags, 1)
+        self.aggregator.distribution("distributionTest2", 23, tags, 1)
+
+        self.aggregator.timing("timingTest1", 21, tags, 1)
+        self.aggregator.timing("timingTest1", 22, tags, 1)
+        self.aggregator.timing("timingTest2", 23, tags, 1)
+
         metrics = self.aggregator.flush_aggregated_metrics()
         self.assertEqual(len(self.aggregator.metrics_map[MetricType.GAUGE]), 0)
         self.assertEqual(len(self.aggregator.metrics_map[MetricType.COUNT]), 0)
         self.assertEqual(len(self.aggregator.metrics_map[MetricType.SET]), 0)
-
-        self.assertEqual(len(metrics), 7)
+        self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.HISTOGRAM]), 0)
+        self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.DISTRIBUTION]), 0)
+        self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.TIMING]), 0)
+        self.assertEqual(len(metrics), 16)
         metrics.sort(key=lambda m: (m.metric_type, m.name, m.value))
+
         expected_metrics = [
             {"metric_type": MetricType.COUNT, "name": "countTest1", "tags": tags, "rate": 1, "value": 31, "timestamp": 0},
             {"metric_type": MetricType.COUNT, "name": "countTest2", "tags": tags, "rate": 1, "value": 1, "timestamp": 0},
+            {"metric_type": MetricType.DISTRIBUTION, "name": "distributionTest1", "tags": tags, "rate": 1, "value": 21},
+            {"metric_type": MetricType.DISTRIBUTION, "name": "distributionTest1", "tags": tags, "rate": 1, "value": 22},
+            {"metric_type": MetricType.DISTRIBUTION, "name": "distributionTest2", "tags": tags, "rate": 1, "value": 23},
             {"metric_type": MetricType.GAUGE, "name": "gaugeTest1", "tags": tags, "rate": 1, "value": 10, "timestamp": 0},
             {"metric_type": MetricType.GAUGE, "name": "gaugeTest2", "tags": tags, "rate": 1, "value": 15, "timestamp": 0},
+            {"metric_type": MetricType.HISTOGRAM, "name": "histogramTest1", "tags": tags, "rate": 1, "value": 21},
+            {"metric_type": MetricType.HISTOGRAM, "name": "histogramTest1", "tags": tags, "rate": 1, "value": 22},
+            {"metric_type": MetricType.HISTOGRAM, "name": "histogramTest2", "tags": tags, "rate": 1, "value": 23},
+            {"metric_type": MetricType.TIMING, "name": "timingTest1", "tags": tags, "rate": 1, "value": 21},
+            {"metric_type": MetricType.TIMING, "name": "timingTest1", "tags": tags, "rate": 1, "value": 22},
+            {"metric_type": MetricType.TIMING, "name": "timingTest2", "tags": tags, "rate": 1, "value": 23},
             {"metric_type": MetricType.SET, "name": "setTest1", "tags": tags, "rate": 1, "value": "value1", "timestamp": 0},
             {"metric_type": MetricType.SET, "name": "setTest1", "tags": tags, "rate": 1, "value": "value2", "timestamp": 0},
             {"metric_type": MetricType.SET, "name": "setTest2", "tags": tags, "rate": 1, "value": "value1", "timestamp": 0},
         ]
-        
+
         for metric, expected in zip(metrics, expected_metrics):
             self.assertEqual(metric.name, expected["name"])
             self.assertEqual(metric.tags, expected["tags"])
             self.assertEqual(metric.rate, expected["rate"])
             self.assertEqual(metric.value, expected["value"])
-
-
+
 if __name__ == '__main__':
     unittest.main()