@@ -27,121 +27,96 @@ locals {
2727 duration = var. critical_threshold_duration == null ? local. default_duration : var. critical_threshold_duration
2828}
2929
# CPU alert condition, created through the shared ../nrql-alert module.
# The query averages the configured CPU metric per EC2 instance
# (FACET aws.ec2.InstanceId) and hands the threshold and duration to the module.
#
# NOTE(review): the previous inline resource set expiration_duration = 600 and
# disabled open/close-on-expiration explicitly; those are no longer passed here
# and presumably fall back to the module's defaults -- confirm in ../nrql-alert.
module "cpu" {
  source = "../nrql-alert"

  account_id = var.account_id
  policy_id  = var.alert_policy_id

  # Descriptive condition name that embeds the threshold and duration.
  # format("%f") always prints six decimals, so the regex removes trailing
  # zeros together with a dangling decimal point: 90 -> "90", 92.5 -> "92.5".
  # (Matching only "\.0+$" left fractional thresholds as "92.500000".)
  name = format(
    "%s - CPU utilization over %s%% for at least %d seconds",
    var.name_prefix,
    replace(format("%f", var.critical_threshold), "/\\.?0+$/", ""),
    local.duration
  )

  nrql_query                  = "SELECT average(${local.metric_name}) FROM ${local.table_name} ${local.filter_subquery} FACET aws.ec2.InstanceId"
  critical_threshold          = var.critical_threshold
  critical_threshold_duration = local.duration

  # Signal aggregation settings shared by every condition in this file.
  aggregation_window = local.window
  aggregation_method = "event_timer"
  aggregation_timer  = local.timer

  tags = var.tags
}
5651
# Optional signal-loss detector, created only when var.alert_loss_of_signal is
# true. It is faceted by instance name (tags.Name) rather than instance id so
# that an instance refresh does not raise alerts/an "incident" in New Relic.
module "loss_of_signal" {
  source = "../nrql-alert"
  count  = var.alert_loss_of_signal ? 1 : 0

  account_id = var.account_id
  policy_id  = var.alert_policy_id
  tags       = var.tags

  # The 600 in the name mirrors expiration_duration below; keep the two in sync.
  name = "${var.name_prefix} - No metrics reported for at least 600 seconds"

  nrql_query = "SELECT average(${local.metric_name}) FROM ${local.table_name} ${local.filter_subquery} FACET tags.Name"

  # The threshold itself is a placeholder: CPUUtilization is a percentage, so a
  # value above 101 should never occur. The condition exists only so that
  # "open_violation_on_expiration" can report the missing signal.
  critical_threshold          = 101
  critical_threshold_duration = local.duration

  aggregation_method = "event_timer"
  aggregation_window = local.window
  aggregation_timer  = local.timer

  # Open a violation once no data has arrived for 600 seconds.
  open_violation_on_expiration = true
  expiration_duration          = 600
}
9079
# Memory alert condition, created through the shared ../nrql-alert module.
# Only instantiated when var.use_agent_metrics is true, because it queries
# memoryUsedPercent from SystemSample, faceted per EC2 instance id.
#
# NOTE(review): the previous inline resource set expiration_duration = 600 and
# disabled open/close-on-expiration explicitly; those are no longer passed here
# and presumably fall back to the module's defaults -- confirm in ../nrql-alert.
module "memory" {
  count  = (var.use_agent_metrics ? 1 : 0)
  source = "../nrql-alert"

  account_id = var.account_id
  policy_id  = var.alert_policy_id

  # Descriptive condition name that embeds the threshold and duration.
  # format("%f") always prints six decimals, so the regex removes trailing
  # zeros together with a dangling decimal point: 90 -> "90", 92.5 -> "92.5".
  # (Matching only "\.0+$" left fractional thresholds as "92.500000".)
  name = format(
    "%s - Memory usage over %s%% for at least %d seconds",
    var.name_prefix,
    replace(format("%f", var.critical_threshold), "/\\.?0+$/", ""),
    local.duration
  )

  nrql_query                  = "SELECT average(memoryUsedPercent) FROM SystemSample ${local.filter_subquery} FACET aws.ec2.InstanceId"
  critical_threshold          = var.critical_threshold
  critical_threshold_duration = local.duration

  # Signal aggregation settings shared by every condition in this file.
  aggregation_method = "event_timer"
  aggregation_window = local.window
  aggregation_timer  = local.timer

  tags = var.tags
}
119101
# Disk usage alert condition, created through the shared ../nrql-alert module.
# Only instantiated when var.use_agent_metrics is true, because it queries
# diskUsedPercent from StorageSample, faceted by instance name and mount point.
#
# NOTE(review): the previous inline resource set expiration_duration = 600 and
# disabled open/close-on-expiration explicitly; those are no longer passed here
# and presumably fall back to the module's defaults -- confirm in ../nrql-alert.
module "storage" {
  count  = (var.use_agent_metrics ? 1 : 0)
  source = "../nrql-alert"

  account_id = var.account_id
  policy_id  = var.alert_policy_id

  # Descriptive condition name that embeds the threshold and duration.
  # format("%f") always prints six decimals, so the regex removes trailing
  # zeros together with a dangling decimal point: 90 -> "90", 92.5 -> "92.5".
  # (Matching only "\.0+$" left fractional thresholds as "92.500000".)
  name = format(
    "%s - Storage usage over %s%% for at least %d seconds",
    var.name_prefix,
    replace(format("%f", var.critical_threshold), "/\\.?0+$/", ""),
    local.duration
  )

  nrql_query                  = "SELECT average(diskUsedPercent) FROM StorageSample ${local.filter_subquery} FACET `tags.Name`, mountPoint"
  critical_threshold          = var.critical_threshold
  critical_threshold_duration = local.duration

  # Signal aggregation settings shared by every condition in this file.
  aggregation_method = "event_timer"
  aggregation_window = local.window
  aggregation_timer  = local.timer

  tags = var.tags
}
0 commit comments