Skip to content

Commit 9521252

Browse files
authored
how to send AWS CloudWatch Alarms to Slack? (#92)
1 parent 2f6c16a commit 9521252

18 files changed

Lines changed: 808 additions & 0 deletions

docs/contents.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,4 @@
5353
- [119 - AWS API Gateway - WebSocket API + EC2](../lessons/119)
5454
- [120 - How To SSH into your VM? - Google Cloud Platform](../lessons/120)
5555
- [121 - How to connect to EC2 instance?](../lessons/121)
56+
- [122 - How to send AWS CloudWatch Alarms to Slack?](../lessons/122)

lessons/122/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# How to send AWS CloudWatch Alarms to Slack? (Terraform Included)
2+
3+
You can find the tutorial [here](https://antonputra.com/amazon/send-aws-cloudwatch-alarms-to-slack/).
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
import json
import os

import urllib3
3+
4+
5+
# SECURITY: a Slack incoming-webhook URL is a credential -- anyone who has it
# can post to the channel. Prefer supplying it through the SLACK_WEBHOOK_URL
# environment variable. The literal below is kept only as a backward-compatible
# fallback; because it has been committed to source control it should be
# revoked in Slack and then removed from this file.
slack_url = os.environ.get(
    "SLACK_WEBHOOK_URL",
    "https://hooks.slack.com/services/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",
)

# Created once at import time so every call to lambda_handler shares the pool.
http = urllib3.PoolManager()
7+
8+
9+
def get_alarm_attributes(sns_message):
    """Extract the alarm fields used to build Slack messages.

    Args:
        sns_message: The decoded JSON body (a dict) of a CloudWatch alarm
            notification delivered through SNS.

    Returns:
        A dict with the keys ``name``, ``description``, ``reason``,
        ``region``, ``instance_id``, ``state`` and ``previous_state``.
        ``description`` is ``''`` when the alarm has no description, and
        ``instance_id`` is ``None`` when the trigger has no dimensions.

    Raises:
        KeyError: If ``AlarmName``, ``NewStateReason``, ``Region``,
            ``NewStateValue`` or ``OldStateValue`` is missing.
    """
    # The description may be null or absent; normalise it to an empty string
    # so callers can safely concatenate it.
    description = sns_message.get('AlarmDescription') or ''

    # Not every alarm has dimensions; indexing [0] unconditionally raised
    # IndexError for those.
    dimensions = (sns_message.get('Trigger') or {}).get('Dimensions') or []

    # Prefer the dimension actually named "InstanceId"; fall back to the first
    # dimension (the original behaviour) when there is no such dimension.
    fallback = dimensions[0].get('value') if dimensions else None
    instance_id = next(
        (dim.get('value') for dim in dimensions
         if dim.get('name') == 'InstanceId'),
        fallback,
    )

    return {
        'name': sns_message['AlarmName'],
        'description': description,
        'reason': sns_message['NewStateReason'],
        'region': sns_message['Region'],
        'instance_id': instance_id,
        'state': sns_message['NewStateValue'],
        'previous_state': sns_message['OldStateValue'],
    }
21+
22+
23+
def register_alarm(alarm):
    """Build the Slack message announcing that an alarm was registered.

    Args:
        alarm: Dict with the keys ``name`` and ``region`` and, optionally,
            ``description`` (see ``get_alarm_attributes``).

    Returns:
        A dict holding a ``blocks`` list (header, divider, description
        section, divider, region context) ready to be JSON-encoded.

    Raises:
        KeyError: If ``name`` or ``region`` is missing from ``alarm``.
    """
    # A missing or None description used to raise TypeError during string
    # concatenation; show a placeholder instead.
    description = alarm.get('description') or "No description provided"

    return {
        "type": "home",
        "blocks": [
            {
                "type": "header",
                "text": {
                    "type": "plain_text",
                    "text": ":warning: " + alarm['name'] + " alarm was registered"
                }
            },
            {
                "type": "divider"
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "_" + description + "_"
                },
                "block_id": "text1"
            },
            {
                "type": "divider"
            },
            {
                "type": "context",
                "elements": [
                    {
                        "type": "mrkdwn",
                        "text": "Region: *" + alarm['region'] + "*"
                    }
                ]
            }
        ]
    }
59+
60+
61+
def activate_alarm(alarm):
    """Build the Slack message for an alarm that has started firing.

    Args:
        alarm: Dict with the keys ``name``, ``reason`` and ``region``.

    Returns:
        A dict holding a ``blocks`` list: a header naming the alarm, the
        state-change reason in italics, and the region as context, separated
        by dividers.
    """
    header = {
        "type": "header",
        "text": {
            "type": "plain_text",
            "text": ":red_circle: Alarm: " + alarm['name'],
        },
    }
    reason = {
        "type": "section",
        "text": {
            "type": "mrkdwn",
            "text": "_" + alarm['reason'] + "_",
        },
        "block_id": "text1",
    }
    region = {
        "type": "context",
        "elements": [
            {
                "type": "mrkdwn",
                "text": "Region: *" + alarm['region'] + "*",
            },
        ],
    }

    return {
        "type": "home",
        "blocks": [
            header,
            {"type": "divider"},
            reason,
            {"type": "divider"},
            region,
        ],
    }
97+
98+
99+
def resolve_alarm(alarm):
    """Build the Slack message for an alarm that has returned to OK.

    Args:
        alarm: Dict with the keys ``name``, ``reason`` and ``region``.

    Returns:
        A dict holding a ``blocks`` list: a "was resolved" header, the
        state-change reason in italics, and the region as context, separated
        by dividers.
    """
    title = ":large_green_circle: Alarm: " + alarm['name'] + " was resolved"
    reason_text = "_" + alarm['reason'] + "_"
    region_text = "Region: *" + alarm['region'] + "*"

    blocks = []
    blocks.append({
        "type": "header",
        "text": {"type": "plain_text", "text": title},
    })
    blocks.append({"type": "divider"})
    blocks.append({
        "type": "section",
        "text": {"type": "mrkdwn", "text": reason_text},
        "block_id": "text1",
    })
    blocks.append({"type": "divider"})
    blocks.append({
        "type": "context",
        "elements": [{"type": "mrkdwn", "text": region_text}],
    })

    return {"type": "home", "blocks": blocks}
135+
136+
137+
def lambda_handler(event, context):
    """Forward a CloudWatch alarm state change received via SNS to Slack.

    The alarm JSON is read from ``event["Records"][0]["Sns"]["Message"]``,
    turned into a Slack message according to the state transition, and
    POSTed to ``slack_url``. The outcome is printed as a dict.

    Args:
        event: The Lambda event carrying the SNS record.
        context: The Lambda context object (unused).

    Returns:
        None.
    """
    sns_message = json.loads(event["Records"][0]["Sns"]["Message"])
    alarm = get_alarm_attributes(sns_message)

    state = alarm['state']
    previous_state = alarm['previous_state']

    if state == 'ALARM':
        # Covers OK -> ALARM and also INSUFFICIENT_DATA -> ALARM, which was
        # previously dropped even though the alarm is firing.
        msg = activate_alarm(alarm)
    elif state == 'OK' and previous_state == 'ALARM':
        msg = resolve_alarm(alarm)
    elif state == 'OK' and previous_state == 'INSUFFICIENT_DATA':
        msg = register_alarm(alarm)
    else:
        # Any other transition (e.g. to INSUFFICIENT_DATA) has no message.
        # Previously an empty string was JSON-encoded and POSTed to Slack.
        print(
            {
                "message": None,
                "skipped": previous_state + " -> " + state,
            }
        )
        return

    encoded_msg = json.dumps(msg).encode("utf-8")
    resp = http.request(
        "POST",
        slack_url,
        body=encoded_msg,
        # Declare the body as JSON rather than sending it untyped.
        headers={"Content-Type": "application/json"},
    )
    print(
        {
            "message": msg,
            "status_code": resp.status,
            "response": resp.data.decode("utf-8", errors="replace"),
        }
    )

lessons/122/sns-alarm-event.json

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
2+
"AlarmName": "EC2 High CPU (i-0a4026af087470b96)",
3+
"AlarmDescription": "This metric monitors CPU utilization for the following instance: i-0a4026af087470b96. If the CPU usage exceeds 80%, you'll get an alert.",
4+
"AWSAccountId": "424432388155",
5+
"AlarmConfigurationUpdatedTimestamp": "2022-08-24T07:43:57.952+0000",
6+
"NewStateValue": "ALARM",
7+
"NewStateReason": "Threshold Crossed: 1 out of the last 1 datapoints [98.54495149838328 (24/08/22 07:51:00)] was greater than the threshold (80.0) (minimum 1 datapoint for OK -> ALARM transition).",
8+
"StateChangeTime": "2022-08-24T07:56:51.135+0000",
9+
"Region": "US East (N. Virginia)",
10+
"AlarmArn": "arn:aws:cloudwatch:us-east-1:424432388155:alarm:High CPU",
11+
"OldStateValue": "OK",
12+
"OKActions": [],
13+
"AlarmActions": [
14+
"arn:aws:sns:us-east-1:424432388155:alerts"
15+
],
16+
"InsufficientDataActions": [],
17+
"Trigger": {
18+
"MetricName": "CPUUtilization",
19+
"Namespace": "AWS/EC2",
20+
"StatisticType": "Statistic",
21+
"Statistic": "AVERAGE",
22+
"Unit": null,
23+
"Dimensions": [
24+
{
25+
"value": "i-00413a5fb048ad162",
26+
"name": "InstanceId"
27+
}
28+
],
29+
"Period": 300,
30+
"EvaluationPeriods": 1,
31+
"DatapointsToAlarm": 1,
32+
"ComparisonOperator": "GreaterThanThreshold",
33+
"Threshold": 80.0,
34+
"TreatMissingData": "missing",
35+
"EvaluateLowSampleCountPercentile": ""
36+
}
37+
}

lessons/122/sns-ok-event.json

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
{
2+
"AlarmName": "EC2 High CPU (i-0a4026af087470b96)",
3+
"AlarmDescription": "This metric monitors CPU utilization for the following instance: i-0a4026af087470b96. If the CPU usage exceeds 80%, you'll get an alert.",
4+
"AWSAccountId": "424432388155",
5+
"AlarmConfigurationUpdatedTimestamp": "2022-08-24T09:30:39.877+0000",
6+
"NewStateValue": "OK",
7+
"NewStateReason": "Threshold Crossed: 1 out of the last 1 datapoints [0.050000138902777796 (24/08/22 09:27:00)] was not greater than the threshold (80.0) (minimum 1 datapoint for ALARM -> OK transition).",
8+
"StateChangeTime": "2022-08-24T09:32:05.478+0000",
9+
"Region": "US East (N. Virginia)",
10+
"AlarmArn": "arn:aws:cloudwatch:us-east-1:424432388155:alarm:test2",
11+
"OldStateValue": "INSUFFICIENT_DATA",
12+
"OKActions": [
13+
"arn:aws:sns:us-east-1:424432388155:alerts"
14+
],
15+
"AlarmActions": [
16+
"arn:aws:sns:us-east-1:424432388155:alerts"
17+
],
18+
"InsufficientDataActions": [],
19+
"Trigger": {
20+
"MetricName": "CPUUtilization",
21+
"Namespace": "AWS/EC2",
22+
"StatisticType": "Statistic",
23+
"Statistic": "AVERAGE",
24+
"Unit": null,
25+
"Dimensions": [
26+
{
27+
"value": "i-00413a5fb048ad162",
28+
"name": "InstanceId"
29+
}
30+
],
31+
"Period": 300,
32+
"EvaluationPeriods": 1,
33+
"DatapointsToAlarm": 1,
34+
"ComparisonOperator": "GreaterThanThreshold",
35+
"Threshold": 80.0,
36+
"TreatMissingData": "missing",
37+
"EvaluateLowSampleCountPercentile": ""
38+
}
39+
}

lessons/122/terraform/.terraform.lock.hcl

Lines changed: 59 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Best practices for using Terraform
# https://cloud.google.com/docs/terraform/best-practices-for-terraform

# Default AWS provider configuration: resources that use it are managed in
# the us-east-1 region.
provider "aws" {
  region = "us-east-1"
}
6+
7+
# Version constraints for the AWS provider and for Terraform itself.
terraform {
  required_providers {
    aws = {
      source = "hashicorp/aws"
      # "~> 4.27.0" permits patch releases only (>= 4.27.0, < 4.28.0).
      version = "~> 4.27.0"
    }
  }

  # "~> 1.0" permits any 1.x release of Terraform.
  required_version = "~> 1.0"
}

lessons/122/terraform/1-vpc.tf

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Create the AWS VPC named "main" with the 10.0.0.0/16 address range.
resource "aws_vpc" "main" {
  cidr_block = "10.0.0.0/16"

  # Turn on DNS resolution and DNS hostnames inside the VPC.
  enable_dns_support = true
  enable_dns_hostnames = true

  tags = {
    Name = "main"
  }
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# CloudWatch alarm that fires when the average CPU utilization of one EC2
# instance is at or above 80 percent over a single 300-second period.
resource "aws_cloudwatch_metric_alarm" "high_cpu" {
  alarm_name        = "EC2 High CPU (${aws_instance.my_server.id})"
  alarm_description = "This metric monitors CPU utilization for the following instance: ${aws_instance.my_server.id}. If the CPU usage exceeds 80%, you'll get an alert."

  # Metric being watched.
  # You have to create a separate alarm for each EC2 instance.
  namespace   = "AWS/EC2"
  metric_name = "CPUUtilization"
  statistic   = "Average"
  dimensions = {
    InstanceId = aws_instance.my_server.id
  }

  # Evaluation rule.
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 80
  period              = 300
  evaluation_periods  = 1

  # Transitions to ALARM and to OK are both published to the same SNS topic;
  # INSUFFICIENT_DATA triggers no action.
  alarm_actions             = [aws_sns_topic.alarms.arn]
  ok_actions                = [aws_sns_topic.alarms.arn]
  insufficient_data_actions = []
}

lessons/122/terraform/2-igw.tf

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Create an Internet Gateway to provide internet access for public subnets.
# It is attached to the "main" VPC.
resource "aws_internet_gateway" "igw" {
  vpc_id = aws_vpc.main.id

  tags = {
    Name = "igw"
  }
}

0 commit comments

Comments
 (0)