-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcloudwatch.sh
193 lines (171 loc) · 8.38 KB
/
cloudwatch.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#!/usr/bin/env bash
# Creates AWS CloudWatch alarms based on standard metrics and on details the
# user enters interactively, to set up alarms for each environment.
# Usage: ./cloudwatch.sh [profile]   (profile is an AWS CLI profile name)
# Requires a configured AWS CLI. Set ALARMACTION to the SNS topic ARN that the
# alarms should notify; while it still holds the placeholder below, the script
# asks for the ARN at run time.
ALARMACTION="arn:aws:sns:us-east-1:YOURACCOUNTNUMBER:YOURSNSALERTNAME"
# Functions
# Check Command
# Verifies that an executable with the given name can be found on PATH and
# aborts through fail when it cannot.
# Arguments: $1 - name of the command to look for
function check_command {
  # "$1" is quoted so the argument is looked up as exactly one name; unquoted,
  # a multi-word value was split into several lookups (and an empty one
  # disappeared from the command line entirely).
  type -P "$1" &>/dev/null || fail "Unable to find $1, please install it and run this script again."
}
# Completed
# Prints the "Completed!" banner in terminal color 2, framed by horizontal
# rules with a blank line before and after.
function completed(){
  printf '\n'
  HorizontalRule
  tput setaf 2
  # Reset the terminal attributes only once the message itself was written.
  if echo "Completed!"; then
    tput sgr0
  fi
  HorizontalRule
  printf '\n'
}
# Fail
# Prints "Failure: <message>" in terminal color 1 on stderr and ends the script.
# Arguments: $* - words of the error message (joined with spaces)
# Exits:     always, with status 1
function fail(){
  # Diagnostics and their color codes go to stderr so they remain visible when
  # stdout is redirected or captured. tput's own error output (for example
  # when TERM is unset) is discarded: the order ">&2 2>/dev/null" first points
  # stdout at the real stderr, then silences tput's stderr.
  tput setaf 1 >&2 2>/dev/null
  printf 'Failure: %s\n' "$*" >&2
  tput sgr0 >&2 2>/dev/null
  exit 1
}
# Horizontal Rule
# Writes one divider line made of "=" characters to stdout.
function HorizontalRule(){
  printf '%s\n' "============================================================"
}
# Verify AWS CLI Credentials are set up
# http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html
# An access key id must be present in the CLI config file or in the shared
# credentials file; the script aborts when neither contains one.
# -s suppresses grep's "No such file or directory" message, because it is
# normal for one of the two files to be absent.
if ! grep -qs aws_access_key_id ~/.aws/config &&
   ! grep -qs aws_access_key_id ~/.aws/credentials; then
  fail "AWS config not found or CLI not installed. Please run \"aws configure\"."
fi
# Check for AWS CLI profile argument passed into the script
# http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html#cli-multiple-profiles
# The first positional argument names the profile. When the script is started
# without arguments, a usage hint is printed and the "default" profile is used.
if (( $# > 0 )); then
  profile=$1
else
  scriptname=$(basename "$0")
  printf '%s\n' \
    "Usage: ./$scriptname profile" \
    "Where profile is the AWS CLI profile name" \
    "Using default profile" \
    ""
  profile=default
fi
# Verify ALARMACTION is set up with some alert mechanism
# When ALARMACTION is empty or still the shipped placeholder, ask for the SNS
# topic ARN interactively; an empty answer aborts the script.
case "$ALARMACTION" in
  "" | "arn:aws:sns:us-east-1:YOURACCOUNTNUMBER:YOURSNSALERTNAME")
    printf '%s\n' \
      "Alarm Action SNS Topic ARN?" \
      "Example: arn:aws:sns:us-east-1:YOURACCOUNTNUMBER:YOURSNSALERTNAME"
    read -r ALARMACTION
    [[ -n $ALARMACTION ]] || fail "Alarm Action must be configured."
    ;;
esac
HorizontalRule
echo "Create CloudWatch Alarms"
HorizontalRule
echo
read -r -p "Client Name? " CLIENT
if [[ -z $CLIENT ]]; then
  fail "Invalid Client Name!"
fi
HorizontalRule

# Creates one load balancer / server alarm and reports what really happened.
# Globals:   ALARMACTION, profile (read)
# Arguments: $1 - alarm label; "<label> Set" is printed on success
#            remaining - options passed to "aws cloudwatch put-metric-alarm"
function put_alarm {
  local label=$1
  shift
  if aws cloudwatch put-metric-alarm "$@" --alarm-actions "$ALARMACTION" --profile "$profile"; then
    HorizontalRule
    echo "$label Set"
    HorizontalRule
  else
    # Never announce success for a call the AWS CLI rejected; report it and
    # carry on with the remaining alarms.
    tput setaf 1; echo "Failed to set $label" && tput sgr0
  fi
}

read -r -p "How Many Servers Total? " SERVERNUM
# Validate with a bash regex (no obsolescent egrep, no lexicographic ">"
# comparison) and compare numerically in base 10.
if [[ ! $SERVERNUM =~ ^[0-9]+$ ]]; then
  tput setaf 1; echo "Invalid Number of Servers!" && tput sgr0
elif (( 10#$SERVERNUM == 0 )); then
  echo "Skipping Server Alarms..."
else
  # Normalize to base 10 so input such as "08" is not parsed as invalid octal
  # by the arithmetic loop below.
  SERVERNUM=$((10#$SERVERNUM))

  read -r -p "Loadbalanced Environment? (y/n) " LOADBALANCED
  # If Loadbalanced Environment
  if [[ $LOADBALANCED =~ ^([yY][eE][sS]|[yY])$ ]]; then
    read -r -p "Load Balancer ID? " LBID
    if [[ -z $LBID ]]; then
      fail "Invalid Load Balancer ID!"
    fi
    # Load Balancer Unhealthy Host Check
    put_alarm "Load Balancer Unhealthy Host Alarm" \
      --alarm-name "$CLIENT Unhealthy Host Check" \
      --alarm-description "$CLIENT Load Balancer Unhealthy Host Detected" \
      --metric-name "UnHealthyHostCount" --namespace "AWS/ELB" --statistic "Sum" \
      --period 60 --threshold 0 --comparison-operator "GreaterThanThreshold" \
      --dimensions "Name=LoadBalancerName,Value=$LBID" --evaluation-periods 3
    # Load Balancer High Latency Check
    put_alarm "Load Balancer High Latency Alarm" \
      --alarm-name "$CLIENT LB High Latency" \
      --alarm-description "$CLIENT Load Balancer High Latency" \
      --metric-name "Latency" --namespace "AWS/ELB" --statistic "Average" \
      --period 60 --threshold 15 --comparison-operator "GreaterThanThreshold" \
      --dimensions "Name=LoadBalancerName,Value=$LBID" --evaluation-periods 2
  fi

  # "i-" followed by exactly 8 or exactly 17 alphanumeric characters. Both
  # alternatives sit inside one group so that ^ and $ apply to each of them.
  INSTANCEID_PATTERN='^i-([[:alnum:]]{8}|[[:alnum:]]{17})$'

  # Begin loop to create server alarms
  START=1
  for (( COUNT = START; COUNT <= SERVERNUM; COUNT++ )); do
    echo "Server #$COUNT"
    read -r -p "Server Environment? (Dev/Staging/Production) " ENVIRONMENT
    if [[ -z $ENVIRONMENT ]]; then
      fail "Invalid Server Environment!"
    fi
    read -r -p "Server Name? (Web01, Web02) " SERVERNAME
    # Avoid "Dev Dev" situation
    if [[ "$ENVIRONMENT" == "$SERVERNAME" ]]; then
      SERVERNAME=""
    fi
    read -r -p "Instance ID? (i-xxxxxxxx or i-xxxxxxxxxxxxxxxxx) " INSTANCEID
    if [[ ! $INSTANCEID =~ $INSTANCEID_PATTERN ]]; then
      fail "Invalid Instance ID!"
    fi
    # Label for the on-screen messages; the server name is left out when empty.
    # NOTE(review): the alarm names below keep their historical spelling, which
    # contains two consecutive spaces when SERVERNAME is empty.
    SERVERLABEL="$CLIENT $ENVIRONMENT${SERVERNAME:+ $SERVERNAME}"
    # CPU Check
    put_alarm "$SERVERLABEL CPU Check Alarm" \
      --alarm-name "$CLIENT $ENVIRONMENT $SERVERNAME CPU Check" \
      --alarm-description "$CLIENT $ENVIRONMENT $SERVERNAME CPU usage >90% for 5 minutes" \
      --namespace "AWS/EC2" --dimensions "Name=InstanceId,Value=$INSTANCEID" \
      --metric-name "CPUUtilization" --statistic "Average" \
      --comparison-operator "GreaterThanThreshold" --unit "Percent" \
      --period 60 --threshold 90 --evaluation-periods 5
    # Status Check
    put_alarm "$SERVERLABEL Status Check Alarm" \
      --alarm-name "$CLIENT $ENVIRONMENT $SERVERNAME Status Check" \
      --alarm-description "$CLIENT $ENVIRONMENT $SERVERNAME Status Check Failed for 5 minutes" \
      --namespace "AWS/EC2" --dimensions "Name=InstanceId,Value=$INSTANCEID" \
      --metric-name "StatusCheckFailed" --statistic "Maximum" \
      --comparison-operator "GreaterThanThreshold" --unit "Count" \
      --period 60 --threshold 0 --evaluation-periods 5
  done
fi
# Creates one database alarm and reports what really happened.
# Globals:   ALARMACTION, profile (read)
# Arguments: $1 - alarm label; "<label> Set" is printed on success
#            remaining - options passed to "aws cloudwatch put-metric-alarm"
function put_db_alarm {
  local label=$1
  shift
  if aws cloudwatch put-metric-alarm "$@" --alarm-actions "$ALARMACTION" --profile "$profile"; then
    HorizontalRule
    echo "$label Set"
    HorizontalRule
  else
    # Never announce success for a call the AWS CLI rejected; report it and
    # carry on with the remaining alarms.
    tput setaf 1; echo "Failed to set $label" && tput sgr0
  fi
}

read -r -p "Setup Database Alarms? (y/n) " SETUPDB
# If Database Alarms
if [[ $SETUPDB =~ ^([yY][eE][sS]|[yY])$ ]]; then
  HorizontalRule
  read -r -p "How Many Database Hosts Total? " DBNUM
  # Validate with a bash regex (no obsolescent egrep, no lexicographic ">"
  # comparison) and compare numerically in base 10.
  if [[ ! $DBNUM =~ ^[0-9]+$ ]]; then
    tput setaf 1; echo "Invalid Number of Databases!" && tput sgr0
  elif (( 10#$DBNUM == 0 )); then
    echo "Skipping Database Alarms..."
  else
    # Normalize to base 10 so input such as "08" is not parsed as invalid
    # octal by the arithmetic loop below.
    DBNUM=$((10#$DBNUM))
    # Begin loop to create database alarms
    START=1
    for (( COUNT = START; COUNT <= DBNUM; COUNT++ )); do
      echo "DB #$COUNT"
      read -r -p "Database Environment? (Dev/Staging/Production) " ENVIRONMENT
      if [[ -z $ENVIRONMENT ]]; then
        fail "Invalid Database Environment!"
      fi
      # # Avoid "Beta Beta" situation
      # if [[ $ENVIRONMENT == "Beta" ]]; then
      # SERVERNAME=""
      # else
      # echo -n "DB Name? (Web01, Web02) "
      # read SERVERNAME
      # fi
      read -r -p "DB Instance ID? " DBID
      if [[ -z $DBID ]]; then
        fail "Invalid Database Instance ID!"
      fi
      # Database CPU Check
      put_db_alarm "$CLIENT $ENVIRONMENT Database CPU Check Alarm" \
        --alarm-name "$CLIENT $ENVIRONMENT DB CPU Check" \
        --alarm-description "$CLIENT $ENVIRONMENT Database CPU usage >90% for 5 minutes" \
        --metric-name "CPUUtilization" --namespace "AWS/RDS" --statistic "Average" \
        --unit "Percent" --period 60 --threshold 90 \
        --comparison-operator "GreaterThanThreshold" \
        --dimensions "Name=DBInstanceIdentifier,Value=$DBID" --evaluation-periods 5
      # Database Memory Usage Check
      put_db_alarm "$CLIENT $ENVIRONMENT Database Memory Usage Alarm" \
        --alarm-name "$CLIENT $ENVIRONMENT DB Mem Check" \
        --alarm-description "$CLIENT $ENVIRONMENT Database Freeable Memory < 200 MB for 5 minutes" \
        --metric-name "FreeableMemory" --namespace "AWS/RDS" --statistic "Average" \
        --unit "Bytes" --period 60 --threshold "200000000" \
        --comparison-operator "LessThanThreshold" \
        --dimensions "Name=DBInstanceIdentifier,Value=$DBID" --evaluation-periods 5
      # Database Available Storage Space Check
      put_db_alarm "$CLIENT $ENVIRONMENT Database Available Storage Space Alarm" \
        --alarm-name "$CLIENT $ENVIRONMENT DB Storage Check" \
        --alarm-description "$CLIENT $ENVIRONMENT Database Available Storage Space < 200 MB" \
        --metric-name "FreeStorageSpace" --namespace "AWS/RDS" --statistic "Average" \
        --unit "Bytes" --period 60 --threshold "200000000" \
        --comparison-operator "LessThanThreshold" \
        --dimensions "Name=DBInstanceIdentifier,Value=$DBID" --evaluation-periods 1
    done
  fi
else
  echo "Exiting"
fi
completed