Skip to content

Commit

Permalink
Merge pull request #135 from onaio/monit-scripts
Browse files Browse the repository at this point in the history
monit scripts for services monitoring
  • Loading branch information
pld committed Jan 21, 2014
2 parents 2ff8776 + 50fe4de commit 3e21138
Show file tree
Hide file tree
Showing 16 changed files with 142 additions and 0 deletions.
9 changes: 9 additions & 0 deletions script/monit/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
## Required

- pip install -r requirements.txt
- Edit the `email` and `aws_email` files, replacing the AWS_*, SITE_DOMAIN and ALERT_RECIPIENTS placeholders with the correct details for your server setup.

## Deploy

$ fab deploy:prod,scripts='system email aws_email nginx postgres rabbitmq'

8 changes: 8 additions & 0 deletions script/monit/apache
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Monitor the Apache process identified by /var/run/apache2.pid.
# Restart it when total memory stays above 1960 MB for 2 cycles or the
# 5-minute load average stays above 10 for 8 cycles; after 20 restarts
# within 20 cycles the "timeout" action is taken instead of restarting again.
check process apache
with pidfile /var/run/apache2.pid
start program = "/usr/sbin/apache2ctl start" as uid root and gid root
stop program = "/usr/sbin/apache2ctl stop" as uid root and gid root
if totalmem is greater than 1960.0 MB for 2 cycles then restart
if loadavg(5min) greater than 10 for 8 cycles then restart
if 20 restarts within 20 cycles then timeout
group apache
3 changes: 3 additions & 0 deletions script/monit/aws_email
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Mail server used for monit alerts: email-smtp.us-east-1.amazonaws.com on
# port 465, with a 15 second timeout.
# Replace the AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and SITE_DOMAIN
# placeholders with real values before deploying.
# NOTE(review): "sslv3" is an obsolete protocol version; confirm which TLS
# option the installed monit release supports and switch to it.
set mailserver email-smtp.us-east-1.amazonaws.com port 465
username "AWS_ACCESS_KEY_ID" password "AWS_SECRET_ACCESS_KEY"
using sslv3 with timeout 15 seconds using hostname SITE_DOMAIN
6 changes: 6 additions & 0 deletions script/monit/celerybeat
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Monitor the celerybeat process identified by /var/run/celery/beat.pid,
# using the /etc/init.d/celerybeat init script (run as root) to start and
# stop it. After 5 restarts within 10 cycles the "timeout" action is taken.
check process celerybeat
with pidfile /var/run/celery/beat.pid
start program = "/etc/init.d/celerybeat start" as uid root and gid root
stop program = "/etc/init.d/celerybeat stop" as uid root and gid root
if 5 restarts within 10 cycles then timeout
group celery
6 changes: 6 additions & 0 deletions script/monit/celeryd
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Monitor the celery worker process identified by /var/run/celery/w1-ona.pid,
# using the /etc/init.d/celeryd-ona init script (run as root) to start and
# stop it. After 5 restarts within 10 cycles the "timeout" action is taken.
check process celeryd
with pidfile /var/run/celery/w1-ona.pid
start program = "/etc/init.d/celeryd-ona start" as uid root and gid root
stop program = "/etc/init.d/celeryd-ona stop" as uid root and gid root
if 5 restarts within 10 cycles then timeout
group celery
13 changes: 13 additions & 0 deletions script/monit/email
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Template for alert e-mails sent by monit. Replace the SITE_DOMAIN
# placeholder in the sender address before deploying. The $SERVICE, $EVENT,
# $DATE, $HOST, $ACTION and $DESCRIPTION variables are filled in by monit.
set mail-format {
from: noreply@SITE_DOMAIN
subject: $SERVICE $EVENT at $DATE on $HOST
message:
Alert from $HOST
Date: $DATE
For $SERVICE ($EVENT) and take $ACTION
--------------------------------------
$DESCRIPTION
}

# alert recipients (replace the ALERT_RECIPIENTS placeholder with a real address)
set alert ALERT_RECIPIENTS
41 changes: 41 additions & 0 deletions script/monit/fabfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import os
import sys

from fabric.api import env, sudo, put


# Fabric settings for each deployment target, keyed by the deployment name
# passed to the tasks in this file. setup_env() copies the chosen entry
# into Fabric's ``env``.
# NOTE(review): the host_string values look like redacted placeholders --
# confirm the real user@host values before use.
DEPLOYMENTS = dict(
    stage=dict(
        home='/home/ubuntu/src/',
        host_string='[email protected]',
        key_filename=os.path.expanduser('~/.ssh/ona.pem'),
    ),
    prod=dict(
        home='/home/ubuntu/src/',
        host_string='[email protected]',
        key_filename=os.path.expanduser('~/.ssh/ona.pem'),
    ),
)


def check_key_filename(deployment_name):
    """Report whether the deployment's SSH key file can be used.

    Returns True when the deployment defines no 'key_filename' or when the
    file it names exists. Otherwise prints a message naming the missing
    file and returns False. Raises KeyError for an unknown deployment name.
    """
    settings = DEPLOYMENTS[deployment_name]
    if 'key_filename' not in settings:
        return True

    key_path = settings['key_filename']
    if os.path.exists(key_path):
        return True

    print("Cannot find required permissions file: %s" % key_path)
    return False


def setup_env(deployment_name):
    """Load the named deployment's settings into Fabric's ``env``.

    Exits the process with status 1 when check_key_filename() reports that
    the deployment's key file is missing.
    """
    settings = DEPLOYMENTS[deployment_name]
    env.update(settings)
    if check_key_filename(deployment_name):
        return
    sys.exit(1)


def deploy(deployment_name, scripts=''):
    """Install monit on the target host and upload the chosen config files.

    deployment_name -- key into DEPLOYMENTS selecting the target host.
    scripts -- names of monit config files in the current directory,
        separated by whitespace and/or commas. Each one is uploaded to
        /etc/monit/conf.d/<name>.

    monit is installed if it is not already on the host, and restarted once
    all files are uploaded.
    """
    setup_env(deployment_name)

    # Split on any run of whitespace or commas and drop empty tokens, so the
    # default scripts='' (or a doubled space / stray comma) never produces an
    # empty or malformed file name for put().
    script_names = scripts.replace(',', ' ').split()

    sudo('which monit || apt-get install -y monit')
    for script in script_names:
        put(script, '/etc/monit/conf.d/%s' % script, use_sudo=True)
    sudo('/etc/init.d/monit restart')
9 changes: 9 additions & 0 deletions script/monit/mongodb
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Monitor the MongoDB process, using /var/lib/mongodb/mongod.lock as its
# pidfile and the /etc/init.d/mongodb init script to start and stop it.
# Restart when an HTTP request for "/" on port 28017 fails or takes longer
# than 10 seconds; after 5 restarts within 5 cycles the "timeout" action
# is taken.
# NOTE(review): port 28017 is presumably MongoDB's HTTP status interface --
# confirm it is enabled on the target server.
check process mongodb
with pidfile "/var/lib/mongodb/mongod.lock"
start program "/etc/init.d/mongodb start"
stop program "/etc/init.d/mongodb stop"
if failed port 28017 protocol http
and request "/" with timeout 10 seconds then restart
if 5 restarts within 5 cycles then timeout
group database

3 changes: 3 additions & 0 deletions script/monit/monit
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# monit's built-in HTTP interface: listen on port 2812 at the localhost
# address and accept connections from localhost only.
set httpd port 2812 and
use address localhost
allow localhost
7 changes: 7 additions & 0 deletions script/monit/mysql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Monitor the MySQL process identified by /var/run/mysqld/mysqld.pid, using
# the /etc/init.d/mysql init script to start and stop it.
# Restart when a connection to 127.0.0.1:3306 fails; after 5 restarts within
# 5 cycles the "timeout" action is taken.
check process mysql
with pidfile /var/run/mysqld/mysqld.pid
start "/etc/init.d/mysql start"
stop "/etc/init.d/mysql stop"
if failed host 127.0.0.1 port 3306 then restart
if 5 restarts within 5 cycles then timeout
group database
8 changes: 8 additions & 0 deletions script/monit/nginx
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Monitor the nginx process identified by /var/run/nginx.pid, using the
# /etc/init.d/nginx init script (run as root) to start and stop it.
# Restart it when total memory stays above 1960 MB for 2 cycles or the
# 5-minute load average stays above 10 for 8 cycles; after 20 restarts
# within 20 cycles the "timeout" action is taken.
check process nginx
with pidfile /var/run/nginx.pid
start program = "/etc/init.d/nginx start" as uid root and gid root
stop program = "/etc/init.d/nginx stop" as uid root and gid root
if totalmem is greater than 1960.0 MB for 2 cycles then restart
if loadavg(5min) greater than 10 for 8 cycles then restart
if 20 restarts within 20 cycles then timeout
group www-data #(for ubuntu, debian)
7 changes: 7 additions & 0 deletions script/monit/postgres
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Monitor the PostgreSQL process identified by
# /var/run/postgresql/9.3-main.pid, using the /etc/init.d/postgresql init
# script to start and stop it.
# Restart when a connection to 127.0.0.1:5432 (PostgreSQL's default port)
# fails; after 5 restarts within 5 cycles the "timeout" action is taken.
# Adjust the port if the server is configured to listen elsewhere.
check process postgresql
with pidfile /var/run/postgresql/9.3-main.pid
start "/etc/init.d/postgresql start"
stop "/etc/init.d/postgresql stop"
if failed host 127.0.0.1 port 5432 then restart
if 5 restarts within 5 cycles then timeout
group database
5 changes: 5 additions & 0 deletions script/monit/rabbitmq
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Monitor the RabbitMQ process identified by /var/run/rabbitmq/pid, using
# the /etc/init.d/rabbitmq-server init script to start and stop it.
# No additional failure tests or restart limits are configured here.
check process rabbitmq
with pidfile /var/run/rabbitmq/pid
start program = "/etc/init.d/rabbitmq-server start"
stop program = "/etc/init.d/rabbitmq-server stop"
group rabbitmq-server
1 change: 1 addition & 0 deletions script/monit/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
fabric
7 changes: 7 additions & 0 deletions script/monit/system
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Host-level resource checks: alert when memory usage exceeds 85%, or when
# user or system CPU usage stays above 80% for 3 cycles.
check system localhost
if memory usage > 85% then alert
if cpu usage (user) > 80% for 3 cycles then alert
if cpu usage (system) > 80% for 3 cycles then alert

# Alert when the filesystem mounted at / is more than 80% full.
check filesystem rootfs with path /
if space usage > 80% then alert
9 changes: 9 additions & 0 deletions script/monit/uwsgi
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Monitor the uwsgi process identified by /var/run/ona.pid. It is started
# and stopped as root via "/sbin/start ona" and "/sbin/stop ona"
# (presumably an Upstart job named "ona" -- confirm on the target host).
# Restart when total memory stays above 1960 MB for 3 cycles or the 5-minute
# load average stays above 10 for 8 cycles; only alert when CPU usage stays
# above 50% for 2 cycles. After 20 restarts within 20 cycles the "timeout"
# action is taken.
check process uwsgi
with pidfile /var/run/ona.pid
start program = "/sbin/start ona" as uid root and gid root
stop program = "/sbin/stop ona" as uid root and gid root
if totalmem is greater than 1960.0 MB for 3 cycles then restart
if cpu > 50% for 2 cycles then alert
if loadavg(5min) greater than 10 for 8 cycles then restart
if 20 restarts within 20 cycles then timeout
group uwsgi

0 comments on commit 3e21138

Please sign in to comment.