-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathentrypoint.sh
executable file
·145 lines (128 loc) · 3.56 KB
/
entrypoint.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env bash
#
# Entrypoint script: starts the collector via lmbc and the watchdog binary,
# supervises the watchdog and agent processes, and streams their log files.

# Filesystem layout of the collector installation; every other path is derived
# from INSTALL_PATH.
INSTALL_PATH=/usr/local/logicmonitor/agent
AGENT_BIN="${INSTALL_PATH}/bin/logicmonitor-agent"
AGENT_PID_PATH="${INSTALL_PATH}/bin/logicmonitor-agent.java.pid"
WATCHDOG_BIN="${INSTALL_PATH}/bin/logicmonitor-watchdog"
WATCHDOG_PID_PATH="${INSTALL_PATH}/bin/logicmonitor-watchdog.java.pid"
LOG_PATH="${INSTALL_PATH}/logs/"
# Marker file: its presence tells signal_handler to skip the cleanup scripts.
UNCLEAN_SHUTDOWN_PATH="${INSTALL_PATH}/unclean_shutdown.lck"

# Route both shutdown signals to signal_handler. The handler is defined further
# down in the file; the name is only looked up when a signal is delivered.
trap 'signal_handler' SIGTERM SIGINT
#######################################
# Print the PID recorded in the collector agent's PID file.
# Globals:   AGENT_PID_PATH (read)
# Arguments: none
# Outputs:   the contents of the PID file on stdout; nothing if the file
#            does not exist
# Returns:   0 whether or not the file exists
#######################################
get_agent_pid() {
  # Quote the path everywhere so a location containing whitespace is still
  # treated as a single file name.
  if [[ -e "$AGENT_PID_PATH" ]]; then
    printf '%s\n' "$(<"$AGENT_PID_PATH")"
  fi
}
#######################################
# Print the PID recorded in the watchdog's PID file.
# Globals:   WATCHDOG_PID_PATH (read)
# Arguments: none
# Outputs:   the contents of the PID file on stdout; nothing if the file
#            does not exist
# Returns:   0 whether or not the file exists
#######################################
get_watchdog_pid() {
  # Quote the path everywhere so a location containing whitespace is still
  # treated as a single file name.
  if [[ -e "$WATCHDOG_PID_PATH" ]]; then
    printf '%s\n' "$(<"$WATCHDOG_PID_PATH")"
  fi
}
#######################################
# Poll the watchdog process every 10 seconds and interrupt the startup script
# once the watchdog has stayed dead for too long.
#
# When the watched PID disappears, the watchdog PID file is consulted: a new,
# different PID there means the watchdog was restarted, and that PID is watched
# from then on. Otherwise the poll counts as a failure; any successful poll
# resets the counter.
#
# Globals:   UNCLEAN_SHUTDOWN_PATH (read; the file is created on failure)
# Arguments: $1 - PID to watch initially
#            $2 - PID of the startup script, which receives SIGINT on failure
# Outputs:   status messages on stdout
# Returns:   does not return on its own (endless loop); run it in the
#            background
#######################################
watch_pid() {
  local pid=$1
  local startup_script_pid=$2
  local new_pid
  local fails=0
  # 24 consecutive failed polls at 10 s each is roughly 4 minutes of downtime.
  local -r max_fails=24

  while true; do
    if ps "$pid" > /dev/null; then
      fails=0
    else
      # Look for a restarted watchdog. The watchdog's own PID file is the
      # source of truth here; reading the agent's PID file instead would make
      # this loop silently switch to watching the agent.
      new_pid=$(get_watchdog_pid)
      if [[ -n "$new_pid" && "$new_pid" != "$pid" ]]; then
        echo -e "Found new PID $new_pid"
        pid=$new_pid
      else
        fails=$((fails + 1))
        if (( fails >= max_fails )); then
          # we want to skip cleanup scripts since the collector failed
          # unexpectedly: leave the marker file before signalling
          echo -e "Watchdog crashed\nExiting"
          touch "$UNCLEAN_SHUTDOWN_PATH"
          kill -INT "$startup_script_pid"
        fi
      fi
    fi
    sleep 10
  done
}
#######################################
# Poll the collector agent and interrupt the startup script once the agent has
# been down for 6 consecutive checks (at least one minute).
#
# A check fails when no PID can be read from the agent PID file or when the
# recorded PID is not a running process. Any successful check resets the
# counter.
#
# Globals:   AGENT_PID_PATH (read)
#            UNCLEAN_SHUTDOWN_PATH (read; the file is created on failure)
# Arguments: $1 - PID of the startup script, which receives SIGINT on failure
# Outputs:   status messages on stdout
# Returns:   does not return on its own (endless loop); run it in the
#            background
#######################################
watch_agent() {
  local startup_script_pid=$1
  local agent_pid
  local fails=0
  local -r max_fails=6

  while true; do
    # Give the agent up to 10 s to write its PID file. timeout exits non-zero
    # when the file never appears; that status is discarded on purpose so that
    # `set -e` does not terminate this watcher. The empty PID read below is
    # what records the failure.
    timeout 10 bash -c -- '
      while [ ! -e "$1" ]; do
        echo "Waiting for agent to start"
        sleep 1
      done' _ "$AGENT_PID_PATH" || true

    agent_pid=$(get_agent_pid)
    if [[ -n "$agent_pid" ]] && ps "$agent_pid" > /dev/null; then
      fails=0
    else
      fails=$((fails + 1))
    fi

    # The threshold is evaluated on every iteration, including those where no
    # PID could be read, so an agent that never starts is also detected.
    if (( fails >= max_fails )); then
      # we want to skip cleanup scripts since the collector failed
      # unexpectedly: leave the marker file before signalling
      echo -e "Agent crashed\nExiting"
      touch "$UNCLEAN_SHUTDOWN_PATH"
      kill -INT "$startup_script_pid"
    fi
    sleep 10
  done
}
#######################################
# catch shutdown signals from docker and run shutdown scripts
#
# If the unclean-shutdown marker file exists, the shutdown scripts are skipped,
# the marker is removed and the script exits with status 1. Otherwise
# sbshutdown and `lmbc shutdown` are run and the script exits with the status
# of `lmbc shutdown`.
#
# Globals:   INSTALL_PATH (read), UNCLEAN_SHUTDOWN_PATH (read; file removed)
# Arguments: none
# Returns:   never returns; always exits the shell
#######################################
signal_handler() {
  local rc=0

  # only cleanup if we shutdown cleanly
  if [[ -f "$UNCLEAN_SHUTDOWN_PATH" ]]; then
    rm -f -- "$UNCLEAN_SHUTDOWN_PATH"
    exit 1
  fi

  # Both steps are attempted even when the first one fails: with `set -e`
  # active an unguarded failure of sbshutdown would end the handler before
  # `lmbc shutdown` gets a chance to run.
  "${INSTALL_PATH}/bin/sbshutdown" \
    || echo "sbshutdown failed with status $?" >&2
  lmbc shutdown || rc=$?
  exit "$rc"
}
# From here on, any unhandled command failure aborts startup.
set -e

# run application
lmbc start
# Only the exit status of the apply step matters (set -e aborts on failure);
# its stdout is discarded.
lmbc config apply > /dev/null

# ensure the collector is stopped so that we can control startup
"$AGENT_BIN" stop > /dev/null
"$WATCHDOG_BIN" stop > /dev/null
"$WATCHDOG_BIN" start

# Wait up to 10 s for the watchdog PID file. If it never appears, timeout
# exits non-zero and set -e aborts the script.
timeout 10 bash -c -- '
  while [ ! -e "$1" ]; do
    echo "Waiting for watchdog to start"
    sleep 1
  done' _ "$WATCHDOG_PID_PATH"
echo "Watchdog started"

# monitor the watchdog process and kill the container if it crashes
# (arguments are quoted so that an empty PID cannot shift the startup script's
# PID into the first position)
watch_pid "$(get_watchdog_pid)" "$$" &

# monitor the agent process and kill the container if it is down for 60s
watch_agent "$$" &

# Stream the collector logs once all three files exist. `wait` keeps the
# script in the foreground and is interrupted when a trapped signal arrives.
while true; do
  if [[ -f "$LOG_PATH/wrapper.log" \
    && -f "$LOG_PATH/watchdog.log" \
    && -f "$LOG_PATH/sbproxy.log" ]]; then
    tail -f \
      "$LOG_PATH/watchdog.log" \
      "$LOG_PATH/wrapper.log" \
      "$LOG_PATH/sbproxy.log" & wait
  else
    # Back off while the log files are still missing instead of spinning.
    sleep 1
  fi
done