# Directory this script lives in; used to locate the sibling helper scripts.
DIR=$(dirname "$0")

echo "Entering $0 at $(date)"

# Root of the docker data directory; may be overridden from the environment.
DOCKERD_DATA_ROOT=${DOCKERD_DATA_ROOT:-/var/lib/docker}
# Directory holding the volume bookkeeping files; defaults to a
# "dind-volume" sub-directory of the docker data root.
DIND_VOLUME_STAT_DIR=${DIND_VOLUME_STAT_DIR:-${DOCKERD_DATA_ROOT}/dind-volume}
# Bookkeeping files kept inside the stat directory.
DIND_VOLUME_CREATED_TS_FILE=${DIND_VOLUME_STAT_DIR}/created
DIND_VOLUME_LAST_USED_TS_FILE=${DIND_VOLUME_STAT_DIR}/last_used
DIND_VOLUME_USED_BY_PODS_FILE=${DIND_VOLUME_STAT_DIR}/pods
@@ -24,7 +25,7 @@ echo "${POD_NAME} ${CURRENT_TS}" >> ${DIND_VOLUME_USED_BY_PODS_FILE}
24
25
25
26
sigterm_trap (){
26
27
echo " ${1:- SIGTERM} received at $( date) "
27
-
28
+ export SIGTERM=1
28
29
CURRENT_TS=$( date +%s)
29
30
echo ${CURRENT_TS} > ${DIND_VOLUME_LAST_USED_TS_FILE}
30
31
@@ -52,8 +53,8 @@ sigterm_trap(){
52
53
echo " killing MONITOR_PID ${MONITOR_PID} "
53
54
kill $MONITOR_PID
54
55
55
- echo " killing DOCKER_PID ${DOCKER_PID } "
56
- kill $DOCKER_PID
56
+ echo " killing DOCKERD_PID ${DOCKERD_PID } "
57
+ kill $DOCKERD_PID
57
58
sleep 2
58
59
59
60
if [[ -n " ${USE_DIND_IMAGES_LIB} " && " ${USE_DIND_IMAGES_LIB} " != " false" && -n " ${DOCKERD_DATA_ROOT} " ]]; then
@@ -124,23 +125,94 @@ echo "DOCKERD_PARAMS = ${DOCKERD_PARAMS}"
# Start the monitor helper in the background with stdin closed.
"${DIR}"/monitor/start.sh <&- &
MONITOR_PID=$!

### start docker with retry
# Loop until dockerd answers `docker ps`, or until the SIGTERM flag is set
# (a non-empty ${SIGTERM} makes every loop below bail out).
# Tunables (seconds, one check per second):
#   DOCKERD_PID_MAXWAIT  - wait for the pid file to appear and be non-empty
#   DOCKERD_LOCK_MAXWAIT - wait for the containerd boltdb check to succeed
#   DOCKERD_UP_MAXWAIT   - wait for `docker ps` to succeed
DOCKERD_PID_FILE=/var/run/docker.pid
DOCKERD_PID_MAXWAIT=${DOCKERD_PID_MAXWAIT:-20}
DOCKERD_LOCK_MAXWAIT=${DOCKERD_LOCK_MAXWAIT:-60}
# DOCKERD_UP_MAXWAIT is the documented override; a pre-set DOCKER_UP_MAXWAIT
# is also honoured so that either spelling works.
DOCKER_UP_MAXWAIT=${DOCKERD_UP_MAXWAIT:-${DOCKER_UP_MAXWAIT:-90}}
while true
do
  [[ -n "${SIGTERM}" ]] && break
  echo "Starting docker ..."

  # A leftover pid file or a live dockerd process means a previous daemon has
  # to be stopped first.
  if [[ -f "${DOCKERD_PID_FILE}" ]] || pgrep -l dockerd; then
    # The branch may be entered through pgrep alone, so the pid file is
    # read only when it actually exists.
    DOCKERD_PID=
    if [[ -f "${DOCKERD_PID_FILE}" ]]; then
      DOCKERD_PID=$(cat "${DOCKERD_PID_FILE}")
    fi
    echo "Waiting for dockerd pid ${DOCKERD_PID:-unknown} to exit ..."
    CNT=0
    pkill dockerd
    while pgrep -l dockerd
    do
      [[ -n "${SIGTERM}" ]] && break 2
      (( CNT++ ))
      echo ".... old dockerd is still running - $(date)"
      # After 120 polite checks fall back to SIGKILL.
      if [[ ${CNT} -ge 120 ]]; then
        echo "Killing old dockerd"
        pkill -9 dockerd
        break
      fi
      sleep 1
    done
    rm -fv "${DOCKERD_PID_FILE}"
  fi

  echo "$(date) - Checking if other dockerd running on same ${DOCKERD_DATA_ROOT} by check locks on containerd/daemon/io.containerd.metadata.v1.bolt/meta.db"
  # NOTE(review): the variable name is misspelled (CONTEINERD); it is kept
  # as-is because other parts of the file may reference it.
  CONTEINERD_DB=${DOCKERD_DATA_ROOT}/containerd/daemon/io.containerd.metadata.v1.bolt/meta.db
  if [[ -f "${CONTEINERD_DB}" ]]; then
    echo "Checking if another dockerd is running on same ${DOCKERD_DATA_ROOT} boltdb ${CONTEINERD_DB} is locked"
    CNT=0
    # Presumably `bolter -f` fails while another process holds the db lock;
    # confirm against the bolter tool.
    while ! bolter -f "${CONTEINERD_DB}"
    do
      [[ -n "${SIGTERM}" ]] && break 2
      echo "$(date) - Waiting for containerd boltd ${CONTEINERD_DB}"
      (( CNT++ ))
      if (( CNT > DOCKERD_LOCK_MAXWAIT )); then
        echo "giving up and trying to start docker anyway Waited more than ${DOCKERD_LOCK_MAXWAIT}s for containerd boltdb unlock"
        break
      fi
      sleep 1
    done
  else
    echo "containerd db is not locked"
  fi

  echo "Starting dockerd"
  # DOCKERD_PARAMS is intentionally unquoted: it carries several arguments.
  dockerd ${DOCKERD_PARAMS} <&- &
  echo "Waiting at most ${DOCKERD_PID_MAXWAIT}s for docker pid"
  CNT=0
  while ! test -f "${DOCKERD_PID_FILE}" || test -z "$(cat "${DOCKERD_PID_FILE}")"
  do
    [[ -n "${SIGTERM}" ]] && break 2
    echo "$(date) - Waiting for docker pid file ${DOCKERD_PID_FILE}"
    (( CNT++ ))
    if (( CNT > DOCKERD_PID_MAXWAIT )); then
      echo "Waited more than ${DOCKERD_PID_MAXWAIT}s for docker pid, retry dockerd start"
      # Restart the outer loop; its first step stops any dockerd still running.
      continue 2
    fi
    sleep 1
  done

  echo "Waiting at most ${DOCKER_UP_MAXWAIT}s for docker to respond"
  CNT=0
  while ! docker ps
  do
    [[ -n "${SIGTERM}" ]] && break 2
    echo "$(date) - Waiting for docker running by check docker ps"
    (( CNT++ ))
    if (( CNT > DOCKER_UP_MAXWAIT )); then
      echo "Waited more than ${DOCKER_UP_MAXWAIT}s for dockerd, retry dockerd start"
      continue 2
    fi
    sleep 1
  done
  echo "$(date) - dockerd has been started"
  break
done
138
209
139
210
# Starting cleaner agent
# Skipped when DISABLE_CLEANER_AGENT is non-empty or when the SIGTERM flag is
# set.
if [[ -z "${DISABLE_CLEANER_AGENT}" && -z "${SIGTERM}" ]]; then
  "${DIR}"/cleaner/cleaner-agent.sh <&- &
  CLEANER_AGENT_PID=$!
fi

# Block on dockerd. The pid file can be missing when the start loop was left
# because of SIGTERM, so it is read only when present.
DOCKERD_PID=
if [[ -f "${DOCKERD_PID_FILE:-/var/run/docker.pid}" ]]; then
  DOCKERD_PID=$(cat "${DOCKERD_PID_FILE:-/var/run/docker.pid}")
fi
echo "DOCKERD_PID = ${DOCKERD_PID}"
if [[ -n "${DOCKERD_PID}" ]]; then
  wait "${DOCKERD_PID}"
else
  # No pid available: wait for all background jobs, which is what an
  # argument-less `wait` does.
  wait
fi
0 commit comments