Skip to content

Commit d48e09e

Browse files
authored
Merge pull request #9 from codefresh-io/dockerd_restart
Dockerd restart
2 parents 2685974 + f90e405 commit d48e09e

File tree

4 files changed

+96
-30
lines changed

4 files changed

+96
-30
lines changed

Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,13 @@ FROM quay.io/prometheus/node-exporter:v0.15.1 AS node-exporter
33

44
FROM codefresh/dind-cleaner:v1.1 AS dind-cleaner
55

6+
FROM codefresh/bolter AS bolter
7+
68
FROM docker:18.09.5-dind
79
RUN apk add bash jq --no-cache
810
COPY --from=node-exporter /bin/node_exporter /bin/
911
COPY --from=dind-cleaner /usr/local/bin/dind-cleaner /bin/
12+
COPY --from=bolter /go/bin/bolter /bin/
1013

1114
WORKDIR /dind
1215
# Plain copy of the build context; COPY is used because none of ADD's extras
# (local tar auto-extraction, remote URL fetch) are wanted here.
COPY . /dind

Dockerfile-17.12

Lines changed: 0 additions & 14 deletions
This file was deleted.

Dockerfile.bolter

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
### Build bolter
2+
FROM golang:1.12.6-alpine3.9
3+
# git is required by `go get`; --no-cache keeps the apk index out of the layer.
RUN apk add --no-cache git
4+
RUN go get -u github.com/hasit/bolter
5+

run.sh

Lines changed: 88 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
DIR=$(dirname $0)
44

55
echo "Entering $0 at $(date) "
6-
DIND_VOLUME_STAT_DIR=${DIND_VOLUME_STAT_DIR:-/var/lib/docker/dind-volume}
6+
DOCKERD_DATA_ROOT=${DOCKERD_DATA_ROOT:-/var/lib/docker}
7+
DIND_VOLUME_STAT_DIR=${DIND_VOLUME_STAT_DIR:-${DOCKERD_DATA_ROOT}/dind-volume}
78
DIND_VOLUME_CREATED_TS_FILE=${DIND_VOLUME_STAT_DIR}/created
89
DIND_VOLUME_LAST_USED_TS_FILE=${DIND_VOLUME_STAT_DIR}/last_used
910
DIND_VOLUME_USED_BY_PODS_FILE=${DIND_VOLUME_STAT_DIR}/pods
@@ -24,7 +25,7 @@ echo "${POD_NAME} ${CURRENT_TS}" >> ${DIND_VOLUME_USED_BY_PODS_FILE}
2425

2526
sigterm_trap(){
2627
echo "${1:-SIGTERM} received at $(date)"
27-
28+
export SIGTERM=1
2829
CURRENT_TS=$(date +%s)
2930
echo ${CURRENT_TS} > ${DIND_VOLUME_LAST_USED_TS_FILE}
3031

@@ -52,8 +53,8 @@ sigterm_trap(){
5253
echo "killing MONITOR_PID ${MONITOR_PID}"
5354
kill $MONITOR_PID
5455

55-
echo "killing DOCKER_PID ${DOCKER_PID}"
56-
kill $DOCKER_PID
56+
echo "killing DOCKERD_PID ${DOCKERD_PID}"
57+
kill $DOCKERD_PID
5758
sleep 2
5859

5960
if [[ -n "${USE_DIND_IMAGES_LIB}" && "${USE_DIND_IMAGES_LIB}" != "false" && -n "${DOCKERD_DATA_ROOT}" ]]; then
@@ -124,23 +125,94 @@ echo "DOCKERD_PARAMS = ${DOCKERD_PARAMS}"
124125
${DIR}/monitor/start.sh <&- &
125126
MONITOR_PID=$!
126127

127-
### Trying to start docker
128-
dockerd ${DOCKERD_PARAMS} <&- &
129-
CNT=0
130-
while ! test -f /var/run/docker.pid || test -z "$(cat /var/run/docker.pid)" || ! docker ps
128+
### start docker with retry
129+
DOCKERD_PID_FILE=/var/run/docker.pid
130+
DOCKERD_PID_MAXWAIT=${DOCKERD_PID_MAXWAIT:-20}
131+
DOCKERD_LOCK_MAXWAIT=${DOCKERD_LOCK_MAXWAIT:-60}
132+
DOCKER_UP_MAXWAIT=${DOCKERD_UP_MAXWAIT:-90}
133+
while true
131134
do
132-
echo "$(date) - Waiting for docker to start"
133-
sleep 2
134-
done
135+
[[ -n "${SIGTERM}" ]] && break
136+
echo "Starting docker ..."
137+
if [[ -f ${DOCKERD_PID_FILE} ]] || pgrep -l dockerd ; then
138+
# This branch is also taken when pgrep finds dockerd but the pid file is
# missing, so tolerate an absent file instead of letting cat fail.
DOCKERD_PID=$(cat ${DOCKERD_PID_FILE} 2>/dev/null || true)
139+
echo " Waiting for dockerd pid ${DOCKERD_PID_FILE} to exit ..."
140+
CNT=0
141+
pkill dockerd
142+
while pgrep -l dockerd
143+
do
144+
[[ -n "${SIGTERM}" ]] && break 2
145+
(( CNT++ ))
146+
echo ".... old dockerd is still running - $(date)"
147+
if [[ ${CNT} -ge 120 ]]; then
148+
echo "Killing old dockerd"
149+
pkill -9 dockerd
150+
break
151+
fi
152+
sleep 1
153+
done
154+
rm -fv ${DOCKERD_PID_FILE}
155+
fi
135156

136-
DOCKER_PID=$(cat /var/run/docker.pid)
137-
echo "DOCKER_PID = ${DOCKER_PID} "
157+
echo "$(date) - Checking if other dockerd running on same /var/lib/docker by check locks on containerd/daemon/io.containerd.metadata.v1.bolt/meta.db "
158+
CONTEINERD_DB=${DOCKERD_DATA_ROOT}/containerd/daemon/io.containerd.metadata.v1.bolt/meta.db
159+
if [[ -f ${CONTEINERD_DB} ]]; then
160+
echo "Checking if another dockerd is running on same ${DOCKERD_DATA_ROOT} boltdb $CONTEINERD_DB is locked"
161+
CNT=0
162+
while ! bolter -f ${CONTEINERD_DB}
163+
do
164+
[[ -n "${SIGTERM}" ]] && break 2
165+
echo "$(date) - Waiting for containerd boltd ${CONTEINERD_DB}"
166+
(( CNT++ ))
167+
if (( CNT > ${DOCKERD_LOCK_MAXWAIT} )); then
168+
echo " giving up and trying to start docker anyway Waited more than ${DOCKERD_LOCK_MAXWAIT}s for containerd boltdb unlock"
169+
break
170+
fi
171+
sleep 1
172+
done
173+
else
174+
echo "containerd db is not locked"
175+
fi
176+
177+
echo "Starting dockerd"
178+
dockerd ${DOCKERD_PARAMS} <&- &
179+
# Report the configured limit rather than a hard-coded value.
echo "Waiting at most ${DOCKERD_PID_MAXWAIT}s for docker pid"
180+
CNT=0
181+
while ! test -f "${DOCKERD_PID_FILE}" || test -z "$(cat ${DOCKERD_PID_FILE})"
182+
do
183+
[[ -n "${SIGTERM}" ]] && break 2
184+
echo "$(date) - Waiting for docker pid file ${DOCKERD_PID_FILE}"
185+
(( CNT++ ))
186+
if (( CNT > ${DOCKERD_PID_MAXWAIT} )); then
187+
echo "Waited more than ${DOCKERD_PID_MAXWAIT}s for docker pid, retry dockerd start"
188+
continue 2
189+
fi
190+
sleep 1
191+
done
192+
193+
# The loop that follows polls `docker ps`, bounded by DOCKER_UP_MAXWAIT.
echo "Waiting at most ${DOCKER_UP_MAXWAIT}s for dockerd to respond to docker ps"
194+
CNT=0
195+
while ! docker ps
196+
do
197+
[[ -n "${SIGTERM}" ]] && break 2
198+
echo "$(date) - Waiting for docker running by check docker ps "
199+
(( CNT++ ))
200+
if (( CNT > ${DOCKER_UP_MAXWAIT} )); then
201+
echo "Waited more than ${DOCKER_UP_MAXWAIT}s for dockerd, retry dockerd start"
202+
continue 2
203+
fi
204+
sleep 1
205+
done
206+
echo "$(date) - dockerd has been started"
207+
break
208+
done
138209

139210
# Starting cleaner agent
140-
if [[ -z "${DISABLE_CLEANER_AGENT}" ]]; then
211+
if [[ -z "${DISABLE_CLEANER_AGENT}" && -z "${SIGTERM}" ]]; then
141212
${DIR}/cleaner/cleaner-agent.sh <&- &
142213
CLEANER_AGENT_PID=$!
143214
fi
144215

145-
wait ${DOCKER_PID}
146-
216+
# Read the pid from the same file the start/retry loop waits on.
DOCKERD_PID=$(cat ${DOCKERD_PID_FILE})
217+
echo "DOCKERD_PID = ${DOCKERD_PID} "
218+
wait ${DOCKERD_PID}

0 commit comments

Comments
 (0)