From e2e871415572bda337459a99a4cfe7d5c99c9f25 Mon Sep 17 00:00:00 2001 From: Josh Bohde Date: Thu, 5 Nov 2020 03:49:45 -0600 Subject: [PATCH] Enable graceful shutdown of rq workers (#5214) * Enable graceful shutdown of rq workers * Use `exec` in the `worker` command of the entrypoint to propagate the `TERM` signal * Allow rq processes managed by supervisor to exit without restart on expected status codes * Allow supervisorctl to contact the running supervisor * Add a `shutdown_worker` command that will send `TERM` to all running worker processes and then sleep. This allows orchestration systems to initiate a graceful shutdown before sending `SIGTERM` to supervisord * Use Heroku worker as the BaseWorker This implements a graceful shutdown on SIGTERM, which simplifies external shutdown procedures. * Fix imports based upon review * Remove supervisorctl config --- bin/docker-entrypoint | 4 ++-- redash/tasks/worker.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/bin/docker-entrypoint b/bin/docker-entrypoint index f54dd20643..b5d7b0ac17 100755 --- a/bin/docker-entrypoint +++ b/bin/docker-entrypoint @@ -18,8 +18,8 @@ worker() { export WORKERS_COUNT=${WORKERS_COUNT:-2} export QUEUES=${QUEUES:-} - - supervisord -c worker.conf + + exec supervisord -c worker.conf } dev_worker() { diff --git a/redash/tasks/worker.py b/redash/tasks/worker.py index 2fc67415a5..d4ca454a31 100644 --- a/redash/tasks/worker.py +++ b/redash/tasks/worker.py @@ -3,7 +3,8 @@ import signal import time from redash import statsd_client -from rq import Worker as BaseWorker, Queue as BaseQueue, get_current_job +from rq import Queue as BaseQueue, get_current_job +from rq.worker import HerokuWorker # HerokuWorker implements graceful shutdown on SIGTERM from rq.utils import utcnow from rq.timeouts import UnixSignalDeathPenalty, HorseMonitorTimeoutException from rq.job import Job as BaseJob, JobStatus @@ -40,7 +41,7 @@ class RedashQueue(StatsdRecordingQueue, CancellableQueue): pass -class StatsdRecordingWorker(BaseWorker): +class StatsdRecordingWorker(HerokuWorker): """ RQ Worker Mixin that overrides `execute_job` to increment/modify metrics via Statsd """ @@ -58,7 +59,7 @@ def execute_job(self, job, queue): statsd_client.incr("rq.jobs.failed.{}".format(queue.name)) -class HardLimitingWorker(BaseWorker): +class HardLimitingWorker(HerokuWorker): """ RQ's work horses enforce time limits by setting a timed alarm and stopping jobs when they reach their time limits. However, the work horse may be entirely blocked