Skip to content

Commit 04b8fcf

Browse files
committed
Move lost and clearable job deltas to settings, include active unstarted jobs in backlog
1 parent a952311 commit 04b8fcf

File tree

5 files changed

+45
-7
lines changed

5 files changed

+45
-7
lines changed

bolt-jobs/bolt/jobs/admin.py

+32
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from datetime import timedelta
2+
13
from bolt.admin import (
24
AdminModelDetailView,
35
AdminModelListView,
@@ -7,10 +9,32 @@
79
from bolt.admin.cards import Card
810
from bolt.admin.dates import DatetimeRangeAliases
911
from bolt.http import HttpResponseRedirect
12+
from bolt.runtime import settings
1013

1114
from .models import Job, JobRequest, JobResult
1215

1316

17+
def _td_format(td_object):
18+
seconds = int(td_object.total_seconds())
19+
periods = [
20+
("year", 60 * 60 * 24 * 365),
21+
("month", 60 * 60 * 24 * 30),
22+
("day", 60 * 60 * 24),
23+
("hour", 60 * 60),
24+
("minute", 60),
25+
("second", 1),
26+
]
27+
28+
strings = []
29+
for period_name, period_seconds in periods:
30+
if seconds > period_seconds:
31+
period_value, seconds = divmod(seconds, period_seconds)
32+
has_s = "s" if period_value > 1 else ""
33+
strings.append("%s %s%s" % (period_value, period_name, has_s))
34+
35+
return ", ".join(strings)
36+
37+
1438
class SuccessfulJobsCard(Card):
1539
title = "Successful Jobs"
1640
text = "View"
@@ -45,6 +69,10 @@ class LostJobsCard(Card):
4569
title = "Lost Jobs"
4670
text = "View" # TODO make not required - just an icon?
4771

72+
def get_description(self):
    """Describe the lost-job threshold, read from settings.JOBS_LOST_AFTER (seconds)."""
    lost_after_seconds = settings.JOBS_LOST_AFTER
    delta = timedelta(seconds=lost_after_seconds)
    return f"Jobs are considered lost after {_td_format(delta)}"
75+
4876
def get_number(self):
4977
return (
5078
JobResult.objects.lost()
@@ -128,6 +156,10 @@ class ListView(AdminModelListView):
128156
allow_global_search = False
129157
default_datetime_range = DatetimeRangeAliases.LAST_7_DAYS
130158

159+
def get_description(self):
    """Describe the clearing threshold, read from settings.JOBS_CLEARABLE_AFTER (seconds)."""
    clearable_after_seconds = settings.JOBS_CLEARABLE_AFTER
    delta = timedelta(seconds=clearable_after_seconds)
    return f"Jobs are cleared after {_td_format(delta)}"
162+
131163
def get_initial_queryset(self):
132164
queryset = super().get_initial_queryset()
133165
if self.filter == "Successful":

bolt-jobs/bolt/jobs/cli.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import click
55

6+
from bolt.runtime import settings
67
from bolt.utils import timezone
78

89
from .models import Job, JobRequest, JobResult
@@ -47,9 +48,8 @@ def worker(max_processes, max_jobs_per_process, stats_every):
4748

4849

4950
@cli.command()
50-
@click.option("--older-than", type=int, default=60 * 60 * 24 * 7)
51-
def clear_completed(older_than):
52-
cutoff = timezone.now() - datetime.timedelta(seconds=older_than)
51+
def clear_completed():
52+
cutoff = timezone.now() - datetime.timedelta(seconds=settings.JOBS_CLEARABLE_AFTER)
5353
click.echo(f"Clearing jobs finished before {cutoff}")
5454
results = (
5555
JobResult.objects.exclude(ended_at__isnull=True)
+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
JOBS_CLEARABLE_AFTER: int = 60 * 60 * 24 * 7 # One week
2+
JOBS_LOST_AFTER: int = 60 * 60 * 6 # Six hours

bolt-jobs/bolt/jobs/models.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import uuid
55

66
from bolt.db import models, transaction
7+
from bolt.runtime import settings
78
from bolt.utils import timezone
89

910
from .jobs import load_job
@@ -84,12 +85,12 @@ def convert_to_job(self):
8485

8586
class JobQuerySet(models.QuerySet):
8687
def mark_lost_jobs(self):
87-
# Nothing should be pending after more than a 24 hrs... consider it lost
88-
# Downside to these is that they are mark lost pretty late?
88+
# Lost jobs are jobs that have been pending for too long,
89+
# and are probably never going to be picked up by a worker process.
8990
# In theory we could save a timeout per-job and mark them timed-out more quickly,
9091
# but if they're still running, we can't actually send a signal to cancel it...
9192
now = timezone.now()
92-
one_day_ago = now - datetime.timedelta(days=1)
93+
one_day_ago = now - datetime.timedelta(seconds=settings.JOBS_LOST_AFTER)
9394
lost_jobs = self.filter(
9495
created_at__lt=one_day_ago
9596
) # Doesn't matter whether it started or not -- it shouldn't take this long.

bolt-jobs/bolt/jobs/workers.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,10 @@ def maybe_check_job_results(self):
9898

9999
def log_stats(self):
100100
num_proccesses = len(self.executor._processes)
101-
num_backlog_jobs = JobRequest.objects.count()
101+
num_backlog_jobs = (
102+
JobRequest.objects.count()
103+
+ Job.objects.filter(started_at__isnull=True).count()
104+
)
102105
if num_backlog_jobs > 0:
103106
# Basically show how many jobs aren't about to be picked
104107
# up in this same tick (so if there's 1, we don't really need to log that as a backlog)

0 commit comments

Comments
 (0)