Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions config/kernel-timer.m4
Original file line number Diff line number Diff line change
@@ -1,8 +1,19 @@
dnl #
dnl # 6.2: timer_delete_sync introduced, del_timer_sync deprecated and made
dnl # into a simple wrapper
dnl # 6.2: timer_delete & timer_delete_sync introduced, del_timer &
dnl #      del_timer_sync deprecated and made into simple wrappers
dnl # 6.15: del_timer_sync removed
dnl #
dnl # We test for them separately as they appear to have not always been
dnl # backported together
dnl #
dnl #
dnl # Source for the "timer_delete" test: includes <linux/timer.h> and
dnl # calls timer_delete() on a struct timer_list pointer. The outcome is
dnl # consumed under the same test name by ZFS_AC_KERNEL_TIMER_DELETE.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER_DELETE], [
ZFS_LINUX_TEST_SRC([timer_delete], [
#include <linux/timer.h>
],[
struct timer_list *timer __attribute__((unused)) = NULL;
timer_delete(timer);
])
])
AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER_DELETE_SYNC], [
ZFS_LINUX_TEST_SRC([timer_delete_sync], [
#include <linux/timer.h>
Expand All @@ -12,6 +23,16 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER_DELETE_SYNC], [
])
])

dnl #
dnl # Report the result of the "timer_delete" test. On the "yes" branch,
dnl # HAVE_TIMER_DELETE is defined to 1; on the "no" branch nothing is
dnl # defined.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_TIMER_DELETE], [
AC_MSG_CHECKING([whether timer_delete() is available])
ZFS_LINUX_TEST_RESULT([timer_delete], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_TIMER_DELETE, 1,
[timer_delete is available])
],[
AC_MSG_RESULT(no)
])
])
AC_DEFUN([ZFS_AC_KERNEL_TIMER_DELETE_SYNC], [
AC_MSG_CHECKING([whether timer_delete_sync() is available])
ZFS_LINUX_TEST_RESULT([timer_delete_sync], [
Expand All @@ -24,9 +45,11 @@ AC_DEFUN([ZFS_AC_KERNEL_TIMER_DELETE_SYNC], [
])

dnl #
dnl # Expand the test-source macros for both timer checks:
dnl # timer_delete and timer_delete_sync.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER], [
ZFS_AC_KERNEL_SRC_TIMER_DELETE
ZFS_AC_KERNEL_SRC_TIMER_DELETE_SYNC
])

dnl #
dnl # Expand the result-reporting macros for both timer checks:
dnl # timer_delete and timer_delete_sync.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_TIMER], [
ZFS_AC_KERNEL_TIMER_DELETE
ZFS_AC_KERNEL_TIMER_DELETE_SYNC
])
4 changes: 3 additions & 1 deletion include/os/linux/spl/sys/taskq.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Copyright (c) 2024, Klara Inc.
* Copyright (c) 2024, 2025, Klara, Inc.
* Copyright (c) 2024, Syneto
*/

Expand Down Expand Up @@ -134,6 +134,8 @@ typedef struct taskq {
wait_queue_head_t tq_work_waitq; /* new work waitq */
wait_queue_head_t tq_wait_waitq; /* wait waitq */
tq_lock_role_t tq_lock_class; /* class when taking tq_lock */
struct timer_list tq_deadman; /* deadman timer */
unsigned long tq_deadman_at; /* time of last deadman trip */
/* list node for the cpu hotplug callback */
struct hlist_node tq_hp_cb_node;
boolean_t tq_hp_support;
Expand Down
11 changes: 10 additions & 1 deletion man/man4/spl.4
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@
.\" Portions Copyright [yyyy] [name of copyright owner]
.\"
.\" Copyright 2013 Turbo Fredriksson <[email protected]>. All rights reserved.
.\" Copyright (c) 2025, Klara, Inc.
.\"
.Dd May 7, 2025
.Dd November 12, 2025
.Dt SPL 4
.Os
.
Expand Down Expand Up @@ -130,6 +131,14 @@ When not enabled, the thread is halted to facilitate further debugging.
.Pp
Set to a non-zero value to enable.
.
.It Sy spl_taskq_deadman_timeout Ns = Ns Sy 20 Pq uint
Log a warning if a taskq has not made progress in N seconds.
A taskq is considered stuck when N seconds have passed since one of its
threads last picked up a new task and at least one task is still running.
This can be useful for deadlock debugging.
Setting this value to 0 will disable this function.
.
.It Sy spl_taskq_kick Ns = Ns Sy 0 Pq uint
Kick stuck taskq to spawn threads.
When writing a non-zero value to it, it will scan all the taskqs.
Expand Down
67 changes: 65 additions & 2 deletions module/os/linux/spl/spl-taskq.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
* Solaris Porting Layer (SPL) Task Queue Implementation.
*/
/*
* Copyright (c) 2024, Klara Inc.
* Copyright (c) 2024, 2025, Klara, Inc.
* Copyright (c) 2024, Syneto
*/

Expand All @@ -39,7 +39,14 @@
#include <linux/cpuhotplug.h>
#include <linux/mod_compat.h>

/* Linux 6.2 renamed timer_delete_sync(); point it at its old name for those. */
/*
 * Timer deletion compatibility shims. Linux 6.2 introduced timer_delete()
 * and timer_delete_sync() as the new names for del_timer() and
 * del_timer_sync(). When configure did not detect a new name, map it onto
 * the corresponding old one.
 */
#if !defined(HAVE_TIMER_DELETE)
#define	timer_delete(t)		del_timer(t)
#endif
#if !defined(HAVE_TIMER_DELETE_SYNC)
#define	timer_delete_sync(t)	del_timer_sync(t)
#endif
Expand Down Expand Up @@ -142,6 +149,11 @@ module_param(spl_taskq_thread_sequential, uint, 0644);
MODULE_PARM_DESC(spl_taskq_thread_sequential,
"Create new taskq threads after N sequential tasks");

/*
 * Taskq deadman timeout, in seconds (default 20). Exposed as a uint module
 * parameter with mode 0644. Other code in this file treats a value of 0 as
 * "deadman disabled".
 */
static uint_t spl_taskq_deadman_timeout = 20;
module_param(spl_taskq_deadman_timeout, uint, 0644);
MODULE_PARM_DESC(spl_taskq_deadman_timeout,
"Log a warning if the taskq has not made progress in N seconds");

/*
* Global system-wide dynamic task queue available for all consumers. This
* taskq is not intended for long-running tasks; instead, a dedicated taskq
Expand Down Expand Up @@ -357,6 +369,34 @@ task_expire(struct timer_list *tl)
task_expire_impl(t);
}

/*
 * Deadman timer callback for a taskq.
 *
 * Runs when tq->tq_deadman expires. If the taskq still has active threads
 * and the deadman is enabled, log how many threads are active and how many
 * tasks are waiting on the pending and priority lists, then record the time
 * of the trip in tq->tq_deadman_at.
 *
 * tl: the tq_deadman timer embedded in the taskq_t being watched.
 */
static void
taskq_deadman(struct timer_list *tl)
{
	taskq_t *tq = container_of(tl, taskq_t, tq_deadman);
	/*
	 * Read the tunable once so the enabled check and the reported
	 * duration use the same value; the parameter is writable at runtime.
	 */
	unsigned int timeout = spl_taskq_deadman_timeout;
	unsigned long nqueued = 0;
	unsigned long irqflags;
	struct list_head *pos;

	spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);

	/* Nothing is running, or the deadman has been disabled: stay quiet. */
	if (tq->tq_nactive == 0 || timeout == 0) {
		spin_unlock_irqrestore(&tq->tq_lock, irqflags);
		return;
	}

	/* Count the tasks still waiting to be picked up. */
	list_for_each(pos, &tq->tq_pend_list)
		nqueued++;
	list_for_each(pos, &tq->tq_prio_list)
		nqueued++;

	/* Report active/total so the numbers match "threads active". */
	printk(KERN_INFO "spl: taskq stuck for %us: %s.%d "
	    "[%d/%d threads active, %lu tasks queued]\n",
	    timeout, tq->tq_name, tq->tq_instance,
	    tq->tq_nactive, tq->tq_nthreads, nqueued);

	tq->tq_deadman_at = jiffies;
	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
}

/*
* Returns the lowest incomplete taskqid_t. The taskqid_t may
* be queued on the pending list, on the priority list, on the
Expand Down Expand Up @@ -1071,6 +1111,11 @@ taskq_thread(void *args)

taskq_insert_in_order(tq, tqt);
tq->tq_nactive++;

if (spl_taskq_deadman_timeout > 0)
mod_timer(&tq->tq_deadman,
jiffies + spl_taskq_deadman_timeout * HZ);

spin_unlock_irqrestore(&tq->tq_lock, flags);

TQSTAT_INC(tq, threads_active);
Expand All @@ -1096,6 +1141,21 @@ taskq_thread(void *args)
list_del_init(&tqt->tqt_active_list);
tqt->tqt_task = NULL;

if (tq->tq_nactive == 0 ||
spl_taskq_deadman_timeout == 0)
timer_delete(&tq->tq_deadman);

if (tq->tq_deadman_at > 0) {
unsigned long stuck_for =
jiffies - tq->tq_deadman_at;
tq->tq_deadman_at = 0;

printk(KERN_INFO
"spl: taskq resumed after %lus: %s.%d\n",
stuck_for / HZ, tq->tq_name,
tq->tq_instance);
}

/* For prealloc'd tasks, we don't free anything. */
if (!(tqt->tqt_flags & TQENT_FLAG_PREALLOC))
task_done(tq, t);
Expand Down Expand Up @@ -1375,6 +1435,9 @@ taskq_create(const char *name, int threads_arg, pri_t pri,
tq->tq_next_id = TASKQID_INITIAL;
tq->tq_lowest_id = TASKQID_INITIAL;
tq->lastspawnstop = jiffies;
timer_setup(&tq->tq_deadman, NULL, 0);
tq->tq_deadman.function = taskq_deadman;
tq->tq_deadman_at = 0;
INIT_LIST_HEAD(&tq->tq_free_list);
INIT_LIST_HEAD(&tq->tq_pend_list);
INIT_LIST_HEAD(&tq->tq_prio_list);
Expand Down
Loading