diff options
Diffstat (limited to 'kernel/workqueue.c')
| -rw-r--r-- | kernel/workqueue.c | 176 | 
1 files changed, 173 insertions, 3 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2c2f971f3e75..316b316c7528 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -48,6 +48,7 @@
 #include <linux/nodemask.h>
 #include <linux/moduleparam.h>
 #include <linux/uaccess.h>
+#include <linux/bug.h>
 
 #include "workqueue_internal.h"
 
@@ -148,6 +149,8 @@ struct worker_pool {
 	int			id;		/* I: pool ID */
 	unsigned int		flags;		/* X: flags */
 
+	unsigned long		watchdog_ts;	/* L: watchdog timestamp */
+
 	struct list_head	worklist;	/* L: list of pending works */
 	int			nr_workers;	/* L: total number of workers */
 
@@ -1122,6 +1125,8 @@ static void pwq_activate_delayed_work(struct work_struct *work)
 	struct pool_workqueue *pwq = get_work_pwq(work);
 
 	trace_workqueue_activate_work(work);
+	if (list_empty(&pwq->pool->worklist))
+		pwq->pool->watchdog_ts = jiffies;
 	move_linked_works(work, &pwq->pool->worklist, NULL);
 	__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
 	pwq->nr_active++;
@@ -1424,6 +1429,8 @@ retry:
 		trace_workqueue_activate_work(work);
 		pwq->nr_active++;
 		worklist = &pwq->pool->worklist;
+		if (list_empty(worklist))
+			pwq->pool->watchdog_ts = jiffies;
 	} else {
 		work_flags |= WORK_STRUCT_DELAYED;
 		worklist = &pwq->delayed_works;
@@ -2081,6 +2088,7 @@ __acquires(&pool->lock)
 		       current->comm, preempt_count(), task_pid_nr(current),
 		       worker->current_func);
 		debug_show_held_locks(current);
+		BUG_ON(PANIC_CORRUPTION);
 		dump_stack();
 	}
 
@@ -2196,6 +2204,8 @@ recheck:
 			list_first_entry(&pool->worklist,
 					 struct work_struct, entry);
 
+		pool->watchdog_ts = jiffies;
+
 		if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
 			/* optimization path, not strictly necessary */
 			process_one_work(worker, work);
@@ -2279,6 +2289,7 @@ repeat:
 					struct pool_workqueue, mayday_node);
 		struct worker_pool *pool = pwq->pool;
 		struct work_struct *work, *n;
+		bool first = true;
 
 		__set_current_state(TASK_RUNNING);
 		list_del_init(&pwq->mayday_node);
@@ -2295,9 +2306,14 @@ repeat:
 		 * process'em.
 		 */
 		WARN_ON_ONCE(!list_empty(scheduled));
-		list_for_each_entry_safe(work, n, &pool->worklist, entry)
-			if (get_work_pwq(work) == pwq)
+		list_for_each_entry_safe(work, n, &pool->worklist, entry) {
+			if (get_work_pwq(work) == pwq) {
+				if (first)
+					pool->watchdog_ts = jiffies;
 				move_linked_works(work, scheduled, &n);
+			}
+			first = false;
+		}
 
 		if (!list_empty(scheduled)) {
 			process_scheduled_works(rescuer);
@@ -3108,6 +3124,7 @@ static int init_worker_pool(struct worker_pool *pool)
 	pool->cpu = -1;
 	pool->node = NUMA_NO_NODE;
 	pool->flags |= POOL_DISASSOCIATED;
+	pool->watchdog_ts = jiffies;
 	INIT_LIST_HEAD(&pool->worklist);
 	INIT_LIST_HEAD(&pool->idle_list);
 	hash_init(pool->busy_hash);
@@ -4347,7 +4364,9 @@ void show_workqueue_state(void)
 
 		pr_info("pool %d:", pool->id);
 		pr_cont_pool_info(pool);
-		pr_cont(" workers=%d", pool->nr_workers);
+		pr_cont(" hung=%us workers=%d",
+			jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
+			pool->nr_workers);
 		if (pool->manager)
 			pr_cont(" manager: %d",
 				task_pid_nr(pool->manager->task));
@@ -5217,6 +5236,154 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
 static void workqueue_sysfs_unregister(struct workqueue_struct *wq)	{ }
 #endif	/* CONFIG_SYSFS */
 
+/*
+ * Workqueue watchdog.
+ *
+ * Stall may be caused by various bugs - missing WQ_MEM_RECLAIM, illegal
+ * flush dependency, a concurrency managed work item which stays RUNNING
+ * indefinitely.  Workqueue stalls can be very difficult to debug as the
+ * usual warning mechanisms don't trigger and internal workqueue state is
+ * largely opaque.
+ *
+ * Workqueue watchdog monitors all worker pools periodically and dumps
+ * state if some pools failed to make forward progress for a while where
+ * forward progress is defined as the first item on ->worklist changing.
+ *
+ * This mechanism is controlled through the kernel parameter
+ * "workqueue.watchdog_thresh" which can be updated at runtime through the
+ * corresponding sysfs parameter file.
+ */
+#ifdef CONFIG_WQ_WATCHDOG
+
+static void wq_watchdog_timer_fn(unsigned long data);
+
+static unsigned long wq_watchdog_thresh = 30;
+static struct timer_list wq_watchdog_timer =
+	TIMER_DEFERRED_INITIALIZER(wq_watchdog_timer_fn, 0, 0);
+
+static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
+static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
+
+static void wq_watchdog_reset_touched(void)
+{
+	int cpu;
+
+	wq_watchdog_touched = jiffies;
+	for_each_possible_cpu(cpu)
+		per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
+}
+
+static void wq_watchdog_timer_fn(unsigned long data)
+{
+	unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
+	bool lockup_detected = false;
+	struct worker_pool *pool;
+	int pi;
+
+	if (!thresh)
+		return;
+
+	rcu_read_lock();
+
+	for_each_pool(pool, pi) {
+		unsigned long pool_ts, touched, ts;
+
+		if (list_empty(&pool->worklist))
+			continue;
+
+		/* get the latest of pool and touched timestamps */
+		pool_ts = READ_ONCE(pool->watchdog_ts);
+		touched = READ_ONCE(wq_watchdog_touched);
+
+		if (time_after(pool_ts, touched))
+			ts = pool_ts;
+		else
+			ts = touched;
+
+		if (pool->cpu >= 0) {
+			unsigned long cpu_touched =
+				READ_ONCE(per_cpu(wq_watchdog_touched_cpu,
+						  pool->cpu));
+			if (time_after(cpu_touched, ts))
+				ts = cpu_touched;
+		}
+
+		/* did we stall? */
+		if (time_after(jiffies, ts + thresh)) {
+			lockup_detected = true;
+			pr_emerg("BUG: workqueue lockup - pool");
+			pr_cont_pool_info(pool);
+			pr_cont(" stuck for %us!\n",
+				jiffies_to_msecs(jiffies - pool_ts) / 1000);
+		}
+	}
+
+	rcu_read_unlock();
+
+	if (lockup_detected)
+		show_workqueue_state();
+
+	wq_watchdog_reset_touched();
+	mod_timer(&wq_watchdog_timer, jiffies + thresh);
+}
+
+void wq_watchdog_touch(int cpu)
+{
+	if (cpu >= 0)
+		per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
+	else
+		wq_watchdog_touched = jiffies;
+}
+
+static void wq_watchdog_set_thresh(unsigned long thresh)
+{
+	wq_watchdog_thresh = 0;
+	del_timer_sync(&wq_watchdog_timer);
+
+	if (thresh) {
+		wq_watchdog_thresh = thresh;
+		wq_watchdog_reset_touched();
+		mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
+	}
+}
+
+static int wq_watchdog_param_set_thresh(const char *val,
+					const struct kernel_param *kp)
+{
+	unsigned long thresh;
+	int ret;
+
+	ret = kstrtoul(val, 0, &thresh);
+	if (ret)
+		return ret;
+
+	if (system_wq)
+		wq_watchdog_set_thresh(thresh);
+	else
+		wq_watchdog_thresh = thresh;
+
+	return 0;
+}
+
+static const struct kernel_param_ops wq_watchdog_thresh_ops = {
+	.set	= wq_watchdog_param_set_thresh,
+	.get	= param_get_ulong,
+};
+
+module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
+		0644);
+
+static void wq_watchdog_init(void)
+{
+	wq_watchdog_set_thresh(wq_watchdog_thresh);
+}
+
+#else	/* CONFIG_WQ_WATCHDOG */
+
+static inline void wq_watchdog_init(void) { }
+
+#endif	/* CONFIG_WQ_WATCHDOG */
+
 static void __init wq_numa_init(void)
 {
 	cpumask_var_t *tbl;
@@ -5340,6 +5507,9 @@ static int __init init_workqueues(void)
 	       !system_unbound_wq || !system_freezable_wq ||
 	       !system_power_efficient_wq ||
 	       !system_freezable_power_efficient_wq);
+
+	wq_watchdog_init();
+
 	return 0;
 }
 early_initcall(init_workqueues);
