From 6301cb95c119ebf324bb96ee226fa9ddffad80a7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 17 Jul 2009 14:15:47 +0200 Subject: sched: fix nr_uninterruptible accounting of frozen tasks really commit e3c8ca8336 (sched: do not count frozen tasks toward load) broke the nr_uninterruptible accounting on freeze/thaw. On freeze the task is excluded from accounting with a check for (task->flags & PF_FROZEN), but that flag is cleared before the task is thawed. So while we prevent that the task with state TASK_UNINTERRUPTIBLE is accounted to nr_uninterruptible on freeze we decrement nr_uninterruptible on thaw. Use a separate flag which is handled by the freezing task itself. Set it before calling the scheduler with TASK_UNINTERRUPTIBLE state and clear it after we return from frozen state. Cc: Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 16a982e389fb..3ab08e4bb6b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -209,7 +209,7 @@ extern unsigned long long time_sync_thresh; ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) #define task_contributes_to_load(task) \ ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ - (task->flags & PF_FROZEN) == 0) + (task->flags & PF_FREEZING) == 0) #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -1680,6 +1680,7 @@ extern cputime_t task_gtime(struct task_struct *p); #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_FLUSHER 0x00001000 /* responsible for disk writeback */ #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ +#define PF_FREEZING 0x00004000 /* freeze in progress. do not account to load */ #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ -- cgit v1.2.3 From 591d2fb02ea80472d846c0b8507007806bdd69cc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Jul 2009 11:09:39 +0200 Subject: genirq: Delegate irq affinity setting to the irq thread irq_set_thread_affinity() calls set_cpus_allowed_ptr() which might sleep, but irq_set_thread_affinity() is called with desc->lock held and can be called from hard interrupt context as well. The code has another bug as it does not hold a ref on the task struct as required by set_cpus_allowed_ptr(). Just set the IRQTF_AFFINITY bit in action->thread_flags. The next time the thread runs it migrates itself. Solves all of the above problems nicely. Add kerneldoc to irq_set_thread_affinity() while at it. Signed-off-by: Thomas Gleixner LKML-Reference: --- include/linux/interrupt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 2721f07e9354..88b056ac5629 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -64,11 +64,13 @@ * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run * IRQTF_DIED - handler thread died * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed + * IRQTF_AFFINITY - irq thread is requested to adjust affinity */ enum { IRQTF_RUNTHREAD, IRQTF_DIED, IRQTF_WARNED, + IRQTF_AFFINITY, }; typedef irqreturn_t (*irq_handler_t)(int, void *); -- cgit v1.2.3 From 9ba5f005c994ad28e266a0cd14ef29354be382c9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Jul 2009 14:18:35 +0200 Subject: softirq: introduce tasklet_hrtimer infrastructure commit ca109491f (hrtimer: removing all ur callback modes) moved all hrtimer callbacks into hard interrupt context when high resolution timers are active. That breaks code which relied on the assumption that the callback happens in softirq context. Provide a generic infrastructure which combines tasklets and hrtimers together to provide an in-softirq hrtimer experience. Signed-off-by: Peter Zijlstra Cc: torvalds@linux-foundation.org Cc: kaber@trash.net Cc: David Miller LKML-Reference: <1248265724.27058.1366.camel@twins> Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 2721f07e9354..fd4c9c63c757 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -517,6 +518,31 @@ extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); extern void tasklet_init(struct tasklet_struct *t, void (*func)(unsigned long), unsigned long data); +struct tasklet_hrtimer { + struct hrtimer timer; + struct tasklet_struct tasklet; + enum hrtimer_restart (*function)(struct hrtimer *); +}; + +extern void +tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer, + enum hrtimer_restart (*function)(struct hrtimer *), + clockid_t which_clock, enum hrtimer_mode mode); + +static inline +int tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time, + const enum hrtimer_mode mode) +{ + return hrtimer_start(&ttimer->timer, time, mode); +} + +static inline +void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) +{ + hrtimer_cancel(&ttimer->timer); + tasklet_kill(&ttimer->tasklet); +} + /* * Autoprobing for irqs: * -- cgit v1.2.3 From 7f453c24b95a085fc7bd35d53b33abc4dc5a048b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 21 Jul 2009 13:19:40 +0200 Subject: perf_counter: PERF_SAMPLE_ID and inherited counters Anton noted that for inherited counters the counter-id as provided by PERF_SAMPLE_ID isn't mappable to the id found through PERF_RECORD_ID because each inherited counter gets its own id. His suggestion was to always return the parent counter id, since that is the primary counter id as exposed. However, these inherited counters have a unique identifier so that events like PERF_EVENT_PERIOD and PERF_EVENT_THROTTLE can be specific about which counter gets modified, which is important when trying to normalize the sample streams. This patch removes PERF_EVENT_PERIOD in favour of PERF_SAMPLE_PERIOD, which is more useful anyway, since changing periods became a lot more common than initially thought -- rendering PERF_EVENT_PERIOD the less useful solution (also, PERF_SAMPLE_PERIOD reports the more accurate value, since it reports the value used to trigger the overflow, whereas PERF_EVENT_PERIOD simply reports the requested period changed, which might only take effect on the next cycle). This still leaves us PERF_EVENT_THROTTLE to consider, but since that _should_ be a rare occurrence, and linking it to a primary id is the most useful bit to diagnose the problem, we introduce a PERF_SAMPLE_STREAM_ID, for those few cases where the full reconstruction is important. [Does change the ABI a little, but I see no other way out] Suggested-by: Anton Blanchard Signed-off-by: Peter Zijlstra LKML-Reference: <1248095846.15751.8781.camel@twins> --- include/linux/perf_counter.h | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 5e970c7d3fd5..bd15d7a5f5ce 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -120,8 +120,9 @@ enum perf_counter_sample_format { PERF_SAMPLE_ID = 1U << 6, PERF_SAMPLE_CPU = 1U << 7, PERF_SAMPLE_PERIOD = 1U << 8, + PERF_SAMPLE_STREAM_ID = 1U << 9, - PERF_SAMPLE_MAX = 1U << 9, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 10, /* non-ABI */ }; /* @@ -312,16 +313,7 @@ enum perf_event_type { * struct perf_event_header header; * u64 time; * u64 id; - * u64 sample_period; - * }; - */ - PERF_EVENT_PERIOD = 4, - - /* - * struct { - * struct perf_event_header header; - * u64 time; - * u64 id; + * u64 stream_id; * }; */ PERF_EVENT_THROTTLE = 5, @@ -356,6 +348,7 @@ enum perf_event_type { * { u64 time; } && PERF_SAMPLE_TIME * { u64 addr; } && PERF_SAMPLE_ADDR * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID * { u32 cpu, res; } && PERF_SAMPLE_CPU * { u64 period; } && PERF_SAMPLE_PERIOD * -- cgit v1.2.3