summaryrefslogtreecommitdiff
path: root/kernel/trace/ring_buffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--kernel/trace/ring_buffer.c110
1 files changed, 57 insertions, 53 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index cc2f66f68dc5..2d75c94ae87d 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -543,7 +543,7 @@ static void rb_wake_up_waiters(struct irq_work *work)
* as data is added to any of the @buffer's cpu buffers. Otherwise
* it will wait for data to be added to a specific cpu buffer.
*/
-void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
DEFINE_WAIT(wait);
@@ -557,6 +557,8 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
if (cpu == RING_BUFFER_ALL_CPUS)
work = &buffer->irq_work;
else {
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return -ENODEV;
cpu_buffer = buffer->buffers[cpu];
work = &cpu_buffer->irq_work;
}
@@ -591,6 +593,7 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
schedule();
finish_wait(&work->waiters, &wait);
+ return 0;
}
/**
@@ -613,10 +616,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
struct ring_buffer_per_cpu *cpu_buffer;
struct rb_irq_work *work;
- if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
- (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
- return POLLIN | POLLRDNORM;
-
if (cpu == RING_BUFFER_ALL_CPUS)
work = &buffer->irq_work;
else {
@@ -627,8 +626,22 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
work = &cpu_buffer->irq_work;
}
- work->waiters_pending = true;
poll_wait(filp, &work->waiters, poll_table);
+ work->waiters_pending = true;
+ /*
+ * There's a tight race between setting the waiters_pending and
+ * checking if the ring buffer is empty. Once the waiters_pending bit
+ * is set, the next event will wake the task up, but we can get stuck
+ * if there's only a single event in.
+ *
+ * FIXME: Ideally, we need a memory barrier on the writer side as well,
+ * but adding a memory barrier to all events will cause too much of a
+ * performance hit in the fast path. We only need a memory barrier when
+ * the buffer goes from empty to having content. But as this race is
+ * extremely small, and it's not a problem if another event comes in, we
+ * will fix it later.
+ */
+ smp_mb();
if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
(cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
@@ -1301,7 +1314,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
* In that off case, we need to allocate for all possible cpus.
*/
#ifdef CONFIG_HOTPLUG_CPU
- get_online_cpus();
+ cpu_notifier_register_begin();
cpumask_copy(buffer->cpumask, cpu_online_mask);
#else
cpumask_copy(buffer->cpumask, cpu_possible_mask);
@@ -1324,10 +1337,10 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
#ifdef CONFIG_HOTPLUG_CPU
buffer->cpu_notify.notifier_call = rb_cpu_notify;
buffer->cpu_notify.priority = 0;
- register_cpu_notifier(&buffer->cpu_notify);
+ __register_cpu_notifier(&buffer->cpu_notify);
+ cpu_notifier_register_done();
#endif
- put_online_cpus();
mutex_init(&buffer->mutex);
return buffer;
@@ -1341,7 +1354,9 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
fail_free_cpumask:
free_cpumask_var(buffer->cpumask);
- put_online_cpus();
+#ifdef CONFIG_HOTPLUG_CPU
+ cpu_notifier_register_done();
+#endif
fail_free_buffer:
kfree(buffer);
@@ -1358,16 +1373,17 @@ ring_buffer_free(struct ring_buffer *buffer)
{
int cpu;
- get_online_cpus();
-
#ifdef CONFIG_HOTPLUG_CPU
- unregister_cpu_notifier(&buffer->cpu_notify);
+ cpu_notifier_register_begin();
+ __unregister_cpu_notifier(&buffer->cpu_notify);
#endif
for_each_buffer_cpu(buffer, cpu)
rb_free_cpu_buffer(buffer->buffers[cpu]);
- put_online_cpus();
+#ifdef CONFIG_HOTPLUG_CPU
+ cpu_notifier_register_done();
+#endif
kfree(buffer->buffers);
free_cpumask_var(buffer->cpumask);
@@ -1687,22 +1703,14 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
if (!cpu_buffer->nr_pages_to_update)
continue;
- /* The update must run on the CPU that is being updated. */
- preempt_disable();
- if (cpu == smp_processor_id() || !cpu_online(cpu)) {
+ /* Can't run something on an offline CPU. */
+ if (!cpu_online(cpu)) {
rb_update_pages(cpu_buffer);
cpu_buffer->nr_pages_to_update = 0;
} else {
- /*
- * Can not disable preemption for schedule_work_on()
- * on PREEMPT_RT.
- */
- preempt_enable();
schedule_work_on(cpu,
&cpu_buffer->update_pages_work);
- preempt_disable();
}
- preempt_enable();
}
/* wait for all the updates to complete */
@@ -1740,22 +1748,14 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
get_online_cpus();
- preempt_disable();
- /* The update must run on the CPU that is being updated. */
- if (cpu_id == smp_processor_id() || !cpu_online(cpu_id))
+ /* Can't run something on an offline CPU. */
+ if (!cpu_online(cpu_id))
rb_update_pages(cpu_buffer);
else {
- /*
- * Can not disable preemption for schedule_work_on()
- * on PREEMPT_RT.
- */
- preempt_enable();
schedule_work_on(cpu_id,
&cpu_buffer->update_pages_work);
wait_for_completion(&cpu_buffer->update_done);
- preempt_disable();
}
- preempt_enable();
cpu_buffer->nr_pages_to_update = 0;
put_online_cpus();
@@ -1982,7 +1982,7 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
/**
* rb_update_event - update event type and data
- * @event: the even to update
+ * @event: the event to update
* @type: the type of event
* @length: the size of the event field in the ring buffer
*
@@ -2397,6 +2397,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
write &= RB_WRITE_MASK;
tail = write - length;
+ /*
+ * If this is the first commit on the page, then it has the same
+ * timestamp as the page itself.
+ */
+ if (!tail)
+ delta = 0;
+
/* See if we shot pass the end of this buffer page */
if (unlikely(write > BUF_PAGE_SIZE))
return rb_move_tail(cpu_buffer, length, tail,
@@ -2558,7 +2565,7 @@ rb_reserve_next_event(struct ring_buffer *buffer,
if (unlikely(test_time_stamp(delta))) {
int local_clock_stable = 1;
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
- local_clock_stable = sched_clock_stable;
+ local_clock_stable = sched_clock_stable();
#endif
WARN_ONCE(delta > (1ULL << 59),
KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
@@ -3348,21 +3355,16 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
/* Iterator usage is expected to have record disabled */
- if (list_empty(&cpu_buffer->reader_page->list)) {
- iter->head_page = rb_set_head_page(cpu_buffer);
- if (unlikely(!iter->head_page))
- return;
- iter->head = iter->head_page->read;
- } else {
- iter->head_page = cpu_buffer->reader_page;
- iter->head = cpu_buffer->reader_page->read;
- }
+ iter->head_page = cpu_buffer->reader_page;
+ iter->head = cpu_buffer->reader_page->read;
+
+ iter->cache_reader_page = iter->head_page;
+ iter->cache_read = cpu_buffer->read;
+
if (iter->head)
iter->read_stamp = cpu_buffer->read_stamp;
else
iter->read_stamp = iter->head_page->page->time_stamp;
- iter->cache_reader_page = cpu_buffer->reader_page;
- iter->cache_read = cpu_buffer->read;
}
/**
@@ -3755,18 +3757,20 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
return NULL;
/*
- * We repeat when a time extend is encountered.
- * Since the time extend is always attached to a data event,
- * we should never loop more than once.
- * (We never hit the following condition more than twice).
+ * We repeat when a time extend is encountered or we hit
+ * the end of the page. Since the time extend is always attached
+ * to a data event, we should never loop more than three times.
+ * Once for going to next page, once on time extend, and
+ * finally once to get the event.
+ * (We never hit the following condition more than thrice).
*/
- if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3))
return NULL;
if (rb_per_cpu_empty(cpu_buffer))
return NULL;
- if (iter->head >= local_read(&iter->head_page->page->commit)) {
+ if (iter->head >= rb_page_size(iter->head_page)) {
rb_inc_iter(iter);
goto again;
}