diff options
Diffstat (limited to 'kernel/trace/ring_buffer.c')
| -rw-r--r-- | kernel/trace/ring_buffer.c | 110 |
1 files changed, 57 insertions, 53 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index cc2f66f68dc5..2d75c94ae87d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -543,7 +543,7 @@ static void rb_wake_up_waiters(struct irq_work *work) * as data is added to any of the @buffer's cpu buffers. Otherwise * it will wait for data to be added to a specific cpu buffer. */ -void ring_buffer_wait(struct ring_buffer *buffer, int cpu) +int ring_buffer_wait(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; DEFINE_WAIT(wait); @@ -557,6 +557,8 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu) if (cpu == RING_BUFFER_ALL_CPUS) work = &buffer->irq_work; else { + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return -ENODEV; cpu_buffer = buffer->buffers[cpu]; work = &cpu_buffer->irq_work; } @@ -591,6 +593,7 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu) schedule(); finish_wait(&work->waiters, &wait); + return 0; } /** @@ -613,10 +616,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, struct ring_buffer_per_cpu *cpu_buffer; struct rb_irq_work *work; - if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || - (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) - return POLLIN | POLLRDNORM; - if (cpu == RING_BUFFER_ALL_CPUS) work = &buffer->irq_work; else { @@ -627,8 +626,22 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, work = &cpu_buffer->irq_work; } - work->waiters_pending = true; poll_wait(filp, &work->waiters, poll_table); + work->waiters_pending = true; + /* + * There's a tight race between setting the waiters_pending and + * checking if the ring buffer is empty. Once the waiters_pending bit + * is set, the next event will wake the task up, but we can get stuck + * if there's only a single event in. + * + * FIXME: Ideally, we need a memory barrier on the writer side as well, + * but adding a memory barrier to all events will cause too much of a + * performance hit in the fast path. We only need a memory barrier when + * the buffer goes from empty to having content. But as this race is + * extremely small, and it's not a problem if another event comes in, we + * will fix it later. + */ + smp_mb(); if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) @@ -1301,7 +1314,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, * In that off case, we need to allocate for all possible cpus. */ #ifdef CONFIG_HOTPLUG_CPU - get_online_cpus(); + cpu_notifier_register_begin(); cpumask_copy(buffer->cpumask, cpu_online_mask); #else cpumask_copy(buffer->cpumask, cpu_possible_mask); @@ -1324,10 +1337,10 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, #ifdef CONFIG_HOTPLUG_CPU buffer->cpu_notify.notifier_call = rb_cpu_notify; buffer->cpu_notify.priority = 0; - register_cpu_notifier(&buffer->cpu_notify); + __register_cpu_notifier(&buffer->cpu_notify); + cpu_notifier_register_done(); #endif - put_online_cpus(); mutex_init(&buffer->mutex); return buffer; @@ -1341,7 +1354,9 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, fail_free_cpumask: free_cpumask_var(buffer->cpumask); - put_online_cpus(); +#ifdef CONFIG_HOTPLUG_CPU + cpu_notifier_register_done(); +#endif fail_free_buffer: kfree(buffer); @@ -1358,16 +1373,17 @@ ring_buffer_free(struct ring_buffer *buffer) { int cpu; - get_online_cpus(); - #ifdef CONFIG_HOTPLUG_CPU - unregister_cpu_notifier(&buffer->cpu_notify); + cpu_notifier_register_begin(); + __unregister_cpu_notifier(&buffer->cpu_notify); #endif for_each_buffer_cpu(buffer, cpu) rb_free_cpu_buffer(buffer->buffers[cpu]); - put_online_cpus(); +#ifdef CONFIG_HOTPLUG_CPU + cpu_notifier_register_done(); +#endif kfree(buffer->buffers); free_cpumask_var(buffer->cpumask); @@ -1687,22 +1703,14 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, if (!cpu_buffer->nr_pages_to_update) continue; - /* The update must run on the CPU that is being updated. */ - preempt_disable(); - if (cpu == smp_processor_id() || !cpu_online(cpu)) { + /* Can't run something on an offline CPU. */ + if (!cpu_online(cpu)) { rb_update_pages(cpu_buffer); cpu_buffer->nr_pages_to_update = 0; } else { - /* - * Can not disable preemption for schedule_work_on() - * on PREEMPT_RT. - */ - preempt_enable(); schedule_work_on(cpu, &cpu_buffer->update_pages_work); - preempt_disable(); } - preempt_enable(); } /* wait for all the updates to complete */ @@ -1740,22 +1748,14 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, get_online_cpus(); - preempt_disable(); - /* The update must run on the CPU that is being updated. */ - if (cpu_id == smp_processor_id() || !cpu_online(cpu_id)) + /* Can't run something on an offline CPU. */ + if (!cpu_online(cpu_id)) rb_update_pages(cpu_buffer); else { - /* - * Can not disable preemption for schedule_work_on() - * on PREEMPT_RT. - */ - preempt_enable(); schedule_work_on(cpu_id, &cpu_buffer->update_pages_work); wait_for_completion(&cpu_buffer->update_done); - preempt_disable(); } - preempt_enable(); cpu_buffer->nr_pages_to_update = 0; put_online_cpus(); @@ -1982,7 +1982,7 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) /** * rb_update_event - update event type and data - * @event: the even to update + * @event: the event to update * @type: the type of event * @length: the size of the event field in the ring buffer * @@ -2397,6 +2397,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, write &= RB_WRITE_MASK; tail = write - length; + /* + * If this is the first commit on the page, then it has the same + * timestamp as the page itself. + */ + if (!tail) + delta = 0; + /* See if we shot pass the end of this buffer page */ if (unlikely(write > BUF_PAGE_SIZE)) return rb_move_tail(cpu_buffer, length, tail, @@ -2558,7 +2565,7 @@ rb_reserve_next_event(struct ring_buffer *buffer, if (unlikely(test_time_stamp(delta))) { int local_clock_stable = 1; #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK - local_clock_stable = sched_clock_stable; + local_clock_stable = sched_clock_stable(); #endif WARN_ONCE(delta > (1ULL << 59), KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", @@ -3348,21 +3355,16 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; /* Iterator usage is expected to have record disabled */ - if (list_empty(&cpu_buffer->reader_page->list)) { - iter->head_page = rb_set_head_page(cpu_buffer); - if (unlikely(!iter->head_page)) - return; - iter->head = iter->head_page->read; - } else { - iter->head_page = cpu_buffer->reader_page; - iter->head = cpu_buffer->reader_page->read; - } + iter->head_page = cpu_buffer->reader_page; + iter->head = cpu_buffer->reader_page->read; + + iter->cache_reader_page = iter->head_page; + iter->cache_read = cpu_buffer->read; + if (iter->head) iter->read_stamp = cpu_buffer->read_stamp; else iter->read_stamp = iter->head_page->page->time_stamp; - iter->cache_reader_page = cpu_buffer->reader_page; - iter->cache_read = cpu_buffer->read; } /** @@ -3755,18 +3757,20 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) return NULL; /* - * We repeat when a time extend is encountered. - * Since the time extend is always attached to a data event, - * we should never loop more than once. - * (We never hit the following condition more than twice). + * We repeat when a time extend is encountered or we hit + * the end of the page. Since the time extend is always attached + * to a data event, we should never loop more than three times. + * Once for going to next page, once on time extend, and + * finally once to get the event. + * (We never hit the following condition more than thrice). */ - if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) + if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) return NULL; if (rb_per_cpu_empty(cpu_buffer)) return NULL; - if (iter->head >= local_read(&iter->head_page->page->commit)) { + if (iter->head >= rb_page_size(iter->head_page)) { rb_inc_iter(iter); goto again; } |
