From 77ae365eca895061c8bf2b2e3ae1d9ea62869739 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 27 Mar 2009 11:00:29 -0400 Subject: ring-buffer: make lockless This patch converts the ring buffers into a completely lockless buffer recording system. The read side still takes locks since we still serialize readers. But the writers are the ones that must be lockless (those can happen in NMIs). The main change is to the "head_page" pointer. We write to the tail, and read from the head. The "head_page" pointer in the cpu buffer is now just a reference to where to look. The real head page is now kept in the head_page->list->prev->next pointer. That is, in the list head of the previous page we set flags. The list pages are allocated to be aligned such that the lowest significant bits are always zero pointing to the list. This gives us play to put in flags to their pointers. bit 0: set when the page is a head page bit 1: set when the writer is moving the page (for overwrite mode) cmpxchg is used to update the pointer. When the writer wraps the buffer and the tail meets the head, in overwrite mode, the writer must move the head page forward. It first uses cmpxchg to change the pointer flag from 1 to 2. Once this is done, the reader on another CPU will not take the page from the buffer. The writers need to protect against interrupts (we don't bother with disabling interrupts because NMIs are allowed to write too). After the writer sets the pointer flag to 2, it takes care to manage interrupts coming in. This is discribed in detail within the comments of the code. Changes in version 2: - Let reader reset entries value of header page. - Fix tail page passing commit page on reader page test. - Always increment entries and write counter in rb_tail_page_update - Add safety check in rb_set_commit_to_write to break out of infinite loop - add mask in rb_is_reader_page [ Impact: lock free writing to the ring buffer ] Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/ring_buffer.h') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 29f8599e6bea..7fca71693ae7 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -170,7 +170,6 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer); unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); -unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, -- cgit v1.2.3 From dc892f7339af2d125478b800edb9081d6149665b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 3 Sep 2009 15:33:41 -0400 Subject: ring-buffer: remove ring_buffer_event_discard The function ring_buffer_event_discard can be used on any item in the ring buffer, even after the item was committed. This function provides no safety nets and is very race prone. An item may be safely removed from the ring buffer before it is committed with the ring_buffer_discard_commit. Since there are currently no users of this function, and because this function is racey and error prone, this patch removes it altogether. Note, removing this function also allows the counters to ignore all discarded events (patches will follow). Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux/ring_buffer.h') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 7fca71693ae7..e061b4ecdc3a 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -74,20 +74,6 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event) return event->time_delta; } -/* - * ring_buffer_event_discard can discard any event in the ring buffer. - * it is up to the caller to protect against a reader from - * consuming it or a writer from wrapping and replacing it. - * - * No external protection is needed if this is called before - * the event is commited. But in that case it would be better to - * use ring_buffer_discard_commit. - * - * Note, if an event that has not been committed is discarded - * with ring_buffer_event_discard, it must still be committed. - */ -void ring_buffer_event_discard(struct ring_buffer_event *event); - /* * ring_buffer_discard_commit will remove an event that has not * ben committed yet. If this is used, then ring_buffer_unlock_commit -- cgit v1.2.3 From 85bac32c4a52c592b857f2c360cc5ec93a097d70 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 4 Sep 2009 14:24:40 -0400 Subject: ring-buffer: only enable ring_buffer_swap_cpu when needed Since the ability to swap the cpu buffers adds a small overhead to the recording of a trace, we only want to add it when needed. Only the irqsoff and preemptoff tracers use this feature, and both are not recommended for production kernels. This patch disables its use when neither irqsoff nor preemptoff is configured. Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux/ring_buffer.h') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index e061b4ecdc3a..5fcc31ed5771 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -140,8 +140,17 @@ unsigned long ring_buffer_size(struct ring_buffer *buffer); void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu); void ring_buffer_reset(struct ring_buffer *buffer); +#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, struct ring_buffer *buffer_b, int cpu); +#else +static inline int +ring_buffer_swap_cpu(struct ring_buffer *buffer_a, + struct ring_buffer *buffer_b, int cpu) +{ + return -ENODEV; +} +#endif int ring_buffer_empty(struct ring_buffer *buffer); int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu); -- cgit v1.2.3 From 66a8cb95ed04025664d1db4e952155ee1dccd048 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 31 Mar 2010 13:21:56 -0400 Subject: ring-buffer: Add place holder recording of dropped events Currently, when the ring buffer drops events, it does not record the fact that it did so. It does inform the writer that the event was dropped by returning a NULL event, but it does not put in any place holder where the event was dropped. This is not a trivial thing to add because the ring buffer mostly runs in overwrite (flight recorder) mode. That is, when the ring buffer is full, new data will overwrite old data. In a produce/consumer mode, where new data is simply dropped when the ring buffer is full, it is trivial to add the placeholder for dropped events. When there's more room to write new data, then a special event can be added to notify the reader about the dropped events. But in overwrite mode, any new write can overwrite events. A place holder can not be inserted into the ring buffer since there never may be room. A reader could also come in at anytime and miss the placeholder. Luckily, the way the ring buffer works, the read side can find out if events were lost or not, and how many events. Everytime a write takes place, if it overwrites the header page (the next read) it updates a "overrun" variable that keeps track of the number of lost events. When a reader swaps out a page from the ring buffer, it can record this number, perfom the swap, and then check to see if the number changed, and take the diff if it has, which would be the number of events dropped. This can be stored by the reader and returned to callers of the reader. Since the reader page swap will fail if the writer moved the head page since the time the reader page set up the swap, this gives room to record the overruns without worrying about races. If the reader sets up the pages, records the overrun, than performs the swap, if the swap succeeds, then the overrun variable has not been updated since the setup before the swap. For binary readers of the ring buffer, a flag is set in the header of each sub page (sub buffer) of the ring buffer. This flag is embedded in the size field of the data on the sub buffer, in the 31st bit (the size can be 32 or 64 bits depending on the architecture), but only 27 bits needs to be used for the actual size (less actually). We could add a new field in the sub buffer header to also record the number of events dropped since the last read, but this will change the format of the binary ring buffer a bit too much. Perhaps this change can be made if the information on the number of events dropped is considered important enough. Note, the notification of dropped events is only used by consuming reads or peeking at the ring buffer. Iterating over the ring buffer does not keep this information because the necessary data is only available when a page swap is made, and the iterator does not swap out pages. Cc: Robert Richter Cc: Andi Kleen Cc: Li Zefan Cc: Arnaldo Carvalho de Melo Cc: "Luis Claudio R. Goncalves" Cc: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux/ring_buffer.h') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 5fcc31ed5771..c8297761e414 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -120,9 +120,11 @@ int ring_buffer_write(struct ring_buffer *buffer, unsigned long length, void *data); struct ring_buffer_event * -ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts); +ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, + unsigned long *lost_events); struct ring_buffer_event * -ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts); +ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, + unsigned long *lost_events); struct ring_buffer_iter * ring_buffer_read_start(struct ring_buffer *buffer, int cpu); -- cgit v1.2.3 From 72c9ddfd4c5bf54ef03cfdf57026416cb678eeba Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 20 Apr 2010 15:47:11 -0700 Subject: ring-buffer: Make non-consuming read less expensive with lots of cpus. When performing a non-consuming read, a synchronize_sched() is performed once for every cpu which is actively tracing. This is very expensive, and can make it take several seconds to open up the 'trace' file with lots of cpus. Only one synchronize_sched() call is actually necessary. What is desired is for all cpus to see the disabling state change. So we transform the existing sequence: for_each_cpu() { ring_buffer_read_start(); } where each ring_buffer_start() call performs a synchronize_sched(), into the following: for_each_cpu() { ring_buffer_read_prepare(); } ring_buffer_read_prepare_sync(); for_each_cpu() { ring_buffer_read_start(); } wherein only the single ring_buffer_read_prepare_sync() call needs to do the synchronize_sched(). The first phase, via ring_buffer_read_prepare(), allocates the 'iter' memory and increments ->record_disabled. In the second phase, ring_buffer_read_prepare_sync() makes sure this ->record_disabled state is visible fully to all cpus. And in the final third phase, the ring_buffer_read_start() calls reset the 'iter' objects allocated in the first phase since we now know that none of the cpus are adding trace entries any more. This makes openning the 'trace' file nearly instantaneous on a sparc64 Niagara2 box with 128 cpus tracing. Signed-off-by: David S. Miller LKML-Reference: <20100420.154711.11246950.davem@davemloft.net> Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux/ring_buffer.h') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index c8297761e414..25b4f686d918 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -127,7 +127,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); struct ring_buffer_iter * -ring_buffer_read_start(struct ring_buffer *buffer, int cpu); +ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu); +void ring_buffer_read_prepare_sync(void); +void ring_buffer_read_start(struct ring_buffer_iter *iter); void ring_buffer_read_finish(struct ring_buffer_iter *iter); struct ring_buffer_event * -- cgit v1.2.3 From afcc5c6872f0215d515a637041bb51f8691a8ea7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 20 Oct 2010 13:37:56 -0400 Subject: ring-buffer: Remove ring_buffer_event_time_delta() The ring_buffer_event_time_delta() static inline function does not have any users. Remove it. Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux/ring_buffer.h') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 25b4f686d918..8d3a2486544d 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -62,18 +62,6 @@ enum ring_buffer_type { unsigned ring_buffer_event_length(struct ring_buffer_event *event); void *ring_buffer_event_data(struct ring_buffer_event *event); -/** - * ring_buffer_event_time_delta - return the delta timestamp of the event - * @event: the event to get the delta timestamp of - * - * The delta timestamp is the 27 bit timestamp since the last event. - */ -static inline unsigned -ring_buffer_event_time_delta(struct ring_buffer_event *event) -{ - return event->time_delta; -} - /* * ring_buffer_discard_commit will remove an event that has not * ben committed yet. If this is used, then ring_buffer_unlock_commit -- cgit v1.2.3