summaryrefslogtreecommitdiff
path: root/kernel/trace
diff options
context:
space:
mode:
authorGreg Kroah-Hartman <gregkh@google.com>2020-11-18 19:35:00 +0100
committerGreg Kroah-Hartman <gregkh@google.com>2020-11-18 19:35:00 +0100
commit4e6c6e008513d98396bf9f92bf38b83b6980fa54 (patch)
treecf5334d164312580a0b114540be244965076ecc0 /kernel/trace
parent34b1d1174c2c7054c01cfa39d274421d80649386 (diff)
parentb71e57af961fc0cc69998a13dea631ba2229333e (diff)
Merge 4.4.244 into android-4.4-p
Changes in 4.4.244 ring-buffer: Fix recursion protection transitions between interrupt context gfs2: Wake up when sd_glock_disposal becomes zero mm: mempolicy: fix potential pte_unmap_unlock pte error time: Prevent undefined behaviour in timespec64_to_ns() btrfs: reschedule when cloning lots of extents net: xfrm: fix a race condition during allocing spi perf tools: Add missing swap for ino_generation ALSA: hda: prevent undefined shift in snd_hdac_ext_bus_get_link() can: dev: can_get_echo_skb(): prevent call to kfree_skb() in hard IRQ context can: dev: __can_get_echo_skb(): fix real payload length return value for RTR frames can: can_create_echo_skb(): fix echo skb generation: always use skb_clone() can: peak_usb: add range checking in decode operations can: peak_usb: peak_usb_get_ts_time(): fix timestamp wrapping Btrfs: fix missing error return if writeback for extent buffer never started pinctrl: devicetree: Avoid taking direct reference to device name string i40e: Wrong truncation from u16 to u8 i40e: Fix of memory leak and integer truncation in i40e_virtchnl.c geneve: add transport ports in route lookup for geneve ath9k_htc: Use appropriate rs_datalen type usb: gadget: goku_udc: fix potential crashes in probe gfs2: Free rd_bits later in gfs2_clear_rgrpd to fix use-after-free gfs2: check for live vs. read-only file system in gfs2_fitrim drm/amdgpu: perform srbm soft reset always on SDMA resume mac80211: fix use of skb payload instead of header cfg80211: regulatory: Fix inconsistent format argument iommu/amd: Increase interrupt remapping table limit to 512 entries xfs: fix a missing unlock on error in xfs_fs_map_blocks of/address: Fix of_node memory leak in of_dma_is_coherent cosa: Add missing kfree in error path of cosa_write perf: Fix get_recursion_context() ext4: correctly report "not supported" for {usr,grp}jquota when !CONFIG_QUOTA ext4: unlock xattr_sem properly in ext4_inline_data_truncate() usb: cdc-acm: Add DISABLE_ECHO for Renesas USB Download mode mei: protect mei_cl_mtu from null dereference ocfs2: initialize ip_next_orphan don't dump the threads that had been already exiting when zapped. drm/gma500: Fix out-of-bounds access to struct drm_device.vblank[] pinctrl: amd: use higher precision for 512 RtcClk pinctrl: amd: fix incorrect way to disable debounce filter swiotlb: fix "x86: Don't panic if can not alloc buffer for swiotlb" IPv6: Set SIT tunnel hard_header_len to zero net/af_iucv: fix null pointer dereference on shutdown net/x25: Fix null-ptr-deref in x25_connect net: Update window_clamp if SOCK_RCVBUF is set random32: make prandom_u32() output unpredictable x86/speculation: Allow IBPB to be conditionally enabled on CPUs with always-on STIBP xen/events: avoid removing an event channel while handling it xen/events: add a proper barrier to 2-level uevent unmasking xen/events: fix race in evtchn_fifo_unmask() xen/events: add a new "late EOI" evtchn framework xen/blkback: use lateeoi irq binding xen/netback: use lateeoi irq binding xen/scsiback: use lateeoi irq binding xen/pciback: use lateeoi irq binding xen/events: switch user event channels to lateeoi model xen/events: use a common cpu hotplug hook for event channels xen/events: defer eoi in case of excessive number of events xen/events: block rogue events for some time perf/core: Fix race in the perf_mmap_close() function Revert "kernel/reboot.c: convert simple_strtoul to kstrtoint" reboot: fix overflow parsing reboot cpu number ext4: fix leaking sysfs kobject after failed mount Convert trailing spaces and periods in path components Linux 4.4.244 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: I70bf4c5ac9248a8ca3383b9b0c4871729606e75e
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/ring_buffer.c54
1 files changed, 44 insertions, 10 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1c1ecc1d49ad..547a3a5ac57b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -416,14 +416,16 @@ struct rb_event_info {
/*
* Used for which event context the event is in.
- * NMI = 0
- * IRQ = 1
- * SOFTIRQ = 2
- * NORMAL = 3
+ * TRANSITION = 0
+ * NMI = 1
+ * IRQ = 2
+ * SOFTIRQ = 3
+ * NORMAL = 4
*
* See trace_recursive_lock() comment below for more details.
*/
enum {
+ RB_CTX_TRANSITION,
RB_CTX_NMI,
RB_CTX_IRQ,
RB_CTX_SOFTIRQ,
@@ -2585,10 +2587,10 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* a bit of overhead in something as critical as function tracing,
* we use a bitmask trick.
*
- * bit 0 = NMI context
- * bit 1 = IRQ context
- * bit 2 = SoftIRQ context
- * bit 3 = normal context.
+ * bit 1 = NMI context
+ * bit 2 = IRQ context
+ * bit 3 = SoftIRQ context
+ * bit 4 = normal context.
*
* This works because this is the order of contexts that can
* preempt other contexts. A SoftIRQ never preempts an IRQ
@@ -2611,6 +2613,30 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* The least significant bit can be cleared this way, and it
* just so happens that it is the same bit corresponding to
* the current context.
+ *
+ * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit
+ * is set when a recursion is detected at the current context, and if
+ * the TRANSITION bit is already set, it will fail the recursion.
+ * This is needed because there's a lag between the changing of
+ * interrupt context and updating the preempt count. In this case,
+ * a false positive will be found. To handle this, one extra recursion
+ * is allowed, and this is done by the TRANSITION bit. If the TRANSITION
+ * bit is already set, then it is considered a recursion and the function
+ * ends. Otherwise, the TRANSITION bit is set, and that bit is returned.
+ *
+ * On the trace_recursive_unlock(), the TRANSITION bit will be the first
+ * to be cleared. Even if it wasn't the context that set it. That is,
+ * if an interrupt comes in while NORMAL bit is set and the ring buffer
+ * is called before preempt_count() is updated, since the check will
+ * be on the NORMAL bit, the TRANSITION bit will then be set. If an
+ * NMI then comes in, it will set the NMI bit, but when the NMI code
+ * does the trace_recursive_unlock() it will clear the TRANSTION bit
+ * and leave the NMI bit set. But this is fine, because the interrupt
+ * code that set the TRANSITION bit will then clear the NMI bit when it
+ * calls trace_recursive_unlock(). If another NMI comes in, it will
+ * set the TRANSITION bit and continue.
+ *
+ * Note: The TRANSITION bit only handles a single transition between context.
*/
static __always_inline int
@@ -2629,8 +2655,16 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
} else
bit = RB_CTX_NORMAL;
- if (unlikely(val & (1 << bit)))
- return 1;
+ if (unlikely(val & (1 << bit))) {
+ /*
+ * It is possible that this was called by transitioning
+ * between interrupt context, and preempt_count() has not
+ * been updated yet. In this case, use the TRANSITION bit.
+ */
+ bit = RB_CTX_TRANSITION;
+ if (val & (1 << bit))
+ return 1;
+ }
val |= (1 << bit);
cpu_buffer->current_context = val;