From a9a76a3e318ea559365210916378380109199121 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 8 Feb 2017 11:52:29 +0100 Subject: xfrm: policy: init locks early commit c282222a45cb9503cbfbebfdb60491f06ae84b49 upstream. Dmitry reports following splat: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 0 PID: 13059 Comm: syz-executor1 Not tainted 4.10.0-rc7-next-20170207 #1 [..] spin_lock_bh include/linux/spinlock.h:304 [inline] xfrm_policy_flush+0x32/0x470 net/xfrm/xfrm_policy.c:963 xfrm_policy_fini+0xbf/0x560 net/xfrm/xfrm_policy.c:3041 xfrm_net_init+0x79f/0x9e0 net/xfrm/xfrm_policy.c:3091 ops_init+0x10a/0x530 net/core/net_namespace.c:115 setup_net+0x2ed/0x690 net/core/net_namespace.c:291 copy_net_ns+0x26c/0x530 net/core/net_namespace.c:396 create_new_namespaces+0x409/0x860 kernel/nsproxy.c:106 unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:205 SYSC_unshare kernel/fork.c:2281 [inline] Problem is that when we get error during xfrm_net_init we will call xfrm_policy_fini which will acquire xfrm_policy_lock before it was initialized. Just move it around so locks get set up first. Reported-by: Dmitry Vyukov Fixes: 283bc9f35bbbcb0e9 ("xfrm: Namespacify xfrm state/policy locks") Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_policy.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b5e665b3cfb0..36a50ef9295d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3030,6 +3030,11 @@ static int __net_init xfrm_net_init(struct net *net) { int rv; + /* Initialize the per-net locks here */ + spin_lock_init(&net->xfrm.xfrm_state_lock); + rwlock_init(&net->xfrm.xfrm_policy_lock); + mutex_init(&net->xfrm.xfrm_cfg_mutex); + rv = xfrm_statistics_init(net); if (rv < 0) goto out_statistics; @@ -3046,11 +3051,6 @@ static int __net_init xfrm_net_init(struct net *net) if (rv < 0) goto out; - /* Initialize the per-net locks here */ - spin_lock_init(&net->xfrm.xfrm_state_lock); - rwlock_init(&net->xfrm.xfrm_policy_lock); - mutex_init(&net->xfrm.xfrm_cfg_mutex); - return 0; out: -- cgit v1.2.3 From cce7e56dd73f75fef0a7f594fb129285a660fec0 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Wed, 22 Mar 2017 07:29:31 +0000 Subject: xfrm_user: validate XFRM_MSG_NEWAE XFRMA_REPLAY_ESN_VAL replay_window commit 677e806da4d916052585301785d847c3b3e6186a upstream. When a new xfrm state is created during an XFRM_MSG_NEWSA call we validate the user supplied replay_esn to ensure that the size is valid and to ensure that the replay_window size is within the allocated buffer. However later it is possible to update this replay_esn via a XFRM_MSG_NEWAE call. There we again validate the size of the supplied buffer matches the existing state and if so inject the contents. We do not at this point check that the replay_window is within the allocated memory. This leads to out-of-bounds reads and writes triggered by netlink packets. This leads to memory corruption and the potential for priviledge escalation. We already attempt to validate the incoming replay information in xfrm_new_ae() via xfrm_replay_verify_len(). This confirms that the user is not trying to change the size of the replay state buffer which includes the replay_esn. It however does not check the replay_window remains within that buffer. Add validation of the contained replay_window. CVE-2017-7184 Signed-off-by: Andy Whitcroft Acked-by: Steffen Klassert Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 805681a7d356..0e1f833bc77d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -415,6 +415,9 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) return -EINVAL; + if (up->replay_window > up->bmp_len * sizeof(__u32) * 8) + return -EINVAL; + return 0; } -- cgit v1.2.3 From 22c9e7c092f63335ba7a7301e0e0b4c4ebed53a8 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 23 Mar 2017 07:45:44 +0000 Subject: xfrm_user: validate XFRM_MSG_NEWAE incoming ESN size harder commit f843ee6dd019bcece3e74e76ad9df0155655d0df upstream. Kees Cook has pointed out that xfrm_replay_state_esn_len() is subject to wrapping issues. To ensure we are correctly ensuring that the two ESN structures are the same size compare both the overall size as reported by xfrm_replay_state_esn_len() and the internal length are the same. CVE-2017-7184 Signed-off-by: Andy Whitcroft Acked-by: Steffen Klassert Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- net/xfrm/xfrm_user.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0e1f833bc77d..7a5a64e70b4d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -412,7 +412,11 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es up = nla_data(rp); ulen = xfrm_replay_state_esn_len(up); - if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) + /* Check the overall length and the internal bitmap length to avoid + * potential overflow. */ + if (nla_len(rp) < ulen || + xfrm_replay_state_esn_len(replay_esn) != ulen || + replay_esn->bmp_len != up->bmp_len) return -EINVAL; if (up->replay_window > up->bmp_len * sizeof(__u32) * 8) -- cgit v1.2.3 From 927d04793f8a587532a5c26057bcdcb33bc8f5ba Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Thu, 23 Mar 2017 08:04:18 +0100 Subject: virtio_balloon: init 1st buffer in stats vq commit fc8653228c8588a120f6b5dad6983b7b61ff669e upstream. When init_vqs runs, virtio_balloon.stats is either uninitialized or contains stale values. The host updates its state with garbage data because it has no way of knowing that this is just a marker buffer used for signaling. This patch updates the stats before pushing the initial buffer. Alternative fixes: * Push an empty buffer in init_vqs. Not easily done with the current virtio implementation and violates the spec "Driver MUST supply the same subset of statistics in all buffers submitted to the statsq". * Push a buffer with invalid tags in init_vqs. Violates the same spec clause, plus "invalid tag" is not really defined. Note: the spec says: When using the legacy interface, the device SHOULD ignore all values in the first buffer in the statsq supplied by the driver after device initialization. Note: Historically, drivers supplied an uninitialized buffer in the first buffer. Unfortunately QEMU does not seem to implement the recommendation even for the legacy interface. Signed-off-by: Ladi Prosek Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_balloon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 56f7e2521202..01d15dca940e 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -416,6 +416,8 @@ static int init_vqs(struct virtio_balloon *vb) * Prime this virtqueue with one buffer so the hypervisor can * use it to signal us later (it can't be broken yet!). */ + update_balloon_stats(vb); + sg_init_one(&sg, vb->stats, sizeof vb->stats); if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) < 0) -- cgit v1.2.3 From 800791e7e0fd9835be2f55c55147c379888b7442 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 14 Mar 2017 08:23:26 -0700 Subject: pinctrl: qcom: Don't clear status bit on irq_unmask commit a6566710adaa4a7dd5e0d99820ff9c9c30ee5951 upstream. Clearing the status bit on irq_unmask will discard any pending interrupt that did arrive after the irq_ack, i.e. while the IRQ handler function was executing. Fixes: f365be092572 ("pinctrl: Add Qualcomm TLMM driver") Cc: Stephen Boyd Reported-by: Timur Tabi Signed-off-by: Bjorn Andersson Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-msm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 146264a41ec8..9736f9be5447 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -597,10 +597,6 @@ static void msm_gpio_irq_unmask(struct irq_data *d) spin_lock_irqsave(&pctrl->lock, flags); - val = readl(pctrl->regs + g->intr_status_reg); - val &= ~BIT(g->intr_status_bit); - writel(val, pctrl->regs + g->intr_status_reg); - val = readl(pctrl->regs + g->intr_cfg_reg); val |= BIT(g->intr_enable_bit); writel(val, pctrl->regs + g->intr_cfg_reg); -- cgit v1.2.3 From 6e174bbd0631865acc193804fa4043852f3198c5 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:53 +0100 Subject: c6x/ptrace: Remove useless PTRACE_SETREGSET implementation commit fb411b837b587a32046dc4f369acb93a10b1def8 upstream. gpr_set won't work correctly and can never have been tested, and the correct behaviour is not clear due to the endianness-dependent task layout. So, just remove it. The core code will now return -EOPNOTSUPPORT when trying to set NT_PRSTATUS on this architecture until/unless a correct implementation is supplied. Signed-off-by: Dave Martin Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/c6x/kernel/ptrace.c | 41 ----------------------------------------- 1 file changed, 41 deletions(-) diff --git a/arch/c6x/kernel/ptrace.c b/arch/c6x/kernel/ptrace.c index 3c494e84444d..a511ac16a8e3 100644 --- a/arch/c6x/kernel/ptrace.c +++ b/arch/c6x/kernel/ptrace.c @@ -69,46 +69,6 @@ static int gpr_get(struct task_struct *target, 0, sizeof(*regs)); } -static int gpr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - struct pt_regs *regs = task_pt_regs(target); - - /* Don't copyin TSR or CSR */ - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - ®s, - 0, PT_TSR * sizeof(long)); - if (ret) - return ret; - - ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - PT_TSR * sizeof(long), - (PT_TSR + 1) * sizeof(long)); - if (ret) - return ret; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - ®s, - (PT_TSR + 1) * sizeof(long), - PT_CSR * sizeof(long)); - if (ret) - return ret; - - ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - PT_CSR * sizeof(long), - (PT_CSR + 1) * sizeof(long)); - if (ret) - return ret; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - ®s, - (PT_CSR + 1) * sizeof(long), -1); - return ret; -} - enum c6x_regset { REGSET_GPR, }; @@ -120,7 +80,6 @@ static const struct user_regset c6x_regsets[] = { .size = sizeof(u32), .align = sizeof(u32), .get = gpr_get, - .set = gpr_set }, }; -- cgit v1.2.3 From e1dc8904b33b8c01f22d904fed4cb5f2060f5da3 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:54 +0100 Subject: h8300/ptrace: Fix incorrect register transfer count commit 502585c7555083d4a949c08350306b9ec196779e upstream. regs_set() and regs_get() are vulnerable to an off-by-1 buffer overrun if CONFIG_CPU_H8S is set, since this adds an extra entry to register_offset[] but not to user_regs_struct. So, iterate over user_regs_struct based on its actual size, not based on the length of register_offset[]. Signed-off-by: Dave Martin Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/h8300/kernel/ptrace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c index 92075544a19a..0dc1c8f622bc 100644 --- a/arch/h8300/kernel/ptrace.c +++ b/arch/h8300/kernel/ptrace.c @@ -95,7 +95,8 @@ static int regs_get(struct task_struct *target, long *reg = (long *)®s; /* build user regs in buffer */ - for (r = 0; r < ARRAY_SIZE(register_offset); r++) + BUILD_BUG_ON(sizeof(regs) % sizeof(long) != 0); + for (r = 0; r < sizeof(regs) / sizeof(long); r++) *reg++ = h8300_get_reg(target, r); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, @@ -113,7 +114,8 @@ static int regs_set(struct task_struct *target, long *reg; /* build user regs in buffer */ - for (reg = (long *)®s, r = 0; r < ARRAY_SIZE(register_offset); r++) + BUILD_BUG_ON(sizeof(regs) % sizeof(long) != 0); + for (reg = (long *)®s, r = 0; r < sizeof(regs) / sizeof(long); r++) *reg++ = h8300_get_reg(target, r); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, @@ -122,7 +124,7 @@ static int regs_set(struct task_struct *target, return ret; /* write back to pt_regs */ - for (reg = (long *)®s, r = 0; r < ARRAY_SIZE(register_offset); r++) + for (reg = (long *)®s, r = 0; r < sizeof(regs) / sizeof(long); r++) h8300_put_reg(target, r, *reg++); return 0; } -- cgit v1.2.3 From c8693666856c0db4a1e07235d98ce0b3bde98d9e Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:58 +0100 Subject: mips/ptrace: Preserve previous registers for short regset write commit d614fd58a2834cfe4efa472c33c8f3ce2338b09b upstream. Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. Signed-off-by: Dave Martin Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/ptrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 74d581569778..c95bf18260f8 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -485,7 +485,8 @@ static int fpr_set(struct task_struct *target, &target->thread.fpu, 0, sizeof(elf_fpregset_t)); - for (i = 0; i < NUM_FPU_REGS; i++) { + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); + for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) { err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpr_val, i * sizeof(elf_fpreg_t), (i + 1) * sizeof(elf_fpreg_t)); -- cgit v1.2.3 From 962b95a88574359b081e24815fae6aba92fff98d Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:59 +0100 Subject: sparc/ptrace: Preserve previous registers for short regset write commit d3805c546b275c8cc7d40f759d029ae92c7175f2 upstream. Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. Signed-off-by: Dave Martin Acked-by: David S. Miller Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/ptrace_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index 9ddc4928a089..c1566170964f 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -311,7 +311,7 @@ static int genregs64_set(struct task_struct *target, } if (!ret) { - unsigned long y; + unsigned long y = regs->y; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &y, -- cgit v1.2.3 From 2d9bc3695012f1ef7465f56302c1e60c48dccde8 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:55 +0100 Subject: metag/ptrace: Preserve previous registers for short regset write commit a78ce80d2c9178351b34d78fec805140c29c193e upstream. Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. Signed-off-by: Dave Martin Acked-by: James Hogan Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/metag/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c index 7563628822bd..ae659ba61948 100644 --- a/arch/metag/kernel/ptrace.c +++ b/arch/metag/kernel/ptrace.c @@ -303,7 +303,7 @@ static int metag_tls_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { int ret; - void __user *tls; + void __user *tls = target->thread.tls_ptr; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); if (ret) -- cgit v1.2.3 From e441102d8c074d63d44329a59f3278573cdc1477 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:56 +0100 Subject: metag/ptrace: Provide default TXSTATUS for short NT_PRSTATUS commit 5fe81fe98123ce41265c65e95d34418d30d005d1 upstream. Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill TXSTATUS, a well-defined default value is used, based on the task's current value. Suggested-by: James Hogan Signed-off-by: Dave Martin Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/metag/kernel/ptrace.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c index ae659ba61948..2e4dfc15abd3 100644 --- a/arch/metag/kernel/ptrace.c +++ b/arch/metag/kernel/ptrace.c @@ -24,6 +24,16 @@ * user_regset definitions. */ +static unsigned long user_txstatus(const struct pt_regs *regs) +{ + unsigned long data = (unsigned long)regs->ctx.Flags; + + if (regs->ctx.SaveMask & TBICTX_CBUF_BIT) + data |= USER_GP_REGS_STATUS_CATCH_BIT; + + return data; +} + int metag_gp_regs_copyout(const struct pt_regs *regs, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) @@ -62,9 +72,7 @@ int metag_gp_regs_copyout(const struct pt_regs *regs, if (ret) goto out; /* TXSTATUS */ - data = (unsigned long)regs->ctx.Flags; - if (regs->ctx.SaveMask & TBICTX_CBUF_BIT) - data |= USER_GP_REGS_STATUS_CATCH_BIT; + data = user_txstatus(regs); ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &data, 4*25, 4*26); if (ret) @@ -119,6 +127,7 @@ int metag_gp_regs_copyin(struct pt_regs *regs, if (ret) goto out; /* TXSTATUS */ + data = user_txstatus(regs); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &data, 4*25, 4*26); if (ret) -- cgit v1.2.3 From 573341eba9c44b0b2198373cb453bbbb5b3f066a Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:57 +0100 Subject: metag/ptrace: Reject partial NT_METAG_RPIPE writes commit 7195ee3120d878259e8d94a5d9f808116f34d5ea upstream. It's not clear what behaviour is sensible when doing partial write of NT_METAG_RPIPE, so just don't bother. This patch assumes that userspace will never rely on a partial SETREGSET in this case, since it's not clear what should happen anyway. Signed-off-by: Dave Martin Acked-by: James Hogan Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/metag/kernel/ptrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c index 2e4dfc15abd3..5e2dc7defd2c 100644 --- a/arch/metag/kernel/ptrace.c +++ b/arch/metag/kernel/ptrace.c @@ -253,6 +253,8 @@ int metag_rp_state_copyin(struct pt_regs *regs, unsigned long long *ptr; int ret, i; + if (count < 4*13) + return -EINVAL; /* Read the entire pipeline before making any changes */ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &rp, 0, 4*13); -- cgit v1.2.3 From 7a5202190810dde1467718235c1f650fcf57592a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 21 Feb 2017 15:07:11 -0800 Subject: fscrypt: remove broken support for detecting keyring key revocation commit 1b53cf9815bb4744958d41f3795d5d5a1d365e2d upstream. Filesystem encryption ostensibly supported revoking a keyring key that had been used to "unlock" encrypted files, causing those files to become "locked" again. This was, however, buggy for several reasons, the most severe of which was that when key revocation happened to be detected for an inode, its fscrypt_info was immediately freed, even while other threads could be using it for encryption or decryption concurrently. This could be exploited to crash the kernel or worse. This patch fixes the use-after-free by removing the code which detects the keyring key having been revoked, invalidated, or expired. Instead, an encrypted inode that is "unlocked" now simply remains unlocked until it is evicted from memory. Note that this is no worse than the case for block device-level encryption, e.g. dm-crypt, and it still remains possible for a privileged user to evict unused pages, inodes, and dentries by running 'sync; echo 3 > /proc/sys/vm/drop_caches', or by simply unmounting the filesystem. In fact, one of those actions was already needed anyway for key revocation to work even somewhat sanely. This change is not expected to break any applications. In the future I'd like to implement a real API for fscrypt key revocation that interacts sanely with ongoing filesystem operations --- waiting for existing operations to complete and blocking new operations, and invalidating and sanitizing key material and plaintext from the VFS caches. But this is a hard problem, and for now this bug must be fixed. This bug affected almost all versions of ext4, f2fs, and ubifs encryption, and it was potentially reachable in any kernel configured with encryption support (CONFIG_EXT4_ENCRYPTION=y, CONFIG_EXT4_FS_ENCRYPTION=y, CONFIG_F2FS_FS_ENCRYPTION=y, or CONFIG_UBIFS_FS_ENCRYPTION=y). Note that older kernels did not use the shared fs/crypto/ code, but due to the potential security implications of this bug, it may still be worthwhile to backport this fix to them. Fixes: b7236e21d55f ("ext4 crypto: reorganize how we store keys in the inode") Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Acked-by: Michael Halcrow Signed-off-by: Greg Kroah-Hartman --- fs/ext4/crypto_key.c | 28 +++++++--------------------- fs/ext4/ext4.h | 14 +------------- fs/ext4/ext4_crypto.h | 1 - fs/f2fs/crypto_key.c | 28 +++++++--------------------- fs/f2fs/f2fs.h | 14 +------------- fs/f2fs/f2fs_crypto.h | 1 - 6 files changed, 16 insertions(+), 70 deletions(-) diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c index 9a16d1e75a49..505f8afde57c 100644 --- a/fs/ext4/crypto_key.c +++ b/fs/ext4/crypto_key.c @@ -88,8 +88,6 @@ void ext4_free_crypt_info(struct ext4_crypt_info *ci) if (!ci) return; - if (ci->ci_keyring_key) - key_put(ci->ci_keyring_key); crypto_free_ablkcipher(ci->ci_ctfm); kmem_cache_free(ext4_crypt_info_cachep, ci); } @@ -111,7 +109,7 @@ void ext4_free_encryption_info(struct inode *inode, ext4_free_crypt_info(ci); } -int _ext4_get_encryption_info(struct inode *inode) +int ext4_get_encryption_info(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_crypt_info *crypt_info; @@ -128,22 +126,15 @@ int _ext4_get_encryption_info(struct inode *inode) char mode; int res; + if (ei->i_crypt_info) + return 0; + if (!ext4_read_workqueue) { res = ext4_init_crypto(); if (res) return res; } -retry: - crypt_info = ACCESS_ONCE(ei->i_crypt_info); - if (crypt_info) { - if (!crypt_info->ci_keyring_key || - key_validate(crypt_info->ci_keyring_key) == 0) - return 0; - ext4_free_encryption_info(inode, crypt_info); - goto retry; - } - res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, sizeof(ctx)); @@ -166,7 +157,6 @@ retry: crypt_info->ci_data_mode = ctx.contents_encryption_mode; crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; crypt_info->ci_ctfm = NULL; - crypt_info->ci_keyring_key = NULL; memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, sizeof(crypt_info->ci_master_key)); if (S_ISREG(inode->i_mode)) @@ -206,7 +196,6 @@ retry: keyring_key = NULL; goto out; } - crypt_info->ci_keyring_key = keyring_key; if (keyring_key->type != &key_type_logon) { printk_once(KERN_WARNING "ext4: key type must be logon\n"); @@ -253,16 +242,13 @@ got_key: ext4_encryption_key_size(mode)); if (res) goto out; - memzero_explicit(raw_key, sizeof(raw_key)); - if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) != NULL) { - ext4_free_crypt_info(crypt_info); - goto retry; - } - return 0; + if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) == NULL) + crypt_info = NULL; out: if (res == -ENOKEY) res = 0; + key_put(keyring_key); ext4_free_crypt_info(crypt_info); memzero_explicit(raw_key, sizeof(raw_key)); return res; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index cd5914495ad7..362d59b24f1d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2330,23 +2330,11 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) { } /* crypto_key.c */ void ext4_free_crypt_info(struct ext4_crypt_info *ci); void ext4_free_encryption_info(struct inode *inode, struct ext4_crypt_info *ci); -int _ext4_get_encryption_info(struct inode *inode); #ifdef CONFIG_EXT4_FS_ENCRYPTION int ext4_has_encryption_key(struct inode *inode); -static inline int ext4_get_encryption_info(struct inode *inode) -{ - struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; - - if (!ci || - (ci->ci_keyring_key && - (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED) | - (1 << KEY_FLAG_DEAD))))) - return _ext4_get_encryption_info(inode); - return 0; -} +int ext4_get_encryption_info(struct inode *inode); static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode) { diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h index ac7d4e813796..1b17b05b9f4d 100644 --- a/fs/ext4/ext4_crypto.h +++ b/fs/ext4/ext4_crypto.h @@ -78,7 +78,6 @@ struct ext4_crypt_info { char ci_filename_mode; char ci_flags; struct crypto_ablkcipher *ci_ctfm; - struct key *ci_keyring_key; char ci_master_key[EXT4_KEY_DESCRIPTOR_SIZE]; }; diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c index 5de2d866a25c..18595d7a0efc 100644 --- a/fs/f2fs/crypto_key.c +++ b/fs/f2fs/crypto_key.c @@ -92,7 +92,6 @@ static void f2fs_free_crypt_info(struct f2fs_crypt_info *ci) if (!ci) return; - key_put(ci->ci_keyring_key); crypto_free_ablkcipher(ci->ci_ctfm); kmem_cache_free(f2fs_crypt_info_cachep, ci); } @@ -113,7 +112,7 @@ void f2fs_free_encryption_info(struct inode *inode, struct f2fs_crypt_info *ci) f2fs_free_crypt_info(ci); } -int _f2fs_get_encryption_info(struct inode *inode) +int f2fs_get_encryption_info(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_crypt_info *crypt_info; @@ -129,18 +128,12 @@ int _f2fs_get_encryption_info(struct inode *inode) char mode; int res; + if (fi->i_crypt_info) + return 0; + res = f2fs_crypto_initialize(); if (res) return res; -retry: - crypt_info = ACCESS_ONCE(fi->i_crypt_info); - if (crypt_info) { - if (!crypt_info->ci_keyring_key || - key_validate(crypt_info->ci_keyring_key) == 0) - return 0; - f2fs_free_encryption_info(inode, crypt_info); - goto retry; - } res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, @@ -159,7 +152,6 @@ retry: crypt_info->ci_data_mode = ctx.contents_encryption_mode; crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; crypt_info->ci_ctfm = NULL; - crypt_info->ci_keyring_key = NULL; memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, sizeof(crypt_info->ci_master_key)); if (S_ISREG(inode->i_mode)) @@ -197,7 +189,6 @@ retry: keyring_key = NULL; goto out; } - crypt_info->ci_keyring_key = keyring_key; BUG_ON(keyring_key->type != &key_type_logon); ukp = user_key_payload(keyring_key); if (ukp->datalen != sizeof(struct f2fs_encryption_key)) { @@ -230,17 +221,12 @@ retry: if (res) goto out; - memzero_explicit(raw_key, sizeof(raw_key)); - if (cmpxchg(&fi->i_crypt_info, NULL, crypt_info) != NULL) { - f2fs_free_crypt_info(crypt_info); - goto retry; - } - return 0; - + if (cmpxchg(&fi->i_crypt_info, NULL, crypt_info) == NULL) + crypt_info = NULL; out: if (res == -ENOKEY && !S_ISREG(inode->i_mode)) res = 0; - + key_put(keyring_key); f2fs_free_crypt_info(crypt_info); memzero_explicit(raw_key, sizeof(raw_key)); return res; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9db5500d63d9..b1aeca83f4be 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2149,7 +2149,6 @@ void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *, struct bio *); /* crypto_key.c */ void f2fs_free_encryption_info(struct inode *, struct f2fs_crypt_info *); -int _f2fs_get_encryption_info(struct inode *inode); /* crypto_fname.c */ bool f2fs_valid_filenames_enc_mode(uint32_t); @@ -2170,18 +2169,7 @@ void f2fs_exit_crypto(void); int f2fs_has_encryption_key(struct inode *); -static inline int f2fs_get_encryption_info(struct inode *inode) -{ - struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info; - - if (!ci || - (ci->ci_keyring_key && - (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED) | - (1 << KEY_FLAG_DEAD))))) - return _f2fs_get_encryption_info(inode); - return 0; -} +int f2fs_get_encryption_info(struct inode *inode); void f2fs_fname_crypto_free_buffer(struct f2fs_str *); int f2fs_fname_setup_filename(struct inode *, const struct qstr *, diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h index c2c1c2b63b25..f113f1a1e8c1 100644 --- a/fs/f2fs/f2fs_crypto.h +++ b/fs/f2fs/f2fs_crypto.h @@ -79,7 +79,6 @@ struct f2fs_crypt_info { char ci_filename_mode; char ci_flags; struct crypto_ablkcipher *ci_ctfm; - struct key *ci_keyring_key; char ci_master_key[F2FS_KEY_DESCRIPTOR_SIZE]; }; -- cgit v1.2.3 From 2bed5987692cb6dc3bf3ce15d8abeb79fdf4ab2a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 24 Jan 2017 15:40:06 +0100 Subject: sched/rt: Add a missing rescheduling point commit 619bd4a71874a8fd78eb6ccf9f272c5e98bcc7b7 upstream. Since the change in commit: fd7a4bed1835 ("sched, rt: Convert switched_{from, to}_rt() / prio_changed_rt() to balance callbacks") ... we don't reschedule a task under certain circumstances: Lets say task-A, SCHED_OTHER, is running on CPU0 (and it may run only on CPU0) and holds a PI lock. This task is removed from the CPU because it used up its time slice and another SCHED_OTHER task is running. Task-B on CPU1 runs at RT priority and asks for the lock owned by task-A. This results in a priority boost for task-A. Task-B goes to sleep until the lock has been made available. Task-A is already runnable (but not active), so it receives no wake up. The reality now is that task-A gets on the CPU once the scheduler decides to remove the current task despite the fact that a high priority task is enqueued and waiting. This may take a long time. The desired behaviour is that CPU0 immediately reschedules after the priority boost which made task-A the task with the lowest priority. Suggested-by: Peter Zijlstra Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Thomas Gleixner Fixes: fd7a4bed1835 ("sched, rt: Convert switched_{from, to}_rt() prio_changed_rt() to balance callbacks") Link: http://lkml.kernel.org/r/20170124144006.29821-1-bigeasy@linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/deadline.c | 3 +-- kernel/sched/rt.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 8b0a15e285f9..e984f059e5fc 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1771,12 +1771,11 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) #ifdef CONFIG_SMP if (p->nr_cpus_allowed > 1 && rq->dl.overloaded) queue_push_tasks(rq); -#else +#endif if (dl_task(rq->curr)) check_preempt_curr_dl(rq, p, 0); else resched_curr(rq); -#endif } } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 8ec86abe0ea1..78ae5c1d9412 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2136,10 +2136,9 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) #ifdef CONFIG_SMP if (p->nr_cpus_allowed > 1 && rq->rt.overloaded) queue_push_tasks(rq); -#else +#endif /* CONFIG_SMP */ if (p->prio < rq->curr->prio) resched_curr(rq); -#endif /* CONFIG_SMP */ } } -- cgit v1.2.3 From 61a4577c9a4419b99e647744923517d47255da35 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 31 Mar 2017 10:17:09 +0200 Subject: Linux 4.4.59 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3efe2ea99e2d..083724c6ca4d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 58 +SUBLEVEL = 59 EXTRAVERSION = NAME = Blurry Fish Butt -- cgit v1.2.3 From ba46d8fab00a8e1538df241681d9161c8ec85778 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 21 Mar 2017 13:44:28 +0100 Subject: libceph: force GFP_NOIO for socket allocations commit 633ee407b9d15a75ac9740ba9d3338815e1fcb95 upstream. sock_alloc_inode() allocates socket+inode and socket_wq with GFP_KERNEL, which is not allowed on the writeback path: Workqueue: ceph-msgr con_work [libceph] ffff8810871cb018 0000000000000046 0000000000000000 ffff881085d40000 0000000000012b00 ffff881025cad428 ffff8810871cbfd8 0000000000012b00 ffff880102fc1000 ffff881085d40000 ffff8810871cb038 ffff8810871cb148 Call Trace: [] schedule+0x29/0x70 [] schedule_timeout+0x1bd/0x200 [] ? ttwu_do_wakeup+0x2c/0x120 [] ? ttwu_do_activate.constprop.135+0x66/0x70 [] wait_for_completion+0xbf/0x180 [] ? try_to_wake_up+0x390/0x390 [] flush_work+0x165/0x250 [] ? worker_detach_from_pool+0xd0/0xd0 [] xlog_cil_force_lsn+0x81/0x200 [xfs] [] ? __slab_free+0xee/0x234 [] _xfs_log_force_lsn+0x4d/0x2c0 [xfs] [] ? lookup_page_cgroup_used+0xe/0x30 [] ? xfs_reclaim_inode+0xa3/0x330 [xfs] [] xfs_log_force_lsn+0x3f/0xf0 [xfs] [] ? xfs_reclaim_inode+0xa3/0x330 [xfs] [] xfs_iunpin_wait+0xc6/0x1a0 [xfs] [] ? wake_atomic_t_function+0x40/0x40 [] xfs_reclaim_inode+0xa3/0x330 [xfs] [] xfs_reclaim_inodes_ag+0x257/0x3d0 [xfs] [] xfs_reclaim_inodes_nr+0x33/0x40 [xfs] [] xfs_fs_free_cached_objects+0x15/0x20 [xfs] [] super_cache_scan+0x178/0x180 [] shrink_slab_node+0x14e/0x340 [] ? mem_cgroup_iter+0x16b/0x450 [] shrink_slab+0x100/0x140 [] do_try_to_free_pages+0x335/0x490 [] try_to_free_pages+0xb9/0x1f0 [] ? __alloc_pages_direct_compact+0x69/0x1be [] __alloc_pages_nodemask+0x69a/0xb40 [] alloc_pages_current+0x9e/0x110 [] new_slab+0x2c5/0x390 [] __slab_alloc+0x33b/0x459 [] ? sock_alloc_inode+0x2d/0xd0 [] ? inet_sendmsg+0x71/0xc0 [] ? sock_alloc_inode+0x2d/0xd0 [] kmem_cache_alloc+0x1a2/0x1b0 [] sock_alloc_inode+0x2d/0xd0 [] alloc_inode+0x26/0xa0 [] new_inode_pseudo+0x1a/0x70 [] sock_alloc+0x1e/0x80 [] __sock_create+0x95/0x220 [] sock_create_kern+0x24/0x30 [] con_work+0xef9/0x2050 [libceph] [] ? rbd_img_request_submit+0x4c/0x60 [rbd] [] process_one_work+0x159/0x4f0 [] worker_thread+0x11b/0x530 [] ? create_worker+0x1d0/0x1d0 [] kthread+0xc9/0xe0 [] ? flush_kthread_worker+0x90/0x90 [] ret_from_fork+0x58/0x90 [] ? flush_kthread_worker+0x90/0x90 Use memalloc_noio_{save,restore}() to temporarily force GFP_NOIO here. Link: http://tracker.ceph.com/issues/19309 Reported-by: Sergey Jerusalimov Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- net/ceph/messenger.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index b8d927c56494..a6b2f2138c9d 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -478,11 +479,16 @@ static int ceph_tcp_connect(struct ceph_connection *con) { struct sockaddr_storage *paddr = &con->peer_addr.in_addr; struct socket *sock; + unsigned int noio_flag; int ret; BUG_ON(con->sock); + + /* sock_create_kern() allocates with GFP_KERNEL */ + noio_flag = memalloc_noio_save(); ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); + memalloc_noio_restore(noio_flag); if (ret) return ret; sock->sk->sk_allocation = GFP_NOFS; -- cgit v1.2.3 From 1eed198ce16b6e05c05ee381e5d90fac35ea67a7 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Mon, 12 Dec 2016 14:35:13 +0000 Subject: xen/setup: Don't relocate p2m over existing one commit 7ecec8503af37de6be4f96b53828d640a968705f upstream. When relocating the p2m, take special care not to relocate it so that is overlaps with the current location of the p2m/initrd. This is needed since the full extent of the current location is not marked as a reserved region in the e820. This was seen to happen to a dom0 with a large initial p2m and a small reserved region in the middle of the initial p2m. Signed-off-by: Ross Lagerwall Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/setup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index e345891450c3..df8844a1853a 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -713,10 +713,9 @@ static void __init xen_reserve_xen_mfnlist(void) size = PFN_PHYS(xen_start_info->nr_p2m_frames); } - if (!xen_is_e820_reserved(start, size)) { - memblock_reserve(start, size); + memblock_reserve(start, size); + if (!xen_is_e820_reserved(start, size)) return; - } #ifdef CONFIG_X86_32 /* @@ -727,6 +726,7 @@ static void __init xen_reserve_xen_mfnlist(void) BUG(); #else xen_relocate_p2m(); + memblock_free(start, size); #endif } -- cgit v1.2.3 From 18639c4bad72218954e728e9ca65c33b13ba673a Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 1 Jan 2017 09:39:24 -0800 Subject: scsi: mpt3sas: fix hang on ata passthrough commands commit ffb58456589443ca572221fabbdef3db8483a779 upstream. mpt3sas has a firmware failure where it can only handle one pass through ATA command at a time. If another comes in, contrary to the SAT standard, it will hang until the first one completes (causing long commands like secure erase to timeout). The original fix was to block the device when an ATA command came in, but this caused a regression with commit 669f044170d8933c3d66d231b69ea97cb8447338 Author: Bart Van Assche Date: Tue Nov 22 16:17:13 2016 -0800 scsi: srp_transport: Move queuecommand() wait code to SCSI core So fix the original fix of the secure erase timeout by properly returning SAM_STAT_BUSY like the SAT recommends. The original patch also had a concurrency problem since scsih_qcmd is lockless at that point (this is fixed by using atomic bitops to set and test the flag). [mkp: addressed feedback wrt. test_bit and fixed whitespace] Fixes: 18f6084a989ba1b (mpt3sas: Fix secure erase premature termination) Signed-off-by: James Bottomley Acked-by: Sreekanth Reddy Reviewed-by: Christoph Hellwig Reported-by: Ingo Molnar Tested-by: Ingo Molnar Signed-off-by: Martin K. Petersen Cc: Joe Korty Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt3sas/mpt3sas_base.h | 12 +++++++++++ drivers/scsi/mpt3sas/mpt3sas_scsih.c | 40 +++++++++++++++++++++++------------- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 92648a5ea2d2..63f5965acc89 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -390,6 +390,7 @@ struct MPT3SAS_TARGET { * @eedp_enable: eedp support enable bit * @eedp_type: 0(type_1), 1(type_2), 2(type_3) * @eedp_block_length: block size + * @ata_command_pending: SATL passthrough outstanding for device */ struct MPT3SAS_DEVICE { struct MPT3SAS_TARGET *sas_target; @@ -398,6 +399,17 @@ struct MPT3SAS_DEVICE { u8 configured_lun; u8 block; u8 tlr_snoop_check; + /* + * Bug workaround for SATL handling: the mpt2/3sas firmware + * doesn't return BUSY or TASK_SET_FULL for subsequent + * commands while a SATL pass through is in operation as the + * spec requires, it simply does nothing with them until the + * pass through completes, causing them possibly to timeout if + * the passthrough is a long executing command (like format or + * secure erase). This variable allows us to do the right + * thing while a SATL command is pending. + */ + unsigned long ata_command_pending; }; #define MPT3_CMD_NOT_USED 0x8000 /* free */ diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index f6a8e9958e75..8a5fbdb45cfd 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -3707,9 +3707,18 @@ _scsih_temp_threshold_events(struct MPT3SAS_ADAPTER *ioc, } } -static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd) +static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending) { - return (scmd->cmnd[0] == ATA_12 || scmd->cmnd[0] == ATA_16); + struct MPT3SAS_DEVICE *priv = scmd->device->hostdata; + + if (scmd->cmnd[0] != ATA_12 && scmd->cmnd[0] != ATA_16) + return 0; + + if (pending) + return test_and_set_bit(0, &priv->ata_command_pending); + + clear_bit(0, &priv->ata_command_pending); + return 0; } /** @@ -3733,9 +3742,7 @@ _scsih_flush_running_cmds(struct MPT3SAS_ADAPTER *ioc) if (!scmd) continue; count++; - if (ata_12_16_cmd(scmd)) - scsi_internal_device_unblock(scmd->device, - SDEV_RUNNING); + _scsih_set_satl_pending(scmd, false); mpt3sas_base_free_smid(ioc, smid); scsi_dma_unmap(scmd); if (ioc->pci_error_recovery) @@ -3866,13 +3873,6 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) if (ioc->logging_level & MPT_DEBUG_SCSI) scsi_print_command(scmd); - /* - * Lock the device for any subsequent command until command is - * done. - */ - if (ata_12_16_cmd(scmd)) - scsi_internal_device_block(scmd->device); - sas_device_priv_data = scmd->device->hostdata; if (!sas_device_priv_data || !sas_device_priv_data->sas_target) { scmd->result = DID_NO_CONNECT << 16; @@ -3886,6 +3886,19 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) return 0; } + /* + * Bug work around for firmware SATL handling. The loop + * is based on atomic operations and ensures consistency + * since we're lockless at this point + */ + do { + if (test_bit(0, &sas_device_priv_data->ata_command_pending)) { + scmd->result = SAM_STAT_BUSY; + scmd->scsi_done(scmd); + return 0; + } + } while (_scsih_set_satl_pending(scmd, true)); + sas_target_priv_data = sas_device_priv_data->sas_target; /* invalid device handle */ @@ -4445,8 +4458,7 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) if (scmd == NULL) return 1; - if (ata_12_16_cmd(scmd)) - scsi_internal_device_unblock(scmd->device, SDEV_RUNNING); + _scsih_set_satl_pending(scmd, false); mpi_request = mpt3sas_base_get_msg_frame(ioc, smid); -- cgit v1.2.3 From a92f411914cad6532e82e4607bc4075a5ffaa366 Mon Sep 17 00:00:00 2001 From: peter chang Date: Wed, 15 Feb 2017 14:11:54 -0800 Subject: scsi: sg: check length passed to SG_NEXT_CMD_LEN commit bf33f87dd04c371ea33feb821b60d63d754e3124 upstream. The user can control the size of the next command passed along, but the value passed to the ioctl isn't checked against the usable max command size. Signed-off-by: Peter Chang Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index dedcff9cabb5..6514636431ab 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1008,6 +1008,8 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) result = get_user(val, ip); if (result) return result; + if (val > SG_MAX_CDB_SIZE) + return -ENOMEM; sfp->next_cmd_len = (val > 0) ? val : 0; return 0; case SG_GET_VERSION_NUM: -- cgit v1.2.3 From 75a03869c93a443ae068eae9aca0c0df8b33dff5 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 16 Mar 2017 23:07:28 +0800 Subject: scsi: libsas: fix ata xfer length commit 9702c67c6066f583b629cf037d2056245bb7a8e6 upstream. The total ata xfer length may not be calculated properly, in that we do not use the proper method to get an sg element dma length. According to the code comment, sg_dma_len() should be used after dma_map_sg() is called. This issue was found by turning on the SMMUv3 in front of the hisi_sas controller in hip07. Multiple sg elements were being combined into a single element, but the original first element length was being use as the total xfer length. Fixes: ff2aeb1eb64c8a4770a6 ("libata: convert to chained sg") Signed-off-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/libsas/sas_ata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 9c706d8c1441..6f5e2720ffad 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -218,7 +218,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) task->num_scatter = qc->n_elem; } else { for_each_sg(qc->sg, sg, qc->n_elem, si) - xfer += sg->length; + xfer += sg_dma_len(sg); task->total_xfer_len = xfer; task->num_scatter = si; -- cgit v1.2.3 From a90d7447e4a154ad26e3b9e09a0878680be49339 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 24 Mar 2017 17:07:57 +0100 Subject: ALSA: seq: Fix race during FIFO resize commit 2d7d54002e396c180db0c800c1046f0a3c471597 upstream. When a new event is queued while processing to resize the FIFO in snd_seq_fifo_clear(), it may lead to a use-after-free, as the old pool that is being queued gets removed. For avoiding this race, we need to close the pool to be deleted and sync its usage before actually deleting it. The issue was spotted by syzkaller. Reported-by: Dmitry Vyukov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_fifo.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c index 3f4efcb85df5..3490d21ab9e7 100644 --- a/sound/core/seq/seq_fifo.c +++ b/sound/core/seq/seq_fifo.c @@ -265,6 +265,10 @@ int snd_seq_fifo_resize(struct snd_seq_fifo *f, int poolsize) /* NOTE: overflow flag is not cleared */ spin_unlock_irqrestore(&f->lock, flags); + /* close the old pool and wait until all users are gone */ + snd_seq_pool_mark_closing(oldpool); + snd_use_lock_sync(&f->use_lock); + /* release cells in old pool */ for (cell = oldhead; cell; cell = next) { next = cell->next; -- cgit v1.2.3 From ce3dcfdbff04bab023806ef7a342c657ec08915d Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 31 Mar 2017 10:31:40 +0800 Subject: ALSA: hda - fix a problem for lineout on a Dell AIO machine commit 2f726aec19a9d2c63bec9a8a53a3910ffdcd09f8 upstream. On this Dell AIO machine, the lineout jack does not work. We found the pin 0x1a is assigned to lineout on this machine, and in the past, we applied ALC298_FIXUP_DELL1_MIC_NO_PRESENCE to fix the heaset-set mic problem for this machine, this fixup will redefine the pin 0x1a to headphone-mic, as a result the lineout doesn't work anymore. After consulting with Dell, they told us this machine doesn't support microphone via headset jack, so we add a new fixup which only defines the pin 0x18 as the headset-mic. [rearranged the fixup insertion position by tiwai in order to make the merge with other branches easier -- tiwai] Fixes: 59ec4b57bcae ("ALSA: hda - Fix headset mic detection problem for two dell machines") Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1d4f34379f56..46a34039ecdc 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4831,6 +4831,7 @@ enum { ALC292_FIXUP_DISABLE_AAMIX, ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK, ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE, ALC275_FIXUP_DELL_XPS, ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE, ALC293_FIXUP_LENOVO_SPK_NOISE, @@ -5429,6 +5430,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE }, + [ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, [ALC275_FIXUP_DELL_XPS] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -5501,7 +5511,7 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc298_fixup_speaker_volume, .chained = true, - .chain_id = ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, + .chain_id = ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE, }, [ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = { .type = HDA_FIXUP_PINS, -- cgit v1.2.3 From ab48ab614b8c83f3a3b0f83f7882b1d2766962d3 Mon Sep 17 00:00:00 2001 From: Songjun Wu Date: Fri, 24 Feb 2017 15:10:43 +0800 Subject: ASoC: atmel-classd: fix audio clock rate commit cd3ac9affc43b44f49d7af70d275f0bd426ba643 upstream. Fix the audio clock rate according to the datasheet. Reported-by: Dushara Jayasinghe Signed-off-by: Songjun Wu Acked-by: Nicolas Ferre Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/atmel/atmel-classd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c index 8276675730ef..78a985629607 100644 --- a/sound/soc/atmel/atmel-classd.c +++ b/sound/soc/atmel/atmel-classd.c @@ -343,7 +343,7 @@ static int atmel_classd_codec_dai_digital_mute(struct snd_soc_dai *codec_dai, } #define CLASSD_ACLK_RATE_11M2896_MPY_8 (112896 * 100 * 8) -#define CLASSD_ACLK_RATE_12M288_MPY_8 (12228 * 1000 * 8) +#define CLASSD_ACLK_RATE_12M288_MPY_8 (12288 * 1000 * 8) static struct { int rate; -- cgit v1.2.3 From 3342857ac074768e14e361392ac09fbbd70d840e Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 16 Mar 2017 08:56:28 -0500 Subject: ACPI: Fix incompatibility with mcount-based function graph tracing commit 61b79e16c68d703dde58c25d3935d67210b7d71b upstream. Paul Menzel reported a warning: WARNING: CPU: 0 PID: 774 at /build/linux-ROBWaj/linux-4.9.13/kernel/trace/trace_functions_graph.c:233 ftrace_return_to_handler+0x1aa/0x1e0 Bad frame pointer: expected f6919d98, received f6919db0 from func acpi_pm_device_sleep_wake return to c43b6f9d The warning means that function graph tracing is broken for the acpi_pm_device_sleep_wake() function. That's because the ACPI Makefile unconditionally sets the '-Os' gcc flag to optimize for size. That's an issue because mcount-based function graph tracing is incompatible with '-Os' on x86, thanks to the following gcc bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=42109 I have another patch pending which will ensure that mcount-based function graph tracing is never used with CONFIG_CC_OPTIMIZE_FOR_SIZE on x86. But this patch is needed in addition to that one because the ACPI Makefile overrides that config option for no apparent reason. It has had this flag since the beginning of git history, and there's no related comment, so I don't know why it's there. As far as I can tell, there's no reason for it to be there. The appropriate behavior is for it to honor CONFIG_CC_OPTIMIZE_FOR_{SIZE,PERFORMANCE} like the rest of the kernel. Reported-by: Paul Menzel Signed-off-by: Josh Poimboeuf Acked-by: Steven Rostedt (VMware) Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 675eaf337178..b9cebca376f9 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -2,7 +2,6 @@ # Makefile for the Linux ACPI interpreter # -ccflags-y := -Os ccflags-$(CONFIG_ACPI_DEBUG) += -DACPI_DEBUG_OUTPUT # -- cgit v1.2.3 From 566a8711a7dd11960fa0bf3a4fd89c742eb359f3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 22 Mar 2017 18:33:25 +0100 Subject: ACPI: Do not create a platform_device for IOAPIC/IOxAPIC commit 08f63d97749185fab942a3a47ed80f5bd89b8b7d upstream. No platform-device is required for IO(x)APICs, so don't even create them. [ rjw: This fixes a problem with leaking platform device objects after IOAPIC/IOxAPIC hot-removal events.] Signed-off-by: Joerg Roedel Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_platform.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index 296b7a14893a..5365ff6e69c1 100644 --- a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c @@ -24,9 +24,11 @@ ACPI_MODULE_NAME("platform"); static const struct acpi_device_id forbidden_id_list[] = { - {"PNP0000", 0}, /* PIC */ - {"PNP0100", 0}, /* Timer */ - {"PNP0200", 0}, /* AT DMA Controller */ + {"PNP0000", 0}, /* PIC */ + {"PNP0100", 0}, /* Timer */ + {"PNP0200", 0}, /* AT DMA Controller */ + {"ACPI0009", 0}, /* IOxAPIC */ + {"ACPI000A", 0}, /* IOAPIC */ {"", 0}, }; -- cgit v1.2.3 From 74b8fc017d7689d1a60c9e234b2cfe3550b7f414 Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Mon, 20 Mar 2017 11:52:41 +0100 Subject: tty/serial: atmel: fix race condition (TX+DMA) commit 31ca2c63fdc0aee725cbd4f207c1256f5deaabde upstream. If uart_flush_buffer() is called between atmel_tx_dma() and atmel_complete_tx_dma(), the circular buffer has been cleared, but not atmel_port->tx_len. That leads to a circular buffer overflow (dumping (UART_XMIT_SIZE - atmel_port->tx_len) bytes). Tested-by: Nicolas Ferre Signed-off-by: Richard Genoud Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index a0f911641b04..156a262b6b65 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -1987,6 +1987,11 @@ static void atmel_flush_buffer(struct uart_port *port) atmel_uart_writel(port, ATMEL_PDC_TCR, 0); atmel_port->pdc_tx.ofs = 0; } + /* + * in uart_flush_buffer(), the xmit circular buffer has just + * been cleared, so we have to reset tx_len accordingly. + */ + atmel_port->tx_len = 0; } /* -- cgit v1.2.3 From 0a1757cfa5ba3b46f6ee7a74ddb7a5c0bd5d7c2f Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Mon, 20 Mar 2017 16:38:57 +0100 Subject: tty/serial: atmel: fix TX path in atmel_console_write() commit 497e1e16f45c70574dc9922c7f75c642c2162119 upstream. A side effect of 89d8232411a8 ("tty/serial: atmel_serial: BUG: stop DMA from transmitting in stop_tx") is that the console can be called with TX path disabled. Then the system would hang trying to push charecters out in atmel_console_putchar(). Signed-off-by: Nicolas Ferre Fixes: 89d8232411a8 ("tty/serial: atmel_serial: BUG: stop DMA from transmitting in stop_tx") Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 156a262b6b65..a15070a7fcd6 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -2504,6 +2504,9 @@ static void atmel_console_write(struct console *co, const char *s, u_int count) pdc_tx = atmel_uart_readl(port, ATMEL_PDC_PTSR) & ATMEL_PDC_TXTEN; atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS); + /* Make sure that tx path is actually able to send characters */ + atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN); + uart_console_write(port, s, count, atmel_console_putchar); /* -- cgit v1.2.3 From eac3ab3e69151c21a0a71ec8711600022cc12fa3 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 24 Mar 2017 13:38:28 -0400 Subject: USB: fix linked-list corruption in rh_call_control() commit 1633682053a7ee8058e10c76722b9b28e97fb73f upstream. Using KASAN, Dmitry found a bug in the rh_call_control() routine: If buffer allocation fails, the routine returns immediately without unlinking its URB from the control endpoint, eventually leading to linked-list corruption. This patch fixes the problem by jumping to the end of the routine (where the URB is unlinked) when an allocation failure occurs. Signed-off-by: Alan Stern Reported-and-tested-by: Dmitry Vyukov Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 5724d7c41e29..ca2cbdb3aa67 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -499,8 +499,10 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb) */ tbuf_size = max_t(u16, sizeof(struct usb_hub_descriptor), wLength); tbuf = kzalloc(tbuf_size, GFP_KERNEL); - if (!tbuf) - return -ENOMEM; + if (!tbuf) { + status = -ENOMEM; + goto err_alloc; + } bufp = tbuf; @@ -705,6 +707,7 @@ error: } kfree(tbuf); + err_alloc: /* any errors get returned through the urb completion */ spin_lock_irq(&hcd_root_hub_lock); -- cgit v1.2.3 From 3eb392056aeb4a0beca5fcead9ad3d6b6ff0816e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 15 Mar 2017 16:01:17 +0800 Subject: KVM: x86: clear bus pointer when destroyed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit df630b8c1e851b5e265dc2ca9c87222e342c093b upstream. When releasing the bus, let's clear the bus pointers to mark it out. If any further device unregister happens on this bus, we know that we're done if we found the bus being released already. Signed-off-by: Peter Xu Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- virt/kvm/kvm_main.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 336ed267c407..1ac5b7be7282 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -654,8 +654,10 @@ static void kvm_destroy_vm(struct kvm *kvm) list_del(&kvm->vm_list); spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); - for (i = 0; i < KVM_NR_BUSES; i++) + for (i = 0; i < KVM_NR_BUSES; i++) { kvm_io_bus_destroy(kvm->buses[i]); + kvm->buses[i] = NULL; + } kvm_coalesced_mmio_free(kvm); #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); @@ -3376,6 +3378,14 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; + + /* + * It's possible the bus being released before hand. If so, + * we're done here. + */ + if (!bus) + return 0; + r = -ENOENT; for (i = 0; i < bus->dev_count; i++) if (bus->range[i].dev == dev) { -- cgit v1.2.3 From ef55c3df5dbd60eb3daab7797feac850bd1e6fe3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 24 Mar 2017 19:01:09 +0900 Subject: drm/radeon: Override fpfn for all VRAM placements in radeon_evict_flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ce4b4f228e51219b0b79588caf73225b08b5b779 upstream. We were accidentally only overriding the first VRAM placement. For BOs with the RADEON_GEM_NO_CPU_ACCESS flag set, radeon_ttm_placement_from_domain creates a second VRAM placment with fpfn == 0. If VRAM is almost full, the first VRAM placement with fpfn > 0 may not work, but the second one with fpfn == 0 always will (the BO's current location trivially satisfies it). Because "moving" the BO to its current location puts it back on the LRU list, this results in an infinite loop. Fixes: 2a85aedd117c ("drm/radeon: Try evicting from CPU accessible to inaccessible VRAM first") Reported-by: Zachary Michaels Reported-and-Tested-by: Julien Isorce Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_ttm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 35310336dd0a..d684e2b79d2b 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -213,8 +213,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo, rbo->placement.num_busy_placement = 0; for (i = 0; i < rbo->placement.num_placement; i++) { if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) { - if (rbo->placements[0].fpfn < fpfn) - rbo->placements[0].fpfn = fpfn; + if (rbo->placements[i].fpfn < fpfn) + rbo->placements[i].fpfn = fpfn; } else { rbo->placement.busy_placement = &rbo->placements[i]; -- cgit v1.2.3 From 47e2fe17d14d1a7da3a405b1f364c094c271d35c Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 31 Mar 2017 15:11:55 -0700 Subject: mm, hugetlb: use pte_present() instead of pmd_present() in follow_huge_pmd() commit c9d398fa237882ea07167e23bcfc5e6847066518 upstream. I found the race condition which triggers the following bug when move_pages() and soft offline are called on a single hugetlb page concurrently. Soft offlining page 0x119400 at 0x700000000000 BUG: unable to handle kernel paging request at ffffea0011943820 IP: follow_huge_pmd+0x143/0x190 PGD 7ffd2067 PUD 7ffd1067 PMD 0 [61163.582052] Oops: 0000 [#1] SMP Modules linked in: binfmt_misc ppdev virtio_balloon parport_pc pcspkr i2c_piix4 parport i2c_core acpi_cpufreq ip_tables xfs libcrc32c ata_generic pata_acpi virtio_blk 8139too crc32c_intel ata_piix serio_raw libata virtio_pci 8139cp virtio_ring virtio mii floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: cap_check] CPU: 0 PID: 22573 Comm: iterate_numa_mo Tainted: P OE 4.11.0-rc2-mm1+ #2 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:follow_huge_pmd+0x143/0x190 RSP: 0018:ffffc90004bdbcd0 EFLAGS: 00010202 RAX: 0000000465003e80 RBX: ffffea0004e34d30 RCX: 00003ffffffff000 RDX: 0000000011943800 RSI: 0000000000080001 RDI: 0000000465003e80 RBP: ffffc90004bdbd18 R08: 0000000000000000 R09: ffff880138d34000 R10: ffffea0004650000 R11: 0000000000c363b0 R12: ffffea0011943800 R13: ffff8801b8d34000 R14: ffffea0000000000 R15: 000077ff80000000 FS: 00007fc977710740(0000) GS:ffff88007dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffea0011943820 CR3: 000000007a746000 CR4: 00000000001406f0 Call Trace: follow_page_mask+0x270/0x550 SYSC_move_pages+0x4ea/0x8f0 SyS_move_pages+0xe/0x10 do_syscall_64+0x67/0x180 entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:0x7fc976e03949 RSP: 002b:00007ffe72221d88 EFLAGS: 00000246 ORIG_RAX: 0000000000000117 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fc976e03949 RDX: 0000000000c22390 RSI: 0000000000001400 RDI: 0000000000005827 RBP: 00007ffe72221e00 R08: 0000000000c2c3a0 R09: 0000000000000004 R10: 0000000000c363b0 R11: 0000000000000246 R12: 0000000000400650 R13: 00007ffe72221ee0 R14: 0000000000000000 R15: 0000000000000000 Code: 81 e4 ff ff 1f 00 48 21 c2 49 c1 ec 0c 48 c1 ea 0c 4c 01 e2 49 bc 00 00 00 00 00 ea ff ff 48 c1 e2 06 49 01 d4 f6 45 bc 04 74 90 <49> 8b 7c 24 20 40 f6 c7 01 75 2b 4c 89 e7 8b 47 1c 85 c0 7e 2a RIP: follow_huge_pmd+0x143/0x190 RSP: ffffc90004bdbcd0 CR2: ffffea0011943820 ---[ end trace e4f81353a2d23232 ]--- Kernel panic - not syncing: Fatal exception Kernel Offset: disabled This bug is triggered when pmd_present() returns true for non-present hugetlb, so fixing the present check in follow_huge_pmd() prevents it. Using pmd_present() to determine present/non-present for hugetlb is not correct, because pmd_present() checks multiple bits (not only _PAGE_PRESENT) for historical reason and it can misjudge hugetlb state. Fixes: e66f17ff7177 ("mm/hugetlb: take page table lock in follow_huge_pmd()") Link: http://lkml.kernel.org/r/1490149898-20231-1-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Acked-by: Hillf Danton Cc: Hugh Dickins Cc: Michal Hocko Cc: "Kirill A. Shutemov" Cc: Mike Kravetz Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ea11123a9249..7294301d8495 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4362,6 +4362,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, { struct page *page = NULL; spinlock_t *ptl; + pte_t pte; retry: ptl = pmd_lockptr(mm, pmd); spin_lock(ptl); @@ -4371,12 +4372,13 @@ retry: */ if (!pmd_huge(*pmd)) goto out; - if (pmd_present(*pmd)) { + pte = huge_ptep_get((pte_t *)pmd); + if (pte_present(pte)) { page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT); if (flags & FOLL_GET) get_page(page); } else { - if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) { + if (is_hugetlb_entry_migration(pte)) { spin_unlock(ptl); __migration_entry_wait(mm, (pte_t *)pmd, ptl); goto retry; -- cgit v1.2.3 From 6280ac931a23d3fa40cd26057576abcf90a4f22d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 19 Jan 2017 12:28:22 +0100 Subject: MIPS: Lantiq: Fix cascaded IRQ setup commit 6c356eda225e3ee134ed4176b9ae3a76f793f4dd upstream. With the IRQ stack changes integrated, the XRX200 devices started emitting a constant stream of kernel messages like this: [ 565.415310] Spurious IRQ: CAUSE=0x1100c300 This is caused by IP0 getting handled by plat_irq_dispatch() rather than its vectored interrupt handler, which is fixed by commit de856416e714 ("MIPS: IRQ Stack: Fix erroneous jal to plat_irq_dispatch"). Fix plat_irq_dispatch() to handle non-vectored IPI interrupts correctly by setting up IP2-6 as proper chained IRQ handlers and calling do_IRQ for all MIPS CPU interrupts. Signed-off-by: Felix Fietkau Acked-by: John Crispin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15077/ [james.hogan@imgtec.com: tweaked commit message] Signed-off-by: James Hogan Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/mips/lantiq/irq.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index 2e7f60c9fc5d..51cdc46a87e2 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -269,6 +269,11 @@ static void ltq_hw5_irqdispatch(void) DEFINE_HWx_IRQDISPATCH(5) #endif +static void ltq_hw_irq_handler(struct irq_desc *desc) +{ + ltq_hw_irqdispatch(irq_desc_get_irq(desc) - 2); +} + #ifdef CONFIG_MIPS_MT_SMP void __init arch_init_ipiirq(int irq, struct irqaction *action) { @@ -313,23 +318,19 @@ static struct irqaction irq_call = { asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM; - unsigned int i; - - if ((MIPS_CPU_TIMER_IRQ == 7) && (pending & CAUSEF_IP7)) { - do_IRQ(MIPS_CPU_TIMER_IRQ); - goto out; - } else { - for (i = 0; i < MAX_IM; i++) { - if (pending & (CAUSEF_IP2 << i)) { - ltq_hw_irqdispatch(i); - goto out; - } - } + int irq; + + if (!pending) { + spurious_interrupt(); + return; } - pr_alert("Spurious IRQ: CAUSE=0x%08x\n", read_c0_status()); -out: - return; + pending >>= CAUSEB_IP; + while (pending) { + irq = fls(pending) - 1; + do_IRQ(MIPS_CPU_IRQ_BASE + irq); + pending &= ~BIT(irq); + } } static int icu_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) @@ -354,11 +355,6 @@ static const struct irq_domain_ops irq_domain_ops = { .map = icu_map, }; -static struct irqaction cascade = { - .handler = no_action, - .name = "cascade", -}; - int __init icu_of_init(struct device_node *node, struct device_node *parent) { struct device_node *eiu_node; @@ -390,7 +386,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent) mips_cpu_irq_init(); for (i = 0; i < MAX_IM; i++) - setup_irq(i + 2, &cascade); + irq_set_chained_handler(i + 2, ltq_hw_irq_handler); if (cpu_has_vint) { pr_info("Setting up vectored interrupts\n"); -- cgit v1.2.3 From b3ed3864912e8809e228ddea259e8e0fa1deadf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 2 Jul 2016 17:28:08 +0200 Subject: rtc: s35390a: fix reading out alarm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f87e904ddd8f0ef120e46045b0addeb1cc88354e upstream. There are several issues fixed in this patch: - When alarm isn't enabled, set .enabled to zero instead of returning -EINVAL. - Ignore how IRQ1 is configured when determining if IRQ2 is on. - The three alarm registers have an enable flag which must be evaluated. - The chip always triggers when the seconds register gets 0. Note that the rtc framework however doesn't handle the result correctly because it doesn't check wday being initialized and so interprets an alarm being set for 10:00 AM in three days as 10:00 AM tomorrow (or today if that's not over yet). Signed-off-by: Uwe Kleine-König Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-s35390a.c | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c index f40afdd0e5f5..6507a01cf9ad 100644 --- a/drivers/rtc/rtc-s35390a.c +++ b/drivers/rtc/rtc-s35390a.c @@ -242,6 +242,8 @@ static int s35390a_set_alarm(struct i2c_client *client, struct rtc_wkalrm *alm) if (alm->time.tm_wday != -1) buf[S35390A_ALRM_BYTE_WDAY] = bin2bcd(alm->time.tm_wday) | 0x80; + else + buf[S35390A_ALRM_BYTE_WDAY] = 0; buf[S35390A_ALRM_BYTE_HOURS] = s35390a_hr2reg(s35390a, alm->time.tm_hour) | 0x80; @@ -269,23 +271,43 @@ static int s35390a_read_alarm(struct i2c_client *client, struct rtc_wkalrm *alm) if (err < 0) return err; - if (bitrev8(sts) != S35390A_INT2_MODE_ALARM) - return -EINVAL; + if ((bitrev8(sts) & S35390A_INT2_MODE_MASK) != S35390A_INT2_MODE_ALARM) { + /* + * When the alarm isn't enabled, the register to configure + * the alarm time isn't accessible. + */ + alm->enabled = 0; + return 0; + } else { + alm->enabled = 1; + } err = s35390a_get_reg(s35390a, S35390A_CMD_INT2_REG1, buf, sizeof(buf)); if (err < 0) return err; /* This chip returns the bits of each byte in reverse order */ - for (i = 0; i < 3; ++i) { + for (i = 0; i < 3; ++i) buf[i] = bitrev8(buf[i]); - buf[i] &= ~0x80; - } - alm->time.tm_wday = bcd2bin(buf[S35390A_ALRM_BYTE_WDAY]); - alm->time.tm_hour = s35390a_reg2hr(s35390a, - buf[S35390A_ALRM_BYTE_HOURS]); - alm->time.tm_min = bcd2bin(buf[S35390A_ALRM_BYTE_MINS]); + /* + * B0 of the three matching registers is an enable flag. Iff it is set + * the configured value is used for matching. + */ + if (buf[S35390A_ALRM_BYTE_WDAY] & 0x80) + alm->time.tm_wday = + bcd2bin(buf[S35390A_ALRM_BYTE_WDAY] & ~0x80); + + if (buf[S35390A_ALRM_BYTE_HOURS] & 0x80) + alm->time.tm_hour = + s35390a_reg2hr(s35390a, + buf[S35390A_ALRM_BYTE_HOURS] & ~0x80); + + if (buf[S35390A_ALRM_BYTE_MINS] & 0x80) + alm->time.tm_min = bcd2bin(buf[S35390A_ALRM_BYTE_MINS] & ~0x80); + + /* alarm triggers always at s=0 */ + alm->time.tm_sec = 0; dev_dbg(&client->dev, "%s: alm is mins=%d, hours=%d, wday=%d\n", __func__, alm->time.tm_min, alm->time.tm_hour, -- cgit v1.2.3 From fdd4bc9313e59a1757cfc8ac5836cff55ec03eeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 3 Apr 2017 23:32:38 +0200 Subject: rtc: s35390a: make sure all members in the output are set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rtc core calls the .read_alarm with all fields initialized to 0. As the s35390a driver doesn't touch some fields the returned date is interpreted as a date in January 1900. So make sure all fields are set to -1; some of them are then overwritten with the right data depending on the hardware state. In mainline this is done by commit d68778b80dd7 ("rtc: initialize output parameter for read alarm to "uninitialized"") in the core. This is considered to dangerous for stable as it might have side effects for other rtc drivers that might for example rely on alarm->time.tm_sec being initialized to 0. Signed-off-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-s35390a.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c index 6507a01cf9ad..47b88bbe4ce7 100644 --- a/drivers/rtc/rtc-s35390a.c +++ b/drivers/rtc/rtc-s35390a.c @@ -267,6 +267,20 @@ static int s35390a_read_alarm(struct i2c_client *client, struct rtc_wkalrm *alm) char buf[3], sts; int i, err; + /* + * initialize all members to -1 to signal the core that they are not + * defined by the hardware. + */ + alm->time.tm_sec = -1; + alm->time.tm_min = -1; + alm->time.tm_hour = -1; + alm->time.tm_mday = -1; + alm->time.tm_mon = -1; + alm->time.tm_year = -1; + alm->time.tm_wday = -1; + alm->time.tm_yday = -1; + alm->time.tm_isdst = -1; + err = s35390a_get_reg(s35390a, S35390A_CMD_STATUS2, &sts, sizeof(sts)); if (err < 0) return err; -- cgit v1.2.3 From a55ae9d1937b0bf4004e5416cfa15750cd6d2b22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 2 Jul 2016 17:28:09 +0200 Subject: rtc: s35390a: implement reset routine as suggested by the reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8e6583f1b5d1f5f129b873f1428b7e414263d847 upstream. There were two deviations from the reference manual: you have to wait half a second when POC is active and you might have to repeat initialization when POC or BLD are still set after the sequence. Note however that as POC and BLD are cleared by read the driver might not be able to detect that a reset is necessary. I don't have a good idea how to fix this. Additionally report the value read from STATUS1 to the caller. This prepares the next patch. Signed-off-by: Uwe Kleine-König Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-s35390a.c | 65 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 10 deletions(-) diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c index 47b88bbe4ce7..c7c1fce69635 100644 --- a/drivers/rtc/rtc-s35390a.c +++ b/drivers/rtc/rtc-s35390a.c @@ -15,6 +15,7 @@ #include #include #include +#include #define S35390A_CMD_STATUS1 0 #define S35390A_CMD_STATUS2 1 @@ -94,19 +95,63 @@ static int s35390a_get_reg(struct s35390a *s35390a, int reg, char *buf, int len) return 0; } -static int s35390a_reset(struct s35390a *s35390a) +/* + * Returns <0 on error, 0 if rtc is setup fine and 1 if the chip was reset. + * To keep the information if an irq is pending, pass the value read from + * STATUS1 to the caller. + */ +static int s35390a_reset(struct s35390a *s35390a, char *status1) { - char buf[1]; + char buf; + int ret; + unsigned initcount = 0; - if (s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, buf, sizeof(buf)) < 0) - return -EIO; + ret = s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, status1, 1); + if (ret < 0) + return ret; - if (!(buf[0] & (S35390A_FLAG_POC | S35390A_FLAG_BLD))) + if (*status1 & S35390A_FLAG_POC) + /* + * Do not communicate for 0.5 seconds since the power-on + * detection circuit is in operation. + */ + msleep(500); + else if (!(*status1 & S35390A_FLAG_BLD)) + /* + * If both POC and BLD are unset everything is fine. + */ return 0; - buf[0] |= (S35390A_FLAG_RESET | S35390A_FLAG_24H); - buf[0] &= 0xf0; - return s35390a_set_reg(s35390a, S35390A_CMD_STATUS1, buf, sizeof(buf)); + /* + * At least one of POC and BLD are set, so reinitialise chip. Keeping + * this information in the hardware to know later that the time isn't + * valid is unfortunately not possible because POC and BLD are cleared + * on read. So the reset is best done now. + * + * The 24H bit is kept over reset, so set it already here. + */ +initialize: + *status1 = S35390A_FLAG_24H; + buf = S35390A_FLAG_RESET | S35390A_FLAG_24H; + ret = s35390a_set_reg(s35390a, S35390A_CMD_STATUS1, &buf, 1); + + if (ret < 0) + return ret; + + ret = s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, &buf, 1); + if (ret < 0) + return ret; + + if (buf & (S35390A_FLAG_POC | S35390A_FLAG_BLD)) { + /* Try up to five times to reset the chip */ + if (initcount < 5) { + ++initcount; + goto initialize; + } else + return -EIO; + } + + return 1; } static int s35390a_disable_test_mode(struct s35390a *s35390a) @@ -367,7 +412,7 @@ static int s35390a_probe(struct i2c_client *client, unsigned int i; struct s35390a *s35390a; struct rtc_time tm; - char buf[1]; + char buf[1], status1; if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { err = -ENODEV; @@ -396,7 +441,7 @@ static int s35390a_probe(struct i2c_client *client, } } - err = s35390a_reset(s35390a); + err = s35390a_reset(s35390a, &status1); if (err < 0) { dev_err(&client->dev, "error resetting chip\n"); goto exit_dummy; -- cgit v1.2.3 From 3a1246b46df5210164ee43d4c5c560d0dc9ed2ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 2 Jul 2016 17:28:10 +0200 Subject: rtc: s35390a: improve irq handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3bd32722c827d00eafe8e6d5b83e9f3148ea7c7e upstream. On some QNAP NAS devices the rtc can wake the machine. Several people noticed that once the machine was woken this way it fails to shut down. That's because the driver fails to acknowledge the interrupt and so it keeps active and restarts the machine immediatly after shutdown. See https://bugs.debian.org/794266 for a bug report. Doing this correctly requires to interpret the INT2 flag of the first read of the STATUS1 register because this bit is cleared by read. Note this is not maximally robust though because a pending irq isn't detected when the STATUS1 register was already read (and so INT2 is not set) but the irq was not disabled. But that is a hardware imposed problem that cannot easily be fixed by software. Signed-off-by: Uwe Kleine-König Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-s35390a.c | 48 ++++++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c index c7c1fce69635..00662dd28d66 100644 --- a/drivers/rtc/rtc-s35390a.c +++ b/drivers/rtc/rtc-s35390a.c @@ -35,10 +35,14 @@ #define S35390A_ALRM_BYTE_HOURS 1 #define S35390A_ALRM_BYTE_MINS 2 +/* flags for STATUS1 */ #define S35390A_FLAG_POC 0x01 #define S35390A_FLAG_BLD 0x02 +#define S35390A_FLAG_INT2 0x04 #define S35390A_FLAG_24H 0x40 #define S35390A_FLAG_RESET 0x80 + +/* flag for STATUS2 */ #define S35390A_FLAG_TEST 0x01 #define S35390A_INT2_MODE_MASK 0xF0 @@ -408,11 +412,11 @@ static struct i2c_driver s35390a_driver; static int s35390a_probe(struct i2c_client *client, const struct i2c_device_id *id) { - int err; + int err, err_reset; unsigned int i; struct s35390a *s35390a; struct rtc_time tm; - char buf[1], status1; + char buf, status1; if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { err = -ENODEV; @@ -441,29 +445,35 @@ static int s35390a_probe(struct i2c_client *client, } } - err = s35390a_reset(s35390a, &status1); - if (err < 0) { + err_reset = s35390a_reset(s35390a, &status1); + if (err_reset < 0) { + err = err_reset; dev_err(&client->dev, "error resetting chip\n"); goto exit_dummy; } - err = s35390a_disable_test_mode(s35390a); - if (err < 0) { - dev_err(&client->dev, "error disabling test mode\n"); - goto exit_dummy; - } - - err = s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, buf, sizeof(buf)); - if (err < 0) { - dev_err(&client->dev, "error checking 12/24 hour mode\n"); - goto exit_dummy; - } - if (buf[0] & S35390A_FLAG_24H) + if (status1 & S35390A_FLAG_24H) s35390a->twentyfourhour = 1; else s35390a->twentyfourhour = 0; - if (s35390a_get_datetime(client, &tm) < 0) + if (status1 & S35390A_FLAG_INT2) { + /* disable alarm (and maybe test mode) */ + buf = 0; + err = s35390a_set_reg(s35390a, S35390A_CMD_STATUS2, &buf, 1); + if (err < 0) { + dev_err(&client->dev, "error disabling alarm"); + goto exit_dummy; + } + } else { + err = s35390a_disable_test_mode(s35390a); + if (err < 0) { + dev_err(&client->dev, "error disabling test mode\n"); + goto exit_dummy; + } + } + + if (err_reset > 0 || s35390a_get_datetime(client, &tm) < 0) dev_warn(&client->dev, "clock needs to be set\n"); device_set_wakeup_capable(&client->dev, 1); @@ -476,6 +486,10 @@ static int s35390a_probe(struct i2c_client *client, err = PTR_ERR(s35390a->rtc); goto exit_dummy; } + + if (status1 & S35390A_FLAG_INT2) + rtc_update_irq(s35390a->rtc, 1, RTC_AF); + return 0; exit_dummy: -- cgit v1.2.3 From 42462d23e60b89a3c2f7d8d63f5f4e464ba77727 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 23 Mar 2017 18:24:19 +0100 Subject: KVM: kvm_io_bus_unregister_dev() should never fail commit 90db10434b163e46da413d34db8d0e77404cc645 upstream. No caller currently checks the return value of kvm_io_bus_unregister_dev(). This is evil, as all callers silently go on freeing their device. A stale reference will remain in the io_bus, getting at least used again, when the iobus gets teared down on kvm_destroy_vm() - leading to use after free errors. There is nothing the callers could do, except retrying over and over again. So let's simply remove the bus altogether, print an error and make sure no one can access this broken bus again (returning -ENOMEM on any attempt to access it). Fixes: e93f8a0f821e ("KVM: convert io_bus to SRCU") Reported-by: Dmitry Vyukov Reviewed-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- include/linux/kvm_host.h | 4 ++-- virt/kvm/eventfd.c | 3 ++- virt/kvm/kvm_main.c | 40 +++++++++++++++++++++++----------------- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c923350ca20a..d7ce4e3280db 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -182,8 +182,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, void *val); int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev); -int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, - struct kvm_io_device *dev); +void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_io_device *dev); #ifdef CONFIG_KVM_ASYNC_PF struct kvm_async_pf { diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 46dbc0a7dfc1..49001fa84ead 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -868,7 +868,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, continue; kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); - kvm->buses[bus_idx]->ioeventfd_count--; + if (kvm->buses[bus_idx]) + kvm->buses[bus_idx]->ioeventfd_count--; ioeventfd_release(p); ret = 0; break; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1ac5b7be7282..cb092bd9965b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -655,7 +655,8 @@ static void kvm_destroy_vm(struct kvm *kvm) spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { - kvm_io_bus_destroy(kvm->buses[i]); + if (kvm->buses[i]) + kvm_io_bus_destroy(kvm->buses[i]); kvm->buses[i] = NULL; } kvm_coalesced_mmio_free(kvm); @@ -3273,6 +3274,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, }; bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + if (!bus) + return -ENOMEM; r = __kvm_io_bus_write(vcpu, bus, &range, val); return r < 0 ? r : 0; } @@ -3290,6 +3293,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, }; bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + if (!bus) + return -ENOMEM; /* First try the device referenced by cookie. */ if ((cookie >= 0) && (cookie < bus->dev_count) && @@ -3340,6 +3345,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, }; bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + if (!bus) + return -ENOMEM; r = __kvm_io_bus_read(vcpu, bus, &range, val); return r < 0 ? r : 0; } @@ -3352,6 +3359,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; + if (!bus) + return -ENOMEM; + /* exclude ioeventfd which is limited by maximum fd */ if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) return -ENOSPC; @@ -3371,45 +3381,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, } /* Caller must hold slots_lock. */ -int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, - struct kvm_io_device *dev) +void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_io_device *dev) { - int i, r; + int i; struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; - - /* - * It's possible the bus being released before hand. If so, - * we're done here. - */ if (!bus) - return 0; + return; - r = -ENOENT; for (i = 0; i < bus->dev_count; i++) if (bus->range[i].dev == dev) { - r = 0; break; } - if (r) - return r; + if (i == bus->dev_count) + return; new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * sizeof(struct kvm_io_range)), GFP_KERNEL); - if (!new_bus) - return -ENOMEM; + if (!new_bus) { + pr_err("kvm: failed to shrink bus, removing it completely\n"); + goto broken; + } memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); new_bus->dev_count--; memcpy(new_bus->range + i, bus->range + i + 1, (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); +broken: rcu_assign_pointer(kvm->buses[bus_idx], new_bus); synchronize_srcu_expedited(&kvm->srcu); kfree(bus); - return r; + return; } static struct notifier_block kvm_cpu_notifier = { -- cgit v1.2.3 From 063d30f187f5c492aa4a6cca88b8afa08f5a170c Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 25 Oct 2016 11:37:59 +0200 Subject: power: reset: at91-poweroff: timely shutdown LPDDR memories commit 0b0408745e7ff24757cbfd571d69026c0ddb803c upstream. LPDDR memories can only handle up to 400 uncontrolled power off. Ensure the proper power off sequence is used before shutting down the platform. Signed-off-by: Alexandre Belloni Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/reset/at91-poweroff.c | 54 ++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/drivers/power/reset/at91-poweroff.c b/drivers/power/reset/at91-poweroff.c index e9e24df35f26..2579f025b90b 100644 --- a/drivers/power/reset/at91-poweroff.c +++ b/drivers/power/reset/at91-poweroff.c @@ -14,9 +14,12 @@ #include #include #include +#include #include #include +#include + #define AT91_SHDW_CR 0x00 /* Shut Down Control Register */ #define AT91_SHDW_SHDW BIT(0) /* Shut Down command */ #define AT91_SHDW_KEY (0xa5 << 24) /* KEY Password */ @@ -50,6 +53,7 @@ static const char *shdwc_wakeup_modes[] = { static void __iomem *at91_shdwc_base; static struct clk *sclk; +static void __iomem *mpddrc_base; static void __init at91_wakeup_status(void) { @@ -73,6 +77,29 @@ static void at91_poweroff(void) writel(AT91_SHDW_KEY | AT91_SHDW_SHDW, at91_shdwc_base + AT91_SHDW_CR); } +static void at91_lpddr_poweroff(void) +{ + asm volatile( + /* Align to cache lines */ + ".balign 32\n\t" + + /* Ensure AT91_SHDW_CR is in the TLB by reading it */ + " ldr r6, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t" + + /* Power down SDRAM0 */ + " str %1, [%0, #" __stringify(AT91_DDRSDRC_LPR) "]\n\t" + /* Shutdown CPU */ + " str %3, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t" + + " b .\n\t" + : + : "r" (mpddrc_base), + "r" cpu_to_le32(AT91_DDRSDRC_LPDDR2_PWOFF), + "r" (at91_shdwc_base), + "r" cpu_to_le32(AT91_SHDW_KEY | AT91_SHDW_SHDW) + : "r0"); +} + static int at91_poweroff_get_wakeup_mode(struct device_node *np) { const char *pm; @@ -124,6 +151,8 @@ static void at91_poweroff_dt_set_wakeup_mode(struct platform_device *pdev) static int __init at91_poweroff_probe(struct platform_device *pdev) { struct resource *res; + struct device_node *np; + u32 ddr_type; int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -150,12 +179,30 @@ static int __init at91_poweroff_probe(struct platform_device *pdev) pm_power_off = at91_poweroff; + np = of_find_compatible_node(NULL, NULL, "atmel,sama5d3-ddramc"); + if (!np) + return 0; + + mpddrc_base = of_iomap(np, 0); + of_node_put(np); + + if (!mpddrc_base) + return 0; + + ddr_type = readl(mpddrc_base + AT91_DDRSDRC_MDR) & AT91_DDRSDRC_MD; + if ((ddr_type == AT91_DDRSDRC_MD_LPDDR2) || + (ddr_type == AT91_DDRSDRC_MD_LPDDR3)) + pm_power_off = at91_lpddr_poweroff; + else + iounmap(mpddrc_base); + return 0; } static int __exit at91_poweroff_remove(struct platform_device *pdev) { - if (pm_power_off == at91_poweroff) + if (pm_power_off == at91_poweroff || + pm_power_off == at91_lpddr_poweroff) pm_power_off = NULL; clk_disable_unprepare(sclk); @@ -163,6 +210,11 @@ static int __exit at91_poweroff_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id at91_ramc_of_match[] = { + { .compatible = "atmel,sama5d3-ddramc", }, + { /* sentinel */ } +}; + static const struct of_device_id at91_poweroff_of_match[] = { { .compatible = "atmel,at91sam9260-shdwc", }, { .compatible = "atmel,at91sam9rl-shdwc", }, -- cgit v1.2.3 From 2cbd78f4239bd28b86c6ff8e3b7867db72762f1a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 8 Mar 2017 07:38:05 +1100 Subject: blk: improve order of bio handling in generic_make_request() commit 79bd99596b7305ab08109a8bf44a6a4511dbf1cd upstream. To avoid recursion on the kernel stack when stacked block devices are in use, generic_make_request() will, when called recursively, queue new requests for later handling. They will be handled when the make_request_fn for the current bio completes. If any bios are submitted by a make_request_fn, these will ultimately be handled seqeuntially. If the handling of one of those generates further requests, they will be added to the end of the queue. This strict first-in-first-out behaviour can lead to deadlocks in various ways, normally because a request might need to wait for a previous request to the same device to complete. This can happen when they share a mempool, and can happen due to interdependencies particular to the device. Both md and dm have examples where this happens. These deadlocks can be erradicated by more selective ordering of bios. Specifically by handling them in depth-first order. That is: when the handling of one bio generates one or more further bios, they are handled immediately after the parent, before any siblings of the parent. That way, when generic_make_request() calls make_request_fn for some particular device, we can be certain that all previously submited requests for that device have been completely handled and are not waiting for anything in the queue of requests maintained in generic_make_request(). An easy way to achieve this would be to use a last-in-first-out stack instead of a queue. However this will change the order of consecutive bios submitted by a make_request_fn, which could have unexpected consequences. Instead we take a slightly more complex approach. A fresh queue is created for each call to a make_request_fn. After it completes, any bios for a different device are placed on the front of the main queue, followed by any bios for the same device, followed by all bios that were already on the queue before the make_request_fn was called. This provides the depth-first approach without reordering bios on the same level. This, by itself, it not enough to remove all deadlocks. It just makes it possible for drivers to take the extra step required themselves. To avoid deadlocks, drivers must never risk waiting for a request after submitting one to generic_make_request. This includes never allocing from a mempool twice in the one call to a make_request_fn. A common pattern in drivers is to call bio_split() in a loop, handling the first part and then looping around to possibly split the next part. Instead, a driver that finds it needs to split a bio should queue (with generic_make_request) the second part, handle the first part, and then return. The new code in generic_make_request will ensure the requests to underlying bios are processed first, then the second bio that was split off. If it splits again, the same process happens. In each case one bio will be completely handled before the next one is attempted. With this is place, it should be possible to disable the punt_bios_to_recover() recovery thread for many block devices, and eventually it may be possible to remove it completely. Ref: http://www.spinics.net/lists/raid/msg54680.html Tested-by: Jinpu Wang Inspired-by: Lars Ellenberg Signed-off-by: NeilBrown Signed-off-by: Jens Axboe [jwang: backport to 4.4] Signed-off-by: Jack Wang Signed-off-by: Greg Kroah-Hartman --- block/blk-core.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 4fab5d610805..7a58a22d1595 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2063,18 +2063,33 @@ blk_qc_t generic_make_request(struct bio *bio) struct request_queue *q = bdev_get_queue(bio->bi_bdev); if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) { + struct bio_list lower, same, hold; + + /* Create a fresh bio_list for all subordinate requests */ + hold = bio_list_on_stack; + bio_list_init(&bio_list_on_stack); ret = q->make_request_fn(q, bio); blk_queue_exit(q); - - bio = bio_list_pop(current->bio_list); + /* sort new bios into those for a lower level + * and those for the same level + */ + bio_list_init(&lower); + bio_list_init(&same); + while ((bio = bio_list_pop(&bio_list_on_stack)) != NULL) + if (q == bdev_get_queue(bio->bi_bdev)) + bio_list_add(&same, bio); + else + bio_list_add(&lower, bio); + /* now assemble so we handle the lowest level first */ + bio_list_merge(&bio_list_on_stack, &lower); + bio_list_merge(&bio_list_on_stack, &same); + bio_list_merge(&bio_list_on_stack, &hold); } else { - struct bio *bio_next = bio_list_pop(current->bio_list); - bio_io_error(bio); - bio = bio_next; } + bio = bio_list_pop(current->bio_list); } while (bio); current->bio_list = NULL; /* deactivate */ -- cgit v1.2.3 From 5cca175b6cda16b68b18967210872327b1cadf4f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 10 Mar 2017 17:00:47 +1100 Subject: blk: Ensure users for current->bio_list can see the full list. commit f5fe1b51905df7cfe4fdfd85c5fb7bc5b71a094f upstream. Commit 79bd99596b73 ("blk: improve order of bio handling in generic_make_request()") changed current->bio_list so that it did not contain *all* of the queued bios, but only those submitted by the currently running make_request_fn. There are two places which walk the list and requeue selected bios, and others that check if the list is empty. These are no longer correct. So redefine current->bio_list to point to an array of two lists, which contain all queued bios, and adjust various code to test or walk both lists. Signed-off-by: NeilBrown Fixes: 79bd99596b73 ("blk: improve order of bio handling in generic_make_request()") Signed-off-by: Jens Axboe [jwang: backport to 4.4] Signed-off-by: Jack Wang Signed-off-by: Greg Kroah-Hartman [bwh: Restore changes in device-mapper from upstream version] Signed-off-by: Ben Hutchings --- block/bio.c | 12 +++++++++--- block/blk-core.c | 31 +++++++++++++++++++------------ drivers/md/dm.c | 29 ++++++++++++++++------------- drivers/md/raid1.c | 3 ++- drivers/md/raid10.c | 3 ++- 5 files changed, 48 insertions(+), 30 deletions(-) diff --git a/block/bio.c b/block/bio.c index 46e2cc1d4016..14263fab94d3 100644 --- a/block/bio.c +++ b/block/bio.c @@ -373,10 +373,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs) bio_list_init(&punt); bio_list_init(&nopunt); - while ((bio = bio_list_pop(current->bio_list))) + while ((bio = bio_list_pop(¤t->bio_list[0]))) bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); + current->bio_list[0] = nopunt; - *current->bio_list = nopunt; + bio_list_init(&nopunt); + while ((bio = bio_list_pop(¤t->bio_list[1]))) + bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); + current->bio_list[1] = nopunt; spin_lock(&bs->rescue_lock); bio_list_merge(&bs->rescue_list, &punt); @@ -464,7 +468,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) * we retry with the original gfp_flags. */ - if (current->bio_list && !bio_list_empty(current->bio_list)) + if (current->bio_list && + (!bio_list_empty(¤t->bio_list[0]) || + !bio_list_empty(¤t->bio_list[1]))) gfp_mask &= ~__GFP_DIRECT_RECLAIM; p = mempool_alloc(bs->bio_pool, gfp_mask); diff --git a/block/blk-core.c b/block/blk-core.c index 7a58a22d1595..ef083e7a37c5 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2021,7 +2021,14 @@ end_io: */ blk_qc_t generic_make_request(struct bio *bio) { - struct bio_list bio_list_on_stack; + /* + * bio_list_on_stack[0] contains bios submitted by the current + * make_request_fn. + * bio_list_on_stack[1] contains bios that were submitted before + * the current make_request_fn, but that haven't been processed + * yet. + */ + struct bio_list bio_list_on_stack[2]; blk_qc_t ret = BLK_QC_T_NONE; if (!generic_make_request_checks(bio)) @@ -2038,7 +2045,7 @@ blk_qc_t generic_make_request(struct bio *bio) * should be added at the tail */ if (current->bio_list) { - bio_list_add(current->bio_list, bio); + bio_list_add(¤t->bio_list[0], bio); goto out; } @@ -2057,17 +2064,17 @@ blk_qc_t generic_make_request(struct bio *bio) * bio_list, and call into ->make_request() again. */ BUG_ON(bio->bi_next); - bio_list_init(&bio_list_on_stack); - current->bio_list = &bio_list_on_stack; + bio_list_init(&bio_list_on_stack[0]); + current->bio_list = bio_list_on_stack; do { struct request_queue *q = bdev_get_queue(bio->bi_bdev); if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) { - struct bio_list lower, same, hold; + struct bio_list lower, same; /* Create a fresh bio_list for all subordinate requests */ - hold = bio_list_on_stack; - bio_list_init(&bio_list_on_stack); + bio_list_on_stack[1] = bio_list_on_stack[0]; + bio_list_init(&bio_list_on_stack[0]); ret = q->make_request_fn(q, bio); @@ -2077,19 +2084,19 @@ blk_qc_t generic_make_request(struct bio *bio) */ bio_list_init(&lower); bio_list_init(&same); - while ((bio = bio_list_pop(&bio_list_on_stack)) != NULL) + while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) if (q == bdev_get_queue(bio->bi_bdev)) bio_list_add(&same, bio); else bio_list_add(&lower, bio); /* now assemble so we handle the lowest level first */ - bio_list_merge(&bio_list_on_stack, &lower); - bio_list_merge(&bio_list_on_stack, &same); - bio_list_merge(&bio_list_on_stack, &hold); + bio_list_merge(&bio_list_on_stack[0], &lower); + bio_list_merge(&bio_list_on_stack[0], &same); + bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); } else { bio_io_error(bio); } - bio = bio_list_pop(current->bio_list); + bio = bio_list_pop(&bio_list_on_stack[0]); } while (bio); current->bio_list = NULL; /* deactivate */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 397f0454100b..320eb3c4bb6b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1481,26 +1481,29 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule) struct dm_offload *o = container_of(cb, struct dm_offload, cb); struct bio_list list; struct bio *bio; + int i; INIT_LIST_HEAD(&o->cb.list); if (unlikely(!current->bio_list)) return; - list = *current->bio_list; - bio_list_init(current->bio_list); - - while ((bio = bio_list_pop(&list))) { - struct bio_set *bs = bio->bi_pool; - if (unlikely(!bs) || bs == fs_bio_set) { - bio_list_add(current->bio_list, bio); - continue; + for (i = 0; i < 2; i++) { + list = current->bio_list[i]; + bio_list_init(¤t->bio_list[i]); + + while ((bio = bio_list_pop(&list))) { + struct bio_set *bs = bio->bi_pool; + if (unlikely(!bs) || bs == fs_bio_set) { + bio_list_add(¤t->bio_list[i], bio); + continue; + } + + spin_lock(&bs->rescue_lock); + bio_list_add(&bs->rescue_list, bio); + queue_work(bs->rescue_workqueue, &bs->rescue_work); + spin_unlock(&bs->rescue_lock); } - - spin_lock(&bs->rescue_lock); - bio_list_add(&bs->rescue_list, bio); - queue_work(bs->rescue_workqueue, &bs->rescue_work); - spin_unlock(&bs->rescue_lock); } } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 515554c7365b..9be39988bf06 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -877,7 +877,8 @@ static sector_t wait_barrier(struct r1conf *conf, struct bio *bio) ((conf->start_next_window < conf->next_resync + RESYNC_SECTORS) && current->bio_list && - !bio_list_empty(current->bio_list))), + (!bio_list_empty(¤t->bio_list[0]) || + !bio_list_empty(¤t->bio_list[1])))), conf->resync_lock); conf->nr_waiting--; } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index a92979e704e3..e5ee4e9e0ea5 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -946,7 +946,8 @@ static void wait_barrier(struct r10conf *conf) !conf->barrier || (conf->nr_pending && current->bio_list && - !bio_list_empty(current->bio_list)), + (!bio_list_empty(¤t->bio_list[0]) || + !bio_list_empty(¤t->bio_list[1]))), conf->resync_lock); conf->nr_waiting--; } -- cgit v1.2.3 From 84bd21a708b83a24d26cd0010ea94106c96557de Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 23 Mar 2017 12:24:43 +0100 Subject: padata: avoid race in reordering commit de5540d088fe97ad583cc7d396586437b32149a5 upstream. Under extremely heavy uses of padata, crashes occur, and with list debugging turned on, this happens instead: [87487.298728] WARNING: CPU: 1 PID: 882 at lib/list_debug.c:33 __list_add+0xae/0x130 [87487.301868] list_add corruption. prev->next should be next (ffffb17abfc043d0), but was ffff8dba70872c80. (prev=ffff8dba70872b00). [87487.339011] [] dump_stack+0x68/0xa3 [87487.342198] [] ? console_unlock+0x281/0x6d0 [87487.345364] [] __warn+0xff/0x140 [87487.348513] [] warn_slowpath_fmt+0x4a/0x50 [87487.351659] [] __list_add+0xae/0x130 [87487.354772] [] ? _raw_spin_lock+0x64/0x70 [87487.357915] [] padata_reorder+0x1e6/0x420 [87487.361084] [] padata_do_serial+0xa5/0x120 padata_reorder calls list_add_tail with the list to which its adding locked, which seems correct: spin_lock(&squeue->serial.lock); list_add_tail(&padata->list, &squeue->serial.list); spin_unlock(&squeue->serial.lock); This therefore leaves only place where such inconsistency could occur: if padata->list is added at the same time on two different threads. This pdata pointer comes from the function call to padata_get_next(pd), which has in it the following block: next_queue = per_cpu_ptr(pd->pqueue, cpu); padata = NULL; reorder = &next_queue->reorder; if (!list_empty(&reorder->list)) { padata = list_entry(reorder->list.next, struct padata_priv, list); spin_lock(&reorder->lock); list_del_init(&padata->list); atomic_dec(&pd->reorder_objects); spin_unlock(&reorder->lock); pd->processed++; goto out; } out: return padata; I strongly suspect that the problem here is that two threads can race on reorder list. Even though the deletion is locked, call to list_entry is not locked, which means it's feasible that two threads pick up the same padata object and subsequently call list_add_tail on them at the same time. The fix is thus be hoist that lock outside of that block. Signed-off-by: Jason A. Donenfeld Acked-by: Steffen Klassert Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- kernel/padata.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/padata.c b/kernel/padata.c index b38bea9c466a..401227e3967c 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -189,19 +189,20 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) reorder = &next_queue->reorder; + spin_lock(&reorder->lock); if (!list_empty(&reorder->list)) { padata = list_entry(reorder->list.next, struct padata_priv, list); - spin_lock(&reorder->lock); list_del_init(&padata->list); atomic_dec(&pd->reorder_objects); - spin_unlock(&reorder->lock); pd->processed++; + spin_unlock(&reorder->lock); goto out; } + spin_unlock(&reorder->lock); if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) { padata = ERR_PTR(-ENODATA); -- cgit v1.2.3 From 8f8ee9706b0a64a3506b9d9789ace7c44f3d817d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 8 Apr 2017 09:53:53 +0200 Subject: Linux 4.4.60 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 083724c6ca4d..fb7c2b40753d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 59 +SUBLEVEL = 60 EXTRAVERSION = NAME = Blurry Fish Butt -- cgit v1.2.3 From 6a3b9c4984f9edc5a136720e42f1d2ab387857a4 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Mon, 10 Apr 2017 18:35:25 +0000 Subject: Revert "Revert "CHROMIUM: android: binder: Fix potential scheduling-while-atomic"" This reverts commit 13c17d0179f9a055062f37e91a6f6cf00a249ebd. Change-Id: I8c3a7eefb72b85c0dd05996c2705636fcbc871f7 --- drivers/android/binder.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 9cf4f9bbc711..22025bcc38e9 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -417,6 +417,7 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) struct files_struct *files = proc->files; unsigned long rlim_cur; unsigned long irqs; + int ret; if (files == NULL) return -ESRCH; @@ -427,7 +428,11 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); unlock_task_sighand(proc->tsk, &irqs); - return __alloc_fd(files, 0, rlim_cur, flags); + preempt_enable_no_resched(); + ret = __alloc_fd(files, 0, rlim_cur, flags); + preempt_disable(); + + return ret; } /* @@ -436,8 +441,11 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) static void task_fd_install( struct binder_proc *proc, unsigned int fd, struct file *file) { - if (proc->files) + if (proc->files) { + preempt_enable_no_resched(); __fd_install(proc->files, fd, file); + preempt_disable(); + } } /* -- cgit v1.2.3 From a2d978c2adcb7f06b24c4491188d122623384690 Mon Sep 17 00:00:00 2001 From: Lianwei Wang Date: Sun, 19 Jun 2016 23:52:27 -0700 Subject: UPSTREAM: PM / sleep: make PM notifiers called symmetrically (cherry picked from commit ea00f4f4f00cc2bc3b63ad512a4e6df3b20832b9) This makes pm notifier PREPARE/POST symmetrical: if PREPARE fails, we will only undo what ever happened on PREPARE. It fixes the unbalanced CPU hotplug enable in CPU PM notifier. Change-Id: I01dce3cc95c5d6b8913b7b6be301f2909258c745 Signed-off-by: Lianwei Wang Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 20 ++++++++++++-------- kernel/power/main.c | 11 +++++++++-- kernel/power/power.h | 2 ++ kernel/power/suspend.c | 10 ++++++---- kernel/power/user.c | 14 ++++++++------ 5 files changed, 37 insertions(+), 20 deletions(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 3124cebaec31..797f19e2aaa9 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -647,7 +647,7 @@ static void power_down(void) */ int hibernate(void) { - int error; + int error, nr_calls = 0; if (!hibernation_available()) { pr_debug("PM: Hibernation not available.\n"); @@ -662,9 +662,11 @@ int hibernate(void) } pm_prepare_console(); - error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); - if (error) + error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls); + if (error) { + nr_calls--; goto Exit; + } printk(KERN_INFO "PM: Syncing filesystems ... "); sys_sync(); @@ -714,7 +716,7 @@ int hibernate(void) /* Don't bother checking whether freezer_test_done is true */ freezer_test_done = false; Exit: - pm_notifier_call_chain(PM_POST_HIBERNATION); + __pm_notifier_call_chain(PM_POST_HIBERNATION, nr_calls, NULL); pm_restore_console(); atomic_inc(&snapshot_device_available); Unlock: @@ -740,7 +742,7 @@ int hibernate(void) */ static int software_resume(void) { - int error; + int error, nr_calls = 0; unsigned int flags; /* @@ -827,9 +829,11 @@ static int software_resume(void) } pm_prepare_console(); - error = pm_notifier_call_chain(PM_RESTORE_PREPARE); - if (error) + error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls); + if (error) { + nr_calls--; goto Close_Finish; + } pr_debug("PM: Preparing processes for restore.\n"); error = freeze_processes(); @@ -855,7 +859,7 @@ static int software_resume(void) unlock_device_hotplug(); thaw_processes(); Finish: - pm_notifier_call_chain(PM_POST_RESTORE); + __pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL); pm_restore_console(); atomic_inc(&snapshot_device_available); /* For success case, the suspend path will release the lock */ diff --git a/kernel/power/main.c b/kernel/power/main.c index b2dd4d999900..68c0eaae8034 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -38,12 +38,19 @@ int unregister_pm_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_pm_notifier); -int pm_notifier_call_chain(unsigned long val) +int __pm_notifier_call_chain(unsigned long val, int nr_to_call, int *nr_calls) { - int ret = blocking_notifier_call_chain(&pm_chain_head, val, NULL); + int ret; + + ret = __blocking_notifier_call_chain(&pm_chain_head, val, NULL, + nr_to_call, nr_calls); return notifier_to_errno(ret); } +int pm_notifier_call_chain(unsigned long val) +{ + return __pm_notifier_call_chain(val, -1, NULL); +} /* If set, devices may be suspended and resumed asynchronously. */ int pm_async_enabled = 1; diff --git a/kernel/power/power.h b/kernel/power/power.h index caadb566e82b..436da5bc72f4 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -191,6 +191,8 @@ static inline void suspend_test_finish(const char *label) {} #ifdef CONFIG_PM_SLEEP /* kernel/power/main.c */ +extern int __pm_notifier_call_chain(unsigned long val, int nr_to_call, + int *nr_calls); extern int pm_notifier_call_chain(unsigned long val); #endif diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 024411816ccf..58209d8bfc56 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -268,16 +268,18 @@ static int suspend_test(int level) */ static int suspend_prepare(suspend_state_t state) { - int error; + int error, nr_calls = 0; if (!sleep_state_supported(state)) return -EPERM; pm_prepare_console(); - error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); - if (error) + error = __pm_notifier_call_chain(PM_SUSPEND_PREPARE, -1, &nr_calls); + if (error) { + nr_calls--; goto Finish; + } trace_suspend_resume(TPS("freeze_processes"), 0, true); error = suspend_freeze_processes(); @@ -288,7 +290,7 @@ static int suspend_prepare(suspend_state_t state) suspend_stats.failed_freeze++; dpm_save_failed_step(SUSPEND_FREEZE); Finish: - pm_notifier_call_chain(PM_POST_SUSPEND); + __pm_notifier_call_chain(PM_POST_SUSPEND, nr_calls, NULL); pm_restore_console(); return error; } diff --git a/kernel/power/user.c b/kernel/power/user.c index 526e8911460a..35310b627388 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -47,7 +47,7 @@ atomic_t snapshot_device_available = ATOMIC_INIT(1); static int snapshot_open(struct inode *inode, struct file *filp) { struct snapshot_data *data; - int error; + int error, nr_calls = 0; if (!hibernation_available()) return -EPERM; @@ -74,9 +74,9 @@ static int snapshot_open(struct inode *inode, struct file *filp) swap_type_of(swsusp_resume_device, 0, NULL) : -1; data->mode = O_RDONLY; data->free_bitmaps = false; - error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); + error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls); if (error) - pm_notifier_call_chain(PM_POST_HIBERNATION); + __pm_notifier_call_chain(PM_POST_HIBERNATION, --nr_calls, NULL); } else { /* * Resuming. We may need to wait for the image device to @@ -86,13 +86,15 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->swap = -1; data->mode = O_WRONLY; - error = pm_notifier_call_chain(PM_RESTORE_PREPARE); + error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls); if (!error) { error = create_basic_memory_bitmaps(); data->free_bitmaps = !error; - } + } else + nr_calls--; + if (error) - pm_notifier_call_chain(PM_POST_RESTORE); + __pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL); } if (error) atomic_inc(&snapshot_device_available); -- cgit v1.2.3 From 8eb24ae4dcd3e2b40a9b0f7ca585ff0dab2350c0 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 7 Apr 2017 15:22:19 -0700 Subject: UPSTREAM: checkpatch: special audit for revert commit line Currently checkpatch.pl does not recognize git's default commit revert message and will complain about the hash format. Add special audit for revert commit message line to fix it. Signed-off-by: Wei Wang Acked-by: Joe Perches Bug: 37158168 Test: checkpatch.pl --patch [diff] and no longer see failure Change-Id: I65cf9a46874621dd6d5c349d2d3ca3b862d61ba3 --- scripts/checkpatch.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 2b3c22808c3b..e70147742cce 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2348,6 +2348,7 @@ sub process { # Check for git id commit length and improperly formed commit descriptions if ($in_commit_log && !$commit_log_possible_stack_dump && + $line !~ /^This reverts commit [0-9a-f]{7,40}/ && ($line =~ /\bcommit\s+[0-9a-f]{5,}\b/i || ($line =~ /\b[0-9a-f]{12,40}\b/i && $line !~ /[\<\[][0-9a-f]{12,40}[\>\]]/i && -- cgit v1.2.3 From 2295052f9de4d8288015e7d97ab0363a71604e64 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 10 Apr 2017 20:54:30 -0700 Subject: ANDROID: sdcardfs: Directly pass lower file for mmap Instead of relying on a copy hack, pass the lower file as private data. This lets the kernel find the vma mapping for pages used by the file, allowing pages used by mapping to be reclaimed. This is adapted from following esdfs patches commit 0647e638d: ("esdfs: store lower file in vm_file for mmap") commit 064850866: ("esdfs: keep a counter for mmaped file") Change-Id: I75b74d1e5061db1b8c13be38d184e118c0851a1a Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/file.c | 3 +++ fs/sdcardfs/mmap.c | 57 +++++++++++++++++++++--------------------------------- 2 files changed, 25 insertions(+), 35 deletions(-) diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c index c0146e03fa2e..1f6921e2ffbf 100644 --- a/fs/sdcardfs/file.c +++ b/fs/sdcardfs/file.c @@ -192,6 +192,9 @@ static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma) file->f_mapping->a_ops = &sdcardfs_aops; /* set our aops */ if (!SDCARDFS_F(file)->lower_vm_ops) /* save for our ->fault */ SDCARDFS_F(file)->lower_vm_ops = saved_vm_ops; + vma->vm_private_data = file; + get_file(lower_file); + vma->vm_file = lower_file; out: return err; diff --git a/fs/sdcardfs/mmap.c b/fs/sdcardfs/mmap.c index 0d4089c62c3a..b61f82275e7d 100644 --- a/fs/sdcardfs/mmap.c +++ b/fs/sdcardfs/mmap.c @@ -23,60 +23,45 @@ static int sdcardfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { int err; - struct file *file, *lower_file; + struct file *file; const struct vm_operations_struct *lower_vm_ops; - struct vm_area_struct lower_vma; - memcpy(&lower_vma, vma, sizeof(struct vm_area_struct)); - file = lower_vma.vm_file; + file = (struct file *)vma->vm_private_data; lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops; BUG_ON(!lower_vm_ops); - lower_file = sdcardfs_lower_file(file); - /* - * XXX: vm_ops->fault may be called in parallel. Because we have to - * resort to temporarily changing the vma->vm_file to point to the - * lower file, a concurrent invocation of sdcardfs_fault could see a - * different value. In this workaround, we keep a different copy of - * the vma structure in our stack, so we never expose a different - * value of the vma->vm_file called to us, even temporarily. A - * better fix would be to change the calling semantics of ->fault to - * take an explicit file pointer. - */ - lower_vma.vm_file = lower_file; - err = lower_vm_ops->fault(&lower_vma, vmf); + err = lower_vm_ops->fault(vma, vmf); return err; } +static void sdcardfs_vm_open(struct vm_area_struct *vma) +{ + struct file *file = (struct file *)vma->vm_private_data; + + get_file(file); +} + +static void sdcardfs_vm_close(struct vm_area_struct *vma) +{ + struct file *file = (struct file *)vma->vm_private_data; + + fput(file); +} + static int sdcardfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { int err = 0; - struct file *file, *lower_file; + struct file *file; const struct vm_operations_struct *lower_vm_ops; - struct vm_area_struct lower_vma; - memcpy(&lower_vma, vma, sizeof(struct vm_area_struct)); - file = lower_vma.vm_file; + file = (struct file *)vma->vm_private_data; lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops; BUG_ON(!lower_vm_ops); if (!lower_vm_ops->page_mkwrite) goto out; - lower_file = sdcardfs_lower_file(file); - /* - * XXX: vm_ops->page_mkwrite may be called in parallel. - * Because we have to resort to temporarily changing the - * vma->vm_file to point to the lower file, a concurrent - * invocation of sdcardfs_page_mkwrite could see a different - * value. In this workaround, we keep a different copy of the - * vma structure in our stack, so we never expose a different - * value of the vma->vm_file called to us, even temporarily. - * A better fix would be to change the calling semantics of - * ->page_mkwrite to take an explicit file pointer. - */ - lower_vma.vm_file = lower_file; - err = lower_vm_ops->page_mkwrite(&lower_vma, vmf); + err = lower_vm_ops->page_mkwrite(vma, vmf); out: return err; } @@ -99,4 +84,6 @@ const struct address_space_operations sdcardfs_aops = { const struct vm_operations_struct sdcardfs_vm_ops = { .fault = sdcardfs_fault, .page_mkwrite = sdcardfs_page_mkwrite, + .open = sdcardfs_vm_open, + .close = sdcardfs_vm_close, }; -- cgit v1.2.3 From 84b6001987a05f1d16bc3939f8ebf1cf9bd672e2 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Tue, 11 Apr 2017 21:39:47 +0000 Subject: Revert "Revert "Revert "CHROMIUM: android: binder: Fix potential scheduling-while-atomic""" This reverts commit 6a3b9c4984f9edc5a136720e42f1d2ab387857a4. Sigh. Confusion reigns. The rest of the preempt_disable patch is not in common, so this shouldn't be here afterall (it is in several downstream branches that therefore need this one too). Re-reverting. We don't want the preempt_disable stuff in common since fine-grained locking is coming soon. Change-Id: I2595516cab28041fa72f4a38692266a0f2a01ab4 --- drivers/android/binder.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 22025bcc38e9..9cf4f9bbc711 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -417,7 +417,6 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) struct files_struct *files = proc->files; unsigned long rlim_cur; unsigned long irqs; - int ret; if (files == NULL) return -ESRCH; @@ -428,11 +427,7 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); unlock_task_sighand(proc->tsk, &irqs); - preempt_enable_no_resched(); - ret = __alloc_fd(files, 0, rlim_cur, flags); - preempt_disable(); - - return ret; + return __alloc_fd(files, 0, rlim_cur, flags); } /* @@ -441,11 +436,8 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) static void task_fd_install( struct binder_proc *proc, unsigned int fd, struct file *file) { - if (proc->files) { - preempt_enable_no_resched(); + if (proc->files) __fd_install(proc->files, fd, file); - preempt_disable(); - } } /* -- cgit v1.2.3 From ed528923541afc1228c5a66e98845148aca51e24 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 27 Mar 2017 11:09:08 +0200 Subject: drm/vmwgfx: Type-check lookups of fence objects commit f7652afa8eadb416b23eb57dec6f158529942041 upstream. A malicious caller could otherwise hand over handles to other objects causing all sorts of interesting problems. Testing done: Ran a Fedora 25 desktop using both Xorg and gnome-shell/Wayland. Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 77 +++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 8e689b439890..b2f329917eda 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -539,7 +539,7 @@ int vmw_fence_create(struct vmw_fence_manager *fman, struct vmw_fence_obj **p_fence) { struct vmw_fence_obj *fence; - int ret; + int ret; fence = kzalloc(sizeof(*fence), GFP_KERNEL); if (unlikely(fence == NULL)) @@ -702,6 +702,41 @@ void vmw_fence_fifo_up(struct vmw_fence_manager *fman) } +/** + * vmw_fence_obj_lookup - Look up a user-space fence object + * + * @tfile: A struct ttm_object_file identifying the caller. + * @handle: A handle identifying the fence object. + * @return: A struct vmw_user_fence base ttm object on success or + * an error pointer on failure. + * + * The fence object is looked up and type-checked. The caller needs + * to have opened the fence object first, but since that happens on + * creation and fence objects aren't shareable, that's not an + * issue currently. + */ +static struct ttm_base_object * +vmw_fence_obj_lookup(struct ttm_object_file *tfile, u32 handle) +{ + struct ttm_base_object *base = ttm_base_object_lookup(tfile, handle); + + if (!base) { + pr_err("Invalid fence object handle 0x%08lx.\n", + (unsigned long)handle); + return ERR_PTR(-EINVAL); + } + + if (base->refcount_release != vmw_user_fence_base_release) { + pr_err("Invalid fence object handle 0x%08lx.\n", + (unsigned long)handle); + ttm_base_object_unref(&base); + return ERR_PTR(-EINVAL); + } + + return base; +} + + int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -727,13 +762,9 @@ int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data, arg->kernel_cookie = jiffies + wait_timeout; } - base = ttm_base_object_lookup(tfile, arg->handle); - if (unlikely(base == NULL)) { - printk(KERN_ERR "Wait invalid fence object handle " - "0x%08lx.\n", - (unsigned long)arg->handle); - return -EINVAL; - } + base = vmw_fence_obj_lookup(tfile, arg->handle); + if (IS_ERR(base)) + return PTR_ERR(base); fence = &(container_of(base, struct vmw_user_fence, base)->fence); @@ -772,13 +803,9 @@ int vmw_fence_obj_signaled_ioctl(struct drm_device *dev, void *data, struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; struct vmw_private *dev_priv = vmw_priv(dev); - base = ttm_base_object_lookup(tfile, arg->handle); - if (unlikely(base == NULL)) { - printk(KERN_ERR "Fence signaled invalid fence object handle " - "0x%08lx.\n", - (unsigned long)arg->handle); - return -EINVAL; - } + base = vmw_fence_obj_lookup(tfile, arg->handle); + if (IS_ERR(base)) + return PTR_ERR(base); fence = &(container_of(base, struct vmw_user_fence, base)->fence); fman = fman_from_fence(fence); @@ -1093,6 +1120,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, (struct drm_vmw_fence_event_arg *) data; struct vmw_fence_obj *fence = NULL; struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); + struct ttm_object_file *tfile = vmw_fp->tfile; struct drm_vmw_fence_rep __user *user_fence_rep = (struct drm_vmw_fence_rep __user *)(unsigned long) arg->fence_rep; @@ -1106,15 +1134,11 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, */ if (arg->handle) { struct ttm_base_object *base = - ttm_base_object_lookup_for_ref(dev_priv->tdev, - arg->handle); - - if (unlikely(base == NULL)) { - DRM_ERROR("Fence event invalid fence object handle " - "0x%08lx.\n", - (unsigned long)arg->handle); - return -EINVAL; - } + vmw_fence_obj_lookup(tfile, arg->handle); + + if (IS_ERR(base)) + return PTR_ERR(base); + fence = &(container_of(base, struct vmw_user_fence, base)->fence); (void) vmw_fence_obj_reference(fence); @@ -1122,7 +1146,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, if (user_fence_rep != NULL) { bool existed; - ret = ttm_ref_object_add(vmw_fp->tfile, base, + ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, &existed); if (unlikely(ret != 0)) { DRM_ERROR("Failed to reference a fence " @@ -1166,8 +1190,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, return 0; out_no_create: if (user_fence_rep != NULL) - ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile, - handle, TTM_REF_USAGE); + ttm_ref_object_base_unref(tfile, handle, TTM_REF_USAGE); out_no_ref_obj: vmw_fence_obj_unreference(&fence); return ret; -- cgit v1.2.3 From b26629453c7b2a6c82000b36fbd1cfc4d9101808 Mon Sep 17 00:00:00 2001 From: Murray McAllister Date: Mon, 27 Mar 2017 11:12:53 +0200 Subject: drm/vmwgfx: NULL pointer dereference in vmw_surface_define_ioctl() commit 36274ab8c596f1240c606bb514da329add2a1bcd upstream. Before memory allocations vmw_surface_define_ioctl() checks the upper-bounds of a user-supplied size, but does not check if the supplied size is 0. Add check to avoid NULL pointer dereferences. Signed-off-by: Murray McAllister Reviewed-by: Sinclair Yeh Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 7d620e82e000..b363f0be6512 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -718,8 +718,8 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) num_sizes += req->mip_levels[i]; - if (num_sizes > DRM_VMW_MAX_SURFACE_FACES * - DRM_VMW_MAX_MIP_LEVELS) + if (num_sizes > DRM_VMW_MAX_SURFACE_FACES * DRM_VMW_MAX_MIP_LEVELS || + num_sizes == 0) return -EINVAL; size = vmw_user_surface_size + 128 + -- cgit v1.2.3 From 0e075f266749ea6507758123f553fece6664e4e2 Mon Sep 17 00:00:00 2001 From: Murray McAllister Date: Mon, 27 Mar 2017 11:15:12 +0200 Subject: drm/vmwgfx: avoid calling vzalloc with a 0 size in vmw_get_cap_3d_ioctl() commit 63774069d9527a1aeaa4aa20e929ef5e8e9ecc38 upstream. In vmw_get_cap_3d_ioctl(), a user can supply 0 for a size that is used in vzalloc(). This eventually calls dump_stack() (in warn_alloc()), which can leak useful addresses to dmesg. Add check to avoid a size of 0. Signed-off-by: Murray McAllister Reviewed-by: Sinclair Yeh Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c index b8c6a03c8c54..1802d0e7fab8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c @@ -186,7 +186,7 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, bool gb_objects = !!(dev_priv->capabilities & SVGA_CAP_GBOBJECTS); struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); - if (unlikely(arg->pad64 != 0)) { + if (unlikely(arg->pad64 != 0 || arg->max_size == 0)) { DRM_ERROR("Illegal GET_3D_CAP argument.\n"); return -EINVAL; } -- cgit v1.2.3 From ad4ae2feef4f65b860f139e0d8455e2a16efb93c Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 27 Mar 2017 11:21:25 +0200 Subject: drm/ttm, drm/vmwgfx: Relax permission checking when opening surfaces commit fe25deb7737ce6c0879ccf79c99fa1221d428bf2 upstream. Previously, when a surface was opened using a legacy (non prime) handle, it was verified to have been created by a client in the same master realm. Relax this so that opening is also allowed recursively if the client already has the surface open. This works around a regression in svga mesa where opening of a shared surface is used recursively to obtain surface information. Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ttm/ttm_object.c | 10 +++++++--- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 6 ++---- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 4 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 22 +++++++++------------- include/drm/ttm/ttm_object.h | 5 ++++- 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c index 4f5fa8d65fe9..144367c0c28f 100644 --- a/drivers/gpu/drm/ttm/ttm_object.c +++ b/drivers/gpu/drm/ttm/ttm_object.c @@ -179,7 +179,7 @@ int ttm_base_object_init(struct ttm_object_file *tfile, if (unlikely(ret != 0)) goto out_err0; - ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL); + ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, false); if (unlikely(ret != 0)) goto out_err1; @@ -318,7 +318,8 @@ EXPORT_SYMBOL(ttm_ref_object_exists); int ttm_ref_object_add(struct ttm_object_file *tfile, struct ttm_base_object *base, - enum ttm_ref_type ref_type, bool *existed) + enum ttm_ref_type ref_type, bool *existed, + bool require_existed) { struct drm_open_hash *ht = &tfile->ref_hash[ref_type]; struct ttm_ref_object *ref; @@ -345,6 +346,9 @@ int ttm_ref_object_add(struct ttm_object_file *tfile, } rcu_read_unlock(); + if (require_existed) + return -EPERM; + ret = ttm_mem_global_alloc(mem_glob, sizeof(*ref), false, false); if (unlikely(ret != 0)) @@ -635,7 +639,7 @@ int ttm_prime_fd_to_handle(struct ttm_object_file *tfile, prime = (struct ttm_prime_object *) dma_buf->priv; base = &prime->base; *handle = base->hash.key; - ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL); + ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, false); dma_buf_put(dma_buf); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index b2f329917eda..6c649f7b5929 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -1144,10 +1144,8 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, (void) vmw_fence_obj_reference(fence); if (user_fence_rep != NULL) { - bool existed; - - ret = ttm_ref_object_add(tfile, base, - TTM_REF_USAGE, &existed); + ret = ttm_ref_object_add(vmw_fp->tfile, base, + TTM_REF_USAGE, NULL, false); if (unlikely(ret != 0)) { DRM_ERROR("Failed to reference a fence " "object.\n"); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index e57667ca7557..dbca128a9aa6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -591,7 +591,7 @@ static int vmw_user_dmabuf_synccpu_grab(struct vmw_user_dma_buffer *user_bo, return ret; ret = ttm_ref_object_add(tfile, &user_bo->prime.base, - TTM_REF_SYNCCPU_WRITE, &existed); + TTM_REF_SYNCCPU_WRITE, &existed, false); if (ret != 0 || existed) ttm_bo_synccpu_write_release(&user_bo->dma.base); @@ -775,7 +775,7 @@ int vmw_user_dmabuf_reference(struct ttm_object_file *tfile, *handle = user_bo->prime.base.hash.key; return ttm_ref_object_add(tfile, &user_bo->prime.base, - TTM_REF_USAGE, NULL); + TTM_REF_USAGE, NULL, false); } /* diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index b363f0be6512..79f78a68d92d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -904,17 +904,16 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, uint32_t handle; struct ttm_base_object *base; int ret; + bool require_exist = false; if (handle_type == DRM_VMW_HANDLE_PRIME) { ret = ttm_prime_fd_to_handle(tfile, u_handle, &handle); if (unlikely(ret != 0)) return ret; } else { - if (unlikely(drm_is_render_client(file_priv))) { - DRM_ERROR("Render client refused legacy " - "surface reference.\n"); - return -EACCES; - } + if (unlikely(drm_is_render_client(file_priv))) + require_exist = true; + if (ACCESS_ONCE(vmw_fpriv(file_priv)->locked_master)) { DRM_ERROR("Locked master refused legacy " "surface reference.\n"); @@ -942,17 +941,14 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, /* * Make sure the surface creator has the same - * authenticating master. + * authenticating master, or is already registered with us. */ if (drm_is_primary_client(file_priv) && - user_srf->master != file_priv->master) { - DRM_ERROR("Trying to reference surface outside of" - " master domain.\n"); - ret = -EACCES; - goto out_bad_resource; - } + user_srf->master != file_priv->master) + require_exist = true; - ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL); + ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, + require_exist); if (unlikely(ret != 0)) { DRM_ERROR("Could not add a reference to a surface.\n"); goto out_bad_resource; diff --git a/include/drm/ttm/ttm_object.h b/include/drm/ttm/ttm_object.h index ed953f98f0e1..1487011fe057 100644 --- a/include/drm/ttm/ttm_object.h +++ b/include/drm/ttm/ttm_object.h @@ -229,6 +229,8 @@ extern void ttm_base_object_unref(struct ttm_base_object **p_base); * @ref_type: The type of reference. * @existed: Upon completion, indicates that an identical reference object * already existed, and the refcount was upped on that object instead. + * @require_existed: Fail with -EPERM if an identical ref object didn't + * already exist. * * Checks that the base object is shareable and adds a ref object to it. * @@ -243,7 +245,8 @@ extern void ttm_base_object_unref(struct ttm_base_object **p_base); */ extern int ttm_ref_object_add(struct ttm_object_file *tfile, struct ttm_base_object *base, - enum ttm_ref_type ref_type, bool *existed); + enum ttm_ref_type ref_type, bool *existed, + bool require_existed); extern bool ttm_ref_object_exists(struct ttm_object_file *tfile, struct ttm_base_object *base); -- cgit v1.2.3 From 235e914069bd501be22597e6c0176f16b477ae37 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 27 Mar 2017 13:06:05 +0200 Subject: drm/vmwgfx: Remove getparam error message commit 53e16798b0864464c5444a204e1bb93ae246c429 upstream. The mesa winsys sometimes uses unimplemented parameter requests to check for features. Remove the error message to avoid bloating the kernel log. Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Sinclair Yeh Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c index 1802d0e7fab8..5ec24fd801cd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c @@ -114,8 +114,6 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, param->value = dev_priv->has_dx; break; default: - DRM_ERROR("Illegal vmwgfx get param request: %d\n", - param->param); return -EINVAL; } -- cgit v1.2.3 From c21636bd64c511160846bdf87ef4c7ff48680c99 Mon Sep 17 00:00:00 2001 From: Li Qiang Date: Mon, 27 Mar 2017 20:10:53 -0700 Subject: drm/vmwgfx: fix integer overflow in vmw_surface_define_ioctl() commit e7e11f99564222d82f0ce84bd521e57d78a6b678 upstream. In vmw_surface_define_ioctl(), the 'num_sizes' is the sum of the 'req->mip_levels' array. This array can be assigned any value from the user space. As both the 'num_sizes' and the array is uint32_t, it is easy to make 'num_sizes' overflow. The later 'mip_levels' is used as the loop count. This can lead an oob write. Add the check of 'req->mip_levels' to avoid this. Signed-off-by: Li Qiang Reviewed-by: Thomas Hellstrom Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 79f78a68d92d..c9c04ccccdd9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -715,8 +715,11 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, 128; num_sizes = 0; - for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) + for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) { + if (req->mip_levels[i] > DRM_VMW_MAX_MIP_LEVELS) + return -EINVAL; num_sizes += req->mip_levels[i]; + } if (num_sizes > DRM_VMW_MAX_SURFACE_FACES * DRM_VMW_MAX_MIP_LEVELS || num_sizes == 0) -- cgit v1.2.3 From 69d8d58bf50d9cd1bb6f000bbdf54026e74717a3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Apr 2017 11:30:34 +1000 Subject: sysfs: be careful of error returns from ops->show() commit c8a139d001a1aab1ea8734db14b22dac9dd143b6 upstream. ops->show() can return a negative error code. Commit 65da3484d9be ("sysfs: correctly handle short reads on PREALLOC attrs.") (in v4.4) caused this to be stored in an unsigned 'size_t' variable, so errors would look like large numbers. As a result, if an error is returned, sysfs_kf_read() will return the value of 'count', typically 4096. Commit 17d0774f8068 ("sysfs: correctly handle read offset on PREALLOC attrs") (in v4.8) extended this error to use the unsigned large 'len' as a size for memmove(). Consequently, if ->show returns an error, then the first read() on the sysfs file will return 4096 and could return uninitialized memory to user-space. If the application performs a subsequent read, this will trigger a memmove() with extremely large count, and is likely to crash the machine is bizarre ways. This bug can currently only be triggered by reading from an md sysfs attribute declared with __ATTR_PREALLOC() during the brief period between when mddev_put() deletes an mddev from the ->all_mddevs list, and when mddev_delayed_delete() - which is scheduled on a workqueue - completes. Before this, an error won't be returned by the ->show() After this, the ->show() won't be called. I can reproduce it reliably only by putting delay like usleep_range(500000,700000); early in mddev_delayed_delete(). Then after creating an md device md0 run echo clear > /sys/block/md0/md/array_state; cat /sys/block/md0/md/array_state The bug can be triggered without the usleep. Fixes: 65da3484d9be ("sysfs: correctly handle short reads on PREALLOC attrs.") Fixes: 17d0774f8068 ("sysfs: correctly handle read offset on PREALLOC attrs") Signed-off-by: NeilBrown Acked-by: Tejun Heo Reported-and-tested-by: Miroslav Benes Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index b803213d1307..39c75a86c67f 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -108,7 +108,7 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf, { const struct sysfs_ops *ops = sysfs_file_ops(of->kn); struct kobject *kobj = of->kn->parent->priv; - size_t len; + ssize_t len; /* * If buf != of->prealloc_buf, we don't know how @@ -117,13 +117,15 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf, if (WARN_ON_ONCE(buf != of->prealloc_buf)) return 0; len = ops->show(kobj, of->kn->priv, buf); + if (len < 0) + return len; if (pos) { if (len <= pos) return 0; len -= pos; memmove(buf, buf + pos, len); } - return min(count, len); + return min_t(ssize_t, count, len); } /* kernfs write callback for regular sysfs files */ -- cgit v1.2.3 From 193b590c71cd4c1fd54f4b4cab1ba73b6212c073 Mon Sep 17 00:00:00 2001 From: Shuxiao Zhang Date: Thu, 6 Apr 2017 22:30:29 +0800 Subject: staging: android: ashmem: lseek failed due to no FMODE_LSEEK. commit 97fbfef6bd597888485b653175fb846c6998b60c upstream. vfs_llseek will check whether the file mode has FMODE_LSEEK, no return failure. But ashmem can be lseek, so add FMODE_LSEEK to ashmem file. Comment From Greg Hackmann: ashmem_llseek() passes the llseek() call through to the backing shmem file. 91360b02ab48 ("ashmem: use vfs_llseek()") changed this from directly calling the file's llseek() op into a VFS layer call. This also adds a check for the FMODE_LSEEK bit, so without that bit ashmem_llseek() now always fails with -ESPIPE. Fixes: 91360b02ab48 ("ashmem: use vfs_llseek()") Signed-off-by: Shuxiao Zhang Tested-by: Greg Hackmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ashmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 3f2a3d611e4b..9c6357c03905 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -392,6 +392,7 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) ret = PTR_ERR(vmfile); goto out; } + vmfile->f_mode |= FMODE_LSEEK; asma->file = vmfile; } get_file(asma->file); -- cgit v1.2.3 From 8e88806117e4868bc459a3042e55f8bf06c0b9e0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Mar 2017 18:20:49 +0000 Subject: arm/arm64: KVM: Take mmap_sem in stage2_unmap_vm commit 90f6e150e44a0dc3883110eeb3ab35d1be42b6bb upstream. We don't hold the mmap_sem while searching for the VMAs when we try to unmap each memslot for a VM. Fix this properly to avoid unexpected results. Fixes: commit 957db105c997 ("arm/arm64: KVM: Introduce stage2_unmap_vm") Reviewed-by: Christoffer Dall Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/mmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 11b6595c2672..5366a736151e 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -796,6 +796,7 @@ void stage2_unmap_vm(struct kvm *kvm) int idx; idx = srcu_read_lock(&kvm->srcu); + down_read(¤t->mm->mmap_sem); spin_lock(&kvm->mmu_lock); slots = kvm_memslots(kvm); @@ -803,6 +804,7 @@ void stage2_unmap_vm(struct kvm *kvm) stage2_unmap_memslot(kvm, memslot); spin_unlock(&kvm->mmu_lock); + up_read(¤t->mm->mmap_sem); srcu_read_unlock(&kvm->srcu, idx); } -- cgit v1.2.3 From d4ad442b9982fba9eab0f9003c8cd185a1afeff6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Mar 2017 18:20:50 +0000 Subject: arm/arm64: KVM: Take mmap_sem in kvm_arch_prepare_memory_region commit 72f310481a08db821b614e7b5d00febcc9064b36 upstream. We don't hold the mmap_sem while searching for VMAs (via find_vma), in kvm_arch_prepare_memory_region, which can end up in expected failures. Fixes: commit 8eef91239e57 ("arm/arm64: KVM: map MMIO regions at creation time") Cc: Ard Biesheuvel Cc: Eric Auger Reviewed-by: Christoffer Dall [ Handle dirty page logging failure case ] Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/mmu.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 5366a736151e..f91ee2f27b41 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1761,6 +1761,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, (KVM_PHYS_SIZE >> PAGE_SHIFT)) return -EFAULT; + down_read(¤t->mm->mmap_sem); /* * A memory region could potentially cover multiple VMAs, and any holes * between them, so iterate over all of them to find out if we can map @@ -1804,8 +1805,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, pa += vm_start - vma->vm_start; /* IO region dirty page logging not allowed */ - if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) - return -EINVAL; + if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) { + ret = -EINVAL; + goto out; + } ret = kvm_phys_addr_ioremap(kvm, gpa, pa, vm_end - vm_start, @@ -1817,7 +1820,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, } while (hva < reg_end); if (change == KVM_MR_FLAGS_ONLY) - return ret; + goto out; spin_lock(&kvm->mmu_lock); if (ret) @@ -1825,6 +1828,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, else stage2_flush_memslot(kvm, memslot); spin_unlock(&kvm->mmu_lock); +out: + up_read(¤t->mm->mmap_sem); return ret; } -- cgit v1.2.3 From 8ff7eb4bc8b8cf0416e0746dcdb1545fc6869e98 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Tue, 21 Mar 2017 16:52:14 +0100 Subject: iio: bmg160: reset chip when probing commit 4bdc9029685ac03be50b320b29691766d2326c2b upstream. The gyroscope chip might need to be reset to be used. Without the chip being reset, the driver stopped at the first regmap_read (to get the CHIP_ID) and failed to probe. The datasheet of the gyroscope says that a minimum wait of 30ms after the reset has to be done. This patch has been checked on a BMX055 and the datasheet of the BMG160 and the BMI055 give the same reset register and bits. Signed-off-by: Quentin Schulz Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/gyro/bmg160_core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c index acb3b303d800..90841abd3ce4 100644 --- a/drivers/iio/gyro/bmg160_core.c +++ b/drivers/iio/gyro/bmg160_core.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "bmg160.h" #define BMG160_IRQ_NAME "bmg160_event" @@ -53,6 +54,9 @@ #define BMG160_NO_FILTER 0 #define BMG160_DEF_BW 100 +#define BMG160_GYRO_REG_RESET 0x14 +#define BMG160_GYRO_RESET_VAL 0xb6 + #define BMG160_REG_INT_MAP_0 0x17 #define BMG160_INT_MAP_0_BIT_ANY BIT(1) @@ -186,6 +190,14 @@ static int bmg160_chip_init(struct bmg160_data *data) int ret; unsigned int val; + /* + * Reset chip to get it in a known good state. A delay of 30ms after + * reset is required according to the datasheet. + */ + regmap_write(data->regmap, BMG160_GYRO_REG_RESET, + BMG160_GYRO_RESET_VAL); + usleep_range(30000, 30700); + ret = regmap_read(data->regmap, BMG160_REG_CHIP_ID, &val); if (ret < 0) { dev_err(data->dev, "Error reading reg_chip_id\n"); -- cgit v1.2.3 From 5a69c2b268ed938d44011274e6bc87562542ef94 Mon Sep 17 00:00:00 2001 From: Jan-Marek Glogowski Date: Mon, 20 Feb 2017 12:25:58 +0100 Subject: Reset TreeId to zero on SMB2 TREE_CONNECT commit 806a28efe9b78ffae5e2757e1ee924b8e50c08ab upstream. Currently the cifs module breaks the CIFS specs on reconnect as described in http://msdn.microsoft.com/en-us/library/cc246529.aspx: "TreeId (4 bytes): Uniquely identifies the tree connect for the command. This MUST be 0 for the SMB2 TREE_CONNECT Request." Signed-off-by: Jan-Marek Glogowski Reviewed-by: Aurelien Aptel Tested-by: Aurelien Aptel Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2fa754c5fd62..6cb5c4b30e78 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -952,6 +952,10 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, return -EINVAL; } + /* SMB2 TREE_CONNECT request must be called with TreeId == 0 */ + if (tcon) + tcon->tid = 0; + rc = small_smb2_init(SMB2_TREE_CONNECT, tcon, (void **) &req); if (rc) { kfree(unc_path); -- cgit v1.2.3 From 926e1ed2b8ce683f137ea8e0683ac4f6d27c8afb Mon Sep 17 00:00:00 2001 From: "bsegall@google.com" Date: Fri, 7 Apr 2017 16:04:51 -0700 Subject: ptrace: fix PTRACE_LISTEN race corrupting task->state commit 5402e97af667e35e54177af8f6575518bf251d51 upstream. In PT_SEIZED + LISTEN mode STOP/CONT signals cause a wakeup against __TASK_TRACED. If this races with the ptrace_unfreeze_traced at the end of a PTRACE_LISTEN, this can wake the task /after/ the check against __TASK_TRACED, but before the reset of state to TASK_TRACED. This causes it to instead clobber TASK_WAKING, allowing a subsequent wakeup against TRACED while the task is still on the rq wake_list, corrupting it. Oleg said: "The kernel can crash or this can lead to other hard-to-debug problems. In short, "task->state = TASK_TRACED" in ptrace_unfreeze_traced() assumes that nobody else can wake it up, but PTRACE_LISTEN breaks the contract. Obviusly it is very wrong to manipulate task->state if this task is already running, or WAKING, or it sleeps again" [akpm@linux-foundation.org: coding-style fixes] Fixes: 9899d11f ("ptrace: ensure arch_ptrace/ptrace_request can never race with SIGKILL") Link: http://lkml.kernel.org/r/xm26y3vfhmkp.fsf_-_@bsegall-linux.mtv.corp.google.com Signed-off-by: Ben Segall Acked-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/ptrace.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index a46c40bfb5f6..c7e8ed99c953 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -151,11 +151,17 @@ static void ptrace_unfreeze_traced(struct task_struct *task) WARN_ON(!task->ptrace || task->parent != current); + /* + * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely. + * Recheck state under the lock to close this race. + */ spin_lock_irq(&task->sighand->siglock); - if (__fatal_signal_pending(task)) - wake_up_state(task, __TASK_TRACED); - else - task->state = TASK_TRACED; + if (task->state == __TASK_TRACED) { + if (__fatal_signal_pending(task)) + wake_up_state(task, __TASK_TRACED); + else + task->state = TASK_TRACED; + } spin_unlock_irq(&task->sighand->siglock); } -- cgit v1.2.3 From 5cc244782dabaee110ed9c3900d40cd4b481a517 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 17 Jun 2016 17:33:59 +0000 Subject: ring-buffer: Fix return value check in test_ringbuffer() commit 62277de758b155dc04b78f195a1cb5208c37b2df upstream. In case of error, the function kthread_run() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Link: http://lkml.kernel.org/r/1466184839-14927-1-git-send-email-weiyj_lk@163.com Fixes: 6c43e554a ("ring-buffer: Add ring buffer startup selftest") Signed-off-by: Wei Yongjun Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index acbb0e73d3a2..7d7f99b0db47 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4875,9 +4875,9 @@ static __init int test_ringbuffer(void) rb_data[cpu].cnt = cpu; rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu], "rbtester/%d", cpu); - if (WARN_ON(!rb_threads[cpu])) { + if (WARN_ON(IS_ERR(rb_threads[cpu]))) { pr_cont("FAILED\n"); - ret = -1; + ret = PTR_ERR(rb_threads[cpu]); goto out_free; } @@ -4887,9 +4887,9 @@ static __init int test_ringbuffer(void) /* Now create the rb hammer! */ rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer"); - if (WARN_ON(!rb_hammer)) { + if (WARN_ON(IS_ERR(rb_hammer))) { pr_cont("FAILED\n"); - ret = -1; + ret = PTR_ERR(rb_hammer); goto out_free; } -- cgit v1.2.3 From ce962cf480331380d7eb3c8e3c625a975e0aa38f Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 31 Mar 2017 10:37:44 +0100 Subject: metag/usercopy: Drop unused macros commit ef62a2d81f73d9cddef14bc3d9097a57010d551c upstream. Metag's lib/usercopy.c has a bunch of copy_from_user macros for larger copies between 5 and 16 bytes which are completely unused. Before fixing zeroing lets drop these macros so there is less to fix. Signed-off-by: James Hogan Cc: Al Viro Cc: linux-metag@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/metag/lib/usercopy.c | 113 ---------------------------------------------- 1 file changed, 113 deletions(-) diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index b3ebfe9c8e88..b4eb1f17069f 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -651,119 +651,6 @@ EXPORT_SYMBOL(__copy_user); #define __asm_copy_from_user_4(to, from, ret) \ __asm_copy_from_user_4x_cont(to, from, ret, "", "", "") -#define __asm_copy_from_user_5(to, from, ret) \ - __asm_copy_from_user_4x_cont(to, from, ret, \ - " GETB D1Ar1,[%1++]\n" \ - "4: SETB [%0++],D1Ar1\n", \ - "5: ADD %2,%2,#1\n" \ - " SETB [%0++],D1Ar1\n", \ - " .long 4b,5b\n") - -#define __asm_copy_from_user_6x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ - __asm_copy_from_user_4x_cont(to, from, ret, \ - " GETW D1Ar1,[%1++]\n" \ - "4: SETW [%0++],D1Ar1\n" COPY, \ - "5: ADD %2,%2,#2\n" \ - " SETW [%0++],D1Ar1\n" FIXUP, \ - " .long 4b,5b\n" TENTRY) - -#define __asm_copy_from_user_6(to, from, ret) \ - __asm_copy_from_user_6x_cont(to, from, ret, "", "", "") - -#define __asm_copy_from_user_7(to, from, ret) \ - __asm_copy_from_user_6x_cont(to, from, ret, \ - " GETB D1Ar1,[%1++]\n" \ - "6: SETB [%0++],D1Ar1\n", \ - "7: ADD %2,%2,#1\n" \ - " SETB [%0++],D1Ar1\n", \ - " .long 6b,7b\n") - -#define __asm_copy_from_user_8x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ - __asm_copy_from_user_4x_cont(to, from, ret, \ - " GETD D1Ar1,[%1++]\n" \ - "4: SETD [%0++],D1Ar1\n" COPY, \ - "5: ADD %2,%2,#4\n" \ - " SETD [%0++],D1Ar1\n" FIXUP, \ - " .long 4b,5b\n" TENTRY) - -#define __asm_copy_from_user_8(to, from, ret) \ - __asm_copy_from_user_8x_cont(to, from, ret, "", "", "") - -#define __asm_copy_from_user_9(to, from, ret) \ - __asm_copy_from_user_8x_cont(to, from, ret, \ - " GETB D1Ar1,[%1++]\n" \ - "6: SETB [%0++],D1Ar1\n", \ - "7: ADD %2,%2,#1\n" \ - " SETB [%0++],D1Ar1\n", \ - " .long 6b,7b\n") - -#define __asm_copy_from_user_10x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ - __asm_copy_from_user_8x_cont(to, from, ret, \ - " GETW D1Ar1,[%1++]\n" \ - "6: SETW [%0++],D1Ar1\n" COPY, \ - "7: ADD %2,%2,#2\n" \ - " SETW [%0++],D1Ar1\n" FIXUP, \ - " .long 6b,7b\n" TENTRY) - -#define __asm_copy_from_user_10(to, from, ret) \ - __asm_copy_from_user_10x_cont(to, from, ret, "", "", "") - -#define __asm_copy_from_user_11(to, from, ret) \ - __asm_copy_from_user_10x_cont(to, from, ret, \ - " GETB D1Ar1,[%1++]\n" \ - "8: SETB [%0++],D1Ar1\n", \ - "9: ADD %2,%2,#1\n" \ - " SETB [%0++],D1Ar1\n", \ - " .long 8b,9b\n") - -#define __asm_copy_from_user_12x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ - __asm_copy_from_user_8x_cont(to, from, ret, \ - " GETD D1Ar1,[%1++]\n" \ - "6: SETD [%0++],D1Ar1\n" COPY, \ - "7: ADD %2,%2,#4\n" \ - " SETD [%0++],D1Ar1\n" FIXUP, \ - " .long 6b,7b\n" TENTRY) - -#define __asm_copy_from_user_12(to, from, ret) \ - __asm_copy_from_user_12x_cont(to, from, ret, "", "", "") - -#define __asm_copy_from_user_13(to, from, ret) \ - __asm_copy_from_user_12x_cont(to, from, ret, \ - " GETB D1Ar1,[%1++]\n" \ - "8: SETB [%0++],D1Ar1\n", \ - "9: ADD %2,%2,#1\n" \ - " SETB [%0++],D1Ar1\n", \ - " .long 8b,9b\n") - -#define __asm_copy_from_user_14x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ - __asm_copy_from_user_12x_cont(to, from, ret, \ - " GETW D1Ar1,[%1++]\n" \ - "8: SETW [%0++],D1Ar1\n" COPY, \ - "9: ADD %2,%2,#2\n" \ - " SETW [%0++],D1Ar1\n" FIXUP, \ - " .long 8b,9b\n" TENTRY) - -#define __asm_copy_from_user_14(to, from, ret) \ - __asm_copy_from_user_14x_cont(to, from, ret, "", "", "") - -#define __asm_copy_from_user_15(to, from, ret) \ - __asm_copy_from_user_14x_cont(to, from, ret, \ - " GETB D1Ar1,[%1++]\n" \ - "10: SETB [%0++],D1Ar1\n", \ - "11: ADD %2,%2,#1\n" \ - " SETB [%0++],D1Ar1\n", \ - " .long 10b,11b\n") - -#define __asm_copy_from_user_16x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ - __asm_copy_from_user_12x_cont(to, from, ret, \ - " GETD D1Ar1,[%1++]\n" \ - "8: SETD [%0++],D1Ar1\n" COPY, \ - "9: ADD %2,%2,#4\n" \ - " SETD [%0++],D1Ar1\n" FIXUP, \ - " .long 8b,9b\n" TENTRY) - -#define __asm_copy_from_user_16(to, from, ret) \ - __asm_copy_from_user_16x_cont(to, from, ret, "", "", "") #define __asm_copy_from_user_8x64(to, from, ret) \ asm volatile ( \ -- cgit v1.2.3 From ae781dee56e4805311f0615ca04ea226bfbcafcd Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 31 Mar 2017 11:23:18 +0100 Subject: metag/usercopy: Fix alignment error checking commit 2257211942bbbf6c798ab70b487d7e62f7835a1a upstream. Fix the error checking of the alignment adjustment code in raw_copy_from_user(), which mistakenly considers it safe to skip the error check when aligning the source buffer on a 2 or 4 byte boundary. If the destination buffer was unaligned it may have started to copy using byte or word accesses, which could well be at the start of a new (valid) source page. This would result in it appearing to have copied 1 or 2 bytes at the end of the first (invalid) page rather than none at all. Fixes: 373cd784d0fc ("metag: Memory handling") Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/metag/lib/usercopy.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index b4eb1f17069f..a6ced9691ddb 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -717,6 +717,8 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, if ((unsigned long) src & 1) { __asm_copy_from_user_1(dst, src, retn); n--; + if (retn) + goto copy_exception_bytes; } if ((unsigned long) dst & 1) { /* Worst case - byte copy */ @@ -730,6 +732,8 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, if (((unsigned long) src & 2) && n >= 2) { __asm_copy_from_user_2(dst, src, retn); n -= 2; + if (retn) + goto copy_exception_bytes; } if ((unsigned long) dst & 2) { /* Second worst case - word copy */ @@ -741,12 +745,6 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, } } - /* We only need one check after the unalignment-adjustments, - because if both adjustments were done, either both or - neither reference had an exception. */ - if (retn != 0) - goto copy_exception_bytes; - #ifdef USE_RAPF /* 64 bit copy loop */ if (!(((unsigned long) src | (unsigned long) dst) & 7)) { -- cgit v1.2.3 From dde6f22c1e122907717f45405cbc2c6227e259e5 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 31 Mar 2017 13:35:01 +0100 Subject: metag/usercopy: Add early abort to copy_to_user commit fb8ea062a8f2e85256e13f55696c5c5f0dfdcc8b upstream. When copying to userland on Meta, if any faults are encountered immediately abort the copy instead of continuing on and repeatedly faulting, and worse potentially copying further bytes successfully to subsequent valid pages. Fixes: 373cd784d0fc ("metag: Memory handling") Reported-by: Al Viro Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/metag/lib/usercopy.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index a6ced9691ddb..714d8562aa20 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -538,23 +538,31 @@ unsigned long __copy_user(void __user *pdst, const void *psrc, if ((unsigned long) src & 1) { __asm_copy_to_user_1(dst, src, retn); n--; + if (retn) + return retn + n; } if ((unsigned long) dst & 1) { /* Worst case - byte copy */ while (n > 0) { __asm_copy_to_user_1(dst, src, retn); n--; + if (retn) + return retn + n; } } if (((unsigned long) src & 2) && n >= 2) { __asm_copy_to_user_2(dst, src, retn); n -= 2; + if (retn) + return retn + n; } if ((unsigned long) dst & 2) { /* Second worst case - word copy */ while (n >= 2) { __asm_copy_to_user_2(dst, src, retn); n -= 2; + if (retn) + return retn + n; } } @@ -569,6 +577,8 @@ unsigned long __copy_user(void __user *pdst, const void *psrc, while (n >= 8) { __asm_copy_to_user_8x64(dst, src, retn); n -= 8; + if (retn) + return retn + n; } } if (n >= RAPF_MIN_BUF_SIZE) { @@ -581,6 +591,8 @@ unsigned long __copy_user(void __user *pdst, const void *psrc, while (n >= 8) { __asm_copy_to_user_8x64(dst, src, retn); n -= 8; + if (retn) + return retn + n; } } #endif @@ -588,11 +600,15 @@ unsigned long __copy_user(void __user *pdst, const void *psrc, while (n >= 16) { __asm_copy_to_user_16(dst, src, retn); n -= 16; + if (retn) + return retn + n; } while (n >= 4) { __asm_copy_to_user_4(dst, src, retn); n -= 4; + if (retn) + return retn + n; } switch (n) { @@ -609,6 +625,10 @@ unsigned long __copy_user(void __user *pdst, const void *psrc, break; } + /* + * If we get here, retn correctly reflects the number of failing + * bytes. + */ return retn; } EXPORT_SYMBOL(__copy_user); -- cgit v1.2.3 From 29b5eb517c6961ea9e9b16c49b5cf7fd93860be2 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 31 Mar 2017 11:14:02 +0100 Subject: metag/usercopy: Zero rest of buffer from copy_from_user commit 563ddc1076109f2b3f88e6d355eab7b6fd4662cb upstream. Currently we try to zero the destination for a failed read from userland in fixup code in the usercopy.c macros. The rest of the destination buffer is then zeroed from __copy_user_zeroing(), which is used for both copy_from_user() and __copy_from_user(). Unfortunately we fail to zero in the fixup code as D1Ar1 is set to 0 before the fixup code entry labels, and __copy_from_user() shouldn't even be zeroing the rest of the buffer. Move the zeroing out into copy_from_user() and rename __copy_user_zeroing() to raw_copy_from_user() since it no longer does any zeroing. This also conveniently matches the name needed for RAW_COPY_USER support in a later patch. Fixes: 373cd784d0fc ("metag: Memory handling") Reported-by: Al Viro Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/metag/include/asm/uaccess.h | 15 ++++++----- arch/metag/lib/usercopy.c | 57 +++++++++++++--------------------------- 2 files changed, 26 insertions(+), 46 deletions(-) diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h index 273e61225c27..07238b39638c 100644 --- a/arch/metag/include/asm/uaccess.h +++ b/arch/metag/include/asm/uaccess.h @@ -197,20 +197,21 @@ extern long __must_check strnlen_user(const char __user *src, long count); #define strlen_user(str) strnlen_user(str, 32767) -extern unsigned long __must_check __copy_user_zeroing(void *to, - const void __user *from, - unsigned long n); +extern unsigned long raw_copy_from_user(void *to, const void __user *from, + unsigned long n); static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { + unsigned long res = n; if (likely(access_ok(VERIFY_READ, from, n))) - return __copy_user_zeroing(to, from, n); - memset(to, 0, n); - return n; + res = raw_copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; } -#define __copy_from_user(to, from, n) __copy_user_zeroing(to, from, n) +#define __copy_from_user(to, from, n) raw_copy_from_user(to, from, n) #define __copy_from_user_inatomic __copy_from_user extern unsigned long __must_check __copy_user(void __user *to, diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index 714d8562aa20..e1d553872fd7 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -29,7 +29,6 @@ COPY \ "1:\n" \ " .section .fixup,\"ax\"\n" \ - " MOV D1Ar1,#0\n" \ FIXUP \ " MOVT D1Ar1,#HI(1b)\n" \ " JUMP D1Ar1,#LO(1b)\n" \ @@ -637,16 +636,14 @@ EXPORT_SYMBOL(__copy_user); __asm_copy_user_cont(to, from, ret, \ " GETB D1Ar1,[%1++]\n" \ "2: SETB [%0++],D1Ar1\n", \ - "3: ADD %2,%2,#1\n" \ - " SETB [%0++],D1Ar1\n", \ + "3: ADD %2,%2,#1\n", \ " .long 2b,3b\n") #define __asm_copy_from_user_2x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ __asm_copy_user_cont(to, from, ret, \ " GETW D1Ar1,[%1++]\n" \ "2: SETW [%0++],D1Ar1\n" COPY, \ - "3: ADD %2,%2,#2\n" \ - " SETW [%0++],D1Ar1\n" FIXUP, \ + "3: ADD %2,%2,#2\n" FIXUP, \ " .long 2b,3b\n" TENTRY) #define __asm_copy_from_user_2(to, from, ret) \ @@ -656,32 +653,26 @@ EXPORT_SYMBOL(__copy_user); __asm_copy_from_user_2x_cont(to, from, ret, \ " GETB D1Ar1,[%1++]\n" \ "4: SETB [%0++],D1Ar1\n", \ - "5: ADD %2,%2,#1\n" \ - " SETB [%0++],D1Ar1\n", \ + "5: ADD %2,%2,#1\n", \ " .long 4b,5b\n") #define __asm_copy_from_user_4x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ __asm_copy_user_cont(to, from, ret, \ " GETD D1Ar1,[%1++]\n" \ "2: SETD [%0++],D1Ar1\n" COPY, \ - "3: ADD %2,%2,#4\n" \ - " SETD [%0++],D1Ar1\n" FIXUP, \ + "3: ADD %2,%2,#4\n" FIXUP, \ " .long 2b,3b\n" TENTRY) #define __asm_copy_from_user_4(to, from, ret) \ __asm_copy_from_user_4x_cont(to, from, ret, "", "", "") - #define __asm_copy_from_user_8x64(to, from, ret) \ asm volatile ( \ " GETL D0Ar2,D1Ar1,[%1++]\n" \ "2: SETL [%0++],D0Ar2,D1Ar1\n" \ "1:\n" \ " .section .fixup,\"ax\"\n" \ - " MOV D1Ar1,#0\n" \ - " MOV D0Ar2,#0\n" \ "3: ADD %2,%2,#8\n" \ - " SETL [%0++],D0Ar2,D1Ar1\n" \ " MOVT D0Ar2,#HI(1b)\n" \ " JUMP D0Ar2,#LO(1b)\n" \ " .previous\n" \ @@ -721,11 +712,12 @@ EXPORT_SYMBOL(__copy_user); "SUB %1, %1, #4\n") -/* Copy from user to kernel, zeroing the bytes that were inaccessible in - userland. The return-value is the number of bytes that were - inaccessible. */ -unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, - unsigned long n) +/* + * Copy from user to kernel. The return-value is the number of bytes that were + * inaccessible. + */ +unsigned long raw_copy_from_user(void *pdst, const void __user *psrc, + unsigned long n) { register char *dst asm ("A0.2") = pdst; register const char __user *src asm ("A1.2") = psrc; @@ -738,7 +730,7 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, __asm_copy_from_user_1(dst, src, retn); n--; if (retn) - goto copy_exception_bytes; + return retn + n; } if ((unsigned long) dst & 1) { /* Worst case - byte copy */ @@ -746,14 +738,14 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, __asm_copy_from_user_1(dst, src, retn); n--; if (retn) - goto copy_exception_bytes; + return retn + n; } } if (((unsigned long) src & 2) && n >= 2) { __asm_copy_from_user_2(dst, src, retn); n -= 2; if (retn) - goto copy_exception_bytes; + return retn + n; } if ((unsigned long) dst & 2) { /* Second worst case - word copy */ @@ -761,7 +753,7 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, __asm_copy_from_user_2(dst, src, retn); n -= 2; if (retn) - goto copy_exception_bytes; + return retn + n; } } @@ -777,7 +769,7 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, __asm_copy_from_user_8x64(dst, src, retn); n -= 8; if (retn) - goto copy_exception_bytes; + return retn + n; } } @@ -793,7 +785,7 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, __asm_copy_from_user_8x64(dst, src, retn); n -= 8; if (retn) - goto copy_exception_bytes; + return retn + n; } } #endif @@ -803,7 +795,7 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, n -= 4; if (retn) - goto copy_exception_bytes; + return retn + n; } /* If we get here, there were no memory read faults. */ @@ -829,21 +821,8 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, /* If we get here, retn correctly reflects the number of failing bytes. */ return retn; - - copy_exception_bytes: - /* We already have "retn" bytes cleared, and need to clear the - remaining "n" bytes. A non-optimized simple byte-for-byte in-line - memset is preferred here, since this isn't speed-critical code and - we'd rather have this a leaf-function than calling memset. */ - { - char *endp; - for (endp = dst + n; dst < endp; dst++) - *dst = 0; - } - - return retn + n; } -EXPORT_SYMBOL(__copy_user_zeroing); +EXPORT_SYMBOL(raw_copy_from_user); #define __asm_clear_8x64(to, ret) \ asm volatile ( \ -- cgit v1.2.3 From beb0ad97ad099ac99f0354e195bd129586a60694 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 4 Apr 2017 11:43:26 +0100 Subject: metag/usercopy: Set flags before ADDZ commit fd40eee1290ad7add7aa665e3ce6b0f9fe9734b4 upstream. The fixup code for the copy_to_user rapf loops reads TXStatus.LSM_STEP to decide how far to rewind the source pointer. There is a special case for the last execution of an MGETL/MGETD, since it leaves LSM_STEP=0 even though the number of MGETLs/MGETDs attempted was 4. This uses ADDZ which is conditional upon the Z condition flag, but the AND instruction which masked the TXStatus.LSM_STEP field didn't set the condition flags based on the result. Fix that now by using ANDS which does set the flags, and also marking the condition codes as clobbered by the inline assembly. Fixes: 373cd784d0fc ("metag: Memory handling") Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/metag/lib/usercopy.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index e1d553872fd7..4422928a1746 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -315,7 +315,7 @@ " .previous\n" \ : "=r" (to), "=r" (from), "=r" (ret), "=d" (n) \ : "0" (to), "1" (from), "2" (ret), "3" (n) \ - : "D1Ar1", "D0Ar2", "memory") + : "D1Ar1", "D0Ar2", "cc", "memory") /* rewind 'to' and 'from' pointers when a fault occurs * @@ -341,7 +341,7 @@ #define __asm_copy_to_user_64bit_rapf_loop(to, from, ret, n, id)\ __asm_copy_user_64bit_rapf_loop(to, from, ret, n, id, \ "LSR D0Ar2, D0Ar2, #8\n" \ - "AND D0Ar2, D0Ar2, #0x7\n" \ + "ANDS D0Ar2, D0Ar2, #0x7\n" \ "ADDZ D0Ar2, D0Ar2, #4\n" \ "SUB D0Ar2, D0Ar2, #1\n" \ "MOV D1Ar1, #4\n" \ @@ -486,7 +486,7 @@ " .previous\n" \ : "=r" (to), "=r" (from), "=r" (ret), "=d" (n) \ : "0" (to), "1" (from), "2" (ret), "3" (n) \ - : "D1Ar1", "D0Ar2", "memory") + : "D1Ar1", "D0Ar2", "cc", "memory") /* rewind 'to' and 'from' pointers when a fault occurs * @@ -512,7 +512,7 @@ #define __asm_copy_to_user_32bit_rapf_loop(to, from, ret, n, id)\ __asm_copy_user_32bit_rapf_loop(to, from, ret, n, id, \ "LSR D0Ar2, D0Ar2, #8\n" \ - "AND D0Ar2, D0Ar2, #0x7\n" \ + "ANDS D0Ar2, D0Ar2, #0x7\n" \ "ADDZ D0Ar2, D0Ar2, #4\n" \ "SUB D0Ar2, D0Ar2, #1\n" \ "MOV D1Ar1, #4\n" \ -- cgit v1.2.3 From 3040ecd4253a4ef996e6f940801ee4b80b01c87a Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 3 Apr 2017 17:41:40 +0100 Subject: metag/usercopy: Fix src fixup in from user rapf loops commit 2c0b1df88b987a12d95ea1d6beaf01894f3cc725 upstream. The fixup code to rewind the source pointer in __asm_copy_from_user_{32,64}bit_rapf_loop() always rewound the source by a single unit (4 or 8 bytes), however this is insufficient if the fault didn't occur on the first load in the loop, as the source pointer will have been incremented but nothing will have been stored until all 4 register [pairs] are loaded. Read the LSM_STEP field of TXSTATUS (which is already loaded into a register), a bit like the copy_to_user versions, to determine how many iterations of MGET[DL] have taken place, all of which need rewinding. Fixes: 373cd784d0fc ("metag: Memory handling") Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/metag/lib/usercopy.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index 4422928a1746..e09c95ba028c 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -687,29 +687,49 @@ EXPORT_SYMBOL(__copy_user); * * Rationale: * A fault occurs while reading from user buffer, which is the - * source. Since the fault is at a single address, we only - * need to rewind by 8 bytes. + * source. * Since we don't write to kernel buffer until we read first, * the kernel buffer is at the right state and needn't be - * corrected. + * corrected, but the source must be rewound to the beginning of + * the block, which is LSM_STEP*8 bytes. + * LSM_STEP is bits 10:8 in TXSTATUS which is already read + * and stored in D0Ar2 + * + * NOTE: If a fault occurs at the last operation in M{G,S}ETL + * LSM_STEP will be 0. ie: we do 4 writes in our case, if + * a fault happens at the 4th write, LSM_STEP will be 0 + * instead of 4. The code copes with that. */ #define __asm_copy_from_user_64bit_rapf_loop(to, from, ret, n, id) \ __asm_copy_user_64bit_rapf_loop(to, from, ret, n, id, \ - "SUB %1, %1, #8\n") + "LSR D0Ar2, D0Ar2, #5\n" \ + "ANDS D0Ar2, D0Ar2, #0x38\n" \ + "ADDZ D0Ar2, D0Ar2, #32\n" \ + "SUB %1, %1, D0Ar2\n") /* rewind 'from' pointer when a fault occurs * * Rationale: * A fault occurs while reading from user buffer, which is the - * source. Since the fault is at a single address, we only - * need to rewind by 4 bytes. + * source. * Since we don't write to kernel buffer until we read first, * the kernel buffer is at the right state and needn't be - * corrected. + * corrected, but the source must be rewound to the beginning of + * the block, which is LSM_STEP*4 bytes. + * LSM_STEP is bits 10:8 in TXSTATUS which is already read + * and stored in D0Ar2 + * + * NOTE: If a fault occurs at the last operation in M{G,S}ETL + * LSM_STEP will be 0. ie: we do 4 writes in our case, if + * a fault happens at the 4th write, LSM_STEP will be 0 + * instead of 4. The code copes with that. */ #define __asm_copy_from_user_32bit_rapf_loop(to, from, ret, n, id) \ __asm_copy_user_32bit_rapf_loop(to, from, ret, n, id, \ - "SUB %1, %1, #4\n") + "LSR D0Ar2, D0Ar2, #6\n" \ + "ANDS D0Ar2, D0Ar2, #0x1c\n" \ + "ADDZ D0Ar2, D0Ar2, #16\n" \ + "SUB %1, %1, D0Ar2\n") /* -- cgit v1.2.3 From 435cc436a88652046b9ca89fb56acf3a4b1a44b8 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 4 Apr 2017 08:51:34 +0100 Subject: metag/usercopy: Add missing fixups commit b884a190afcecdbef34ca508ea5ee88bb7c77861 upstream. The rapf copy loops in the Meta usercopy code is missing some extable entries for HTP cores with unaligned access checking enabled, where faults occur on the instruction immediately after the faulting access. Add the fixup labels and extable entries for these cases so that corner case user copy failures don't cause kernel crashes. Fixes: 373cd784d0fc ("metag: Memory handling") Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/metag/lib/usercopy.c | 72 +++++++++++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index e09c95ba028c..2792fc621088 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -259,27 +259,31 @@ "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ "22:\n" \ "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ - "SUB %3, %3, #32\n" \ "23:\n" \ - "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "SUB %3, %3, #32\n" \ "24:\n" \ + "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "25:\n" \ "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "26:\n" \ "SUB %3, %3, #32\n" \ "DCACHE [%1+#-64], D0Ar6\n" \ "BR $Lloop"id"\n" \ \ "MOV RAPF, %1\n" \ - "25:\n" \ + "27:\n" \ "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "26:\n" \ + "28:\n" \ "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "29:\n" \ "SUB %3, %3, #32\n" \ - "27:\n" \ + "30:\n" \ "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "28:\n" \ + "31:\n" \ "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "32:\n" \ "SUB %0, %0, #8\n" \ - "29:\n" \ + "33:\n" \ "SETL [%0++], D0.7, D1.7\n" \ "SUB %3, %3, #32\n" \ "1:" \ @@ -311,7 +315,11 @@ " .long 26b,3b\n" \ " .long 27b,3b\n" \ " .long 28b,3b\n" \ - " .long 29b,4b\n" \ + " .long 29b,3b\n" \ + " .long 30b,3b\n" \ + " .long 31b,3b\n" \ + " .long 32b,3b\n" \ + " .long 33b,4b\n" \ " .previous\n" \ : "=r" (to), "=r" (from), "=r" (ret), "=d" (n) \ : "0" (to), "1" (from), "2" (ret), "3" (n) \ @@ -402,47 +410,55 @@ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ "22:\n" \ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ - "SUB %3, %3, #16\n" \ "23:\n" \ - "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "24:\n" \ - "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ "SUB %3, %3, #16\n" \ - "25:\n" \ + "24:\n" \ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "26:\n" \ + "25:\n" \ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "26:\n" \ "SUB %3, %3, #16\n" \ "27:\n" \ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ "28:\n" \ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "29:\n" \ + "SUB %3, %3, #16\n" \ + "30:\n" \ + "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "31:\n" \ + "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "32:\n" \ "SUB %3, %3, #16\n" \ "DCACHE [%1+#-64], D0Ar6\n" \ "BR $Lloop"id"\n" \ \ "MOV RAPF, %1\n" \ - "29:\n" \ + "33:\n" \ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "30:\n" \ + "34:\n" \ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "35:\n" \ "SUB %3, %3, #16\n" \ - "31:\n" \ + "36:\n" \ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "32:\n" \ + "37:\n" \ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "38:\n" \ "SUB %3, %3, #16\n" \ - "33:\n" \ + "39:\n" \ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "34:\n" \ + "40:\n" \ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "41:\n" \ "SUB %3, %3, #16\n" \ - "35:\n" \ + "42:\n" \ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "36:\n" \ + "43:\n" \ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "44:\n" \ "SUB %0, %0, #4\n" \ - "37:\n" \ + "45:\n" \ "SETD [%0++], D0.7\n" \ "SUB %3, %3, #16\n" \ "1:" \ @@ -482,7 +498,15 @@ " .long 34b,3b\n" \ " .long 35b,3b\n" \ " .long 36b,3b\n" \ - " .long 37b,4b\n" \ + " .long 37b,3b\n" \ + " .long 38b,3b\n" \ + " .long 39b,3b\n" \ + " .long 40b,3b\n" \ + " .long 41b,3b\n" \ + " .long 42b,3b\n" \ + " .long 43b,3b\n" \ + " .long 44b,3b\n" \ + " .long 45b,4b\n" \ " .previous\n" \ : "=r" (to), "=r" (from), "=r" (ret), "=d" (n) \ : "0" (to), "1" (from), "2" (ret), "3" (n) \ -- cgit v1.2.3 From a67004a3896eacd109a0138b5526957381fe4337 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Wed, 29 Mar 2017 19:19:42 +0200 Subject: powerpc/mm: Add missing global TLB invalidate if cxl is active commit 88b1bf7268f56887ca88eb09c6fb0f4fc970121a upstream. Commit 4c6d9acce1f4 ("powerpc/mm: Add hooks for cxl") converted local TLB invalidates to global if the cxl driver is active. This is necessary because the CAPP snoops invalidations to forward them to the PSL on the cxl adapter. However one path was forgotten. native_flush_hash_range() still does local TLB invalidates, as found out the hard way recently. This patch fixes it by following the same logic as previously: if the cxl driver is active, the local TLB invalidates are 'upgraded' to global. Fixes: 4c6d9acce1f4 ("powerpc/mm: Add hooks for cxl") Signed-off-by: Frederic Barrat Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/hash_native_64.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index c8822af10a58..19d9b2d2d212 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -645,6 +645,10 @@ static void native_flush_hash_range(unsigned long number, int local) unsigned long psize = batch->psize; int ssize = batch->ssize; int i; + unsigned int use_local; + + use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && + mmu_psize_defs[psize].tlbiel && !cxl_ctx_in_use(); local_irq_save(flags); @@ -671,8 +675,7 @@ static void native_flush_hash_range(unsigned long number, int local) } pte_iterate_hashed_end(); } - if (mmu_has_feature(MMU_FTR_TLBIEL) && - mmu_psize_defs[psize].tlbiel && local) { + if (use_local) { asm volatile("ptesync":::"memory"); for (i = 0; i < number; i++) { vpn = batch->vpn[i]; -- cgit v1.2.3 From ca9bd55235b346da89dadc1821e37bb4ec22b7eb Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 4 Apr 2017 14:56:05 +1000 Subject: powerpc: Don't try to fix up misaligned load-with-reservation instructions commit 48fe9e9488743eec9b7c1addd3c93f12f2123d54 upstream. In the past, there was only one load-with-reservation instruction, lwarx, and if a program attempted a lwarx on a misaligned address, it would take an alignment interrupt and the kernel handler would emulate it as though it was lwzx, which was not really correct, but benign since it is loading the right amount of data, and the lwarx should be paired with a stwcx. to the same address, which would also cause an alignment interrupt which would result in a SIGBUS being delivered to the process. We now have 5 different sizes of load-with-reservation instruction. Of those, lharx and ldarx cause an immediate SIGBUS by luck since their entries in aligninfo[] overlap instructions which were not fixed up, but lqarx overlaps with lhz and will be emulated as such. lbarx can never generate an alignment interrupt since it only operates on 1 byte. To straighten this out and fix the lqarx case, this adds code to detect the l[hwdq]arx instructions and return without fixing them up, resulting in a SIGBUS being delivered to the process. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/align.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 86150fbb42c3..91e5c1758b5c 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -808,14 +808,25 @@ int fix_alignment(struct pt_regs *regs) nb = aligninfo[instr].len; flags = aligninfo[instr].flags; - /* ldbrx/stdbrx overlap lfs/stfs in the DSISR unfortunately */ - if (IS_XFORM(instruction) && ((instruction >> 1) & 0x3ff) == 532) { - nb = 8; - flags = LD+SW; - } else if (IS_XFORM(instruction) && - ((instruction >> 1) & 0x3ff) == 660) { - nb = 8; - flags = ST+SW; + /* + * Handle some cases which give overlaps in the DSISR values. + */ + if (IS_XFORM(instruction)) { + switch (get_xop(instruction)) { + case 532: /* ldbrx */ + nb = 8; + flags = LD+SW; + break; + case 660: /* stdbrx */ + nb = 8; + flags = ST+SW; + break; + case 20: /* lwarx */ + case 84: /* ldarx */ + case 116: /* lharx */ + case 276: /* lqarx */ + return 0; /* not emulated ever */ + } } /* Byteswap little endian loads and stores */ -- cgit v1.2.3 From 1c47303355dc970d692f3625839da43f6b969622 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Sun, 2 Apr 2017 20:08:04 -0700 Subject: nios2: reserve boot memory for device tree commit 921d701e6f31e1ffaca3560416af1aa04edb4c4f upstream. Make sure to reserve the boot memory for the flattened device tree. Otherwise it might get overwritten, e.g. when initial_boot_params is copied, leading to a corrupted FDT and a boot hang/crash: bootconsole [early0] enabled Early console on uart16650 initialized at 0xf8001600 OF: fdt: Error -11 processing FDT Kernel panic - not syncing: setup_cpuinfo: No CPU found in devicetree! ---[ end Kernel panic - not syncing: setup_cpuinfo: No CPU found in devicetree! Guenter Roeck says: > I think I found the problem. In unflatten_and_copy_device_tree(), with added > debug information: > > OF: fdt: initial_boot_params=c861e400, dt=c861f000 size=28874 (0x70ca) > > ... and then initial_boot_params is copied to dt, which results in corrupted > fdt since the memory overlaps. Looks like the initial_boot_params memory > is not reserved and (re-)allocated by early_init_dt_alloc_memory_arch(). Reported-by: Guenter Roeck Reference: http://lkml.kernel.org/r/20170226210338.GA19476@roeck-us.net Tested-by: Guenter Roeck Signed-off-by: Tobias Klauser Acked-by: Ley Foon Tan Signed-off-by: Greg Kroah-Hartman --- arch/nios2/kernel/prom.c | 7 +++++++ arch/nios2/kernel/setup.c | 3 +++ 2 files changed, 10 insertions(+) diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c index 718dd197909f..de73beb36910 100644 --- a/arch/nios2/kernel/prom.c +++ b/arch/nios2/kernel/prom.c @@ -48,6 +48,13 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) return alloc_bootmem_align(size, align); } +int __init early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size, + bool nomap) +{ + reserve_bootmem(base, size, BOOTMEM_DEFAULT); + return 0; +} + void __init early_init_devtree(void *params) { __be32 *dtb = (u32 *)__dtb_start; diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index a4ff86d58d5c..6c4e351a7930 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -195,6 +195,9 @@ void __init setup_arch(char **cmdline_p) } #endif /* CONFIG_BLK_DEV_INITRD */ + early_init_fdt_reserve_self(); + early_init_fdt_scan_reserved_mem(); + unflatten_and_copy_device_tree(); setup_cpuinfo(); -- cgit v1.2.3 From 765ee8ce4e3d059378aefc40666b024e4cd494f2 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Cerri Date: Mon, 13 Mar 2017 12:14:58 -0300 Subject: s390/decompressor: fix initrd corruption caused by bss clear commit d82c0d12c92705ef468683c9b7a8298dd61ed191 upstream. Reorder the operations in decompress_kernel() to ensure initrd is moved to a safe location before the bss section is zeroed. During decompression bss can overlap with the initrd and this can corrupt the initrd contents depending on the size of the compressed kernel (which affects where the initrd is placed by the bootloader) and the size of the bss section of the decompressor. Also use the correct initrd size when checking for overlaps with parmblock. Fixes: 06c0dd72aea3 ([S390] fix boot failures with compressed kernels) Reviewed-by: Joy Latten Reviewed-by: Vineetha HariPai Signed-off-by: Marcelo Henrique Cerri Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/boot/compressed/misc.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 4da604ebf6fd..ca15613eaaa4 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -141,31 +141,34 @@ static void check_ipl_parmblock(void *start, unsigned long size) unsigned long decompress_kernel(void) { - unsigned long output_addr; - unsigned char *output; + void *output, *kernel_end; - output_addr = ((unsigned long) &_end + HEAP_SIZE + 4095UL) & -4096UL; - check_ipl_parmblock((void *) 0, output_addr + SZ__bss_start); - memset(&_bss, 0, &_ebss - &_bss); - free_mem_ptr = (unsigned long)&_end; - free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; - output = (unsigned char *) output_addr; + output = (void *) ALIGN((unsigned long) &_end + HEAP_SIZE, PAGE_SIZE); + kernel_end = output + SZ__bss_start; + check_ipl_parmblock((void *) 0, (unsigned long) kernel_end); #ifdef CONFIG_BLK_DEV_INITRD /* * Move the initrd right behind the end of the decompressed - * kernel image. + * kernel image. This also prevents initrd corruption caused by + * bss clearing since kernel_end will always be located behind the + * current bss section.. */ - if (INITRD_START && INITRD_SIZE && - INITRD_START < (unsigned long) output + SZ__bss_start) { - check_ipl_parmblock(output + SZ__bss_start, - INITRD_START + INITRD_SIZE); - memmove(output + SZ__bss_start, - (void *) INITRD_START, INITRD_SIZE); - INITRD_START = (unsigned long) output + SZ__bss_start; + if (INITRD_START && INITRD_SIZE && kernel_end > (void *) INITRD_START) { + check_ipl_parmblock(kernel_end, INITRD_SIZE); + memmove(kernel_end, (void *) INITRD_START, INITRD_SIZE); + INITRD_START = (unsigned long) kernel_end; } #endif + /* + * Clear bss section. free_mem_ptr and free_mem_end_ptr need to be + * initialized afterwards since they reside in bss. + */ + memset(&_bss, 0, &_ebss - &_bss); + free_mem_ptr = (unsigned long) &_end; + free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; + puts("Uncompressing Linux... "); __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); puts("Ok, booting the kernel.\n"); -- cgit v1.2.3 From 0f5d17253b2868a3e75d623dcb2514e305bc7447 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 27 Mar 2017 09:48:04 +0200 Subject: s390/uaccess: get_user() should zero on failure (again) commit d09c5373e8e4eaaa09233552cbf75dc4c4f21203 upstream. Commit fd2d2b191fe7 ("s390: get_user() should zero on failure") intended to fix s390's get_user() implementation which did not zero the target operand if the read from user space faulted. Unfortunately the patch has no effect: the corresponding inline assembly specifies that the operand is only written to ("=") and the previous value is discarded. Therefore the compiler is free to and actually does omit the zero initialization. To fix this simply change the contraint modifier to "+", so the compiler cannot omit the initialization anymore. Fixes: c9ca78415ac1 ("s390/uaccess: provide inline variants of get_user/put_user") Fixes: fd2d2b191fe7 ("s390: get_user() should zero on failure") Cc: Al Viro Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 5c7381c5ad7f..c8d837f0fbbc 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -150,7 +150,7 @@ unsigned long __must_check __copy_to_user(void __user *to, const void *from, " jg 2b\n" \ ".popsection\n" \ EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ - : "=d" (__rc), "=Q" (*(to)) \ + : "=d" (__rc), "+Q" (*(to)) \ : "d" (size), "Q" (*(from)), \ "d" (__reg0), "K" (-EFAULT) \ : "cc"); \ -- cgit v1.2.3 From 394d71b1ea24c248a8d497d10635b86dd2fccef7 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 16 Feb 2017 12:39:01 +0000 Subject: MIPS: Force o32 fp64 support on 32bit MIPS64r6 kernels commit 2e6c7747730296a6d4fd700894286db1132598c4 upstream. When a 32-bit kernel is configured to support MIPS64r6 (CPU_MIPS64_R6), MIPS_O32_FP64_SUPPORT won't be selected as it should be because MIPS32_O32 is disabled (o32 is already the default ABI available on 32-bit kernels). This results in userland FP breakage as CP0_Status.FR is read-only 1 since r6 (when an FPU is present) so __enable_fpu() will fail to clear FR. This causes the FPU emulator to get used which will incorrectly emulate 32-bit FPU registers. Force o32 fp64 support in this case by also selecting MIPS_O32_FP64_SUPPORT from CPU_MIPS64_R6 if 32BIT. Fixes: 4e9d324d4288 ("MIPS: Require O32 FP64 support for MIPS64 with O32 compat") Signed-off-by: James Hogan Reviewed-by: Paul Burton Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15310/ Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index db459612de44..75bfca69e418 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1412,7 +1412,7 @@ config CPU_MIPS32_R6 select CPU_SUPPORTS_MSA select GENERIC_CSUM select HAVE_KVM - select MIPS_O32_FP64_SUPPORT + select MIPS_O32_FP64_SUPPORT if 32BIT help Choose this option to build a kernel for release 6 or later of the MIPS32 architecture. New MIPS processors, starting with the Warrior -- cgit v1.2.3 From 22665fe0a60a73734889e1cfc7f8fba4036e0b9a Mon Sep 17 00:00:00 2001 From: John Crispin Date: Sat, 25 Feb 2017 11:54:23 +0100 Subject: MIPS: ralink: Fix typos in rt3883 pinctrl commit 7c5a3d813050ee235817b0220dd8c42359a9efd8 upstream. There are two copy & paste errors in the definition of the 5GHz LNA and second ethernet pinmux. Fixes: f576fb6a0700 ("MIPS: ralink: cleanup the soc specific pinmux data") Signed-off-by: John Crispin Signed-off-by: Daniel Golle Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15328/ Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/ralink/rt3883.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/ralink/rt3883.c b/arch/mips/ralink/rt3883.c index f42834c7f007..3c575093f8f1 100644 --- a/arch/mips/ralink/rt3883.c +++ b/arch/mips/ralink/rt3883.c @@ -36,7 +36,7 @@ static struct rt2880_pmx_func uartlite_func[] = { FUNC("uartlite", 0, 15, 2) }; static struct rt2880_pmx_func jtag_func[] = { FUNC("jtag", 0, 17, 5) }; static struct rt2880_pmx_func mdio_func[] = { FUNC("mdio", 0, 22, 2) }; static struct rt2880_pmx_func lna_a_func[] = { FUNC("lna a", 0, 32, 3) }; -static struct rt2880_pmx_func lna_g_func[] = { FUNC("lna a", 0, 35, 3) }; +static struct rt2880_pmx_func lna_g_func[] = { FUNC("lna g", 0, 35, 3) }; static struct rt2880_pmx_func pci_func[] = { FUNC("pci-dev", 0, 40, 32), FUNC("pci-host2", 1, 40, 32), @@ -44,7 +44,7 @@ static struct rt2880_pmx_func pci_func[] = { FUNC("pci-fnc", 3, 40, 32) }; static struct rt2880_pmx_func ge1_func[] = { FUNC("ge1", 0, 72, 12) }; -static struct rt2880_pmx_func ge2_func[] = { FUNC("ge1", 0, 84, 12) }; +static struct rt2880_pmx_func ge2_func[] = { FUNC("ge2", 0, 84, 12) }; static struct rt2880_pmx_group rt3883_pinmux_data[] = { GRP("i2c", i2c_func, 1, RT3883_GPIO_MODE_I2C), -- cgit v1.2.3 From 768019030ab58e9622caeb6c5a06553260609327 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 23 Feb 2017 14:50:24 +0000 Subject: MIPS: End spinlocks with .insn commit 4b5347a24a0f2d3272032c120664b484478455de upstream. When building for microMIPS we need to ensure that the assembler always knows that there is code at the target of a branch or jump. Recent toolchains will fail to link a microMIPS kernel when this isn't the case due to what it thinks is a branch to non-microMIPS code. mips-mti-linux-gnu-ld kernel/built-in.o: .spinlock.text+0x2fc: Unsupported branch between ISA modes. mips-mti-linux-gnu-ld final link failed: Bad value This is due to inline assembly labels in spinlock.h not being followed by an instruction mnemonic, either due to a .subsection pseudo-op or the end of the inline asm block. Fix this with a .insn direction after such labels. Signed-off-by: Paul Burton Signed-off-by: James Hogan Reviewed-by: Maciej W. Rozycki Cc: Ralf Baechle Cc: Peter Zijlstra Cc: Ingo Molnar Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15325/ Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/spinlock.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index 40196bebe849..2365ce0ad8f2 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -112,7 +112,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) " andi %[ticket], %[ticket], 0xffff \n" " bne %[ticket], %[my_ticket], 4f \n" " subu %[ticket], %[my_ticket], %[ticket] \n" - "2: \n" + "2: .insn \n" " .subsection 2 \n" "4: andi %[ticket], %[ticket], 0xffff \n" " sll %[ticket], 5 \n" @@ -187,7 +187,7 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) " sc %[ticket], %[ticket_ptr] \n" " beqz %[ticket], 1b \n" " li %[ticket], 1 \n" - "2: \n" + "2: .insn \n" " .subsection 2 \n" "3: b 2b \n" " li %[ticket], 0 \n" @@ -367,7 +367,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) " .set reorder \n" __WEAK_LLSC_MB " li %2, 1 \n" - "2: \n" + "2: .insn \n" : "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret) : GCC_OFF_SMALL_ASM() (rw->lock) : "memory"); @@ -407,7 +407,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) " lui %1, 0x8000 \n" " sc %1, %0 \n" " li %2, 1 \n" - "2: \n" + "2: .insn \n" : "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret) : GCC_OFF_SMALL_ASM() (rw->lock) -- cgit v1.2.3 From 55f67b97ca05df00c3b61123a7e9e363819c60ee Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Wed, 15 Mar 2017 23:26:42 +0100 Subject: MIPS: Lantiq: fix missing xbar kernel panic commit 6ef90877eee63a0d03e83183bb44b64229b624e6 upstream. Commit 08b3c894e565 ("MIPS: lantiq: Disable xbar fpi burst mode") accidentally requested the resources from the pmu address region instead of the xbar registers region, but the check for the return value of request_mem_region() was wrong. Commit 98ea51cb0c8c ("MIPS: Lantiq: Fix another request_mem_region() return code check") fixed the check of the return value of request_mem_region() which made the kernel panics. This patch now makes use of the correct memory region for the cross bar. Fixes: 08b3c894e565 ("MIPS: lantiq: Disable xbar fpi burst mode") Signed-off-by: Hauke Mehrtens Cc: John Crispin Cc: james.hogan@imgtec.com Cc: arnd@arndb.de Cc: sergei.shtylyov@cogentembedded.com Cc: john@phrozen.org Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15751 Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/lantiq/xway/sysctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 3e390a4e3897..daf580ce5ca2 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -467,7 +467,7 @@ void __init ltq_soc_init(void) if (!np_xbar) panic("Failed to load xbar nodes from devicetree"); - if (of_address_to_resource(np_pmu, 0, &res_xbar)) + if (of_address_to_resource(np_xbar, 0, &res_xbar)) panic("Failed to get xbar resources"); if (request_mem_region(res_xbar.start, resource_size(&res_xbar), res_xbar.name) < 0) -- cgit v1.2.3 From 2d1af1b7025f96c86938af7f7c54d60adb4773fe Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 16 Mar 2017 21:00:27 +0800 Subject: MIPS: Flush wrong invalid FTLB entry for huge page commit 0115f6cbf26663c86496bc56eeea293f85b77897 upstream. On VTLB+FTLB platforms (such as Loongson-3A R2), FTLB's pagesize is usually configured the same as PAGE_SIZE. In such a case, Huge page entry is not suitable to write in FTLB. Unfortunately, when a huge page is created, its page table entries haven't created immediately. Then the TLB refill handler will fetch an invalid page table entry which has no "HUGE" bit, and this entry may be written to FTLB. Since it is invalid, TLB load/store handler will then use tlbwi to write the valid entry at the same place. However, the valid entry is a huge page entry which isn't suitable for FTLB. Our solution is to modify build_huge_handler_tail. Flush the invalid old entry (whether it is in FTLB or VTLB, this is in order to reduce branches) and use tlbwr to write the valid new entry. Signed-off-by: Rui Wang Signed-off-by: Huacai Chen Cc: John Crispin Cc: Steven J . Hill Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15754/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/mm/tlbex.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 29f73e00253d..63b7d6f82d24 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -757,7 +757,8 @@ static void build_huge_update_entries(u32 **p, unsigned int pte, static void build_huge_handler_tail(u32 **p, struct uasm_reloc **r, struct uasm_label **l, unsigned int pte, - unsigned int ptr) + unsigned int ptr, + unsigned int flush) { #ifdef CONFIG_SMP UASM_i_SC(p, pte, 0, ptr); @@ -766,6 +767,22 @@ static void build_huge_handler_tail(u32 **p, struct uasm_reloc **r, #else UASM_i_SW(p, pte, 0, ptr); #endif + if (cpu_has_ftlb && flush) { + BUG_ON(!cpu_has_tlbinv); + + UASM_i_MFC0(p, ptr, C0_ENTRYHI); + uasm_i_ori(p, ptr, ptr, MIPS_ENTRYHI_EHINV); + UASM_i_MTC0(p, ptr, C0_ENTRYHI); + build_tlb_write_entry(p, l, r, tlb_indexed); + + uasm_i_xori(p, ptr, ptr, MIPS_ENTRYHI_EHINV); + UASM_i_MTC0(p, ptr, C0_ENTRYHI); + build_huge_update_entries(p, pte, ptr); + build_huge_tlb_write_entry(p, l, r, pte, tlb_random, 0); + + return; + } + build_huge_update_entries(p, pte, ptr); build_huge_tlb_write_entry(p, l, r, pte, tlb_indexed, 0); } @@ -2082,7 +2099,7 @@ static void build_r4000_tlb_load_handler(void) uasm_l_tlbl_goaround2(&l, p); } uasm_i_ori(&p, wr.r1, wr.r1, (_PAGE_ACCESSED | _PAGE_VALID)); - build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2); + build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 1); #endif uasm_l_nopage_tlbl(&l, p); @@ -2137,7 +2154,7 @@ static void build_r4000_tlb_store_handler(void) build_tlb_probe_entry(&p); uasm_i_ori(&p, wr.r1, wr.r1, _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY); - build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2); + build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 1); #endif uasm_l_nopage_tlbs(&l, p); @@ -2193,7 +2210,7 @@ static void build_r4000_tlb_modify_handler(void) build_tlb_probe_entry(&p); uasm_i_ori(&p, wr.r1, wr.r1, _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY); - build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2); + build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 0); #endif uasm_l_nopage_tlbm(&l, p); -- cgit v1.2.3 From b73d08ce20c5cb2e0cec8c019a27b9574e2c4ec2 Mon Sep 17 00:00:00 2001 From: Chris Salls Date: Fri, 7 Apr 2017 23:48:11 -0700 Subject: mm/mempolicy.c: fix error handling in set_mempolicy and mbind. commit cf01fb9985e8deb25ccf0ea54d916b8871ae0e62 upstream. In the case that compat_get_bitmap fails we do not want to copy the bitmap to the user as it will contain uninitialized stack data and leak sensitive data. Signed-off-by: Chris Salls Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a4217fe60dff..e09b1a0e2cfe 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1492,7 +1492,6 @@ COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask, compat_ulong_t, maxnode) { - long err = 0; unsigned long __user *nm = NULL; unsigned long nr_bits, alloc_size; DECLARE_BITMAP(bm, MAX_NUMNODES); @@ -1501,14 +1500,13 @@ COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask, alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8; if (nmask) { - err = compat_get_bitmap(bm, nmask, nr_bits); + if (compat_get_bitmap(bm, nmask, nr_bits)) + return -EFAULT; nm = compat_alloc_user_space(alloc_size); - err |= copy_to_user(nm, bm, alloc_size); + if (copy_to_user(nm, bm, alloc_size)) + return -EFAULT; } - if (err) - return -EFAULT; - return sys_set_mempolicy(mode, nm, nr_bits+1); } @@ -1516,7 +1514,6 @@ COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len, compat_ulong_t, mode, compat_ulong_t __user *, nmask, compat_ulong_t, maxnode, compat_ulong_t, flags) { - long err = 0; unsigned long __user *nm = NULL; unsigned long nr_bits, alloc_size; nodemask_t bm; @@ -1525,14 +1522,13 @@ COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len, alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8; if (nmask) { - err = compat_get_bitmap(nodes_addr(bm), nmask, nr_bits); + if (compat_get_bitmap(nodes_addr(bm), nmask, nr_bits)) + return -EFAULT; nm = compat_alloc_user_space(alloc_size); - err |= copy_to_user(nm, nodes_addr(bm), alloc_size); + if (copy_to_user(nm, nodes_addr(bm), alloc_size)) + return -EFAULT; } - if (err) - return -EFAULT; - return sys_mbind(start, len, mode, nm, nr_bits+1, flags); } -- cgit v1.2.3 From ec5e61608ad1919c1ff3cc0369dbf1b1ede9eb88 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 12 Apr 2017 12:38:50 +0200 Subject: Linux 4.4.61 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fb7c2b40753d..ef5045b8201d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 60 +SUBLEVEL = 61 EXTRAVERSION = NAME = Blurry Fish Butt -- cgit v1.2.3 From 8affe1f70379abeafb9bc1ee81132beae3e76b93 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 29 Mar 2017 16:11:20 +0200 Subject: UPSTREAM: net/packet: fix overflow in check for priv area size Subtracting tp_sizeof_priv from tp_block_size and casting to int to check whether one is less then the other doesn't always work (both of them are unsigned ints). Compare them as is instead. Also cast tp_sizeof_priv to u64 before using BLK_PLUS_PRIV, as it can overflow inside BLK_PLUS_PRIV otherwise. Bug: 36725304 Upstream commit: 2b6867c2ce76c596676bec7d2d525af525fdc6e2 Signed-off-by: Andrey Konovalov Acked-by: Eric Dumazet Signed-off-by: David S. Miller Change-Id: I46bfbaf5f4a5d80f10ddce731a3030f191de4b28 --- net/packet/af_packet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3975ac809934..d76800108ddb 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4138,8 +4138,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) goto out; if (po->tp_version >= TPACKET_V3 && - (int)(req->tp_block_size - - BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0) + req->tp_block_size <= + BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv)) goto out; if (unlikely(req->tp_frame_size < po->tp_hdrlen + po->tp_reserve)) -- cgit v1.2.3 From a973601dcbc98f41c9085ce91a68f314c046cf9c Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 23 Dec 2016 00:33:57 +0900 Subject: UPSTREAM: net: ipv4: Don't crash if passing a null sk to ip_do_redirect. Commit e2d118a1cb5e ("net: inet: Support UID-based routing in IP protocols.") made ip_do_redirect call sock_net(sk) to determine the network namespace of the passed-in socket. This crashes if sk is NULL. Fix this by getting the network namespace from the skb instead. Fixes: e2d118a1cb5e ("net: inet: Support UID-based routing in IP protocols.") Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller Change-Id: I16a3c343cb142c482ca6dd363c28b3a12d73a46d Fixes: Change-Id: I910504b508948057912bc188fd1e8aca28294de3 ("net: inet: Support UID-based routing in IP protocols.") (cherry picked from commit 7d99569460eae28b187d574aec930a4cf8b90441) Signed-off-by: Amit Pundir --- net/ipv4/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9af0f461b00d..d162ce41f761 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -792,6 +792,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf struct rtable *rt; struct flowi4 fl4; const struct iphdr *iph = (const struct iphdr *) skb->data; + struct net *net = dev_net(skb->dev); int oif = skb->dev->ifindex; u8 tos = RT_TOS(iph->tos); u8 prot = iph->protocol; @@ -799,7 +800,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf rt = (struct rtable *) dst; - __build_flow_key(sock_net(sk), &fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); __ip_do_redirect(rt, skb, &fl4, true); } -- cgit v1.2.3 From 2a8af3ae8d93b34b4162f7c05e1b0a63cd22ec07 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 10 Jan 2017 09:30:51 +0100 Subject: UPSTREAM: net: socket: Make unnecessarily global sockfs_setattr() static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sockfs_setattr() static as it is not used outside of net/socket.c This fixes the following GCC warning: net/socket.c:534:5: warning: no previous prototype for ‘sockfs_setattr’ [-Wmissing-prototypes] Fixes: 86741ec25462 ("net: core: Add a UID field to struct sock.") Cc: Lorenzo Colitti Signed-off-by: Tobias Klauser Acked-by: Lorenzo Colitti Signed-off-by: David S. Miller Change-Id: Ie613c441b3fe081bdaec8c480d3aade482873bf8 Fixes: Change-Id: Idbc3e9a0cec91c4c6e01916b967b6237645ebe59 ("net: core: Add a UID field to struct sock.") (cherry picked from commit dc647ec88e029307e60e6bf9988056605f11051a) Signed-off-by: Amit Pundir --- net/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index 1489761b371e..18aff3d804ec 100644 --- a/net/socket.c +++ b/net/socket.c @@ -520,7 +520,7 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, return used; } -int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) +static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) { int err = simple_setattr(dentry, iattr); -- cgit v1.2.3 From cb0a2cba62d58caf6668f630858acc15ed40ee23 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 15 Feb 2017 15:52:59 +0200 Subject: drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 34dc8993eef63681b062871413a9484008a2a78f upstream. Certain Baytrails, namely the 4 cpu core variants, have been plaqued by spurious system hangs, mostly occurring with light loads. Multiple bisects by various people point to a commit which changes the reclocking strategy for Baytrail to follow its bigger brethen: commit 8fb55197e64d ("drm/i915: Agressive downclocking on Baytrail") There is also a review comment attached to this commit from Deepak S on avoiding punit access on Cherryview and thus it was excluded on common reclocking path. By taking the same approach and omitting the punit access by not tweaking the thresholds when the hardware has been asked to move into different frequency, considerable gains in stability have been observed. With J1900 box, light render/video load would end up in system hang in usually less than 12 hours. With this patch applied, the cumulative uptime has now been 34 days without issues. To provoke system hang, light loads on both render and bsd engines in parallel have been used: glxgears >/dev/null 2>/dev/null & mpv --vo=vaapi --hwdec=vaapi --loop=inf vid.mp4 So far, author has not witnessed system hang with above load and this patch applied. Reports from the tenacious people at kernel bugzilla are also promising. Considering that the punit access frequency with this patch is considerably less, there is a possibility that this will push the, still unknown, root cause past the triggering point on most loads. But as we now can reliably reproduce the hang independently, we can reduce the pain that users are having and use a static thresholds until a root cause is found. v3: don't break debugfs and simplification (Chris Wilson) References: https://bugzilla.kernel.org/show_bug.cgi?id=109051 Cc: Chris Wilson Cc: Ville Syrjälä Cc: Len Brown Cc: Daniel Vetter Cc: Jani Nikula Cc: fritsch@xbmc.org Cc: miku@iki.fi Cc: Ezequiel Garcia CC: Michal Feix Cc: Hans de Goede Cc: Deepak S Cc: Jarkko Nikula Acked-by: Daniel Vetter Acked-by: Chris Wilson Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1487166779-26945-1-git-send-email-mika.kuoppala@intel.com (cherry picked from commit 6067a27d1f0184596d51decbac1c1fdc4acb012f) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_pm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e7c18519274a..e4031fcac4bf 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4376,6 +4376,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) break; } + /* When byt can survive without system hang with dynamic + * sw freq adjustments, this restriction can be lifted. + */ + if (IS_VALLEYVIEW(dev_priv)) + goto skip_hw_write; + I915_WRITE(GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(dev_priv, ei_up)); I915_WRITE(GEN6_RP_UP_THRESHOLD, @@ -4394,6 +4400,7 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_AVG); +skip_hw_write: dev_priv->rps.power = new_power; dev_priv->rps.up_threshold = threshold_up; dev_priv->rps.down_threshold = threshold_down; -- cgit v1.2.3 From 8cfaf0ae1f566ddfcda661bd81b625a71b16459a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Mar 2017 17:06:17 +0000 Subject: drm/i915: Stop using RP_DOWN_EI on Baytrail commit 8f68d591d4765b2e1ce9d916ac7bc5583285c4ad upstream. On Baytrail, we manually calculate busyness over the evaluation interval to avoid issues with miscaluations with RC6 enabled. However, it turns out that the DOWN_EI interrupt generator is completely bust - it operates in two modes, continuous or never. Neither of which are conducive to good behaviour. Stop unmask the DOWN_EI interrupt and just compute everything from the UP_EI which does seem to correspond to the desired interval. v2: Fixup gen6_rps_pm_mask() as well v3: Inline vlv_c0_above() to combine the now identical elapsed calculation for up/down and simplify the threshold testing Fixes: 43cf3bf084ba ("drm/i915: Improved w/a for rps on Baytrail") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170309211232.28878-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170313170617.31564-1-chris@chris-wilson.co.uk (cherry picked from commit e0e8c7cb6eb68e9256de2d8cbeb481d3701c05ac) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_irq.c | 73 ++++++++++++++++------------------------- drivers/gpu/drm/i915/intel_pm.c | 5 +-- 3 files changed, 32 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fb9f647bb5cd..5044f2257e89 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1159,7 +1159,7 @@ struct intel_gen6_power_mgmt { struct intel_rps_client semaphores, mmioflips; /* manual wa residency calculations */ - struct intel_rps_ei up_ei, down_ei; + struct intel_rps_ei ei; /* * Protects RPS/RC6 register access and PCU communication. diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0f42a2782afc..b7b0a38acd67 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -994,68 +994,51 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv, ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT); } -static bool vlv_c0_above(struct drm_i915_private *dev_priv, - const struct intel_rps_ei *old, - const struct intel_rps_ei *now, - int threshold) -{ - u64 time, c0; - unsigned int mul = 100; - - if (old->cz_clock == 0) - return false; - - if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH) - mul <<= 8; - - time = now->cz_clock - old->cz_clock; - time *= threshold * dev_priv->czclk_freq; - - /* Workload can be split between render + media, e.g. SwapBuffers - * being blitted in X after being rendered in mesa. To account for - * this we need to combine both engines into our activity counter. - */ - c0 = now->render_c0 - old->render_c0; - c0 += now->media_c0 - old->media_c0; - c0 *= mul * VLV_CZ_CLOCK_TO_MILLI_SEC; - - return c0 >= time; -} - void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) { - vlv_c0_read(dev_priv, &dev_priv->rps.down_ei); - dev_priv->rps.up_ei = dev_priv->rps.down_ei; + memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei)); } static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) { + const struct intel_rps_ei *prev = &dev_priv->rps.ei; struct intel_rps_ei now; u32 events = 0; - if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0) + if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) return 0; vlv_c0_read(dev_priv, &now); if (now.cz_clock == 0) return 0; - if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) { - if (!vlv_c0_above(dev_priv, - &dev_priv->rps.down_ei, &now, - dev_priv->rps.down_threshold)) - events |= GEN6_PM_RP_DOWN_THRESHOLD; - dev_priv->rps.down_ei = now; - } + if (prev->cz_clock) { + u64 time, c0; + unsigned int mul; - if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) { - if (vlv_c0_above(dev_priv, - &dev_priv->rps.up_ei, &now, - dev_priv->rps.up_threshold)) - events |= GEN6_PM_RP_UP_THRESHOLD; - dev_priv->rps.up_ei = now; + mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to threshold% */ + if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH) + mul <<= 8; + + time = now.cz_clock - prev->cz_clock; + time *= dev_priv->czclk_freq; + + /* Workload can be split between render + media, + * e.g. SwapBuffers being blitted in X after being rendered in + * mesa. To account for this we need to combine both engines + * into our activity counter. + */ + c0 = now.render_c0 - prev->render_c0; + c0 += now.media_c0 - prev->media_c0; + c0 *= mul; + + if (c0 > time * dev_priv->rps.up_threshold) + events = GEN6_PM_RP_UP_THRESHOLD; + else if (c0 < time * dev_priv->rps.down_threshold) + events = GEN6_PM_RP_DOWN_THRESHOLD; } + dev_priv->rps.ei = now; return events; } @@ -4390,7 +4373,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv) /* Let's track the enabled rps events */ if (IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) /* WaGsvRC0ResidencyMethod:vlv */ - dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED; + dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED; else dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e4031fcac4bf..fd4690ed93c0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4411,8 +4411,9 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) { u32 mask = 0; + /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */ if (val > dev_priv->rps.min_freq_softlimit) - mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; + mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; if (val < dev_priv->rps.max_freq_softlimit) mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; @@ -4516,7 +4517,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) { mutex_lock(&dev_priv->rps.hw_lock); if (dev_priv->rps.enabled) { - if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) + if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) gen6_rps_reset_ei(dev_priv); I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); -- cgit v1.2.3 From 297f55bcb62ad0b6b290b01177d9395305d57020 Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Mon, 13 Mar 2017 14:11:32 +0200 Subject: usb: dwc3: gadget: delay unmap of bounced requests commit de288e36fe33f7e06fa272bc8e2f85aa386d99aa upstream. In the case of bounced ep0 requests, we must delay DMA operation until after ->complete() otherwise we might overwrite contents of req->buf. This caused problems with RNDIS gadget. Signed-off-by: Janusz Dziedzic Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 210ff64857e1..ec7a50f98f57 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -235,6 +235,7 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, int status) { struct dwc3 *dwc = dep->dwc; + unsigned int unmap_after_complete = false; int i; if (req->queued) { @@ -259,11 +260,19 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, if (req->request.status == -EINPROGRESS) req->request.status = status; - if (dwc->ep0_bounced && dep->number <= 1) + /* + * NOTICE we don't want to unmap before calling ->complete() if we're + * dealing with a bounced ep0 request. If we unmap it here, we would end + * up overwritting the contents of req->buf and this could confuse the + * gadget driver. + */ + if (dwc->ep0_bounced && dep->number <= 1) { dwc->ep0_bounced = false; - - usb_gadget_unmap_request(&dwc->gadget, &req->request, - req->direction); + unmap_after_complete = true; + } else { + usb_gadget_unmap_request(&dwc->gadget, + &req->request, req->direction); + } dev_dbg(dwc->dev, "request %p from %s completed %d/%d ===> %d\n", req, dep->name, req->request.actual, @@ -273,6 +282,10 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, spin_unlock(&dwc->lock); usb_gadget_giveback_request(&dep->endpoint, &req->request); spin_lock(&dwc->lock); + + if (unmap_after_complete) + usb_gadget_unmap_request(&dwc->gadget, + &req->request, req->direction); } int dwc3_send_gadget_generic_command(struct dwc3 *dwc, unsigned cmd, u32 param) -- cgit v1.2.3 From 5a527d80836e9ad0dc3dceee7de72f16c817fb8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 20 Nov 2016 16:09:30 +0100 Subject: mtd: bcm47xxpart: fix parsing first block after aligned TRX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bd5d21310133921021d78995ad6346f908483124 upstream. After parsing TRX we should skip to the first block placed behind it. Our code was working only with TRX with length not aligned to the blocksize. In other cases (length aligned) it was missing the block places right after TRX. This fixes calculation and simplifies the comment. Signed-off-by: Rafał Miłecki Signed-off-by: Brian Norris Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/bcm47xxpart.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c index c0720c1ee4c9..5abab8800891 100644 --- a/drivers/mtd/bcm47xxpart.c +++ b/drivers/mtd/bcm47xxpart.c @@ -225,12 +225,10 @@ static int bcm47xxpart_parse(struct mtd_info *master, last_trx_part = curr_part - 1; - /* - * We have whole TRX scanned, skip to the next part. Use - * roundown (not roundup), as the loop will increase - * offset in next step. - */ - offset = rounddown(offset + trx->length, blocksize); + /* Jump to the end of TRX */ + offset = roundup(offset + trx->length, blocksize); + /* Next loop iteration will increase the offset */ + offset -= blocksize; continue; } -- cgit v1.2.3 From d8b8b5528ea5a394074a91e37571bcca081b27e1 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 19 Dec 2016 14:20:56 +0000 Subject: MIPS: Introduce irq_stack commit fe8bd18ffea5327344d4ec2bf11f47951212abd0 upstream. Allocate a per-cpu irq stack for use within interrupt handlers. Also add a utility function on_irq_stack to determine if a given stack pointer is within the irq stack for that cpu. Signed-off-by: Matt Redfearn Acked-by: Jason A. Donenfeld Cc: Thomas Gleixner Cc: Paolo Bonzini Cc: Chris Metcalf Cc: Petr Mladek Cc: James Hogan Cc: Paul Burton Cc: Aaron Tomlin Cc: Andrew Morton Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14740/ Signed-off-by: Ralf Baechle Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/irq.h | 12 ++++++++++++ arch/mips/kernel/asm-offsets.c | 1 + arch/mips/kernel/irq.c | 11 +++++++++++ 3 files changed, 24 insertions(+) diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index 15e0fecbc300..ebb9efb02502 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -17,6 +17,18 @@ #include +#define IRQ_STACK_SIZE THREAD_SIZE + +extern void *irq_stack[NR_CPUS]; + +static inline bool on_irq_stack(int cpu, unsigned long sp) +{ + unsigned long low = (unsigned long)irq_stack[cpu]; + unsigned long high = low + IRQ_STACK_SIZE; + + return (low <= sp && sp <= high); +} + #ifdef CONFIG_I8259 static inline int irq_canonicalize(int irq) { diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index 154e2039ea5e..ec053ce7bb38 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -101,6 +101,7 @@ void output_thread_info_defines(void) OFFSET(TI_REGS, thread_info, regs); DEFINE(_THREAD_SIZE, THREAD_SIZE); DEFINE(_THREAD_MASK, THREAD_MASK); + DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE); BLANK(); } diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index 8eb5af805964..dc1180a8bfa1 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -25,6 +25,8 @@ #include #include +void *irq_stack[NR_CPUS]; + /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves. @@ -55,6 +57,15 @@ void __init init_IRQ(void) irq_set_noprobe(i); arch_init_irq(); + + for_each_possible_cpu(i) { + int irq_pages = IRQ_STACK_SIZE / PAGE_SIZE; + void *s = (void *)__get_free_pages(GFP_KERNEL, irq_pages); + + irq_stack[i] = s; + pr_debug("CPU%d IRQ stack at 0x%p - 0x%p\n", i, + irq_stack[i], irq_stack[i] + IRQ_STACK_SIZE); + } } #ifdef CONFIG_DEBUG_STACKOVERFLOW -- cgit v1.2.3 From 3363653512853754fcc7592d2c68c4769a4825c9 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 19 Dec 2016 14:20:57 +0000 Subject: MIPS: Stack unwinding while on IRQ stack commit d42d8d106b0275b027c1e8992c42aecf933436ea upstream. Within unwind stack, check if the stack pointer being unwound is within the CPU's irq_stack and if so use that page rather than the task's stack page. Signed-off-by: Matt Redfearn Acked-by: Jason A. Donenfeld Cc: Thomas Gleixner Cc: Adam Buchbinder Cc: Maciej W. Rozycki Cc: Marcin Nowakowski Cc: Chris Metcalf Cc: James Hogan Cc: Paul Burton Cc: Jiri Slaby Cc: Andrew Morton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14741/ Signed-off-by: Ralf Baechle Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index fc537d1b649d..8c26ecac930d 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -552,7 +553,19 @@ EXPORT_SYMBOL(unwind_stack_by_address); unsigned long unwind_stack(struct task_struct *task, unsigned long *sp, unsigned long pc, unsigned long *ra) { - unsigned long stack_page = (unsigned long)task_stack_page(task); + unsigned long stack_page = 0; + int cpu; + + for_each_possible_cpu(cpu) { + if (on_irq_stack(cpu, *sp)) { + stack_page = (unsigned long)irq_stack[cpu]; + break; + } + } + + if (!stack_page) + stack_page = (unsigned long)task_stack_page(task); + return unwind_stack_by_address(stack_page, sp, pc, ra); } #endif -- cgit v1.2.3 From 93a82f8dbef8ee421fac80a1bd0564124a8ac41c Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 19 Dec 2016 14:20:58 +0000 Subject: MIPS: Only change $28 to thread_info if coming from user mode commit 510d86362a27577f5ee23f46cfb354ad49731e61 upstream. The SAVE_SOME macro is used to save the execution context on all exceptions. If an exception occurs while executing user code, the stack is switched to the kernel's stack for the current task, and register $28 is switched to point to the current_thread_info, which is at the bottom of the stack region. If the exception occurs while executing kernel code, the stack is left, and this change ensures that register $28 is not updated. This is the correct behaviour when the kernel can be executing on the separate irq stack, because the thread_info will not be at the base of it. With this change, register $28 is only switched to it's kernel conventional usage of the currrent thread info pointer at the point at which execution enters kernel space. Doing it on every exception was redundant, but OK without an IRQ stack, but will be erroneous once that is introduced. Signed-off-by: Matt Redfearn Acked-by: Jason A. Donenfeld Cc: Thomas Gleixner Cc: James Hogan Cc: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14742/ Signed-off-by: Ralf Baechle Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/stackframe.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h index a71da576883c..5347f130f536 100644 --- a/arch/mips/include/asm/stackframe.h +++ b/arch/mips/include/asm/stackframe.h @@ -216,12 +216,19 @@ LONG_S $25, PT_R25(sp) LONG_S $28, PT_R28(sp) LONG_S $31, PT_R31(sp) + + /* Set thread_info if we're coming from user mode */ + mfc0 k0, CP0_STATUS + sll k0, 3 /* extract cu0 bit */ + bltz k0, 9f + ori $28, sp, _THREAD_MASK xori $28, _THREAD_MASK #ifdef CONFIG_CPU_CAVIUM_OCTEON .set mips64 pref 0, 0($28) /* Prefetch the current pointer */ #endif +9: .set pop .endm -- cgit v1.2.3 From b39b263816687fd71b10c31b3eb916defe8176f0 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 19 Dec 2016 14:20:59 +0000 Subject: MIPS: Switch to the irq_stack in interrupts commit dda45f701c9d7ad4ac0bb446e3a96f6df9a468d9 upstream. When enterring interrupt context via handle_int or except_vec_vi, switch to the irq_stack of the current CPU if it is not already in use. The current stack pointer is masked with the thread size and compared to the base or the irq stack. If it does not match then the stack pointer is set to the top of that stack, otherwise this is a nested irq being handled on the irq stack so the stack pointer should be left as it was. The in-use stack pointer is placed in the callee saved register s1. It will be saved to the stack when plat_irq_dispatch is invoked and can be restored once control returns here. Signed-off-by: Matt Redfearn Acked-by: Jason A. Donenfeld Cc: Thomas Gleixner Cc: James Hogan Cc: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14743/ Signed-off-by: Ralf Baechle Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/genex.S | 81 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 76 insertions(+), 5 deletions(-) diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index baa7b6fc0a60..2c7cd622673f 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -188,9 +188,44 @@ NESTED(handle_int, PT_SIZE, sp) LONG_L s0, TI_REGS($28) LONG_S sp, TI_REGS($28) - PTR_LA ra, ret_from_irq - PTR_LA v0, plat_irq_dispatch - jr v0 + + /* + * SAVE_ALL ensures we are using a valid kernel stack for the thread. + * Check if we are already using the IRQ stack. + */ + move s1, sp # Preserve the sp + + /* Get IRQ stack for this CPU */ + ASM_CPUID_MFC0 k0, ASM_SMP_CPUID_REG +#if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32) + lui k1, %hi(irq_stack) +#else + lui k1, %highest(irq_stack) + daddiu k1, %higher(irq_stack) + dsll k1, 16 + daddiu k1, %hi(irq_stack) + dsll k1, 16 +#endif + LONG_SRL k0, SMP_CPUID_PTRSHIFT + LONG_ADDU k1, k0 + LONG_L t0, %lo(irq_stack)(k1) + + # Check if already on IRQ stack + PTR_LI t1, ~(_THREAD_SIZE-1) + and t1, t1, sp + beq t0, t1, 2f + + /* Switch to IRQ stack */ + li t1, _IRQ_STACK_SIZE + PTR_ADD sp, t0, t1 + +2: + jal plat_irq_dispatch + + /* Restore sp */ + move sp, s1 + + j ret_from_irq #ifdef CONFIG_CPU_MICROMIPS nop #endif @@ -263,8 +298,44 @@ NESTED(except_vec_vi_handler, 0, sp) LONG_L s0, TI_REGS($28) LONG_S sp, TI_REGS($28) - PTR_LA ra, ret_from_irq - jr v0 + + /* + * SAVE_ALL ensures we are using a valid kernel stack for the thread. + * Check if we are already using the IRQ stack. + */ + move s1, sp # Preserve the sp + + /* Get IRQ stack for this CPU */ + ASM_CPUID_MFC0 k0, ASM_SMP_CPUID_REG +#if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32) + lui k1, %hi(irq_stack) +#else + lui k1, %highest(irq_stack) + daddiu k1, %higher(irq_stack) + dsll k1, 16 + daddiu k1, %hi(irq_stack) + dsll k1, 16 +#endif + LONG_SRL k0, SMP_CPUID_PTRSHIFT + LONG_ADDU k1, k0 + LONG_L t0, %lo(irq_stack)(k1) + + # Check if already on IRQ stack + PTR_LI t1, ~(_THREAD_SIZE-1) + and t1, t1, sp + beq t0, t1, 2f + + /* Switch to IRQ stack */ + li t1, _IRQ_STACK_SIZE + PTR_ADD sp, t0, t1 + +2: + jal plat_irq_dispatch + + /* Restore sp */ + move sp, s1 + + j ret_from_irq END(except_vec_vi_handler) /* -- cgit v1.2.3 From f017e58da4aba293e4a6ab62ca5d4801f79cc929 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 19 Dec 2016 14:21:00 +0000 Subject: MIPS: Select HAVE_IRQ_EXIT_ON_IRQ_STACK commit 3cc3434fd6307d06b53b98ce83e76bf9807689b9 upstream. Since do_IRQ is now invoked on a separate IRQ stack, we select HAVE_IRQ_EXIT_ON_IRQ_STACK so that softirq's may be invoked directly from irq_exit(), rather than requiring do_softirq_own_stack. Signed-off-by: Matt Redfearn Acked-by: Jason A. Donenfeld Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14744/ Signed-off-by: Ralf Baechle Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/mips/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 75bfca69e418..d5cfa937d622 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -9,6 +9,7 @@ config MIPS select HAVE_CONTEXT_TRACKING select HAVE_GENERIC_DMA_COHERENT select HAVE_IDE + select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_OPROFILE select HAVE_PERF_EVENTS select PERF_USE_VMALLOC -- cgit v1.2.3 From ba7681e4eee6739e4f23a1ba21fb7737fe4ce4f4 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Wed, 25 Jan 2017 17:00:25 +0000 Subject: MIPS: IRQ Stack: Fix erroneous jal to plat_irq_dispatch commit c25f8064c1d5731a2ce5664def890140dcdd3e5c upstream. Commit dda45f701c9d ("MIPS: Switch to the irq_stack in interrupts") changed both the normal and vectored interrupt handlers. Unfortunately the vectored version, "except_vec_vi_handler", was incorrectly modified to unconditionally jal to plat_irq_dispatch, rather than doing a jalr to the vectored handler that has been set up. This is ok for many platforms which set the vectored handler to plat_irq_dispatch anyway, but will cause problems with platforms that use other handlers. Fixes: dda45f701c9d ("MIPS: Switch to the irq_stack in interrupts") Signed-off-by: Matt Redfearn Cc: Ralf Baechle Cc: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15110/ Signed-off-by: James Hogan Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/genex.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 2c7cd622673f..619e30e2c4f0 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -330,7 +330,7 @@ NESTED(except_vec_vi_handler, 0, sp) PTR_ADD sp, t0, t1 2: - jal plat_irq_dispatch + jalr v0 /* Restore sp */ move sp, s1 -- cgit v1.2.3 From fd8bae310684b557c0b30ae9105420956a41494f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Wed, 5 Apr 2017 11:41:03 +0300 Subject: crypto: caam - fix RNG deinstantiation error checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 40c98cb57cdbc377456116ad4582c89e329721b0 upstream. RNG instantiation was previously fixed by commit 62743a4145bb9 ("crypto: caam - fix RNG init descriptor ret. code checking") while deinstantiation was not addressed. Since the descriptors used are similar, in the sense that they both end with a JUMP HALT command, checking for errors should be similar too, i.e. status code 7000_0000h should be considered successful. Fixes: 1005bccd7a4a6 ("crypto: caam - enable instantiation of all RNG4 state handles") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/ctrl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 69d4a1326fee..53e61459c69f 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -278,7 +278,8 @@ static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask) /* Try to run it through DECO0 */ ret = run_descriptor_deco0(ctrldev, desc, &status); - if (ret || status) { + if (ret || + (status && status != JRSTA_SSRC_JUMP_HALT_CC)) { dev_err(ctrldev, "Failed to deinstantiate RNG4 SH%d\n", sh_idx); -- cgit v1.2.3 From d35f8fa0b93e61dd95b8f86928a783c4d8a32d3e Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 29 Mar 2017 16:11:20 +0200 Subject: net/packet: fix overflow in check for priv area size commit 2b6867c2ce76c596676bec7d2d525af525fdc6e2 upstream. Subtracting tp_sizeof_priv from tp_block_size and casting to int to check whether one is less then the other doesn't always work (both of them are unsigned ints). Compare them as is instead. Also cast tp_sizeof_priv to u64 before using BLK_PLUS_PRIV, as it can overflow inside BLK_PLUS_PRIV otherwise. Signed-off-by: Andrey Konovalov Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3975ac809934..d76800108ddb 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4138,8 +4138,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) goto out; if (po->tp_version >= TPACKET_V3 && - (int)(req->tp_block_size - - BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0) + req->tp_block_size <= + BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv)) goto out; if (unlikely(req->tp_frame_size < po->tp_hdrlen + po->tp_reserve)) -- cgit v1.2.3 From f4522e36edaa9ec0cada0daa5c2628db762dd3d9 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Tue, 6 Dec 2016 13:31:44 -0200 Subject: blk-mq: Avoid memory reclaim when remapping queues commit 36e1f3d107867b25c616c2fd294f5a1c9d4e5d09 upstream. While stressing memory and IO at the same time we changed SMT settings, we were able to consistently trigger deadlocks in the mm system, which froze the entire machine. I think that under memory stress conditions, the large allocations performed by blk_mq_init_rq_map may trigger a reclaim, which stalls waiting on the block layer remmaping completion, thus deadlocking the system. The trace below was collected after the machine stalled, waiting for the hotplug event completion. The simplest fix for this is to make allocations in this path non-reclaimable, with GFP_NOIO. With this patch, We couldn't hit the issue anymore. This should apply on top of Jens's for-next branch cleanly. Changes since v1: - Use GFP_NOIO instead of GFP_NOWAIT. Call Trace: [c000000f0160aaf0] [c000000f0160ab50] 0xc000000f0160ab50 (unreliable) [c000000f0160acc0] [c000000000016624] __switch_to+0x2e4/0x430 [c000000f0160ad20] [c000000000b1a880] __schedule+0x310/0x9b0 [c000000f0160ae00] [c000000000b1af68] schedule+0x48/0xc0 [c000000f0160ae30] [c000000000b1b4b0] schedule_preempt_disabled+0x20/0x30 [c000000f0160ae50] [c000000000b1d4fc] __mutex_lock_slowpath+0xec/0x1f0 [c000000f0160aed0] [c000000000b1d678] mutex_lock+0x78/0xa0 [c000000f0160af00] [d000000019413cac] xfs_reclaim_inodes_ag+0x33c/0x380 [xfs] [c000000f0160b0b0] [d000000019415164] xfs_reclaim_inodes_nr+0x54/0x70 [xfs] [c000000f0160b0f0] [d0000000194297f8] xfs_fs_free_cached_objects+0x38/0x60 [xfs] [c000000f0160b120] [c0000000003172c8] super_cache_scan+0x1f8/0x210 [c000000f0160b190] [c00000000026301c] shrink_slab.part.13+0x21c/0x4c0 [c000000f0160b2d0] [c000000000268088] shrink_zone+0x2d8/0x3c0 [c000000f0160b380] [c00000000026834c] do_try_to_free_pages+0x1dc/0x520 [c000000f0160b450] [c00000000026876c] try_to_free_pages+0xdc/0x250 [c000000f0160b4e0] [c000000000251978] __alloc_pages_nodemask+0x868/0x10d0 [c000000f0160b6f0] [c000000000567030] blk_mq_init_rq_map+0x160/0x380 [c000000f0160b7a0] [c00000000056758c] blk_mq_map_swqueue+0x33c/0x360 [c000000f0160b820] [c000000000567904] blk_mq_queue_reinit+0x64/0xb0 [c000000f0160b850] [c00000000056a16c] blk_mq_queue_reinit_notify+0x19c/0x250 [c000000f0160b8a0] [c0000000000f5d38] notifier_call_chain+0x98/0x100 [c000000f0160b8f0] [c0000000000c5fb0] __cpu_notify+0x70/0xe0 [c000000f0160b930] [c0000000000c63c4] notify_prepare+0x44/0xb0 [c000000f0160b9b0] [c0000000000c52f4] cpuhp_invoke_callback+0x84/0x250 [c000000f0160ba10] [c0000000000c570c] cpuhp_up_callbacks+0x5c/0x120 [c000000f0160ba60] [c0000000000c7cb8] _cpu_up+0xf8/0x1d0 [c000000f0160bac0] [c0000000000c7eb0] do_cpu_up+0x120/0x150 [c000000f0160bb40] [c0000000006fe024] cpu_subsys_online+0x64/0xe0 [c000000f0160bb90] [c0000000006f5124] device_online+0xb4/0x120 [c000000f0160bbd0] [c0000000006f5244] online_store+0xb4/0xc0 [c000000f0160bc20] [c0000000006f0a68] dev_attr_store+0x68/0xa0 [c000000f0160bc60] [c0000000003ccc30] sysfs_kf_write+0x80/0xb0 [c000000f0160bca0] [c0000000003cbabc] kernfs_fop_write+0x17c/0x250 [c000000f0160bcf0] [c00000000030fe6c] __vfs_write+0x6c/0x1e0 [c000000f0160bd90] [c000000000311490] vfs_write+0xd0/0x270 [c000000f0160bde0] [c0000000003131fc] SyS_write+0x6c/0x110 [c000000f0160be30] [c000000000009204] system_call+0x38/0xec Signed-off-by: Gabriel Krisman Bertazi Cc: Brian King Cc: Douglas Miller Cc: linux-block@vger.kernel.org Cc: linux-scsi@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sumit Semwal Signed-off-by: Greg Kroah-Hartman --- block/blk-mq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index d8d63c38bf29..0d1af3e44efb 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1470,7 +1470,7 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, INIT_LIST_HEAD(&tags->page_list); tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *), - GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, set->numa_node); if (!tags->rqs) { blk_mq_free_tags(tags); @@ -1496,7 +1496,7 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, do { page = alloc_pages_node(set->numa_node, - GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO, + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO, this_order); if (page) break; @@ -1517,7 +1517,7 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, * Allow kmemleak to scan these pages as they contain pointers * to additional allocations like via ops->init_request(). */ - kmemleak_alloc(p, order_to_size(this_order), 1, GFP_KERNEL); + kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO); entries_per_page = order_to_size(this_order) / rq_size; to_do = min(entries_per_page, set->queue_depth - i); left -= to_do * rq_size; -- cgit v1.2.3 From 0a007f74b826836074de8bfcb1e197cada993718 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 1 Dec 2016 13:49:59 -0800 Subject: usb: hub: Wait for connection to be reestablished after port reset commit 22547c4cc4fe20698a6a85a55b8788859134b8e4 upstream. On a system with a defective USB device connected to an USB hub, an endless sequence of port connect events was observed. The sequence of events as observed is as follows: - Port reports connected event (port status=USB_PORT_STAT_CONNECTION). - Event handler debounces port and resets it by calling hub_port_reset(). - hub_port_reset() calls hub_port_wait_reset() to wait for the reset to complete. - The reset completes, but USB_PORT_STAT_CONNECTION is not immediately set in the port status register. - hub_port_wait_reset() returns -ENOTCONN. - Port initialization sequence is aborted. - A few milliseconds later, the port again reports a connected event, and the sequence repeats. This continues either forever or, randomly, stops if the connection is already re-established when the port status is read. It results in a high rate of udev events. This in turn destabilizes userspace since the above sequence holds the device mutex pretty much continuously and prevents userspace from actually reading the device status. To prevent the problem from happening, let's wait for the connection to be re-established after a port reset. If the device was actually disconnected, the code will still return an error, but it will do so only after the long reset timeout. Cc: Douglas Anderson Signed-off-by: Guenter Roeck Acked-by: Alan Stern Signed-off-by: Sumit Semwal Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 9e62c93af96e..7c2d87befb51 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2602,8 +2602,15 @@ static int hub_port_wait_reset(struct usb_hub *hub, int port1, if (ret < 0) return ret; - /* The port state is unknown until the reset completes. */ - if (!(portstatus & USB_PORT_STAT_RESET)) + /* + * The port state is unknown until the reset completes. + * + * On top of that, some chips may require additional time + * to re-establish a connection after the reset is complete, + * so also wait for the connection to be re-established. + */ + if (!(portstatus & USB_PORT_STAT_RESET) && + (portstatus & USB_PORT_STAT_CONNECTION)) break; /* switch to the long delay after two short delay failures */ -- cgit v1.2.3 From f1e6b1149e497dc61ceff290c1d3db259ebf7938 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 29 Dec 2016 18:37:10 +0200 Subject: net/mlx4_en: Fix bad WQE issue commit 6496bbf0ec481966ef9ffe5b6660d8d1b55c60cc upstream. Single send WQE in RX buffer should be stamped with software ownership in order to prevent the flow of QP in error in FW once UPDATE_QP is called. Fixes: 9f519f68cfff ('mlx4_en: Not using Shared Receive Queues') Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sumit Semwal Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 28a4b34310b2..82bf1b539d87 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -439,8 +439,14 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn; ring->stride = stride; - if (ring->stride <= TXBB_SIZE) + if (ring->stride <= TXBB_SIZE) { + /* Stamp first unused send wqe */ + __be32 *ptr = (__be32 *)ring->buf; + __be32 stamp = cpu_to_be32(1 << STAMP_SHIFT); + *ptr = stamp; + /* Move pointer to start of rx section */ ring->buf += TXBB_SIZE; + } ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride; -- cgit v1.2.3 From 710f793a15de0213d4e15f123f327b2075a0c62b Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 16 Jan 2017 18:31:37 +0200 Subject: net/mlx4_core: Fix racy CQ (Completion Queue) free commit 291c566a28910614ce42d0ffe82196eddd6346f4 upstream. In function mlx4_cq_completion() and mlx4_cq_event(), the radix_tree_lookup requires a rcu_read_lock. This is mandatory: if another core frees the CQ, it could run the radix_tree_node_rcu_free() call_rcu() callback while its being used by the radix tree lookup function. Additionally, in function mlx4_cq_event(), since we are adding the rcu lock around the radix-tree lookup, we no longer need to take the spinlock. Also, the synchronize_irq() call for the async event eliminates the need for incrementing the cq reference count in mlx4_cq_event(). Other changes: 1. In function mlx4_cq_free(), replace spin_lock_irq with spin_lock: we no longer take this spinlock in the interrupt context. The spinlock here, therefore, simply protects against different threads simultaneously invoking mlx4_cq_free() for different cq's. 2. In function mlx4_cq_free(), we move the radix tree delete to before the synchronize_irq() calls. This guarantees that we will not access this cq during any subsequent interrupts, and therefore can safely free the CQ after the synchronize_irq calls. The rcu_read_lock in the interrupt handlers only needs to protect against corrupting the radix tree; the interrupt handlers may access the cq outside the rcu_read_lock due to the synchronize_irq calls which protect against premature freeing of the cq. 3. In function mlx4_cq_event(), we change the mlx_warn message to mlx4_dbg. 4. We leave the cq reference count mechanism in place, because it is still needed for the cq completion tasklet mechanism. Fixes: 6d90aa5cf17b ("net/mlx4_core: Make sure there are no pending async events when freeing CQ") Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") Signed-off-by: Jack Morgenstein Signed-off-by: Matan Barak Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sumit Semwal Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/cq.c | 38 +++++++++++++++++---------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 3348e646db70..6eba58044456 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -101,13 +101,19 @@ void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn) { struct mlx4_cq *cq; + rcu_read_lock(); cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree, cqn & (dev->caps.num_cqs - 1)); + rcu_read_unlock(); + if (!cq) { mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn); return; } + /* Acessing the CQ outside of rcu_read_lock is safe, because + * the CQ is freed only after interrupt handling is completed. + */ ++cq->arm_sn; cq->comp(cq); @@ -118,23 +124,19 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type) struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; struct mlx4_cq *cq; - spin_lock(&cq_table->lock); - + rcu_read_lock(); cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1)); - if (cq) - atomic_inc(&cq->refcount); - - spin_unlock(&cq_table->lock); + rcu_read_unlock(); if (!cq) { - mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn); + mlx4_dbg(dev, "Async event for bogus CQ %08x\n", cqn); return; } + /* Acessing the CQ outside of rcu_read_lock is safe, because + * the CQ is freed only after interrupt handling is completed. + */ cq->event(cq, event_type); - - if (atomic_dec_and_test(&cq->refcount)) - complete(&cq->free); } static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, @@ -301,9 +303,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, if (err) return err; - spin_lock_irq(&cq_table->lock); + spin_lock(&cq_table->lock); err = radix_tree_insert(&cq_table->tree, cq->cqn, cq); - spin_unlock_irq(&cq_table->lock); + spin_unlock(&cq_table->lock); if (err) goto err_icm; @@ -347,9 +349,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, return 0; err_radix: - spin_lock_irq(&cq_table->lock); + spin_lock(&cq_table->lock); radix_tree_delete(&cq_table->tree, cq->cqn); - spin_unlock_irq(&cq_table->lock); + spin_unlock(&cq_table->lock); err_icm: mlx4_cq_free_icm(dev, cq->cqn); @@ -368,15 +370,15 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) if (err) mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn); + spin_lock(&cq_table->lock); + radix_tree_delete(&cq_table->tree, cq->cqn); + spin_unlock(&cq_table->lock); + synchronize_irq(priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq); if (priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq != priv->eq_table.eq[MLX4_EQ_ASYNC].irq) synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq); - spin_lock_irq(&cq_table->lock); - radix_tree_delete(&cq_table->tree, cq->cqn); - spin_unlock_irq(&cq_table->lock); - if (atomic_dec_and_test(&cq->refcount)) complete(&cq->free); wait_for_completion(&cq->free); -- cgit v1.2.3 From ac0cbfbb1e4b84d426f210849492afadbc4b6bb9 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 16 Jan 2017 18:31:38 +0200 Subject: net/mlx4_core: Fix when to save some qp context flags for dynamic VST to VGT transitions commit 7c3945bc2073554bb2ecf983e073dee686679c53 upstream. Save the qp context flags byte containing the flag disabling vlan stripping in the RESET to INIT qp transition, rather than in the INIT to RTR transition. Per the firmware spec, the flags in this byte are active in the RESET to INIT transition. As a result of saving the flags in the incorrect qp transition, when switching dynamically from VGT to VST and back to VGT, the vlan remained stripped (as is required for VST) and did not return to not-stripped (as is required for VGT). Fixes: f0f829bf42cd ("net/mlx4_core: Add immediate activate for VGT->VST->VGT") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sumit Semwal Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index d314d96dcb1c..d1fc7fa87b05 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2955,6 +2955,9 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, put_res(dev, slave, srqn, RES_SRQ); qp->srq = srq; } + + /* Save param3 for dynamic changes from VST back to VGT */ + qp->param3 = qpc->param3; put_res(dev, slave, rcqn, RES_CQ); put_res(dev, slave, mtt_base, RES_MTT); res_end_move(dev, slave, RES_QP, qpn); @@ -3747,7 +3750,6 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, int qpn = vhcr->in_modifier & 0x7fffff; struct res_qp *qp; u8 orig_sched_queue; - __be32 orig_param3 = qpc->param3; u8 orig_vlan_control = qpc->pri_path.vlan_control; u8 orig_fvl_rx = qpc->pri_path.fvl_rx; u8 orig_pri_path_fl = qpc->pri_path.fl; @@ -3789,7 +3791,6 @@ out: */ if (!err) { qp->sched_queue = orig_sched_queue; - qp->param3 = orig_param3; qp->vlan_control = orig_vlan_control; qp->fvl_rx = orig_fvl_rx; qp->pri_path_fl = orig_pri_path_fl; -- cgit v1.2.3 From 7d170f270a95639192cfd53dcb15e6d8530b4577 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Thu, 8 Dec 2016 16:40:03 -0600 Subject: ibmveth: set correct gso_size and gso_type commit 7b5967389f5a8dfb9d32843830f5e2717e20995d upstream. This patch is based on an earlier one submitted by Jon Maxwell with the following commit message: "We recently encountered a bug where a few customers using ibmveth on the same LPAR hit an issue where a TCP session hung when large receive was enabled. Closer analysis revealed that the session was stuck because the one side was advertising a zero window repeatedly. We narrowed this down to the fact the ibmveth driver did not set gso_size which is translated by TCP into the MSS later up the stack. The MSS is used to calculate the TCP window size and as that was abnormally large, it was calculating a zero window, even although the sockets receive buffer was completely empty." We rely on the Virtual I/O Server partition in a pseries environment to provide the MSS through the TCP header checksum field. The stipulation is that users should not disable checksum offloading if rx packet aggregation is enabled through VIOS. Some firmware offerings provide the MSS in the RX buffer. This is signalled by a bit in the RX queue descriptor. Reviewed-by: Brian King Reviewed-by: Pradeep Satyanarayana Reviewed-by: Marcelo Ricardo Leitner Reviewed-by: Jonathan Maxwell Reviewed-by: David Dai Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller Signed-off-by: Sumit Semwal Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ibm/ibmveth.c | 65 ++++++++++++++++++++++++++++++++++++-- drivers/net/ethernet/ibm/ibmveth.h | 1 + 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 7af870a3c549..855c43d8f7e0 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -58,7 +58,7 @@ static struct kobj_type ktype_veth_pool; static const char ibmveth_driver_name[] = "ibmveth"; static const char ibmveth_driver_string[] = "IBM Power Virtual Ethernet Driver"; -#define ibmveth_driver_version "1.05" +#define ibmveth_driver_version "1.06" MODULE_AUTHOR("Santiago Leon "); MODULE_DESCRIPTION("IBM Power Virtual Ethernet Driver"); @@ -137,6 +137,11 @@ static inline int ibmveth_rxq_frame_offset(struct ibmveth_adapter *adapter) return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_OFF_MASK; } +static inline int ibmveth_rxq_large_packet(struct ibmveth_adapter *adapter) +{ + return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_LRG_PKT; +} + static inline int ibmveth_rxq_frame_length(struct ibmveth_adapter *adapter) { return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].length); @@ -1172,6 +1177,45 @@ map_failed: goto retry_bounce; } +static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt) +{ + int offset = 0; + + /* only TCP packets will be aggregated */ + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)skb->data; + + if (iph->protocol == IPPROTO_TCP) { + offset = iph->ihl * 4; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + } else { + return; + } + } else if (skb->protocol == htons(ETH_P_IPV6)) { + struct ipv6hdr *iph6 = (struct ipv6hdr *)skb->data; + + if (iph6->nexthdr == IPPROTO_TCP) { + offset = sizeof(struct ipv6hdr); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + } else { + return; + } + } else { + return; + } + /* if mss is not set through Large Packet bit/mss in rx buffer, + * expect that the mss will be written to the tcp header checksum. + */ + if (lrg_pkt) { + skb_shinfo(skb)->gso_size = mss; + } else if (offset) { + struct tcphdr *tcph = (struct tcphdr *)(skb->data + offset); + + skb_shinfo(skb)->gso_size = ntohs(tcph->check); + tcph->check = 0; + } +} + static int ibmveth_poll(struct napi_struct *napi, int budget) { struct ibmveth_adapter *adapter = @@ -1180,6 +1224,7 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) int frames_processed = 0; unsigned long lpar_rc; struct iphdr *iph; + u16 mss = 0; restart_poll: while (frames_processed < budget) { @@ -1197,9 +1242,21 @@ restart_poll: int length = ibmveth_rxq_frame_length(adapter); int offset = ibmveth_rxq_frame_offset(adapter); int csum_good = ibmveth_rxq_csum_good(adapter); + int lrg_pkt = ibmveth_rxq_large_packet(adapter); skb = ibmveth_rxq_get_buffer(adapter); + /* if the large packet bit is set in the rx queue + * descriptor, the mss will be written by PHYP eight + * bytes from the start of the rx buffer, which is + * skb->data at this stage + */ + if (lrg_pkt) { + __be64 *rxmss = (__be64 *)(skb->data + 8); + + mss = (u16)be64_to_cpu(*rxmss); + } + new_skb = NULL; if (length < rx_copybreak) new_skb = netdev_alloc_skb(netdev, length); @@ -1233,11 +1290,15 @@ restart_poll: if (iph->check == 0xffff) { iph->check = 0; iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); - adapter->rx_large_packets++; } } } + if (length > netdev->mtu + ETH_HLEN) { + ibmveth_rx_mss_helper(skb, mss, lrg_pkt); + adapter->rx_large_packets++; + } + napi_gro_receive(napi, skb); /* send it up */ netdev->stats.rx_packets++; diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h index 4eade67fe30c..7acda04d034e 100644 --- a/drivers/net/ethernet/ibm/ibmveth.h +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -209,6 +209,7 @@ struct ibmveth_rx_q_entry { #define IBMVETH_RXQ_TOGGLE 0x80000000 #define IBMVETH_RXQ_TOGGLE_SHIFT 31 #define IBMVETH_RXQ_VALID 0x40000000 +#define IBMVETH_RXQ_LRG_PKT 0x04000000 #define IBMVETH_RXQ_NO_CSUM 0x02000000 #define IBMVETH_RXQ_CSUM_GOOD 0x01000000 #define IBMVETH_RXQ_OFF_MASK 0x0000FFFF -- cgit v1.2.3 From a80c068fbf43e22f099c0587b9e1a2337378a505 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 18 Apr 2017 07:15:37 +0200 Subject: Linux 4.4.62 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ef5045b8201d..0309acc34472 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 61 +SUBLEVEL = 62 EXTRAVERSION = NAME = Blurry Fish Butt -- cgit v1.2.3 From 9e242ec92f951165b52de630ca8ecc6a2748f655 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 22 Feb 2016 11:49:09 -0500 Subject: BACKPORT: [UPSTREAM] mbcache2: reimplement mbcache (Cherry-pick from commit f9a61eb4e2471c56a63cd804c7474128138c38ac) Original mbcache was designed to have more features than what ext? filesystems ended up using. It supported entry being in more hashes, it had a home-grown rwlocking of each entry, and one cache could cache entries from multiple filesystems. This genericity also resulted in more complex locking, larger cache entries, and generally more code complexity. This is reimplementation of the mbcache functionality to exactly fit the purpose ext? filesystems use it for. Cache entries are now considerably smaller (7 instead of 13 longs), the code is considerably smaller as well (414 vs 913 lines of code), and IMO also simpler. The new code is also much more lightweight. I have measured the speed using artificial xattr-bench benchmark, which spawns P processes, each process sets xattr for F different files, and the value of xattr is randomly chosen from a pool of V values. Averages of runtimes for 5 runs for various combinations of parameters are below. The first value in each cell is old mbache, the second value is the new mbcache. V=10 F\P 1 2 4 8 16 32 64 10 0.158,0.157 0.208,0.196 0.500,0.277 0.798,0.400 3.258,0.584 13.807,1.047 61.339,2.803 100 0.172,0.167 0.279,0.222 0.520,0.275 0.825,0.341 2.981,0.505 12.022,1.202 44.641,2.943 1000 0.185,0.174 0.297,0.239 0.445,0.283 0.767,0.340 2.329,0.480 6.342,1.198 16.440,3.888 V=100 F\P 1 2 4 8 16 32 64 10 0.162,0.153 0.200,0.186 0.362,0.257 0.671,0.496 1.433,0.943 3.801,1.345 7.938,2.501 100 0.153,0.160 0.221,0.199 0.404,0.264 0.945,0.379 1.556,0.485 3.761,1.156 7.901,2.484 1000 0.215,0.191 0.303,0.246 0.471,0.288 0.960,0.347 1.647,0.479 3.916,1.176 8.058,3.160 V=1000 F\P 1 2 4 8 16 32 64 10 0.151,0.129 0.210,0.163 0.326,0.245 0.685,0.521 1.284,0.859 3.087,2.251 6.451,4.801 100 0.154,0.153 0.211,0.191 0.276,0.282 0.687,0.506 1.202,0.877 3.259,1.954 8.738,2.887 1000 0.145,0.179 0.202,0.222 0.449,0.319 0.899,0.333 1.577,0.524 4.221,1.240 9.782,3.579 V=10000 F\P 1 2 4 8 16 32 64 10 0.161,0.154 0.198,0.190 0.296,0.256 0.662,0.480 1.192,0.818 2.989,2.200 6.362,4.746 100 0.176,0.174 0.236,0.203 0.326,0.255 0.696,0.511 1.183,0.855 4.205,3.444 19.510,17.760 1000 0.199,0.183 0.240,0.227 1.159,1.014 2.286,2.154 6.023,6.039 ---,10.933 ---,36.620 V=100000 F\P 1 2 4 8 16 32 64 10 0.171,0.162 0.204,0.198 0.285,0.230 0.692,0.500 1.225,0.881 2.990,2.243 6.379,4.771 100 0.151,0.171 0.220,0.210 0.295,0.255 0.720,0.518 1.226,0.844 3.423,2.831 19.234,17.544 1000 0.192,0.189 0.249,0.225 1.162,1.043 2.257,2.093 5.853,4.997 ---,10.399 ---,32.198 We see that the new code is faster in pretty much all the cases and starting from 4 processes there are significant gains with the new code resulting in upto 20-times shorter runtimes. Also for large numbers of cached entries all values for the old code could not be measured as the kernel started hitting softlockups and died before the test completed. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Bug: 32461228 --- fs/Makefile | 2 +- fs/mbcache2.c | 359 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mbcache2.h | 50 +++++++ 3 files changed, 410 insertions(+), 1 deletion(-) create mode 100644 fs/mbcache2.c create mode 100644 include/linux/mbcache2.h diff --git a/fs/Makefile b/fs/Makefile index 3b54070cd629..dee237540bc0 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -41,7 +41,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o -obj-$(CONFIG_FS_MBCACHE) += mbcache.o +obj-$(CONFIG_FS_MBCACHE) += mbcache.o mbcache2.o obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o obj-$(CONFIG_NFS_COMMON) += nfs_common/ obj-$(CONFIG_COREDUMP) += coredump.o diff --git a/fs/mbcache2.c b/fs/mbcache2.c new file mode 100644 index 000000000000..5c3e1a8c38f6 --- /dev/null +++ b/fs/mbcache2.c @@ -0,0 +1,359 @@ +#include +#include +#include +#include +#include +#include +#include + +/* + * Mbcache is a simple key-value store. Keys need not be unique, however + * key-value pairs are expected to be unique (we use this fact in + * mb2_cache_entry_delete_block()). + * + * Ext2 and ext4 use this cache for deduplication of extended attribute blocks. + * They use hash of a block contents as a key and block number as a value. + * That's why keys need not be unique (different xattr blocks may end up having + * the same hash). However block number always uniquely identifies a cache + * entry. + * + * We provide functions for creation and removal of entries, search by key, + * and a special "delete entry with given key-value pair" operation. Fixed + * size hash table is used for fast key lookups. + */ + +struct mb2_cache { + /* Hash table of entries */ + struct hlist_bl_head *c_hash; + /* log2 of hash table size */ + int c_bucket_bits; + /* Protects c_lru_list, c_entry_count */ + spinlock_t c_lru_list_lock; + struct list_head c_lru_list; + /* Number of entries in cache */ + unsigned long c_entry_count; + struct shrinker c_shrink; +}; + +static struct kmem_cache *mb2_entry_cache; + +/* + * mb2_cache_entry_create - create entry in cache + * @cache - cache where the entry should be created + * @mask - gfp mask with which the entry should be allocated + * @key - key of the entry + * @block - block that contains data + * + * Creates entry in @cache with key @key and records that data is stored in + * block @block. The function returns -EBUSY if entry with the same key + * and for the same block already exists in cache. Otherwise 0 is returned. + */ +int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key, + sector_t block) +{ + struct mb2_cache_entry *entry, *dup; + struct hlist_bl_node *dup_node; + struct hlist_bl_head *head; + + entry = kmem_cache_alloc(mb2_entry_cache, mask); + if (!entry) + return -ENOMEM; + + INIT_LIST_HEAD(&entry->e_lru_list); + /* One ref for hash, one ref returned */ + atomic_set(&entry->e_refcnt, 1); + entry->e_key = key; + entry->e_block = block; + head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; + entry->e_hash_list_head = head; + hlist_bl_lock(head); + hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { + if (dup->e_key == key && dup->e_block == block) { + hlist_bl_unlock(head); + kmem_cache_free(mb2_entry_cache, entry); + return -EBUSY; + } + } + hlist_bl_add_head(&entry->e_hash_list, head); + hlist_bl_unlock(head); + + spin_lock(&cache->c_lru_list_lock); + list_add_tail(&entry->e_lru_list, &cache->c_lru_list); + /* Grab ref for LRU list */ + atomic_inc(&entry->e_refcnt); + cache->c_entry_count++; + spin_unlock(&cache->c_lru_list_lock); + + return 0; +} +EXPORT_SYMBOL(mb2_cache_entry_create); + +void __mb2_cache_entry_free(struct mb2_cache_entry *entry) +{ + kmem_cache_free(mb2_entry_cache, entry); +} +EXPORT_SYMBOL(__mb2_cache_entry_free); + +static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache, + struct mb2_cache_entry *entry, + u32 key) +{ + struct mb2_cache_entry *old_entry = entry; + struct hlist_bl_node *node; + struct hlist_bl_head *head; + + if (entry) + head = entry->e_hash_list_head; + else + head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; + hlist_bl_lock(head); + if (entry && !hlist_bl_unhashed(&entry->e_hash_list)) + node = entry->e_hash_list.next; + else + node = hlist_bl_first(head); + while (node) { + entry = hlist_bl_entry(node, struct mb2_cache_entry, + e_hash_list); + if (entry->e_key == key) { + atomic_inc(&entry->e_refcnt); + goto out; + } + node = node->next; + } + entry = NULL; +out: + hlist_bl_unlock(head); + if (old_entry) + mb2_cache_entry_put(cache, old_entry); + + return entry; +} + +/* + * mb2_cache_entry_find_first - find the first entry in cache with given key + * @cache: cache where we should search + * @key: key to look for + * + * Search in @cache for entry with key @key. Grabs reference to the first + * entry found and returns the entry. + */ +struct mb2_cache_entry *mb2_cache_entry_find_first(struct mb2_cache *cache, + u32 key) +{ + return __entry_find(cache, NULL, key); +} +EXPORT_SYMBOL(mb2_cache_entry_find_first); + +/* + * mb2_cache_entry_find_next - find next entry in cache with the same + * @cache: cache where we should search + * @entry: entry to start search from + * + * Finds next entry in the hash chain which has the same key as @entry. + * If @entry is unhashed (which can happen when deletion of entry races + * with the search), finds the first entry in the hash chain. The function + * drops reference to @entry and returns with a reference to the found entry. + */ +struct mb2_cache_entry *mb2_cache_entry_find_next(struct mb2_cache *cache, + struct mb2_cache_entry *entry) +{ + return __entry_find(cache, entry, entry->e_key); +} +EXPORT_SYMBOL(mb2_cache_entry_find_next); + +/* mb2_cache_entry_delete_block - remove information about block from cache + * @cache - cache we work with + * @key - key of the entry to remove + * @block - block containing data for @key + * + * Remove entry from cache @cache with key @key with data stored in @block. + */ +void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key, + sector_t block) +{ + struct hlist_bl_node *node; + struct hlist_bl_head *head; + struct mb2_cache_entry *entry; + + head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; + hlist_bl_lock(head); + hlist_bl_for_each_entry(entry, node, head, e_hash_list) { + if (entry->e_key == key && entry->e_block == block) { + /* We keep hash list reference to keep entry alive */ + hlist_bl_del_init(&entry->e_hash_list); + hlist_bl_unlock(head); + spin_lock(&cache->c_lru_list_lock); + if (!list_empty(&entry->e_lru_list)) { + list_del_init(&entry->e_lru_list); + cache->c_entry_count--; + atomic_dec(&entry->e_refcnt); + } + spin_unlock(&cache->c_lru_list_lock); + mb2_cache_entry_put(cache, entry); + return; + } + } + hlist_bl_unlock(head); +} +EXPORT_SYMBOL(mb2_cache_entry_delete_block); + +/* mb2_cache_entry_touch - cache entry got used + * @cache - cache the entry belongs to + * @entry - entry that got used + * + * Move entry in lru list to reflect the fact that it was used. + */ +void mb2_cache_entry_touch(struct mb2_cache *cache, + struct mb2_cache_entry *entry) +{ + spin_lock(&cache->c_lru_list_lock); + if (!list_empty(&entry->e_lru_list)) + list_move_tail(&cache->c_lru_list, &entry->e_lru_list); + spin_unlock(&cache->c_lru_list_lock); +} +EXPORT_SYMBOL(mb2_cache_entry_touch); + +static unsigned long mb2_cache_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + struct mb2_cache *cache = container_of(shrink, struct mb2_cache, + c_shrink); + + return cache->c_entry_count; +} + +/* Shrink number of entries in cache */ +static unsigned long mb2_cache_scan(struct shrinker *shrink, + struct shrink_control *sc) +{ + int nr_to_scan = sc->nr_to_scan; + struct mb2_cache *cache = container_of(shrink, struct mb2_cache, + c_shrink); + struct mb2_cache_entry *entry; + struct hlist_bl_head *head; + unsigned int shrunk = 0; + + spin_lock(&cache->c_lru_list_lock); + while (nr_to_scan-- && !list_empty(&cache->c_lru_list)) { + entry = list_first_entry(&cache->c_lru_list, + struct mb2_cache_entry, e_lru_list); + list_del_init(&entry->e_lru_list); + cache->c_entry_count--; + /* + * We keep LRU list reference so that entry doesn't go away + * from under us. + */ + spin_unlock(&cache->c_lru_list_lock); + head = entry->e_hash_list_head; + hlist_bl_lock(head); + if (!hlist_bl_unhashed(&entry->e_hash_list)) { + hlist_bl_del_init(&entry->e_hash_list); + atomic_dec(&entry->e_refcnt); + } + hlist_bl_unlock(head); + if (mb2_cache_entry_put(cache, entry)) + shrunk++; + cond_resched(); + spin_lock(&cache->c_lru_list_lock); + } + spin_unlock(&cache->c_lru_list_lock); + + return shrunk; +} + +/* + * mb2_cache_create - create cache + * @bucket_bits: log2 of the hash table size + * + * Create cache for keys with 2^bucket_bits hash entries. + */ +struct mb2_cache *mb2_cache_create(int bucket_bits) +{ + struct mb2_cache *cache; + int bucket_count = 1 << bucket_bits; + int i; + + if (!try_module_get(THIS_MODULE)) + return NULL; + + cache = kzalloc(sizeof(struct mb2_cache), GFP_KERNEL); + if (!cache) + goto err_out; + cache->c_bucket_bits = bucket_bits; + INIT_LIST_HEAD(&cache->c_lru_list); + spin_lock_init(&cache->c_lru_list_lock); + cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head), + GFP_KERNEL); + if (!cache->c_hash) { + kfree(cache); + goto err_out; + } + for (i = 0; i < bucket_count; i++) + INIT_HLIST_BL_HEAD(&cache->c_hash[i]); + + cache->c_shrink.count_objects = mb2_cache_count; + cache->c_shrink.scan_objects = mb2_cache_scan; + cache->c_shrink.seeks = DEFAULT_SEEKS; + register_shrinker(&cache->c_shrink); + + return cache; + +err_out: + module_put(THIS_MODULE); + return NULL; +} +EXPORT_SYMBOL(mb2_cache_create); + +/* + * mb2_cache_destroy - destroy cache + * @cache: the cache to destroy + * + * Free all entries in cache and cache itself. Caller must make sure nobody + * (except shrinker) can reach @cache when calling this. + */ +void mb2_cache_destroy(struct mb2_cache *cache) +{ + struct mb2_cache_entry *entry, *next; + + unregister_shrinker(&cache->c_shrink); + + /* + * We don't bother with any locking. Cache must not be used at this + * point. + */ + list_for_each_entry_safe(entry, next, &cache->c_lru_list, e_lru_list) { + if (!hlist_bl_unhashed(&entry->e_hash_list)) { + hlist_bl_del_init(&entry->e_hash_list); + atomic_dec(&entry->e_refcnt); + } else + WARN_ON(1); + list_del(&entry->e_lru_list); + WARN_ON(atomic_read(&entry->e_refcnt) != 1); + mb2_cache_entry_put(cache, entry); + } + kfree(cache->c_hash); + kfree(cache); + module_put(THIS_MODULE); +} +EXPORT_SYMBOL(mb2_cache_destroy); + +static int __init mb2cache_init(void) +{ + mb2_entry_cache = kmem_cache_create("mbcache", + sizeof(struct mb2_cache_entry), 0, + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); + BUG_ON(!mb2_entry_cache); + return 0; +} + +static void __exit mb2cache_exit(void) +{ + kmem_cache_destroy(mb2_entry_cache); +} + +module_init(mb2cache_init) +module_exit(mb2cache_exit) + +MODULE_AUTHOR("Jan Kara "); +MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mbcache2.h b/include/linux/mbcache2.h new file mode 100644 index 000000000000..b6f160ff2533 --- /dev/null +++ b/include/linux/mbcache2.h @@ -0,0 +1,50 @@ +#ifndef _LINUX_MB2CACHE_H +#define _LINUX_MB2CACHE_H + +#include +#include +#include +#include +#include + +struct mb2_cache; + +struct mb2_cache_entry { + /* LRU list - protected by cache->c_lru_list_lock */ + struct list_head e_lru_list; + /* Hash table list - protected by bitlock in e_hash_list_head */ + struct hlist_bl_node e_hash_list; + atomic_t e_refcnt; + /* Key in hash - stable during lifetime of the entry */ + u32 e_key; + /* Block number of hashed block - stable during lifetime of the entry */ + sector_t e_block; + /* Head of hash list (for list bit lock) - stable */ + struct hlist_bl_head *e_hash_list_head; +}; + +struct mb2_cache *mb2_cache_create(int bucket_bits); +void mb2_cache_destroy(struct mb2_cache *cache); + +int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key, + sector_t block); +void __mb2_cache_entry_free(struct mb2_cache_entry *entry); +static inline int mb2_cache_entry_put(struct mb2_cache *cache, + struct mb2_cache_entry *entry) +{ + if (!atomic_dec_and_test(&entry->e_refcnt)) + return 0; + __mb2_cache_entry_free(entry); + return 1; +} + +void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key, + sector_t block); +struct mb2_cache_entry *mb2_cache_entry_find_first(struct mb2_cache *cache, + u32 key); +struct mb2_cache_entry *mb2_cache_entry_find_next(struct mb2_cache *cache, + struct mb2_cache_entry *entry); +void mb2_cache_entry_touch(struct mb2_cache *cache, + struct mb2_cache_entry *entry); + +#endif /* _LINUX_MB2CACHE_H */ -- cgit v1.2.3 From e29de4e87171faa5d3ab882b0bc555bbd07c9c5b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 22 Feb 2016 11:50:13 -0500 Subject: BACKPORT [UPSTREAM] ext4: convert to mbcache2 (Cherry-pick from commit 82939d7999dfc1f1998c4b1c12e2f19edbdff272) The conversion is generally straightforward. The only tricky part is that xattr block corresponding to found mbcache entry can get freed before we get buffer lock for that block. So we have to check whether the entry is still valid after getting buffer lock. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Bug; 32461228 --- fs/ext4/ext4.h | 2 +- fs/ext4/super.c | 7 ++- fs/ext4/xattr.c | 136 ++++++++++++++++++++++++++++---------------------------- fs/ext4/xattr.h | 5 +-- 4 files changed, 75 insertions(+), 75 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ac720a31e4ff..2b3ac9fa9460 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1442,7 +1442,7 @@ struct ext4_sb_info { struct list_head s_es_list; /* List of inodes with reclaimable extents */ long s_es_nr_inode; struct ext4_es_stats s_es_stats; - struct mb_cache *s_mb_cache; + struct mb2_cache *s_mb_cache; spinlock_t s_es_lock ____cacheline_aligned_in_smp; /* Ratelimit ext4 messages. */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 68345a9e59b8..bd8831bfbafe 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -816,7 +816,6 @@ static void ext4_put_super(struct super_block *sb) ext4_release_system_zone(sb); ext4_mb_release(sb); ext4_ext_release(sb); - ext4_xattr_put_super(sb); if (!(sb->s_flags & MS_RDONLY) && !aborted) { ext4_clear_feature_journal_needs_recovery(sb); @@ -3833,7 +3832,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) no_journal: if (ext4_mballoc_ready) { - sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id); + sbi->s_mb_cache = ext4_xattr_create_cache(); if (!sbi->s_mb_cache) { ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache"); goto failed_mount_wq; @@ -4065,6 +4064,10 @@ failed_mount4: if (EXT4_SB(sb)->rsv_conversion_wq) destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq); failed_mount_wq: + if (sbi->s_mb_cache) { + ext4_xattr_destroy_cache(sbi->s_mb_cache); + sbi->s_mb_cache = NULL; + } if (sbi->s_journal) { jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 263002f0389d..f7455e668474 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include "ext4_jbd2.h" #include "ext4.h" @@ -80,10 +80,10 @@ # define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif -static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *); +static void ext4_xattr_cache_insert(struct mb2_cache *, struct buffer_head *); static struct buffer_head *ext4_xattr_cache_find(struct inode *, struct ext4_xattr_header *, - struct mb_cache_entry **); + struct mb2_cache_entry **); static void ext4_xattr_rehash(struct ext4_xattr_header *, struct ext4_xattr_entry *); static int ext4_xattr_list(struct dentry *dentry, char *buffer, @@ -279,7 +279,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, struct ext4_xattr_entry *entry; size_t size; int error; - struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); + struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", name_index, name, buffer, (long)buffer_size); @@ -426,7 +426,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) struct inode *inode = d_inode(dentry); struct buffer_head *bh = NULL; int error; - struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); + struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); ea_idebug(inode, "buffer=%p, buffer_size=%ld", buffer, (long)buffer_size); @@ -543,11 +543,8 @@ static void ext4_xattr_release_block(handle_t *handle, struct inode *inode, struct buffer_head *bh) { - struct mb_cache_entry *ce = NULL; int error = 0; - struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); - ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr); BUFFER_TRACE(bh, "get_write_access"); error = ext4_journal_get_write_access(handle, bh); if (error) @@ -555,9 +552,15 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, lock_buffer(bh); if (BHDR(bh)->h_refcount == cpu_to_le32(1)) { + __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); + ea_bdebug(bh, "refcount now=0; freeing"); - if (ce) - mb_cache_entry_free(ce); + /* + * This must happen under buffer lock for + * ext4_xattr_block_set() to reliably detect freed block + */ + mb2_cache_entry_delete_block(EXT4_GET_MB_CACHE(inode), hash, + bh->b_blocknr); get_bh(bh); unlock_buffer(bh); ext4_free_blocks(handle, inode, bh, 0, 1, @@ -565,8 +568,6 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, EXT4_FREE_BLOCKS_FORGET); } else { le32_add_cpu(&BHDR(bh)->h_refcount, -1); - if (ce) - mb_cache_entry_release(ce); /* * Beware of this ugliness: Releasing of xattr block references * from different inodes can race and so we have to protect @@ -779,17 +780,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, struct super_block *sb = inode->i_sb; struct buffer_head *new_bh = NULL; struct ext4_xattr_search *s = &bs->s; - struct mb_cache_entry *ce = NULL; + struct mb2_cache_entry *ce = NULL; int error = 0; - struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); + struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); #define header(x) ((struct ext4_xattr_header *)(x)) if (i->value && i->value_len > sb->s_blocksize) return -ENOSPC; if (s->base) { - ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev, - bs->bh->b_blocknr); BUFFER_TRACE(bs->bh, "get_write_access"); error = ext4_journal_get_write_access(handle, bs->bh); if (error) @@ -797,10 +796,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, lock_buffer(bs->bh); if (header(s->base)->h_refcount == cpu_to_le32(1)) { - if (ce) { - mb_cache_entry_free(ce); - ce = NULL; - } + __u32 hash = le32_to_cpu(BHDR(bs->bh)->h_hash); + + /* + * This must happen under buffer lock for + * ext4_xattr_block_set() to reliably detect modified + * block + */ + mb2_cache_entry_delete_block(ext4_mb_cache, hash, + bs->bh->b_blocknr); ea_bdebug(bs->bh, "modifying in-place"); error = ext4_xattr_set_entry(i, s); if (!error) { @@ -824,10 +828,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, int offset = (char *)s->here - bs->bh->b_data; unlock_buffer(bs->bh); - if (ce) { - mb_cache_entry_release(ce); - ce = NULL; - } ea_bdebug(bs->bh, "cloning"); s->base = kmalloc(bs->bh->b_size, GFP_NOFS); error = -ENOMEM; @@ -882,6 +882,31 @@ inserted: if (error) goto cleanup_dquot; lock_buffer(new_bh); + /* + * We have to be careful about races with + * freeing or rehashing of xattr block. Once we + * hold buffer lock xattr block's state is + * stable so we can check whether the block got + * freed / rehashed or not. Since we unhash + * mbcache entry under buffer lock when freeing + * / rehashing xattr block, checking whether + * entry is still hashed is reliable. + */ + if (hlist_bl_unhashed(&ce->e_hash_list)) { + /* + * Undo everything and check mbcache + * again. + */ + unlock_buffer(new_bh); + dquot_free_block(inode, + EXT4_C2B(EXT4_SB(sb), + 1)); + brelse(new_bh); + mb2_cache_entry_put(ext4_mb_cache, ce); + ce = NULL; + new_bh = NULL; + goto inserted; + } le32_add_cpu(&BHDR(new_bh)->h_refcount, 1); ea_bdebug(new_bh, "reusing; refcount now=%d", le32_to_cpu(BHDR(new_bh)->h_refcount)); @@ -892,7 +917,8 @@ inserted: if (error) goto cleanup_dquot; } - mb_cache_entry_release(ce); + mb2_cache_entry_touch(ext4_mb_cache, ce); + mb2_cache_entry_put(ext4_mb_cache, ce); ce = NULL; } else if (bs->bh && s->base == bs->bh->b_data) { /* We were modifying this block in-place. */ @@ -957,7 +983,7 @@ getblk_failed: cleanup: if (ce) - mb_cache_entry_release(ce); + mb2_cache_entry_put(ext4_mb_cache, ce); brelse(new_bh); if (!(bs->bh && s->base == bs->bh->b_data)) kfree(s->base); @@ -1518,17 +1544,6 @@ cleanup: brelse(bh); } -/* - * ext4_xattr_put_super() - * - * This is called when a file system is unmounted. - */ -void -ext4_xattr_put_super(struct super_block *sb) -{ - mb_cache_shrink(sb->s_bdev); -} - /* * ext4_xattr_cache_insert() * @@ -1538,28 +1553,18 @@ ext4_xattr_put_super(struct super_block *sb) * Returns 0, or a negative error number on failure. */ static void -ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh) +ext4_xattr_cache_insert(struct mb2_cache *ext4_mb_cache, struct buffer_head *bh) { __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); - struct mb_cache_entry *ce; int error; - ce = mb_cache_entry_alloc(ext4_mb_cache, GFP_NOFS); - if (!ce) { - ea_bdebug(bh, "out of memory"); - return; - } - error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash); + error = mb2_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash, + bh->b_blocknr); if (error) { - mb_cache_entry_free(ce); - if (error == -EBUSY) { + if (error == -EBUSY) ea_bdebug(bh, "already in cache"); - error = 0; - } - } else { + } else ea_bdebug(bh, "inserting [%x]", (int)hash); - mb_cache_entry_release(ce); - } } /* @@ -1612,26 +1617,19 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1, */ static struct buffer_head * ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, - struct mb_cache_entry **pce) + struct mb2_cache_entry **pce) { __u32 hash = le32_to_cpu(header->h_hash); - struct mb_cache_entry *ce; - struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); + struct mb2_cache_entry *ce; + struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); if (!header->h_hash) return NULL; /* never share */ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -again: - ce = mb_cache_entry_find_first(ext4_mb_cache, inode->i_sb->s_bdev, - hash); + ce = mb2_cache_entry_find_first(ext4_mb_cache, hash); while (ce) { struct buffer_head *bh; - if (IS_ERR(ce)) { - if (PTR_ERR(ce) == -EAGAIN) - goto again; - break; - } bh = sb_bread(inode->i_sb, ce->e_block); if (!bh) { EXT4_ERROR_INODE(inode, "block %lu read error", @@ -1647,7 +1645,7 @@ again: return bh; } brelse(bh); - ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash); + ce = mb2_cache_entry_find_next(ext4_mb_cache, ce); } return NULL; } @@ -1722,15 +1720,15 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header, #define HASH_BUCKET_BITS 10 -struct mb_cache * -ext4_xattr_create_cache(char *name) +struct mb2_cache * +ext4_xattr_create_cache(void) { - return mb_cache_create(name, HASH_BUCKET_BITS); + return mb2_cache_create(HASH_BUCKET_BITS); } -void ext4_xattr_destroy_cache(struct mb_cache *cache) +void ext4_xattr_destroy_cache(struct mb2_cache *cache) { if (cache) - mb_cache_destroy(cache); + mb2_cache_destroy(cache); } diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index ddc0957760ba..10b0f7323ed6 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -108,7 +108,6 @@ extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_ extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); extern void ext4_xattr_delete_inode(handle_t *, struct inode *); -extern void ext4_xattr_put_super(struct super_block *); extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, struct ext4_inode *raw_inode, handle_t *handle); @@ -124,8 +123,8 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, struct ext4_xattr_info *i, struct ext4_xattr_ibody_find *is); -extern struct mb_cache *ext4_xattr_create_cache(char *name); -extern void ext4_xattr_destroy_cache(struct mb_cache *); +extern struct mb2_cache *ext4_xattr_create_cache(void); +extern void ext4_xattr_destroy_cache(struct mb2_cache *); #ifdef CONFIG_EXT4_FS_SECURITY extern int ext4_init_security(handle_t *handle, struct inode *inode, -- cgit v1.2.3 From 356d8075469a6cab1ecea241923e56dd00641109 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 22 Feb 2016 11:56:38 -0500 Subject: BACKPORT: [UPSTREAM] ext2: convert to mbcache2 (Cherry-pick from commit be0726d33cb8f411945884664924bed3cb8c70ee) The conversion is generally straightforward. We convert filesystem from a global cache to per-fs one. Similarly to ext4 the tricky part is that xattr block corresponding to found mbcache entry can get freed before we get buffer lock for that block. So we have to check whether the entry is still valid after getting the buffer lock. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Bug: 32461228 --- fs/ext2/ext2.h | 3 ++ fs/ext2/super.c | 25 ++++++---- fs/ext2/xattr.c | 143 ++++++++++++++++++++++++++------------------------------ fs/ext2/xattr.h | 21 ++------- 4 files changed, 92 insertions(+), 100 deletions(-) diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 4c69c94cafd8..f98ce7e60a0f 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -61,6 +61,8 @@ struct ext2_block_alloc_info { #define rsv_start rsv_window._rsv_start #define rsv_end rsv_window._rsv_end +struct mb2_cache; + /* * second extended-fs super-block data in memory */ @@ -111,6 +113,7 @@ struct ext2_sb_info { * of the mount options. */ spinlock_t s_lock; + struct mb2_cache *s_mb_cache; }; static inline spinlock_t * diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 748d35afc902..111a31761ffa 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -131,7 +131,10 @@ static void ext2_put_super (struct super_block * sb) dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); - ext2_xattr_put_super(sb); + if (sbi->s_mb_cache) { + ext2_xattr_destroy_cache(sbi->s_mb_cache); + sbi->s_mb_cache = NULL; + } if (!(sb->s_flags & MS_RDONLY)) { struct ext2_super_block *es = sbi->s_es; @@ -1104,6 +1107,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) ext2_msg(sb, KERN_ERR, "error: insufficient memory"); goto failed_mount3; } + +#ifdef CONFIG_EXT2_FS_XATTR + sbi->s_mb_cache = ext2_xattr_create_cache(); + if (!sbi->s_mb_cache) { + ext2_msg(sb, KERN_ERR, "Failed to create an mb_cache"); + goto failed_mount3; + } +#endif /* * set up enough so that it can read an inode */ @@ -1149,6 +1160,8 @@ cantfind_ext2: sb->s_id); goto failed_mount; failed_mount3: + if (sbi->s_mb_cache) + ext2_xattr_destroy_cache(sbi->s_mb_cache); percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); @@ -1555,20 +1568,17 @@ MODULE_ALIAS_FS("ext2"); static int __init init_ext2_fs(void) { - int err = init_ext2_xattr(); - if (err) - return err; + int err; + err = init_inodecache(); if (err) - goto out1; + return err; err = register_filesystem(&ext2_fs_type); if (err) goto out; return 0; out: destroy_inodecache(); -out1: - exit_ext2_xattr(); return err; } @@ -1576,7 +1586,6 @@ static void __exit exit_ext2_fs(void) { unregister_filesystem(&ext2_fs_type); destroy_inodecache(); - exit_ext2_xattr(); } MODULE_AUTHOR("Remy Card and others"); diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index fa70848afa8f..24736c8b3d51 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -56,7 +56,7 @@ #include #include #include -#include +#include #include #include #include @@ -92,14 +92,12 @@ static int ext2_xattr_set2(struct inode *, struct buffer_head *, struct ext2_xattr_header *); -static int ext2_xattr_cache_insert(struct buffer_head *); +static int ext2_xattr_cache_insert(struct mb2_cache *, struct buffer_head *); static struct buffer_head *ext2_xattr_cache_find(struct inode *, struct ext2_xattr_header *); static void ext2_xattr_rehash(struct ext2_xattr_header *, struct ext2_xattr_entry *); -static struct mb_cache *ext2_xattr_cache; - static const struct xattr_handler *ext2_xattr_handler_map[] = { [EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler, #ifdef CONFIG_EXT2_FS_POSIX_ACL @@ -154,6 +152,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, size_t name_len, size; char *end; int error; + struct mb2_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache; ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", name_index, name, buffer, (long)buffer_size); @@ -198,7 +197,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", goto found; entry = next; } - if (ext2_xattr_cache_insert(bh)) + if (ext2_xattr_cache_insert(ext2_mb_cache, bh)) ea_idebug(inode, "cache insert failed"); error = -ENODATA; goto cleanup; @@ -211,7 +210,7 @@ found: le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) goto bad_block; - if (ext2_xattr_cache_insert(bh)) + if (ext2_xattr_cache_insert(ext2_mb_cache, bh)) ea_idebug(inode, "cache insert failed"); if (buffer) { error = -ERANGE; @@ -249,6 +248,7 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) char *end; size_t rest = buffer_size; int error; + struct mb2_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache; ea_idebug(inode, "buffer=%p, buffer_size=%ld", buffer, (long)buffer_size); @@ -283,7 +283,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", goto bad_block; entry = next; } - if (ext2_xattr_cache_insert(bh)) + if (ext2_xattr_cache_insert(ext2_mb_cache, bh)) ea_idebug(inode, "cache insert failed"); /* list the attribute names */ @@ -480,22 +480,23 @@ bad_block: ext2_error(sb, "ext2_xattr_set", /* Here we know that we can set the new attribute. */ if (header) { - struct mb_cache_entry *ce; - /* assert(header == HDR(bh)); */ - ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, - bh->b_blocknr); lock_buffer(bh); if (header->h_refcount == cpu_to_le32(1)) { + __u32 hash = le32_to_cpu(header->h_hash); + ea_bdebug(bh, "modifying in-place"); - if (ce) - mb_cache_entry_free(ce); + /* + * This must happen under buffer lock for + * ext2_xattr_set2() to reliably detect modified block + */ + mb2_cache_entry_delete_block(EXT2_SB(sb)->s_mb_cache, + hash, bh->b_blocknr); + /* keep the buffer locked while modifying it. */ } else { int offset; - if (ce) - mb_cache_entry_release(ce); unlock_buffer(bh); ea_bdebug(bh, "cloning"); header = kmalloc(bh->b_size, GFP_KERNEL); @@ -623,6 +624,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, struct super_block *sb = inode->i_sb; struct buffer_head *new_bh = NULL; int error; + struct mb2_cache *ext2_mb_cache = EXT2_SB(sb)->s_mb_cache; if (header) { new_bh = ext2_xattr_cache_find(inode, header); @@ -650,7 +652,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, don't need to change the reference count. */ new_bh = old_bh; get_bh(new_bh); - ext2_xattr_cache_insert(new_bh); + ext2_xattr_cache_insert(ext2_mb_cache, new_bh); } else { /* We need to allocate a new block */ ext2_fsblk_t goal = ext2_group_first_block_no(sb, @@ -671,7 +673,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, memcpy(new_bh->b_data, header, new_bh->b_size); set_buffer_uptodate(new_bh); unlock_buffer(new_bh); - ext2_xattr_cache_insert(new_bh); + ext2_xattr_cache_insert(ext2_mb_cache, new_bh); ext2_xattr_update_super_block(sb); } @@ -704,19 +706,21 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, error = 0; if (old_bh && old_bh != new_bh) { - struct mb_cache_entry *ce; - /* * If there was an old block and we are no longer using it, * release the old block. */ - ce = mb_cache_entry_get(ext2_xattr_cache, old_bh->b_bdev, - old_bh->b_blocknr); lock_buffer(old_bh); if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { + __u32 hash = le32_to_cpu(HDR(old_bh)->h_hash); + + /* + * This must happen under buffer lock for + * ext2_xattr_set2() to reliably detect freed block + */ + mb2_cache_entry_delete_block(ext2_mb_cache, + hash, old_bh->b_blocknr); /* Free the old block. */ - if (ce) - mb_cache_entry_free(ce); ea_bdebug(old_bh, "freeing"); ext2_free_blocks(inode, old_bh->b_blocknr, 1); mark_inode_dirty(inode); @@ -727,8 +731,6 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, } else { /* Decrement the refcount only. */ le32_add_cpu(&HDR(old_bh)->h_refcount, -1); - if (ce) - mb_cache_entry_release(ce); dquot_free_block_nodirty(inode, 1); mark_inode_dirty(inode); mark_buffer_dirty(old_bh); @@ -754,7 +756,6 @@ void ext2_xattr_delete_inode(struct inode *inode) { struct buffer_head *bh = NULL; - struct mb_cache_entry *ce; down_write(&EXT2_I(inode)->xattr_sem); if (!EXT2_I(inode)->i_file_acl) @@ -774,19 +775,22 @@ ext2_xattr_delete_inode(struct inode *inode) EXT2_I(inode)->i_file_acl); goto cleanup; } - ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr); lock_buffer(bh); if (HDR(bh)->h_refcount == cpu_to_le32(1)) { - if (ce) - mb_cache_entry_free(ce); + __u32 hash = le32_to_cpu(HDR(bh)->h_hash); + + /* + * This must happen under buffer lock for ext2_xattr_set2() to + * reliably detect freed block + */ + mb2_cache_entry_delete_block(EXT2_SB(inode->i_sb)->s_mb_cache, + hash, bh->b_blocknr); ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); get_bh(bh); bforget(bh); unlock_buffer(bh); } else { le32_add_cpu(&HDR(bh)->h_refcount, -1); - if (ce) - mb_cache_entry_release(ce); ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount)); unlock_buffer(bh); @@ -802,18 +806,6 @@ cleanup: up_write(&EXT2_I(inode)->xattr_sem); } -/* - * ext2_xattr_put_super() - * - * This is called when a file system is unmounted. - */ -void -ext2_xattr_put_super(struct super_block *sb) -{ - mb_cache_shrink(sb->s_bdev); -} - - /* * ext2_xattr_cache_insert() * @@ -823,28 +815,20 @@ ext2_xattr_put_super(struct super_block *sb) * Returns 0, or a negative error number on failure. */ static int -ext2_xattr_cache_insert(struct buffer_head *bh) +ext2_xattr_cache_insert(struct mb2_cache *cache, struct buffer_head *bh) { __u32 hash = le32_to_cpu(HDR(bh)->h_hash); - struct mb_cache_entry *ce; int error; - ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS); - if (!ce) - return -ENOMEM; - error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash); + error = mb2_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr); if (error) { - mb_cache_entry_free(ce); if (error == -EBUSY) { ea_bdebug(bh, "already in cache (%d cache entries)", atomic_read(&ext2_xattr_cache->c_entry_count)); error = 0; } - } else { - ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, - atomic_read(&ext2_xattr_cache->c_entry_count)); - mb_cache_entry_release(ce); - } + } else + ea_bdebug(bh, "inserting [%x]", (int)hash); return error; } @@ -900,23 +884,17 @@ static struct buffer_head * ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) { __u32 hash = le32_to_cpu(header->h_hash); - struct mb_cache_entry *ce; + struct mb2_cache_entry *ce; + struct mb2_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache; if (!header->h_hash) return NULL; /* never share */ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); again: - ce = mb_cache_entry_find_first(ext2_xattr_cache, inode->i_sb->s_bdev, - hash); + ce = mb2_cache_entry_find_first(ext2_mb_cache, hash); while (ce) { struct buffer_head *bh; - if (IS_ERR(ce)) { - if (PTR_ERR(ce) == -EAGAIN) - goto again; - break; - } - bh = sb_bread(inode->i_sb, ce->e_block); if (!bh) { ext2_error(inode->i_sb, "ext2_xattr_cache_find", @@ -924,7 +902,21 @@ again: inode->i_ino, (unsigned long) ce->e_block); } else { lock_buffer(bh); - if (le32_to_cpu(HDR(bh)->h_refcount) > + /* + * We have to be careful about races with freeing or + * rehashing of xattr block. Once we hold buffer lock + * xattr block's state is stable so we can check + * whether the block got freed / rehashed or not. + * Since we unhash mbcache entry under buffer lock when + * freeing / rehashing xattr block, checking whether + * entry is still hashed is reliable. + */ + if (hlist_bl_unhashed(&ce->e_hash_list)) { + mb2_cache_entry_put(ext2_mb_cache, ce); + unlock_buffer(bh); + brelse(bh); + goto again; + } else if (le32_to_cpu(HDR(bh)->h_refcount) > EXT2_XATTR_REFCOUNT_MAX) { ea_idebug(inode, "block %ld refcount %d>%d", (unsigned long) ce->e_block, @@ -933,13 +925,14 @@ again: } else if (!ext2_xattr_cmp(header, HDR(bh))) { ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); - mb_cache_entry_release(ce); + mb2_cache_entry_touch(ext2_mb_cache, ce); + mb2_cache_entry_put(ext2_mb_cache, ce); return bh; } unlock_buffer(bh); brelse(bh); } - ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash); + ce = mb2_cache_entry_find_next(ext2_mb_cache, ce); } return NULL; } @@ -1012,17 +1005,15 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header, #undef BLOCK_HASH_SHIFT -int __init -init_ext2_xattr(void) +#define HASH_BUCKET_BITS 10 + +struct mb2_cache *ext2_xattr_create_cache(void) { - ext2_xattr_cache = mb_cache_create("ext2_xattr", 6); - if (!ext2_xattr_cache) - return -ENOMEM; - return 0; + return mb2_cache_create(HASH_BUCKET_BITS); } -void -exit_ext2_xattr(void) +void ext2_xattr_destroy_cache(struct mb2_cache *cache) { - mb_cache_destroy(ext2_xattr_cache); + if (cache) + mb2_cache_destroy(cache); } diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h index 60edf298644e..6ea38aa9563a 100644 --- a/fs/ext2/xattr.h +++ b/fs/ext2/xattr.h @@ -53,6 +53,8 @@ struct ext2_xattr_entry { #define EXT2_XATTR_SIZE(size) \ (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND) +struct mb2_cache; + # ifdef CONFIG_EXT2_FS_XATTR extern const struct xattr_handler ext2_xattr_user_handler; @@ -65,10 +67,9 @@ extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t); extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int); extern void ext2_xattr_delete_inode(struct inode *); -extern void ext2_xattr_put_super(struct super_block *); -extern int init_ext2_xattr(void); -extern void exit_ext2_xattr(void); +extern struct mb2_cache *ext2_xattr_create_cache(void); +extern void ext2_xattr_destroy_cache(struct mb2_cache *cache); extern const struct xattr_handler *ext2_xattr_handlers[]; @@ -93,19 +94,7 @@ ext2_xattr_delete_inode(struct inode *inode) { } -static inline void -ext2_xattr_put_super(struct super_block *sb) -{ -} - -static inline int -init_ext2_xattr(void) -{ - return 0; -} - -static inline void -exit_ext2_xattr(void) +static inline void ext2_xattr_destroy_cache(struct mb2_cache *cache) { } -- cgit v1.2.3 From d657201d48f0cff729c89fafeada716410934f5c Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 17 Apr 2017 17:11:38 -0700 Subject: Android: sdcardfs: Change cache GID value Change-Id: Ieb955dd26493da26a458bc20fbbe75bca32b094f Signed-off-by: Daniel Rosenberg Bug: 37193650 --- fs/sdcardfs/derived_perm.c | 2 +- fs/sdcardfs/multiuser.h | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index f82fedd29007..11cb402d3465 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -222,7 +222,7 @@ void fixup_lower_ownership(struct dentry *dentry, const char *name) break; case PERM_ANDROID_PACKAGE_CACHE: if (info->d_uid != 0) - gid = multiuser_get_cache_gid(info->d_uid); + gid = multiuser_get_ext_cache_gid(info->d_uid); else gid = multiuser_get_uid(info->userid, uid); break; diff --git a/fs/sdcardfs/multiuser.h b/fs/sdcardfs/multiuser.h index 2e89b5872314..d0c925cda299 100644 --- a/fs/sdcardfs/multiuser.h +++ b/fs/sdcardfs/multiuser.h @@ -23,6 +23,8 @@ #define AID_APP_END 19999 /* last app user */ #define AID_CACHE_GID_START 20000 /* start of gids for apps to mark cached data */ #define AID_EXT_GID_START 30000 /* start of gids for apps to mark external data */ +#define AID_EXT_CACHE_GID_START 40000 /* start of gids for apps to mark external cached data */ +#define AID_EXT_CACHE_GID_END 49999 /* end of gids for apps to mark external cached data */ #define AID_SHARED_GID_START 50000 /* start of gids for apps in each user to share */ typedef uid_t userid_t; @@ -33,9 +35,9 @@ static inline uid_t multiuser_get_uid(userid_t user_id, appid_t app_id) return (user_id * AID_USER_OFFSET) + (app_id % AID_USER_OFFSET); } -static inline gid_t multiuser_get_cache_gid(uid_t uid) +static inline gid_t multiuser_get_ext_cache_gid(uid_t uid) { - return uid - AID_APP_START + AID_CACHE_GID_START; + return uid - AID_APP_START + AID_EXT_CACHE_GID_START; } static inline gid_t multiuser_get_ext_gid(uid_t uid) -- cgit v1.2.3 From 6f28e6ebccbaeee26ee72669215cf1e4d0d79537 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Tue, 18 Apr 2017 12:45:48 -0700 Subject: ANDROID: sdcardfs: ->iget fixes Adapted from wrapfs commit 8c49eaa0sb9c ("Wrapfs: ->iget fixes") Change where we igrab/iput to ensure we always hold a valid lower_inode. Return ENOMEM (not EACCES) if iget5_locked returns NULL. Signed-off-by: Erez Zadok Signed-off-by: Daniel Rosenberg Bug: 35766959 Change-Id: Id8d4e0c0cbc685a0a77685ce73c923e9a3ddc094 --- fs/sdcardfs/lookup.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index f10f7752f514..a0f221501b4c 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -91,7 +91,9 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, u struct sdcardfs_inode_info *info; struct inode_data data; struct inode *inode; /* the new inode to return */ - int err; + + if (!igrab(lower_inode)) + return ERR_PTR(-ESTALE); data.id = id; data.lower_inode = lower_inode; @@ -106,22 +108,19 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, u sdcardfs_inode_set, /* inode init function */ &data); /* data passed to test+set fxns */ if (!inode) { - err = -EACCES; iput(lower_inode); - return ERR_PTR(err); + return ERR_PTR(-ENOMEM); } - /* if found a cached inode, then just return it */ - if (!(inode->i_state & I_NEW)) + /* if found a cached inode, then just return it (after iput) */ + if (!(inode->i_state & I_NEW)) { + iput(lower_inode); return inode; + } /* initialize new inode */ info = SDCARDFS_I(inode); inode->i_ino = lower_inode->i_ino; - if (!igrab(lower_inode)) { - err = -ESTALE; - return ERR_PTR(err); - } sdcardfs_set_lower_inode(inode, lower_inode); inode->i_version++; -- cgit v1.2.3 From e92f72194da2e690d85fc736661d3f0d96825c57 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Tue, 18 Apr 2017 22:25:15 -0700 Subject: Android: sdcardfs: Don't do d_add for lower fs For file based encryption, ext4 explicitly does not create negative dentries for encrypted files. If you force one over it, the decrypted file will be hidden until the cache is cleared. Instead, just fail out. Signed-off-by: Daniel Rosenberg Bug: 37231161 Change-Id: Id2a9708dfa75e1c22f89915c529789caadd2ca4b --- fs/sdcardfs/lookup.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index a0f221501b4c..08f62fb3e075 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -368,17 +368,15 @@ put_name: dname.len = name->len; dname.hash = full_name_hash(dname.name, dname.len); lower_dentry = d_lookup(lower_dir_dentry, &dname); - if (lower_dentry) - goto setup_lower; - - lower_dentry = d_alloc(lower_dir_dentry, &dname); if (!lower_dentry) { - err = -ENOMEM; + /* We called vfs_path_lookup earlier, and did not get a negative + * dentry then. Don't confuse the lower filesystem by forcing one + * on it now... + */ + err = -ENOENT; goto out; } - d_add(lower_dentry, NULL); /* instantiate and hash */ -setup_lower: lower_path.dentry = lower_dentry; lower_path.mnt = mntget(lower_dir_mnt); sdcardfs_set_lower_path(dentry, &lower_path); -- cgit v1.2.3 From d04f4c7320bab619062472469d149387d20095f9 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Tue, 18 Apr 2017 22:49:38 -0700 Subject: Android: sdcardfs: Don't complain in fixup_lower_ownership Not all filesystems support changing the owner of a file. We shouldn't complain if it doesn't happen. Signed-off-by: Daniel Rosenberg Bug: 37488099 Change-Id: I403e44ab7230f176e6df82f6adb4e5c82ce57f33 --- fs/sdcardfs/derived_perm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index 11cb402d3465..cddfc79ea365 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -252,7 +252,7 @@ retry_deleg: goto retry_deleg; } if (error) - pr_err("sdcardfs: Failed to touch up lower fs gid/uid.\n"); + pr_debug("sdcardfs: Failed to touch up lower fs gid/uid for %s\n", name); } sdcardfs_put_lower_path(dentry, &path); } -- cgit v1.2.3 From 0ed8679d0dbb02598c60cd4bd933f911bd636d01 Mon Sep 17 00:00:00 2001 From: Jiebing Li Date: Tue, 10 Mar 2015 11:24:00 +0800 Subject: ANDROID: usb: gadget: fix MTP enumeration issue under super speed mode MTP function doesn't show as a drive in Windows when the device is connected to PC's USB3 port, because device fails to respond ACK to BULK OUT transfer request. This patch modifies MTP OUT request length as multiple of MaxPacketSize per databook requirement in order to fix this issue. Patchset: mtp Change-Id: I090d7880ff00c499dc5ba7fd644b1fe7cd87fcb5 Signed-off-by: Jiebing Li Signed-off-by: Wang, Yu Signed-off-by: Russ Weight --- drivers/usb/gadget/function/f_mtp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c index d8b69af6e335..b37cc8d87ca4 100644 --- a/drivers/usb/gadget/function/f_mtp.c +++ b/drivers/usb/gadget/function/f_mtp.c @@ -540,10 +540,12 @@ static ssize_t mtp_read(struct file *fp, char __user *buf, ssize_t r = count; unsigned xfer; int ret = 0; + size_t len; DBG(cdev, "mtp_read(%zu)\n", count); - if (count > MTP_BULK_BUFFER_SIZE) + len = usb_ep_align_maybe(cdev->gadget, dev->ep_out, count); + if (len > MTP_BULK_BUFFER_SIZE) return -EINVAL; /* we will block until we're online */ @@ -567,7 +569,7 @@ static ssize_t mtp_read(struct file *fp, char __user *buf, requeue_req: /* queue a request */ req = dev->rx_req[0]; - req->length = count; + req->length = len; dev->rx_done = 0; ret = usb_ep_queue(dev->ep_out, req, GFP_KERNEL); if (ret < 0) { -- cgit v1.2.3 From 53491d941217d00fdddda6b8b755ced36676f079 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 20 Apr 2017 12:13:31 -0700 Subject: Revert "Android: sdcardfs: Don't do d_add for lower fs" This reverts commit e92f72194da2e690d85fc736661d3f0d96825c57. This change caused issues for sdcardfs on top of vfat Signed-off-by: Daniel Rosenberg Change-Id: Ife2ac6d9af40e4ddb95b7261e1dad4d7817e3779 --- fs/sdcardfs/lookup.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index 08f62fb3e075..a0f221501b4c 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -368,15 +368,17 @@ put_name: dname.len = name->len; dname.hash = full_name_hash(dname.name, dname.len); lower_dentry = d_lookup(lower_dir_dentry, &dname); + if (lower_dentry) + goto setup_lower; + + lower_dentry = d_alloc(lower_dir_dentry, &dname); if (!lower_dentry) { - /* We called vfs_path_lookup earlier, and did not get a negative - * dentry then. Don't confuse the lower filesystem by forcing one - * on it now... - */ - err = -ENOENT; + err = -ENOMEM; goto out; } + d_add(lower_dentry, NULL); /* instantiate and hash */ +setup_lower: lower_path.dentry = lower_dentry; lower_path.mnt = mntget(lower_dir_mnt); sdcardfs_set_lower_path(dentry, &lower_path); -- cgit v1.2.3 From 738e1a1a468217bd5cb3fdc19582d4a1778bb510 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 Apr 2016 10:40:55 +0200 Subject: UPSTREAM: char: Drop bogus dependency of DEVPORT on !M68K (cherry pick from commit 309124e2648d668a0c23539c5078815660a4a850) According to full-history-linux commit d3794f4fa7c3edc3 ("[PATCH] M68k update (part 25)"), port operations are allowed on m68k if CONFIG_ISA is defined. However, commit 153dcc54df826d2f ("[PATCH] mem driver: fix conditional on isa i/o support") accidentally changed an "||" into an "&&", disabling it completely on m68k. This logic was retained when introducing the DEVPORT symbol in commit 4f911d64e04a44c4 ("Make /dev/port conditional on config symbol"). Drop the bogus dependency on !M68K to fix this. Fixes: 153dcc54df826d2f ("[PATCH] mem driver: fix conditional on isa i/o support") Signed-off-by: Geert Uytterhoeven Tested-by: Al Stone Signed-off-by: Greg Kroah-Hartman Bug: 37210310 Bug: 36604779 Change-Id: I9139bd8a5a6e9e39c2e428bde23a7d9be07e2f91 --- drivers/char/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index a043107da2af..b130d38cf55c 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -584,7 +584,6 @@ config TELCLOCK config DEVPORT bool - depends on !M68K depends on ISA || PCI default y -- cgit v1.2.3 From c0fb2f9e5814e4cf8d6e64c00bdacdbf1668ab62 Mon Sep 17 00:00:00 2001 From: Max Bires Date: Tue, 3 Jan 2017 08:18:07 -0800 Subject: UPSTREAM: char: lack of bool string made CONFIG_DEVPORT always on (cherry pick from commit f2cfa58b136e4b06a9b9db7af5ef62fbb5992f62) Without a bool string present, using "# CONFIG_DEVPORT is not set" in defconfig files would not actually unset devport. This esnured that /dev/port was always on, but there are reasons a user may wish to disable it (smaller kernel, attack surface reduction) if it's not being used. Adding a message here in order to make this user visible. Signed-off-by: Max Bires Acked-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman Bug: 37210310 Bug: 36604779 Change-Id: Ib1e947526f6c6f7cdf6389923287631056f32c36 --- drivers/char/Kconfig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index b130d38cf55c..3143db57ce44 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -583,9 +583,12 @@ config TELCLOCK controlling the behavior of this hardware. config DEVPORT - bool + bool "/dev/port character device" depends on ISA || PCI default y + help + Say Y here if you want to support the /dev/port device. The /dev/port + device is similar to /dev/mem, but for I/O ports. source "drivers/s390/char/Kconfig" -- cgit v1.2.3 From ebff0104446a71da950709b94cd2d8d2c10c73be Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Thu, 13 Apr 2017 17:07:58 -0700 Subject: ANDROID: uid_sys_stats: reduce update_io_stats overhead Replaced read_lock with rcu_read_lock to reduce time that preemption is disabled. Added a function to update io stats for specific uid and moved hash table lookup, user_namespace out of loops. Bug: 37319300 Change-Id: I2b81b5cd3b6399b40d08c3c14b42cad044556970 Signed-off-by: Jin Qian --- drivers/misc/uid_sys_stats.c | 61 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/drivers/misc/uid_sys_stats.c b/drivers/misc/uid_sys_stats.c index 204b23484266..618617f3e867 100644 --- a/drivers/misc/uid_sys_stats.c +++ b/drivers/misc/uid_sys_stats.c @@ -237,28 +237,28 @@ static void clean_uid_io_last_stats(struct uid_entry *uid_entry, io_last->fsync -= task->ioac.syscfs; } -static void update_io_stats_locked(void) +static void update_io_stats_all_locked(void) { struct uid_entry *uid_entry; struct task_struct *task, *temp; struct io_stats *io_bucket, *io_curr, *io_last; + struct user_namespace *user_ns = current_user_ns(); unsigned long bkt; - - BUG_ON(!rt_mutex_is_locked(&uid_lock)); + uid_t uid; hash_for_each(hash_table, bkt, uid_entry, hash) memset(&uid_entry->io[UID_STATE_TOTAL_CURR], 0, sizeof(struct io_stats)); - read_lock(&tasklist_lock); + rcu_read_lock(); do_each_thread(temp, task) { - uid_entry = find_or_register_uid(from_kuid_munged( - current_user_ns(), task_uid(task))); + uid = from_kuid_munged(user_ns, task_uid(task)); + uid_entry = find_or_register_uid(uid); if (!uid_entry) continue; add_uid_io_curr_stats(uid_entry, task); } while_each_thread(temp, task); - read_unlock(&tasklist_lock); + rcu_read_unlock(); hash_for_each(hash_table, bkt, uid_entry, hash) { io_bucket = &uid_entry->io[uid_entry->state]; @@ -281,6 +281,47 @@ static void update_io_stats_locked(void) } } +static void update_io_stats_uid_locked(uid_t target_uid) +{ + struct uid_entry *uid_entry; + struct task_struct *task, *temp; + struct io_stats *io_bucket, *io_curr, *io_last; + struct user_namespace *user_ns = current_user_ns(); + + uid_entry = find_or_register_uid(target_uid); + if (!uid_entry) + return; + + memset(&uid_entry->io[UID_STATE_TOTAL_CURR], 0, + sizeof(struct io_stats)); + + rcu_read_lock(); + do_each_thread(temp, task) { + if (from_kuid_munged(user_ns, task_uid(task)) != target_uid) + continue; + add_uid_io_curr_stats(uid_entry, task); + } while_each_thread(temp, task); + rcu_read_unlock(); + + io_bucket = &uid_entry->io[uid_entry->state]; + io_curr = &uid_entry->io[UID_STATE_TOTAL_CURR]; + io_last = &uid_entry->io[UID_STATE_TOTAL_LAST]; + + io_bucket->read_bytes += + io_curr->read_bytes - io_last->read_bytes; + io_bucket->write_bytes += + io_curr->write_bytes - io_last->write_bytes; + io_bucket->rchar += io_curr->rchar - io_last->rchar; + io_bucket->wchar += io_curr->wchar - io_last->wchar; + io_bucket->fsync += io_curr->fsync - io_last->fsync; + + io_last->read_bytes = io_curr->read_bytes; + io_last->write_bytes = io_curr->write_bytes; + io_last->rchar = io_curr->rchar; + io_last->wchar = io_curr->wchar; + io_last->fsync = io_curr->fsync; +} + static int uid_io_show(struct seq_file *m, void *v) { struct uid_entry *uid_entry; @@ -288,7 +329,7 @@ static int uid_io_show(struct seq_file *m, void *v) rt_mutex_lock(&uid_lock); - update_io_stats_locked(); + update_io_stats_all_locked(); hash_for_each(hash_table, bkt, uid_entry, hash) { seq_printf(m, "%d %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", @@ -363,7 +404,7 @@ static ssize_t uid_procstat_write(struct file *file, return count; } - update_io_stats_locked(); + update_io_stats_uid_locked(uid); uid_entry->state = state; @@ -401,7 +442,7 @@ static int process_notifier(struct notifier_block *self, uid_entry->utime += utime; uid_entry->stime += stime; - update_io_stats_locked(); + update_io_stats_uid_locked(uid); clean_uid_io_last_stats(uid_entry, task); exit: -- cgit v1.2.3 From 3144d81a77352a3934ff0f60dccb38dbf462da39 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 16 Mar 2017 16:54:24 -0400 Subject: cgroup, kthread: close race window where new kthreads can be migrated to non-root cgroups commit 77f88796cee819b9c4562b0b6b44691b3b7755b1 upstream. Creation of a kthread goes through a couple interlocked stages between the kthread itself and its creator. Once the new kthread starts running, it initializes itself and wakes up the creator. The creator then can further configure the kthread and then let it start doing its job by waking it up. In this configuration-by-creator stage, the creator is the only one that can wake it up but the kthread is visible to userland. When altering the kthread's attributes from userland is allowed, this is fine; however, for cases where CPU affinity is critical, kthread_bind() is used to first disable affinity changes from userland and then set the affinity. This also prevents the kthread from being migrated into non-root cgroups as that can affect the CPU affinity and many other things. Unfortunately, the cgroup side of protection is racy. While the PF_NO_SETAFFINITY flag prevents further migrations, userland can win the race before the creator sets the flag with kthread_bind() and put the kthread in a non-root cgroup, which can lead to all sorts of problems including incorrect CPU affinity and starvation. This bug got triggered by userland which periodically tries to migrate all processes in the root cpuset cgroup to a non-root one. Per-cpu workqueue workers got caught while being created and ended up with incorrected CPU affinity breaking concurrency management and sometimes stalling workqueue execution. This patch adds task->no_cgroup_migration which disallows the task to be migrated by userland. kthreadd starts with the flag set making every child kthread start in the root cgroup with migration disallowed. The flag is cleared after the kthread finishes initialization by which time PF_NO_SETAFFINITY is set if the kthread should stay in the root cgroup. It'd be better to wait for the initialization instead of failing but I couldn't think of a way of implementing that without adding either a new PF flag, or sleeping and retrying from waiting side. Even if userland depends on changing cgroup membership of a kthread, it either has to be synchronized with kthread_create() or periodically repeat, so it's unlikely that this would break anything. v2: Switch to a simpler implementation using a new task_struct bit field suggested by Oleg. Signed-off-by: Tejun Heo Suggested-by: Oleg Nesterov Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra (Intel) Cc: Thomas Gleixner Reported-and-debugged-by: Chris Mason Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/cgroup.h | 21 +++++++++++++++++++++ include/linux/sched.h | 4 ++++ kernel/cgroup.c | 9 +++++---- kernel/kthread.c | 3 +++ 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index cb91b44f5f78..ad2bcf647b9a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -528,6 +528,25 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) pr_cont_kernfs_path(cgrp->kn); } +static inline void cgroup_init_kthreadd(void) +{ + /* + * kthreadd is inherited by all kthreads, keep it in the root so + * that the new kthreads are guaranteed to stay in the root until + * initialization is finished. + */ + current->no_cgroup_migration = 1; +} + +static inline void cgroup_kthread_ready(void) +{ + /* + * This kthread finished initialization. The creator should have + * set PF_NO_SETAFFINITY if this kthread should stay in the root. + */ + current->no_cgroup_migration = 0; +} + #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; @@ -551,6 +570,8 @@ static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } +static inline void cgroup_init_kthreadd(void) {} +static inline void cgroup_kthread_ready(void) {} #endif /* !CONFIG_CGROUPS */ diff --git a/include/linux/sched.h b/include/linux/sched.h index ce0f61dcd887..352213b360d7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1475,6 +1475,10 @@ struct task_struct { #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif +#ifdef CONFIG_CGROUPS + /* disallow userland-initiated cgroup migration */ + unsigned no_cgroup_migration:1; +#endif unsigned long atomic_flags; /* Flags needing atomic access. */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 127c63e02d52..4cb94b678e9f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2752,11 +2752,12 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, tsk = tsk->group_leader; /* - * Workqueue threads may acquire PF_NO_SETAFFINITY and become - * trapped in a cpuset, or RT worker may be born in a cgroup - * with no rt_runtime allocated. Just say no. + * kthreads may acquire PF_NO_SETAFFINITY during initialization. + * If userland migrates such a kthread to a non-root cgroup, it can + * become trapped in a cpuset, or RT kthread may be born in a + * cgroup with no rt_runtime allocated. Just say no. */ - if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { + if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) { ret = -EINVAL; goto out_unlock_rcu; } diff --git a/kernel/kthread.c b/kernel/kthread.c index 9ff173dca1ae..850b255649a2 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -18,6 +18,7 @@ #include #include #include +#include #include static DEFINE_SPINLOCK(kthread_create_lock); @@ -205,6 +206,7 @@ static int kthread(void *_create) ret = -EINTR; if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) { + cgroup_kthread_ready(); __kthread_parkme(&self); ret = threadfn(data); } @@ -510,6 +512,7 @@ int kthreadd(void *unused) set_mems_allowed(node_states[N_MEMORY]); current->flags |= PF_NOFREEZE; + cgroup_init_kthreadd(); for (;;) { set_current_state(TASK_INTERRUPTIBLE); -- cgit v1.2.3 From ef4c962825c08609d8077c00cf73f26fbdc638cc Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 13 Apr 2017 14:56:28 -0700 Subject: thp: fix MADV_DONTNEED vs clear soft dirty race commit 5b7abeae3af8c08c577e599dd0578b9e3ee6687b upstream. Yet another instance of the same race. Fix is identical to change_huge_pmd(). See "thp: fix MADV_DONTNEED vs. numa balancing race" for more details. Link: http://lkml.kernel.org/r/20170302151034.27829-5-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/task_mmu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index d598b9c809c1..db1a1427c27a 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -803,7 +803,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { - pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp); + pmd_t pmd = *pmdp; + + /* See comment in change_huge_pmd() */ + pmdp_invalidate(vma, addr, pmdp); + if (pmd_dirty(*pmdp)) + pmd = pmd_mkdirty(pmd); + if (pmd_young(*pmdp)) + pmd = pmd_mkyoung(pmd); pmd = pmd_wrprotect(pmd); pmd = pmd_clear_soft_dirty(pmd); -- cgit v1.2.3 From a737abe4d09af3d461f0661ccde8ccec007a2db9 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 18 Mar 2017 21:53:05 -0400 Subject: drm/nouveau/mpeg: mthd returns true on success now commit 83bce9c2baa51e439480a713119a73d3c8b61083 upstream. Signed-off-by: Ilia Mirkin Fixes: 590801c1a3 ("drm/nouveau/mpeg: remove dependence on namedb/engctx lookup") Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c | 2 +- drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c index d4d8942b1347..e55f8302d08a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c @@ -198,7 +198,7 @@ nv31_mpeg_intr(struct nvkm_engine *engine) } if (type == 0x00000010) { - if (!nv31_mpeg_mthd(mpeg, mthd, data)) + if (nv31_mpeg_mthd(mpeg, mthd, data)) show &= ~0x01000000; } } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c index d433cfa4a8ab..36af0a8927fc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c @@ -172,7 +172,7 @@ nv44_mpeg_intr(struct nvkm_engine *engine) } if (type == 0x00000010) { - if (!nv44_mpeg_mthd(subdev->device, mthd, data)) + if (nv44_mpeg_mthd(subdev->device, mthd, data)) show &= ~0x01000000; } } -- cgit v1.2.3 From a11ab9dd4b789f5b7ecfc069a73cde2bd826f6ec Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 18 Mar 2017 16:23:10 -0400 Subject: drm/nouveau/mmu/nv4a: use nv04 mmu rather than the nv44 one commit f94773b9f5ecd1df7c88c2e921924dd41d2020cc upstream. The NV4A (aka NV44A) is an oddity in the family. It only comes in AGP and PCI varieties, rather than a core PCIE chip with a bridge for AGP/PCI as necessary. As a result, it appears that the MMU is also non-functional. For AGP cards, the vast majority of the NV4A lineup, this worked out since we force AGP cards to use the nv04 mmu. However for PCI variants, this did not work. Switching to the NV04 MMU makes it work like a charm. Thanks to mwk for the suggestion. This should be a no-op for NV4A AGP boards, as they were using it already. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70388 Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index ece9f4102c0e..7f8acb3ebfcd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -714,7 +714,7 @@ nv4a_chipset = { .i2c = nv04_i2c_new, .imem = nv40_instmem_new, .mc = nv44_mc_new, - .mmu = nv44_mmu_new, + .mmu = nv04_mmu_new, .pci = nv40_pci_new, .therm = nv40_therm_new, .timer = nv41_timer_new, -- cgit v1.2.3 From f0899d0e1e9ea7b71a3b05889c047d74b729dbf6 Mon Sep 17 00:00:00 2001 From: Germano Percossi Date: Fri, 7 Apr 2017 12:29:38 +0100 Subject: CIFS: store results of cifs_reopen_file to avoid infinite wait commit 1fa839b4986d648b907d117275869a0e46c324b9 upstream. This fixes Continuous Availability when errors during file reopen are encountered. cifs_user_readv and cifs_user_writev would wait for ever if results of cifs_reopen_file are not stored and for later inspection. In fact, results are checked and, in case of errors, a chain of function calls leading to reads and writes to be scheduled in a separate thread is skipped. These threads will wake up the corresponding waiters once reads and writes are done. However, given the return value is not stored, when rc is checked for errors a previous one (always zero) is inspected instead. This leads to pending reads/writes added to the list, making cifs_user_readv and cifs_user_writev wait for ever. Signed-off-by: Germano Percossi Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 72f270d4bd17..a0c0a49b6620 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2545,7 +2545,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, wdata->credits = credits; if (!wdata->cfile->invalidHandle || - !cifs_reopen_file(wdata->cfile, false)) + !(rc = cifs_reopen_file(wdata->cfile, false))) rc = server->ops->async_writev(wdata, cifs_uncached_writedata_release); if (rc) { @@ -2958,7 +2958,7 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, rdata->credits = credits; if (!rdata->cfile->invalidHandle || - !cifs_reopen_file(rdata->cfile, true)) + !(rc = cifs_reopen_file(rdata->cfile, true))) rc = server->ops->async_readv(rdata); error: if (rc) { @@ -3544,7 +3544,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, } if (!rdata->cfile->invalidHandle || - !cifs_reopen_file(rdata->cfile, true)) + !(rc = cifs_reopen_file(rdata->cfile, true))) rc = server->ops->async_readv(rdata); if (rc) { add_credits_and_wake_if(server, rdata->credits, 0); -- cgit v1.2.3 From a5e2f803b891f00d6019d727a6eb548c91a70b62 Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Mon, 10 Apr 2017 20:44:25 -0700 Subject: Input: xpad - add support for Razer Wildcat gamepad commit 5376366886251e2f8f248704adb620a4bc4c0937 upstream. Signed-off-by: Cameron Gutman Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 16f000a76de5..3258baf3282e 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -189,6 +189,7 @@ static const struct xpad_device { { 0x1430, 0x8888, "TX6500+ Dance Pad (first generation)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 }, { 0x1532, 0x0037, "Razer Sabertooth", 0, XTYPE_XBOX360 }, + { 0x1532, 0x0a03, "Razer Wildcat", 0, XTYPE_XBOXONE }, { 0x15e4, 0x3f00, "Power A Mini Pro Elite", 0, XTYPE_XBOX360 }, { 0x15e4, 0x3f0a, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 }, { 0x15e4, 0x3f10, "Batarang Xbox 360 controller", 0, XTYPE_XBOX360 }, @@ -310,6 +311,7 @@ static struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ XPAD_XBOX360_VENDOR(0x24c6), /* PowerA Controllers */ XPAD_XBOX360_VENDOR(0x1532), /* Razer Sabertooth */ + XPAD_XBOXONE_VENDOR(0x1532), /* Razer Wildcat */ XPAD_XBOX360_VENDOR(0x15e4), /* Numark X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x162e), /* Joytech X-Box 360 controllers */ { } -- cgit v1.2.3 From f42be33fe976d4c0812fb2f697543e1b5ac073be Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 11 Apr 2017 10:10:28 +0200 Subject: perf/x86: Avoid exposing wrong/stale data in intel_pmu_lbr_read_32() commit f2200ac311302fcdca6556fd0c5127eab6c65a3e upstream. When the perf_branch_entry::{in_tx,abort,cycles} fields were added, intel_pmu_lbr_read_32() wasn't updated to initialize them. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Fixes: 135c5612c460 ("perf/x86/intel: Support Haswell/v4 LBR format") Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/perf_event_intel_lbr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 659f01e165d5..8900400230c6 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -410,6 +410,9 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) cpuc->lbr_entries[i].to = msr_lastbranch.to; cpuc->lbr_entries[i].mispred = 0; cpuc->lbr_entries[i].predicted = 0; + cpuc->lbr_entries[i].in_tx = 0; + cpuc->lbr_entries[i].abort = 0; + cpuc->lbr_entries[i].cycles = 0; cpuc->lbr_entries[i].reserved = 0; } cpuc->lbr_stack.nr = i; -- cgit v1.2.3 From f1c5d01635862fb7b8b394fc1b6a1187751ce98e Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 10 Apr 2017 17:14:27 +0200 Subject: x86/vdso: Ensure vdso32_enabled gets set to valid values only commit c06989da39cdb10604d572c8c7ea8c8c97f3c483 upstream. vdso_enabled can be set to arbitrary integer values via the kernel command line 'vdso32=' parameter or via 'sysctl abi.vsyscall32'. load_vdso32() only maps VDSO if vdso_enabled == 1, but ARCH_DLINFO_IA32 merily checks for vdso_enabled != 0. As a consequence the AT_SYSINFO_EHDR auxiliary vector for the VDSO_ENTRY is emitted with a NULL pointer which causes a segfault when the application tries to use the VDSO. Restrict the valid arguments on the command line and the sysctl to 0 and 1. Fixes: b0b49f2673f0 ("x86, vdso: Remove compat vdso support") Signed-off-by: Mathias Krause Acked-by: Andy Lutomirski Cc: Peter Zijlstra Cc: Roland McGrath Link: http://lkml.kernel.org/r/1491424561-7187-1-git-send-email-minipli@googlemail.com Link: http://lkml.kernel.org/r/20170410151723.518412863@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/vdso/vdso32-setup.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c index 08a317a9ae4b..a7508d7e20b7 100644 --- a/arch/x86/entry/vdso/vdso32-setup.c +++ b/arch/x86/entry/vdso/vdso32-setup.c @@ -31,8 +31,10 @@ static int __init vdso32_setup(char *s) { vdso32_enabled = simple_strtoul(s, NULL, 0); - if (vdso32_enabled > 1) + if (vdso32_enabled > 1) { pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n"); + vdso32_enabled = 0; + } return 1; } @@ -63,13 +65,18 @@ subsys_initcall(sysenter_setup); /* Register vsyscall32 into the ABI table */ #include +static const int zero; +static const int one = 1; + static struct ctl_table abi_table2[] = { { .procname = "vsyscall32", .data = &vdso32_enabled, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = (int *)&zero, + .extra2 = (int *)&one, }, {} }; -- cgit v1.2.3 From ec3978e10ecc61834c9f57dd9d00492b137fa01c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Apr 2017 17:14:28 +0200 Subject: x86/vdso: Plug race between mapping and ELF header setup commit 6fdc6dd90272ce7e75d744f71535cfbd8d77da81 upstream. The vsyscall32 sysctl can racy against a concurrent fork when it switches from disabled to enabled: arch_setup_additional_pages() if (vdso32_enabled) --> No mapping sysctl.vsysscall32() --> vdso32_enabled = true create_elf_tables() ARCH_DLINFO_IA32 if (vdso32_enabled) { --> Add VDSO entry with NULL pointer Make ARCH_DLINFO_IA32 check whether the VDSO mapping has been set up for the newly forked process or not. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Cc: Peter Zijlstra Cc: Mathias Krause Link: http://lkml.kernel.org/r/20170410151723.602367196@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 1514753fd435..d262f985bbc8 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -278,7 +278,7 @@ struct task_struct; #define ARCH_DLINFO_IA32 \ do { \ - if (vdso32_enabled) { \ + if (VDSO_CURRENT_BASE) { \ NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ } \ -- cgit v1.2.3 From 074bcc1302fd4357fa30c167bb20f684998b025f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 27 Mar 2017 21:53:38 -0700 Subject: acpi, nfit, libnvdimm: fix interleave set cookie calculation (64-bit comparison) commit b03b99a329a14b7302f37c3ea6da3848db41c8c5 upstream. While reviewing the -stable patch for commit 86ef58a4e35e "nfit, libnvdimm: fix interleave set cookie calculation" Ben noted: "This is returning an int, thus it's effectively doing a 32-bit comparison and not the 64-bit comparison you say is needed." Update the compare operation to be immune to this integer demotion problem. Cc: Nicholas Moulin Fixes: 86ef58a4e35e ("nfit, libnvdimm: fix interleave set cookie calculation") Reported-by: Ben Hutchings Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/nfit.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 14c2a07c9f3f..67d7489ced01 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -979,7 +979,11 @@ static int cmp_map(const void *m0, const void *m1) const struct nfit_set_info_map *map0 = m0; const struct nfit_set_info_map *map1 = m1; - return map0->region_offset - map1->region_offset; + if (map0->region_offset < map1->region_offset) + return -1; + else if (map0->region_offset > map1->region_offset) + return 1; + return 0; } /* Retrieve the nth entry referencing this spa */ -- cgit v1.2.3 From 05c5dd75d77c8176c2beef56471868e29c19b47c Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 23 Mar 2017 17:19:24 -0700 Subject: iscsi-target: Fix TMR reference leak during session shutdown commit efb2ea770bb3b0f40007530bc8b0c22f36e1c5eb upstream. This patch fixes a iscsi-target specific TMR reference leak during session shutdown, that could occur when a TMR was quiesced before the hand-off back to iscsi-target code via transport_cmd_check_stop_to_fabric(). The reference leak happens because iscsit_free_cmd() was incorrectly skipping the final target_put_sess_cmd() for TMRs when transport_generic_free_cmd() returned zero because the se_cmd->cmd_kref did not reach zero, due to the missing se_cmd assignment in original code. The result was iscsi_cmd and it's associated se_cmd memory would be freed once se_sess->sess_cmd_map where released, but the associated se_tmr_req was leaked and remained part of se_device->dev_tmr_list. This bug would manfiest itself as kernel paging request OOPsen in core_tmr_lun_reset(), when a left-over se_tmr_req attempted to dereference it's se_cmd pointer that had already been released during normal session shutdown. To address this bug, go ahead and treat ISCSI_OP_SCSI_CMD and ISCSI_OP_SCSI_TMFUNC the same when there is an extra se_cmd->cmd_kref to drop in iscsit_free_cmd(), and use op_scsi to signal __iscsit_free_cmd() when the former needs to clear any further iscsi related I/O state. Reported-by: Rob Millner Cc: Rob Millner Reported-by: Chu Yuan Lin Cc: Chu Yuan Lin Tested-by: Chu Yuan Lin Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_util.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 428b0d9e3dba..93590521ae33 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -731,21 +731,23 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) { struct se_cmd *se_cmd = NULL; int rc; + bool op_scsi = false; /* * Determine if a struct se_cmd is associated with * this struct iscsi_cmd. */ switch (cmd->iscsi_opcode) { case ISCSI_OP_SCSI_CMD: - se_cmd = &cmd->se_cmd; - __iscsit_free_cmd(cmd, true, shutdown); + op_scsi = true; /* * Fallthrough */ case ISCSI_OP_SCSI_TMFUNC: - rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown); - if (!rc && shutdown && se_cmd && se_cmd->se_sess) { - __iscsit_free_cmd(cmd, true, shutdown); + se_cmd = &cmd->se_cmd; + __iscsit_free_cmd(cmd, op_scsi, shutdown); + rc = transport_generic_free_cmd(se_cmd, shutdown); + if (!rc && shutdown && se_cmd->se_sess) { + __iscsit_free_cmd(cmd, op_scsi, shutdown); target_put_sess_cmd(se_cmd); } break; -- cgit v1.2.3 From 1e1de2e841e141991250d7c0ac79c5877a43b6a4 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 2 Apr 2017 13:36:44 -0700 Subject: iscsi-target: Drop work-around for legacy GlobalSAN initiator commit 1c99de981f30b3e7868b8d20ce5479fa1c0fea46 upstream. Once upon a time back in 2009, a work-around was added to support the GlobalSAN iSCSI initiator v3.3 for MacOSX, which during login did not propose nor respond to MaxBurstLength, FirstBurstLength, DefaultTime2Wait and DefaultTime2Retain keys. The work-around in iscsi_check_proposer_for_optional_reply() allowed the missing keys to be proposed, but did not require waiting for a response before moving to full feature phase operation. This allowed GlobalSAN v3.3 to work out-of-the box, and for many years we didn't run into login interopt issues with any other initiators.. Until recently, when Martin tried a QLogic 57840S iSCSI Offload HBA on Windows 2016 which completed login, but subsequently failed with: Got unknown iSCSI OpCode: 0x43 The issue was QLogic MSFT side did not propose DefaultTime2Wait + DefaultTime2Retain, so LIO proposes them itself, and immediately transitions to full feature phase because of the GlobalSAN hack. However, the QLogic MSFT side still attempts to respond to DefaultTime2Retain + DefaultTime2Wait, even though LIO has set ISCSI_FLAG_LOGIN_NEXT_STAGE3 + ISCSI_FLAG_LOGIN_TRANSIT in last login response. So while the QLogic MSFT side should have been proposing these two keys to start, it was doing the correct thing per RFC-3720 attempting to respond to proposed keys before transitioning to full feature phase. All that said, recent versions of GlobalSAN iSCSI (v5.3.0.541) does correctly propose the four keys during login, making the original work-around moot. So in order to allow QLogic MSFT to run unmodified as-is, go ahead and drop this long standing work-around. Reported-by: Martin Svec Cc: Martin Svec Cc: Himanshu Madhani Cc: Arun Easi Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_parameters.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index 2cbea2af7cd0..6d1b0acbc5b3 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -780,22 +780,6 @@ static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param) } else if (IS_TYPE_NUMBER(param)) { if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) SET_PSTATE_REPLY_OPTIONAL(param); - /* - * The GlobalSAN iSCSI Initiator for MacOSX does - * not respond to MaxBurstLength, FirstBurstLength, - * DefaultTime2Wait or DefaultTime2Retain parameter keys. - * So, we set them to 'reply optional' here, and assume the - * the defaults from iscsi_parameters.h if the initiator - * is not RFC compliant and the keys are not negotiated. - */ - if (!strcmp(param->name, MAXBURSTLENGTH)) - SET_PSTATE_REPLY_OPTIONAL(param); - if (!strcmp(param->name, FIRSTBURSTLENGTH)) - SET_PSTATE_REPLY_OPTIONAL(param); - if (!strcmp(param->name, DEFAULTTIME2WAIT)) - SET_PSTATE_REPLY_OPTIONAL(param); - if (!strcmp(param->name, DEFAULTTIME2RETAIN)) - SET_PSTATE_REPLY_OPTIONAL(param); /* * Required for gPXE iSCSI boot client */ -- cgit v1.2.3 From 925adae6664c0b9f5193876e9aeb2640a7e977d5 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 17 Mar 2017 08:47:14 -0400 Subject: scsi: sr: Sanity check returned mode data commit a00a7862513089f17209b732f230922f1942e0b9 upstream. Kefeng Wang discovered that old versions of the QEMU CD driver would return mangled mode data causing us to walk off the end of the buffer in an attempt to parse it. Sanity check the returned mode sense data. Reported-by: Kefeng Wang Tested-by: Kefeng Wang Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 64c867405ad4..804586aeaffe 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -834,6 +834,7 @@ static void get_capabilities(struct scsi_cd *cd) unsigned char *buffer; struct scsi_mode_data data; struct scsi_sense_hdr sshdr; + unsigned int ms_len = 128; int rc, n; static const char *loadmech[] = @@ -860,10 +861,11 @@ static void get_capabilities(struct scsi_cd *cd) scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr); /* ask for mode page 0x2a */ - rc = scsi_mode_sense(cd->device, 0, 0x2a, buffer, 128, + rc = scsi_mode_sense(cd->device, 0, 0x2a, buffer, ms_len, SR_TIMEOUT, 3, &data, NULL); - if (!scsi_status_is_good(rc)) { + if (!scsi_status_is_good(rc) || data.length > ms_len || + data.header_length + data.block_descriptor_length > data.length) { /* failed, drive doesn't have capabilities mode page */ cd->cdi.speed = 1; cd->cdi.mask |= (CDC_CD_R | CDC_CD_RW | CDC_DVD_R | -- cgit v1.2.3 From 448961955592c46f1490fb6ca8d3e52ce17e6222 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Tue, 28 Mar 2017 12:41:26 +0800 Subject: scsi: sd: Consider max_xfer_blocks if opt_xfer_blocks is unusable commit 6780414519f91c2a84da9baa963a940ac916f803 upstream. If device reports a small max_xfer_blocks and a zero opt_xfer_blocks, we end up using BLK_DEF_MAX_SECTORS, which is wrong and r/w of that size may get error. [mkp: tweaked to avoid setting rw_max twice and added typecast] Fixes: ca369d51b3e ("block/sd: Fix device-imposed transfer length limits") Signed-off-by: Fam Zheng Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 78430ef28ea4..d2877d713b62 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2888,7 +2888,8 @@ static int sd_revalidate_disk(struct gendisk *disk) q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks); rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks); } else - rw_max = BLK_DEF_MAX_SECTORS; + rw_max = min_not_zero(logical_to_sectors(sdp, dev_max), + (sector_t)BLK_DEF_MAX_SECTORS); /* Combine with controller limits */ q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q)); -- cgit v1.2.3 From b689dfbed8c8432a18c73fc261c030d8b3e24e00 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 4 Apr 2017 10:42:30 -0400 Subject: scsi: sd: Fix capacity calculation with 32-bit sector_t commit 7c856152cb92f8eee2df29ef325a1b1f43161aff upstream. We previously made sure that the reported disk capacity was less than 0xffffffff blocks when the kernel was not compiled with large sector_t support (CONFIG_LBDAF). However, this check assumed that the capacity was reported in units of 512 bytes. Add a sanity check function to ensure that we only enable disks if the entire reported capacity can be expressed in terms of sector_t. Reported-by: Steve Magnani Cc: Bart Van Assche Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index d2877d713b62..4d5207dff960 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2051,6 +2051,22 @@ static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp, #define READ_CAPACITY_RETRIES_ON_RESET 10 +/* + * Ensure that we don't overflow sector_t when CONFIG_LBDAF is not set + * and the reported logical block size is bigger than 512 bytes. Note + * that last_sector is a u64 and therefore logical_to_sectors() is not + * applicable. + */ +static bool sd_addressable_capacity(u64 lba, unsigned int sector_size) +{ + u64 last_sector = (lba + 1ULL) << (ilog2(sector_size) - 9); + + if (sizeof(sector_t) == 4 && last_sector > U32_MAX) + return false; + + return true; +} + static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, unsigned char *buffer) { @@ -2116,7 +2132,7 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, return -ENODEV; } - if ((sizeof(sdkp->capacity) == 4) && (lba >= 0xffffffffULL)) { + if (!sd_addressable_capacity(lba, sector_size)) { sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a " "kernel compiled with support for large block " "devices.\n"); @@ -2202,7 +2218,7 @@ static int read_capacity_10(struct scsi_disk *sdkp, struct scsi_device *sdp, return sector_size; } - if ((sizeof(sdkp->capacity) == 4) && (lba == 0xffffffff)) { + if (!sd_addressable_capacity(lba, sector_size)) { sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a " "kernel compiled with support for large block " "devices.\n"); -- cgit v1.2.3 From 6058cf9929d9bbeb4a781c51e91866716cb5277f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 7 Apr 2017 17:28:23 +0200 Subject: xen, fbfront: fix connecting to backend commit 9121b15b5628b38b4695282dc18c553440e0f79b upstream. Connecting to the backend isn't working reliably in xen-fbfront: in case XenbusStateInitWait of the backend has been missed the backend transition to XenbusStateConnected will trigger the connected state only without doing the actions required when the backend has connected. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/xen-fbfront.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/xen-fbfront.c b/drivers/video/fbdev/xen-fbfront.c index 0567d517eed3..ea2f19f5fbde 100644 --- a/drivers/video/fbdev/xen-fbfront.c +++ b/drivers/video/fbdev/xen-fbfront.c @@ -644,7 +644,6 @@ static void xenfb_backend_changed(struct xenbus_device *dev, break; case XenbusStateInitWait: -InitWait: xenbus_switch_state(dev, XenbusStateConnected); break; @@ -655,7 +654,8 @@ InitWait: * get Connected twice here. */ if (dev->state != XenbusStateConnected) - goto InitWait; /* no InitWait seen yet, fudge it */ + /* no InitWait seen yet, fudge it */ + xenbus_switch_state(dev, XenbusStateConnected); if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, "request-update", "%d", &val) < 0) -- cgit v1.2.3 From 66b531d3ff113d1e440bde5cd167ed49063fd070 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 7 Apr 2017 09:47:24 -0700 Subject: libnvdimm: fix reconfig_mutex, mmap_sem, and jbd2_handle lockdep splat commit 0beb2012a1722633515c8aaa263c73449636c893 upstream. Holding the reconfig_mutex over a potential userspace fault sets up a lockdep dependency chain between filesystem-DAX and the libnvdimm ioctl path. Move the user access outside of the lock. [ INFO: possible circular locking dependency detected ] 4.11.0-rc3+ #13 Tainted: G W O ------------------------------------------------------- fallocate/16656 is trying to acquire lock: (&nvdimm_bus->reconfig_mutex){+.+.+.}, at: [] nvdimm_bus_lock+0x21/0x30 [libnvdimm] but task is already holding lock: (jbd2_handle){++++..}, at: [] start_this_handle+0x104/0x460 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (jbd2_handle){++++..}: lock_acquire+0xbd/0x200 start_this_handle+0x16a/0x460 jbd2__journal_start+0xe9/0x2d0 __ext4_journal_start_sb+0x89/0x1c0 ext4_dirty_inode+0x32/0x70 __mark_inode_dirty+0x235/0x670 generic_update_time+0x87/0xd0 touch_atime+0xa9/0xd0 ext4_file_mmap+0x90/0xb0 mmap_region+0x370/0x5b0 do_mmap+0x415/0x4f0 vm_mmap_pgoff+0xd7/0x120 SyS_mmap_pgoff+0x1c5/0x290 SyS_mmap+0x22/0x30 entry_SYSCALL_64_fastpath+0x1f/0xc2 -> #1 (&mm->mmap_sem){++++++}: lock_acquire+0xbd/0x200 __might_fault+0x70/0xa0 __nd_ioctl+0x683/0x720 [libnvdimm] nvdimm_ioctl+0x8b/0xe0 [libnvdimm] do_vfs_ioctl+0xa8/0x740 SyS_ioctl+0x79/0x90 do_syscall_64+0x6c/0x200 return_from_SYSCALL_64+0x0/0x7a -> #0 (&nvdimm_bus->reconfig_mutex){+.+.+.}: __lock_acquire+0x16b6/0x1730 lock_acquire+0xbd/0x200 __mutex_lock+0x88/0x9b0 mutex_lock_nested+0x1b/0x20 nvdimm_bus_lock+0x21/0x30 [libnvdimm] nvdimm_forget_poison+0x25/0x50 [libnvdimm] nvdimm_clear_poison+0x106/0x140 [libnvdimm] pmem_do_bvec+0x1c2/0x2b0 [nd_pmem] pmem_make_request+0xf9/0x270 [nd_pmem] generic_make_request+0x118/0x3b0 submit_bio+0x75/0x150 Fixes: 62232e45f4a2 ("libnvdimm: control (ioctl) messages for nvdimm_bus and nvdimm devices") Cc: Dave Jiang Reported-by: Vishal Verma Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/bus.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 5f47356d6942..254b0ee37039 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -590,8 +590,14 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len); if (rc < 0) goto out_unlock; + nvdimm_bus_unlock(&nvdimm_bus->dev); + if (copy_to_user(p, buf, buf_len)) rc = -EFAULT; + + vfree(buf); + return rc; + out_unlock: nvdimm_bus_unlock(&nvdimm_bus->dev); out: -- cgit v1.2.3 From c51451e43bf19bb36fa50d81ae736dd9e7d66d4a Mon Sep 17 00:00:00 2001 From: Tyler Baker Date: Thu, 13 Apr 2017 15:27:31 -0700 Subject: irqchip/irq-imx-gpcv2: Fix spinlock initialization commit 75eb5e1e7b4edbc8e8f930de59004d21cb46961f upstream. The raw_spinlock in the IMX GPCV2 interupt chip is not initialized before usage. That results in a lockdep splat: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. Add the missing raw_spin_lock_init() to the setup code. Fixes: e324c4dc4a59 ("irqchip/imx-gpcv2: IMX GPCv2 driver for wakeup sources") Signed-off-by: Tyler Baker Reviewed-by: Fabio Estevam Cc: jason@lakedaemon.net Cc: marc.zyngier@arm.com Cc: shawnguo@kernel.org Cc: andrew.smirnov@gmail.com Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20170413222731.5917-1-tyler.baker@linaro.org Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-imx-gpcv2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/irqchip/irq-imx-gpcv2.c b/drivers/irqchip/irq-imx-gpcv2.c index 15af9a9753e5..2d203b422129 100644 --- a/drivers/irqchip/irq-imx-gpcv2.c +++ b/drivers/irqchip/irq-imx-gpcv2.c @@ -230,6 +230,8 @@ static int __init imx_gpcv2_irqchip_init(struct device_node *node, return -ENOMEM; } + raw_spin_lock_init(&cd->rlock); + cd->gpc_base = of_iomap(node, 0); if (!cd->gpc_base) { pr_err("fsl-gpcv2: unable to map gpc registers\n"); -- cgit v1.2.3 From 7fe57118a7c002c59e4087806f8f5a9f4a0b037f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 14 Apr 2017 17:45:45 -0400 Subject: ftrace: Fix removing of second function probe commit 82cc4fc2e70ec5baeff8f776f2773abc8b2cc0ae upstream. When two function probes are added to set_ftrace_filter, and then one of them is removed, the update to the function locations is not performed, and the record keeping of the function states are corrupted, and causes an ftrace_bug() to occur. This is easily reproducable by adding two probes, removing one, and then adding it back again. # cd /sys/kernel/debug/tracing # echo schedule:traceoff > set_ftrace_filter # echo do_IRQ:traceoff > set_ftrace_filter # echo \!do_IRQ:traceoff > /debug/tracing/set_ftrace_filter # echo do_IRQ:traceoff > set_ftrace_filter Causes: ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1098 at kernel/trace/ftrace.c:2369 ftrace_get_addr_curr+0x143/0x220 Modules linked in: [...] CPU: 2 PID: 1098 Comm: bash Not tainted 4.10.0-test+ #405 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012 Call Trace: dump_stack+0x68/0x9f __warn+0x111/0x130 ? trace_irq_work_interrupt+0xa0/0xa0 warn_slowpath_null+0x1d/0x20 ftrace_get_addr_curr+0x143/0x220 ? __fentry__+0x10/0x10 ftrace_replace_code+0xe3/0x4f0 ? ftrace_int3_handler+0x90/0x90 ? printk+0x99/0xb5 ? 0xffffffff81000000 ftrace_modify_all_code+0x97/0x110 arch_ftrace_update_code+0x10/0x20 ftrace_run_update_code+0x1c/0x60 ftrace_run_modify_code.isra.48.constprop.62+0x8e/0xd0 register_ftrace_function_probe+0x4b6/0x590 ? ftrace_startup+0x310/0x310 ? debug_lockdep_rcu_enabled.part.4+0x1a/0x30 ? update_stack_state+0x88/0x110 ? ftrace_regex_write.isra.43.part.44+0x1d3/0x320 ? preempt_count_sub+0x18/0xd0 ? mutex_lock_nested+0x104/0x800 ? ftrace_regex_write.isra.43.part.44+0x1d3/0x320 ? __unwind_start+0x1c0/0x1c0 ? _mutex_lock_nest_lock+0x800/0x800 ftrace_trace_probe_callback.isra.3+0xc0/0x130 ? func_set_flag+0xe0/0xe0 ? __lock_acquire+0x642/0x1790 ? __might_fault+0x1e/0x20 ? trace_get_user+0x398/0x470 ? strcmp+0x35/0x60 ftrace_trace_onoff_callback+0x48/0x70 ftrace_regex_write.isra.43.part.44+0x251/0x320 ? match_records+0x420/0x420 ftrace_filter_write+0x2b/0x30 __vfs_write+0xd7/0x330 ? do_loop_readv_writev+0x120/0x120 ? locks_remove_posix+0x90/0x2f0 ? do_lock_file_wait+0x160/0x160 ? __lock_is_held+0x93/0x100 ? rcu_read_lock_sched_held+0x5c/0xb0 ? preempt_count_sub+0x18/0xd0 ? __sb_start_write+0x10a/0x230 ? vfs_write+0x222/0x240 vfs_write+0xef/0x240 SyS_write+0xab/0x130 ? SyS_read+0x130/0x130 ? trace_hardirqs_on_caller+0x182/0x280 ? trace_hardirqs_on_thunk+0x1a/0x1c entry_SYSCALL_64_fastpath+0x18/0xad RIP: 0033:0x7fe61c157c30 RSP: 002b:00007ffe87890258 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: ffffffff8114a410 RCX: 00007fe61c157c30 RDX: 0000000000000010 RSI: 000055814798f5e0 RDI: 0000000000000001 RBP: ffff8800c9027f98 R08: 00007fe61c422740 R09: 00007fe61ca53700 R10: 0000000000000073 R11: 0000000000000246 R12: 0000558147a36400 R13: 00007ffe8788f160 R14: 0000000000000024 R15: 00007ffe8788f15c ? trace_hardirqs_off_caller+0xc0/0x110 ---[ end trace 99fa09b3d9869c2c ]--- Bad trampoline accounting at: ffffffff81cc3b00 (do_IRQ+0x0/0x150) Fixes: 59df055f1991 ("ftrace: trace different functions with a different tracer") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3f743b147247..34b2a0d5cf1a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3677,23 +3677,24 @@ static void __enable_ftrace_function_probe(struct ftrace_ops_hash *old_hash) ftrace_probe_registered = 1; } -static void __disable_ftrace_function_probe(void) +static bool __disable_ftrace_function_probe(void) { int i; if (!ftrace_probe_registered) - return; + return false; for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { struct hlist_head *hhd = &ftrace_func_hash[i]; if (hhd->first) - return; + return false; } /* no more funcs left */ ftrace_shutdown(&trace_probe_ops, 0); ftrace_probe_registered = 0; + return true; } @@ -3820,6 +3821,7 @@ static void __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, void *data, int flags) { + struct ftrace_ops_hash old_hash_ops; struct ftrace_func_entry *rec_entry; struct ftrace_func_probe *entry; struct ftrace_func_probe *p; @@ -3831,6 +3833,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, struct hlist_node *tmp; char str[KSYM_SYMBOL_LEN]; int i, ret; + bool disabled; if (glob && (strcmp(glob, "*") == 0 || !strlen(glob))) func_g.search = NULL; @@ -3849,6 +3852,10 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, mutex_lock(&trace_probe_ops.func_hash->regex_lock); + old_hash_ops.filter_hash = old_hash; + /* Probes only have filters */ + old_hash_ops.notrace_hash = NULL; + hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); if (!hash) /* Hmm, should report this somehow */ @@ -3886,12 +3893,17 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, } } mutex_lock(&ftrace_lock); - __disable_ftrace_function_probe(); + disabled = __disable_ftrace_function_probe(); /* * Remove after the disable is called. Otherwise, if the last * probe is removed, a null hash means *all enabled*. */ ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash); + + /* still need to update the function call sites */ + if (ftrace_enabled && !disabled) + ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS, + &old_hash_ops); synchronize_sched(); if (!ret) free_ftrace_hash_rcu(old_hash); -- cgit v1.2.3 From 0a6aa0d1cf27e9ca7b309cc86aa6b100754f88a4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 Apr 2016 10:40:55 +0200 Subject: char: Drop bogus dependency of DEVPORT on !M68K commit 309124e2648d668a0c23539c5078815660a4a850 upstream. According to full-history-linux commit d3794f4fa7c3edc3 ("[PATCH] M68k update (part 25)"), port operations are allowed on m68k if CONFIG_ISA is defined. However, commit 153dcc54df826d2f ("[PATCH] mem driver: fix conditional on isa i/o support") accidentally changed an "||" into an "&&", disabling it completely on m68k. This logic was retained when introducing the DEVPORT symbol in commit 4f911d64e04a44c4 ("Make /dev/port conditional on config symbol"). Drop the bogus dependency on !M68K to fix this. Fixes: 153dcc54df826d2f ("[PATCH] mem driver: fix conditional on isa i/o support") Signed-off-by: Geert Uytterhoeven Tested-by: Al Stone Signed-off-by: Greg Kroah-Hartman --- drivers/char/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index a043107da2af..b130d38cf55c 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -584,7 +584,6 @@ config TELCLOCK config DEVPORT bool - depends on !M68K depends on ISA || PCI default y -- cgit v1.2.3 From a32c5331b462670093ec809ec063ad7d28f47126 Mon Sep 17 00:00:00 2001 From: Max Bires Date: Tue, 3 Jan 2017 08:18:07 -0800 Subject: char: lack of bool string made CONFIG_DEVPORT always on commit f2cfa58b136e4b06a9b9db7af5ef62fbb5992f62 upstream. Without a bool string present, using "# CONFIG_DEVPORT is not set" in defconfig files would not actually unset devport. This esnured that /dev/port was always on, but there are reasons a user may wish to disable it (smaller kernel, attack surface reduction) if it's not being used. Adding a message here in order to make this user visible. Signed-off-by: Max Bires Acked-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/char/Kconfig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index b130d38cf55c..3143db57ce44 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -583,9 +583,12 @@ config TELCLOCK controlling the behavior of this hardware. config DEVPORT - bool + bool "/dev/port character device" depends on ISA || PCI default y + help + Say Y here if you want to support the /dev/port device. The /dev/port + device is similar to /dev/mem, but for I/O ports. source "drivers/s390/char/Kconfig" -- cgit v1.2.3 From 98c953a0a51fffa0904e143694222b213fa3c68f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 18 Apr 2017 16:16:57 +0200 Subject: Revert "MIPS: Lantiq: Fix cascaded IRQ setup" This reverts commit 6280ac931a23d3fa40cd26057576abcf90a4f22d which is commit 6c356eda225e3ee134ed4176b9ae3a76f793f4dd upstream. It shouldn't have been included in a stable release. Reported-by: Amit Pundir Cc: Felix Fietkau Cc: John Crispin Cc: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/lantiq/irq.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index 51cdc46a87e2..2e7f60c9fc5d 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -269,11 +269,6 @@ static void ltq_hw5_irqdispatch(void) DEFINE_HWx_IRQDISPATCH(5) #endif -static void ltq_hw_irq_handler(struct irq_desc *desc) -{ - ltq_hw_irqdispatch(irq_desc_get_irq(desc) - 2); -} - #ifdef CONFIG_MIPS_MT_SMP void __init arch_init_ipiirq(int irq, struct irqaction *action) { @@ -318,19 +313,23 @@ static struct irqaction irq_call = { asmlinkage void plat_irq_dispatch(void) { unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM; - int irq; - - if (!pending) { - spurious_interrupt(); - return; + unsigned int i; + + if ((MIPS_CPU_TIMER_IRQ == 7) && (pending & CAUSEF_IP7)) { + do_IRQ(MIPS_CPU_TIMER_IRQ); + goto out; + } else { + for (i = 0; i < MAX_IM; i++) { + if (pending & (CAUSEF_IP2 << i)) { + ltq_hw_irqdispatch(i); + goto out; + } + } } + pr_alert("Spurious IRQ: CAUSE=0x%08x\n", read_c0_status()); - pending >>= CAUSEB_IP; - while (pending) { - irq = fls(pending) - 1; - do_IRQ(MIPS_CPU_IRQ_BASE + irq); - pending &= ~BIT(irq); - } +out: + return; } static int icu_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) @@ -355,6 +354,11 @@ static const struct irq_domain_ops irq_domain_ops = { .map = icu_map, }; +static struct irqaction cascade = { + .handler = no_action, + .name = "cascade", +}; + int __init icu_of_init(struct device_node *node, struct device_node *parent) { struct device_node *eiu_node; @@ -386,7 +390,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent) mips_cpu_irq_init(); for (i = 0; i < MAX_IM; i++) - irq_set_chained_handler(i + 2, ltq_hw_irq_handler); + setup_irq(i + 2, &cascade); if (cpu_has_vint) { pr_info("Setting up vectored interrupts\n"); -- cgit v1.2.3 From c1fc1d2f214e33f91565a65ad1b4c09dae618d84 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 24 Jan 2017 11:56:21 +0100 Subject: kvm: fix page struct leak in handle_vmon commit 06ce521af9558814b8606c0476c54497cf83a653 upstream. handle_vmon gets a reference on VMXON region page, but does not release it. Release the reference. Found by syzkaller; based on a patch by Dmitry. Reported-by: Dmitry Vyukov Signed-off-by: Paolo Bonzini [bwh: Backported to 3.16: use skip_emulated_instruction()] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3a7ae80dc49d..0a472e9865c5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6678,14 +6678,20 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, } page = nested_get_page(vcpu, vmptr); - if (page == NULL || - *(u32 *)kmap(page) != VMCS12_REVISION) { + if (page == NULL) { nested_vmx_failInvalid(vcpu); + skip_emulated_instruction(vcpu); + return 1; + } + if (*(u32 *)kmap(page) != VMCS12_REVISION) { kunmap(page); + nested_release_page_clean(page); + nested_vmx_failInvalid(vcpu); skip_emulated_instruction(vcpu); return 1; } kunmap(page); + nested_release_page_clean(page); vmx->nested.vmxon_ptr = vmptr; break; case EXIT_REASON_VMCLEAR: -- cgit v1.2.3 From 9286385a3452d7eeb01bfb94676389bba6f59ebd Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 13 Apr 2017 14:56:37 -0700 Subject: zram: do not use copy_page with non-page aligned address commit d72e9a7a93e4f8e9e52491921d99e0c8aa89eb4e upstream. The copy_page is optimized memcpy for page-alinged address. If it is used with non-page aligned address, it can corrupt memory which means system corruption. With zram, it can happen with 1. 64K architecture 2. partial IO 3. slub debug Partial IO need to allocate a page and zram allocates it via kmalloc. With slub debug, kmalloc(PAGE_SIZE) doesn't return page-size aligned address. And finally, copy_page(mem, cmem) corrupts memory. So, this patch changes it to memcpy. Actuaully, we don't need to change zram_bvec_write part because zsmalloc returns page-aligned address in case of PAGE_SIZE class but it's not good to rely on the internal of zsmalloc. Note: When this patch is merged to stable, clear_page should be fixed, too. Unfortunately, recent zram removes it by "same page merge" feature so it's hard to backport this patch to -stable tree. I will handle it when I receive the mail from stable tree maintainer to merge this patch to backport. Fixes: 42e99bd ("zram: optimize memory operations with clear_page()/copy_page()") Link: http://lkml.kernel.org/r/1492042622-12074-2-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/block/zram/zram_drv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 1648de80e230..62a93b685c54 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -574,13 +574,13 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); - clear_page(mem); + memset(mem, 0, PAGE_SIZE); return 0; } cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); if (size == PAGE_SIZE) - copy_page(mem, cmem); + memcpy(mem, cmem, PAGE_SIZE); else ret = zcomp_decompress(zram->comp, cmem, size, mem); zs_unmap_object(meta->mem_pool, handle); @@ -738,7 +738,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { src = kmap_atomic(page); - copy_page(cmem, src); + memcpy(cmem, src, PAGE_SIZE); kunmap_atomic(src); } else { memcpy(cmem, src, clen); -- cgit v1.2.3 From 70e55aaf9f8cb4a74ca2744457b1d817353090e3 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 20 Mar 2017 17:49:03 +1100 Subject: powerpc: Disable HFSCR[TM] if TM is not supported commit 7ed23e1bae8bf7e37fd555066550a00b95a3a98b upstream. On Power8 & Power9 the early CPU inititialisation in __init_HFSCR() turns on HFSCR[TM] (Hypervisor Facility Status and Control Register [Transactional Memory]), but that doesn't take into account that TM might be disabled by CPU features, or disabled by the kernel being built with CONFIG_PPC_TRANSACTIONAL_MEM=n. So later in boot, when we have setup the CPU features, clear HSCR[TM] if the TM CPU feature has been disabled. We use CPU_FTR_TM_COMP to account for the CONFIG_PPC_TRANSACTIONAL_MEM=n case. Without this a KVM guest might try use TM, even if told not to, and cause an oops in the host kernel. Typically the oops is seen in __kvmppc_vcore_entry() and may or may not be fatal to the host, but is always bad news. In practice all shipping CPU revisions do support TM, and all host kernels we are aware of build with TM support enabled, so no one should actually be able to hit this in the wild. Fixes: 2a3563b023e5 ("powerpc: Setup in HFSCR for POWER8") Cc: stable@vger.kernel.org # v3.10+ Signed-off-by: Benjamin Herrenschmidt Tested-by: Sam Bobroff [mpe: Rewrite change log with input from Sam, add Fixes/stable] Signed-off-by: Michael Ellerman [sb: Backported to linux-4.4.y: adjusted context] Signed-off-by: Sam Bobroff Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/setup_64.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 5c03a6a9b054..a20823210ac0 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -220,6 +220,15 @@ static void cpu_ready_for_interrupts(void) unsigned long lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); } + + /* + * Fixup HFSCR:TM based on CPU features. The bit is set by our + * early asm init because at that point we haven't updated our + * CPU features from firmware and device-tree. Here we have, + * so let's do it. + */ + if (cpu_has_feature(CPU_FTR_HVMODE) && !cpu_has_feature(CPU_FTR_TM_COMP)) + mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM); } /* -- cgit v1.2.3 From 2673d1c5122ee2492e24d9a135e230b2d0b2e630 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 10 Apr 2017 17:27:57 +0800 Subject: crypto: ahash - Fix EINPROGRESS notification callback commit ef0579b64e93188710d48667cb5e014926af9f1b upstream. The ahash API modifies the request's callback function in order to clean up after itself in some corner cases (unaligned final and missing finup). When the request is complete ahash will restore the original callback and everything is fine. However, when the request gets an EBUSY on a full queue, an EINPROGRESS callback is made while the request is still ongoing. In this case the ahash API will incorrectly call its own callback. This patch fixes the problem by creating a temporary request object on the stack which is used to relay EINPROGRESS back to the original completion function. This patch also adds code to preserve the original flags value. Fixes: ab6bf4e5e5e4 ("crypto: hash - Fix the pointer voodoo in...") Reported-by: Sabrina Dubroca Tested-by: Sabrina Dubroca Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ahash.c | 79 ++++++++++++++++++++++++++---------------- include/crypto/internal/hash.h | 10 ++++++ 2 files changed, 60 insertions(+), 29 deletions(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index dac1c24e9c3e..f9caf0f74199 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -31,6 +31,7 @@ struct ahash_request_priv { crypto_completion_t complete; void *data; u8 *result; + u32 flags; void *ubuf[] CRYPTO_MINALIGN_ATTR; }; @@ -270,6 +271,8 @@ static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt) priv->result = req->result; priv->complete = req->base.complete; priv->data = req->base.data; + priv->flags = req->base.flags; + /* * WARNING: We do not backup req->priv here! The req->priv * is for internal use of the Crypto API and the @@ -284,38 +287,44 @@ static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt) return 0; } -static void ahash_restore_req(struct ahash_request *req) +static void ahash_restore_req(struct ahash_request *req, int err) { struct ahash_request_priv *priv = req->priv; + if (!err) + memcpy(priv->result, req->result, + crypto_ahash_digestsize(crypto_ahash_reqtfm(req))); + /* Restore the original crypto request. */ req->result = priv->result; - req->base.complete = priv->complete; - req->base.data = priv->data; + + ahash_request_set_callback(req, priv->flags, + priv->complete, priv->data); req->priv = NULL; /* Free the req->priv.priv from the ADJUSTED request. */ kzfree(priv); } -static void ahash_op_unaligned_finish(struct ahash_request *req, int err) +static void ahash_notify_einprogress(struct ahash_request *req) { struct ahash_request_priv *priv = req->priv; + struct crypto_async_request oreq; - if (err == -EINPROGRESS) - return; - - if (!err) - memcpy(priv->result, req->result, - crypto_ahash_digestsize(crypto_ahash_reqtfm(req))); + oreq.data = priv->data; - ahash_restore_req(req); + priv->complete(&oreq, -EINPROGRESS); } static void ahash_op_unaligned_done(struct crypto_async_request *req, int err) { struct ahash_request *areq = req->data; + if (err == -EINPROGRESS) { + ahash_notify_einprogress(areq); + return; + } + /* * Restore the original request, see ahash_op_unaligned() for what * goes where. @@ -326,7 +335,7 @@ static void ahash_op_unaligned_done(struct crypto_async_request *req, int err) */ /* First copy req->result into req->priv.result */ - ahash_op_unaligned_finish(areq, err); + ahash_restore_req(areq, err); /* Complete the ORIGINAL request. */ areq->base.complete(&areq->base, err); @@ -342,7 +351,12 @@ static int ahash_op_unaligned(struct ahash_request *req, return err; err = op(req); - ahash_op_unaligned_finish(req, err); + if (err == -EINPROGRESS || + (err == -EBUSY && (ahash_request_flags(req) & + CRYPTO_TFM_REQ_MAY_BACKLOG))) + return err; + + ahash_restore_req(req, err); return err; } @@ -377,25 +391,14 @@ int crypto_ahash_digest(struct ahash_request *req) } EXPORT_SYMBOL_GPL(crypto_ahash_digest); -static void ahash_def_finup_finish2(struct ahash_request *req, int err) +static void ahash_def_finup_done2(struct crypto_async_request *req, int err) { - struct ahash_request_priv *priv = req->priv; + struct ahash_request *areq = req->data; if (err == -EINPROGRESS) return; - if (!err) - memcpy(priv->result, req->result, - crypto_ahash_digestsize(crypto_ahash_reqtfm(req))); - - ahash_restore_req(req); -} - -static void ahash_def_finup_done2(struct crypto_async_request *req, int err) -{ - struct ahash_request *areq = req->data; - - ahash_def_finup_finish2(areq, err); + ahash_restore_req(areq, err); areq->base.complete(&areq->base, err); } @@ -406,11 +409,15 @@ static int ahash_def_finup_finish1(struct ahash_request *req, int err) goto out; req->base.complete = ahash_def_finup_done2; - req->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = crypto_ahash_reqtfm(req)->final(req); + if (err == -EINPROGRESS || + (err == -EBUSY && (ahash_request_flags(req) & + CRYPTO_TFM_REQ_MAY_BACKLOG))) + return err; out: - ahash_def_finup_finish2(req, err); + ahash_restore_req(req, err); return err; } @@ -418,7 +425,16 @@ static void ahash_def_finup_done1(struct crypto_async_request *req, int err) { struct ahash_request *areq = req->data; + if (err == -EINPROGRESS) { + ahash_notify_einprogress(areq); + return; + } + + areq->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = ahash_def_finup_finish1(areq, err); + if (areq->priv) + return; areq->base.complete(&areq->base, err); } @@ -433,6 +449,11 @@ static int ahash_def_finup(struct ahash_request *req) return err; err = tfm->update(req); + if (err == -EINPROGRESS || + (err == -EBUSY && (ahash_request_flags(req) & + CRYPTO_TFM_REQ_MAY_BACKLOG))) + return err; + return ahash_def_finup_finish1(req, err); } diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 3b4af1d7c7e9..a25414ce2898 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -173,6 +173,16 @@ static inline struct ahash_instance *ahash_alloc_instance( return crypto_alloc_instance2(name, alg, ahash_instance_headroom()); } +static inline void ahash_request_complete(struct ahash_request *req, int err) +{ + req->base.complete(&req->base, err); +} + +static inline u32 ahash_request_flags(struct ahash_request *req) +{ + return req->base.flags; +} + static inline struct crypto_ahash *crypto_spawn_ahash( struct crypto_ahash_spawn *spawn) { -- cgit v1.2.3 From ea6d8d67001a40c74f4a732f897c28440a5e8dfd Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Wed, 16 Nov 2016 17:23:08 +0800 Subject: ath9k: fix NULL pointer dereference commit 40bea976c72b9ee60f8d097852deb53ccbeaffbe upstream. relay_open() may return NULL, check the return value to avoid the crash. BUG: unable to handle kernel NULL pointer dereference at 0000000000000040 IP: [] ath_cmn_process_fft+0xd5/0x700 [ath9k_common] PGD 41cf28067 PUD 41be92067 PMD 0 Oops: 0000 [#1] SMP CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.8.6+ #35 Hardware name: Hewlett-Packard h8-1080t/2A86, BIOS 6.15 07/04/2011 task: ffffffff81e0c4c0 task.stack: ffffffff81e00000 RIP: 0010:[] [] ath_cmn_process_fft+0xd5/0x700 [ath9k_common] RSP: 0018:ffff88041f203ca0 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 000000000000059f RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000040 RDI: ffffffff81f0ca98 RBP: ffff88041f203dc8 R08: ffffffffffffffff R09: 00000000000000ff R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: ffffffff81f0ca98 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88041f200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000040 CR3: 000000041b6ec000 CR4: 00000000000006f0 Stack: 0000000000000363 00000000000003f3 00000000000003f3 00000000000001f9 000000000000049a 0000000001252c04 ffff88041f203e44 ffff880417b4bfd0 0000000000000008 ffff88041785b9c0 0000000000000002 ffff88041613dc60 Call Trace: [] ath9k_tasklet+0x1b1/0x220 [ath9k] [] tasklet_action+0x4d/0xf0 [] __do_softirq+0x92/0x2a0 Reported-by: Devin Tuchsen Tested-by: Devin Tuchsen Signed-off-by: Miaoqing Pan Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/common-spectral.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/common-spectral.c b/drivers/net/wireless/ath/ath9k/common-spectral.c index a8762711ad74..03945731eb65 100644 --- a/drivers/net/wireless/ath/ath9k/common-spectral.c +++ b/drivers/net/wireless/ath/ath9k/common-spectral.c @@ -528,6 +528,9 @@ int ath_cmn_process_fft(struct ath_spec_scan_priv *spec_priv, struct ieee80211_h if (!(radar_info->pulse_bw_info & SPECTRAL_SCAN_BITMASK)) return 0; + if (!spec_priv->rfs_chan_spec_scan) + return 1; + /* Output buffers are full, no need to process anything * since there is no space to put the result anyway */ @@ -1072,7 +1075,7 @@ static struct rchan_callbacks rfs_spec_scan_cb = { void ath9k_cmn_spectral_deinit_debug(struct ath_spec_scan_priv *spec_priv) { - if (config_enabled(CONFIG_ATH9K_DEBUGFS)) { + if (config_enabled(CONFIG_ATH9K_DEBUGFS) && spec_priv->rfs_chan_spec_scan) { relay_close(spec_priv->rfs_chan_spec_scan); spec_priv->rfs_chan_spec_scan = NULL; } @@ -1086,6 +1089,9 @@ void ath9k_cmn_spectral_init_debug(struct ath_spec_scan_priv *spec_priv, debugfs_phy, 1024, 256, &rfs_spec_scan_cb, NULL); + if (!spec_priv->rfs_chan_spec_scan) + return; + debugfs_create_file("spectral_scan_ctl", S_IRUSR | S_IWUSR, debugfs_phy, spec_priv, -- cgit v1.2.3 From 0cb03b6e7086e59647cf6eb79fec646cdec69691 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Feb 2017 12:36:01 -0200 Subject: dvb-usb-v2: avoid use-after-free commit 005145378c9ad7575a01b6ce1ba118fb427f583a upstream. I ran into a stack frame size warning because of the on-stack copy of the USB device structure: drivers/media/usb/dvb-usb-v2/dvb_usb_core.c: In function 'dvb_usbv2_disconnect': drivers/media/usb/dvb-usb-v2/dvb_usb_core.c:1029:1: error: the frame size of 1104 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] Copying a device structure like this is wrong for a number of other reasons too aside from the possible stack overflow. One of them is that the dev_info() call will print the name of the device later, but AFAICT we have only copied a pointer to the name earlier and the actual name has been freed by the time it gets printed. This removes the on-stack copy of the device and instead copies the device name using kstrdup(). I'm ignoring the possible failure here as both printk() and kfree() are able to deal with NULL pointers. Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb-v2/dvb_usb_core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/media/usb/dvb-usb-v2/dvb_usb_core.c b/drivers/media/usb/dvb-usb-v2/dvb_usb_core.c index f5df9eaba04f..9757f35cd5f5 100644 --- a/drivers/media/usb/dvb-usb-v2/dvb_usb_core.c +++ b/drivers/media/usb/dvb-usb-v2/dvb_usb_core.c @@ -1010,8 +1010,8 @@ EXPORT_SYMBOL(dvb_usbv2_probe); void dvb_usbv2_disconnect(struct usb_interface *intf) { struct dvb_usb_device *d = usb_get_intfdata(intf); - const char *name = d->name; - struct device dev = d->udev->dev; + const char *devname = kstrdup(dev_name(&d->udev->dev), GFP_KERNEL); + const char *drvname = d->name; dev_dbg(&d->udev->dev, "%s: bInterfaceNumber=%d\n", __func__, intf->cur_altsetting->desc.bInterfaceNumber); @@ -1021,8 +1021,9 @@ void dvb_usbv2_disconnect(struct usb_interface *intf) dvb_usbv2_exit(d); - dev_info(&dev, "%s: '%s' successfully deinitialized and disconnected\n", - KBUILD_MODNAME, name); + pr_info("%s: '%s:%s' successfully deinitialized and disconnected\n", + KBUILD_MODNAME, drvname, devname); + kfree(devname); } EXPORT_SYMBOL(dvb_usbv2_disconnect); -- cgit v1.2.3 From 51f8d95c89b4aa74e591015d076d2775d9286704 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Thu, 1 Dec 2016 11:49:12 -0500 Subject: ext4: fix inode checksum calculation problem if i_extra_size is small commit 05ac5aa18abd7db341e54df4ae2b4c98ea0e43b7 upstream. We've fixed the race condition problem in calculating ext4 checksum value in commit b47820edd163 ("ext4: avoid modifying checksum fields directly during checksum veficationon"). However, by this change, when calculating the checksum value of inode whose i_extra_size is less than 4, we couldn't calculate the checksum value in a proper way. This problem was found and reported by Nix, Thank you. Reported-by: Nix Signed-off-by: Daeho Jeong Signed-off-by: Youngjin Gil Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7dcc97eadb12..817a937de733 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -71,10 +71,9 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw, csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, csum_size); offset += csum_size; - csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset, - EXT4_INODE_SIZE(inode->i_sb) - - offset); } + csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset, + EXT4_INODE_SIZE(inode->i_sb) - offset); } return csum; -- cgit v1.2.3 From ccf0904c49b1bc2234dbde4978eaf1c384da11bf Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Thu, 3 Nov 2016 08:18:52 +0800 Subject: platform/x86: acer-wmi: setup accelerometer when machine has appropriate notify event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 98d610c3739ac354319a6590b915f4624d9151e6 upstream. The accelerometer event relies on the ACERWMID_EVENT_GUID notify. So, this patch changes the codes to setup accelerometer input device when detected ACERWMID_EVENT_GUID. It avoids that the accel input device created on every Acer machines. In addition, patch adds a clearly parsing logic of accelerometer hid to acer_wmi_get_handle_cb callback function. It is positive matching the "SENR" name with "BST0001" device to avoid non-supported hardware. Reported-by: Bjørn Mork Cc: Darren Hart Signed-off-by: Lee, Chun-Yi [andy: slightly massage commit message] Signed-off-by: Andy Shevchenko Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/acer-wmi.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index 1062fa42ff26..b2cdc1a1ad4f 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -1816,11 +1816,24 @@ static int __init acer_wmi_enable_lm(void) return status; } +#define ACER_WMID_ACCEL_HID "BST0001" + static acpi_status __init acer_wmi_get_handle_cb(acpi_handle ah, u32 level, void *ctx, void **retval) { + struct acpi_device *dev; + + if (!strcmp(ctx, "SENR")) { + if (acpi_bus_get_device(ah, &dev)) + return AE_OK; + if (!strcmp(ACER_WMID_ACCEL_HID, acpi_device_hid(dev))) + return AE_OK; + } else + return AE_OK; + *(acpi_handle *)retval = ah; - return AE_OK; + + return AE_CTRL_TERMINATE; } static int __init acer_wmi_get_handle(const char *name, const char *prop, @@ -1847,7 +1860,7 @@ static int __init acer_wmi_accel_setup(void) { int err; - err = acer_wmi_get_handle("SENR", "BST0001", &gsensor_handle); + err = acer_wmi_get_handle("SENR", ACER_WMID_ACCEL_HID, &gsensor_handle); if (err) return err; @@ -2185,10 +2198,11 @@ static int __init acer_wmi_init(void) err = acer_wmi_input_setup(); if (err) return err; + err = acer_wmi_accel_setup(); + if (err) + return err; } - acer_wmi_accel_setup(); - err = platform_driver_register(&acer_platform_driver); if (err) { pr_err("Unable to register platform driver\n"); -- cgit v1.2.3 From ba02781392fa1b934e41785a5301ca21ad44708b Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 12 Jan 2017 17:07:43 +0100 Subject: rtc: tegra: Implement clock handling commit 5fa4086987506b2ab8c92f8f99f2295db9918856 upstream. Accessing the registers of the RTC block on Tegra requires the module clock to be enabled. This only works because the RTC module clock will be enabled by default during early boot. However, because the clock is unused, the CCF will disable it at late_init time. This causes the RTC to become unusable afterwards. This can easily be reproduced by trying to use the RTC: $ hwclock --rtc /dev/rtc1 This will hang the system. I ran into this by following up on a report by Martin Michlmayr that reboot wasn't working on Tegra210 systems. It turns out that the rtc-tegra driver's ->shutdown() implementation will hang the CPU, because of the disabled clock, before the system can be rebooted. What confused me for a while is that the same driver is used on prior Tegra generations where the hang can not be observed. However, as Peter De Schrijver pointed out, this is because on 32-bit Tegra chips the RTC clock is enabled by the tegra20_timer.c clocksource driver, which uses the RTC to provide a persistent clock. This code is never enabled on 64-bit Tegra because the persistent clock infrastructure does not exist on 64-bit ARM. The proper fix for this is to add proper clock handling to the RTC driver in order to ensure that the clock is enabled when the driver requires it. All device trees contain the clock already, therefore no additional changes are required. Reported-by: Martin Michlmayr Acked-By Peter De Schrijver Signed-off-by: Thierry Reding Signed-off-by: Alexandre Belloni [bwh: Backported to 4.9: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-tegra.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-tegra.c b/drivers/rtc/rtc-tegra.c index 60232bd366ef..71216aa68905 100644 --- a/drivers/rtc/rtc-tegra.c +++ b/drivers/rtc/rtc-tegra.c @@ -18,6 +18,7 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include +#include #include #include #include @@ -59,6 +60,7 @@ struct tegra_rtc_info { struct platform_device *pdev; struct rtc_device *rtc_dev; void __iomem *rtc_base; /* NULL if not initialized. */ + struct clk *clk; int tegra_rtc_irq; /* alarm and periodic irq */ spinlock_t tegra_rtc_lock; }; @@ -332,6 +334,14 @@ static int __init tegra_rtc_probe(struct platform_device *pdev) if (info->tegra_rtc_irq <= 0) return -EBUSY; + info->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(info->clk)) + return PTR_ERR(info->clk); + + ret = clk_prepare_enable(info->clk); + if (ret < 0) + return ret; + /* set context info. */ info->pdev = pdev; spin_lock_init(&info->tegra_rtc_lock); @@ -352,7 +362,7 @@ static int __init tegra_rtc_probe(struct platform_device *pdev) ret = PTR_ERR(info->rtc_dev); dev_err(&pdev->dev, "Unable to register device (err=%d).\n", ret); - return ret; + goto disable_clk; } ret = devm_request_irq(&pdev->dev, info->tegra_rtc_irq, @@ -362,11 +372,24 @@ static int __init tegra_rtc_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Unable to request interrupt for device (err=%d).\n", ret); - return ret; + goto disable_clk; } dev_notice(&pdev->dev, "Tegra internal Real Time Clock\n"); + return 0; + +disable_clk: + clk_disable_unprepare(info->clk); + return ret; +} + +static int tegra_rtc_remove(struct platform_device *pdev) +{ + struct tegra_rtc_info *info = platform_get_drvdata(pdev); + + clk_disable_unprepare(info->clk); + return 0; } @@ -419,6 +442,7 @@ static void tegra_rtc_shutdown(struct platform_device *pdev) MODULE_ALIAS("platform:tegra_rtc"); static struct platform_driver tegra_rtc_driver = { + .remove = tegra_rtc_remove, .shutdown = tegra_rtc_shutdown, .driver = { .name = "tegra_rtc", -- cgit v1.2.3 From 6739cc12f3dbd7e4b3795f6e809d44ea6b490bb6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 5 Apr 2017 09:39:08 -0700 Subject: mm: Tighten x86 /dev/mem with zeroing reads commit a4866aa812518ed1a37d8ea0c881dc946409de94 upstream. Under CONFIG_STRICT_DEVMEM, reading System RAM through /dev/mem is disallowed. However, on x86, the first 1MB was always allowed for BIOS and similar things, regardless of it actually being System RAM. It was possible for heap to end up getting allocated in low 1MB RAM, and then read by things like x86info or dd, which would trip hardened usercopy: usercopy: kernel memory exposure attempt detected from ffff880000090000 (dma-kmalloc-256) (4096 bytes) This changes the x86 exception for the low 1MB by reading back zeros for System RAM areas instead of blindly allowing them. More work is needed to extend this to mmap, but currently mmap doesn't go through usercopy, so hardened usercopy won't Oops the kernel. Reported-by: Tommi Rantala Tested-by: Tommi Rantala Signed-off-by: Kees Cook Cc: Brad Spengler Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/init.c | 41 +++++++++++++++++++-------- drivers/char/mem.c | 82 ++++++++++++++++++++++++++++++++++-------------------- 2 files changed, 82 insertions(+), 41 deletions(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 493f54172b4a..3aebbd6c6f5f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -628,21 +628,40 @@ void __init init_mem_mapping(void) * devmem_is_allowed() checks to see if /dev/mem access to a certain address * is valid. The argument is a physical page number. * - * - * On x86, access has to be given to the first megabyte of ram because that area - * contains BIOS code and data regions used by X and dosemu and similar apps. - * Access has to be given to non-kernel-ram areas as well, these contain the PCI - * mmio resources as well as potential bios/acpi data regions. + * On x86, access has to be given to the first megabyte of RAM because that + * area traditionally contains BIOS code and data regions used by X, dosemu, + * and similar apps. Since they map the entire memory range, the whole range + * must be allowed (for mapping), but any areas that would otherwise be + * disallowed are flagged as being "zero filled" instead of rejected. + * Access has to be given to non-kernel-ram areas as well, these contain the + * PCI mmio resources as well as potential bios/acpi data regions. */ int devmem_is_allowed(unsigned long pagenr) { - if (pagenr < 256) - return 1; - if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) + if (page_is_ram(pagenr)) { + /* + * For disallowed memory regions in the low 1MB range, + * request that the page be shown as all zeros. + */ + if (pagenr < 256) + return 2; + + return 0; + } + + /* + * This must follow RAM test, since System RAM is considered a + * restricted resource under CONFIG_STRICT_IOMEM. + */ + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) { + /* Low 1MB bypasses iomem restrictions. */ + if (pagenr < 256) + return 1; + return 0; - if (!page_is_ram(pagenr)) - return 1; - return 0; + } + + return 1; } void free_init_pages(char *what, unsigned long begin, unsigned long end) diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 6b1721f978c2..e901463d4972 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -59,6 +59,10 @@ static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) #endif #ifdef CONFIG_STRICT_DEVMEM +static inline int page_is_allowed(unsigned long pfn) +{ + return devmem_is_allowed(pfn); +} static inline int range_is_allowed(unsigned long pfn, unsigned long size) { u64 from = ((u64)pfn) << PAGE_SHIFT; @@ -78,6 +82,10 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) return 1; } #else +static inline int page_is_allowed(unsigned long pfn) +{ + return 1; +} static inline int range_is_allowed(unsigned long pfn, unsigned long size) { return 1; @@ -125,23 +133,31 @@ static ssize_t read_mem(struct file *file, char __user *buf, while (count > 0) { unsigned long remaining; + int allowed; sz = size_inside_page(p, count); - if (!range_is_allowed(p >> PAGE_SHIFT, count)) + allowed = page_is_allowed(p >> PAGE_SHIFT); + if (!allowed) return -EPERM; + if (allowed == 2) { + /* Show zeros for restricted memory. */ + remaining = clear_user(buf, sz); + } else { + /* + * On ia64 if a page has been mapped somewhere as + * uncached, then it must also be accessed uncached + * by the kernel or data corruption may occur. + */ + ptr = xlate_dev_mem_ptr(p); + if (!ptr) + return -EFAULT; - /* - * On ia64 if a page has been mapped somewhere as uncached, then - * it must also be accessed uncached by the kernel or data - * corruption may occur. - */ - ptr = xlate_dev_mem_ptr(p); - if (!ptr) - return -EFAULT; + remaining = copy_to_user(buf, ptr, sz); + + unxlate_dev_mem_ptr(p, ptr); + } - remaining = copy_to_user(buf, ptr, sz); - unxlate_dev_mem_ptr(p, ptr); if (remaining) return -EFAULT; @@ -184,30 +200,36 @@ static ssize_t write_mem(struct file *file, const char __user *buf, #endif while (count > 0) { + int allowed; + sz = size_inside_page(p, count); - if (!range_is_allowed(p >> PAGE_SHIFT, sz)) + allowed = page_is_allowed(p >> PAGE_SHIFT); + if (!allowed) return -EPERM; - /* - * On ia64 if a page has been mapped somewhere as uncached, then - * it must also be accessed uncached by the kernel or data - * corruption may occur. - */ - ptr = xlate_dev_mem_ptr(p); - if (!ptr) { - if (written) - break; - return -EFAULT; - } + /* Skip actual writing when a page is marked as restricted. */ + if (allowed == 1) { + /* + * On ia64 if a page has been mapped somewhere as + * uncached, then it must also be accessed uncached + * by the kernel or data corruption may occur. + */ + ptr = xlate_dev_mem_ptr(p); + if (!ptr) { + if (written) + break; + return -EFAULT; + } - copied = copy_from_user(ptr, buf, sz); - unxlate_dev_mem_ptr(p, ptr); - if (copied) { - written += sz - copied; - if (written) - break; - return -EFAULT; + copied = copy_from_user(ptr, buf, sz); + unxlate_dev_mem_ptr(p, ptr); + if (copied) { + written += sz - copied; + if (written) + break; + return -EFAULT; + } } buf += sz; -- cgit v1.2.3 From 502157457f52654595d28a555327e84b3e35c268 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 24 Jan 2017 08:13:11 -0200 Subject: dvb-usb: don't use stack for firmware load commit 43fab9793c1f44e665b4f98035a14942edf03ddc upstream. As reported by Marc Duponcheel , firmware load on dvb-usb is using the stack, with is not allowed anymore on default Kernel configurations: [ 1025.958836] dvb-usb: found a 'WideView WT-220U PenType Receiver (based on ZL353)' in cold state, will try to load a firmware [ 1025.958853] dvb-usb: downloading firmware from file 'dvb-usb-wt220u-zl0353-01.fw' [ 1025.958855] dvb-usb: could not stop the USB controller CPU. [ 1025.958856] dvb-usb: error while transferring firmware (transferred size: -11, block size: 3) [ 1025.958856] dvb-usb: firmware download failed at 8 with -22 [ 1025.958867] usbcore: registered new interface driver dvb_usb_dtt200u [ 2.789902] dvb-usb: downloading firmware from file 'dvb-usb-wt220u-zl0353-01.fw' [ 2.789905] ------------[ cut here ]------------ [ 2.789911] WARNING: CPU: 3 PID: 2196 at drivers/usb/core/hcd.c:1584 usb_hcd_map_urb_for_dma+0x430/0x560 [usbcore] [ 2.789912] transfer buffer not dma capable [ 2.789912] Modules linked in: btusb dvb_usb_dtt200u(+) dvb_usb_af9035(+) btrtl btbcm dvb_usb dvb_usb_v2 btintel dvb_core bluetooth rc_core rfkill x86_pkg_temp_thermal intel_powerclamp coretemp crc32_pclmul aesni_intel aes_x86_64 glue_helper lrw gf128mul ablk_helper cryptd drm_kms_helper syscopyarea sysfillrect pcspkr i2c_i801 sysimgblt fb_sys_fops drm i2c_smbus i2c_core r8169 lpc_ich mfd_core mii thermal fan rtc_cmos video button acpi_cpufreq processor snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd crc32c_intel ahci libahci libata xhci_pci ehci_pci xhci_hcd ehci_hcd usbcore usb_common dm_mirror dm_region_hash dm_log dm_mod [ 2.789936] CPU: 3 PID: 2196 Comm: systemd-udevd Not tainted 4.9.0-gentoo #1 [ 2.789937] Hardware name: ASUS All Series/H81I-PLUS, BIOS 0401 07/23/2013 [ 2.789938] ffffc9000339b690 ffffffff812bd397 ffffc9000339b6e0 0000000000000000 [ 2.789939] ffffc9000339b6d0 ffffffff81055c86 000006300339b6a0 ffff880116c0c000 [ 2.789941] 0000000000000000 0000000000000000 0000000000000001 ffff880116c08000 [ 2.789942] Call Trace: [ 2.789945] [] dump_stack+0x4d/0x66 [ 2.789947] [] __warn+0xc6/0xe0 [ 2.789948] [] warn_slowpath_fmt+0x4a/0x50 [ 2.789952] [] usb_hcd_map_urb_for_dma+0x430/0x560 [usbcore] [ 2.789954] [] ? io_schedule_timeout+0xd8/0x110 [ 2.789956] [] usb_hcd_submit_urb+0x9c/0x980 [usbcore] [ 2.789958] [] ? copy_page_to_iter+0x14f/0x2b0 [ 2.789960] [] ? pagecache_get_page+0x28/0x240 [ 2.789962] [] ? touch_atime+0x20/0xa0 [ 2.789964] [] usb_submit_urb+0x2c4/0x520 [usbcore] [ 2.789967] [] usb_start_wait_urb+0x5a/0xe0 [usbcore] [ 2.789969] [] usb_control_msg+0xbc/0xf0 [usbcore] [ 2.789970] [] usb_cypress_writemem+0x3d/0x40 [dvb_usb] [ 2.789972] [] usb_cypress_load_firmware+0x4f/0x130 [dvb_usb] [ 2.789973] [] ? console_unlock+0x2fe/0x5d0 [ 2.789974] [] ? vprintk_emit+0x27c/0x410 [ 2.789975] [] ? vprintk_default+0x1a/0x20 [ 2.789976] [] ? printk+0x43/0x4b [ 2.789977] [] dvb_usb_download_firmware+0x60/0xd0 [dvb_usb] [ 2.789979] [] dvb_usb_device_init+0x3d8/0x610 [dvb_usb] [ 2.789981] [] dtt200u_usb_probe+0x92/0xd0 [dvb_usb_dtt200u] [ 2.789984] [] usb_probe_interface+0xfc/0x270 [usbcore] [ 2.789985] [] driver_probe_device+0x215/0x2d0 [ 2.789986] [] __driver_attach+0x96/0xa0 [ 2.789987] [] ? driver_probe_device+0x2d0/0x2d0 [ 2.789988] [] bus_for_each_dev+0x5b/0x90 [ 2.789989] [] driver_attach+0x19/0x20 [ 2.789990] [] bus_add_driver+0x11c/0x220 [ 2.789991] [] driver_register+0x5b/0xd0 [ 2.789994] [] usb_register_driver+0x7c/0x130 [usbcore] [ 2.789994] [] ? 0xffffffffa06a5000 [ 2.789996] [] dtt200u_usb_driver_init+0x1e/0x20 [dvb_usb_dtt200u] [ 2.789997] [] do_one_initcall+0x38/0x140 [ 2.789998] [] ? __vunmap+0x7c/0xc0 [ 2.789999] [] ? do_init_module+0x22/0x1d2 [ 2.790000] [] do_init_module+0x5a/0x1d2 [ 2.790002] [] load_module+0x1e11/0x2580 [ 2.790003] [] ? show_taint+0x30/0x30 [ 2.790004] [] ? kernel_read_file+0x100/0x190 [ 2.790005] [] SyS_finit_module+0xba/0xc0 [ 2.790007] [] entry_SYSCALL_64_fastpath+0x13/0x94 [ 2.790008] ---[ end trace c78a74e78baec6fc ]--- So, allocate the structure dynamically. Signed-off-by: Mauro Carvalho Chehab [bwh: Backported to 4.9: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/dvb-usb-firmware.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dvb-usb-firmware.c b/drivers/media/usb/dvb-usb/dvb-usb-firmware.c index 733a7ff7b207..0cd9b02739c6 100644 --- a/drivers/media/usb/dvb-usb/dvb-usb-firmware.c +++ b/drivers/media/usb/dvb-usb/dvb-usb-firmware.c @@ -35,29 +35,34 @@ static int usb_cypress_writemem(struct usb_device *udev,u16 addr,u8 *data, u8 le int usb_cypress_load_firmware(struct usb_device *udev, const struct firmware *fw, int type) { - struct hexline hx; + struct hexline *hx; u8 reset; int ret,pos=0; + hx = kmalloc(sizeof(*hx), GFP_KERNEL); + if (!hx) + return -ENOMEM; + /* stop the CPU */ reset = 1; if ((ret = usb_cypress_writemem(udev,cypress[type].cpu_cs_register,&reset,1)) != 1) err("could not stop the USB controller CPU."); - while ((ret = dvb_usb_get_hexline(fw,&hx,&pos)) > 0) { - deb_fw("writing to address 0x%04x (buffer: 0x%02x %02x)\n",hx.addr,hx.len,hx.chk); - ret = usb_cypress_writemem(udev,hx.addr,hx.data,hx.len); + while ((ret = dvb_usb_get_hexline(fw, hx, &pos)) > 0) { + deb_fw("writing to address 0x%04x (buffer: 0x%02x %02x)\n", hx->addr, hx->len, hx->chk); + ret = usb_cypress_writemem(udev, hx->addr, hx->data, hx->len); - if (ret != hx.len) { + if (ret != hx->len) { err("error while transferring firmware " "(transferred size: %d, block size: %d)", - ret,hx.len); + ret, hx->len); ret = -EINVAL; break; } } if (ret < 0) { err("firmware download failed at %d with %d",pos,ret); + kfree(hx); return ret; } @@ -71,6 +76,8 @@ int usb_cypress_load_firmware(struct usb_device *udev, const struct firmware *fw } else ret = -EIO; + kfree(hx); + return ret; } EXPORT_SYMBOL(usb_cypress_load_firmware); -- cgit v1.2.3 From 6be431f91632504f269b6e8ffcd552a5ca3fd84d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Br=C3=BCns?= Date: Sun, 12 Feb 2017 13:02:13 -0200 Subject: dvb-usb-firmware: don't do DMA on stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 67b0503db9c29b04eadfeede6bebbfe5ddad94ef upstream. The buffer allocation for the firmware data was changed in commit 43fab9793c1f ("[media] dvb-usb: don't use stack for firmware load") but the same applies for the reset value. Fixes: 43fab9793c1f ("[media] dvb-usb: don't use stack for firmware load") Signed-off-by: Stefan Brüns Signed-off-by: Mauro Carvalho Chehab Cc: Ben Hutchings Cc: Brad Spengler Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/dvb-usb-firmware.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dvb-usb-firmware.c b/drivers/media/usb/dvb-usb/dvb-usb-firmware.c index 0cd9b02739c6..caad3b5c01ad 100644 --- a/drivers/media/usb/dvb-usb/dvb-usb-firmware.c +++ b/drivers/media/usb/dvb-usb/dvb-usb-firmware.c @@ -36,16 +36,18 @@ static int usb_cypress_writemem(struct usb_device *udev,u16 addr,u8 *data, u8 le int usb_cypress_load_firmware(struct usb_device *udev, const struct firmware *fw, int type) { struct hexline *hx; - u8 reset; - int ret,pos=0; + u8 *buf; + int ret, pos = 0; + u16 cpu_cs_register = cypress[type].cpu_cs_register; - hx = kmalloc(sizeof(*hx), GFP_KERNEL); - if (!hx) + buf = kmalloc(sizeof(*hx), GFP_KERNEL); + if (!buf) return -ENOMEM; + hx = (struct hexline *)buf; /* stop the CPU */ - reset = 1; - if ((ret = usb_cypress_writemem(udev,cypress[type].cpu_cs_register,&reset,1)) != 1) + buf[0] = 1; + if (usb_cypress_writemem(udev, cpu_cs_register, buf, 1) != 1) err("could not stop the USB controller CPU."); while ((ret = dvb_usb_get_hexline(fw, hx, &pos)) > 0) { @@ -62,21 +64,21 @@ int usb_cypress_load_firmware(struct usb_device *udev, const struct firmware *fw } if (ret < 0) { err("firmware download failed at %d with %d",pos,ret); - kfree(hx); + kfree(buf); return ret; } if (ret == 0) { /* restart the CPU */ - reset = 0; - if (ret || usb_cypress_writemem(udev,cypress[type].cpu_cs_register,&reset,1) != 1) { + buf[0] = 0; + if (usb_cypress_writemem(udev, cpu_cs_register, buf, 1) != 1) { err("could not restart the USB controller CPU."); ret = -EINVAL; } } else ret = -EIO; - kfree(hx); + kfree(buf); return ret; } -- cgit v1.2.3 From eb5267657d85bfcbb60803dd88fa82c7dede6aab Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 1 Feb 2017 00:02:27 -0800 Subject: virtio-console: avoid DMA from stack commit c4baad50297d84bde1a7ad45e50c73adae4a2192 upstream. put_chars() stuffs the buffer it gets into an sg, but that buffer may be on the stack. This breaks with CONFIG_VMAP_STACK=y (for me, it manifested as printks getting turned into NUL bytes). Signed-off-by: Omar Sandoval Signed-off-by: Michael S. Tsirkin Reviewed-by: Amit Shah Cc: Ben Hutchings Cc: Brad Spengler Signed-off-by: Greg Kroah-Hartman --- drivers/char/virtio_console.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 090183f812be..31e8ae916ba0 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1130,6 +1130,8 @@ static int put_chars(u32 vtermno, const char *buf, int count) { struct port *port; struct scatterlist sg[1]; + void *data; + int ret; if (unlikely(early_put_chars)) return early_put_chars(vtermno, buf, count); @@ -1138,8 +1140,14 @@ static int put_chars(u32 vtermno, const char *buf, int count) if (!port) return -EPIPE; - sg_init_one(sg, buf, count); - return __send_to_port(port, sg, 1, count, (void *)buf, false); + data = kmemdup(buf, count, GFP_ATOMIC); + if (!data) + return -ENOMEM; + + sg_init_one(sg, data, count); + ret = __send_to_port(port, sg, 1, count, data, false); + kfree(data); + return ret; } /* -- cgit v1.2.3 From be570e556deec7466d74a579129671185501a456 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 4 Feb 2017 16:56:03 +0000 Subject: pegasus: Use heap buffers for all register access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5593523f968bc86d42a035c6df47d5e0979b5ace upstream. Allocating USB buffers on the stack is not portable, and no longer works on x86_64 (with VMAP_STACK enabled as per default). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") References: https://bugs.debian.org/852556 Reported-by: Lisandro Damián Nicanor Pérez Meyer Tested-by: Lisandro Damián Nicanor Pérez Meyer Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller Cc: Brad Spengler Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/pegasus.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index f84080215915..17fac0121e56 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -126,40 +126,61 @@ static void async_ctrl_callback(struct urb *urb) static int get_registers(pegasus_t *pegasus, __u16 indx, __u16 size, void *data) { + u8 *buf; int ret; + buf = kmalloc(size, GFP_NOIO); + if (!buf) + return -ENOMEM; + ret = usb_control_msg(pegasus->usb, usb_rcvctrlpipe(pegasus->usb, 0), PEGASUS_REQ_GET_REGS, PEGASUS_REQT_READ, 0, - indx, data, size, 1000); + indx, buf, size, 1000); if (ret < 0) netif_dbg(pegasus, drv, pegasus->net, "%s returned %d\n", __func__, ret); + else if (ret <= size) + memcpy(data, buf, ret); + kfree(buf); return ret; } -static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, void *data) +static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, + const void *data) { + u8 *buf; int ret; + buf = kmemdup(data, size, GFP_NOIO); + if (!buf) + return -ENOMEM; + ret = usb_control_msg(pegasus->usb, usb_sndctrlpipe(pegasus->usb, 0), PEGASUS_REQ_SET_REGS, PEGASUS_REQT_WRITE, 0, - indx, data, size, 100); + indx, buf, size, 100); if (ret < 0) netif_dbg(pegasus, drv, pegasus->net, "%s returned %d\n", __func__, ret); + kfree(buf); return ret; } static int set_register(pegasus_t *pegasus, __u16 indx, __u8 data) { + u8 *buf; int ret; + buf = kmemdup(&data, 1, GFP_NOIO); + if (!buf) + return -ENOMEM; + ret = usb_control_msg(pegasus->usb, usb_sndctrlpipe(pegasus->usb, 0), PEGASUS_REQ_SET_REG, PEGASUS_REQT_WRITE, data, - indx, &data, 1, 1000); + indx, buf, 1, 1000); if (ret < 0) netif_dbg(pegasus, drv, pegasus->net, "%s returned %d\n", __func__, ret); + kfree(buf); return ret; } -- cgit v1.2.3 From a90604be51de4e63f916261a91edd4f67e8b0b2b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 4 Feb 2017 16:56:32 +0000 Subject: rtl8150: Use heap buffers for all register access commit 7926aff5c57b577ab0f43364ff0c59d968f6a414 upstream. Allocating USB buffers on the stack is not portable, and no longer works on x86_64 (with VMAP_STACK enabled as per default). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller Cc: Brad Spengler Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/rtl8150.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index d37b7dce2d40..39672984dde1 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -155,16 +155,36 @@ static const char driver_name [] = "rtl8150"; */ static int get_registers(rtl8150_t * dev, u16 indx, u16 size, void *data) { - return usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), - RTL8150_REQ_GET_REGS, RTL8150_REQT_READ, - indx, 0, data, size, 500); + void *buf; + int ret; + + buf = kmalloc(size, GFP_NOIO); + if (!buf) + return -ENOMEM; + + ret = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), + RTL8150_REQ_GET_REGS, RTL8150_REQT_READ, + indx, 0, buf, size, 500); + if (ret > 0 && ret <= size) + memcpy(data, buf, ret); + kfree(buf); + return ret; } -static int set_registers(rtl8150_t * dev, u16 indx, u16 size, void *data) +static int set_registers(rtl8150_t * dev, u16 indx, u16 size, const void *data) { - return usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), - RTL8150_REQ_SET_REGS, RTL8150_REQT_WRITE, - indx, 0, data, size, 500); + void *buf; + int ret; + + buf = kmemdup(data, size, GFP_NOIO); + if (!buf) + return -ENOMEM; + + ret = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), + RTL8150_REQ_SET_REGS, RTL8150_REQT_WRITE, + indx, 0, buf, size, 500); + kfree(buf); + return ret; } static void async_set_reg_cb(struct urb *urb) -- cgit v1.2.3 From 40531b26bade950cf9c815d8238be27b009aa197 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 4 Feb 2017 16:56:56 +0000 Subject: catc: Combine failure cleanup code in catc_probe() commit d41149145f98fe26dcd0bfd1d6cc095e6e041418 upstream. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/catc.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index 4e2b26a88b15..298885f81aad 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -777,7 +777,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id struct net_device *netdev; struct catc *catc; u8 broadcast[ETH_ALEN]; - int i, pktsz; + int i, pktsz, ret; if (usb_set_interface(usbdev, intf->altsetting->desc.bInterfaceNumber, 1)) { @@ -812,12 +812,8 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id if ((!catc->ctrl_urb) || (!catc->tx_urb) || (!catc->rx_urb) || (!catc->irq_urb)) { dev_err(&intf->dev, "No free urbs available.\n"); - usb_free_urb(catc->ctrl_urb); - usb_free_urb(catc->tx_urb); - usb_free_urb(catc->rx_urb); - usb_free_urb(catc->irq_urb); - free_netdev(netdev); - return -ENOMEM; + ret = -ENOMEM; + goto fail_free; } /* The F5U011 has the same vendor/product as the netmate but a device version of 0x130 */ @@ -914,16 +910,21 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id usb_set_intfdata(intf, catc); SET_NETDEV_DEV(netdev, &intf->dev); - if (register_netdev(netdev) != 0) { - usb_set_intfdata(intf, NULL); - usb_free_urb(catc->ctrl_urb); - usb_free_urb(catc->tx_urb); - usb_free_urb(catc->rx_urb); - usb_free_urb(catc->irq_urb); - free_netdev(netdev); - return -EIO; - } + ret = register_netdev(netdev); + if (ret) + goto fail_clear_intfdata; + return 0; + +fail_clear_intfdata: + usb_set_intfdata(intf, NULL); +fail_free: + usb_free_urb(catc->ctrl_urb); + usb_free_urb(catc->tx_urb); + usb_free_urb(catc->rx_urb); + usb_free_urb(catc->irq_urb); + free_netdev(netdev); + return ret; } static void catc_disconnect(struct usb_interface *intf) -- cgit v1.2.3 From 65596042c3af1c3578f5e478f512f595d7fa31d0 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 4 Feb 2017 16:57:04 +0000 Subject: catc: Use heap buffer for memory size test commit 2d6a0e9de03ee658a9adc3bfb2f0ca55dff1e478 upstream. Allocating USB buffers on the stack is not portable, and no longer works on x86_64 (with VMAP_STACK enabled as per default). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller Cc: Brad Spengler Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/catc.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index 298885f81aad..2aa1a1d29cb4 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -777,7 +777,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id struct net_device *netdev; struct catc *catc; u8 broadcast[ETH_ALEN]; - int i, pktsz, ret; + int pktsz, ret; if (usb_set_interface(usbdev, intf->altsetting->desc.bInterfaceNumber, 1)) { @@ -841,15 +841,24 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id catc->irq_buf, 2, catc_irq_done, catc, 1); if (!catc->is_f5u011) { + u32 *buf; + int i; + dev_dbg(dev, "Checking memory size\n"); - i = 0x12345678; - catc_write_mem(catc, 0x7a80, &i, 4); - i = 0x87654321; - catc_write_mem(catc, 0xfa80, &i, 4); - catc_read_mem(catc, 0x7a80, &i, 4); + buf = kmalloc(4, GFP_KERNEL); + if (!buf) { + ret = -ENOMEM; + goto fail_free; + } + + *buf = 0x12345678; + catc_write_mem(catc, 0x7a80, buf, 4); + *buf = 0x87654321; + catc_write_mem(catc, 0xfa80, buf, 4); + catc_read_mem(catc, 0x7a80, buf, 4); - switch (i) { + switch (*buf) { case 0x12345678: catc_set_reg(catc, TxBufCount, 8); catc_set_reg(catc, RxBufCount, 32); @@ -864,6 +873,8 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id dev_dbg(dev, "32k Memory\n"); break; } + + kfree(buf); dev_dbg(dev, "Getting MAC from SEEROM.\n"); -- cgit v1.2.3 From 403a728d1a35111103669aa125dcecfbe04e6872 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Tue, 13 Dec 2016 18:15:09 -0600 Subject: ibmveth: calculate gso_segs for large packets commit 94acf164dc8f1184e8d0737be7125134c2701dbe upstream. Include calculations to compute the number of segments that comprise an aggregated large packet. Signed-off-by: Thomas Falcon Reviewed-by: Marcelo Ricardo Leitner Reviewed-by: Jonathan Maxwell Signed-off-by: David S. Miller Signed-off-by: Sumit Semwal Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ibm/ibmveth.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 855c43d8f7e0..f9e4988ea30e 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1179,7 +1179,9 @@ map_failed: static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt) { + struct tcphdr *tcph; int offset = 0; + int hdr_len; /* only TCP packets will be aggregated */ if (skb->protocol == htons(ETH_P_IP)) { @@ -1206,14 +1208,20 @@ static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt) /* if mss is not set through Large Packet bit/mss in rx buffer, * expect that the mss will be written to the tcp header checksum. */ + tcph = (struct tcphdr *)(skb->data + offset); if (lrg_pkt) { skb_shinfo(skb)->gso_size = mss; } else if (offset) { - struct tcphdr *tcph = (struct tcphdr *)(skb->data + offset); - skb_shinfo(skb)->gso_size = ntohs(tcph->check); tcph->check = 0; } + + if (skb_shinfo(skb)->gso_size) { + hdr_len = offset + tcph->doff * 4; + skb_shinfo(skb)->gso_segs = + DIV_ROUND_UP(skb->len - hdr_len, + skb_shinfo(skb)->gso_size); + } } static int ibmveth_poll(struct napi_struct *napi, int budget) -- cgit v1.2.3 From 8dc821b9f67d9abf2d5baca3eb92a70d91c0dbe0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 5 Dec 2016 15:10:11 +1100 Subject: SUNRPC: fix refcounting problems with auth_gss messages. commit 1cded9d2974fe4fe339fc0ccd6638b80d465ab2c upstream. There are two problems with refcounting of auth_gss messages. First, the reference on the pipe->pipe list (taken by a call to rpc_queue_upcall()) is not counted. It seems to be assumed that a message in pipe->pipe will always also be in pipe->in_downcall, where it is correctly reference counted. However there is no guaranty of this. I have a report of a NULL dereferences in rpc_pipe_read() which suggests a msg that has been freed is still on the pipe->pipe list. One way I imagine this might happen is: - message is queued for uid=U and auth->service=S1 - rpc.gssd reads this message and starts processing. This removes the message from pipe->pipe - message is queued for uid=U and auth->service=S2 - rpc.gssd replies to the first message. gss_pipe_downcall() calls __gss_find_upcall(pipe, U, NULL) and it finds the *second* message, as new messages are placed at the head of ->in_downcall, and the service type is not checked. - This second message is removed from ->in_downcall and freed by gss_release_msg() (even though it is still on pipe->pipe) - rpc.gssd tries to read another message, and dereferences a pointer to this message that has just been freed. I fix this by incrementing the reference count before calling rpc_queue_upcall(), and decrementing it if that fails, or normally in gss_pipe_destroy_msg(). It seems strange that the reply doesn't target the message more precisely, but I don't know all the details. In any case, I think the reference counting irregularity became a measureable bug when the extra arg was added to __gss_find_upcall(), hence the Fixes: line below. The second problem is that if rpc_queue_upcall() fails, the new message is not freed. gss_alloc_msg() set the ->count to 1, gss_add_msg() increments this to 2, gss_unhash_msg() decrements to 1, then the pointer is discarded so the memory never gets freed. Fixes: 9130b8dbc6ac ("SUNRPC: allow for upcalls for same uid but different gss service") Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1011250 Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust Signed-off-by: Sumit Semwal Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/auth_gss/auth_gss.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 06095cc8815e..1f0687d8e3d7 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -541,9 +541,13 @@ gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred) return gss_new; gss_msg = gss_add_msg(gss_new); if (gss_msg == gss_new) { - int res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg); + int res; + atomic_inc(&gss_msg->count); + res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg); if (res) { gss_unhash_msg(gss_new); + atomic_dec(&gss_msg->count); + gss_release_msg(gss_new); gss_msg = ERR_PTR(res); } } else @@ -836,6 +840,7 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) warn_gssd(); gss_release_msg(gss_msg); } + gss_release_msg(gss_msg); } static void gss_pipe_dentry_destroy(struct dentry *dir, -- cgit v1.2.3 From 990a142ee0d3b504a0a3c23a16e2cda41c5d45cf Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Tue, 6 Dec 2016 13:05:33 +0100 Subject: tty/serial: atmel: RS485 half duplex w/DMA: enable RX after TX is done commit b389f173aaa1204d6dc1f299082a162eb0491545 upstream. When using RS485 in half duplex, RX should be enabled when TX is finished, and stopped when TX starts. Before commit 0058f0871efe7b01c6 ("tty/serial: atmel: fix RS485 half duplex with DMA"), RX was not disabled in atmel_start_tx() if the DMA was used. So, collisions could happened. But disabling RX in atmel_start_tx() uncovered another bug: RX was enabled again in the wrong place (in atmel_tx_dma) instead of being enabled when TX is finished (in atmel_complete_tx_dma), so the transmission simply stopped. This bug was not triggered before commit 0058f0871efe7b01c6 ("tty/serial: atmel: fix RS485 half duplex with DMA") because RX was never disabled before. Moving atmel_start_rx() in atmel_complete_tx_dma() corrects the problem. Reported-by: Gil Weber Fixes: 0058f0871efe7b01c6 Tested-by: Gil Weber Signed-off-by: Richard Genoud Acked-by: Alexandre Belloni Signed-off-by: Alexandre Belloni Tested-by: Bryan Evenson Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index a15070a7fcd6..53e4d5056db7 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -810,6 +810,11 @@ static void atmel_complete_tx_dma(void *arg) */ if (!uart_circ_empty(xmit)) tasklet_schedule(&atmel_port->tasklet); + else if ((port->rs485.flags & SER_RS485_ENABLED) && + !(port->rs485.flags & SER_RS485_RX_DURING_TX)) { + /* DMA done, stop TX, start RX for RS485 */ + atmel_start_rx(port); + } spin_unlock_irqrestore(&port->lock, flags); } @@ -912,12 +917,6 @@ static void atmel_tx_dma(struct uart_port *port) desc->callback = atmel_complete_tx_dma; desc->callback_param = atmel_port; atmel_port->cookie_tx = dmaengine_submit(desc); - - } else { - if (port->rs485.flags & SER_RS485_ENABLED) { - /* DMA done, stop TX, start RX for RS485 */ - atmel_start_rx(port); - } } if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) -- cgit v1.2.3 From f00f18ebb3b23134012a020faad85f33cd5d2e8f Mon Sep 17 00:00:00 2001 From: Mantas M Date: Fri, 16 Dec 2016 10:30:59 +0200 Subject: net: ipv6: check route protocol when deleting routes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c2ed1880fd61a998e3ce40254a99a2ad000f1a7d upstream. The protocol field is checked when deleting IPv4 routes, but ignored for IPv6, which causes problems with routing daemons accidentally deleting externally set routes (observed by multiple bird6 users). This can be verified using `ip -6 route del proto something`. Signed-off-by: Mantas Mikulėnas Signed-off-by: David S. Miller Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 36bf4c3fe4f5..9f0aa255e288 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2084,6 +2084,8 @@ static int ip6_route_del(struct fib6_config *cfg) continue; if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) continue; + if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol) + continue; dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); -- cgit v1.2.3 From e2f5fb9207a6bd7101ad94e73264ac8bb9e3b87a Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 23 Feb 2017 09:31:18 -0300 Subject: sctp: deny peeloff operation on asocs with threads sleeping on it commit dfcb9f4f99f1e9a49e43398a7bfbf56927544af1 upstream. commit 2dcab5984841 ("sctp: avoid BUG_ON on sctp_wait_for_sndbuf") attempted to avoid a BUG_ON call when the association being used for a sendmsg() is blocked waiting for more sndbuf and another thread did a peeloff operation on such asoc, moving it to another socket. As Ben Hutchings noticed, then in such case it would return without locking back the socket and would cause two unlocks in a row. Further analysis also revealed that it could allow a double free if the application managed to peeloff the asoc that is created during the sendmsg call, because then sctp_sendmsg() would try to free the asoc that was created only for that call. This patch takes another approach. It will deny the peeloff operation if there is a thread sleeping on the asoc, so this situation doesn't exist anymore. This avoids the issues described above and also honors the syscalls that are already being handled (it can be multiple sendmsg calls). Joint work with Xin Long. Fixes: 2dcab5984841 ("sctp: avoid BUG_ON on sctp_wait_for_sndbuf") Cc: Alexander Popov Cc: Ben Hutchings Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 138f2d667212..5758818435f3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4422,6 +4422,12 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) if (!asoc) return -EINVAL; + /* If there is a thread waiting on more sndbuf space for + * sending on this asoc, it cannot be peeled. + */ + if (waitqueue_active(&asoc->wait)) + return -EBUSY; + /* An association cannot be branched off from an already peeled-off * socket, nor is this supported for tcp style sockets. */ @@ -6960,8 +6966,6 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, */ release_sock(sk); current_timeo = schedule_timeout(current_timeo); - if (sk != asoc->base.sk) - goto do_error; lock_sock(sk); *timeo_p = current_timeo; -- cgit v1.2.3 From d005579766761216526caa8345d1a1993eff8e24 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 19 Apr 2017 15:14:54 +0200 Subject: MIPS: fix Select HAVE_IRQ_EXIT_ON_IRQ_STACK patch. Commit f017e58da4aba293e4a6ab62ca5d4801f79cc929 which was commit 3cc3434fd6307d06b53b98ce83e76bf9807689b9 upstream, was misapplied to the 4.4 stable kernel. This patch fixes this and moves the chunk to the proper Kconfig area. Reported-by: "Maciej W. Rozycki" Cc: Matt Redfearn Cc: Jason A. Donenfeld Cc: Thomas Gleixner Cc: Ralf Baechle Cc: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/mips/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index d5cfa937d622..8b0424abc84c 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1413,7 +1413,7 @@ config CPU_MIPS32_R6 select CPU_SUPPORTS_MSA select GENERIC_CSUM select HAVE_KVM - select MIPS_O32_FP64_SUPPORT if 32BIT + select MIPS_O32_FP64_SUPPORT help Choose this option to build a kernel for release 6 or later of the MIPS32 architecture. New MIPS processors, starting with the Warrior @@ -1464,7 +1464,7 @@ config CPU_MIPS64_R6 select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_MSA select GENERIC_CSUM - select MIPS_O32_FP64_SUPPORT if MIPS32_O32 + select MIPS_O32_FP64_SUPPORT if 32BIT || MIPS32_O32 help Choose this option to build a kernel for release 6 or later of the MIPS64 architecture. New MIPS processors, starting with the Warrior -- cgit v1.2.3 From 81af21fe95ba45261c7894b471e5d7698c4db8f1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Apr 2017 09:30:24 +0200 Subject: Linux 4.4.63 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0309acc34472..ec52973043f6 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 62 +SUBLEVEL = 63 EXTRAVERSION = NAME = Blurry Fish Butt -- cgit v1.2.3 From b834e929774513db929303125486c2c34bedba73 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Mon, 24 Apr 2017 15:50:31 +0530 Subject: Revert "USB: gadget: u_ether: Fix data stall issue in RNDIS tethering mode" This reverts commit 78281f6ed79413e5a16eac6edc4a72c6836ae5a9. This data stall fix is no longer required in AOSP. It is already skipped in android-4.9 patchset. Also core change from this data stall fix is already undone by android-4.4 merge commit 324e88de4aba ("Merge tag 'v4.4.32' into android-4.4.y"). This revert patch just clean up the left overs. It also reverts the compile fix from Change-Id: I38c4f4a850b0329fb4a06b2c7e45558e16d66151 40ceb2c69964 ("usb: gadget: Fix compilation problem with tx_qlen field"). Signed-off-by: Amit Pundir --- drivers/usb/gadget/function/u_ether.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index cc5210a87614..21bf0a8423d5 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -66,7 +66,7 @@ struct eth_dev { spinlock_t req_lock; /* guard {rx,tx}_reqs */ struct list_head tx_reqs, rx_reqs; - unsigned tx_qlen; + atomic_t tx_qlen; /* Minimum number of TX USB request queued to UDC */ #define TX_REQ_THRESHOLD 5 int no_tx_req_used; @@ -568,6 +568,7 @@ static void tx_complete(struct usb_ep *ep, struct usb_request *req) dev_kfree_skb_any(skb); } + atomic_dec(&dev->tx_qlen); if (netif_carrier_ok(dev->net)) netif_wake_queue(dev->net); } @@ -746,7 +747,7 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, req->no_interrupt = (((dev->gadget->speed == USB_SPEED_HIGH || dev->gadget->speed == USB_SPEED_SUPER)) && !list_empty(&dev->tx_reqs)) - ? ((dev->tx_qlen % dev->qmult) != 0) + ? ((atomic_read(&dev->tx_qlen) % dev->qmult) != 0) : 0; retval = usb_ep_queue(in, req, GFP_ATOMIC); @@ -756,6 +757,7 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, break; case 0: net->trans_start = jiffies; + atomic_inc(&dev->tx_qlen); } if (retval) { @@ -784,7 +786,7 @@ static void eth_start(struct eth_dev *dev, gfp_t gfp_flags) rx_fill(dev, gfp_flags); /* and open the tx floodgates */ - dev->tx_qlen = 0; + atomic_set(&dev->tx_qlen, 0); netif_wake_queue(dev->net); } -- cgit v1.2.3