From a623f87a72de35096a9eae7cc7764d0c9533c2e9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 15 Jan 2016 16:58:24 -0800 Subject: printk: do cond_resched() between lines while outputting to consoles commit 8d91f8b15361dfb438ab6eb3b319e2ded43458ff upstream. @console_may_schedule tracks whether console_sem was acquired through lock or trylock. If the former, we're inside a sleepable context and console_conditional_schedule() performs cond_resched(). This allows console drivers which use console_lock for synchronization to yield while performing time-consuming operations such as scrolling. However, the actual console outputting is performed while holding irq-safe logbuf_lock, so console_unlock() clears @console_may_schedule before starting outputting lines. Also, only a few drivers call console_conditional_schedule() to begin with. This means that when a lot of lines need to be output by console_unlock(), for example on a console registration, the task doing console_unlock() may not yield for a long time on a non-preemptible kernel. If this happens with a slow console devices, for example a serial console, the outputting task may occupy the cpu for a very long time. Long enough to trigger softlockup and/or RCU stall warnings, which in turn pile more messages, sometimes enough to trigger the next cycle of warnings incapacitating the system. Fix it by making console_unlock() insert cond_resched() between lines if @console_may_schedule. Signed-off-by: Tejun Heo Reported-by: Calvin Owens Acked-by: Jan Kara Cc: Dave Jones Cc: Kyle McMartin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/console.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index bd194343c346..ea731af2451e 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -150,6 +150,7 @@ extern int console_trylock(void); extern void console_unlock(void); extern void console_conditional_schedule(void); extern void console_unblank(void); +extern void console_flush_on_panic(void); extern struct tty_driver *console_device(int *); extern void console_stop(struct console *); extern void console_start(struct console *); -- cgit v1.2.3 From dd0d511548ea1ad8f233e9fa4a4acfb83af9bd29 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Jan 2016 16:54:46 +0000 Subject: hrtimer: Handle remaining time proper for TIME_LOW_RES commit 203cbf77de59fc8f13502dcfd11350c6d4a5c95f upstream. If CONFIG_TIME_LOW_RES is enabled we add a jiffie to the relative timeout to prevent short sleeps, but we do not account for that in interfaces which retrieve the remaining time. Helge observed that timerfd can return a remaining time larger than the relative timeout. That's not expected and breaks userland test programs. Store the information that the timer was armed relative and provide functions to adjust the remaining time. To avoid bloating the hrtimer struct make state a u8, which as a bonus results in better code on x86 at least. Reported-and-tested-by: Helge Deller Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: John Stultz Cc: linux-m68k@lists.linux-m68k.org Cc: dhowells@redhat.com Link: http://lkml.kernel.org/r/20160114164159.273328486@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/linux/hrtimer.h | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 76dd4f0da5ca..2ead22dd74a0 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -87,7 +87,8 @@ enum hrtimer_restart { * @function: timer expiry callback function * @base: pointer to the timer base (per cpu and per clock) * @state: state information (See bit values above) - * @start_pid: timer statistics field to store the pid of the task which + * @is_rel: Set if the timer was armed relative + * @start_pid: timer statistics field to store the pid of the task which * started the timer * @start_site: timer statistics field to store the site where the timer * was started @@ -101,7 +102,8 @@ struct hrtimer { ktime_t _softexpires; enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; - unsigned long state; + u8 state; + u8 is_rel; #ifdef CONFIG_TIMER_STATS int start_pid; void *start_site; @@ -321,6 +323,27 @@ static inline void clock_was_set_delayed(void) { } #endif +static inline ktime_t +__hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now) +{ + ktime_t rem = ktime_sub(timer->node.expires, now); + + /* + * Adjust relative timers for the extra we added in + * hrtimer_start_range_ns() to prevent short timeouts. + */ + if (IS_ENABLED(CONFIG_TIME_LOW_RES) && timer->is_rel) + rem.tv64 -= hrtimer_resolution; + return rem; +} + +static inline ktime_t +hrtimer_expires_remaining_adjusted(const struct hrtimer *timer) +{ + return __hrtimer_expires_remaining_adjusted(timer, + timer->base->get_time()); +} + extern void clock_was_set(void); #ifdef CONFIG_TIMERFD extern void timerfd_clock_was_set(void); @@ -390,7 +413,12 @@ static inline void hrtimer_restart(struct hrtimer *timer) } /* Query timers: */ -extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); +extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust); + +static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer) +{ + return __hrtimer_get_remaining(timer, false); +} extern u64 hrtimer_get_next_event(void); -- cgit v1.2.3 From c3f8a5000e75da9dc40215bf1acf3aa5515c75fe Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 31 Jan 2016 11:57:41 +0100 Subject: ALSA: rawmidi: Make snd_rawmidi_transmit() race-free commit 06ab30034ed9c200a570ab13c017bde248ddb2a6 upstream. A kernel WARNING in snd_rawmidi_transmit_ack() is triggered by syzkaller fuzzer: WARNING: CPU: 1 PID: 20739 at sound/core/rawmidi.c:1136 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x6f/0xa2 lib/dump_stack.c:50 [] warn_slowpath_common+0xd9/0x140 kernel/panic.c:482 [] warn_slowpath_null+0x29/0x30 kernel/panic.c:515 [] snd_rawmidi_transmit_ack+0x275/0x400 sound/core/rawmidi.c:1136 [] snd_virmidi_output_trigger+0x4b1/0x5a0 sound/core/seq/seq_virmidi.c:163 [< inline >] snd_rawmidi_output_trigger sound/core/rawmidi.c:150 [] snd_rawmidi_kernel_write1+0x549/0x780 sound/core/rawmidi.c:1223 [] snd_rawmidi_write+0x543/0xb30 sound/core/rawmidi.c:1273 [] __vfs_write+0x113/0x480 fs/read_write.c:528 [] vfs_write+0x167/0x4a0 fs/read_write.c:577 [< inline >] SYSC_write fs/read_write.c:624 [] SyS_write+0x111/0x220 fs/read_write.c:616 [] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 Also a similar warning is found but in another path: Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x6f/0xa2 lib/dump_stack.c:50 [] warn_slowpath_common+0xd9/0x140 kernel/panic.c:482 [] warn_slowpath_null+0x29/0x30 kernel/panic.c:515 [] rawmidi_transmit_ack+0x24a/0x3b0 sound/core/rawmidi.c:1133 [] snd_rawmidi_transmit_ack+0x51/0x80 sound/core/rawmidi.c:1163 [] snd_virmidi_output_trigger+0x2b6/0x570 sound/core/seq/seq_virmidi.c:185 [< inline >] snd_rawmidi_output_trigger sound/core/rawmidi.c:150 [] snd_rawmidi_kernel_write1+0x4bb/0x760 sound/core/rawmidi.c:1252 [] snd_rawmidi_write+0x543/0xb30 sound/core/rawmidi.c:1302 [] __vfs_write+0x113/0x480 fs/read_write.c:528 [] vfs_write+0x167/0x4a0 fs/read_write.c:577 [< inline >] SYSC_write fs/read_write.c:624 [] SyS_write+0x111/0x220 fs/read_write.c:616 [] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185 In the former case, the reason is that virmidi has an open code calling snd_rawmidi_transmit_ack() with the value calculated outside the spinlock. We may use snd_rawmidi_transmit() in a loop just for consuming the input data, but even there, there is a race between snd_rawmidi_transmit_peek() and snd_rawmidi_tranmit_ack(). Similarly in the latter case, it calls snd_rawmidi_transmit_peek() and snd_rawmidi_tranmit_ack() separately without protection, so they are racy as well. The patch tries to address these issues by the following ways: - Introduce the unlocked versions of snd_rawmidi_transmit_peek() and snd_rawmidi_transmit_ack() to be called inside the explicit lock. - Rewrite snd_rawmidi_transmit() to be race-free (the former case). - Make the split calls (the latter case) protected in the rawmidi spin lock. BugLink: http://lkml.kernel.org/r/CACT4Y+YPq1+cYLkadwjWa5XjzF1_Vki1eHnVn-Lm0hzhSpu5PA@mail.gmail.com BugLink: http://lkml.kernel.org/r/CACT4Y+acG4iyphdOZx47Nyq_VHGbpJQK-6xNpiqUjaZYqsXOGw@mail.gmail.com Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- include/sound/rawmidi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index f6cbef78db62..3b91ad5d5115 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -167,6 +167,10 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count); int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream, unsigned char *buffer, int count); +int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, + unsigned char *buffer, int count); +int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, + int count); /* main midi functions */ -- cgit v1.2.3 From c35f1234931e2cae81726440ad4df8ef1f313219 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Jan 2016 21:13:44 -0800 Subject: tty: Wait interruptibly for tty lock on reopen commit 0bfd464d3fdd5bb322f9cace4cc47f1796545cf7 upstream. Allow a signal to interrupt the wait for a tty reopen; eg., if the tty has starting final close and is waiting for the device to drain. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/tty.h b/include/linux/tty.h index 5e31f1b99037..6b6e811f4575 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -654,6 +654,7 @@ extern long vt_compat_ioctl(struct tty_struct *tty, /* tty_mutex.c */ /* functions for preparation of BKL removal */ extern void __lockfunc tty_lock(struct tty_struct *tty); +extern int tty_lock_interruptible(struct tty_struct *tty); extern void __lockfunc tty_unlock(struct tty_struct *tty); extern void __lockfunc tty_lock_slave(struct tty_struct *tty); extern void __lockfunc tty_unlock_slave(struct tty_struct *tty); -- cgit v1.2.3 From 7509864bb2ec8a4dfbda52a9bda577ecdcb6b2c4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 30 Dec 2015 11:47:53 +0800 Subject: crypto: af_alg - Disallow bind/setkey/... after accept(2) commit c840ac6af3f8713a71b4d2363419145760bd6044 upstream. Each af_alg parent socket obtained by socket(2) corresponds to a tfm object once bind(2) has succeeded. An accept(2) call on that parent socket creates a context which then uses the tfm object. Therefore as long as any child sockets created by accept(2) exist the parent socket must not be modified or freed. This patch guarantees this by using locks and a reference count on the parent socket. Any attempt to modify the parent socket will fail with EBUSY. Reported-by: Dmitry Vyukov Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- include/crypto/if_alg.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 018afb264ac2..589716f2ee8a 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -30,6 +30,8 @@ struct alg_sock { struct sock *parent; + unsigned int refcnt; + const struct af_alg_type *type; void *private; }; @@ -67,6 +69,7 @@ int af_alg_register_type(const struct af_alg_type *type); int af_alg_unregister_type(const struct af_alg_type *type); int af_alg_release(struct socket *sock); +void af_alg_release_parent(struct sock *sk); int af_alg_accept(struct sock *sk, struct socket *newsock); int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len); @@ -83,11 +86,6 @@ static inline struct alg_sock *alg_sk(struct sock *sk) return (struct alg_sock *)sk; } -static inline void af_alg_release_parent(struct sock *sk) -{ - sock_put(alg_sk(sk)->parent); -} - static inline void af_alg_init_completion(struct af_alg_completion *completion) { init_completion(&completion->completion); -- cgit v1.2.3 From d1b886e326f3578e8d6066c590d32a5c7028afbd Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 4 Jan 2016 13:35:18 +0900 Subject: crypto: af_alg - Add nokey compatibility path commit 37766586c965d63758ad542325a96d5384f4a8c9 upstream. This patch adds a compatibility path to support old applications that do acept(2) before setkey. Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- include/crypto/if_alg.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 589716f2ee8a..df8284415c56 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -52,9 +52,11 @@ struct af_alg_type { void (*release)(void *private); int (*setkey)(void *private, const u8 *key, unsigned int keylen); int (*accept)(void *private, struct sock *sk); + int (*accept_nokey)(void *private, struct sock *sk); int (*setauthsize)(void *private, unsigned int authsize); struct proto_ops *ops; + struct proto_ops *ops_nokey; struct module *owner; char name[14]; }; -- cgit v1.2.3 From f2e274ce8bfe8ab928ab1968a82a3b7eec27a69b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 8 Jan 2016 21:28:26 +0800 Subject: crypto: hash - Add crypto_ahash_has_setkey commit a5596d6332787fd383b3b5427b41f94254430827 upstream. This patch adds a way for ahash users to determine whether a key is required by a crypto_ahash transform. Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- include/crypto/hash.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 3d69c93d50e8..6361892ea737 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -204,6 +204,7 @@ struct crypto_ahash { unsigned int keylen); unsigned int reqsize; + bool has_setkey; struct crypto_tfm base; }; @@ -375,6 +376,11 @@ static inline void *ahash_request_ctx(struct ahash_request *req) int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen); +static inline bool crypto_ahash_has_setkey(struct crypto_ahash *tfm) +{ + return tfm->has_setkey; +} + /** * crypto_ahash_finup() - update and finalize message digest * @req: reference to the ahash_request handle that holds all information -- cgit v1.2.3 From 7f5c995746ebc3f431c596ffabb98af1b3e3de86 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 11 Jan 2016 21:26:50 +0800 Subject: crypto: skcipher - Add crypto_skcipher_has_setkey commit a1383cd86a062fc798899ab20f0ec2116cce39cb upstream. This patch adds a way for skcipher users to determine whether a key is required by a transform. Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- include/crypto/skcipher.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index d8dd41fb034f..fd8742a40ff3 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -61,6 +61,8 @@ struct crypto_skcipher { unsigned int ivsize; unsigned int reqsize; + bool has_setkey; + struct crypto_tfm base; }; @@ -305,6 +307,11 @@ static inline int crypto_skcipher_setkey(struct crypto_skcipher *tfm, return tfm->setkey(tfm, key, keylen); } +static inline bool crypto_skcipher_has_setkey(struct crypto_skcipher *tfm) +{ + return tfm->has_setkey; +} + /** * crypto_skcipher_reqtfm() - obtain cipher handle from request * @req: skcipher_request out of which the cipher handle is to be obtained -- cgit v1.2.3 From 758c560bcbc7d7814a391597fbcba02f087fb7f7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 13 Jan 2016 14:59:03 +0800 Subject: crypto: af_alg - Allow af_af_alg_release_parent to be called on nokey path commit 6a935170a980024dd29199e9dbb5c4da4767a1b9 upstream. This patch allows af_alg_release_parent to be called even for nokey sockets. Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- include/crypto/if_alg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index df8284415c56..a2bfd7843f18 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -31,6 +31,7 @@ struct alg_sock { struct sock *parent; unsigned int refcnt; + unsigned int nokey_refcnt; const struct af_alg_type *type; void *private; -- cgit v1.2.3 From 1bdf16025dfc5ed335f3d7d8bbe78461583105fc Mon Sep 17 00:00:00 2001 From: "Herton R. Krzesinski" Date: Thu, 14 Jan 2016 17:56:58 -0200 Subject: pty: make sure super_block is still valid in final /dev/tty close commit 1f55c718c290616889c04946864a13ef30f64929 upstream. Considering current pty code and multiple devpts instances, it's possible to umount a devpts file system while a program still has /dev/tty opened pointing to a previosuly closed pty pair in that instance. In the case all ptmx and pts/N files are closed, umount can be done. If the program closes /dev/tty after umount is done, devpts_kill_index will use now an invalid super_block, which was already destroyed in the umount operation after running ->kill_sb. This is another "use after free" type of issue, but now related to the allocated super_block instance. To avoid the problem (warning at ida_remove and potential crashes) for this specific case, I added two functions in devpts which grabs additional references to the super_block, which pty code now uses so it makes sure the super block structure is still valid until pty shutdown is done. I also moved the additional inode references to the same functions, which also covered similar case with inode being freed before /dev/tty final close/shutdown. Signed-off-by: Herton R. Krzesinski Reviewed-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/devpts_fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index 251a2090a554..e0ee0b3000b2 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -19,6 +19,8 @@ int devpts_new_index(struct inode *ptmx_inode); void devpts_kill_index(struct inode *ptmx_inode, int idx); +void devpts_add_ref(struct inode *ptmx_inode); +void devpts_del_ref(struct inode *ptmx_inode); /* mknod in devpts */ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index, void *priv); @@ -32,6 +34,8 @@ void devpts_pty_kill(struct inode *inode); /* Dummy stubs in the no-pty case */ static inline int devpts_new_index(struct inode *ptmx_inode) { return -EINVAL; } static inline void devpts_kill_index(struct inode *ptmx_inode, int idx) { } +static inline void devpts_add_ref(struct inode *ptmx_inode) { } +static inline void devpts_del_ref(struct inode *ptmx_inode) { } static inline struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index, void *priv) { -- cgit v1.2.3 From 969624b7c1c8c9784651eb97431e6f2bbb7a024c Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 20 Jan 2016 15:00:04 -0800 Subject: ptrace: use fsuid, fsgid, effective creds for fs access checks commit caaee6234d05a58c5b4d05e7bf766131b810a657 upstream. By checking the effective credentials instead of the real UID / permitted capabilities, ensure that the calling process actually intended to use its credentials. To ensure that all ptrace checks use the correct caller credentials (e.g. in case out-of-tree code or newly added code omits the PTRACE_MODE_*CREDS flag), use two new flags and require one of them to be set. The problem was that when a privileged task had temporarily dropped its privileges, e.g. by calling setreuid(0, user_uid), with the intent to perform following syscalls with the credentials of a user, it still passed ptrace access checks that the user would not be able to pass. While an attacker should not be able to convince the privileged task to perform a ptrace() syscall, this is a problem because the ptrace access check is reused for things in procfs. In particular, the following somewhat interesting procfs entries only rely on ptrace access checks: /proc/$pid/stat - uses the check for determining whether pointers should be visible, useful for bypassing ASLR /proc/$pid/maps - also useful for bypassing ASLR /proc/$pid/cwd - useful for gaining access to restricted directories that contain files with lax permissions, e.g. in this scenario: lrwxrwxrwx root root /proc/13020/cwd -> /root/foobar drwx------ root root /root drwxr-xr-x root root /root/foobar -rw-r--r-- root root /root/foobar/secret Therefore, on a system where a root-owned mode 6755 binary changes its effective credentials as described and then dumps a user-specified file, this could be used by an attacker to reveal the memory layout of root's processes or reveal the contents of files he is not allowed to access (through /proc/$pid/cwd). [akpm@linux-foundation.org: fix warning] Signed-off-by: Jann Horn Acked-by: Kees Cook Cc: Casey Schaufler Cc: Oleg Nesterov Cc: Ingo Molnar Cc: James Morris Cc: "Serge E. Hallyn" Cc: Andy Shevchenko Cc: Andy Lutomirski Cc: Al Viro Cc: "Eric W. Biederman" Cc: Willy Tarreau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/ptrace.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 061265f92876..504c98a278d4 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -57,7 +57,29 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); #define PTRACE_MODE_READ 0x01 #define PTRACE_MODE_ATTACH 0x02 #define PTRACE_MODE_NOAUDIT 0x04 -/* Returns true on success, false on denial. */ +#define PTRACE_MODE_FSCREDS 0x08 +#define PTRACE_MODE_REALCREDS 0x10 + +/* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */ +#define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS) +#define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS) +#define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS) +#define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS) + +/** + * ptrace_may_access - check whether the caller is permitted to access + * a target task. + * @task: target task + * @mode: selects type of access and caller credentials + * + * Returns true on success, false on denial. + * + * One of the flags PTRACE_MODE_FSCREDS and PTRACE_MODE_REALCREDS must + * be set in @mode to specify whether the access was requested through + * a filesystem syscall (should use effective capabilities and fsuid + * of the caller) or through an explicit syscall such as + * process_vm_writev or ptrace (and should use the real credentials). + */ extern bool ptrace_may_access(struct task_struct *task, unsigned int mode); static inline int ptrace_reparented(struct task_struct *child) -- cgit v1.2.3 From 2fa82bbbc73a7d8716e3f7aba6b2b5c84147a2fc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Feb 2016 22:26:42 +0100 Subject: tracing: Fix freak link error caused by branch tracer commit b33c8ff4431a343561e2319f17c14286f2aa52e2 upstream. In my randconfig tests, I came across a bug that involves several components: * gcc-4.9 through at least 5.3 * CONFIG_GCOV_PROFILE_ALL enabling -fprofile-arcs for all files * CONFIG_PROFILE_ALL_BRANCHES overriding every if() * The optimized implementation of do_div() that tries to replace a library call with an division by multiplication * code in drivers/media/dvb-frontends/zl10353.c doing u32 adc_clock = 450560; /* 45.056 MHz */ if (state->config.adc_clock) adc_clock = state->config.adc_clock; do_div(value, adc_clock); In this case, gcc fails to determine whether the divisor in do_div() is __builtin_constant_p(). In particular, it concludes that __builtin_constant_p(adc_clock) is false, while __builtin_constant_p(!!adc_clock) is true. That in turn throws off the logic in do_div() that also uses __builtin_constant_p(), and instead of picking either the constant- optimized division, and the code in ilog2() that uses __builtin_constant_p() to figure out whether it knows the answer at compile time. The result is a link error from failing to find multiple symbols that should never have been called based on the __builtin_constant_p(): dvb-frontends/zl10353.c:138: undefined reference to `____ilog2_NaN' dvb-frontends/zl10353.c:138: undefined reference to `__aeabi_uldivmod' ERROR: "____ilog2_NaN" [drivers/media/dvb-frontends/zl10353.ko] undefined! ERROR: "__aeabi_uldivmod" [drivers/media/dvb-frontends/zl10353.ko] undefined! This patch avoids the problem by changing __trace_if() to check whether the condition is known at compile-time to be nonzero, rather than checking whether it is actually a constant. I see this one link error in roughly one out of 1600 randconfig builds on ARM, and the patch fixes all known instances. Link: http://lkml.kernel.org/r/1455312410-1058841-1-git-send-email-arnd@arndb.de Acked-by: Nicolas Pitre Fixes: ab3c9c686e22 ("branch tracer, intel-iommu: fix build with CONFIG_BRANCH_TRACER=y") Signed-off-by: Arnd Bergmann Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 4dac1036594f..6fc9a6dd5ed2 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -144,7 +144,7 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); */ #define if(cond, ...) __trace_if( (cond , ## __VA_ARGS__) ) #define __trace_if(cond) \ - if (__builtin_constant_p((cond)) ? !!(cond) : \ + if (__builtin_constant_p(!!(cond)) ? !!(cond) : \ ({ \ int ______r; \ static struct ftrace_branch_data \ -- cgit v1.2.3 From 152fb02241b60ffb8d406b87c68d1908478a205f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 15 Feb 2016 12:36:14 -0500 Subject: tracepoints: Do not trace when cpu is offline commit f37755490fe9bf76f6ba1d8c6591745d3574a6a6 upstream. The tracepoint infrastructure uses RCU sched protection to enable and disable tracepoints safely. There are some instances where tracepoints are used in infrastructure code (like kfree()) that get called after a CPU is going offline, and perhaps when it is coming back online but hasn't been registered yet. This can probuce the following warning: [ INFO: suspicious RCU usage. ] 4.4.0-00006-g0fe53e8-dirty #34 Tainted: G S ------------------------------- include/trace/events/kmem.h:141 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1 no locks held by swapper/8/0. stack backtrace: CPU: 8 PID: 0 Comm: swapper/8 Tainted: G S 4.4.0-00006-g0fe53e8-dirty #34 Call Trace: [c0000005b76c78d0] [c0000000008b9540] .dump_stack+0x98/0xd4 (unreliable) [c0000005b76c7950] [c00000000010c898] .lockdep_rcu_suspicious+0x108/0x170 [c0000005b76c79e0] [c00000000029adc0] .kfree+0x390/0x440 [c0000005b76c7a80] [c000000000055f74] .destroy_context+0x44/0x100 [c0000005b76c7b00] [c0000000000934a0] .__mmdrop+0x60/0x150 [c0000005b76c7b90] [c0000000000e3ff0] .idle_task_exit+0x130/0x140 [c0000005b76c7c20] [c000000000075804] .pseries_mach_cpu_die+0x64/0x310 [c0000005b76c7cd0] [c000000000043e7c] .cpu_die+0x3c/0x60 [c0000005b76c7d40] [c0000000000188d8] .arch_cpu_idle_dead+0x28/0x40 [c0000005b76c7db0] [c000000000101e6c] .cpu_startup_entry+0x50c/0x560 [c0000005b76c7ed0] [c000000000043bd8] .start_secondary+0x328/0x360 [c0000005b76c7f90] [c000000000008a6c] start_secondary_prolog+0x10/0x14 This warning is not a false positive either. RCU is not protecting code that is being executed while the CPU is offline. Instead of playing "whack-a-mole(TM)" and adding conditional statements to the tracepoints we find that are used in this instance, simply add a cpu_online() test to the tracepoint code where the tracepoint will be ignored if the CPU is offline. Use of raw_smp_processor_id() is fine, as there should never be a case where the tracepoint code goes from running on a CPU that is online and suddenly gets migrated to a CPU that is offline. Link: http://lkml.kernel.org/r/1455387773-4245-1-git-send-email-kda@linux-powerpc.org Reported-by: Denis Kirjanov Fixes: 97e1c18e8d17b ("tracing: Kernel Tracepoints") Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- include/linux/tracepoint.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 696a339c592c..03c7efb60c91 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -14,8 +14,10 @@ * See the file COPYING for more details. */ +#include #include #include +#include #include #include @@ -146,6 +148,9 @@ extern void syscall_unregfunc(void); void *it_func; \ void *__data; \ \ + if (!cpu_online(raw_smp_processor_id())) \ + return; \ + \ if (!(cond)) \ return; \ prercu; \ -- cgit v1.2.3 From 413aab16bc7b9b58fb1a52010eb3af3d227d7007 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 5 Feb 2016 15:36:50 -0800 Subject: mm: replace vma_lock_anon_vma with anon_vma_lock_read/write commit 12352d3cae2cebe18805a91fab34b534d7444231 upstream. Sequence vma_lock_anon_vma() - vma_unlock_anon_vma() isn't safe if anon_vma appeared between lock and unlock. We have to check anon_vma first or call anon_vma_prepare() to be sure that it's here. There are only few users of these legacy helpers. Let's get rid of them. This patch fixes anon_vma lock imbalance in validate_mm(). Write lock isn't required here, read lock is enough. And reorders expand_downwards/expand_upwards: security_mmap_addr() and wrapping-around check don't have to be under anon vma lock. Link: https://lkml.kernel.org/r/CACT4Y+Y908EjM2z=706dv4rV6dWtxTLK9nFg9_7DhRMLppBo2g@mail.gmail.com Signed-off-by: Konstantin Khlebnikov Reported-by: Dmitry Vyukov Acked-by: Kirill A. Shutemov Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/rmap.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 29446aeef36e..ddda2ac3446e 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -108,20 +108,6 @@ static inline void put_anon_vma(struct anon_vma *anon_vma) __put_anon_vma(anon_vma); } -static inline void vma_lock_anon_vma(struct vm_area_struct *vma) -{ - struct anon_vma *anon_vma = vma->anon_vma; - if (anon_vma) - down_write(&anon_vma->root->rwsem); -} - -static inline void vma_unlock_anon_vma(struct vm_area_struct *vma) -{ - struct anon_vma *anon_vma = vma->anon_vma; - if (anon_vma) - up_write(&anon_vma->root->rwsem); -} - static inline void anon_vma_lock_write(struct anon_vma *anon_vma) { down_write(&anon_vma->root->rwsem); -- cgit v1.2.3 From 2231e5748746cd57df389521397e1c7f91882077 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 15 Feb 2016 12:42:38 +0000 Subject: iommu/vt-d: Clear PPR bit to ensure we get more page request interrupts commit 46924008273ed03bd11dbb32136e3da4cfe056e1 upstream. According to the VT-d specification we need to clear the PPR bit in the Page Request Status register when handling page requests, or the hardware won't generate any more interrupts. This wasn't actually necessary on SKL/KBL (which may well be the subject of a hardware erratum, although it's harmless enough). But other implementations do appear to get it right, and we only ever get one interrupt unless we clear the PPR bit. Reported-by: CQ Tang Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- include/linux/intel-iommu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 821273ca4873..2d9b650047a5 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -235,6 +235,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) /* low 64 bit */ #define dma_frcd_page_addr(d) (d & (((u64)-1) << PAGE_SHIFT)) +/* PRS_REG */ +#define DMA_PRS_PPR ((u32)1) + #define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ do { \ cycles_t start_time = get_cycles(); \ -- cgit v1.2.3 From f4595e0081495b677a98c780e9ec1ab68ce89488 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 2 Feb 2016 16:57:52 -0800 Subject: radix-tree: fix race in gang lookup commit 46437f9a554fbe3e110580ca08ab703b59f2f95a upstream. If the indirect_ptr bit is set on a slot, that indicates we need to redo the lookup. Introduce a new function radix_tree_iter_retry() which forces the loop to retry the lookup by setting 'slot' to NULL and turning the iterator back to point at the problematic entry. This is a pretty rare problem to hit at the moment; the lookup has to race with a grow of the radix tree from a height of 0. The consequences of hitting this race are that gang lookup could return a pointer to a radix_tree_node instead of a pointer to whatever the user had inserted in the tree. Fixes: cebbd29e1c2f ("radix-tree: rewrite gang lookup using iterator") Signed-off-by: Matthew Wilcox Cc: Hugh Dickins Cc: Ohad Ben-Cohen Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/radix-tree.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 33170dbd9db4..1a2b2276ffb3 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -369,6 +369,22 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start) void **radix_tree_next_chunk(struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned flags); +/** + * radix_tree_iter_retry - retry this chunk of the iteration + * @iter: iterator state + * + * If we iterate over a tree protected only by the RCU lock, a race + * against deletion or creation may result in seeing a slot for which + * radix_tree_deref_retry() returns true. If so, call this function + * and continue the iteration. + */ +static inline __must_check +void **radix_tree_iter_retry(struct radix_tree_iter *iter) +{ + iter->next_index = iter->index; + return NULL; +} + /** * radix_tree_chunk_size - get current chunk size * -- cgit v1.2.3 From 55e0d9869f1d3a6bbd5d1e864c0e866fe1247f97 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 5 Feb 2016 15:37:01 -0800 Subject: radix-tree: fix oops after radix_tree_iter_retry commit 732042821cfa106b3c20b9780e4c60fee9d68900 upstream. Helper radix_tree_iter_retry() resets next_index to the current index. In following radix_tree_next_slot current chunk size becomes zero. This isn't checked and it tries to dereference null pointer in slot. Tagged iterator is fine because retry happens only at slot 0 where tag bitmask in iter->tags is filled with single bit. Fixes: 46437f9a554f ("radix-tree: fix race in gang lookup") Signed-off-by: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: Hugh Dickins Cc: Ohad Ben-Cohen Cc: Jeremiah Mahler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/radix-tree.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 1a2b2276ffb3..5d5174b59802 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -391,7 +391,7 @@ void **radix_tree_iter_retry(struct radix_tree_iter *iter) * @iter: pointer to radix tree iterator * Returns: current chunk size */ -static __always_inline unsigned +static __always_inline long radix_tree_chunk_size(struct radix_tree_iter *iter) { return iter->next_index - iter->index; @@ -425,9 +425,9 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) return slot + offset + 1; } } else { - unsigned size = radix_tree_chunk_size(iter) - 1; + long size = radix_tree_chunk_size(iter); - while (size--) { + while (--size > 0) { slot++; iter->index++; if (likely(*slot)) -- cgit v1.2.3