From d629be850ac6e296dfe156604d7bb5202f1613da Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 11 Jul 2018 14:39:42 +0200 Subject: net: Don't copy pfmemalloc flag in __copy_skb_header() [ Upstream commit 8b7008620b8452728cadead460a36f64ed78c460 ] The pfmemalloc flag indicates that the skb was allocated from the PFMEMALLOC reserves, and the flag is currently copied on skb copy and clone. However, an skb copied from an skb flagged with pfmemalloc wasn't necessarily allocated from PFMEMALLOC reserves, and on the other hand an skb allocated that way might be copied from an skb that wasn't. So we should not copy the flag on skb copy, and rather decide whether to allow an skb to be associated with sockets unrelated to page reclaim depending only on how it was allocated. Move the pfmemalloc flag before headers_start[0] using an existing 1-bit hole, so that __copy_skb_header() doesn't copy it. When cloning, we'll now take care of this flag explicitly, contravening to the warning comment of __skb_clone(). While at it, restore the newline usage introduced by commit b19372273164 ("net: reorganize sk_buff for faster __copy_skb_header()") to visually separate bytes used in bitfields after headers_start[0], that was gone after commit a9e419dc7be6 ("netfilter: merge ctinfo into nfct pointer storage area"), and describe the pfmemalloc flag in the kernel-doc structure comment. This doesn't change the size of sk_buff or cacheline boundaries, but consolidates the 15 bits hole before tc_index into a 2 bytes hole before csum, that could now be filled more easily. Reported-by: Patrick Talbert Fixes: c93bdd0e03e8 ("netvm: allow skb allocation to use PFMEMALLOC reserves") Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a6da214d0584..c28bd8be290a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -514,6 +514,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices * @xmit_more: More SKBs are pending for this queue + * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport @@ -594,8 +595,8 @@ struct sk_buff { fclone:2, peeked:1, head_frag:1, - xmit_more:1; - /* one bit hole */ + xmit_more:1, + pfmemalloc:1; kmemcheck_bitfield_end(flags1); /* fields enclosed in headers_start/headers_end are copied @@ -615,19 +616,18 @@ struct sk_buff { __u8 __pkt_type_offset[0]; __u8 pkt_type:3; - __u8 pfmemalloc:1; __u8 ignore_df:1; __u8 nfctinfo:3; - __u8 nf_trace:1; + __u8 ip_summed:2; __u8 ooo_okay:1; __u8 l4_hash:1; __u8 sw_hash:1; __u8 wifi_acked_valid:1; __u8 wifi_acked:1; - __u8 no_fcs:1; + /* Indicates the inner headers are valid in the skbuff. */ __u8 encapsulation:1; __u8 encap_hdr_csum:1; @@ -635,11 +635,11 @@ struct sk_buff { __u8 csum_complete_sw:1; __u8 csum_level:2; __u8 csum_bad:1; - #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif __u8 ipvs_property:1; + __u8 inner_protocol_type:1; __u8 remcsum_offload:1; /* 3 or 5 bit hole */ -- cgit v1.2.3 From d8067aba239cbd2bfd64cdd548a914b20c58d189 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 14 Jul 2018 02:34:31 -0700 Subject: x86/bugs: Expose /sys/../spec_store_bypass commit c456442cd3a59eeb1d60293c26cbe2ff2c4e42cf upstream Add the sysfs file for the new vulerability. It does not do much except show the words 'Vulnerable' for recent x86 cores. Intel cores prior to family 6 are known not to be vulnerable, and so are some Atoms and some Xeon Phi. It assumes that older Cyrix, Centaur, etc. cores are immune. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 7e04bcd9af8e..2f9d12022100 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -46,6 +46,8 @@ extern ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_spec_store_bypass(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- cgit v1.2.3 From 13fa2c65c9a8c2cd5f2a9799891582c40b6f5cfa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:35:28 -0700 Subject: prctl: Add speculation control prctls commit b617cfc858161140d69cc0b5cc211996b557a1c7 upstream Add two new prctls to control aspects of speculation related vulnerabilites and their mitigations to provide finer grained control over performance impacting mitigations. PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature which is selected with arg2 of prctl(2). The return value uses bit 0-2 with the following meaning: Bit Define Description 0 PR_SPEC_PRCTL Mitigation can be controlled per task by PR_SET_SPECULATION_CTRL 1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is disabled 2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is enabled If all bits are 0 the CPU is not affected by the speculation misfeature. If PR_SPEC_PRCTL is set, then the per task control of the mitigation is available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation misfeature will fail. PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which is selected by arg2 of prctl(2) per task. arg3 is used to hand in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE. The common return values are: EINVAL prctl is not implemented by the architecture or the unused prctl() arguments are not 0 ENODEV arg2 is selecting a not supported speculation misfeature PR_SET_SPECULATION_CTRL has these additional return values: ERANGE arg3 is incorrect, i.e. it's not either PR_SPEC_ENABLE or PR_SPEC_DISABLE ENXIO prctl control of the selected speculation misfeature is disabled The first supported controlable speculation misfeature is PR_SPEC_STORE_BYPASS. Add the define so this can be shared between architectures. Based on an initial patch from Tim Chen and mostly rewritten. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- include/linux/nospec.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nospec.h b/include/linux/nospec.h index e791ebc65c9c..700bb8a4e4ea 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -55,4 +55,9 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, \ (typeof(_i)) (_i & _mask); \ }) + +/* Speculation control prctl */ +int arch_prctl_spec_ctrl_get(unsigned long which); +int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl); + #endif /* _LINUX_NOSPEC_H */ -- cgit v1.2.3 From b6f4a6285d7979b45d629e65c880279930b98ef1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 14 Jul 2018 02:36:17 -0700 Subject: nospec: Allow getting/setting on non-current task commit 7bbf1373e228840bb0295a2ca26d548ef37f448e upstream Adjust arch_prctl_get/set_spec_ctrl() to operate on tasks other than current. This is needed both for /proc/$pid/status queries and for seccomp (since thread-syncing can trigger seccomp in non-current threads). Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- include/linux/nospec.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nospec.h b/include/linux/nospec.h index 700bb8a4e4ea..a908c954484d 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -7,6 +7,8 @@ #define _LINUX_NOSPEC_H #include +struct task_struct; + /** * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise * @index: array element index @@ -57,7 +59,8 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, }) /* Speculation control prctl */ -int arch_prctl_spec_ctrl_get(unsigned long which); -int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl); +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which); +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl); #endif /* _LINUX_NOSPEC_H */ -- cgit v1.2.3 From 3f9cb20f9126db1edb1fad78a0e94ff8e9ae94e2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:36:41 -0700 Subject: prctl: Add force disable speculation commit 356e4bfff2c5489e016fdb925adbf12a1e3950ee upstream For certain use cases it is desired to enforce mitigations so they cannot be undone afterwards. That's important for loader stubs which want to prevent a child from disabling the mitigation again. Will also be used for seccomp(). The extra state preserving of the prctl state for SSB is a preparatory step for EBPF dymanic speculation control. Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- include/linux/sched.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 90bea398e5e0..725498cc5d30 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2167,6 +2167,8 @@ static inline void memalloc_noio_restore(unsigned int flags) #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ +#define PFA_SPEC_SSB_DISABLE 4 /* Speculative Store Bypass disabled */ +#define PFA_SPEC_SSB_FORCE_DISABLE 5 /* Speculative Store Bypass force disabled*/ #define TASK_PFA_TEST(name, func) \ @@ -2190,6 +2192,13 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab) TASK_PFA_SET(SPREAD_SLAB, spread_slab) TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) +TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable) +TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable) +TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) + +TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) +TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) + /* * task->jobctl flags */ -- cgit v1.2.3 From c463c0f037f2d83aea54415ed7c61deb0b90333b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 14 Jul 2018 02:36:57 -0700 Subject: seccomp: Add filter flag to opt-out of SSB mitigation commit 00a02d0c502a06d15e07b857f8ff921e3e402675 upstream If a seccomp user is not interested in Speculative Store Bypass mitigation by default, it can set the new SECCOMP_FILTER_FLAG_SPEC_ALLOW flag when adding filters. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- include/linux/seccomp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 2296e6b2f690..5a53d34bba26 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -3,7 +3,8 @@ #include -#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC) +#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ + SECCOMP_FILTER_FLAG_SPEC_ALLOW) #ifdef CONFIG_SECCOMP -- cgit v1.2.3 From 9237a1b0828962191107e702cf56c88db9f9d455 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 14 Jul 2018 02:37:05 -0700 Subject: seccomp: Move speculation migitation control to arch code commit 8bf37d8c067bb7eb8e7c381bdadf9bd89182b6bc upstream The migitation control is simpler to implement in architecture code as it avoids the extra function call to check the mode. Aside of that having an explicit seccomp enabled mode in the architecture mitigations would require even more workarounds. Move it into architecture code and provide a weak function in the seccomp code. Remove the 'which' argument as this allows the architecture to decide which mitigations are relevant for seccomp. Signed-off-by: Thomas Gleixner Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Srivatsa S. Bhat Reviewed-by: Matt Helsley (VMware) Reviewed-by: Alexey Makhalov Reviewed-by: Bo Gan Signed-off-by: Greg Kroah-Hartman --- include/linux/nospec.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nospec.h b/include/linux/nospec.h index a908c954484d..0c5ef54fd416 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -62,5 +62,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which); int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, unsigned long ctrl); +/* Speculation control for seccomp enforced mitigation */ +void arch_seccomp_spec_mitigate(struct task_struct *task); #endif /* _LINUX_NOSPEC_H */ -- cgit v1.2.3