From 36205b7fa9638db8ce34c8d98165263641b15626 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Mar 2016 14:08:15 -0800 Subject: UPSTREAM: kasan: add functions to clear stack poison Functions which the compiler has instrumented for ASAN place poison on the stack shadow upon entry and remove this poison prior to returning. In some cases (e.g. hotplug and idle), CPUs may exit the kernel a number of levels deep in C code. If there are any instrumented functions on this critical path, these will leave portions of the idle thread stack shadow poisoned. If a CPU returns to the kernel via a different path (e.g. a cold entry), then depending on stack frame layout subsequent calls to instrumented functions may use regions of the stack with stale poison, resulting in (spurious) KASAN splats to the console. Contemporary GCCs always add stack shadow poisoning when ASAN is enabled, even when asked to not instrument a function [1], so we can't simply annotate functions on the critical path to avoid poisoning. Instead, this series explicitly removes any stale poison before it can be hit. In the common hotplug case we clear the entire stack shadow in common code, before a CPU is brought online. On architectures which perform a cold return as part of cpu idle may retain an architecture-specific amount of stack contents. To retain the poison for this retained context, the arch code must call the core KASAN code, passing a "watermark" stack pointer value beyond which shadow will be cleared. Architectures which don't perform a cold return as part of idle do not need any additional code. This patch (of 3): Functions which the compiler has instrumented for KASAN place poison on the stack shadow upon entry and remove this poision prior to returning. In some cases (e.g. hotplug and idle), CPUs may exit the kernel a number of levels deep in C code. If there are any instrumented functions on this critical path, these will leave portions of the stack shadow poisoned. If a CPU returns to the kernel via a different path (e.g. a cold entry), then depending on stack frame layout subsequent calls to instrumented functions may use regions of the stack with stale poison, resulting in (spurious) KASAN splats to the console. To avoid this, we must clear stale poison from the stack prior to instrumented functions being called. This patch adds functions to the KASAN core for removing poison from (portions of) a task's stack. These will be used by subsequent patches to avoid problems with hotplug and idle. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Reviewed-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Lorenzo Pieralisi Cc: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from e3ae116339f9a0c77523abc95e338fa405946e07) Change-Id: I9be31b714d5bdaec94a2dad3f0e468c094fe5fa2 Signed-off-by: Paul Lawrence --- include/linux/kasan.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 4b9f85c963d0..0fdc798e3ff7 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -1,6 +1,7 @@ #ifndef _LINUX_KASAN_H #define _LINUX_KASAN_H +#include #include struct kmem_cache; @@ -13,7 +14,6 @@ struct vm_struct; #include #include -#include extern unsigned char kasan_zero_page[PAGE_SIZE]; extern pte_t kasan_zero_pte[PTRS_PER_PTE]; @@ -43,6 +43,8 @@ static inline void kasan_disable_current(void) void kasan_unpoison_shadow(const void *address, size_t size); +void kasan_unpoison_task_stack(struct task_struct *task); + void kasan_alloc_pages(struct page *page, unsigned int order); void kasan_free_pages(struct page *page, unsigned int order); @@ -66,6 +68,8 @@ void kasan_free_shadow(const struct vm_struct *vm); static inline void kasan_unpoison_shadow(const void *address, size_t size) {} +static inline void kasan_unpoison_task_stack(struct task_struct *task) {} + static inline void kasan_enable_current(void) {} static inline void kasan_disable_current(void) {} -- cgit v1.2.3 From 5b5a89ff484f3652de76375ef6f820667198126b Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 15 Mar 2016 14:54:21 -0700 Subject: UPSTREAM: mm/slab: clean up DEBUG_PAGEALLOC processing code Currently, open code for checking DEBUG_PAGEALLOC cache is spread to some sites. It makes code unreadable and hard to change. This patch cleans up this code. The following patch will change the criteria for DEBUG_PAGEALLOC cache so this clean-up will help it, too. [akpm@linux-foundation.org: fix build with CONFIG_DEBUG_PAGEALLOC=n] Signed-off-by: Joonsoo Kim Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Jesper Dangaard Brouer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 40b44137971c2e5865a78f9f7de274449983ccb5) Change-Id: I784df4a54b62f77a22f8fa70990387fdf968219f Signed-off-by: Paul Lawrence --- include/linux/mm.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 941c7f711ada..652e73967707 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2161,14 +2161,18 @@ kernel_map_pages(struct page *page, int numpages, int enable) } #ifdef CONFIG_HIBERNATION extern bool kernel_page_present(struct page *page); -#endif /* CONFIG_HIBERNATION */ -#else +#endif /* CONFIG_HIBERNATION */ +#else /* CONFIG_DEBUG_PAGEALLOC */ static inline void kernel_map_pages(struct page *page, int numpages, int enable) {} #ifdef CONFIG_HIBERNATION static inline bool kernel_page_present(struct page *page) { return true; } -#endif /* CONFIG_HIBERNATION */ -#endif +#endif /* CONFIG_HIBERNATION */ +static inline bool debug_pagealloc_enabled(void) +{ + return false; +} +#endif /* CONFIG_DEBUG_PAGEALLOC */ #ifdef __HAVE_ARCH_GATE_AREA extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); -- cgit v1.2.3 From 63fd82260452806eec52aae2b842412c2070a444 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 15 Mar 2016 14:54:24 -0700 Subject: UPSTREAM: mm/slab: alternative implementation for DEBUG_SLAB_LEAK DEBUG_SLAB_LEAK is a debug option. It's current implementation requires status buffer so we need more memory to use it. And, it cause kmem_cache initialization step more complex. To remove this extra memory usage and to simplify initialization step, this patch implement this feature with another way. When user requests to get slab object owner information, it marks that getting information is started. And then, all free objects in caches are flushed to corresponding slab page. Now, we can distinguish all freed object so we can know all allocated objects, too. After collecting slab object owner information on allocated objects, mark is checked that there is no free during the processing. If true, we can be sure that our information is correct so information is returned to user. Although this way is rather complex, it has two important benefits mentioned above. So, I think it is worth changing. There is one drawback that it takes more time to get slab object owner information but it is just a debug option so it doesn't matter at all. To help review, this patch implements new way only. Following patch will remove useless code. Signed-off-by: Joonsoo Kim Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Jesper Dangaard Brouer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from d31676dfde257cb2b3e52d4e657d8ad2251e4d49) Change-Id: I204ea0dd5553577d17c93f32f0d5a797ba0304af Signed-off-by: Paul Lawrence --- include/linux/slab_def.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 33d049066c3d..3b3e7747495d 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -60,6 +60,9 @@ struct kmem_cache { atomic_t allocmiss; atomic_t freehit; atomic_t freemiss; +#ifdef CONFIG_DEBUG_SLAB_LEAK + atomic_t store_user_clean; +#endif /* * If debugging is enabled, then the allocator can add additional -- cgit v1.2.3 From 5451a4a87d5364b652e401f54f4bf4d60a3f88e8 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 25 Mar 2016 14:21:59 -0700 Subject: UPSTREAM: mm, kasan: SLAB support Add KASAN hooks to SLAB allocator. This patch is based on the "mm: kasan: unified support for SLUB and SLAB allocators" patch originally prepared by Dmitry Chernenkov. Signed-off-by: Alexander Potapenko Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Steven Rostedt Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 7ed2f9e663854db313f177a511145630e398b402) Change-Id: I131fdafc1c27a25732475f5bbd1653b66954e1b7 Signed-off-by: Paul Lawrence --- include/linux/kasan.h | 12 ++++++++++++ include/linux/slab.h | 6 ++++++ include/linux/slab_def.h | 14 ++++++++++++++ include/linux/slub_def.h | 11 +++++++++++ 4 files changed, 43 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 0fdc798e3ff7..839f2007a0f9 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -48,6 +48,9 @@ void kasan_unpoison_task_stack(struct task_struct *task); void kasan_alloc_pages(struct page *page, unsigned int order); void kasan_free_pages(struct page *page, unsigned int order); +void kasan_cache_create(struct kmem_cache *cache, size_t *size, + unsigned long *flags); + void kasan_poison_slab(struct page *page); void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); void kasan_poison_object_data(struct kmem_cache *cache, void *object); @@ -61,6 +64,11 @@ void kasan_krealloc(const void *object, size_t new_size); void kasan_slab_alloc(struct kmem_cache *s, void *object); void kasan_slab_free(struct kmem_cache *s, void *object); +struct kasan_cache { + int alloc_meta_offset; + int free_meta_offset; +}; + int kasan_module_alloc(void *addr, size_t size); void kasan_free_shadow(const struct vm_struct *vm); @@ -76,6 +84,10 @@ static inline void kasan_disable_current(void) {} static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} +static inline void kasan_cache_create(struct kmem_cache *cache, + size_t *size, + unsigned long *flags) {} + static inline void kasan_poison_slab(struct page *page) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) {} diff --git a/include/linux/slab.h b/include/linux/slab.h index b4e739f04ee6..d6c288de13a4 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -87,6 +87,12 @@ # define SLAB_FAILSLAB 0x00000000UL #endif +#ifdef CONFIG_KASAN +#define SLAB_KASAN 0x08000000UL +#else +#define SLAB_KASAN 0x00000000UL +#endif + /* The following flags affect the page allocator grouping pages by mobility */ #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 3b3e7747495d..8ba02e0cee35 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -75,8 +75,22 @@ struct kmem_cache { #ifdef CONFIG_MEMCG_KMEM struct memcg_cache_params memcg_params; #endif +#ifdef CONFIG_KASAN + struct kasan_cache kasan_info; +#endif struct kmem_cache_node *node[MAX_NUMNODES]; }; +static inline void *nearest_obj(struct kmem_cache *cache, struct page *page, + void *x) { + void *object = x - (x - page->s_mem) % cache->size; + void *last_object = page->s_mem + (cache->num - 1) * cache->size; + + if (unlikely(object > last_object)) + return last_object; + else + return object; +} + #endif /* _LINUX_SLAB_DEF_H */ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index f4e857e920cd..d0f54fb58555 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -130,4 +130,15 @@ static inline void *virt_to_obj(struct kmem_cache *s, void object_err(struct kmem_cache *s, struct page *page, u8 *object, char *reason); +static inline void *nearest_obj(struct kmem_cache *cache, struct page *page, + void *x) { + void *object = x - (x - page_address(page)) % cache->size; + void *last_object = page_address(page) + + (page->objects - 1) * cache->size; + if (unlikely(object > last_object)) + return last_object; + else + return object; +} + #endif /* _LINUX_SLUB_DEF_H */ -- cgit v1.2.3 From a9683c505ba8a00f704ec6023af2a01a8690ec11 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 25 Mar 2016 14:22:02 -0700 Subject: BACKPORT: mm, kasan: add GFP flags to KASAN API Add GFP flags to KASAN hooks for future patches to use. This patch is based on the "mm: kasan: unified support for SLUB and SLAB allocators" patch originally prepared by Dmitry Chernenkov. Signed-off-by: Alexander Potapenko Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Steven Rostedt Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 505f5dcb1c419e55a9621a01f83eb5745d8d7398) Change-Id: I7c5539f59e6969e484a6ff4f104dce2390669cfd Signed-off-by: Paul Lawrence --- include/linux/kasan.h | 19 +++++++++++-------- include/linux/slab.h | 4 ++-- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 839f2007a0f9..737371b56044 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -55,13 +55,14 @@ void kasan_poison_slab(struct page *page); void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); void kasan_poison_object_data(struct kmem_cache *cache, void *object); -void kasan_kmalloc_large(const void *ptr, size_t size); +void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); void kasan_kfree_large(const void *ptr); void kasan_kfree(void *ptr); -void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size); -void kasan_krealloc(const void *object, size_t new_size); +void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, + gfp_t flags); +void kasan_krealloc(const void *object, size_t new_size, gfp_t flags); -void kasan_slab_alloc(struct kmem_cache *s, void *object); +void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags); void kasan_slab_free(struct kmem_cache *s, void *object); struct kasan_cache { @@ -94,14 +95,16 @@ static inline void kasan_unpoison_object_data(struct kmem_cache *cache, static inline void kasan_poison_object_data(struct kmem_cache *cache, void *object) {} -static inline void kasan_kmalloc_large(void *ptr, size_t size) {} +static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {} static inline void kasan_kfree_large(const void *ptr) {} static inline void kasan_kfree(void *ptr) {} static inline void kasan_kmalloc(struct kmem_cache *s, const void *object, - size_t size) {} -static inline void kasan_krealloc(const void *object, size_t new_size) {} + size_t size, gfp_t flags) {} +static inline void kasan_krealloc(const void *object, size_t new_size, + gfp_t flags) {} -static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {} +static inline void kasan_slab_alloc(struct kmem_cache *s, void *object, + gfp_t flags) {} static inline void kasan_slab_free(struct kmem_cache *s, void *object) {} static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } diff --git a/include/linux/slab.h b/include/linux/slab.h index d6c288de13a4..16dc1e4a91f3 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -374,7 +374,7 @@ static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, { void *ret = kmem_cache_alloc(s, flags); - kasan_kmalloc(s, ret, size); + kasan_kmalloc(s, ret, size, flags); return ret; } @@ -385,7 +385,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, { void *ret = kmem_cache_alloc_node(s, gfpflags, node); - kasan_kmalloc(s, ret, size); + kasan_kmalloc(s, ret, size, gfpflags); return ret; } #endif /* CONFIG_TRACING */ -- cgit v1.2.3 From 7336ab7b7c0af8c2fe4112ea4531dad8ca87c31e Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 25 Mar 2016 14:22:08 -0700 Subject: BACKPORT: mm, kasan: stackdepot implementation. Enable stackdepot for SLAB Implement the stack depot and provide CONFIG_STACKDEPOT. Stack depot will allow KASAN store allocation/deallocation stack traces for memory chunks. The stack traces are stored in a hash table and referenced by handles which reside in the kasan_alloc_meta and kasan_free_meta structures in the allocated memory chunks. IRQ stack traces are cut below the IRQ entry point to avoid unnecessary duplication. Right now stackdepot support is only enabled in SLAB allocator. Once KASAN features in SLAB are on par with those in SLUB we can switch SLUB to stackdepot as well, thus removing the dependency on SLUB stack bookkeeping, which wastes a lot of memory. This patch is based on the "mm: kasan: stack depots" patch originally prepared by Dmitry Chernenkov. Joonsoo has said that he plans to reuse the stackdepot code for the mm/page_owner.c debugging facility. [akpm@linux-foundation.org: s/depot_stack_handle/depot_stack_handle_t] [aryabinin@virtuozzo.com: comment style fixes] Signed-off-by: Alexander Potapenko Signed-off-by: Andrey Ryabinin Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Steven Rostedt Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from cd11016e5f5212c13c0cec7384a525edc93b4921) Change-Id: Ic804318410823b95d84e264a6334e018f21ef943 Signed-off-by: Paul Lawrence --- include/linux/stackdepot.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/linux/stackdepot.h (limited to 'include/linux') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h new file mode 100644 index 000000000000..7978b3e2c1e1 --- /dev/null +++ b/include/linux/stackdepot.h @@ -0,0 +1,32 @@ +/* + * A generic stack depot implementation + * + * Author: Alexander Potapenko + * Copyright (C) 2016 Google, Inc. + * + * Based on code by Dmitry Chernenkov. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_STACKDEPOT_H +#define _LINUX_STACKDEPOT_H + +typedef u32 depot_stack_handle_t; + +struct stack_trace; + +depot_stack_handle_t depot_save_stack(struct stack_trace *trace, gfp_t flags); + +void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace); + +#endif -- cgit v1.2.3 From 8009eecae3b29ae1a54be9549302b429cae4352b Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 20 May 2016 16:59:11 -0700 Subject: UPSTREAM: mm: kasan: initial memory quarantine implementation Quarantine isolates freed objects in a separate queue. The objects are returned to the allocator later, which helps to detect use-after-free errors. When the object is freed, its state changes from KASAN_STATE_ALLOC to KASAN_STATE_QUARANTINE. The object is poisoned and put into quarantine instead of being returned to the allocator, therefore every subsequent access to that object triggers a KASAN error, and the error handler is able to say where the object has been allocated and deallocated. When it's time for the object to leave quarantine, its state becomes KASAN_STATE_FREE and it's returned to the allocator. From now on the allocator may reuse it for another allocation. Before that happens, it's still possible to detect a use-after free on that object (it retains the allocation/deallocation stacks). When the allocator reuses this object, the shadow is unpoisoned and old allocation/deallocation stacks are wiped. Therefore a use of this object, even an incorrect one, won't trigger ASan warning. Without the quarantine, it's not guaranteed that the objects aren't reused immediately, that's why the probability of catching a use-after-free is lower than with quarantine in place. Quarantine isolates freed objects in a separate queue. The objects are returned to the allocator later, which helps to detect use-after-free errors. Freed objects are first added to per-cpu quarantine queues. When a cache is destroyed or memory shrinking is requested, the objects are moved into the global quarantine queue. Whenever a kmalloc call allows memory reclaiming, the oldest objects are popped out of the global queue until the total size of objects in quarantine is less than 3/4 of the maximum quarantine size (which is a fraction of installed physical memory). As long as an object remains in the quarantine, KASAN is able to report accesses to it, so the chance of reporting a use-after-free is increased. Once the object leaves quarantine, the allocator may reuse it, in which case the object is unpoisoned and KASAN can't detect incorrect accesses to it. Right now quarantine support is only enabled in SLAB allocator. Unification of KASAN features in SLAB and SLUB will be done later. This patch is based on the "mm: kasan: quarantine" patch originally prepared by Dmitry Chernenkov. A number of improvements have been suggested by Andrey Ryabinin. [glider@google.com: v9] Link: http://lkml.kernel.org/r/1462987130-144092-1-git-send-email-glider@google.com Signed-off-by: Alexander Potapenko Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Steven Rostedt Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 55834c59098d0c5a97b0f3247e55832b67facdcf) Change-Id: Ib808d72a40f2e5137961d93dad540e85f8bbd2c4 Signed-off-by: Paul Lawrence --- include/linux/kasan.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 737371b56044..611927f5870d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -50,6 +50,8 @@ void kasan_free_pages(struct page *page, unsigned int order); void kasan_cache_create(struct kmem_cache *cache, size_t *size, unsigned long *flags); +void kasan_cache_shrink(struct kmem_cache *cache); +void kasan_cache_destroy(struct kmem_cache *cache); void kasan_poison_slab(struct page *page); void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); @@ -63,7 +65,8 @@ void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, void kasan_krealloc(const void *object, size_t new_size, gfp_t flags); void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags); -void kasan_slab_free(struct kmem_cache *s, void *object); +bool kasan_slab_free(struct kmem_cache *s, void *object); +void kasan_poison_slab_free(struct kmem_cache *s, void *object); struct kasan_cache { int alloc_meta_offset; @@ -88,6 +91,8 @@ static inline void kasan_free_pages(struct page *page, unsigned int order) {} static inline void kasan_cache_create(struct kmem_cache *cache, size_t *size, unsigned long *flags) {} +static inline void kasan_cache_shrink(struct kmem_cache *cache) {} +static inline void kasan_cache_destroy(struct kmem_cache *cache) {} static inline void kasan_poison_slab(struct page *page) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, @@ -105,7 +110,11 @@ static inline void kasan_krealloc(const void *object, size_t new_size, static inline void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags) {} -static inline void kasan_slab_free(struct kmem_cache *s, void *object) {} +static inline bool kasan_slab_free(struct kmem_cache *s, void *object) +{ + return false; +} +static inline void kasan_poison_slab_free(struct kmem_cache *s, void *object) {} static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } static inline void kasan_free_shadow(const struct vm_struct *vm) {} -- cgit v1.2.3 From ec4cb91ee5574de568e22be4fd2db1bc2ca65a05 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 20 May 2016 16:59:28 -0700 Subject: BACKPORT: mm/kasan: add API to check memory regions Memory access coded in an assembly won't be seen by KASAN as a compiler can instrument only C code. Add kasan_check_[read,write]() API which is going to be used to check a certain memory range. Link: http://lkml.kernel.org/r/1462538722-1574-3-git-send-email-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Acked-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 64f8ebaf115bcddc4aaa902f981c57ba6506bc42) Change-Id: I3e75c7c22e77d390c55ca1b86ec58a6d6ea1da87 Signed-off-by: Paul Lawrence --- include/linux/kasan-checks.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 include/linux/kasan-checks.h (limited to 'include/linux') diff --git a/include/linux/kasan-checks.h b/include/linux/kasan-checks.h new file mode 100644 index 000000000000..b7f8aced7870 --- /dev/null +++ b/include/linux/kasan-checks.h @@ -0,0 +1,12 @@ +#ifndef _LINUX_KASAN_CHECKS_H +#define _LINUX_KASAN_CHECKS_H + +#ifdef CONFIG_KASAN +void kasan_check_read(const void *p, unsigned int size); +void kasan_check_write(const void *p, unsigned int size); +#else +static inline void kasan_check_read(const void *p, unsigned int size) { } +static inline void kasan_check_write(const void *p, unsigned int size) { } +#endif + +#endif -- cgit v1.2.3 From 3240b4d6636e417ab391fe213943177732468815 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 24 Jun 2016 14:49:34 -0700 Subject: UPSTREAM: mm: mempool: kasan: don't poot mempool objects in quarantine Currently we may put reserved by mempool elements into quarantine via kasan_kfree(). This is totally wrong since quarantine may really free these objects. So when mempool will try to use such element, use-after-free will happen. Or mempool may decide that it no longer need that element and double-free it. So don't put object into quarantine in kasan_kfree(), just poison it. Rename kasan_kfree() to kasan_poison_kfree() to respect that. Also, we shouldn't use kasan_slab_alloc()/kasan_krealloc() in kasan_unpoison_element() because those functions may update allocation stacktrace. This would be wrong for the most of the remove_element call sites. (The only call site where we may want to update alloc stacktrace is in mempool_alloc(). Kmemleak solves this by calling kmemleak_update_trace(), so we could make something like that too. But this is out of scope of this patch). Fixes: 55834c59098d ("mm: kasan: initial memory quarantine implementation") Link: http://lkml.kernel.org/r/575977C3.1010905@virtuozzo.com Signed-off-by: Andrey Ryabinin Reported-by: Kuthonuzo Luruo Acked-by: Alexander Potapenko Cc: Dmitriy Vyukov Cc: Kostya Serebryany Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 9b75a867cc9ddbafcaf35029358ac500f2635ff3) Change-Id: Idb6c152dae8f8f2975dbe6acb7165315be8b465b Signed-off-by: Paul Lawrence --- include/linux/kasan.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 611927f5870d..ac4b3c46a84d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -59,14 +59,13 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object); void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); void kasan_kfree_large(const void *ptr); -void kasan_kfree(void *ptr); +void kasan_poison_kfree(void *ptr); void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags); void kasan_krealloc(const void *object, size_t new_size, gfp_t flags); void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags); bool kasan_slab_free(struct kmem_cache *s, void *object); -void kasan_poison_slab_free(struct kmem_cache *s, void *object); struct kasan_cache { int alloc_meta_offset; @@ -76,6 +75,9 @@ struct kasan_cache { int kasan_module_alloc(void *addr, size_t size); void kasan_free_shadow(const struct vm_struct *vm); +size_t ksize(const void *); +static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); } + #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} @@ -102,7 +104,7 @@ static inline void kasan_poison_object_data(struct kmem_cache *cache, static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {} static inline void kasan_kfree_large(const void *ptr) {} -static inline void kasan_kfree(void *ptr) {} +static inline void kasan_poison_kfree(void *ptr) {} static inline void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags) {} static inline void kasan_krealloc(const void *object, size_t new_size, @@ -114,11 +116,12 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object) { return false; } -static inline void kasan_poison_slab_free(struct kmem_cache *s, void *object) {} static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } static inline void kasan_free_shadow(const struct vm_struct *vm) {} +static inline void kasan_unpoison_slab(const void *ptr) { } + #endif /* CONFIG_KASAN */ #endif /* LINUX_KASAN_H */ -- cgit v1.2.3 From 85f8b42430080f6a5170cb948a32fc21645acaa9 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 6 Dec 2017 14:44:33 -0800 Subject: BACKPORT: mm, kasan: switch SLUB to stackdepot, enable memory quarantine for SLUB For KASAN builds: - switch SLUB allocator to using stackdepot instead of storing the allocation/deallocation stacks in the objects; - change the freelist hook so that parts of the freelist can be put into the quarantine. [aryabinin@virtuozzo.com: fixes] Link: http://lkml.kernel.org/r/1468601423-28676-1-git-send-email-aryabinin@virtuozzo.com Link: http://lkml.kernel.org/r/1468347165-41906-3-git-send-email-glider@google.com Signed-off-by: Alexander Potapenko Cc: Andrey Konovalov Cc: Christoph Lameter Cc: Dmitry Vyukov Cc: Steven Rostedt (Red Hat) Cc: Joonsoo Kim Cc: Kostya Serebryany Cc: Andrey Ryabinin Cc: Kuthonuzo Luruo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 80a9201a5965f4715d5c09790862e0df84ce0614) Change-Id: I2b59c6d50d0db62d3609edfdc7be54e48f8afa5c Signed-off-by: Paul Lawrence --- include/linux/kasan.h | 2 ++ include/linux/slab_def.h | 3 ++- include/linux/slub_def.h | 5 +++++ 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index ac4b3c46a84d..c9cf374445d8 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -77,6 +77,7 @@ void kasan_free_shadow(const struct vm_struct *vm); size_t ksize(const void *); static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); } +size_t kasan_metadata_size(struct kmem_cache *cache); #else /* CONFIG_KASAN */ @@ -121,6 +122,7 @@ static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } static inline void kasan_free_shadow(const struct vm_struct *vm) {} static inline void kasan_unpoison_slab(const void *ptr) { } +static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } #endif /* CONFIG_KASAN */ diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 8ba02e0cee35..bf16ba9f6fdb 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -83,7 +83,8 @@ struct kmem_cache { }; static inline void *nearest_obj(struct kmem_cache *cache, struct page *page, - void *x) { + void *x) +{ void *object = x - (x - page->s_mem) % cache->size; void *last_object = page->s_mem + (cache->num - 1) * cache->size; diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index d0f54fb58555..fd720e3dd1b8 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -99,6 +99,11 @@ struct kmem_cache { */ int remote_node_defrag_ratio; #endif + +#ifdef CONFIG_KASAN + struct kasan_cache kasan_info; +#endif + struct kmem_cache_node *node[MAX_NUMNODES]; }; -- cgit v1.2.3 From 508ad7fe8983756073061f15891ecf2abde396df Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 2 Aug 2016 14:02:52 -0700 Subject: BACKPORT: mm/kasan: get rid of ->state in struct kasan_alloc_meta The state of object currently tracked in two places - shadow memory, and the ->state field in struct kasan_alloc_meta. We can get rid of the latter. The will save us a little bit of memory. Also, this allow us to move free stack into struct kasan_alloc_meta, without increasing memory consumption. So now we should always know when the last time the object was freed. This may be useful for long delayed use-after-free bugs. As a side effect this fixes following UBSAN warning: UBSAN: Undefined behaviour in mm/kasan/quarantine.c:102:13 member access within misaligned address ffff88000d1efebc for type 'struct qlist_node' which requires 8 byte alignment Link: http://lkml.kernel.org/r/1470062715-14077-5-git-send-email-aryabinin@virtuozzo.com Reported-by: kernel test robot Signed-off-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from b3cbd9bf77cd1888114dbee1653e79aa23fd4068) Change-Id: Iaa4959a78ffd2e49f9060099df1fb32483df3085 Signed-off-by: Paul Lawrence --- include/linux/kasan.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index c9cf374445d8..d600303306eb 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -56,6 +56,7 @@ void kasan_cache_destroy(struct kmem_cache *cache); void kasan_poison_slab(struct page *page); void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); void kasan_poison_object_data(struct kmem_cache *cache, void *object); +void kasan_init_slab_obj(struct kmem_cache *cache, const void *object); void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); void kasan_kfree_large(const void *ptr); @@ -102,6 +103,8 @@ static inline void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) {} static inline void kasan_poison_object_data(struct kmem_cache *cache, void *object) {} +static inline void kasan_init_slab_obj(struct kmem_cache *cache, + const void *object) {} static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {} static inline void kasan_kfree_large(const void *ptr) {} -- cgit v1.2.3 From 2cb9e02424f41417bd802cb967f9582b8765ca1c Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Fri, 14 Oct 2016 16:07:23 +0200 Subject: BACKPORT: kprobes: Unpoison stack in jprobe_return() for KASAN I observed false KSAN positives in the sctp code, when sctp uses jprobe_return() in jsctp_sf_eat_sack(). The stray 0xf4 in shadow memory are stack redzones: [ ] ================================================================== [ ] BUG: KASAN: stack-out-of-bounds in memcmp+0xe9/0x150 at addr ffff88005e48f480 [ ] Read of size 1 by task syz-executor/18535 [ ] page:ffffea00017923c0 count:0 mapcount:0 mapping: (null) index:0x0 [ ] flags: 0x1fffc0000000000() [ ] page dumped because: kasan: bad access detected [ ] CPU: 1 PID: 18535 Comm: syz-executor Not tainted 4.8.0+ #28 [ ] Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 [ ] ffff88005e48f2d0 ffffffff82d2b849 ffffffff0bc91e90 fffffbfff10971e8 [ ] ffffed000bc91e90 ffffed000bc91e90 0000000000000001 0000000000000000 [ ] ffff88005e48f480 ffff88005e48f350 ffffffff817d3169 ffff88005e48f370 [ ] Call Trace: [ ] [] dump_stack+0x12e/0x185 [ ] [] kasan_report+0x489/0x4b0 [ ] [] __asan_report_load1_noabort+0x19/0x20 [ ] [] memcmp+0xe9/0x150 [ ] [] depot_save_stack+0x176/0x5c0 [ ] [] save_stack+0xb1/0xd0 [ ] [] kasan_slab_free+0x72/0xc0 [ ] [] kfree+0xc8/0x2a0 [ ] [] skb_free_head+0x79/0xb0 [ ] [] skb_release_data+0x37a/0x420 [ ] [] skb_release_all+0x4f/0x60 [ ] [] consume_skb+0x138/0x370 [ ] [] sctp_chunk_put+0xcb/0x180 [ ] [] sctp_chunk_free+0x58/0x70 [ ] [] sctp_inq_pop+0x68f/0xef0 [ ] [] sctp_assoc_bh_rcv+0xd6/0x4b0 [ ] [] sctp_inq_push+0x131/0x190 [ ] [] sctp_backlog_rcv+0xe9/0xa20 [ ... ] [ ] Memory state around the buggy address: [ ] ffff88005e48f380: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ ] ffff88005e48f400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ ] >ffff88005e48f480: f4 f4 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ ] ^ [ ] ffff88005e48f500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ ] ffff88005e48f580: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ ] ================================================================== KASAN stack instrumentation poisons stack redzones on function entry and unpoisons them on function exit. If a function exits abnormally (e.g. with a longjmp like jprobe_return()), stack redzones are left poisoned. Later this leads to random KASAN false reports. Unpoison stack redzones in the frames we are going to jump over before doing actual longjmp in jprobe_return(). Signed-off-by: Dmitry Vyukov Acked-by: Masami Hiramatsu Reviewed-by: Mark Rutland Cc: Mark Rutland Cc: Catalin Marinas Cc: Andrey Ryabinin Cc: Lorenzo Pieralisi Cc: Alexander Potapenko Cc: Will Deacon Cc: Andrew Morton Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: "David S. Miller" Cc: Masami Hiramatsu Cc: kasan-dev@googlegroups.com Cc: surovegin@google.com Cc: rostedt@goodmis.org Link: http://lkml.kernel.org/r/1476454043-101898-1-git-send-email-dvyukov@google.com Signed-off-by: Ingo Molnar Bug: 64145065 (cherry-picked from 9f7d416c36124667c406978bcb39746589c35d7f) Change-Id: I84e4fac44265a69f615601266b3415147dade633 Signed-off-by: Paul Lawrence --- include/linux/kasan.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d600303306eb..820c0ad54a01 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -44,6 +44,7 @@ static inline void kasan_disable_current(void) void kasan_unpoison_shadow(const void *address, size_t size); void kasan_unpoison_task_stack(struct task_struct *task); +void kasan_unpoison_stack_above_sp_to(const void *watermark); void kasan_alloc_pages(struct page *page, unsigned int order); void kasan_free_pages(struct page *page, unsigned int order); @@ -85,6 +86,7 @@ size_t kasan_metadata_size(struct kmem_cache *cache); static inline void kasan_unpoison_shadow(const void *address, size_t size) {} static inline void kasan_unpoison_task_stack(struct task_struct *task) {} +static inline void kasan_unpoison_stack_above_sp_to(const void *watermark) {} static inline void kasan_enable_current(void) {} static inline void kasan_disable_current(void) {} -- cgit v1.2.3 From c0e5ed2db4049c457b75539a3551af2d3e9698c9 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Fri, 24 Feb 2017 15:00:05 -0800 Subject: BACKPORT: kasan: drain quarantine of memcg slab objects Per memcg slab accounting and kasan have a problem with kmem_cache destruction. - kmem_cache_create() allocates a kmem_cache, which is used for allocations from processes running in root (top) memcg. - Processes running in non root memcg and allocating with either __GFP_ACCOUNT or from a SLAB_ACCOUNT cache use a per memcg kmem_cache. - Kasan catches use-after-free by having kfree() and kmem_cache_free() defer freeing of objects. Objects are placed in a quarantine. - kmem_cache_destroy() destroys root and non root kmem_caches. It takes care to drain the quarantine of objects from the root memcg's kmem_cache, but ignores objects associated with non root memcg. This causes leaks because quarantined per memcg objects refer to per memcg kmem cache being destroyed. To see the problem: 1) create a slab cache with kmem_cache_create(,,,SLAB_ACCOUNT,) 2) from non root memcg, allocate and free a few objects from cache 3) dispose of the cache with kmem_cache_destroy() kmem_cache_destroy() will trigger a "Slab cache still has objects" warning indicating that the per memcg kmem_cache structure was leaked. Fix the leak by draining kasan quarantined objects allocated from non root memcg. Racing memcg deletion is tricky, but handled. kmem_cache_destroy() => shutdown_memcg_caches() => __shutdown_memcg_cache() => shutdown_cache() flushes per memcg quarantined objects, even if that memcg has been rmdir'd and gone through memcg_deactivate_kmem_caches(). This leak only affects destroyed SLAB_ACCOUNT kmem caches when kasan is enabled. So I don't think it's worth patching stable kernels. Link: http://lkml.kernel.org/r/1482257462-36948-1-git-send-email-gthelen@google.com Signed-off-by: Greg Thelen Reviewed-by: Vladimir Davydov Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from f9fa1d919c696e90c887d8742198023e7639d139) Change-Id: Ie054d9cde7fb1ce62e65776bff5a70f72925d037 Signed-off-by: Paul Lawrence --- include/linux/kasan.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 820c0ad54a01..c908b25bf5a5 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -52,7 +52,7 @@ void kasan_free_pages(struct page *page, unsigned int order); void kasan_cache_create(struct kmem_cache *cache, size_t *size, unsigned long *flags); void kasan_cache_shrink(struct kmem_cache *cache); -void kasan_cache_destroy(struct kmem_cache *cache); +void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_poison_slab(struct page *page); void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); @@ -98,7 +98,7 @@ static inline void kasan_cache_create(struct kmem_cache *cache, size_t *size, unsigned long *flags) {} static inline void kasan_cache_shrink(struct kmem_cache *cache) {} -static inline void kasan_cache_destroy(struct kmem_cache *cache) {} +static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} static inline void kasan_poison_slab(struct page *page) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, -- cgit v1.2.3 From 16a34d5679f2a77800af894a3ac2e654bce788ab Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 09:57:00 +0100 Subject: BACKPORT: kasan, sched/headers: Uninline kasan_enable/disable_current() is a low level header that is included early in affected kernel headers. But it includes which complicates the cleanup of sched.h dependencies. But kasan.h has almost no need for sched.h: its only use of scheduler functionality is in two inline functions which are not used very frequently - so uninline kasan_enable_current() and kasan_disable_current(). Also add a dependency to a .c file that depended on kasan.h including it. This paves the way to remove the include from kasan.h. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Bug: 64145065 (cherry-picked from af8601ad420f6afa6445c927ad9f36d9700d96d6) Change-Id: I13fd2d3927f663d694ea0d5bf44f18e2c62ae013 Signed-off-by: Paul Lawrence --- include/linux/kasan.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index c908b25bf5a5..7793036eac80 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -30,16 +30,10 @@ static inline void *kasan_mem_to_shadow(const void *addr) } /* Enable reporting bugs after kasan_disable_current() */ -static inline void kasan_enable_current(void) -{ - current->kasan_depth++; -} +extern void kasan_enable_current(void); /* Disable reporting bugs for current task */ -static inline void kasan_disable_current(void) -{ - current->kasan_depth--; -} +extern void kasan_disable_current(void); void kasan_unpoison_shadow(const void *address, size_t size); -- cgit v1.2.3 From e8a4efd041734c248674f585d44b770ef8ea79aa Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 31 Mar 2017 15:12:04 -0700 Subject: BACKPORT: kasan: report only the first error by default Disable kasan after the first report. There are several reasons for this: - Single bug quite often has multiple invalid memory accesses causing storm in the dmesg. - Write OOB access might corrupt metadata so the next report will print bogus alloc/free stacktraces. - Reports after the first easily could be not bugs by itself but just side effects of the first one. Given that multiple reports usually only do harm, it makes sense to disable kasan after the first one. If user wants to see all the reports, the boot-time parameter kasan_multi_shot must be used. [aryabinin@virtuozzo.com: wrote changelog and doc, added missing include] Link: http://lkml.kernel.org/r/20170323154416.30257-1-aryabinin@virtuozzo.com Signed-off-by: Mark Rutland Signed-off-by: Andrey Ryabinin Cc: Andrey Konovalov Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from b0845ce58379d11dcad4cdb6824a6410de260216) Change-Id: Ia8c6d40dd0d4f5b944bf3501c08d7a825070b116 Signed-off-by: Paul Lawrence --- include/linux/kasan.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 7793036eac80..b37afd197eed 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -75,6 +75,9 @@ size_t ksize(const void *); static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); } size_t kasan_metadata_size(struct kmem_cache *cache); +bool kasan_save_enable_multi_shot(void); +void kasan_restore_multi_shot(bool enabled); + #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} -- cgit v1.2.3 From 5249dcc4ebe88377f49a235a9aa977ddd964e41a Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 25 Mar 2016 14:22:05 -0700 Subject: UPSTREAM: arch, ftrace: for KASAN put hard/soft IRQ entries into separate sections KASAN needs to know whether the allocation happens in an IRQ handler. This lets us strip everything below the IRQ entry point to reduce the number of unique stack traces needed to be stored. Move the definition of __irq_entry to so that the users don't need to pull in . Also introduce the __softirq_entry macro which is similar to __irq_entry, but puts the corresponding functions to the .softirqentry.text section. Signed-off-by: Alexander Potapenko Acked-by: Steven Rostedt Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from be7635e7287e0e8013af3c89a6354a9e0182594c) Change-Id: Ib321eb9c2b76ef4785cf3fd522169f524348bd9a Signed-off-by: Paul Lawrence --- include/linux/ftrace.h | 11 ----------- include/linux/interrupt.h | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ed94cea9eaff..312a4ef093e3 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -781,16 +781,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, */ #define __notrace_funcgraph notrace -/* - * We want to which function is an entrypoint of a hardirq. - * That will help us to put a signal on output. - */ -#define __irq_entry __attribute__((__section__(".irqentry.text"))) - -/* Limits of hardirq entrypoints */ -extern char __irqentry_text_start[]; -extern char __irqentry_text_end[]; - #define FTRACE_NOTRACE_DEPTH 65536 #define FTRACE_RETFUNC_DEPTH 50 #define FTRACE_RETSTACK_ALLOC_SIZE 32 @@ -827,7 +817,6 @@ static inline void unpause_graph_tracing(void) #else /* !CONFIG_FUNCTION_GRAPH_TRACER */ #define __notrace_funcgraph -#define __irq_entry #define INIT_FTRACE_GRAPH static inline void ftrace_graph_init_task(struct task_struct *t) { } diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index ad16809c8596..a5f0a9e5453b 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -672,4 +672,24 @@ extern int early_irq_init(void); extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); +#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) +/* + * We want to know which function is an entrypoint of a hardirq or a softirq. + */ +#define __irq_entry __attribute__((__section__(".irqentry.text"))) +#define __softirq_entry \ + __attribute__((__section__(".softirqentry.text"))) + +/* Limits of hardirq entrypoints */ +extern char __irqentry_text_start[]; +extern char __irqentry_text_end[]; +/* Limits of softirq entrypoints */ +extern char __softirqentry_text_start[]; +extern char __softirqentry_text_end[]; + +#else +#define __irq_entry +#define __softirq_entry +#endif + #endif -- cgit v1.2.3 From dcfa5fe36a67566a1301cf86a6d340aab1168c64 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 3 Aug 2017 11:38:21 +0900 Subject: BACKPORT: irq: Make the irqentry text section unconditional Generate irqentry and softirqentry text sections without any Kconfig dependencies. This will add extra sections, but there should be no performace impact. Suggested-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Chris Zankel Cc: David S . Miller Cc: Francis Deslauriers Cc: Jesper Nilsson Cc: Linus Torvalds Cc: Max Filippov Cc: Mikael Starvik Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yoshinori Sato Cc: linux-arch@vger.kernel.org Cc: linux-cris-kernel@axis.com Cc: mathieu.desnoyers@efficios.com Link: http://lkml.kernel.org/r/150172789110.27216.3955739126693102122.stgit@devbox Signed-off-by: Ingo Molnar Bug: 64145065 (cherry-picked from 229a71860547ec856b156179a9c6bef2de426f66) Change-Id: I8f10ad59f16d637834a9dcacebdf087a028e995d Signed-off-by: Paul Lawrence --- include/linux/interrupt.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a5f0a9e5453b..9ddd6a0ce3a2 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -18,6 +18,7 @@ #include #include #include +#include /* * These correspond to the IORESOURCE_IRQ_* defines in @@ -672,7 +673,6 @@ extern int early_irq_init(void); extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); -#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) /* * We want to know which function is an entrypoint of a hardirq or a softirq. */ @@ -680,16 +680,4 @@ extern int arch_early_irq_init(void); #define __softirq_entry \ __attribute__((__section__(".softirqentry.text"))) -/* Limits of hardirq entrypoints */ -extern char __irqentry_text_start[]; -extern char __irqentry_text_end[]; -/* Limits of softirq entrypoints */ -extern char __softirqentry_text_start[]; -extern char __softirqentry_text_end[]; - -#else -#define __irq_entry -#define __softirq_entry -#endif - #endif -- cgit v1.2.3 From 481efb4c7256bb5516287c46f8315a56c65b9dbb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 6 Dec 2017 09:50:08 +0000 Subject: efi: Move some sysfs files to be read-only by root commit af97a77bc01ce49a466f9d4c0125479e2e2230b6 upstream. Thanks to the scripts/leaking_addresses.pl script, it was found that some EFI values should not be readable by non-root users. So make them root-only, and to do that, add a __ATTR_RO_MODE() macro to make this easier, and use it in other places at the same time. Reported-by: Linus Torvalds Tested-by: Dave Young Signed-off-by: Greg Kroah-Hartman Signed-off-by: Ard Biesheuvel Cc: H. Peter Anvin Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20171206095010.24170-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index c6f0f0d0e17e..00a1f330f93a 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -116,6 +116,12 @@ struct attribute_group { .show = _name##_show, \ } +#define __ATTR_RO_MODE(_name, _mode) { \ + .attr = { .name = __stringify(_name), \ + .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ + .show = _name##_show, \ +} + #define __ATTR_WO(_name) { \ .attr = { .name = __stringify(_name), .mode = S_IWUSR }, \ .store = _name##_store, \ -- cgit v1.2.3 From 5327f9badacdbd268b182b4fc89cd9b94a358a38 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 13 Apr 2017 14:56:23 -0700 Subject: mm: drop unused pmdp_huge_get_and_clear_notify() commit c0c379e2931b05facef538e53bf3b21f283d9a0b upstream. Dave noticed that after fixing MADV_DONTNEED vs numa balancing race the last pmdp_huge_get_and_clear_notify() user is gone. Let's drop the helper. Link: http://lkml.kernel.org/r/20170306112047.24809-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Cc: Dave Hansen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds [jwang: adjust context for 4.4] Signed-off-by: Jack Wang Signed-off-by: Greg Kroah-Hartman --- include/linux/mmu_notifier.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index a1a210d59961..38c5eb21883e 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -381,18 +381,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) ___pmd; \ }) -#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd) \ -({ \ - unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ - pmd_t ___pmd; \ - \ - ___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd); \ - mmu_notifier_invalidate_range(__mm, ___haddr, \ - ___haddr + HPAGE_PMD_SIZE); \ - \ - ___pmd; \ -}) - /* * set_pte_at_notify() sets the pte _after_ running the notifier. * This is safe to start by updating the secondary MMUs, because the primary MMU @@ -475,7 +463,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) #define pmdp_clear_young_notify pmdp_test_and_clear_young #define ptep_clear_flush_notify ptep_clear_flush #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush -#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear #define set_pte_at_notify set_pte_at #endif /* CONFIG_MMU_NOTIFIER */ -- cgit v1.2.3 From 0500c6d35274aff625804ea5075d59006c5b4264 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Sat, 11 Feb 2017 14:02:49 +0100 Subject: ARM: OMAP2+: gpmc-onenand: propagate error on initialization failure [ Upstream commit 7807e086a2d1f69cc1a57958cac04fea79fc2112 ] gpmc_probe_onenand_child returns success even on gpmc_onenand_init failure. Fix that. Signed-off-by: Ladislav Michl Acked-by: Roger Quadros Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/omap-gpmc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index 7dee00143afd..c201e31e9d7e 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -191,10 +191,11 @@ static inline int gpmc_nand_init(struct omap_nand_platform_data *d, #endif #if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2) -extern void gpmc_onenand_init(struct omap_onenand_platform_data *d); +extern int gpmc_onenand_init(struct omap_onenand_platform_data *d); #else #define board_onenand_data NULL -static inline void gpmc_onenand_init(struct omap_onenand_platform_data *d) +static inline int gpmc_onenand_init(struct omap_onenand_platform_data *d) { + return 0; } #endif -- cgit v1.2.3 From 24c98ec494c2df1e449c6aa6c3904d7c32c20c44 Mon Sep 17 00:00:00 2001 From: Stephen Bates Date: Fri, 17 Nov 2017 15:28:16 -0800 Subject: lib/genalloc.c: make the avail variable an atomic_long_t [ Upstream commit 36a3d1dd4e16bcd0d2ddfb4a2ec7092f0ae0d931 ] If the amount of resources allocated to a gen_pool exceeds 2^32 then the avail atomic overflows and this causes problems when clients try and borrow resources from the pool. This is only expected to be an issue on 64 bit systems. Add the header to pull in atomic_long* operations. So that 32 bit systems continue to use atomic32_t but 64 bit systems can use atomic64_t. Link: http://lkml.kernel.org/r/1509033843-25667-1-git-send-email-sbates@raithlin.com Signed-off-by: Stephen Bates Reviewed-by: Logan Gunthorpe Reviewed-by: Mathieu Desnoyers Reviewed-by: Daniel Mentz Cc: Jonathan Corbet Cc: Andrew Morton Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/genalloc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 7ff168d06967..46156ff5b01d 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -31,6 +31,7 @@ #define __GENALLOC_H__ #include +#include struct device; struct device_node; @@ -68,7 +69,7 @@ struct gen_pool { */ struct gen_pool_chunk { struct list_head next_chunk; /* next chunk in pool */ - atomic_t avail; + atomic_long_t avail; phys_addr_t phys_addr; /* physical starting address of memory chunk */ unsigned long start_addr; /* start address of memory chunk */ unsigned long end_addr; /* end address of memory chunk (inclusive) */ -- cgit v1.2.3 From 9b83f370dc669e5dbd15d23a41e821b7d6765581 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 22 Mar 2016 14:27:30 -0700 Subject: BACKPORT: kernel: add kcov code coverage kcov provides code coverage collection for coverage-guided fuzzing (randomized testing). Coverage-guided fuzzing is a testing technique that uses coverage feedback to determine new interesting inputs to a system. A notable user-space example is AFL (http://lcamtuf.coredump.cx/afl/). However, this technique is not widely used for kernel testing due to missing compiler and kernel support. kcov does not aim to collect as much coverage as possible. It aims to collect more or less stable coverage that is function of syscall inputs. To achieve this goal it does not collect coverage in soft/hard interrupts and instrumentation of some inherently non-deterministic or non-interesting parts of kernel is disbled (e.g. scheduler, locking). Currently there is a single coverage collection mode (tracing), but the API anticipates additional collection modes. Initially I also implemented a second mode which exposes coverage in a fixed-size hash table of counters (what Quentin used in his original patch). I've dropped the second mode for simplicity. This patch adds the necessary support on kernel side. The complimentary compiler support was added in gcc revision 231296. We've used this support to build syzkaller system call fuzzer, which has found 90 kernel bugs in just 2 months: https://github.com/google/syzkaller/wiki/Found-Bugs We've also found 30+ bugs in our internal systems with syzkaller. Another (yet unexplored) direction where kcov coverage would greatly help is more traditional "blob mutation". For example, mounting a random blob as a filesystem, or receiving a random blob over wire. Why not gcov. Typical fuzzing loop looks as follows: (1) reset coverage, (2) execute a bit of code, (3) collect coverage, repeat. A typical coverage can be just a dozen of basic blocks (e.g. an invalid input). In such context gcov becomes prohibitively expensive as reset/collect coverage steps depend on total number of basic blocks/edges in program (in case of kernel it is about 2M). Cost of kcov depends only on number of executed basic blocks/edges. On top of that, kernel requires per-thread coverage because there are always background threads and unrelated processes that also produce coverage. With inlined gcov instrumentation per-thread coverage is not possible. kcov exposes kernel PCs and control flow to user-space which is insecure. But debugfs should not be mapped as user accessible. Based on a patch by Quentin Casasnovas. [akpm@linux-foundation.org: make task_struct.kcov_mode have type `enum kcov_mode'] [akpm@linux-foundation.org: unbreak allmodconfig] [akpm@linux-foundation.org: follow x86 Makefile layout standards] Signed-off-by: Dmitry Vyukov Reviewed-by: Kees Cook Cc: syzkaller Cc: Vegard Nossum Cc: Catalin Marinas Cc: Tavis Ormandy Cc: Will Deacon Cc: Quentin Casasnovas Cc: Kostya Serebryany Cc: Eric Dumazet Cc: Alexander Potapenko Cc: Kees Cook Cc: Bjorn Helgaas Cc: Sasha Levin Cc: David Drysdale Cc: Ard Biesheuvel Cc: Andrey Ryabinin Cc: Kirill A. Shutemov Cc: Jiri Slaby Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from 5c9a8750a6409c63a0f01d51a9024861022f6593) Change-Id: I17b5e04f6e89b241924e78ec32ead79c38b860ce Signed-off-by: Paul Lawrence --- include/linux/kcov.h | 29 +++++++++++++++++++++++++++++ include/linux/sched.h | 11 +++++++++++ 2 files changed, 40 insertions(+) create mode 100644 include/linux/kcov.h (limited to 'include/linux') diff --git a/include/linux/kcov.h b/include/linux/kcov.h new file mode 100644 index 000000000000..2883ac98c280 --- /dev/null +++ b/include/linux/kcov.h @@ -0,0 +1,29 @@ +#ifndef _LINUX_KCOV_H +#define _LINUX_KCOV_H + +#include + +struct task_struct; + +#ifdef CONFIG_KCOV + +void kcov_task_init(struct task_struct *t); +void kcov_task_exit(struct task_struct *t); + +enum kcov_mode { + /* Coverage collection is not enabled yet. */ + KCOV_MODE_DISABLED = 0, + /* + * Tracing coverage collection mode. + * Covered PCs are collected in a per-task buffer. + */ + KCOV_MODE_TRACE = 1, +}; + +#else + +static inline void kcov_task_init(struct task_struct *t) {} +static inline void kcov_task_exit(struct task_struct *t) {} + +#endif /* CONFIG_KCOV */ +#endif /* _LINUX_KCOV_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 60af9d2fa922..de6b3edd835b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -51,6 +51,7 @@ struct sched_param { #include #include #include +#include #include #include #include @@ -1973,6 +1974,16 @@ struct task_struct { /* bitmask and counter of trace recursion */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ +#ifdef CONFIG_KCOV + /* Coverage collection mode enabled for this task (0 if disabled). */ + enum kcov_mode kcov_mode; + /* Size of the kcov_area. */ + unsigned kcov_size; + /* Buffer for coverage collection. */ + void *kcov_area; + /* kcov desciptor wired with this task or NULL. */ + struct kcov *kcov; +#endif #ifdef CONFIG_MEMCG struct mem_cgroup *memcg_in_oom; gfp_t memcg_oom_gfp_mask; -- cgit v1.2.3 From 540baa17a89e25ec39f5618ff29a6d9a8bc5a325 Mon Sep 17 00:00:00 2001 From: Victor Chibotaru Date: Fri, 17 Nov 2017 15:30:46 -0800 Subject: UPSTREAM: kcov: support comparison operands collection Enables kcov to collect comparison operands from instrumented code. This is done by using Clang's -fsanitize=trace-cmp instrumentation (currently not available for GCC). The comparison operands help a lot in fuzz testing. E.g. they are used in Syzkaller to cover the interiors of conditional statements with way less attempts and thus make previously unreachable code reachable. To allow separate collection of coverage and comparison operands two different work modes are implemented. Mode selection is now done via a KCOV_ENABLE ioctl call with corresponding argument value. Link: http://lkml.kernel.org/r/20171011095459.70721-1-glider@google.com Signed-off-by: Victor Chibotaru Signed-off-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Andrey Konovalov Cc: Mark Rutland Cc: Alexander Popov Cc: Andrey Ryabinin Cc: Kees Cook Cc: Vegard Nossum Cc: Quentin Casasnovas Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Bug: 64145065 (cherry-picked from ded97d2c2b2c5f1dcced0bc57133f7753b037dfc) Change-Id: Iaba700a3f4786048be14a5e764ccabceae114eb7 Signed-off-by: Paul Lawrence --- include/linux/kcov.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kcov.h b/include/linux/kcov.h index 2883ac98c280..87e2a44f1bab 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -7,19 +7,23 @@ struct task_struct; #ifdef CONFIG_KCOV -void kcov_task_init(struct task_struct *t); -void kcov_task_exit(struct task_struct *t); - enum kcov_mode { /* Coverage collection is not enabled yet. */ KCOV_MODE_DISABLED = 0, + /* KCOV was initialized, but tracing mode hasn't been chosen yet. */ + KCOV_MODE_INIT = 1, /* * Tracing coverage collection mode. * Covered PCs are collected in a per-task buffer. */ - KCOV_MODE_TRACE = 1, + KCOV_MODE_TRACE_PC = 2, + /* Collecting comparison operands mode. */ + KCOV_MODE_TRACE_CMP = 3, }; +void kcov_task_init(struct task_struct *t); +void kcov_task_exit(struct task_struct *t); + #else static inline void kcov_task_init(struct task_struct *t) {} -- cgit v1.2.3 From acc7d1bd901cde5acc6891ebc6be6cfc614fb868 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 13 Mar 2017 19:29:08 +0200 Subject: net/mlx4_core: Avoid delays during VF driver device shutdown [ Upstream commit 4cbe4dac82e423ecc9a0ba46af24a860853259f4 ] Some Hypervisors detach VFs from VMs by instantly causing an FLR event to be generated for a VF. In the mlx4 case, this will cause that VF's comm channel to be disabled before the VM has an opportunity to invoke the VF device's "shutdown" method. For such Hypervisors, there is a race condition between the VF's shutdown method and its internal-error detection/reset thread. The internal-error detection/reset thread (which runs every 5 seconds) also detects a disabled comm channel. If the internal-error detection/reset flow wins the race, we still get delays (while that flow tries repeatedly to detect comm-channel recovery). The cited commit fixed the command timeout problem when the internal-error detection/reset flow loses the race. This commit avoids the unneeded delays when the internal-error detection/reset flow wins. Fixes: d585df1c5ccf ("net/mlx4_core: Avoid command timeouts during VF driver device shutdown") Signed-off-by: Jack Morgenstein Reported-by: Simon Xiao Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index d3133be12d92..7fde8af9b87e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -460,6 +460,7 @@ enum { enum { MLX4_INTERFACE_STATE_UP = 1 << 0, MLX4_INTERFACE_STATE_DELETION = 1 << 1, + MLX4_INTERFACE_STATE_NOWAIT = 1 << 2, }; #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ -- cgit v1.2.3 From b59614cfd2d3619eedc1b711ac51efb7636efba6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 3 Nov 2017 12:21:21 +0100 Subject: mm: Handle 0 flags in _calc_vm_trans() macro [ Upstream commit 592e254502041f953e84d091eae2c68cba04c10b ] _calc_vm_trans() does not handle the situation when some of the passed flags are 0 (which can happen if these VM flags do not make sense for the architecture). Improve the _calc_vm_trans() macro to return 0 in such situation. Since all passed flags are constant, this does not add any runtime overhead. Signed-off-by: Jan Kara Signed-off-by: Dan Williams Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- include/linux/mman.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mman.h b/include/linux/mman.h index 16373c8f5f57..369bc3405a6d 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -63,8 +63,9 @@ static inline int arch_validate_prot(unsigned long prot) * ("bit1" and "bit2" must be single bits) */ #define _calc_vm_trans(x, bit1, bit2) \ + ((!(bit1) || !(bit2)) ? 0 : \ ((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \ - : ((x) & (bit1)) / ((bit1) / (bit2))) + : ((x) & (bit1)) / ((bit1) / (bit2)))) /* * Combine the mmap "prot" argument into "vm_flags" used internally. -- cgit v1.2.3 From 425f13a36652523d604fd96413d6c438d415dd70 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Apr 2016 09:39:06 -0700 Subject: sched/core: Add switch_mm_irqs_off() and use it in the scheduler commit f98db6013c557c216da5038d9c52045be55cd039 upstream. By default, this is the same thing as switch_mm(). x86 will override it as an optimization. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/df401df47bdd6be3e389c6f1e3f5310d70e81b2c.1461688545.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- include/linux/mmu_context.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index 70fffeba7495..a4441784503b 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -1,9 +1,16 @@ #ifndef _LINUX_MMU_CONTEXT_H #define _LINUX_MMU_CONTEXT_H +#include + struct mm_struct; void use_mm(struct mm_struct *mm); void unuse_mm(struct mm_struct *mm); +/* Architectures that care about IRQ state in switch_mm can override this. */ +#ifndef switch_mm_irqs_off +# define switch_mm_irqs_off switch_mm +#endif + #endif -- cgit v1.2.3 From e0bdd21a86c5831d6039a3d46d25cab9e58a7ae4 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 20 Dec 2017 12:10:21 -0800 Subject: net: reevalulate autoflowlabel setting after sysctl setting [ Upstream commit 513674b5a2c9c7a67501506419da5c3c77ac6f08 ] sysctl.ip6.auto_flowlabels is default 1. In our hosts, we set it to 2. If sockopt doesn't set autoflowlabel, outcome packets from the hosts are supposed to not include flowlabel. This is true for normal packet, but not for reset packet. The reason is ipv6_pinfo.autoflowlabel is set in sock creation. Later if we change sysctl.ip6.auto_flowlabels, the ipv6_pinfo.autoflowlabel isn't changed, so the sock will keep the old behavior in terms of auto flowlabel. Reset packet is suffering from this problem, because reset packet is sent from a special control socket, which is created at boot time. Since sysctl.ipv6.auto_flowlabels is 1 by default, the control socket will always have its ipv6_pinfo.autoflowlabel set, even after user set sysctl.ipv6.auto_flowlabels to 1, so reset packset will always have flowlabel. Normal sock created before sysctl setting suffers from the same issue. We can't even turn off autoflowlabel unless we kill all socks in the hosts. To fix this, if IPV6_AUTOFLOWLABEL sockopt is used, we use the autoflowlabel setting from user, otherwise we always call ip6_default_np_autolabel() which has the new settings of sysctl. Note, this changes behavior a little bit. Before commit 42240901f7c4 (ipv6: Implement different admin modes for automatic flow labels), the autoflowlabel behavior of a sock isn't sticky, eg, if sysctl changes, existing connection will change autoflowlabel behavior. After that commit, autoflowlabel behavior is sticky in the whole life of the sock. With this patch, the behavior isn't sticky again. Cc: Martin KaFai Lau Cc: Eric Dumazet Cc: Tom Herbert Signed-off-by: Shaohua Li Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/ipv6.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 402753bccafa..7b8e3afcc291 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -215,7 +215,8 @@ struct ipv6_pinfo { * 100: prefer care-of address */ dontfrag:1, - autoflowlabel:1; + autoflowlabel:1, + autoflowlabel_set:1; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; -- cgit v1.2.3 From 5d67dbef745b60fd5041b2fd7e5833e92a512a54 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 5 Jun 2017 07:40:25 -0700 Subject: mm/vmstat: Make NR_TLB_REMOTE_FLUSH_RECEIVED available even on UP commit 5dd0b16cdaff9b94da06074d5888b03235c0bf17 upstream. This fixes CONFIG_SMP=n, CONFIG_DEBUG_TLBFLUSH=y without introducing further #ifdef soup. Caught by a Kbuild bot randconfig build. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: ce4a4e565f52 ("x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code") Link: http://lkml.kernel.org/r/76da9a3cc4415996f2ad2c905b93414add322021.1496673616.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- include/linux/vm_event_item.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index e623d392db0c..8ef3a61fdc74 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -80,10 +80,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #endif #endif #ifdef CONFIG_DEBUG_TLBFLUSH -#ifdef CONFIG_SMP NR_TLB_REMOTE_FLUSH, /* cpu tried to flush others' tlbs */ NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */ -#endif /* CONFIG_SMP */ NR_TLB_LOCAL_FLUSH_ALL, NR_TLB_LOCAL_FLUSH_ONE, #endif /* CONFIG_DEBUG_TLBFLUSH */ -- cgit v1.2.3 From 8a43ddfb93a0c6ae1a6e1f5c25705ec5d1843c40 Mon Sep 17 00:00:00 2001 From: Richard Fellner Date: Thu, 4 May 2017 14:26:50 +0200 Subject: KAISER: Kernel Address Isolation This patch introduces our implementation of KAISER (Kernel Address Isolation to have Side-channels Efficiently Removed), a kernel isolation technique to close hardware side channels on kernel address information. More information about the patch can be found on: https://github.com/IAIK/KAISER From: Richard Fellner From: Daniel Gruss X-Subject: [RFC, PATCH] x86_64: KAISER - do not map kernel in user mode Date: Thu, 4 May 2017 14:26:50 +0200 Link: http://marc.info/?l=linux-kernel&m=149390087310405&w=2 Kaiser-4.10-SHA1: c4b1831d44c6144d3762ccc72f0c4e71a0c713e5 To: To: Cc: Cc: Cc: Michael Schwarz Cc: Richard Fellner Cc: Ingo Molnar Cc: Cc: After several recent works [1,2,3] KASLR on x86_64 was basically considered dead by many researchers. We have been working on an efficient but effective fix for this problem and found that not mapping the kernel space when running in user mode is the solution to this problem [4] (the corresponding paper [5] will be presented at ESSoS17). With this RFC patch we allow anybody to configure their kernel with the flag CONFIG_KAISER to add our defense mechanism. If there are any questions we would love to answer them. We also appreciate any comments! Cheers, Daniel (+ the KAISER team from Graz University of Technology) [1] http://www.ieee-security.org/TC/SP2013/papers/4977a191.pdf [2] https://www.blackhat.com/docs/us-16/materials/us-16-Fogh-Using-Undocumented-CPU-Behaviour-To-See-Into-Kernel-Mode-And-Break-KASLR-In-The-Process.pdf [3] https://www.blackhat.com/docs/us-16/materials/us-16-Jang-Breaking-Kernel-Address-Space-Layout-Randomization-KASLR-With-Intel-TSX.pdf [4] https://github.com/IAIK/KAISER [5] https://gruss.cc/files/kaiser.pdf [patch based also on https://raw.githubusercontent.com/IAIK/KAISER/master/KAISER/0001-KAISER-Kernel-Address-Isolation.patch] Signed-off-by: Richard Fellner Signed-off-by: Moritz Lipp Signed-off-by: Daniel Gruss Signed-off-by: Michael Schwarz Acked-by: Jiri Kosina Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- include/linux/percpu-defs.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 8f16299ca068..8ea945f63a05 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -35,6 +35,12 @@ #endif +#ifdef CONFIG_KAISER +#define USER_MAPPED_SECTION "..user_mapped" +#else +#define USER_MAPPED_SECTION "" +#endif + /* * Base implementations of per-CPU variable declarations and definitions, where * the section in which the variable is to be placed is provided by the @@ -115,6 +121,12 @@ #define DEFINE_PER_CPU(type, name) \ DEFINE_PER_CPU_SECTION(type, name, "") +#define DECLARE_PER_CPU_USER_MAPPED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) + +#define DEFINE_PER_CPU_USER_MAPPED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) + /* * Declaration/definition used for per-CPU variables that must come first in * the set of variables. @@ -144,6 +156,14 @@ DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ ____cacheline_aligned_in_smp +#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + +#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + #define DECLARE_PER_CPU_ALIGNED(type, name) \ DECLARE_PER_CPU_SECTION(type, name, PER_CPU_ALIGNED_SECTION) \ ____cacheline_aligned @@ -162,6 +182,16 @@ #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ __aligned(PAGE_SIZE) +/* + * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode. + */ +#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ + __aligned(PAGE_SIZE) + +#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ + __aligned(PAGE_SIZE) /* * Declaration/definition used for per-CPU variables that must be read mostly. -- cgit v1.2.3 From bed9bb7f3e6d4045013d2bb9e4004896de57f02b Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 30 Aug 2017 16:23:00 -0700 Subject: kaiser: merged update Merged fixes and cleanups, rebased to 4.4.89 tree (no 5-level paging). Signed-off-by: Dave Hansen Signed-off-by: Hugh Dickins Acked-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- include/linux/kaiser.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 include/linux/kaiser.h (limited to 'include/linux') diff --git a/include/linux/kaiser.h b/include/linux/kaiser.h new file mode 100644 index 000000000000..9db5433c2284 --- /dev/null +++ b/include/linux/kaiser.h @@ -0,0 +1,26 @@ +#ifndef _INCLUDE_KAISER_H +#define _INCLUDE_KAISER_H + +#ifdef CONFIG_KAISER +#include +#else + +/* + * These stubs are used whenever CONFIG_KAISER is off, which + * includes architectures that support KAISER, but have it + * disabled. + */ + +static inline void kaiser_init(void) +{ +} +static inline void kaiser_remove_mapping(unsigned long start, unsigned long size) +{ +} +static inline int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) +{ + return 0; +} + +#endif /* !CONFIG_KAISER */ +#endif /* _INCLUDE_KAISER_H */ -- cgit v1.2.3 From 003e476716906afa135faf605ae0a5c3598c0293 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 3 Sep 2017 18:57:03 -0700 Subject: kaiser: stack map PAGE_SIZE at THREAD_SIZE-PAGE_SIZE Kaiser only needs to map one page of the stack; and kernel/fork.c did not build on powerpc (no __PAGE_KERNEL). It's all cleaner if linux/kaiser.h provides kaiser_map_thread_stack() and kaiser_unmap_thread_stack() wrappers around asm/kaiser.h's kaiser_add_mapping() and kaiser_remove_mapping(). And use linux/kaiser.h in init/main.c to avoid the #ifdefs there. Signed-off-by: Hugh Dickins Acked-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- include/linux/kaiser.h | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kaiser.h b/include/linux/kaiser.h index 9db5433c2284..4a4d6d911a14 100644 --- a/include/linux/kaiser.h +++ b/include/linux/kaiser.h @@ -1,26 +1,52 @@ -#ifndef _INCLUDE_KAISER_H -#define _INCLUDE_KAISER_H +#ifndef _LINUX_KAISER_H +#define _LINUX_KAISER_H #ifdef CONFIG_KAISER #include + +static inline int kaiser_map_thread_stack(void *stack) +{ + /* + * Map that page of kernel stack on which we enter from user context. + */ + return kaiser_add_mapping((unsigned long)stack + + THREAD_SIZE - PAGE_SIZE, PAGE_SIZE, __PAGE_KERNEL); +} + +static inline void kaiser_unmap_thread_stack(void *stack) +{ + /* + * Note: may be called even when kaiser_map_thread_stack() failed. + */ + kaiser_remove_mapping((unsigned long)stack + + THREAD_SIZE - PAGE_SIZE, PAGE_SIZE); +} #else /* * These stubs are used whenever CONFIG_KAISER is off, which - * includes architectures that support KAISER, but have it - * disabled. + * includes architectures that support KAISER, but have it disabled. */ static inline void kaiser_init(void) { } -static inline void kaiser_remove_mapping(unsigned long start, unsigned long size) +static inline int kaiser_add_mapping(unsigned long addr, + unsigned long size, unsigned long flags) +{ + return 0; +} +static inline void kaiser_remove_mapping(unsigned long start, + unsigned long size) { } -static inline int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) +static inline int kaiser_map_thread_stack(void *stack) { return 0; } +static inline void kaiser_unmap_thread_stack(void *stack) +{ +} #endif /* !CONFIG_KAISER */ -#endif /* _INCLUDE_KAISER_H */ +#endif /* _LINUX_KAISER_H */ -- cgit v1.2.3 From c52e55a2a82d3a44189810d35717d81cb4cf61d4 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 21 Aug 2017 20:11:43 -0700 Subject: kaiser: cleanups while trying for gold link While trying to get our gold link to work, four cleanups: matched the gdt_page declaration to its definition; in fiddling unsuccessfully with PERCPU_INPUT(), lined up backslashes; lined up the backslashes according to convention in percpu-defs.h; deleted the unused irq_stack_pointer addition to irq_stack_union. Sad to report that aligning backslashes does not appear to help gold align to 8192: but while these did not help, they are worth keeping. Signed-off-by: Hugh Dickins Acked-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- include/linux/percpu-defs.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 8ea945f63a05..cfe13cb4ec63 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -121,10 +121,10 @@ #define DEFINE_PER_CPU(type, name) \ DEFINE_PER_CPU_SECTION(type, name, "") -#define DECLARE_PER_CPU_USER_MAPPED(type, name) \ +#define DECLARE_PER_CPU_USER_MAPPED(type, name) \ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) -#define DEFINE_PER_CPU_USER_MAPPED(type, name) \ +#define DEFINE_PER_CPU_USER_MAPPED(type, name) \ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION) /* @@ -156,11 +156,11 @@ DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ ____cacheline_aligned_in_smp -#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ +#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ ____cacheline_aligned_in_smp -#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ +#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \ ____cacheline_aligned_in_smp @@ -185,18 +185,18 @@ /* * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode. */ -#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ - __aligned(PAGE_SIZE) +#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ + __aligned(PAGE_SIZE) -#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ - __aligned(PAGE_SIZE) +#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \ + __aligned(PAGE_SIZE) /* * Declaration/definition used for per-CPU variables that must be read mostly. */ -#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \ +#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \ DECLARE_PER_CPU_SECTION(type, name, "..read_mostly") #define DEFINE_PER_CPU_READ_MOSTLY(type, name) \ -- cgit v1.2.3 From 3e3d38fd9832e82a8cb1a5b1154acfa43ac08d15 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 9 Sep 2017 21:27:32 -0700 Subject: kaiser: vmstat show NR_KAISERTABLE as nr_overhead The kaiser update made an interesting choice, never to free any shadow page tables. Contention on global spinlock was worrying, particularly with it held across page table scans when freeing. Something had to be done: I was going to add refcounting; but simply never to free them is an appealing choice, minimizing contention without complicating the code (the more a page table is found already, the less the spinlock is used). But leaking pages in this way is also a worry: can we get away with it? At the very least, we need a count to show how bad it actually gets: in principle, one might end up wasting about 1/256 of memory that way (1/512 for when direct-mapped pages have to be user-mapped, plus 1/512 for when they are user-mapped from the vmalloc area on another occasion (but we don't have vmalloc'ed stacks, so only large ldts are vmalloc'ed). Add per-cpu stat NR_KAISERTABLE: including 256 at startup for the shared pgd entries, and 1 for each intermediate page table added thereafter for user-mapping - but leave out the 1 per mm, for its shadow pgd, because that distracts from the monotonic increase. Shown in /proc/vmstat as nr_overhead (0 if kaiser not enabled). In practice, it doesn't look so bad so far: more like 1/12000 after nine hours of gtests below; and movable pageblock segregation should tend to cluster the kaiser tables into a subset of the address space (if not, they will be bad for compaction too). But production may tell a different story: keep an eye on this number, and bring back lighter freeing if it gets out of control (maybe a shrinker). Signed-off-by: Hugh Dickins Acked-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- include/linux/mmzone.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ff88d6189411..b93b578cfa42 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -131,8 +131,9 @@ enum zone_stat_item { NR_SLAB_RECLAIMABLE, NR_SLAB_UNRECLAIMABLE, NR_PAGETABLE, /* used for pagetables */ - NR_KERNEL_STACK, /* Second 128 byte cacheline */ + NR_KERNEL_STACK, + NR_KAISERTABLE, NR_UNSTABLE_NFS, /* NFS unstable pages */ NR_BOUNCE, NR_VMSCAN_WRITE, -- cgit v1.2.3 From 3e1457d6bf26d9ec300781f84cd0057e44deb45d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Jan 2018 10:43:15 -0800 Subject: KPTI: Rename to PAGE_TABLE_ISOLATION This renames CONFIG_KAISER to CONFIG_PAGE_TABLE_ISOLATION. Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- include/linux/kaiser.h | 6 +++--- include/linux/percpu-defs.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kaiser.h b/include/linux/kaiser.h index 4a4d6d911a14..58c55b1589d0 100644 --- a/include/linux/kaiser.h +++ b/include/linux/kaiser.h @@ -1,7 +1,7 @@ #ifndef _LINUX_KAISER_H #define _LINUX_KAISER_H -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION #include static inline int kaiser_map_thread_stack(void *stack) @@ -24,7 +24,7 @@ static inline void kaiser_unmap_thread_stack(void *stack) #else /* - * These stubs are used whenever CONFIG_KAISER is off, which + * These stubs are used whenever CONFIG_PAGE_TABLE_ISOLATION is off, which * includes architectures that support KAISER, but have it disabled. */ @@ -48,5 +48,5 @@ static inline void kaiser_unmap_thread_stack(void *stack) { } -#endif /* !CONFIG_KAISER */ +#endif /* !CONFIG_PAGE_TABLE_ISOLATION */ #endif /* _LINUX_KAISER_H */ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index cfe13cb4ec63..8902f23bb770 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -35,7 +35,7 @@ #endif -#ifdef CONFIG_KAISER +#ifdef CONFIG_PAGE_TABLE_ISOLATION #define USER_MAPPED_SECTION "..user_mapped" #else #define USER_MAPPED_SECTION "" -- cgit v1.2.3