Diffstat (limited to 'mm/vmalloc.c')
-rw-r--r-- | mm/vmalloc.c | 295
1 file changed, 202 insertions, 93 deletions
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e44eb1aeec4a..8884934b4bcb 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -274,13 +274,12 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
 
 /*** Global kva allocator ***/
 
-#define VM_LAZY_FREE	0x01
-#define VM_LAZY_FREEING	0x02
 #define VM_VM_AREA	0x04
 
 static DEFINE_SPINLOCK(vmap_area_lock);
 /* Export for kexec only */
 LIST_HEAD(vmap_area_list);
+static LLIST_HEAD(vmap_purge_list);
 static struct rb_root vmap_area_root = RB_ROOT;
 
 /* The vmap cache globals are protected by vmap_area_lock */
@@ -291,6 +290,57 @@ static unsigned long cached_align;
 
 static unsigned long vmap_area_pcpu_hole;
 
+#ifdef CONFIG_ENABLE_VMALLOC_SAVING
+#define POSSIBLE_VMALLOC_START	PAGE_OFFSET
+
+#define VMALLOC_BITMAP_SIZE	((VMALLOC_END - PAGE_OFFSET) >> \
+					PAGE_SHIFT)
+#define VMALLOC_TO_BIT(addr)	((addr - PAGE_OFFSET) >> PAGE_SHIFT)
+#define BIT_TO_VMALLOC(i)	(PAGE_OFFSET + i * PAGE_SIZE)
+
+unsigned long total_vmalloc_size;
+unsigned long vmalloc_reserved;
+
+DECLARE_BITMAP(possible_areas, VMALLOC_BITMAP_SIZE);
+
+void mark_vmalloc_reserved_area(void *x, unsigned long size)
+{
+	unsigned long addr = (unsigned long)x;
+
+	bitmap_set(possible_areas, VMALLOC_TO_BIT(addr), size >> PAGE_SHIFT);
+	vmalloc_reserved += size;
+}
+
+int is_vmalloc_addr(const void *x)
+{
+	unsigned long addr = (unsigned long)x;
+
+	if (addr < POSSIBLE_VMALLOC_START || addr >= VMALLOC_END)
+		return 0;
+
+	if (test_bit(VMALLOC_TO_BIT(addr), possible_areas))
+		return 0;
+
+	return 1;
+}
+
+static void calc_total_vmalloc_size(void)
+{
+	total_vmalloc_size = VMALLOC_END - POSSIBLE_VMALLOC_START -
+		vmalloc_reserved;
+}
+#else
+int is_vmalloc_addr(const void *x)
+{
+	unsigned long addr = (unsigned long)x;
+
+	return addr >= VMALLOC_START && addr < VMALLOC_END;
+}
+
+static void calc_total_vmalloc_size(void) { }
+#endif
+EXPORT_SYMBOL(is_vmalloc_addr);
+
 static struct vmap_area *__find_vmap_area(unsigned long addr)
 {
 	struct rb_node *n = vmap_area_root.rb_node;
@@ -363,6 +413,8 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 	BUG_ON(offset_in_page(size));
 	BUG_ON(!is_power_of_2(align));
 
+	might_sleep();
+
 	va = kmalloc_node(sizeof(struct vmap_area),
 			gfp_mask & GFP_RECLAIM_MASK, node);
 	if (unlikely(!va))
@@ -473,7 +525,7 @@ overflow:
 		purged = 1;
 		goto retry;
 	}
-	if (printk_ratelimit())
+	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
 		pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
 			size);
 	kfree(va);
@@ -576,11 +628,19 @@ static unsigned long lazy_max_pages(void)
 
 	log = fls(num_online_cpus());
 
-	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
+	return log * (1UL * CONFIG_VMAP_LAZY_PURGING_FACTOR *
+					1024 * 1024 / PAGE_SIZE);
 }
 
 static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
 
+/*
+ * Serialize vmap purging. There is no actual critical section protected
+ * by this lock, but we want to avoid concurrent calls for performance
+ * reasons and to make the pcpu_get_vm_areas more deterministic.
+ */
+static DEFINE_MUTEX(vmap_purge_lock);
+
 /* for per-CPU blocks */
 static void purge_fragmented_blocks_allcpus(void);
 
@@ -595,65 +655,40 @@ void set_iounmap_nonlazy(void)
 
 /*
  * Purges all lazily-freed vmap areas.
- *
- * If sync is 0 then don't purge if there is already a purge in progress.
- * If force_flush is 1, then flush kernel TLBs between *start and *end even
- * if we found no lazy vmap areas to unmap (callers can use this to optimise
- * their own TLB flushing).
- * Returns with *start = min(*start, lowest purged address)
- *              *end = max(*end, highest purged address)
  */
-static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
-					int sync, int force_flush)
+static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
 {
-	static DEFINE_SPINLOCK(purge_lock);
-	LIST_HEAD(valist);
+	struct llist_node *valist;
 	struct vmap_area *va;
 	struct vmap_area *n_va;
-	int nr = 0;
+	bool do_free = false;
 
-	/*
-	 * If sync is 0 but force_flush is 1, we'll go sync anyway but callers
-	 * should not expect such behaviour. This just simplifies locking for
-	 * the case that isn't actually used at the moment anyway.
-	 */
-	if (!sync && !force_flush) {
-		if (!spin_trylock(&purge_lock))
-			return;
-	} else
-		spin_lock(&purge_lock);
+	lockdep_assert_held(&vmap_purge_lock);
 
-	if (sync)
-		purge_fragmented_blocks_allcpus();
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(va, &vmap_area_list, list) {
-		if (va->flags & VM_LAZY_FREE) {
-			if (va->va_start < *start)
-				*start = va->va_start;
-			if (va->va_end > *end)
-				*end = va->va_end;
-			nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
-			list_add_tail(&va->purge_list, &valist);
-			va->flags |= VM_LAZY_FREEING;
-			va->flags &= ~VM_LAZY_FREE;
-		}
+	valist = llist_del_all(&vmap_purge_list);
+	llist_for_each_entry(va, valist, purge_list) {
+		if (va->va_start < start)
+			start = va->va_start;
+		if (va->va_end > end)
+			end = va->va_end;
+		do_free = true;
 	}
-	rcu_read_unlock();
 
-	if (nr)
-		atomic_sub(nr, &vmap_lazy_nr);
+	if (!do_free)
+		return false;
 
-	if (nr || force_flush)
-		flush_tlb_kernel_range(*start, *end);
+	flush_tlb_kernel_range(start, end);
 
-	if (nr) {
-		spin_lock(&vmap_area_lock);
-		list_for_each_entry_safe(va, n_va, &valist, purge_list)
-			__free_vmap_area(va);
-		spin_unlock(&vmap_area_lock);
+	spin_lock(&vmap_area_lock);
+	llist_for_each_entry_safe(va, n_va, valist, purge_list) {
+		int nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
+
+		__free_vmap_area(va);
+		atomic_sub(nr, &vmap_lazy_nr);
+		cond_resched_lock(&vmap_area_lock);
 	}
-	spin_unlock(&purge_lock);
+	spin_unlock(&vmap_area_lock);
+	return true;
 }
 
 /*
@@ -662,9 +697,10 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
  */
 static void try_purge_vmap_area_lazy(void)
 {
-	unsigned long start = ULONG_MAX, end = 0;
-
-	__purge_vmap_area_lazy(&start, &end, 0, 0);
+	if (mutex_trylock(&vmap_purge_lock)) {
+		__purge_vmap_area_lazy(ULONG_MAX, 0);
+		mutex_unlock(&vmap_purge_lock);
+	}
 }
 
 /*
@@ -672,9 +708,10 @@ static void try_purge_vmap_area_lazy(void)
  */
 static void purge_vmap_area_lazy(void)
 {
-	unsigned long start = ULONG_MAX, end = 0;
-
-	__purge_vmap_area_lazy(&start, &end, 1, 0);
+	mutex_lock(&vmap_purge_lock);
+	purge_fragmented_blocks_allcpus();
+	__purge_vmap_area_lazy(ULONG_MAX, 0);
+	mutex_unlock(&vmap_purge_lock);
 }
 
 /*
@@ -684,20 +721,16 @@ static void purge_vmap_area_lazy(void)
  */
 static void free_vmap_area_noflush(struct vmap_area *va)
 {
-	va->flags |= VM_LAZY_FREE;
-	atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
-	if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
-		try_purge_vmap_area_lazy();
-}
+	int nr_lazy;
 
-/*
- * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
- * called for the correct range previously.
- */
-static void free_unmap_vmap_area_noflush(struct vmap_area *va)
-{
-	unmap_vmap_area(va);
-	free_vmap_area_noflush(va);
+	nr_lazy = atomic_add_return((va->va_end - va->va_start) >> PAGE_SHIFT,
+				    &vmap_lazy_nr);
+
+	/* After this point, we may free va at any time */
+	llist_add(&va->purge_list, &vmap_purge_list);
+
+	if (unlikely(nr_lazy > lazy_max_pages()))
+		try_purge_vmap_area_lazy();
 }
 
 /*
@@ -706,7 +739,8 @@ static void free_unmap_vmap_area_noflush(struct vmap_area *va)
 static void free_unmap_vmap_area(struct vmap_area *va)
 {
 	flush_cache_vunmap(va->va_start, va->va_end);
-	free_unmap_vmap_area_noflush(va);
+	unmap_vmap_area(va);
+	free_vmap_area_noflush(va);
 }
 
 static struct vmap_area *find_vmap_area(unsigned long addr)
@@ -720,16 +754,6 @@ static struct vmap_area *find_vmap_area(unsigned long addr)
 	return va;
 }
 
-static void free_unmap_vmap_area_addr(unsigned long addr)
-{
-	struct vmap_area *va;
-
-	va = find_vmap_area(addr);
-	BUG_ON(!va);
-	free_unmap_vmap_area(va);
-}
-
-
 /*** Per cpu kva allocator ***/
 
 /*
@@ -1050,6 +1074,8 @@ void vm_unmap_aliases(void)
 	if (unlikely(!vmap_initialized))
 		return;
 
+	might_sleep();
+
 	for_each_possible_cpu(cpu) {
 		struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
 		struct vmap_block *vb;
@@ -1074,7 +1100,11 @@ void vm_unmap_aliases(void)
 		rcu_read_unlock();
 	}
 
-	__purge_vmap_area_lazy(&start, &end, 1, flush);
+	mutex_lock(&vmap_purge_lock);
+	purge_fragmented_blocks_allcpus();
+	if (!__purge_vmap_area_lazy(start, end) && flush)
+		flush_tlb_kernel_range(start, end);
+	mutex_unlock(&vmap_purge_lock);
 }
 EXPORT_SYMBOL_GPL(vm_unmap_aliases);
 
@@ -1087,7 +1117,9 @@ void vm_unmap_ram(const void *mem, unsigned int count)
 {
 	unsigned long size = count << PAGE_SHIFT;
 	unsigned long addr = (unsigned long)mem;
+	struct vmap_area *va;
 
+	might_sleep();
 	BUG_ON(!addr);
 	BUG_ON(addr < VMALLOC_START);
 	BUG_ON(addr > VMALLOC_END);
@@ -1096,10 +1128,14 @@ void vm_unmap_ram(const void *mem, unsigned int count)
 	debug_check_no_locks_freed(mem, size);
 	vmap_debug_free_range(addr, addr+size);
 
-	if (likely(count <= VMAP_MAX_ALLOC))
+	if (likely(count <= VMAP_MAX_ALLOC)) {
 		vb_free(mem, size);
-	else
-		free_unmap_vmap_area_addr(addr);
+		return;
+	}
+
+	va = find_vmap_area(addr);
+	BUG_ON(!va);
+	free_unmap_vmap_area(va);
 }
 EXPORT_SYMBOL(vm_unmap_ram);
 
@@ -1148,6 +1184,33 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
 EXPORT_SYMBOL(vm_map_ram);
 
 static struct vm_struct *vmlist __initdata;
+
+/**
+ * vm_area_check_early - check if vmap area is already mapped
+ * @vm: vm_struct to be checked
+ *
+ * This function is used to check if the vmap area has been
+ * mapped already. @vm->addr, @vm->size and @vm->flags should
+ * contain proper values.
+ *
+ */
+int __init vm_area_check_early(struct vm_struct *vm)
+{
+	struct vm_struct *tmp, **p;
+
+	BUG_ON(vmap_initialized);
+	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
+		if (tmp->addr >= vm->addr) {
+			if (tmp->addr < vm->addr + vm->size)
+				return 1;
+		} else {
+			if (tmp->addr + tmp->size > vm->addr)
+				return 1;
+		}
+	}
+	return 0;
+}
+
 /**
  * vm_area_add_early - add vmap area early during boot
  * @vm: vm_struct to add
@@ -1228,7 +1291,7 @@ void __init vmalloc_init(void)
 	}
 
 	vmap_area_pcpu_hole = VMALLOC_END;
-
+	calc_total_vmalloc_size();
 	vmap_initialized = true;
 }
 
@@ -1392,16 +1455,27 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
  */
 struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
 {
+#ifdef CONFIG_ENABLE_VMALLOC_SAVING
+	return __get_vm_area_node(size, 1, flags, PAGE_OFFSET, VMALLOC_END,
+				  NUMA_NO_NODE, GFP_KERNEL,
+				  __builtin_return_address(0));
+#else
 	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
 				  NUMA_NO_NODE, GFP_KERNEL,
 				  __builtin_return_address(0));
+#endif
 }
 
 struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
 				const void *caller)
 {
+#ifdef CONFIG_ENABLE_VMALLOC_SAVING
+	return __get_vm_area_node(size, 1, flags, PAGE_OFFSET, VMALLOC_END,
+				  NUMA_NO_NODE, GFP_KERNEL, caller);
+#else
 	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
 				  NUMA_NO_NODE, GFP_KERNEL, caller);
+#endif
 }
 
 /**
@@ -1435,6 +1509,8 @@ struct vm_struct *remove_vm_area(const void *addr)
 {
 	struct vmap_area *va;
 
+	might_sleep();
+
 	va = find_vmap_area((unsigned long)addr);
 	if (va && va->flags & VM_VM_AREA) {
 		struct vm_struct *vm = va->vm;
@@ -1494,7 +1570,39 @@ static void __vunmap(const void *addr, int deallocate_pages)
 	kfree(area);
 	return;
 }
- 
+
+static inline void __vfree_deferred(const void *addr)
+{
+	/*
+	 * Use raw_cpu_ptr() because this can be called from preemptible
+	 * context. Preemption is absolutely fine here, because the llist_add()
+	 * implementation is lockless, so it works even if we are adding to
+	 * another cpu's list. schedule_work() should be fine with this too.
+	 */
+	struct vfree_deferred *p = raw_cpu_ptr(&vfree_deferred);
+
+	if (llist_add((struct llist_node *)addr, &p->list))
+		schedule_work(&p->wq);
+}
+
+/**
+ * vfree_atomic - release memory allocated by vmalloc()
+ * @addr: memory base address
+ *
+ * This one is just like vfree() but can be called in any atomic context
+ * except NMIs.
+ */
+void vfree_atomic(const void *addr)
+{
+	BUG_ON(in_nmi());
+
+	kmemleak_free(addr);
+
+	if (!addr)
+		return;
+	__vfree_deferred(addr);
+}
+
 /**
  * vfree - release memory allocated by vmalloc()
  * @addr: memory base address
@@ -1517,11 +1625,9 @@ void vfree(const void *addr)
 
 	if (!addr)
 		return;
-	if (unlikely(in_interrupt())) {
-		struct vfree_deferred *p = this_cpu_ptr(&vfree_deferred);
-		if (llist_add((struct llist_node *)addr, &p->list))
-			schedule_work(&p->wq);
-	} else
+	if (unlikely(in_interrupt()))
+		__vfree_deferred(addr);
+	else
 		__vunmap(addr, 1);
 }
 EXPORT_SYMBOL(vfree);
@@ -2671,6 +2777,9 @@ static int s_show(struct seq_file *m, void *p)
 	if (v->flags & VM_VPAGES)
 		seq_puts(m, " vpages");
 
+	if (v->flags & VM_LOWMEM)
+		seq_puts(m, " lowmem");
+
 	show_numa_info(m, v);
 	seq_putc(m, '\n');
 	return 0;
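For readers new to the llist pattern this diff moves to, here is a minimal stand-alone sketch of the idea in user-space C. This is not kernel code: the type struct node and the helpers list_push()/list_drain_all() are made-up stand-ins for llist_add()/llist_del_all() on vmap_purge_list. Producers push nodes onto a lock-free singly linked list with a single compare-and-swap, and the consumer detaches the whole batch with one atomic exchange, which mirrors how free_vmap_area_noflush() queues areas and __purge_vmap_area_lazy() later drains them in one pass.

/*
 * User-space sketch of the lock-free "push from anywhere, drain in one
 * batch" pattern behind vmap_purge_list and vfree_deferred above.
 * Hypothetical names; build with: cc -std=c11 sketch.c
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *next;
	unsigned long start, end;	/* stands in for a vmap_area range */
};

static _Atomic(struct node *) purge_list;	/* like struct llist_head */

/* lock-free push, analogous to llist_add(); safe from any thread */
static void list_push(struct node *n)
{
	struct node *old = atomic_load_explicit(&purge_list, memory_order_relaxed);

	do {
		n->next = old;
	} while (!atomic_compare_exchange_weak_explicit(&purge_list, &old, n,
							memory_order_release,
							memory_order_relaxed));
}

/* detach the whole list at once, analogous to llist_del_all() */
static struct node *list_drain_all(void)
{
	return atomic_exchange_explicit(&purge_list, NULL, memory_order_acquire);
}

int main(void)
{
	unsigned long lo = ~0UL, hi = 0;
	struct node *batch;

	/* producer side: queue three "lazily freed" ranges */
	for (int i = 0; i < 3; i++) {
		struct node *n = malloc(sizeof(*n));

		n->start = 0x1000UL * (i + 1);
		n->end = n->start + 0x1000;
		list_push(n);
	}

	/*
	 * consumer side: one drain sees every queued range and computes the
	 * union, like the start/end widening in __purge_vmap_area_lazy()
	 */
	batch = list_drain_all();
	while (batch) {
		struct node *next = batch->next;

		if (batch->start < lo)
			lo = batch->start;
		if (batch->end > hi)
			hi = batch->end;
		free(batch);
		batch = next;
	}
	printf("would flush TLB range [%#lx, %#lx)\n", lo, hi);
	return 0;
}

Because the push is one compare-and-swap and the drain is one exchange, neither side ever blocks on the other. That is what lets vfree_atomic() queue work from atomic context and lets the purge path replace the old purge_lock spinlock with the coarse vmap_purge_lock mutex.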