Diffstat (limited to 'arch/arm64/mm')
| -rw-r--r-- | arch/arm64/mm/cache.S       |   77 |
| -rw-r--r-- | arch/arm64/mm/context.c     |    3 |
| -rw-r--r-- | arch/arm64/mm/dma-mapping.c | 1118 |
| -rw-r--r-- | arch/arm64/mm/fault.c       |   73 |
| -rw-r--r-- | arch/arm64/mm/flush.c       |    1 |
| -rw-r--r-- | arch/arm64/mm/init.c        |  159 |
| -rw-r--r-- | arch/arm64/mm/mmu.c         |  519 |
| -rw-r--r-- | arch/arm64/mm/pageattr.c    |   48 |
| -rw-r--r-- | arch/arm64/mm/proc.S        |  124 |
9 files changed, 2025 insertions, 97 deletions
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index e5091d9cceb6..f14be126fb6a 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -26,6 +26,79 @@
 #include <asm/uaccess.h>
 
 /*
+ * __flush_dcache_all()
+ *
+ * Flush the whole D-cache.
+ *
+ * Corrupted registers: x0-x7, x9-x11
+ */
+__flush_dcache_all:
+	dmb	sy			// ensure ordering with previous memory accesses
+	mrs	x0, clidr_el1		// read clidr
+	and	x3, x0, #0x7000000	// extract loc from clidr
+	lsr	x3, x3, #23		// left align loc bit field
+	cbz	x3, finished		// if loc is 0, then no need to clean
+	mov	x10, #0			// start clean at cache level 0
+loop1:
+	add	x2, x10, x10, lsr #1	// work out 3x current cache level
+	lsr	x1, x0, x2		// extract cache type bits from clidr
+	and	x1, x1, #7		// mask of the bits for current cache only
+	cmp	x1, #2			// see what cache we have at this level
+	b.lt	skip			// skip if no cache, or just i-cache
+	save_and_disable_irqs x9	// make CSSELR and CCSIDR access atomic
+	msr	csselr_el1, x10		// select current cache level in csselr
+	isb				// isb to sych the new cssr&csidr
+	mrs	x1, ccsidr_el1		// read the new ccsidr
+	restore_irqs x9
+	and	x2, x1, #7		// extract the length of the cache lines
+	add	x2, x2, #4		// add 4 (line length offset)
+	mov	x4, #0x3ff
+	and	x4, x4, x1, lsr #3	// find maximum number on the way size
+	clz	w5, w4			// find bit position of way size increment
+	mov	x7, #0x7fff
+	and	x7, x7, x1, lsr #13	// extract max number of the index size
+loop2:
+	mov	x9, x4			// create working copy of max way size
+loop3:
+	lsl	x6, x9, x5
+	orr	x11, x10, x6		// factor way and cache number into x11
+	lsl	x6, x7, x2
+	orr	x11, x11, x6		// factor index number into x11
+	dc	cisw, x11		// clean & invalidate by set/way
+	subs	x9, x9, #1		// decrement the way
+	b.ge	loop3
+	subs	x7, x7, #1		// decrement the index
+	b.ge	loop2
+skip:
+	add	x10, x10, #2		// increment cache number
+	cmp	x3, x10
+	b.gt	loop1
+finished:
+	mov	x10, #0			// swith back to cache level 0
+	msr	csselr_el1, x10		// select current cache level in csselr
+	dsb	sy
+	isb
+	ret
+ENDPROC(__flush_dcache_all)
+
+/*
+ * flush_cache_all()
+ *
+ * Flush the entire cache system. The data cache flush is now achieved
+ * using atomic clean / invalidates working outwards from L1 cache. This
+ * is done using Set/Way based cache maintainance instructions. The
+ * instruction cache can still be invalidated back to the point of
+ * unification in a single instruction.
+ */
+ENTRY(flush_cache_all)
+	mov	x12, lr
+	bl	__flush_dcache_all
+	mov	x0, #0
+	ic	ialluis			// I+BTB cache invalidate
+	ret	x12
+ENDPROC(flush_cache_all)
+
+/*
  * flush_icache_range(start,end)
  *
  * Ensure that the I and D caches are coherent within specified region.
@@ -121,7 +194,7 @@ ENTRY(__inval_cache_range)
  * - start   - virtual start address of region
  * - end     - virtual end address of region
  */
-__dma_inv_range:
+ENTRY(__dma_inv_range)
 	dcache_line_size x2, x3
 	sub	x3, x2, #1
 	tst	x1, x3				// end cache line aligned?
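The set/way loop added above composes the DC CISW operand for each cache level from CCSIDR_EL1 fields. The following C helper is a minimal sketch of that composition, for illustration only: it is not part of the patch, the function name is made up, and it assumes a non-zero way count so that the clz result matches the assembly's "clz w5, w4".

#include <stdint.h>

/* Sketch only: build a DC CISW operand from a CCSIDR_EL1 value. */
static uint64_t dc_cisw_operand(uint32_t ccsidr, unsigned int level,
				unsigned int way, unsigned int set)
{
	unsigned int line_shift = (ccsidr & 0x7) + 4;     /* log2(line size in bytes)           */
	uint32_t     max_way    = (ccsidr >> 3) & 0x3ff;  /* number of ways - 1                 */
	unsigned int way_shift  = __builtin_clz(max_way); /* way field sits in the top bits;
	                                                      assumes max_way != 0              */

	return ((uint64_t)way << way_shift) |             /* factor way into the operand        */
	       ((uint64_t)set << line_shift) |            /* factor set (index) into the operand */
	       ((uint64_t)level << 1);                    /* cache level, as written to CSSELR  */
}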
@@ -147,7 +220,7 @@ ENDPROC(__dma_inv_range) * - start - virtual start address of region * - end - virtual end address of region */ -__dma_clean_range: +ENTRY(__dma_clean_range) dcache_line_size x2, x3 sub x3, x2, #1 bic x0, x0, x3 diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index d4bf895b47cf..1e31cd3871ee 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -191,6 +191,9 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); switch_mm_fastpath: + + arm64_apply_bp_hardening(); + /* * Defer TTBR0_EL1 setting for user threads to uaccess_enable() when * emulating PAN. diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 925b2b3a06f8..159c79612e63 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -3,6 +3,7 @@ * * Copyright (C) 2012 ARM Ltd. * Author: Catalin Marinas <catalin.marinas@arm.com> + * Copyright (c) 2017, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -24,21 +25,49 @@ #include <linux/genalloc.h> #include <linux/dma-mapping.h> #include <linux/dma-contiguous.h> +#include <linux/mm.h> +#include <linux/iommu.h> #include <linux/vmalloc.h> #include <linux/swiotlb.h> #include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include <linux/io.h> +#include <asm/dma-iommu.h> +#include <linux/dma-mapping-fast.h> +#include <linux/msm_dma_iommu_mapping.h> + +#include "mm.h" + static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, bool coherent) { - if (!coherent || dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs)) + if (dma_get_attr(DMA_ATTR_STRONGLY_ORDERED, attrs)) + return pgprot_noncached(prot); + else if (!coherent || dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs)) return pgprot_writecombine(prot); return prot; } -static struct gen_pool *atomic_pool; +static bool is_dma_coherent(struct device *dev, struct dma_attrs *attrs) +{ + bool is_coherent; + + if (dma_get_attr(DMA_ATTR_FORCE_COHERENT, attrs)) + is_coherent = true; + else if (dma_get_attr(DMA_ATTR_FORCE_NON_COHERENT, attrs)) + is_coherent = false; + else if (is_device_dma_coherent(dev)) + is_coherent = true; + else + is_coherent = false; + + return is_coherent; +} +static struct gen_pool *atomic_pool; +#define NO_KERNEL_MAPPING_DUMMY 0x2222 #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE; @@ -86,10 +115,47 @@ static int __free_from_pool(void *start, size_t size) return 1; } +static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr, + void *data) +{ + struct page *page = virt_to_page(addr); + pgprot_t prot = *(pgprot_t *)data; + + set_pte(pte, mk_pte(page, prot)); + return 0; +} + +static int __dma_clear_pte(pte_t *pte, pgtable_t token, unsigned long addr, + void *data) +{ + pte_clear(&init_mm, addr, pte); + return 0; +} + +static void __dma_remap(struct page *page, size_t size, pgprot_t prot, + bool no_kernel_map) +{ + unsigned long start = (unsigned long) page_address(page); + unsigned end = start + size; + int (*func)(pte_t *pte, pgtable_t token, unsigned long addr, + void *data); + + if (no_kernel_map) + func = __dma_clear_pte; + else + func = __dma_update_pte; + + apply_to_page_range(&init_mm, start, size, func, &prot); + mb(); + flush_tlb_kernel_range(start, end); +} + static void *__dma_alloc_coherent(struct device *dev, size_t size, 
dma_addr_t *dma_handle, gfp_t flags, struct dma_attrs *attrs) { + void *addr; + if (dev == NULL) { WARN_ONCE(1, "Use an actual device structure for DMA allocation\n"); return NULL; @@ -100,7 +166,6 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, flags |= GFP_DMA; if (dev_get_cma_area(dev) && gfpflags_allow_blocking(flags)) { struct page *page; - void *addr; page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, get_order(size)); @@ -110,10 +175,20 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, *dma_handle = phys_to_dma(dev, page_to_phys(page)); addr = page_address(page); memset(addr, 0, size); - return addr; } else { - return swiotlb_alloc_coherent(dev, size, dma_handle, flags); + addr = swiotlb_alloc_coherent(dev, size, dma_handle, flags); + } + + if (addr && (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs) || + dma_get_attr(DMA_ATTR_STRONGLY_ORDERED, attrs))) { + /* + * flush the caches here because we can't later + */ + __dma_flush_range(addr, addr + size); + __dma_remap(virt_to_page(addr), size, 0, true); } + + return addr; } static void __dma_free_coherent(struct device *dev, size_t size, @@ -123,11 +198,16 @@ static void __dma_free_coherent(struct device *dev, size_t size, bool freed; phys_addr_t paddr = dma_to_phys(dev, dma_handle); + size = PAGE_ALIGN(size); if (dev == NULL) { WARN_ONCE(1, "Use an actual device structure for DMA allocation\n"); return; } + if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs) || + dma_get_attr(DMA_ATTR_STRONGLY_ORDERED, attrs)) + __dma_remap(phys_to_page(paddr), size, PAGE_KERNEL, false); + freed = dma_release_from_contiguous(dev, phys_to_page(paddr), size >> PAGE_SHIFT); @@ -141,8 +221,7 @@ static void *__dma_alloc(struct device *dev, size_t size, { struct page *page; void *ptr, *coherent_ptr; - bool coherent = is_device_dma_coherent(dev); - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false); + bool coherent = is_dma_coherent(dev, attrs); size = PAGE_ALIGN(size); @@ -164,16 +243,22 @@ static void *__dma_alloc(struct device *dev, size_t size, if (coherent) return ptr; - /* remove any dirty cache lines on the kernel alias */ - __dma_flush_range(ptr, ptr + size); - - /* create a coherent mapping */ - page = virt_to_page(ptr); - coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP, - prot, __builtin_return_address(0)); - if (!coherent_ptr) - goto no_map; - + if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) { + coherent_ptr = (void *)NO_KERNEL_MAPPING_DUMMY; + } else { + if (!dma_get_attr(DMA_ATTR_STRONGLY_ORDERED, attrs)) + /* remove any dirty cache lines on the kernel alias */ + __dma_flush_range(ptr, ptr + size); + + /* create a coherent mapping */ + page = virt_to_page(ptr); + coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP, + __get_dma_pgprot(attrs, + __pgprot(PROT_NORMAL_NC), false), + NULL); + if (!coherent_ptr) + goto no_map; + } return coherent_ptr; no_map: @@ -191,10 +276,11 @@ static void __dma_free(struct device *dev, size_t size, size = PAGE_ALIGN(size); - if (!is_device_dma_coherent(dev)) { + if (!is_dma_coherent(dev, attrs)) { if (__free_from_pool(vaddr, size)) return; - vunmap(vaddr); + if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) + vunmap(vaddr); } __dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs); } @@ -207,7 +293,7 @@ static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page, dma_addr_t dev_addr; dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs); - if (!is_device_dma_coherent(dev)) + if 
(!is_dma_coherent(dev, attrs)) __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); return dev_addr; @@ -218,7 +304,7 @@ static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - if (!is_device_dma_coherent(dev)) + if (!is_dma_coherent(dev, attrs)) __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); swiotlb_unmap_page(dev, dev_addr, size, dir, attrs); } @@ -231,7 +317,7 @@ static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int i, ret; ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs); - if (!is_device_dma_coherent(dev)) + if (!is_dma_coherent(dev, attrs)) for_each_sg(sgl, sg, ret, i) __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), sg->length, dir); @@ -247,7 +333,7 @@ static void __swiotlb_unmap_sg_attrs(struct device *dev, struct scatterlist *sg; int i; - if (!is_device_dma_coherent(dev)) + if (!is_dma_coherent(dev, attrs)) for_each_sg(sgl, sg, nelems, i) __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), sg->length, dir); @@ -313,7 +399,7 @@ static int __swiotlb_mmap(struct device *dev, unsigned long off = vma->vm_pgoff; vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, - is_device_dma_coherent(dev)); + is_dma_coherent(dev, attrs)); if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) return ret; @@ -341,6 +427,55 @@ static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, return ret; } +static void *arm64_dma_remap(struct device *dev, void *cpu_addr, + dma_addr_t handle, size_t size, + struct dma_attrs *attrs) +{ + struct page *page = phys_to_page(dma_to_phys(dev, handle)); + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false); + unsigned long offset = handle & ~PAGE_MASK; + struct vm_struct *area; + unsigned long addr; + + size = PAGE_ALIGN(size + offset); + + /* + * DMA allocation can be mapped to user space, so lets + * set VM_USERMAP flags too. 
+ */ + area = get_vm_area(size, VM_USERMAP); + if (!area) + return NULL; + + addr = (unsigned long)area->addr; + area->phys_addr = __pfn_to_phys(page_to_pfn(page)); + + if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) { + vunmap((void *)addr); + return NULL; + } + return (void *)addr + offset; +} + +static void arm64_dma_unremap(struct device *dev, void *remapped_addr, + size_t size) +{ + struct vm_struct *area; + + size = PAGE_ALIGN(size); + remapped_addr = (void *)((unsigned long)remapped_addr & PAGE_MASK); + + area = find_vm_area(remapped_addr); + if (!area) { + WARN(1, "trying to free invalid coherent area: %p\n", + remapped_addr); + return; + } + vunmap(remapped_addr); + flush_tlb_kernel_range((unsigned long)remapped_addr, + (unsigned long)(remapped_addr + size)); +} + static struct dma_map_ops swiotlb_dma_ops = { .alloc = __dma_alloc, .free = __dma_free, @@ -356,6 +491,8 @@ static struct dma_map_ops swiotlb_dma_ops = { .sync_sg_for_device = __swiotlb_sync_sg_for_device, .dma_supported = swiotlb_dma_supported, .mapping_error = swiotlb_dma_mapping_error, + .remap = arm64_dma_remap, + .unremap = arm64_dma_unremap, }; static int __init atomic_pool_init(void) @@ -406,7 +543,7 @@ static int __init atomic_pool_init(void) goto out; remove_mapping: - dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP); + dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP, true); destroy_genpool: gen_pool_destroy(atomic_pool); atomic_pool = NULL; @@ -427,6 +564,7 @@ static void *__dummy_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, struct dma_attrs *attrs) { + WARN(1, "dma alloc failure, device may be missing a call to arch_setup_dma_ops"); return NULL; } @@ -542,7 +680,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) { - bool coherent = is_device_dma_coherent(dev); + bool coherent = is_dma_coherent(dev, attrs); int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent); size_t iosize = size; void *addr; @@ -624,7 +762,7 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, if (WARN_ON(!area || !area->pages)) return; iommu_dma_free(dev, area->pages, iosize, &handle); - dma_common_free_remap(cpu_addr, size, VM_USERMAP); + dma_common_free_remap(cpu_addr, size, VM_USERMAP, true); } else { iommu_dma_unmap_page(dev, handle, iosize, 0, NULL); __free_pages(virt_to_page(cpu_addr), get_order(size)); @@ -639,7 +777,7 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, int ret; vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, - is_device_dma_coherent(dev)); + is_dma_coherent(dev, attrs)); if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) return ret; @@ -696,7 +834,7 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, struct dma_attrs *attrs) { - bool coherent = is_device_dma_coherent(dev); + bool coherent = is_dma_coherent(dev, attrs); int prot = dma_direction_to_prot(dir, coherent); dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot); @@ -749,7 +887,7 @@ static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { - bool coherent = is_device_dma_coherent(dev); + bool coherent = is_dma_coherent(dev, attrs); if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __iommu_sync_sg_for_device(dev, sgl, nelems, dir); @@ -782,7 +920,6 @@ static struct 
dma_map_ops iommu_dma_ops = { .sync_single_for_device = __iommu_sync_single_for_device, .sync_sg_for_cpu = __iommu_sync_sg_for_cpu, .sync_sg_for_device = __iommu_sync_sg_for_device, - .dma_supported = iommu_dma_supported, .mapping_error = iommu_dma_mapping_error, }; @@ -993,3 +1130,922 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, dev->archdata.dma_coherent = coherent; __iommu_setup_dma_ops(dev, dma_base, size, iommu); } +EXPORT_SYMBOL(arch_setup_dma_ops); + +#ifdef CONFIG_ARM64_DMA_USE_IOMMU + +static int __get_iommu_pgprot(struct dma_attrs *attrs, int prot, + bool coherent) +{ + if (!dma_get_attr(DMA_ATTR_EXEC_MAPPING, attrs)) + prot |= IOMMU_NOEXEC; + if (coherent) + prot |= IOMMU_CACHE; + + return prot; +} + +/* + * Make an area consistent for devices. + * Note: Drivers should NOT use this function directly, as it will break + * platforms with CONFIG_DMABOUNCE. + * Use the driver DMA support - see dma-mapping.h (dma_sync_*) + */ +static void __dma_page_cpu_to_dev(struct page *page, unsigned long off, + size_t size, enum dma_data_direction dir) +{ + __dma_map_area(page_address(page) + off, size, dir); +} + +static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, + size_t size, enum dma_data_direction dir) +{ + __dma_unmap_area(page_address(page) + off, size, dir); + + /* + * Mark the D-cache clean for this page to avoid extra flushing. + */ + if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE) + set_bit(PG_dcache_clean, &page->flags); +} + +static int arm_dma_set_mask(struct device *dev, u64 dma_mask) +{ + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + + *dev->dma_mask = dma_mask; + + return 0; +} + +/* IOMMU */ + +static void __dma_clear_buffer(struct page *page, size_t size, + struct dma_attrs *attrs, bool is_coherent) +{ + /* + * Ensure that the allocated pages are zeroed, and that any data + * lurking in the kernel direct-mapped region is invalidated. 
+ */ + void *ptr = page_address(page); + if (!dma_get_attr(DMA_ATTR_SKIP_ZEROING, attrs)) + memset(ptr, 0, size); + if (!is_coherent) + dmac_flush_range(ptr, ptr + size); +} + +static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, + size_t size) +{ + unsigned int order = get_order(size); + unsigned int align = 0; + unsigned int count, start; + unsigned long flags; + + if (order > CONFIG_ARM64_DMA_IOMMU_ALIGNMENT) + order = CONFIG_ARM64_DMA_IOMMU_ALIGNMENT; + + count = PAGE_ALIGN(size) >> PAGE_SHIFT; + align = (1 << order) - 1; + + spin_lock_irqsave(&mapping->lock, flags); + start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits, 0, + count, align); + if (start > mapping->bits) { + spin_unlock_irqrestore(&mapping->lock, flags); + return DMA_ERROR_CODE; + } + + bitmap_set(mapping->bitmap, start, count); + spin_unlock_irqrestore(&mapping->lock, flags); + + return mapping->base + (start << PAGE_SHIFT); +} + +static inline void __free_iova(struct dma_iommu_mapping *mapping, + dma_addr_t addr, size_t size) +{ + unsigned int start = (addr - mapping->base) >> PAGE_SHIFT; + unsigned int count = size >> PAGE_SHIFT; + unsigned long flags; + + spin_lock_irqsave(&mapping->lock, flags); + bitmap_clear(mapping->bitmap, start, count); + spin_unlock_irqrestore(&mapping->lock, flags); +} + +static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, + gfp_t gfp, struct dma_attrs *attrs) +{ + struct page **pages; + size_t count = size >> PAGE_SHIFT; + size_t array_size = count * sizeof(struct page *); + int i = 0; + bool is_coherent = is_dma_coherent(dev, attrs); + + if (array_size <= PAGE_SIZE) + pages = kzalloc(array_size, gfp); + else + pages = vzalloc(array_size); + if (!pages) + return NULL; + + if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) { + unsigned long order = get_order(size); + struct page *page; + + page = dma_alloc_from_contiguous(dev, count, order); + if (!page) + goto error; + + __dma_clear_buffer(page, size, attrs, is_coherent); + + for (i = 0; i < count; i++) + pages[i] = page + i; + + return pages; + } + + /* + * IOMMU can map any pages, so himem can also be used here + */ + gfp |= __GFP_NOWARN | __GFP_HIGHMEM; + + while (count) { + int j, order = __fls(count); + + pages[i] = alloc_pages(gfp, order); + while (!pages[i] && order) + pages[i] = alloc_pages(gfp, --order); + if (!pages[i]) + goto error; + + if (order) { + split_page(pages[i], order); + j = 1 << order; + while (--j) + pages[i + j] = pages[i] + j; + } + + __dma_clear_buffer(pages[i], PAGE_SIZE << order, attrs, + is_coherent); + i += 1 << order; + count -= 1 << order; + } + + return pages; +error: + while (i--) + if (pages[i]) + __free_pages(pages[i], 0); + if (array_size <= PAGE_SIZE) + kfree(pages); + else + vfree(pages); + return NULL; +} + +static int __iommu_free_buffer(struct device *dev, struct page **pages, + size_t size, struct dma_attrs *attrs) +{ + int count = size >> PAGE_SHIFT; + int array_size = count * sizeof(struct page *); + int i; + + if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) { + dma_release_from_contiguous(dev, pages[0], count); + } else { + for (i = 0; i < count; i++) + if (pages[i]) + __free_pages(pages[i], 0); + } + + if (array_size <= PAGE_SIZE) + kfree(pages); + else + vfree(pages); + return 0; +} + +/* + * Create a CPU mapping for a specified pages + */ +static void * +__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot, + const void *caller) +{ + return dma_common_pages_remap(pages, size, VM_USERMAP, prot, caller); +} 
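The IOVA side of the new IOMMU path hands out device addresses from a per-mapping bitmap: __alloc_iova() above rounds the request up to whole pages, searches for a free run of bits, and returns mapping->base plus the page offset, while __free_iova() clears the same bits. Below is a minimal, self-contained userspace sketch of that shape; it is illustrative only, assumes 4 KiB pages, and omits the spinlock and the CONFIG_ARM64_DMA_IOMMU_ALIGNMENT order cap. All names in it are hypothetical.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SHIFT 12UL
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define IOVA_BITS  1024			/* a 4 MiB window for the example */

static unsigned long iova_base = 0x10000000UL;	/* stand-in for mapping->base */
static bool iova_bitmap[IOVA_BITS];

static long alloc_iova(size_t size)
{
	size_t count = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	size_t start, i;

	for (start = 0; start + count <= IOVA_BITS; start++) {
		for (i = 0; i < count && !iova_bitmap[start + i]; i++)
			;
		if (i == count) {		/* found a free run: claim it */
			for (i = 0; i < count; i++)
				iova_bitmap[start + i] = true;
			return iova_base + (start << PAGE_SHIFT);
		}
	}
	return -1;				/* stand-in for DMA_ERROR_CODE */
}

static void free_iova(unsigned long iova, size_t size)
{
	size_t start = (iova - iova_base) >> PAGE_SHIFT;
	size_t count = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	size_t i;

	for (i = 0; i < count; i++)
		iova_bitmap[start + i] = false;
}

int main(void)
{
	long iova = alloc_iova(3 * PAGE_SIZE);

	printf("allocated iova: 0x%lx\n", (unsigned long)iova);
	free_iova((unsigned long)iova, 3 * PAGE_SIZE);
	return 0;
}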
+ +/* + * Create a mapping in device IO address space for specified pages + */ +static dma_addr_t __iommu_create_mapping(struct device *dev, + struct page **pages, size_t size, + struct dma_attrs *attrs) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + dma_addr_t dma_addr, iova; + int i, ret; + int prot = IOMMU_READ | IOMMU_WRITE; + + dma_addr = __alloc_iova(mapping, size); + if (dma_addr == DMA_ERROR_CODE) + return dma_addr; + prot = __get_iommu_pgprot(attrs, prot, + is_dma_coherent(dev, attrs)); + + iova = dma_addr; + for (i = 0; i < count; ) { + unsigned int next_pfn = page_to_pfn(pages[i]) + 1; + phys_addr_t phys = page_to_phys(pages[i]); + unsigned int len, j; + + for (j = i + 1; j < count; j++, next_pfn++) + if (page_to_pfn(pages[j]) != next_pfn) + break; + + len = (j - i) << PAGE_SHIFT; + ret = iommu_map(mapping->domain, iova, phys, len, prot); + if (ret < 0) + goto fail; + iova += len; + i = j; + } + return dma_addr; +fail: + iommu_unmap(mapping->domain, dma_addr, iova-dma_addr); + __free_iova(mapping, dma_addr, size); + return DMA_ERROR_CODE; +} + +static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, + size_t size) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + + /* + * add optional in-page offset from iova to size and align + * result to page size + */ + size = PAGE_ALIGN((iova & ~PAGE_MASK) + size); + iova &= PAGE_MASK; + + iommu_unmap(mapping->domain, iova, size); + __free_iova(mapping, iova, size); + return 0; +} + +static struct page **__atomic_get_pages(void *addr) +{ + struct page *page; + phys_addr_t phys; + + phys = gen_pool_virt_to_phys(atomic_pool, (unsigned long)addr); + page = phys_to_page(phys); + + return (struct page **)page; +} + +static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs) +{ + struct vm_struct *area; + + if (__in_atomic_pool(cpu_addr, PAGE_SIZE)) + return __atomic_get_pages(cpu_addr); + + if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) + return cpu_addr; + + area = find_vm_area(cpu_addr); + if (area) + return area->pages; + return NULL; +} + +static void *__iommu_alloc_atomic(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, + struct dma_attrs *attrs) +{ + struct page *page; + struct page **pages; + size_t count = size >> PAGE_SHIFT; + size_t array_size = count * sizeof(struct page *); + int i; + void *addr; + bool coherent = is_dma_coherent(dev, attrs); + + if (array_size <= PAGE_SIZE) + pages = kzalloc(array_size, gfp); + else + pages = vzalloc(array_size); + + if (!pages) + return NULL; + + if (coherent) { + page = alloc_pages(gfp, get_order(size)); + addr = page ? 
page_address(page) : NULL; + } else { + addr = __alloc_from_pool(size, &page, gfp); + } + + if (!addr) + goto err_free; + + for (i = 0; i < count ; i++) + pages[i] = page + i; + + *handle = __iommu_create_mapping(dev, pages, size, attrs); + if (*handle == DMA_ERROR_CODE) + goto err_mapping; + + kvfree(pages); + return addr; + +err_mapping: + if (coherent) + __free_pages(page, get_order(size)); + else + __free_from_pool(addr, size); +err_free: + kvfree(pages); + return NULL; +} + +static void __iommu_free_atomic(struct device *dev, void *cpu_addr, + dma_addr_t handle, size_t size) +{ + __iommu_remove_mapping(dev, handle, size); + __free_from_pool(cpu_addr, size); +} + +static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) +{ + bool coherent = is_dma_coherent(dev, attrs); + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); + struct page **pages; + void *addr = NULL; + + *handle = DMA_ERROR_CODE; + size = PAGE_ALIGN(size); + + if (!gfpflags_allow_blocking(gfp)) + return __iommu_alloc_atomic(dev, size, handle, gfp, attrs); + + /* + * Following is a work-around (a.k.a. hack) to prevent pages + * with __GFP_COMP being passed to split_page() which cannot + * handle them. The real problem is that this flag probably + * should be 0 on ARM as it is not supported on this + * platform; see CONFIG_HUGETLBFS. + */ + gfp &= ~(__GFP_COMP); + + pages = __iommu_alloc_buffer(dev, size, gfp, attrs); + if (!pages) + return NULL; + + *handle = __iommu_create_mapping(dev, pages, size, attrs); + if (*handle == DMA_ERROR_CODE) + goto err_buffer; + + if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) + return pages; + + addr = __iommu_alloc_remap(pages, size, gfp, prot, + __builtin_return_address(0)); + if (!addr) + goto err_mapping; + + return addr; + +err_mapping: + __iommu_remove_mapping(dev, *handle, size); +err_buffer: + __iommu_free_buffer(dev, pages, size, attrs); + return NULL; +} + +static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + unsigned long uaddr = vma->vm_start; + unsigned long usize = vma->vm_end - vma->vm_start; + struct page **pages = __iommu_get_pages(cpu_addr, attrs); + bool coherent = is_dma_coherent(dev, attrs); + + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, + coherent); + + if (!pages) + return -ENXIO; + + do { + int ret = vm_insert_page(vma, uaddr, *pages++); + if (ret) { + pr_err("Remapping memory failed: %d\n", ret); + return ret; + } + uaddr += PAGE_SIZE; + usize -= PAGE_SIZE; + } while (usize > 0); + + return 0; +} + +/* + * free a page as defined by the above mapping. + * Must not be called with IRQs disabled. 
+ */ +void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + struct page **pages; + size = PAGE_ALIGN(size); + + if (__in_atomic_pool(cpu_addr, size)) { + __iommu_free_atomic(dev, cpu_addr, handle, size); + return; + } + + pages = __iommu_get_pages(cpu_addr, attrs); + if (!pages) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + return; + } + + if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) + dma_common_free_remap(cpu_addr, size, VM_USERMAP, true); + + __iommu_remove_mapping(dev, handle, size); + __iommu_free_buffer(dev, pages, size, attrs); +} + +int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, + size_t size, struct dma_attrs *attrs) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct page **pages = __iommu_get_pages(cpu_addr, attrs); + + if (!pages) + return -ENXIO; + + return sg_alloc_table_from_pages(sgt, pages, count, 0, size, + GFP_KERNEL); +} + +static int __dma_direction_to_prot(enum dma_data_direction dir) +{ + int prot; + + switch (dir) { + case DMA_BIDIRECTIONAL: + prot = IOMMU_READ | IOMMU_WRITE; + break; + case DMA_TO_DEVICE: + prot = IOMMU_READ; + break; + case DMA_FROM_DEVICE: + prot = IOMMU_WRITE; + break; + default: + prot = 0; + } + + return prot; +} + +/** + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * The scatter gather list elements are merged together (if possible) and + * tagged with the appropriate dma address and length. They are obtained via + * sg_dma_{address,length}. + */ +int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + struct scatterlist *s; + int ret, i; + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + unsigned int total_length = 0, current_offset = 0; + dma_addr_t iova; + int prot = __dma_direction_to_prot(dir); + + for_each_sg(sg, s, nents, i) + total_length += s->length; + + iova = __alloc_iova(mapping, total_length); + if (iova == DMA_ERROR_CODE) { + dev_err(dev, "Couldn't allocate iova for sg %p\n", sg); + return 0; + } + prot = __get_iommu_pgprot(attrs, prot, + is_dma_coherent(dev, attrs)); + + ret = iommu_map_sg(mapping->domain, iova, sg, nents, prot); + if (ret != total_length) { + __free_iova(mapping, iova, total_length); + return 0; + } + + for_each_sg(sg, s, nents, i) { + s->dma_address = iova + current_offset; + s->dma_length = total_length - current_offset; + current_offset += s->length; + } + + return nents; +} + +/** + * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). 
+ */ +void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + unsigned int total_length = sg_dma_len(sg); + dma_addr_t iova = sg_dma_address(sg); + + total_length = PAGE_ALIGN((iova & ~PAGE_MASK) + total_length); + iova &= PAGE_MASK; + + iommu_unmap(mapping->domain, iova, total_length); + __free_iova(mapping, iova, total_length); +} + +/** + * arm_iommu_sync_sg_for_cpu + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + */ +void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = sg_dma_address(sg); + bool iova_coherent = iommu_is_iova_coherent(mapping->domain, iova); + + if (iova_coherent) + return; + + for_each_sg(sg, s, nents, i) + __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); + +} + +/** + * arm_iommu_sync_sg_for_device + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + */ +void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = sg_dma_address(sg); + bool iova_coherent = iommu_is_iova_coherent(mapping->domain, iova); + + if (iova_coherent) + return; + + for_each_sg(sg, s, nents, i) + __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); +} + + +/** + * arm_coherent_iommu_map_page + * @dev: valid struct device pointer + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * Coherent IOMMU aware version of arm_dma_map_page() + */ +static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, + struct page *page, unsigned long offset, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t dma_addr; + int ret, prot, len, start_offset, map_offset; + + map_offset = offset & ~PAGE_MASK; + start_offset = offset & PAGE_MASK; + len = PAGE_ALIGN(map_offset + size); + + dma_addr = __alloc_iova(mapping, len); + if (dma_addr == DMA_ERROR_CODE) + return dma_addr; + + prot = __dma_direction_to_prot(dir); + prot = __get_iommu_pgprot(attrs, prot, + is_dma_coherent(dev, attrs)); + + ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page) + + start_offset, len, prot); + if (ret < 0) + goto fail; + + return dma_addr + map_offset; +fail: + __free_iova(mapping, dma_addr, len); + return DMA_ERROR_CODE; +} + +/** + * arm_iommu_map_page + * @dev: valid struct device pointer + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * IOMMU aware version of arm_dma_map_page() + */ +static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!is_dma_coherent(dev, attrs) && + 
!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __dma_page_cpu_to_dev(page, offset, size, dir); + + return arm_coherent_iommu_map_page(dev, page, offset, size, dir, attrs); +} + +/** + * arm_iommu_unmap_page + * @dev: valid struct device pointer + * @handle: DMA address of buffer + * @size: size of buffer (same as passed to dma_map_page) + * @dir: DMA transfer direction (same as passed to dma_map_page) + * + * IOMMU aware version of arm_dma_unmap_page() + */ +static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + struct page *page = phys_to_page(iommu_iova_to_phys( + mapping->domain, iova)); + int offset = handle & ~PAGE_MASK; + int len = PAGE_ALIGN(size + offset); + + if (!(is_dma_coherent(dev, attrs) || + dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))) + __dma_page_dev_to_cpu(page, offset, size, dir); + + iommu_unmap(mapping->domain, iova, len); + __free_iova(mapping, iova, len); +} + +static void arm_iommu_sync_single_for_cpu(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + struct page *page = phys_to_page(iommu_iova_to_phys( + mapping->domain, iova)); + unsigned int offset = handle & ~PAGE_MASK; + bool iova_coherent = iommu_is_iova_coherent(mapping->domain, handle); + + if (!iova_coherent) + __dma_page_dev_to_cpu(page, offset, size, dir); +} + +static void arm_iommu_sync_single_for_device(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + struct page *page = phys_to_page(iommu_iova_to_phys( + mapping->domain, iova)); + unsigned int offset = handle & ~PAGE_MASK; + bool iova_coherent = iommu_is_iova_coherent(mapping->domain, handle); + + if (!iova_coherent) + __dma_page_cpu_to_dev(page, offset, size, dir); +} + +static int arm_iommu_dma_supported(struct device *dev, u64 mask) +{ + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); + + if (!mapping) { + dev_warn(dev, "No IOMMU mapping for device\n"); + return 0; + } + + return iommu_dma_supported(mapping->domain, dev, mask); +} + +static int arm_iommu_mapping_error(struct device *dev, + dma_addr_t dma_addr) +{ + return dma_addr == DMA_ERROR_CODE; +} + +const struct dma_map_ops iommu_ops = { + .alloc = arm_iommu_alloc_attrs, + .free = arm_iommu_free_attrs, + .mmap = arm_iommu_mmap_attrs, + .get_sgtable = arm_iommu_get_sgtable, + + .map_page = arm_iommu_map_page, + .unmap_page = arm_iommu_unmap_page, + .sync_single_for_cpu = arm_iommu_sync_single_for_cpu, + .sync_single_for_device = arm_iommu_sync_single_for_device, + + .map_sg = arm_iommu_map_sg, + .unmap_sg = arm_iommu_unmap_sg, + .sync_sg_for_cpu = arm_iommu_sync_sg_for_cpu, + .sync_sg_for_device = arm_iommu_sync_sg_for_device, + + .set_dma_mask = arm_dma_set_mask, + .dma_supported = arm_iommu_dma_supported, + .mapping_error = arm_iommu_mapping_error, +}; + +/** + * arm_iommu_create_mapping + * @bus: pointer to the bus holding the client device (for IOMMU calls) + * @base: start address of the valid IO address space + * @size: maximum size of the valid IO address space + * + * Creates a mapping structure which holds information about used/unused + * IO address ranges, which is required to perform memory allocation 
and + * mapping with IOMMU aware functions. + * + * The client device need to be attached to the mapping with + * arm_iommu_attach_device function. + */ +struct dma_iommu_mapping * +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size) +{ + unsigned int bits = size >> PAGE_SHIFT; + unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long); + struct dma_iommu_mapping *mapping; + int err = -ENOMEM; + + if (!bitmap_size) + return ERR_PTR(-EINVAL); + + mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL); + if (!mapping) + goto err; + + mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL | __GFP_NOWARN | + __GFP_NORETRY); + if (!mapping->bitmap) + mapping->bitmap = vzalloc(bitmap_size); + + if (!mapping->bitmap) + goto err2; + + mapping->base = base; + mapping->bits = bits; + spin_lock_init(&mapping->lock); + + mapping->domain = iommu_domain_alloc(bus); + if (!mapping->domain) + goto err3; + + kref_init(&mapping->kref); + return mapping; +err3: + kvfree(mapping->bitmap); +err2: + kfree(mapping); +err: + return ERR_PTR(err); +} +EXPORT_SYMBOL(arm_iommu_create_mapping); + +static void release_iommu_mapping(struct kref *kref) +{ + struct dma_iommu_mapping *mapping = + container_of(kref, struct dma_iommu_mapping, kref); + + iommu_domain_free(mapping->domain); + kvfree(mapping->bitmap); + kfree(mapping); +} + +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping) +{ + if (mapping) + kref_put(&mapping->kref, release_iommu_mapping); +} +EXPORT_SYMBOL(arm_iommu_release_mapping); + +/** + * arm_iommu_attach_device + * @dev: valid struct device pointer + * @mapping: io address space mapping structure (returned from + * arm_iommu_create_mapping) + * + * Attaches specified io address space mapping to the provided device, + * this replaces the dma operations (dma_map_ops pointer) with the + * IOMMU aware version. More than one client might be attached to + * the same io address space mapping. + */ +int arm_iommu_attach_device(struct device *dev, + struct dma_iommu_mapping *mapping) +{ + int err; + int s1_bypass = 0, is_fast = 0; + + iommu_domain_get_attr(mapping->domain, DOMAIN_ATTR_FAST, &is_fast); + if (is_fast) + return fast_smmu_attach_device(dev, mapping); + + err = iommu_attach_device(mapping->domain, dev); + if (err) + return err; + + iommu_domain_get_attr(mapping->domain, DOMAIN_ATTR_S1_BYPASS, + &s1_bypass); + + kref_get(&mapping->kref); + dev->archdata.mapping = mapping; + if (!s1_bypass) + set_dma_ops(dev, &iommu_ops); + + pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev)); + return 0; +} +EXPORT_SYMBOL(arm_iommu_attach_device); + +/** + * arm_iommu_detach_device + * @dev: valid struct device pointer + * + * Detaches the provided device from a previously attached map. 
+ * This voids the dma operations (dma_map_ops pointer) + */ +void arm_iommu_detach_device(struct device *dev) +{ + struct dma_iommu_mapping *mapping; + int is_fast, s1_bypass = 0; + + mapping = to_dma_iommu_mapping(dev); + if (!mapping) { + dev_warn(dev, "Not attached\n"); + return; + } + + iommu_domain_get_attr(mapping->domain, DOMAIN_ATTR_FAST, &is_fast); + if (is_fast) { + fast_smmu_detach_device(dev, mapping); + return; + } + + iommu_domain_get_attr(mapping->domain, DOMAIN_ATTR_S1_BYPASS, + &s1_bypass); + + if (msm_dma_unmap_all_for_dev(dev)) + dev_warn(dev, "IOMMU detach with outstanding mappings\n"); + + iommu_detach_device(mapping->domain, dev); + kref_put(&mapping->kref, release_iommu_mapping); + dev->archdata.mapping = NULL; + if (!s1_bypass) + set_dma_ops(dev, NULL); + + pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev)); +} +EXPORT_SYMBOL(arm_iommu_detach_device); + +#endif diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 5a3117c287ca..4970252612c2 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -40,9 +40,35 @@ #include <asm/system_misc.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> +#include <asm/edac.h> +#include <soc/qcom/scm.h> + +#include <trace/events/exception.h> static const char *fault_name(unsigned int esr); +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) +{ + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, esr)) + ret = 1; + preempt_enable(); + } + + return ret; +} +#else +static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) +{ + return 0; +} +#endif + /* * Dump out the page tables associated with 'addr' in mm 'mm'. */ @@ -176,6 +202,8 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, { struct siginfo si; + trace_user_fault(tsk, addr, esr); + if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) { pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n", tsk->comm, task_pid_nr(tsk), fault_name(esr), sig, @@ -278,6 +306,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + if (notify_page_fault(regs, esr)) + return 0; + tsk = current; mm = tsk->mm; @@ -297,16 +328,13 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (is_el0_instruction_abort(esr)) { vm_flags = VM_EXEC; - } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) { + } else if (((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) || + ((esr & ESR_ELx_CM) && !(mm_flags & FAULT_FLAG_USER))) { vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; } if (addr < USER_DS && is_permission_fault(esr, regs)) { - /* regs->orig_addr_limit may be 0 if we entered from EL0 */ - if (regs->orig_addr_limit == KERNEL_DS) - die("Accessing user space memory with fs=KERNEL_DS", regs, esr); - if (is_el1_instruction_abort(esr)) die("Attempting to execute userspace memory", regs, esr); @@ -429,6 +457,19 @@ no_context: } /* + * TLB conflict is already handled in EL2. This rourtine should return zero + * so that, do_mem_abort would not crash kernel thinking TLB conflict not + * handled. 
+*/ +#ifdef CONFIG_QCOM_TLB_EL2_HANDLER +static int do_tlb_conf_fault(unsigned long addr, + unsigned int esr, + struct pt_regs *regs) +{ + return 0; +} +#endif +/* * First Level Translation Fault Handler * * We enter here because the first level page table doesn't contain a valid @@ -461,6 +502,7 @@ static int __kprobes do_translation_fault(unsigned long addr, */ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) { + arm64_check_cache_ecc(NULL); return 1; } @@ -518,7 +560,11 @@ static const struct fault_info { { do_bad, SIGBUS, 0, "unknown 45" }, { do_bad, SIGBUS, 0, "unknown 46" }, { do_bad, SIGBUS, 0, "unknown 47" }, +#ifdef CONFIG_QCOM_TLB_EL2_HANDLER + { do_tlb_conf_fault, SIGBUS, 0, "TLB conflict abort" }, +#else { do_bad, SIGBUS, 0, "TLB conflict abort" }, +#endif { do_bad, SIGBUS, 0, "unknown 49" }, { do_bad, SIGBUS, 0, "unknown 50" }, { do_bad, SIGBUS, 0, "unknown 51" }, @@ -564,6 +610,22 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, arm64_notify_die("", regs, &info, esr); } +asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, + unsigned int esr, + struct pt_regs *regs) +{ + /* + * We've taken an instruction abort from userspace and not yet + * re-enabled IRQs. If the address is a kernel address, apply + * BP hardening prior to enabling IRQs and pre-emption. + */ + if (addr > TASK_SIZE) + arm64_apply_bp_hardening(); + + local_irq_enable(); + do_mem_abort(addr, esr, regs); +} + /* * Handle stack alignment exceptions. */ @@ -653,6 +715,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr_if_watchpoint, return rv; } +NOKPROBE_SYMBOL(do_debug_exception); #ifdef CONFIG_ARM64_PAN int cpu_enable_pan(void *__unused) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index d9c664ed6104..298db9789fc7 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -97,6 +97,7 @@ EXPORT_SYMBOL(flush_dcache_page); /* * Additional functions defined in assembly. */ +EXPORT_SYMBOL(flush_cache_all); EXPORT_SYMBOL(flush_icache_range); #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 60e113a7c5db..2e7702bd7e57 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -155,6 +155,7 @@ static void __init arm64_memory_present(void) #endif static phys_addr_t memory_limit = (phys_addr_t)ULLONG_MAX; +static phys_addr_t bootloader_memory_limit; /* * Limit the memory size that was specified via FDT. @@ -208,6 +209,11 @@ void __init arm64_memblock_init(void) * via the linear mapping. */ if (memory_limit != (phys_addr_t)ULLONG_MAX) { + /* + * Save bootloader imposed memory limit before we overwirte + * memblock. + */ + bootloader_memory_limit = memblock_end_of_DRAM(); memblock_enforce_memory_limit(memory_limit); memblock_add(__pa_symbol(_text), (u64)(_end - _text)); } @@ -223,7 +229,7 @@ void __init arm64_memblock_init(void) * memory spans, randomize the linear region as well. 
*/ if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) { - range = range / ARM64_MEMSTART_ALIGN + 1; + range /= ARM64_MEMSTART_ALIGN; memstart_addr -= ARM64_MEMSTART_ALIGN * ((range * memstart_offset_seed) >> 16); } @@ -432,6 +438,11 @@ void __init mem_init(void) } } +static inline void poison_init_mem(void *s, size_t count) +{ + memset(s, 0, count); +} + void free_initmem(void) { free_initmem_default(0); @@ -457,6 +468,18 @@ static int __init keepinitrd_setup(char *__unused) __setup("keepinitrd", keepinitrd_setup); #endif +#ifdef CONFIG_KERNEL_TEXT_RDONLY +void set_kernel_text_ro(void) +{ + unsigned long start = PFN_ALIGN(_stext); + unsigned long end = PFN_ALIGN(_etext); + + /* + * Set the kernel identity mapping for text RO. + */ + set_memory_ro(start, (end - start) >> PAGE_SHIFT); +} +#endif /* * Dump out memory limit information on panic. */ @@ -481,3 +504,137 @@ static int __init register_mem_limit_dumper(void) return 0; } __initcall(register_mem_limit_dumper); + +#ifdef CONFIG_MEMORY_HOTPLUG +int arch_add_memory(int nid, u64 start, u64 size, bool for_device) +{ + pg_data_t *pgdat; + struct zone *zone; + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + unsigned long end_pfn = start_pfn + nr_pages; + unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT); + int ret; + + if (end_pfn > max_sparsemem_pfn) { + pr_err("end_pfn too big"); + return -1; + } + hotplug_paging(start, size); + + /* + * Mark the first page in the range as unusable. This is needed + * because __add_section (within __add_pages) wants pfn_valid + * of it to be false, and in arm64 pfn falid is implemented by + * just checking at the nomap flag for existing blocks. + * + * A small trick here is that __add_section() requires only + * phys_start_pfn (that is the first pfn of a section) to be + * invalid. Regardless of whether it was assumed (by the function + * author) that all pfns within a section are either all valid + * or all invalid, it allows to avoid looping twice (once here, + * second when memblock_clear_nomap() is called) through all + * pfns of the section and modify only one pfn. Thanks to that, + * further, in __add_zone() only this very first pfn is skipped + * and corresponding page is not flagged reserved. Therefore it + * is enough to correct this setup only for it. + * + * When arch_add_memory() returns the walk_memory_range() function + * is called and passed with online_memory_block() callback, + * which execution finally reaches the memory_block_action() + * function, where also only the first pfn of a memory block is + * checked to be reserved. Above, it was first pfn of a section, + * here it is a block but + * (drivers/base/memory.c): + * sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; + * (include/linux/memory.h): + * #define MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS) + * so we can consider block and section equivalently + */ + memblock_mark_nomap(start, 1<<PAGE_SHIFT); + + pgdat = NODE_DATA(nid); + + zone = pgdat->node_zones + + zone_for_memory(nid, start, size, ZONE_NORMAL, for_device); + ret = __add_pages(nid, zone, start_pfn, nr_pages); + + /* + * Make the pages usable after they have been added. + * This will make pfn_valid return true + */ + memblock_clear_nomap(start, 1<<PAGE_SHIFT); + + /* + * This is a hack to avoid having to mix arch specific code + * into arch independent code. SetPageReserved is supposed + * to be called by __add_zone (within __add_section, within + * __add_pages). 
However, when it is called there, it assumes that + * pfn_valid returns true. For the way pfn_valid is implemented + * in arm64 (a check on the nomap flag), the only way to make + * this evaluate true inside __add_zone is to clear the nomap + * flags of blocks in architecture independent code. + * + * To avoid this, we set the Reserved flag here after we cleared + * the nomap flag in the line above. + */ + SetPageReserved(pfn_to_page(start_pfn)); + + if (ret) + pr_warn("%s: Problem encountered in __add_pages() ret=%d\n", + __func__, ret); + + return ret; +} + +#ifdef CONFIG_MEMORY_HOTREMOVE +static void kernel_physical_mapping_remove(unsigned long start, + unsigned long end) +{ + start = (unsigned long)__va(start); + end = (unsigned long)__va(end); + + remove_pagetable(start, end, true); + +} + +int arch_remove_memory(u64 start, u64 size) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct page *page = pfn_to_page(start_pfn); + struct zone *zone; + int ret = 0; + + zone = page_zone(page); + ret = __remove_pages(zone, start_pfn, nr_pages); + WARN_ON_ONCE(ret); + + kernel_physical_mapping_remove(start, start + size); + + return ret; +} + +#endif /* CONFIG_MEMORY_HOTREMOVE */ +static int arm64_online_page(struct page *page) +{ + unsigned long target_pfn = page_to_pfn(page); + unsigned long limit = __phys_to_pfn(bootloader_memory_limit); + + if (target_pfn >= limit) + return -EINVAL; + + __online_page_set_limits(page); + __online_page_increment_counters(page); + __online_page_free(page); + + return 0; +} + +static int __init arm64_memory_hotplug_init(void) +{ + set_online_page_callback(&arm64_online_page); + return 0; +} +subsys_initcall(arm64_memory_hotplug_init); +#endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ddfe90299048..e8b8590f553a 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -29,6 +29,8 @@ #include <linux/io.h> #include <linux/slab.h> #include <linux/stop_machine.h> +#include <linux/dma-contiguous.h> +#include <linux/cma.h> #include <linux/mm.h> #include <asm/barrier.h> @@ -61,6 +63,8 @@ static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; +static bool dma_overlap(phys_addr_t start, phys_addr_t end); + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -210,7 +214,8 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, next = pmd_addr_end(addr, end); /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0 && - block_mappings_allowed(pgtable_alloc)) { + block_mappings_allowed(pgtable_alloc) && + !dma_overlap(phys, phys + next - addr)) { pmd_t old_pmd =*pmd; pmd_set_huge(pmd, phys, prot); /* @@ -270,7 +275,8 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, * For 4K granule only, attempt to put down a 1GB block */ if (use_1G_block(addr, next, phys) && - block_mappings_allowed(pgtable_alloc)) { + block_mappings_allowed(pgtable_alloc) && + !dma_overlap(phys, phys + next - addr)) { pud_t old_pud = *pud; pud_set_huge(pud, phys, prot); @@ -386,7 +392,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end) { - unsigned long kernel_start = __pa_symbol(_stext); + unsigned long kernel_start = 
__pa_symbol(_text); unsigned long kernel_end = __pa_symbol(__init_begin); /* @@ -403,7 +409,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end } /* - * This block overlaps the kernel text/rodata mapping. + * This block overlaps the kernel text/rodata mappings. * Map the portion(s) which don't overlap. */ if (start < kernel_start) @@ -418,7 +424,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end early_pgtable_alloc); /* - * Map the linear alias of the [_stext, __init_begin) interval as + * Map the linear alias of the [_text, __init_begin) interval as * read-only/non-executable. This makes the contents of the * region accessible to subsystems such as hibernate, but * protects it from inadvertent modification or execution. @@ -450,8 +456,8 @@ void mark_rodata_ro(void) { unsigned long section_size; - section_size = (unsigned long)_etext - (unsigned long)_stext; - create_mapping_late(__pa_symbol(_stext), (unsigned long)_stext, + section_size = (unsigned long)_etext - (unsigned long)_text; + create_mapping_late(__pa_symbol(_text), (unsigned long)_text, section_size, PAGE_KERNEL_ROX); /* * mark .rodata as read only. Use __init_begin rather than __end_rodata @@ -473,8 +479,8 @@ void fixup_init(void) unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin)); } -static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end, - pgprot_t prot, struct vm_struct *vma) +static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, + pgprot_t prot, struct vm_struct *vma) { phys_addr_t pa_start = __pa_symbol(va_start); unsigned long size = va_end - va_start; @@ -532,11 +538,11 @@ static void __init map_kernel(pgd_t *pgd) { static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data; - map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC, &vmlinux_text); - map_kernel_chunk(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata); - map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC, - &vmlinux_init); - map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data); + map_kernel_segment(pgd, _text, _etext, PAGE_KERNEL_EXEC, &vmlinux_text); + map_kernel_segment(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata); + map_kernel_segment(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC, + &vmlinux_init); + map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data); if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) { /* @@ -564,6 +570,37 @@ static void __init map_kernel(pgd_t *pgd) kasan_copy_shadow(pgd); } +struct dma_contig_early_reserve { + phys_addr_t base; + unsigned long size; +}; + +static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS]; + +static int dma_mmu_remap_num; + +void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) +{ + dma_mmu_remap[dma_mmu_remap_num].base = base; + dma_mmu_remap[dma_mmu_remap_num].size = size; + dma_mmu_remap_num++; +} + +static bool dma_overlap(phys_addr_t start, phys_addr_t end) +{ + int i; + + for (i = 0; i < dma_mmu_remap_num; i++) { + phys_addr_t dma_base = dma_mmu_remap[i].base; + phys_addr_t dma_end = dma_mmu_remap[i].base + + dma_mmu_remap[i].size; + + if ((dma_base < end) && (dma_end > start)) + return true; + } + return false; +} + /* * paging_init() sets up the page tables, initialises the zone memory * maps and sets up the zero page. 
@@ -591,9 +628,6 @@ void __init paging_init(void) pgd_clear_fixmap(); memblock_free(pgd_phys, PAGE_SIZE); - /* Ensure the zero page is visible to the page table walker */ - dsb(ishst); - /* * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd * allocated with it. @@ -604,6 +638,439 @@ void __init paging_init(void) bootmem_init(); } +#ifdef CONFIG_MEMORY_HOTPLUG +static phys_addr_t pgd_pgtable_alloc(void) +{ + void *ptr = (void *)__get_free_page(PGALLOC_GFP); + if (!ptr || !pgtable_page_ctor(virt_to_page(ptr))) + BUG(); + + /* Ensure the zeroed page is visible to the page table walker */ + dsb(ishst); + return __pa(ptr); +} + +/* + * hotplug_paging() is used by memory hotplug to build new page tables + * for hot-added memory. + */ +void hotplug_paging(phys_addr_t start, phys_addr_t size) +{ + struct page *pg; + phys_addr_t pgd_phys; + pgd_t *pgd; + int cpu; + + for_each_possible_cpu(cpu) + if (current->cpu != cpu) + sched_isolate_cpu(cpu); + pgd_phys = pgd_pgtable_alloc(); + pgd = pgd_set_fixmap(pgd_phys); + + memcpy(pgd, swapper_pg_dir, PAGE_SIZE); + + __create_pgd_mapping(pgd, start, __phys_to_virt(start), size, + PAGE_KERNEL, pgd_pgtable_alloc); + + cpu_replace_ttbr1(__va(pgd_phys)); + memcpy(swapper_pg_dir, pgd, PAGE_SIZE); + cpu_replace_ttbr1(swapper_pg_dir); + + pgd_clear_fixmap(); + + pg = phys_to_page(pgd_phys); + pgtable_page_dtor(pg); + __free_pages(pg, 0); + for_each_possible_cpu(cpu) + if (current->cpu != cpu) + sched_unisolate_cpu_unlocked(cpu); +} + +#ifdef CONFIG_MEMORY_HOTREMOVE +#define PAGE_INUSE 0xFD + +static void free_pagetable(struct page *page, int order, bool direct) +{ + unsigned long magic; + unsigned int nr_pages = 1 << order; + + /* bootmem pages have the Reserved flag */ + if (PageReserved(page)) { + __ClearPageReserved(page); + + magic = (unsigned long)page->lru.next; + if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) { + while (nr_pages--) + put_page_bootmem(page++); + } else { + while (nr_pages--) + free_reserved_page(page++); + } + } else { + /* + * Only direct pagetable allocations (those allocated via + * hotplug) call pgtable_page_ctor; vmemmap pgtable + * allocations don't. + */ + if (direct) + pgtable_page_dtor(page); + + free_pages((unsigned long)page_address(page), order); + } +} + +static void free_pte_table(pmd_t *pmd, bool direct) +{ + pte_t *pte_start, *pte; + struct page *page; + int i; + + pte_start = (pte_t *) pmd_page_vaddr(*pmd); + /* Check if there is no valid entry in the PTE table */ + for (i = 0; i < PTRS_PER_PTE; i++) { + pte = pte_start + i; + if (!pte_none(*pte)) + return; + } + + page = pmd_page(*pmd); + + free_pagetable(page, 0, direct); + + /* + * This spin lock can only be taken in _pte_alloc_kernel + * in mm/memory.c and nowhere else (for arm64). Not sure if + * the function above can be called concurrently. When in doubt, + * I am leaving it here for now, but it probably can be removed + */ + spin_lock(&init_mm.page_table_lock); + pmd_clear(pmd); + spin_unlock(&init_mm.page_table_lock); +} + +static void free_pmd_table(pud_t *pud, bool direct) +{ + pmd_t *pmd_start, *pmd; + struct page *page; + int i; + + pmd_start = (pmd_t *) pud_page_vaddr(*pud); + /* Check if there is no valid entry in the PMD table */ + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd = pmd_start + i; + if (!pmd_none(*pmd)) + return; + } + + page = pud_page(*pud); + + free_pagetable(page, 0, direct); + + /* + * This spin lock can only be taken in _pte_alloc_kernel + * in mm/memory.c and nowhere else (for arm64).
Not sure if + * the function above can be called concurrently. When in doubt, + * I am leaving it here for now, but it probably can be removed + */ + spin_lock(&init_mm.page_table_lock); + pud_clear(pud); + spin_unlock(&init_mm.page_table_lock); +} + +/* + * When the PUD is folded on the PGD (three levels of paging), + * there's no need to free PUDs + */ +#if CONFIG_PGTABLE_LEVELS > 3 +static void free_pud_table(pgd_t *pgd, bool direct) +{ + pud_t *pud_start, *pud; + struct page *page; + int i; + + pud_start = (pud_t *) pgd_page_vaddr(*pgd); + /* Check if there is no valid entry in the PUD */ + for (i = 0; i < PTRS_PER_PUD; i++) { + pud = pud_start + i; + if (!pud_none(*pud)) + return; + } + + page = pgd_page(*pgd); + + free_pagetable(page, 0, direct); + + /* + * This spin lock can only be + * taken in _pte_alloc_kernel in + * mm/memory.c and nowhere else + * (for arm64). Not sure if the + * function above can be called + * concurrently. When in doubt, + * I am leaving it here for now, + * but it probably can be removed. + */ + spin_lock(&init_mm.page_table_lock); + pgd_clear(pgd); + spin_unlock(&init_mm.page_table_lock); +} +#endif + +static void remove_pte_table(pte_t *pte, unsigned long addr, + unsigned long end, bool direct) +{ + unsigned long next; + void *page_addr; + + for (; addr < end; addr = next, pte++) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + if (next > end) + next = end; + + if (!pte_present(*pte)) + continue; + + if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) { + /* + * Do not free direct mapping pages since they were + * freed when offlining, or simply not in use. + */ + if (!direct) + free_pagetable(pte_page(*pte), 0, direct); + + /* + * This spin lock can only be + * taken in _pte_alloc_kernel in + * mm/memory.c and nowhere else + * (for arm64). Not sure if the + * function above can be called + * concurrently. When in doubt, + * I am leaving it here for now, + * but it probably can be removed. + */ + spin_lock(&init_mm.page_table_lock); + pte_clear(&init_mm, addr, pte); + spin_unlock(&init_mm.page_table_lock); + } else { + /* + * If we are here, we are freeing vmemmap pages since + * direct mapped memory ranges to be freed are aligned. + * + * If we are not removing the whole page, it means + * other page structs in this page are being used and + * we cannot remove them. So fill the unused page structs + * with 0xFD, and remove the page when it is wholly + * filled with 0xFD. + */ + memset((void *)addr, PAGE_INUSE, next - addr); + + page_addr = page_address(pte_page(*pte)); + if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { + free_pagetable(pte_page(*pte), 0, direct); + + /* + * This spin lock can only be + * taken in _pte_alloc_kernel in + * mm/memory.c and nowhere else + * (for arm64). Not sure if the + * function above can be called + * concurrently. When in doubt, + * I am leaving it here for now, + * but it probably can be removed. + */ + spin_lock(&init_mm.page_table_lock); + pte_clear(&init_mm, addr, pte); + spin_unlock(&init_mm.page_table_lock); + } + } + } + + // I am adding this flush here in symmetry with the x86 code.
+ // Why do I need to call it here and not in remove_p[mu]d? + flush_tlb_all(); +} + +static void remove_pmd_table(pmd_t *pmd, unsigned long addr, + unsigned long end, bool direct) +{ + unsigned long next; + void *page_addr; + pte_t *pte; + + for (; addr < end; addr = next, pmd++) { + next = pmd_addr_end(addr, end); + + if (!pmd_present(*pmd)) + continue; + + // check if we are using 2MB section mappings + if (pmd_sect(*pmd)) { + if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) { + if (!direct) { + free_pagetable(pmd_page(*pmd), + get_order(PMD_SIZE), direct); + } + /* + * This spin lock can only be + * taken in _pte_alloc_kernel in + * mm/memory.c and nowhere else + * (for arm64). Not sure if the + * function above can be called + * concurrently. When in doubt, + * I am leaving it here for now, + * but it probably can be removed. + */ + spin_lock(&init_mm.page_table_lock); + pmd_clear(pmd); + spin_unlock(&init_mm.page_table_lock); + } else { + /* If we are here, we are freeing vmemmap pages. */ + memset((void *)addr, PAGE_INUSE, next - addr); + + page_addr = page_address(pmd_page(*pmd)); + if (!memchr_inv(page_addr, PAGE_INUSE, + PMD_SIZE)) { + free_pagetable(pmd_page(*pmd), + get_order(PMD_SIZE), direct); + + /* + * This spin lock can only be + * taken in _pte_alloc_kernel in + * mm/memory.c and nowhere else + * (for arm64). Not sure if the + * function above can be called + * concurrently. When in doubt, + * I am leaving it here for now, + * but it probably can be removed. + */ + spin_lock(&init_mm.page_table_lock); + pmd_clear(pmd); + spin_unlock(&init_mm.page_table_lock); + } + } + continue; + } + + BUG_ON(!pmd_table(*pmd)); + + pte = pte_offset_map(pmd, addr); + remove_pte_table(pte, addr, next, direct); + free_pte_table(pmd, direct); + } +} + +static void remove_pud_table(pud_t *pud, unsigned long addr, + unsigned long end, bool direct) +{ + unsigned long next; + pmd_t *pmd; + void *page_addr; + + for (; addr < end; addr = next, pud++) { + next = pud_addr_end(addr, end); + if (!pud_present(*pud)) + continue; + /* + * If we are using 4K granules, check if we are using + * 1GB section mapping. + */ + if (pud_sect(*pud)) { + if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) { + if (!direct) { + free_pagetable(pud_page(*pud), + get_order(PUD_SIZE), direct); + } + + /* + * This spin lock can only be + * taken in _pte_alloc_kernel in + * mm/memory.c and nowhere else + * (for arm64). Not sure if the + * function above can be called + * concurrently. When in doubt, + * I am leaving it here for now, + * but it probably can be removed. + */ + spin_lock(&init_mm.page_table_lock); + pud_clear(pud); + spin_unlock(&init_mm.page_table_lock); + } else { + /* If we are here, we are freeing vmemmap pages. */ + memset((void *)addr, PAGE_INUSE, next - addr); + + page_addr = page_address(pud_page(*pud)); + if (!memchr_inv(page_addr, PAGE_INUSE, + PUD_SIZE)) { + + free_pagetable(pud_page(*pud), + get_order(PUD_SIZE), direct); + + /* + * This spin lock can only be + * taken in _pte_alloc_kernel in + * mm/memory.c and nowhere else + * (for arm64). Not sure if the + * function above can be called + * concurrently. When in doubt, + * I am leaving it here for now, + * but it probably can be removed.
+ */ + spin_lock(&init_mm.page_table_lock); + pud_clear(pud); + spin_unlock(&init_mm.page_table_lock); + } + } + continue; + } + + BUG_ON(!pud_table(*pud)); + + pmd = pmd_offset(pud, addr); + remove_pmd_table(pmd, addr, next, direct); + free_pmd_table(pud, direct); + } +} + +void remove_pagetable(unsigned long start, unsigned long end, bool direct) +{ + unsigned long next; + unsigned long addr; + pgd_t *pgd; + pud_t *pud; + int cpu; + + for_each_possible_cpu(cpu) + if (current->cpu != cpu) + sched_isolate_cpu(cpu); + for (addr = start; addr < end; addr = next) { + next = pgd_addr_end(addr, end); + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + continue; + + pud = pud_offset(pgd, addr); + remove_pud_table(pud, addr, next, direct); + /* + * When the PUD is folded on the PGD (three levels of paging), + * I have already cleared the PMD page in free_pmd_table, + * and reset the corresponding PGD==PUD entry. + */ +#if CONFIG_PGTABLE_LEVELS > 3 + free_pud_table(pgd, direct); +#endif + } + + flush_tlb_all(); + for_each_possible_cpu(cpu) + if (current->cpu != cpu) + sched_unisolate_cpu_unlocked(cpu); +} + + +#endif /* CONFIG_MEMORY_HOTREMOVE */ +#endif /* CONFIG_MEMORY_HOTPLUG */ + /* * Check whether a kernel address is valid (derived from arch/x86/). */ @@ -655,6 +1122,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) pgd_t *pgd; pud_t *pud; pmd_t *pmd; + int ret = 0; do { next = pmd_addr_end(addr, end); @@ -672,19 +1140,30 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) void *p = NULL; p = vmemmap_alloc_block_buf(PMD_SIZE, node); - if (!p) - return -ENOMEM; + if (!p) { +#ifdef CONFIG_MEMORY_HOTPLUG + vmemmap_free(start, end); +#endif + ret = -ENOMEM; + break; + } set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL)); } else vmemmap_verify((pte_t *)pmd, node, addr, next); } while (addr = next, addr != end); - return 0; + if (ret) + return vmemmap_populate_basepages(start, end, node); + else + return ret; } #endif /* CONFIG_ARM64_64K_PAGES */ void vmemmap_free(unsigned long start, unsigned long end) { +#ifdef CONFIG_MEMORY_HOTREMOVE + remove_pagetable(start, end, false); +#endif } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index ca6d268e3313..6ea71387ee12 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -70,6 +70,13 @@ static int change_memory_common(unsigned long addr, int numpages, WARN_ON_ONCE(1); } + if (!IS_ENABLED(CONFIG_FORCE_PAGES)) { + if (start < MODULES_VADDR || start >= MODULES_END) + return -EINVAL; + + if (end < MODULES_VADDR || end >= MODULES_END) + return -EINVAL; + } /* * Kernel VA mappings are always live, and splitting live section * mappings into page mappings may cause TLB conflicts. This means @@ -139,4 +146,43 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) __pgprot(0), __pgprot(PTE_VALID)); } -#endif +#ifdef CONFIG_HIBERNATION +/* + * When built with CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION, this function + * is used to determine if a linear map page has been marked as not-valid by + * CONFIG_DEBUG_PAGEALLOC. Walk the page table and check the PTE_VALID bit. + * This is based on kern_addr_valid(), which almost does what we need. + * + * Because this is only called on the kernel linear map, p?d_sect() implies + * p?d_present(). When debug_pagealloc is enabled, section mappings are + * disabled.
+ */ +bool kernel_page_present(struct page *page) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long addr = (unsigned long)page_address(page); + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + return false; + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) + return false; + if (pud_sect(*pud)) + return true; + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return false; + if (pmd_sect(*pmd)) + return true; + + pte = pte_offset_kernel(pmd, addr); + return pte_valid(*pte); +} +#endif /* CONFIG_HIBERNATION */ +#endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index bc0f0a6c9c23..b78688806652 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -24,6 +24,7 @@ #include <asm/asm-offsets.h> #include <asm/hwcap.h> #include <asm/pgtable.h> +#include <asm/pgtable-hwdef.h> #include <asm/cpufeature.h> #include <asm/alternative.h> @@ -43,6 +44,52 @@ #define MAIR(attr, mt) ((attr) << ((mt) * 8)) /* + * cpu_cache_off() + * + * Turn the CPU D-cache off. + */ +ENTRY(cpu_cache_off) + mrs x0, sctlr_el1 + bic x0, x0, #1 << 2 // clear SCTLR.C + msr sctlr_el1, x0 + isb + ret +ENDPROC(cpu_cache_off) + +/* + * cpu_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the same state + * as it would be if it had been reset, and branch to what would be the + * reset vector. It must be executed with the flat identity mapping. + * + * - loc - location to jump to for soft reset + */ + .align 5 +ENTRY(cpu_reset) + mrs x1, sctlr_el1 + bic x1, x1, #1 + msr sctlr_el1, x1 // disable the MMU + isb + ret x0 +ENDPROC(cpu_reset) + +ENTRY(cpu_soft_restart) + /* Save address of cpu_reset() and reset address */ + mov x19, x0 + mov x20, x1 + + /* Turn D-cache off */ + bl cpu_cache_off + + /* Push out all dirty data, and ensure cache is empty */ + bl flush_cache_all + + mov x0, x20 + ret x19 +ENDPROC(cpu_soft_restart) + +/* * cpu_do_idle() * * Idle the processor (wait for interrupt). 
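The cpu_cache_off() and cpu_reset() entry points added above only toggle single SCTLR_EL1 bits: bit 2 (the C bit) to turn the D-cache off, and bit 0 (the M bit) to disable the MMU before branching to the reset location; cpu_soft_restart() strings them together around flush_cache_all(). A small standalone sketch of the bit arithmetic (hypothetical register value; on hardware these are MRS/BIC/MSR sequences, not C code):

/*
 * Standalone sketch of the SCTLR_EL1 bit handling done by cpu_cache_off()
 * (clear the C bit, bit 2) and cpu_reset() (clear the M bit, bit 0) above.
 */
#include <stdint.h>
#include <stdio.h>

#define SCTLR_M	(1u << 0)	/* MMU enable */
#define SCTLR_C	(1u << 2)	/* D-cache enable */

int main(void)
{
	uint32_t sctlr = 0x34d5d91du;		/* hypothetical starting value */

	sctlr &= ~SCTLR_C;			/* cpu_cache_off: D-cache off */
	printf("after D-cache off: %#x\n", (unsigned)sctlr);

	sctlr &= ~SCTLR_M;			/* cpu_reset: MMU off */
	printf("after MMU off:     %#x\n", (unsigned)sctlr);
	return 0;
}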
@@ -63,58 +110,49 @@ ENTRY(cpu_do_suspend) mrs x2, tpidr_el0 mrs x3, tpidrro_el0 mrs x4, contextidr_el1 - mrs x5, mair_el1 - mrs x6, cpacr_el1 - mrs x7, ttbr1_el1 - mrs x8, tcr_el1 - mrs x9, vbar_el1 - mrs x10, mdscr_el1 - mrs x11, oslsr_el1 - mrs x12, sctlr_el1 - mrs x13, tpidr_el1 - mrs x14, sp_el0 + mrs x5, cpacr_el1 + mrs x6, tcr_el1 + mrs x7, vbar_el1 + mrs x8, mdscr_el1 + mrs x9, oslsr_el1 + mrs x10, sctlr_el1 + mrs x11, tpidr_el1 + mrs x12, sp_el0 stp x2, x3, [x0] - stp x4, x5, [x0, #16] - stp x6, x7, [x0, #32] - stp x8, x9, [x0, #48] - stp x10, x11, [x0, #64] - stp x12, x13, [x0, #80] - str x14, [x0, #96] + stp x4, xzr, [x0, #16] + stp x5, x6, [x0, #32] + stp x7, x8, [x0, #48] + stp x9, x10, [x0, #64] + stp x11, x12, [x0, #80] ret ENDPROC(cpu_do_suspend) /** * cpu_do_resume - restore CPU register context * - * x0: Physical address of context pointer - * x1: ttbr0_el1 to be restored - * - * Returns: - * sctlr_el1 value in x0 + * x0: Address of context pointer */ + .pushsection ".idmap.text", "ax" ENTRY(cpu_do_resume) - /* - * Invalidate local tlb entries before turning on MMU - */ - tlbi vmalle1 ldp x2, x3, [x0] ldp x4, x5, [x0, #16] - ldp x6, x7, [x0, #32] - ldp x8, x9, [x0, #48] - ldp x10, x11, [x0, #64] - ldp x12, x13, [x0, #80] - ldr x14, [x0, #96] + ldp x6, x8, [x0, #32] + ldp x9, x10, [x0, #48] + ldp x11, x12, [x0, #64] + ldp x13, x14, [x0, #80] msr tpidr_el0, x2 msr tpidrro_el0, x3 msr contextidr_el1, x4 - msr mair_el1, x5 msr cpacr_el1, x6 - msr ttbr0_el1, x1 - msr ttbr1_el1, x7 - tcr_set_idmap_t0sz x8, x7 + + /* Don't change t0sz here, mask those bits when restoring */ + mrs x5, tcr_el1 + bfi x8, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH + msr tcr_el1, x8 msr vbar_el1, x9 msr mdscr_el1, x10 + msr sctlr_el1, x12 msr tpidr_el1, x13 msr sp_el0, x14 /* @@ -123,11 +161,10 @@ ENTRY(cpu_do_resume) ubfx x11, x11, #1, #1 msr oslar_el1, x11 reset_pmuserenr_el0 x0 // Disable PMU access from EL0 - mov x0, x12 - dsb nsh // Make sure local tlb invalidation completed isb ret ENDPROC(cpu_do_resume) + .popsection #endif /* @@ -185,6 +222,7 @@ ENDPROC(idmap_cpu_replace_ttbr1) * Initialise the processor for turning the MMU on. Return in x0 the * value of the SCTLR_EL1 register. */ + .pushsection ".idmap.text", "ax" ENTRY(__cpu_setup) tlbi vmalle1 // Invalidate local TLB dsb nsh @@ -273,5 +311,17 @@ ENDPROC(__cpu_setup) */ .type crval, #object crval: +#ifdef CONFIG_ARM64_ICACHE_DISABLE +#define CR_IBIT 0 +#else +#define CR_IBIT 0x1000 +#endif + +#ifdef CONFIG_ARM64_DCACHE_DISABLE +#define CR_CBIT 0 +#else +#define CR_CBIT 0x4 +#endif .word 0xfcffffff // clear - .word 0x34d5d91d // set + .word 0x34d5d91d | CR_IBIT | CR_CBIT // set + .popsection |
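The reworked cpu_do_suspend()/cpu_do_resume() pair above stops saving mair_el1 and ttbr1_el1 and packs the remaining registers into a 96-byte area at fixed offsets. As a reading aid only (the kernel defines and sizes this area elsewhere, outside this hunk), the stp/ldp offsets correspond to a layout like the sketch below.

/*
 * Reading aid: the register save area implied by the stp/ldp offsets in the
 * new cpu_do_suspend()/cpu_do_resume(). Not a kernel definition.
 */
#include <stdint.h>

struct suspend_ctx_sketch {
	uint64_t tpidr_el0;		/* [x0, #0]  */
	uint64_t tpidrro_el0;		/* [x0, #8]  */
	uint64_t contextidr_el1;	/* [x0, #16] */
	uint64_t unused;		/* [x0, #24], written as xzr */
	uint64_t cpacr_el1;		/* [x0, #32] */
	uint64_t tcr_el1;		/* [x0, #40], T0SZ re-masked on resume */
	uint64_t vbar_el1;		/* [x0, #48] */
	uint64_t mdscr_el1;		/* [x0, #56] */
	uint64_t oslsr_el1;		/* [x0, #64] */
	uint64_t sctlr_el1;		/* [x0, #72] */
	uint64_t tpidr_el1;		/* [x0, #80] */
	uint64_t sp_el0;		/* [x0, #88] */
};

_Static_assert(sizeof(struct suspend_ctx_sketch) == 96,
	       "matches the 96-byte save area used above");

int main(void)
{
	return 0;
}

Note that cpu_do_resume() now writes sctlr_el1 itself instead of returning it in x0, which is why the old "Returns: sctlr_el1 value in x0" contract and the trailing "mov x0, x12" are dropped.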

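One further detail from cpu_do_resume() worth spelling out: the saved TCR_EL1 is not restored wholesale. The bfi instruction behind the "Don't change t0sz here, mask those bits when restoring" comment keeps the live T0SZ field and takes every other field from the saved value. A standalone sketch of that merge, assuming the usual TCR_EL1 layout where T0SZ occupies bits [5:0] (TCR_T0SZ_OFFSET = 0 and TCR_TxSZ_WIDTH = 6 come from pgtable-hwdef.h and are assumed here), with hypothetical register contents:

/*
 * Equivalent of "bfi x8, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH" in
 * cpu_do_resume(): keep the live T0SZ field, take the rest from the
 * saved TCR_EL1 value.
 */
#include <stdint.h>
#include <stdio.h>

#define T0SZ_OFFSET	0
#define T0SZ_WIDTH	6
#define T0SZ_MASK	(((UINT64_C(1) << T0SZ_WIDTH) - 1) << T0SZ_OFFSET)

static uint64_t merge_tcr(uint64_t saved_tcr, uint64_t live_tcr)
{
	return (saved_tcr & ~T0SZ_MASK) | (live_tcr & T0SZ_MASK);
}

int main(void)
{
	uint64_t saved = 0x32b5593519ULL;	/* hypothetical saved TCR_EL1 */
	uint64_t live  = 0x32b5593510ULL;	/* hypothetical current TCR_EL1 */

	printf("restored TCR_EL1 = %#llx\n",
	       (unsigned long long)merge_tcr(saved, live));
	return 0;
}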