summaryrefslogtreecommitdiff
path: root/arch/arm64/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/mm')
-rw-r--r--arch/arm64/mm/cache.S77
-rw-r--r--arch/arm64/mm/context.c3
-rw-r--r--arch/arm64/mm/dma-mapping.c1118
-rw-r--r--arch/arm64/mm/fault.c73
-rw-r--r--arch/arm64/mm/flush.c1
-rw-r--r--arch/arm64/mm/init.c159
-rw-r--r--arch/arm64/mm/mmu.c519
-rw-r--r--arch/arm64/mm/pageattr.c48
-rw-r--r--arch/arm64/mm/proc.S124
9 files changed, 2025 insertions, 97 deletions
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index e5091d9cceb6..f14be126fb6a 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -26,6 +26,79 @@
#include <asm/uaccess.h>
/*
+ * __flush_dcache_all()
+ *
+ * Flush the whole D-cache.
+ *
+ * Corrupted registers: x0-x7, x9-x11
+ */
+__flush_dcache_all:
+ dmb sy // ensure ordering with previous memory accesses
+ mrs x0, clidr_el1 // read clidr
+ and x3, x0, #0x7000000 // extract loc from clidr (mask covers bits 24-26)
+ lsr x3, x3, #23 // x3 = loc << 1, same scale as the level counter kept in x10
+ cbz x3, finished // if loc is 0, then no need to clean
+ mov x10, #0 // start clean at cache level 0
+loop1:
+ add x2, x10, x10, lsr #1 // work out 3x current cache level (x10 holds 2x level)
+ lsr x1, x0, x2 // extract cache type bits from clidr
+ and x1, x1, #7 // mask of the bits for current cache only
+ cmp x1, #2 // see what cache we have at this level
+ b.lt skip // skip if no cache, or just i-cache
+ save_and_disable_irqs x9 // make CSSELR and CCSIDR access atomic
+ msr csselr_el1, x10 // select current cache level in csselr
+ isb // isb to sync the new csselr & ccsidr
+ mrs x1, ccsidr_el1 // read the new ccsidr
+ restore_irqs x9 // pairs with save_and_disable_irqs above
+ and x2, x1, #7 // extract the length of the cache lines
+ add x2, x2, #4 // add 4 (line length offset)
+ mov x4, #0x3ff // 10-bit mask applied to the way field below
+ and x4, x4, x1, lsr #3 // find maximum number on the way size
+ clz w5, w4 // find bit position of way size increment
+ mov x7, #0x7fff // 15-bit mask applied to the index field below
+ and x7, x7, x1, lsr #13 // extract max number of the index size
+loop2:
+ mov x9, x4 // create working copy of max way size
+loop3:
+ lsl x6, x9, x5 // shift the way number into position
+ orr x11, x10, x6 // factor way and cache number into x11
+ lsl x6, x7, x2 // shift the index number into position
+ orr x11, x11, x6 // factor index number into x11
+ dc cisw, x11 // clean & invalidate by set/way
+ subs x9, x9, #1 // decrement the way
+ b.ge loop3 // next way until x9 goes negative
+ subs x7, x7, #1 // decrement the index
+ b.ge loop2 // next index until x7 goes negative
+skip:
+ add x10, x10, #2 // increment cache number
+ cmp x3, x10 // compare loc << 1 against the level counter
+ b.gt loop1 // keep going while levels below loc remain
+finished:
+ mov x10, #0 // switch back to cache level 0
+ msr csselr_el1, x10 // select current cache level in csselr
+ dsb sy // barrier after the maintenance operations above
+ isb
+ ret
+ENDPROC(__flush_dcache_all)
+
+/*
+ * flush_cache_all()
+ *
+ * Flush the entire cache system. The data cache flush is now achieved
+ * using atomic clean / invalidates working outwards from L1 cache. This
+ * is done using Set/Way based cache maintenance instructions. The
+ * instruction cache can still be invalidated back to the point of
+ * unification in a single instruction.
+ */
+ENTRY(flush_cache_all)
+ mov x12, lr // keep the return address: the bl below overwrites lr
+ bl __flush_dcache_all // clean & invalidate the D-cache by set/way
+ mov x0, #0
+ ic ialluis // I+BTB cache invalidate
+ ret x12 // return through the saved link register
+ENDPROC(flush_cache_all)
+
+/*
* flush_icache_range(start,end)
*
* Ensure that the I and D caches are coherent within specified region.
@@ -121,7 +194,7 @@ ENTRY(__inval_cache_range)
* - start - virtual start address of region
* - end - virtual end address of region
*/
-__dma_inv_range:
+ENTRY(__dma_inv_range)
dcache_line_size x2, x3
sub x3, x2, #1
tst x1, x3 // end cache line aligned?
@@ -147,7 +220,7 @@ ENDPROC(__dma_inv_range)
* - start - virtual start address of region
* - end - virtual end address of region
*/
-__dma_clean_range:
+ENTRY(__dma_clean_range)
dcache_line_size x2, x3
sub x3, x2, #1
bic x0, x0, x3
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index d4bf895b47cf..1e31cd3871ee 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -191,6 +191,9 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
switch_mm_fastpath:
+
+ arm64_apply_bp_hardening();
+
/*
* Defer TTBR0_EL1 setting for user threads to uaccess_enable() when
* emulating PAN.
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 925b2b3a06f8..159c79612e63 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -3,6 +3,7 @@
*
* Copyright (C) 2012 ARM Ltd.
* Author: Catalin Marinas <catalin.marinas@arm.com>
+ * Copyright (c) 2017, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -24,21 +25,49 @@
#include <linux/genalloc.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
+#include <linux/mm.h>
+#include <linux/iommu.h>
#include <linux/vmalloc.h>
#include <linux/swiotlb.h>
#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <linux/io.h>
+#include <asm/dma-iommu.h>
+#include <linux/dma-mapping-fast.h>
+#include <linux/msm_dma_iommu_mapping.h>
+
+#include "mm.h"
+
static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
bool coherent)
{
- if (!coherent || dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
+ if (dma_get_attr(DMA_ATTR_STRONGLY_ORDERED, attrs))
+ return pgprot_noncached(prot);
+ else if (!coherent || dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
return pgprot_writecombine(prot);
return prot;
}
-static struct gen_pool *atomic_pool;
+/*
+ * Decide whether a DMA operation should be treated as coherent.
+ *
+ * The per-call attributes are consulted first: DMA_ATTR_FORCE_COHERENT wins,
+ * then DMA_ATTR_FORCE_NON_COHERENT.  Only when neither is set does the
+ * device's own coherency setting decide.
+ */
+static bool is_dma_coherent(struct device *dev, struct dma_attrs *attrs)
+{
+	if (dma_get_attr(DMA_ATTR_FORCE_COHERENT, attrs))
+		return true;
+
+	if (dma_get_attr(DMA_ATTR_FORCE_NON_COHERENT, attrs))
+		return false;
+
+	return is_device_dma_coherent(dev) ? true : false;
+}
+static struct gen_pool *atomic_pool;
+#define NO_KERNEL_MAPPING_DUMMY 0x2222
#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
@@ -86,10 +115,47 @@ static int __free_from_pool(void *start, size_t size)
return 1;
}
+/*
+ * apply_to_page_range() callback: point the PTE at the page backing @addr
+ * using the protection bits passed through @data (a pgprot_t pointer).
+ * Always returns 0.
+ */
+static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr,
+			    void *data)
+{
+	pgprot_t *protp = data;
+
+	set_pte(pte, mk_pte(virt_to_page(addr), *protp));
+
+	return 0;
+}
+
+static int __dma_clear_pte(pte_t *pte, pgtable_t token, unsigned long addr,
+ void *data)
+{
+ pte_clear(&init_mm, addr, pte); /* clear this init_mm entry; token and data are not used */
+ return 0; /* always reports success to the page-range walker */
+}
+
+/*
+ * Rewrite or remove the init_mm page-table entries that cover a buffer.
+ *
+ * @page:          first page of the buffer; its page_address() is the start
+ * @size:          length of the range in bytes
+ * @prot:          protection applied when the entries are rewritten
+ * @no_kernel_map: true to clear the entries, false to rewrite them with @prot
+ *
+ * The TLB entries for the range are flushed after the page tables change.
+ */
+static void __dma_remap(struct page *page, size_t size, pgprot_t prot,
+			bool no_kernel_map)
+{
+	unsigned long start = (unsigned long) page_address(page);
+	/*
+	 * Must be unsigned long: a plain 'unsigned' would truncate the 64-bit
+	 * kernel virtual address and hand flush_tlb_kernel_range() a bogus end.
+	 */
+	unsigned long end = start + size;
+	int (*func)(pte_t *pte, pgtable_t token, unsigned long addr,
+		    void *data);
+
+	if (no_kernel_map)
+		func = __dma_clear_pte;
+	else
+		func = __dma_update_pte;
+
+	apply_to_page_range(&init_mm, start, size, func, &prot);
+	/* Order the page-table updates before the TLB invalidation. */
+	mb();
+	flush_tlb_kernel_range(start, end);
+}
+
static void *__dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
struct dma_attrs *attrs)
{
+ void *addr;
+
if (dev == NULL) {
WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
return NULL;
@@ -100,7 +166,6 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
flags |= GFP_DMA;
if (dev_get_cma_area(dev) && gfpflags_allow_blocking(flags)) {
struct page *page;
- void *addr;
page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
get_order(size));
@@ -110,10 +175,20 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
*dma_handle = phys_to_dma(dev, page_to_phys(page));
addr = page_address(page);
memset(addr, 0, size);
- return addr;
} else {
- return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
+ addr = swiotlb_alloc_coherent(dev, size, dma_handle, flags);
+ }
+
+ if (addr && (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs) ||
+ dma_get_attr(DMA_ATTR_STRONGLY_ORDERED, attrs))) {
+ /*
+ * flush the caches here because we can't later
+ */
+ __dma_flush_range(addr, addr + size);
+ __dma_remap(virt_to_page(addr), size, 0, true);
}
+
+ return addr;
}
static void __dma_free_coherent(struct device *dev, size_t size,
@@ -123,11 +198,16 @@ static void __dma_free_coherent(struct device *dev, size_t size,
bool freed;
phys_addr_t paddr = dma_to_phys(dev, dma_handle);
+ size = PAGE_ALIGN(size);
if (dev == NULL) {
WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
return;
}
+ if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs) ||
+ dma_get_attr(DMA_ATTR_STRONGLY_ORDERED, attrs))
+ __dma_remap(phys_to_page(paddr), size, PAGE_KERNEL, false);
+
freed = dma_release_from_contiguous(dev,
phys_to_page(paddr),
size >> PAGE_SHIFT);
@@ -141,8 +221,7 @@ static void *__dma_alloc(struct device *dev, size_t size,
{
struct page *page;
void *ptr, *coherent_ptr;
- bool coherent = is_device_dma_coherent(dev);
- pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false);
+ bool coherent = is_dma_coherent(dev, attrs);
size = PAGE_ALIGN(size);
@@ -164,16 +243,22 @@ static void *__dma_alloc(struct device *dev, size_t size,
if (coherent)
return ptr;
- /* remove any dirty cache lines on the kernel alias */
- __dma_flush_range(ptr, ptr + size);
-
- /* create a coherent mapping */
- page = virt_to_page(ptr);
- coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
- prot, __builtin_return_address(0));
- if (!coherent_ptr)
- goto no_map;
-
+ if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) {
+ coherent_ptr = (void *)NO_KERNEL_MAPPING_DUMMY;
+ } else {
+ if (!dma_get_attr(DMA_ATTR_STRONGLY_ORDERED, attrs))
+ /* remove any dirty cache lines on the kernel alias */
+ __dma_flush_range(ptr, ptr + size);
+
+ /* create a coherent mapping */
+ page = virt_to_page(ptr);
+ coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
+ __get_dma_pgprot(attrs,
+ __pgprot(PROT_NORMAL_NC), false),
+ NULL);
+ if (!coherent_ptr)
+ goto no_map;
+ }
return coherent_ptr;
no_map:
@@ -191,10 +276,11 @@ static void __dma_free(struct device *dev, size_t size,
size = PAGE_ALIGN(size);
- if (!is_device_dma_coherent(dev)) {
+ if (!is_dma_coherent(dev, attrs)) {
if (__free_from_pool(vaddr, size))
return;
- vunmap(vaddr);
+ if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs))
+ vunmap(vaddr);
}
__dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
}
@@ -207,7 +293,7 @@ static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
dma_addr_t dev_addr;
dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
- if (!is_device_dma_coherent(dev))
+ if (!is_dma_coherent(dev, attrs))
__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
return dev_addr;
@@ -218,7 +304,7 @@ static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- if (!is_device_dma_coherent(dev))
+ if (!is_dma_coherent(dev, attrs))
__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
}
@@ -231,7 +317,7 @@ static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
int i, ret;
ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
- if (!is_device_dma_coherent(dev))
+ if (!is_dma_coherent(dev, attrs))
for_each_sg(sgl, sg, ret, i)
__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
sg->length, dir);
@@ -247,7 +333,7 @@ static void __swiotlb_unmap_sg_attrs(struct device *dev,
struct scatterlist *sg;
int i;
- if (!is_device_dma_coherent(dev))
+ if (!is_dma_coherent(dev, attrs))
for_each_sg(sgl, sg, nelems, i)
__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
sg->length, dir);
@@ -313,7 +399,7 @@ static int __swiotlb_mmap(struct device *dev,
unsigned long off = vma->vm_pgoff;
vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
- is_device_dma_coherent(dev));
+ is_dma_coherent(dev, attrs));
if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
return ret;
@@ -341,6 +427,55 @@ static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
return ret;
}
+/*
+ * Create an additional kernel mapping for an existing DMA buffer.
+ *
+ * The physical pages behind @handle are mapped into a freshly reserved
+ * VM_USERMAP vm area with the protection derived from @attrs.  Returns the
+ * new virtual address (including the in-page offset of @handle), or NULL if
+ * the area could not be reserved or mapped.
+ */
+static void *arm64_dma_remap(struct device *dev, void *cpu_addr,
+			     dma_addr_t handle, size_t size,
+			     struct dma_attrs *attrs)
+{
+	unsigned long in_page = handle & ~PAGE_MASK;
+	struct page *first = phys_to_page(dma_to_phys(dev, handle));
+	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false);
+	struct vm_struct *vm;
+	unsigned long va;
+
+	/* Cover the in-page offset as well as the payload. */
+	size = PAGE_ALIGN(size + in_page);
+
+	/*
+	 * The allocation may later be mapped to user space, hence
+	 * VM_USERMAP.
+	 */
+	vm = get_vm_area(size, VM_USERMAP);
+	if (!vm)
+		return NULL;
+
+	va = (unsigned long)vm->addr;
+	vm->phys_addr = __pfn_to_phys(page_to_pfn(first));
+
+	if (ioremap_page_range(va, va + size, vm->phys_addr, prot)) {
+		/* Mapping failed: give the reserved area back. */
+		vunmap((void *)va);
+		return NULL;
+	}
+
+	return (void *)va + in_page;
+}
+
+/*
+ * Tear down a mapping created by arm64_dma_remap().
+ *
+ * @remapped_addr is rounded down to its page before the lookup; a warning is
+ * emitted and nothing is freed if no vm area starts there.
+ */
+static void arm64_dma_unremap(struct device *dev, void *remapped_addr,
+			      size_t size)
+{
+	void *base = (void *)((unsigned long)remapped_addr & PAGE_MASK);
+
+	size = PAGE_ALIGN(size);
+
+	if (!find_vm_area(base)) {
+		WARN(1, "trying to free invalid coherent area: %p\n",
+		     base);
+		return;
+	}
+
+	vunmap(base);
+	flush_tlb_kernel_range((unsigned long)base,
+			       (unsigned long)(base + size));
+}
+
static struct dma_map_ops swiotlb_dma_ops = {
.alloc = __dma_alloc,
.free = __dma_free,
@@ -356,6 +491,8 @@ static struct dma_map_ops swiotlb_dma_ops = {
.sync_sg_for_device = __swiotlb_sync_sg_for_device,
.dma_supported = swiotlb_dma_supported,
.mapping_error = swiotlb_dma_mapping_error,
+ .remap = arm64_dma_remap,
+ .unremap = arm64_dma_unremap,
};
static int __init atomic_pool_init(void)
@@ -406,7 +543,7 @@ static int __init atomic_pool_init(void)
goto out;
remove_mapping:
- dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
+ dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP, true);
destroy_genpool:
gen_pool_destroy(atomic_pool);
atomic_pool = NULL;
@@ -427,6 +564,7 @@ static void *__dummy_alloc(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
struct dma_attrs *attrs)
{
+ WARN(1, "dma alloc failure, device may be missing a call to arch_setup_dma_ops");
return NULL;
}
@@ -542,7 +680,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
dma_addr_t *handle, gfp_t gfp,
struct dma_attrs *attrs)
{
- bool coherent = is_device_dma_coherent(dev);
+ bool coherent = is_dma_coherent(dev, attrs);
int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
size_t iosize = size;
void *addr;
@@ -624,7 +762,7 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
if (WARN_ON(!area || !area->pages))
return;
iommu_dma_free(dev, area->pages, iosize, &handle);
- dma_common_free_remap(cpu_addr, size, VM_USERMAP);
+ dma_common_free_remap(cpu_addr, size, VM_USERMAP, true);
} else {
iommu_dma_unmap_page(dev, handle, iosize, 0, NULL);
__free_pages(virt_to_page(cpu_addr), get_order(size));
@@ -639,7 +777,7 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
int ret;
vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
- is_device_dma_coherent(dev));
+ is_dma_coherent(dev, attrs));
if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
return ret;
@@ -696,7 +834,7 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- bool coherent = is_device_dma_coherent(dev);
+ bool coherent = is_dma_coherent(dev, attrs);
int prot = dma_direction_to_prot(dir, coherent);
dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);
@@ -749,7 +887,7 @@ static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
int nelems, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- bool coherent = is_device_dma_coherent(dev);
+ bool coherent = is_dma_coherent(dev, attrs);
if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
__iommu_sync_sg_for_device(dev, sgl, nelems, dir);
@@ -782,7 +920,6 @@ static struct dma_map_ops iommu_dma_ops = {
.sync_single_for_device = __iommu_sync_single_for_device,
.sync_sg_for_cpu = __iommu_sync_sg_for_cpu,
.sync_sg_for_device = __iommu_sync_sg_for_device,
- .dma_supported = iommu_dma_supported,
.mapping_error = iommu_dma_mapping_error,
};
@@ -993,3 +1130,922 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
dev->archdata.dma_coherent = coherent;
__iommu_setup_dma_ops(dev, dma_base, size, iommu);
}
+EXPORT_SYMBOL(arch_setup_dma_ops);
+
+#ifdef CONFIG_ARM64_DMA_USE_IOMMU
+
+/*
+ * Extend IOMMU protection flags according to the DMA attributes:
+ * IOMMU_NOEXEC is added unless DMA_ATTR_EXEC_MAPPING is set, and
+ * IOMMU_CACHE is added for coherent mappings.
+ */
+static int __get_iommu_pgprot(struct dma_attrs *attrs, int prot,
+			      bool coherent)
+{
+	int noexec = dma_get_attr(DMA_ATTR_EXEC_MAPPING, attrs) ?
+			0 : IOMMU_NOEXEC;
+	int cache = coherent ? IOMMU_CACHE : 0;
+
+	return prot | noexec | cache;
+}
+
+/*
+ * Make an area consistent for devices.
+ * Note: Drivers should NOT use this function directly, as it will break
+ * platforms with CONFIG_DMABOUNCE.
+ * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
+ */
+static void __dma_page_cpu_to_dev(struct page *page, unsigned long off,
+				  size_t size, enum dma_data_direction dir)
+{
+	/* Kernel virtual address of the first byte handed to the device. */
+	void *vaddr = page_address(page) + off;
+
+	__dma_map_area(vaddr, size, dir);
+}
+
+/*
+ * Hand a buffer back to the CPU after the device has used it, and mark the
+ * page's D-cache state clean when a whole page starting at offset 0 was
+ * transferred in a direction other than DMA_TO_DEVICE.
+ */
+static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
+				  size_t size, enum dma_data_direction dir)
+{
+	__dma_unmap_area(page_address(page) + off, size, dir);
+
+	if (dir == DMA_TO_DEVICE)
+		return;
+
+	/* Only a full page starting at offset 0 qualifies. */
+	if (off != 0 || size < PAGE_SIZE)
+		return;
+
+	/* Mark the D-cache clean for this page to avoid extra flushing. */
+	set_bit(PG_dcache_clean, &page->flags);
+}
+
+/*
+ * Store a new DMA mask for the device.
+ *
+ * Returns -EIO when the device has no dma_mask storage or the mask is not
+ * supported, 0 once the mask has been written.
+ */
+static int arm_dma_set_mask(struct device *dev, u64 dma_mask)
+{
+	if (!dev->dma_mask)
+		return -EIO;
+
+	if (!dma_supported(dev, dma_mask))
+		return -EIO;
+
+	*dev->dma_mask = dma_mask;
+	return 0;
+}
+
+/* IOMMU */
+
+/*
+ * Prepare freshly allocated pages for DMA: zero them unless
+ * DMA_ATTR_SKIP_ZEROING is set, then flush the kernel alias from the
+ * D-cache when the buffer is not coherent.
+ */
+static void __dma_clear_buffer(struct page *page, size_t size,
+			       struct dma_attrs *attrs, bool is_coherent)
+{
+	void *start = page_address(page);
+	bool zero = !dma_get_attr(DMA_ATTR_SKIP_ZEROING, attrs);
+
+	if (zero)
+		memset(start, 0, size);
+
+	/* Coherent buffers need no cache maintenance. */
+	if (is_coherent)
+		return;
+
+	dmac_flush_range(start, start + size);
+}
+
+/*
+ * Reserve a run of IOVA pages in the mapping's bitmap.
+ *
+ * @mapping: IOMMU mapping whose bitmap tracks the IOVA space
+ * @size:    number of bytes to reserve (rounded up to whole pages)
+ *
+ * The alignment mask follows the allocation order, capped at
+ * CONFIG_ARM64_DMA_IOMMU_ALIGNMENT.  Returns the bus address of the reserved
+ * range, or DMA_ERROR_CODE when the search ends past mapping->bits.
+ */
+static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
+				      size_t size)
+{
+	unsigned int order = get_order(size);
+	unsigned int align;
+	unsigned int count, start;
+	unsigned long flags;
+
+	if (order > CONFIG_ARM64_DMA_IOMMU_ALIGNMENT)
+		order = CONFIG_ARM64_DMA_IOMMU_ALIGNMENT;
+
+	count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	align = (1 << order) - 1;
+
+	spin_lock_irqsave(&mapping->lock, flags);
+	start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits, 0,
+					   count, align);
+	if (start > mapping->bits) {
+		spin_unlock_irqrestore(&mapping->lock, flags);
+		return DMA_ERROR_CODE;
+	}
+
+	bitmap_set(mapping->bitmap, start, count);
+	spin_unlock_irqrestore(&mapping->lock, flags);
+
+	/*
+	 * Widen before shifting: 'start' is a 32-bit page index, so shifting
+	 * it in place would wrap for offsets of 4GB and beyond.
+	 */
+	return mapping->base + ((dma_addr_t)start << PAGE_SHIFT);
+}
+
+/*
+ * Release an IOVA range previously reserved with __alloc_iova().
+ *
+ * @mapping: IOMMU mapping whose bitmap tracks the IOVA space
+ * @addr:    bus address returned by __alloc_iova()
+ * @size:    size in bytes that was passed to __alloc_iova()
+ *
+ * The page count is rounded up exactly as __alloc_iova() does, so a size
+ * that is not a multiple of the page size releases every bit that was set
+ * instead of leaking the last one.
+ */
+static inline void __free_iova(struct dma_iommu_mapping *mapping,
+			       dma_addr_t addr, size_t size)
+{
+	unsigned int start = (addr - mapping->base) >> PAGE_SHIFT;
+	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned long flags;
+
+	spin_lock_irqsave(&mapping->lock, flags);
+	bitmap_clear(mapping->bitmap, start, count);
+	spin_unlock_irqrestore(&mapping->lock, flags);
+}
+
+/*
+ * Allocate the backing pages for an IOMMU buffer and return them as an
+ * array of page pointers (one entry per PAGE_SIZE page).
+ *
+ * With DMA_ATTR_FORCE_CONTIGUOUS the pages come from the contiguous
+ * allocator in one piece; otherwise they are gathered from the page
+ * allocator in the largest chunks it will give, each split into single
+ * pages.  Returns NULL on failure with everything released.
+ */
+static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
+					  gfp_t gfp, struct dma_attrs *attrs)
+{
+	size_t count = size >> PAGE_SHIFT;
+	size_t array_size = count * sizeof(struct page *);
+	bool is_coherent = is_dma_coherent(dev, attrs);
+	struct page **pages;
+	int i = 0;
+
+	/* Small arrays come from the slab, large ones from vmalloc space. */
+	pages = (array_size <= PAGE_SIZE) ? kzalloc(array_size, gfp)
+					  : vzalloc(array_size);
+	if (!pages)
+		return NULL;
+
+	if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) {
+		unsigned long order = get_order(size);
+		struct page *first;
+
+		first = dma_alloc_from_contiguous(dev, count, order);
+		if (!first)
+			goto error;
+
+		__dma_clear_buffer(first, size, attrs, is_coherent);
+
+		for (i = 0; i < count; i++)
+			pages[i] = first + i;
+
+		return pages;
+	}
+
+	/*
+	 * The IOMMU can map any page, so highmem may be used here.
+	 */
+	gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
+
+	while (count) {
+		int j, order = __fls(count);
+
+		/* Try the largest order first, then fall back step by step. */
+		for (;;) {
+			pages[i] = alloc_pages(gfp, order);
+			if (pages[i] || !order)
+				break;
+			order--;
+		}
+		if (!pages[i])
+			goto error;
+
+		if (order) {
+			split_page(pages[i], order);
+			for (j = 1; j < (1 << order); j++)
+				pages[i + j] = pages[i] + j;
+		}
+
+		__dma_clear_buffer(pages[i], PAGE_SIZE << order, attrs,
+				   is_coherent);
+		i += 1 << order;
+		count -= 1 << order;
+	}
+
+	return pages;
+
+error:
+	/* Give back every page collected so far, then the array itself. */
+	for (i = i - 1; i >= 0; i--)
+		if (pages[i])
+			__free_pages(pages[i], 0);
+	if (array_size > PAGE_SIZE)
+		vfree(pages);
+	else
+		kfree(pages);
+	return NULL;
+}
+
+/*
+ * Release the pages of an IOMMU buffer together with the page-pointer
+ * array that describes them.  Always returns 0.
+ */
+static int __iommu_free_buffer(struct device *dev, struct page **pages,
+			       size_t size, struct dma_attrs *attrs)
+{
+	int count = size >> PAGE_SHIFT;
+	int array_size = count * sizeof(struct page *);
+	int i;
+
+	if (!dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) {
+		/* Pages were allocated (and split) individually. */
+		for (i = 0; i < count; i++)
+			if (pages[i])
+				__free_pages(pages[i], 0);
+	} else {
+		/* One contiguous chunk: release it starting at the first page. */
+		dma_release_from_contiguous(dev, pages[0], count);
+	}
+
+	if (array_size > PAGE_SIZE)
+		vfree(pages);
+	else
+		kfree(pages);
+
+	return 0;
+}
+
+/*
+ * Create a CPU mapping for the specified pages
+ */
+static void *
+__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
+ const void *caller)
+{
+ return dma_common_pages_remap(pages, size, VM_USERMAP, prot, caller); /* gfp is accepted but not used here */
+}
+
+/*
+ * Create a mapping in device IO address space for specified pages
+ */
+static dma_addr_t __iommu_create_mapping(struct device *dev,
+					 struct page **pages, size_t size,
+					 struct dma_attrs *attrs)
+{
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	dma_addr_t dma_addr, iova;
+	int prot = IOMMU_READ | IOMMU_WRITE;
+	int i, ret;
+
+	dma_addr = __alloc_iova(mapping, size);
+	if (dma_addr == DMA_ERROR_CODE)
+		return dma_addr;
+
+	prot = __get_iommu_pgprot(attrs, prot,
+				  is_dma_coherent(dev, attrs));
+
+	/*
+	 * Walk the page array and map each run of physically consecutive
+	 * pages with a single iommu_map() call.
+	 */
+	iova = dma_addr;
+	i = 0;
+	while (i < count) {
+		phys_addr_t phys = page_to_phys(pages[i]);
+		unsigned int next_pfn = page_to_pfn(pages[i]) + 1;
+		unsigned int run_end = i + 1;
+		unsigned int len;
+
+		while (run_end < count &&
+		       page_to_pfn(pages[run_end]) == next_pfn) {
+			run_end++;
+			next_pfn++;
+		}
+
+		len = (run_end - i) << PAGE_SHIFT;
+		ret = iommu_map(mapping->domain, iova, phys, len, prot);
+		if (ret < 0)
+			goto fail;
+
+		iova += len;
+		i = run_end;
+	}
+
+	return dma_addr;
+
+fail:
+	/* Undo whatever was mapped so far and release the IOVA range. */
+	iommu_unmap(mapping->domain, dma_addr, iova-dma_addr);
+	__free_iova(mapping, dma_addr, size);
+	return DMA_ERROR_CODE;
+}
+
+/*
+ * Unmap an IOVA range from the device's IOMMU domain and return it to the
+ * IOVA allocator.  Always returns 0.
+ */
+static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova,
+				  size_t size)
+{
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	/* Fold the optional in-page offset into the length, page aligned. */
+	size_t span = PAGE_ALIGN((iova & ~PAGE_MASK) + size);
+	dma_addr_t base = iova & PAGE_MASK;
+
+	iommu_unmap(mapping->domain, base, span);
+	__free_iova(mapping, base, span);
+
+	return 0;
+}
+
+/*
+ * Translate an atomic-pool virtual address to its struct page and return
+ * that pointer cast to 'struct page **'.
+ * NOTE(review): the result is a page pointer, not an array of page pointers;
+ * confirm that callers treat it accordingly.
+ */
+static struct page **__atomic_get_pages(void *addr)
+{
+	phys_addr_t pa = gen_pool_virt_to_phys(atomic_pool,
+					       (unsigned long)addr);
+
+	return (struct page **)phys_to_page(pa);
+}
+
+/*
+ * Find the page array behind a CPU address returned by the IOMMU allocator.
+ *
+ * Atomic-pool addresses are resolved through the pool; for
+ * DMA_ATTR_NO_KERNEL_MAPPING the "address" itself is returned; otherwise
+ * the pages recorded in the matching vm area are returned, or NULL if no
+ * such area exists.
+ */
+static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs)
+{
+	struct vm_struct *vm;
+
+	if (__in_atomic_pool(cpu_addr, PAGE_SIZE))
+		return __atomic_get_pages(cpu_addr);
+
+	if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs))
+		return cpu_addr;
+
+	vm = find_vm_area(cpu_addr);
+
+	return vm ? vm->pages : NULL;
+}
+
+/*
+ * Non-blocking variant of the IOMMU allocator.
+ *
+ * Coherent requests take pages straight from the page allocator,
+ * non-coherent ones from the atomic pool.  The pages are then mapped into
+ * the device's IOVA space and the bus address is stored in @handle.
+ * Returns the CPU address, or NULL on failure.
+ */
+static void *__iommu_alloc_atomic(struct device *dev, size_t size,
+				  dma_addr_t *handle, gfp_t gfp,
+				  struct dma_attrs *attrs)
+{
+	size_t count = size >> PAGE_SHIFT;
+	size_t array_size = count * sizeof(struct page *);
+	bool coherent = is_dma_coherent(dev, attrs);
+	struct page **pages;
+	struct page *page;
+	void *addr;
+	int i;
+
+	/* Temporary page-pointer array, only needed to build the mapping. */
+	pages = (array_size <= PAGE_SIZE) ? kzalloc(array_size, gfp)
+					  : vzalloc(array_size);
+	if (!pages)
+		return NULL;
+
+	if (!coherent) {
+		addr = __alloc_from_pool(size, &page, gfp);
+	} else {
+		page = alloc_pages(gfp, get_order(size));
+		addr = page ? page_address(page) : NULL;
+	}
+
+	if (!addr)
+		goto out_free_array;
+
+	for (i = 0; i < count ; i++)
+		pages[i] = page + i;
+
+	*handle = __iommu_create_mapping(dev, pages, size, attrs);
+	if (*handle == DMA_ERROR_CODE) {
+		/* Mapping failed: return the memory to where it came from. */
+		if (coherent)
+			__free_pages(page, get_order(size));
+		else
+			__free_from_pool(addr, size);
+		goto out_free_array;
+	}
+
+	kvfree(pages);
+	return addr;
+
+out_free_array:
+	kvfree(pages);
+	return NULL;
+}
+
+static void __iommu_free_atomic(struct device *dev, void *cpu_addr,
+ dma_addr_t handle, size_t size)
+{
+ __iommu_remove_mapping(dev, handle, size); /* drop the IOMMU mapping and its IOVA range first */
+ __free_from_pool(cpu_addr, size); /* then hand the memory back to the atomic pool */
+}
+
+/*
+ * Allocate a DMA buffer for a device behind an IOMMU.
+ *
+ * Non-blocking requests are served by __iommu_alloc_atomic().  Otherwise the
+ * pages are allocated, mapped into the device's IOVA space (@handle) and,
+ * unless DMA_ATTR_NO_KERNEL_MAPPING is set, remapped into the kernel.  With
+ * DMA_ATTR_NO_KERNEL_MAPPING the page array itself is returned as the
+ * cookie.  Returns NULL on failure with @handle left at DMA_ERROR_CODE or at
+ * the already released mapping address.
+ */
+static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
+	    dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
+{
+	bool coherent = is_dma_coherent(dev, attrs);
+	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
+	struct page **pages;
+	void *vaddr = NULL;
+
+	*handle = DMA_ERROR_CODE;
+	size = PAGE_ALIGN(size);
+
+	if (!gfpflags_allow_blocking(gfp))
+		return __iommu_alloc_atomic(dev, size, handle, gfp, attrs);
+
+	/*
+	 * Work-around: split_page() cannot handle compound pages, so make
+	 * sure __GFP_COMP never reaches the page allocator from here.
+	 */
+	gfp &= ~(__GFP_COMP);
+
+	pages = __iommu_alloc_buffer(dev, size, gfp, attrs);
+	if (!pages)
+		return NULL;
+
+	*handle = __iommu_create_mapping(dev, pages, size, attrs);
+	if (*handle == DMA_ERROR_CODE)
+		goto release_buffer;
+
+	if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs))
+		return pages;
+
+	vaddr = __iommu_alloc_remap(pages, size, gfp, prot,
+				    __builtin_return_address(0));
+	if (vaddr)
+		return vaddr;
+
+	/* Kernel remap failed: unwind the IOMMU mapping, then the pages. */
+	__iommu_remove_mapping(dev, *handle, size);
+release_buffer:
+	__iommu_free_buffer(dev, pages, size, attrs);
+	return NULL;
+}
+
+/*
+ * Map an IOMMU DMA buffer into a user VMA, one page at a time.
+ *
+ * @vma:      destination VMA; its page protection is updated from @attrs
+ * @cpu_addr: CPU address (or cookie) returned by the allocator
+ * @size:     size of the DMA buffer in bytes
+ *
+ * Returns 0 on success, -ENXIO when the pages cannot be found or the VMA is
+ * larger than the buffer, or the error from vm_insert_page().
+ */
+static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
+		    void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		    struct dma_attrs *attrs)
+{
+	unsigned long uaddr = vma->vm_start;
+	unsigned long usize = vma->vm_end - vma->vm_start;
+	struct page **pages = __iommu_get_pages(cpu_addr, attrs);
+	bool coherent = is_dma_coherent(dev, attrs);
+
+	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
+					     coherent);
+
+	if (!pages)
+		return -ENXIO;
+
+	/*
+	 * Refuse a VMA that spans more pages than the buffer owns; the loop
+	 * below would otherwise walk past the end of the page array.
+	 */
+	if (usize > PAGE_ALIGN(size))
+		return -ENXIO;
+
+	do {
+		int ret = vm_insert_page(vma, uaddr, *pages++);
+		if (ret) {
+			pr_err("Remapping memory failed: %d\n", ret);
+			return ret;
+		}
+		uaddr += PAGE_SIZE;
+		usize -= PAGE_SIZE;
+	} while (usize > 0);
+
+	return 0;
+}
+
+/*
+ * free a page as defined by the above mapping.
+ * Must not be called with IRQs disabled.
+ */
+void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
+			  dma_addr_t handle, struct dma_attrs *attrs)
+{
+	struct page **pages;
+
+	size = PAGE_ALIGN(size);
+
+	/* Atomic-pool buffers have their own release path. */
+	if (__in_atomic_pool(cpu_addr, size)) {
+		__iommu_free_atomic(dev, cpu_addr, handle, size);
+		return;
+	}
+
+	pages = __iommu_get_pages(cpu_addr, attrs);
+	if (pages) {
+		/* A kernel mapping exists unless it was explicitly skipped. */
+		if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs))
+			dma_common_free_remap(cpu_addr, size, VM_USERMAP, true);
+
+		__iommu_remove_mapping(dev, handle, size);
+		__iommu_free_buffer(dev, pages, size, attrs);
+		return;
+	}
+
+	WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
+}
+
+/*
+ * Build a scatter-gather table describing the pages of an IOMMU DMA buffer.
+ * Returns -ENXIO when the pages behind @cpu_addr cannot be found, otherwise
+ * the result of sg_alloc_table_from_pages().
+ */
+int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
+			  void *cpu_addr, dma_addr_t dma_addr,
+			  size_t size, struct dma_attrs *attrs)
+{
+	struct page **pages = __iommu_get_pages(cpu_addr, attrs);
+	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+	if (pages)
+		return sg_alloc_table_from_pages(sgt, pages, nr_pages, 0,
+						 size, GFP_KERNEL);
+
+	return -ENXIO;
+}
+
+/*
+ * Translate a DMA transfer direction into IOMMU access flags.
+ * Directions other than the three handled below yield 0.
+ */
+static int __dma_direction_to_prot(enum dma_data_direction dir)
+{
+	switch (dir) {
+	case DMA_BIDIRECTIONAL:
+		return IOMMU_READ | IOMMU_WRITE;
+	case DMA_TO_DEVICE:
+		return IOMMU_READ;
+	case DMA_FROM_DEVICE:
+		return IOMMU_WRITE;
+	default:
+		return 0;
+	}
+}
+
+/**
+ * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
+ * @dev: valid struct device pointer
+ * @sg: list of buffers
+ * @nents: number of buffers to map
+ * @dir: DMA transfer direction
+ *
+ * Map a set of buffers described by scatterlist in streaming mode for DMA.
+ * The scatter gather list elements are merged together (if possible) and
+ * tagged with the appropriate dma address and length. They are obtained via
+ * sg_dma_{address,length}.
+ */
+int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg,
+		int nents, enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	unsigned int total_length = 0;
+	unsigned int consumed = 0;
+	struct scatterlist *s;
+	dma_addr_t iova;
+	int prot, ret, i;
+
+	/* One IOVA range covers the whole list, so add up the lengths first. */
+	for_each_sg(sg, s, nents, i)
+		total_length += s->length;
+
+	iova = __alloc_iova(mapping, total_length);
+	if (iova == DMA_ERROR_CODE) {
+		dev_err(dev, "Couldn't allocate iova for sg %p\n", sg);
+		return 0;
+	}
+
+	prot = __get_iommu_pgprot(attrs, __dma_direction_to_prot(dir),
+				  is_dma_coherent(dev, attrs));
+
+	ret = iommu_map_sg(mapping->domain, iova, sg, nents, prot);
+	if (ret != total_length) {
+		__free_iova(mapping, iova, total_length);
+		return 0;
+	}
+
+	/*
+	 * Each entry records its bus address inside the range and the length
+	 * remaining from that point to the end of the range.
+	 */
+	for_each_sg(sg, s, nents, i) {
+		s->dma_address = iova + consumed;
+		s->dma_length = total_length - consumed;
+		consumed += s->length;
+	}
+
+	return nents;
+}
+
+/**
+ * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
+ * @dev: valid struct device pointer
+ * @sg: list of buffers
+ * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ *
+ * Unmap a set of streaming mode DMA translations. Again, CPU access
+ * rules concerning calls here are the same as for dma_unmap_single().
+ */
+void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
+			enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	dma_addr_t first = sg_dma_address(sg);
+	unsigned int span = sg_dma_len(sg);
+
+	/*
+	 * The first entry carries the start and length of the whole range;
+	 * widen it to page boundaries before tearing it down.
+	 */
+	span = PAGE_ALIGN((first & ~PAGE_MASK) + span);
+	first &= PAGE_MASK;
+
+	iommu_unmap(mapping->domain, first, span);
+	__free_iova(mapping, first, span);
+}
+
+/**
+ * arm_iommu_sync_sg_for_cpu
+ * @dev: valid struct device pointer
+ * @sg: list of buffers
+ * @nents: number of buffers to map (returned from dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ */
+void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+			int nents, enum dma_data_direction dir)
+{
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct scatterlist *s;
+	int i;
+
+	/* Nothing to do when the IOVA of the list is mapped coherently. */
+	if (iommu_is_iova_coherent(mapping->domain, sg_dma_address(sg)))
+		return;
+
+	for_each_sg(sg, s, nents, i) {
+		__dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir);
+	}
+}
+
+/**
+ * arm_iommu_sync_sg_for_device
+ * @dev: valid struct device pointer
+ * @sg: list of buffers
+ * @nents: number of buffers to map (returned from dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ */
+void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+			int nents, enum dma_data_direction dir)
+{
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct scatterlist *s;
+	int i;
+
+	/* Nothing to do when the IOVA of the list is mapped coherently. */
+	if (iommu_is_iova_coherent(mapping->domain, sg_dma_address(sg)))
+		return;
+
+	for_each_sg(sg, s, nents, i) {
+		__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
+	}
+}
+
+
+/**
+ * arm_coherent_iommu_map_page
+ * @dev: valid struct device pointer
+ * @page: page that buffer resides in
+ * @offset: offset into page for start of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ * @attrs: DMA attributes, used when deriving the IOMMU protection flags
+ *
+ * Coherent IOMMU aware version of arm_dma_map_page(): reserves an IOVA
+ * range and maps it, without doing any CPU cache maintenance itself.
+ *
+ * Returns the DMA address of the buffer, or DMA_ERROR_CODE on failure.
+ */
+static dma_addr_t arm_coherent_iommu_map_page(struct device *dev,
+		struct page *page, unsigned long offset, size_t size,
+		enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	/* Offset split into its sub-page part and its whole-page part. */
+	int sub_page_off = offset & ~PAGE_MASK;
+	int whole_page_off = offset & PAGE_MASK;
+	int map_len = PAGE_ALIGN(sub_page_off + size);
+	int prot;
+	dma_addr_t iova;
+
+	iova = __alloc_iova(mapping, map_len);
+	if (iova == DMA_ERROR_CODE)
+		return iova;
+
+	prot = __dma_direction_to_prot(dir);
+	prot = __get_iommu_pgprot(attrs, prot, is_dma_coherent(dev, attrs));
+
+	if (iommu_map(mapping->domain, iova,
+		      page_to_phys(page) + whole_page_off, map_len, prot) < 0) {
+		/* Give the IOVA range back when the mapping cannot be set up. */
+		__free_iova(mapping, iova, map_len);
+		return DMA_ERROR_CODE;
+	}
+
+	return iova + sub_page_off;
+}
+
+/**
+ * arm_iommu_map_page
+ * @dev: valid struct device pointer
+ * @page: page that buffer resides in
+ * @offset: offset into page for start of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ * @attrs: DMA attributes; DMA_ATTR_SKIP_CPU_SYNC suppresses the cache sync
+ *
+ * IOMMU aware version of arm_dma_map_page(). Performs the CPU-to-device
+ * cache maintenance when required and then defers to
+ * arm_coherent_iommu_map_page() for the actual mapping.
+ */
+static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
+		unsigned long offset, size_t size, enum dma_data_direction dir,
+		struct dma_attrs *attrs)
+{
+	bool skip_sync = is_dma_coherent(dev, attrs) ||
+			 dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs);
+
+	if (!skip_sync)
+		__dma_page_cpu_to_dev(page, offset, size, dir);
+
+	return arm_coherent_iommu_map_page(dev, page, offset, size, dir, attrs);
+}
+
+/**
+ * arm_iommu_unmap_page
+ * @dev: valid struct device pointer
+ * @handle: DMA address of buffer
+ * @size: size of buffer (same as passed to dma_map_page)
+ * @dir: DMA transfer direction (same as passed to dma_map_page)
+ * @attrs: DMA attributes; DMA_ATTR_SKIP_CPU_SYNC suppresses the cache sync
+ *
+ * IOMMU aware version of arm_dma_unmap_page(). Syncs the buffer for the
+ * CPU when required, then removes the translation and releases the IOVA.
+ */
+static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir,
+		struct dma_attrs *attrs)
+{
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	dma_addr_t page_iova = handle & PAGE_MASK;
+	int in_page = handle & ~PAGE_MASK;
+	int span = PAGE_ALIGN(size + in_page);
+	struct page *page;
+
+	/* The backing page is recovered from the IOMMU translation. */
+	page = phys_to_page(iommu_iova_to_phys(mapping->domain, page_iova));
+
+	if (!is_dma_coherent(dev, attrs) &&
+	    !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+		__dma_page_dev_to_cpu(page, in_page, size, dir);
+
+	iommu_unmap(mapping->domain, page_iova, span);
+	__free_iova(mapping, page_iova, span);
+}
+
+/*
+ * Sync a single mapping for CPU access: look up the page behind @handle
+ * through the IOMMU and run __dma_page_dev_to_cpu() on it, unless the IOMMU
+ * reports the address as coherent.
+ */
+static void arm_iommu_sync_single_for_cpu(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	unsigned int in_page = handle & ~PAGE_MASK;
+	struct page *page;
+
+	page = phys_to_page(iommu_iova_to_phys(mapping->domain,
+					       handle & PAGE_MASK));
+
+	/* The coherency query uses the unaligned handle. */
+	if (iommu_is_iova_coherent(mapping->domain, handle))
+		return;
+
+	__dma_page_dev_to_cpu(page, in_page, size, dir);
+}
+
+/*
+ * Sync a single mapping for device access: look up the page behind @handle
+ * through the IOMMU and run __dma_page_cpu_to_dev() on it, unless the IOMMU
+ * reports the address as coherent.
+ */
+static void arm_iommu_sync_single_for_device(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	unsigned int in_page = handle & ~PAGE_MASK;
+	struct page *page;
+
+	page = phys_to_page(iommu_iova_to_phys(mapping->domain,
+					       handle & PAGE_MASK));
+
+	/* The coherency query uses the unaligned handle. */
+	if (iommu_is_iova_coherent(mapping->domain, handle))
+		return;
+
+	__dma_page_cpu_to_dev(page, in_page, size, dir);
+}
+
+/*
+ * Ask the IOMMU layer whether @mask is usable for @dev. A device without
+ * an IOMMU mapping is reported as unsupported (0) after a warning.
+ */
+static int arm_iommu_dma_supported(struct device *dev, u64 mask)
+{
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
+
+	if (mapping)
+		return iommu_dma_supported(mapping->domain, dev, mask);
+
+	dev_warn(dev, "No IOMMU mapping for device\n");
+	return 0;
+}
+
+/* Returns 1 when @dma_addr is the DMA_ERROR_CODE sentinel, 0 otherwise. */
+static int arm_iommu_mapping_error(struct device *dev,
+				   dma_addr_t dma_addr)
+{
+	if (dma_addr == DMA_ERROR_CODE)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * DMA mapping operations backed by the arm_iommu_* helpers in this file.
+ * arm_iommu_attach_device() installs this table on a device unless the
+ * domain is in stage-1 bypass.
+ */
+const struct dma_map_ops iommu_ops = {
+ .alloc = arm_iommu_alloc_attrs,
+ .free = arm_iommu_free_attrs,
+ .mmap = arm_iommu_mmap_attrs,
+ .get_sgtable = arm_iommu_get_sgtable,
+
+ .map_page = arm_iommu_map_page,
+ .unmap_page = arm_iommu_unmap_page,
+ .sync_single_for_cpu = arm_iommu_sync_single_for_cpu,
+ .sync_single_for_device = arm_iommu_sync_single_for_device,
+
+ .map_sg = arm_iommu_map_sg,
+ .unmap_sg = arm_iommu_unmap_sg,
+ .sync_sg_for_cpu = arm_iommu_sync_sg_for_cpu,
+ .sync_sg_for_device = arm_iommu_sync_sg_for_device,
+
+ .set_dma_mask = arm_dma_set_mask,
+ .dma_supported = arm_iommu_dma_supported,
+ .mapping_error = arm_iommu_mapping_error,
+};
+
+/**
+ * arm_iommu_create_mapping
+ * @bus: pointer to the bus holding the client device (for IOMMU calls)
+ * @base: start address of the valid IO address space
+ * @size: maximum size of the valid IO address space
+ *
+ * Creates a mapping structure which holds information about used/unused
+ * IO address ranges, which is required to perform memory allocation and
+ * mapping with IOMMU aware functions.
+ *
+ * The client device needs to be attached to the mapping with the
+ * arm_iommu_attach_device function.
+ *
+ * Return: the new mapping, ERR_PTR(-EINVAL) when @size is too small to
+ * need any bitmap storage, or ERR_PTR(-ENOMEM) when an allocation
+ * (including the IOMMU domain) fails.
+ */
+struct dma_iommu_mapping *
+arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size)
+{
+ /* One bitmap bit per page of IO address space. */
+ unsigned int bits = size >> PAGE_SHIFT;
+ unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long);
+ struct dma_iommu_mapping *mapping;
+ int err = -ENOMEM;
+
+ if (!bitmap_size)
+ return ERR_PTR(-EINVAL);
+
+ mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL);
+ if (!mapping)
+ goto err;
+
+ /* Try kzalloc quietly first; fall back to vzalloc if it fails. */
+ mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL | __GFP_NOWARN |
+ __GFP_NORETRY);
+ if (!mapping->bitmap)
+ mapping->bitmap = vzalloc(bitmap_size);
+
+ if (!mapping->bitmap)
+ goto err2;
+
+ mapping->base = base;
+ mapping->bits = bits;
+ spin_lock_init(&mapping->lock);
+
+ mapping->domain = iommu_domain_alloc(bus);
+ if (!mapping->domain)
+ goto err3;
+
+ kref_init(&mapping->kref);
+ return mapping;
+ /* Unwind in reverse order of acquisition. */
+err3:
+ kvfree(mapping->bitmap);
+err2:
+ kfree(mapping);
+err:
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(arm_iommu_create_mapping);
+
+/*
+ * kref release callback: frees the IOMMU domain, then the bitmap, then
+ * the mapping structure that embeds @kref.
+ */
+static void release_iommu_mapping(struct kref *kref)
+{
+	struct dma_iommu_mapping *m;
+
+	m = container_of(kref, struct dma_iommu_mapping, kref);
+
+	iommu_domain_free(m->domain);
+	kvfree(m->bitmap);
+	kfree(m);
+}
+
+/* Drop one reference on @mapping; a NULL @mapping is ignored. */
+void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping)
+{
+	if (!mapping)
+		return;
+
+	kref_put(&mapping->kref, release_iommu_mapping);
+}
+EXPORT_SYMBOL(arm_iommu_release_mapping);
+
+/**
+ * arm_iommu_attach_device
+ * @dev: valid struct device pointer
+ * @mapping: io address space mapping structure (returned from
+ *	arm_iommu_create_mapping)
+ *
+ * Attaches the specified io address space mapping to the provided device.
+ * Unless the domain is in stage-1 bypass, this also replaces the dma
+ * operations (dma_map_ops pointer) with the IOMMU aware version. Domains
+ * flagged DOMAIN_ATTR_FAST are handed to fast_smmu_attach_device()
+ * instead. More than one client might be attached to the same io address
+ * space mapping.
+ *
+ * Returns 0 on success or the error reported by the attach call.
+ */
+int arm_iommu_attach_device(struct device *dev,
+			    struct dma_iommu_mapping *mapping)
+{
+	int fast = 0;
+	int bypass = 0;
+	int ret;
+
+	iommu_domain_get_attr(mapping->domain, DOMAIN_ATTR_FAST, &fast);
+	if (fast)
+		return fast_smmu_attach_device(dev, mapping);
+
+	ret = iommu_attach_device(mapping->domain, dev);
+	if (ret)
+		return ret;
+
+	iommu_domain_get_attr(mapping->domain, DOMAIN_ATTR_S1_BYPASS, &bypass);
+
+	/* The device holds a reference on the mapping while attached. */
+	kref_get(&mapping->kref);
+	dev->archdata.mapping = mapping;
+	if (!bypass)
+		set_dma_ops(dev, &iommu_ops);
+
+	pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev));
+	return 0;
+}
+EXPORT_SYMBOL(arm_iommu_attach_device);
+
+/**
+ * arm_iommu_detach_device
+ * @dev: valid struct device pointer
+ *
+ * Detaches the provided device from a previously attached map.
+ * Unless the domain is in stage-1 bypass, this also voids the dma
+ * operations (dma_map_ops pointer). Domains flagged DOMAIN_ATTR_FAST are
+ * handed to fast_smmu_detach_device() instead. A device that is not
+ * attached only triggers a warning.
+ */
+void arm_iommu_detach_device(struct device *dev)
+{
+	struct dma_iommu_mapping *mapping;
+	/*
+	 * Both attributes start at 0, as in arm_iommu_attach_device(), so
+	 * that a failing iommu_domain_get_attr() cannot leave them
+	 * uninitialised when they are tested below.
+	 */
+	int is_fast = 0, s1_bypass = 0;
+
+	mapping = to_dma_iommu_mapping(dev);
+	if (!mapping) {
+		dev_warn(dev, "Not attached\n");
+		return;
+	}
+
+	iommu_domain_get_attr(mapping->domain, DOMAIN_ATTR_FAST, &is_fast);
+	if (is_fast) {
+		fast_smmu_detach_device(dev, mapping);
+		return;
+	}
+
+	/* Read the attribute before the domain is detached and released. */
+	iommu_domain_get_attr(mapping->domain, DOMAIN_ATTR_S1_BYPASS,
+			      &s1_bypass);
+
+	if (msm_dma_unmap_all_for_dev(dev))
+		dev_warn(dev, "IOMMU detach with outstanding mappings\n");
+
+	iommu_detach_device(mapping->domain, dev);
+	/* Drops the reference taken at attach time; may free the mapping. */
+	kref_put(&mapping->kref, release_iommu_mapping);
+	dev->archdata.mapping = NULL;
+	if (!s1_bypass)
+		set_dma_ops(dev, NULL);
+
+	pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev));
+}
+EXPORT_SYMBOL(arm_iommu_detach_device);
+
+#endif
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 5a3117c287ca..4970252612c2 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -40,9 +40,35 @@
#include <asm/system_misc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
+#include <asm/edac.h>
+#include <soc/qcom/scm.h>
+
+#include <trace/events/exception.h>
static const char *fault_name(unsigned int esr);
+#ifdef CONFIG_KPROBES
+/*
+ * Give a running kprobe the chance to handle a fault taken outside user
+ * mode. Returns 1 when the kprobe fault handler claimed it, 0 otherwise.
+ */
+static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
+{
+	int handled;
+
+	if (user_mode(regs))
+		return 0;
+
+	/* kprobe_running() needs smp_processor_id() */
+	preempt_disable();
+	handled = kprobe_running() && kprobe_fault_handler(regs, esr);
+	preempt_enable();
+
+	return handled;
+}
+#else
+/* Without kprobes there is nobody to notify. */
+static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
+{
+	return 0;
+}
+#endif
+
/*
* Dump out the page tables associated with 'addr' in mm 'mm'.
*/
@@ -176,6 +202,8 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
{
struct siginfo si;
+ trace_user_fault(tsk, addr, esr);
+
if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) {
pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n",
tsk->comm, task_pid_nr(tsk), fault_name(esr), sig,
@@ -278,6 +306,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ if (notify_page_fault(regs, esr))
+ return 0;
+
tsk = current;
mm = tsk->mm;
@@ -297,16 +328,13 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
if (is_el0_instruction_abort(esr)) {
vm_flags = VM_EXEC;
- } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
+ } else if (((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) ||
+ ((esr & ESR_ELx_CM) && !(mm_flags & FAULT_FLAG_USER))) {
vm_flags = VM_WRITE;
mm_flags |= FAULT_FLAG_WRITE;
}
if (addr < USER_DS && is_permission_fault(esr, regs)) {
- /* regs->orig_addr_limit may be 0 if we entered from EL0 */
- if (regs->orig_addr_limit == KERNEL_DS)
- die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
-
if (is_el1_instruction_abort(esr))
die("Attempting to execute userspace memory", regs, esr);
@@ -429,6 +457,19 @@ no_context:
}
/*
+ * TLB conflict is already handled in EL2. This routine should return zero
+ * so that do_mem_abort() does not crash the kernel thinking the TLB
+ * conflict was not handled.
+ */
+#ifdef CONFIG_QCOM_TLB_EL2_HANDLER
+static int do_tlb_conf_fault(unsigned long addr,
+ unsigned int esr,
+ struct pt_regs *regs)
+{
+ return 0;
+}
+#endif
+/*
* First Level Translation Fault Handler
*
* We enter here because the first level page table doesn't contain a valid
@@ -461,6 +502,7 @@ static int __kprobes do_translation_fault(unsigned long addr,
*/
static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
+ arm64_check_cache_ecc(NULL);
return 1;
}
@@ -518,7 +560,11 @@ static const struct fault_info {
{ do_bad, SIGBUS, 0, "unknown 45" },
{ do_bad, SIGBUS, 0, "unknown 46" },
{ do_bad, SIGBUS, 0, "unknown 47" },
+#ifdef CONFIG_QCOM_TLB_EL2_HANDLER
+ { do_tlb_conf_fault, SIGBUS, 0, "TLB conflict abort" },
+#else
{ do_bad, SIGBUS, 0, "TLB conflict abort" },
+#endif
{ do_bad, SIGBUS, 0, "unknown 49" },
{ do_bad, SIGBUS, 0, "unknown 50" },
{ do_bad, SIGBUS, 0, "unknown 51" },
@@ -564,6 +610,22 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
arm64_notify_die("", regs, &info, esr);
}
+/*
+ * Handler for an instruction abort with branch-predictor hardening:
+ * applies the hardening for addresses above TASK_SIZE, enables IRQs and
+ * then hands the fault to do_mem_abort().
+ */
+asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr,
+ unsigned int esr,
+ struct pt_regs *regs)
+{
+ /*
+ * We've taken an instruction abort from userspace and not yet
+ * re-enabled IRQs. If the address is a kernel address, apply
+ * BP hardening prior to enabling IRQs and pre-emption.
+ */
+ if (addr > TASK_SIZE)
+ arm64_apply_bp_hardening();
+
+ local_irq_enable();
+ do_mem_abort(addr, esr, regs);
+}
+
/*
* Handle stack alignment exceptions.
*/
@@ -653,6 +715,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr_if_watchpoint,
return rv;
}
+NOKPROBE_SYMBOL(do_debug_exception);
#ifdef CONFIG_ARM64_PAN
int cpu_enable_pan(void *__unused)
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index d9c664ed6104..298db9789fc7 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -97,6 +97,7 @@ EXPORT_SYMBOL(flush_dcache_page);
/*
* Additional functions defined in assembly.
*/
+EXPORT_SYMBOL(flush_cache_all);
EXPORT_SYMBOL(flush_icache_range);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 60e113a7c5db..2e7702bd7e57 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -155,6 +155,7 @@ static void __init arm64_memory_present(void)
#endif
static phys_addr_t memory_limit = (phys_addr_t)ULLONG_MAX;
+static phys_addr_t bootloader_memory_limit;
/*
* Limit the memory size that was specified via FDT.
@@ -208,6 +209,11 @@ void __init arm64_memblock_init(void)
* via the linear mapping.
*/
if (memory_limit != (phys_addr_t)ULLONG_MAX) {
+ /*
+ * Save bootloader imposed memory limit before we overwrite
+ * memblock.
+ */
+ bootloader_memory_limit = memblock_end_of_DRAM();
memblock_enforce_memory_limit(memory_limit);
memblock_add(__pa_symbol(_text), (u64)(_end - _text));
}
@@ -223,7 +229,7 @@ void __init arm64_memblock_init(void)
* memory spans, randomize the linear region as well.
*/
if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) {
- range = range / ARM64_MEMSTART_ALIGN + 1;
+ range /= ARM64_MEMSTART_ALIGN;
memstart_addr -= ARM64_MEMSTART_ALIGN *
((range * memstart_offset_seed) >> 16);
}
@@ -432,6 +438,11 @@ void __init mem_init(void)
}
}
+/* Overwrite @count bytes starting at @s with zeroes. */
+static inline void poison_init_mem(void *s, size_t count)
+{
+ memset(s, 0, count);
+}
+
void free_initmem(void)
{
free_initmem_default(0);
@@ -457,6 +468,18 @@ static int __init keepinitrd_setup(char *__unused)
__setup("keepinitrd", keepinitrd_setup);
#endif
+#ifdef CONFIG_KERNEL_TEXT_RDONLY
+/*
+ * Mark the pages between the page-aligned _stext and _etext read-only
+ * (the kernel identity mapping for text).
+ */
+void set_kernel_text_ro(void)
+{
+	unsigned long text_start = PFN_ALIGN(_stext);
+	unsigned long text_end = PFN_ALIGN(_etext);
+	unsigned long text_pages = (text_end - text_start) >> PAGE_SHIFT;
+
+	set_memory_ro(text_start, text_pages);
+}
+#endif
/*
* Dump out memory limit information on panic.
*/
@@ -481,3 +504,137 @@ static int __init register_mem_limit_dumper(void)
return 0;
}
__initcall(register_mem_limit_dumper);
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * arch_add_memory() - arm64 back end for hot-adding [start, start + size).
+ * @nid: node the memory is added to
+ * @start: physical start address of the range
+ * @size: size of the range in bytes
+ * @for_device: forwarded to zone_for_memory()
+ *
+ * Builds the linear-map page tables for the range, then registers the
+ * pages with the zone via __add_pages().
+ *
+ * Returns 0 on success, -EINVAL when the range ends beyond what sparsemem
+ * can describe, or the error returned by __add_pages().
+ */
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
+{
+	pg_data_t *pgdat;
+	struct zone *zone;
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	unsigned long end_pfn = start_pfn + nr_pages;
+	unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
+	int ret;
+
+	if (end_pfn > max_sparsemem_pfn) {
+		/* Terminate the message and report a real errno, not -1. */
+		pr_err("end_pfn too big\n");
+		return -EINVAL;
+	}
+	hotplug_paging(start, size);
+
+	/*
+	 * Mark the first page in the range as unusable. This is needed
+	 * because __add_section (within __add_pages) wants pfn_valid
+	 * of it to be false, and in arm64 pfn_valid is implemented by
+	 * just checking at the nomap flag for existing blocks.
+	 *
+	 * A small trick here is that __add_section() requires only
+	 * phys_start_pfn (that is the first pfn of a section) to be
+	 * invalid. Regardless of whether it was assumed (by the function
+	 * author) that all pfns within a section are either all valid
+	 * or all invalid, it allows to avoid looping twice (once here,
+	 * second when memblock_clear_nomap() is called) through all
+	 * pfns of the section and modify only one pfn. Thanks to that,
+	 * further, in __add_zone() only this very first pfn is skipped
+	 * and corresponding page is not flagged reserved. Therefore it
+	 * is enough to correct this setup only for it.
+	 *
+	 * When arch_add_memory() returns the walk_memory_range() function
+	 * is called and passed with online_memory_block() callback,
+	 * which execution finally reaches the memory_block_action()
+	 * function, where also only the first pfn of a memory block is
+	 * checked to be reserved. Above, it was first pfn of a section,
+	 * here it is a block but
+	 * (drivers/base/memory.c):
+	 *     sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+	 * (include/linux/memory.h):
+	 *     #define MIN_MEMORY_BLOCK_SIZE     (1UL << SECTION_SIZE_BITS)
+	 * so we can consider block and section equivalently
+	 */
+	memblock_mark_nomap(start, 1<<PAGE_SHIFT);
+
+	pgdat = NODE_DATA(nid);
+
+	zone = pgdat->node_zones +
+		zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
+	ret = __add_pages(nid, zone, start_pfn, nr_pages);
+
+	/*
+	 * Make the pages usable after they have been added.
+	 * This will make pfn_valid return true
+	 */
+	memblock_clear_nomap(start, 1<<PAGE_SHIFT);
+
+	/*
+	 * This is a hack to avoid having to mix arch specific code
+	 * into arch independent code. SetPageReserved is supposed
+	 * to be called by __add_zone (within __add_section, within
+	 * __add_pages). However, when it is called there, it assumes that
+	 * pfn_valid returns true.  For the way pfn_valid is implemented
+	 * in arm64 (a check on the nomap flag), the only way to make
+	 * this evaluate true inside __add_zone is to clear the nomap
+	 * flags of blocks in architecture independent code.
+	 *
+	 * To avoid this, we set the Reserved flag here after we cleared
+	 * the nomap flag in the line above.
+	 */
+	SetPageReserved(pfn_to_page(start_pfn));
+
+	if (ret)
+		pr_warn("%s: Problem encountered in __add_pages() ret=%d\n",
+			__func__, ret);
+
+	return ret;
+}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+/*
+ * Translate the physical range [start, end) to its __va() addresses and
+ * remove the page tables covering it (direct-mapping mode).
+ */
+static void kernel_physical_mapping_remove(unsigned long start,
+					   unsigned long end)
+{
+	unsigned long va_start = (unsigned long)__va(start);
+	unsigned long va_end = (unsigned long)__va(end);
+
+	remove_pagetable(va_start, va_end, true);
+}
+
+/*
+ * arm64 back end for memory hot-remove: removes the pages of
+ * [start, start + size) from their zone and then tears down the mapping
+ * of the range. The mapping is removed even when __remove_pages() reports
+ * an error; that error is returned to the caller.
+ */
+int arch_remove_memory(u64 start, u64 size)
+{
+	unsigned long first_pfn = start >> PAGE_SHIFT;
+	unsigned long page_count = size >> PAGE_SHIFT;
+	struct zone *zone = page_zone(pfn_to_page(first_pfn));
+	int ret;
+
+	ret = __remove_pages(zone, first_pfn, page_count);
+	WARN_ON_ONCE(ret);
+
+	kernel_physical_mapping_remove(start, start + size);
+
+	return ret;
+}
+
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+/*
+ * Online-page callback: refuses (-EINVAL) pages whose pfn is at or above
+ * the pfn of bootloader_memory_limit, and onlines every other page.
+ */
+static int arm64_online_page(struct page *page)
+{
+	unsigned long pfn = page_to_pfn(page);
+	unsigned long limit_pfn = __phys_to_pfn(bootloader_memory_limit);
+
+	if (pfn < limit_pfn) {
+		__online_page_set_limits(page);
+		__online_page_increment_counters(page);
+		__online_page_free(page);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* Register arm64_online_page() as the online-page callback at subsys init. */
+static int __init arm64_memory_hotplug_init(void)
+{
+ set_online_page_callback(&arm64_online_page);
+ return 0;
+}
+subsys_initcall(arm64_memory_hotplug_init);
+#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index ddfe90299048..e8b8590f553a 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -29,6 +29,8 @@
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
+#include <linux/dma-contiguous.h>
+#include <linux/cma.h>
#include <linux/mm.h>
#include <asm/barrier.h>
@@ -61,6 +63,8 @@ static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
+static bool dma_overlap(phys_addr_t start, phys_addr_t end);
+
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
{
@@ -210,7 +214,8 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
next = pmd_addr_end(addr, end);
/* try section mapping first */
if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
- block_mappings_allowed(pgtable_alloc)) {
+ block_mappings_allowed(pgtable_alloc) &&
+ !dma_overlap(phys, phys + next - addr)) {
pmd_t old_pmd =*pmd;
pmd_set_huge(pmd, phys, prot);
/*
@@ -270,7 +275,8 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
* For 4K granule only, attempt to put down a 1GB block
*/
if (use_1G_block(addr, next, phys) &&
- block_mappings_allowed(pgtable_alloc)) {
+ block_mappings_allowed(pgtable_alloc) &&
+ !dma_overlap(phys, phys + next - addr)) {
pud_t old_pud = *pud;
pud_set_huge(pud, phys, prot);
@@ -386,7 +392,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
{
- unsigned long kernel_start = __pa_symbol(_stext);
+ unsigned long kernel_start = __pa_symbol(_text);
unsigned long kernel_end = __pa_symbol(__init_begin);
/*
@@ -403,7 +409,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
}
/*
- * This block overlaps the kernel text/rodata mapping.
+ * This block overlaps the kernel text/rodata mappings.
* Map the portion(s) which don't overlap.
*/
if (start < kernel_start)
@@ -418,7 +424,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
early_pgtable_alloc);
/*
- * Map the linear alias of the [_stext, __init_begin) interval as
+ * Map the linear alias of the [_text, __init_begin) interval as
* read-only/non-executable. This makes the contents of the
* region accessible to subsystems such as hibernate, but
* protects it from inadvertent modification or execution.
@@ -450,8 +456,8 @@ void mark_rodata_ro(void)
{
unsigned long section_size;
- section_size = (unsigned long)_etext - (unsigned long)_stext;
- create_mapping_late(__pa_symbol(_stext), (unsigned long)_stext,
+ section_size = (unsigned long)_etext - (unsigned long)_text;
+ create_mapping_late(__pa_symbol(_text), (unsigned long)_text,
section_size, PAGE_KERNEL_ROX);
/*
* mark .rodata as read only. Use __init_begin rather than __end_rodata
@@ -473,8 +479,8 @@ void fixup_init(void)
unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
}
-static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
- pgprot_t prot, struct vm_struct *vma)
+static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
+ pgprot_t prot, struct vm_struct *vma)
{
phys_addr_t pa_start = __pa_symbol(va_start);
unsigned long size = va_end - va_start;
@@ -532,11 +538,11 @@ static void __init map_kernel(pgd_t *pgd)
{
static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data;
- map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC, &vmlinux_text);
- map_kernel_chunk(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata);
- map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
- &vmlinux_init);
- map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
+ map_kernel_segment(pgd, _text, _etext, PAGE_KERNEL_EXEC, &vmlinux_text);
+ map_kernel_segment(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata);
+ map_kernel_segment(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
+ &vmlinux_init);
+ map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
/*
@@ -564,6 +570,37 @@ static void __init map_kernel(pgd_t *pgd)
kasan_copy_shadow(pgd);
}
+/* One physical range recorded by dma_contiguous_early_fixup(). */
+struct dma_contig_early_reserve {
+ phys_addr_t base;
+ unsigned long size;
+};
+
+/* Recorded ranges; dma_mmu_remap_num counts the entries in use. */
+static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS];
+
+static int dma_mmu_remap_num;
+
+/*
+ * Record the physical range [base, base + size) so that dma_overlap() can
+ * later report block mappings that would cover it.
+ *
+ * The table holds MAX_CMA_AREAS entries; a request beyond that is dropped
+ * with a warning instead of writing past the end of the array.
+ */
+void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
+{
+	if (WARN_ON(dma_mmu_remap_num >= MAX_CMA_AREAS))
+		return;
+
+	dma_mmu_remap[dma_mmu_remap_num].base = base;
+	dma_mmu_remap[dma_mmu_remap_num].size = size;
+	dma_mmu_remap_num++;
+}
+
+/*
+ * Returns true when [start, end) intersects any range recorded in
+ * dma_mmu_remap[], false otherwise.
+ */
+static bool dma_overlap(phys_addr_t start, phys_addr_t end)
+{
+	int idx;
+
+	for (idx = 0; idx < dma_mmu_remap_num; idx++) {
+		const struct dma_contig_early_reserve *region =
+							&dma_mmu_remap[idx];
+		phys_addr_t region_end = region->base + region->size;
+
+		/* Region lies entirely at or above the queried range. */
+		if (region->base >= end)
+			continue;
+		/* Region lies entirely at or below the queried range. */
+		if (region_end <= start)
+			continue;
+
+		return true;
+	}
+
+	return false;
+}
+
/*
* paging_init() sets up the page tables, initialises the zone memory
* maps and sets up the zero page.
@@ -591,9 +628,6 @@ void __init paging_init(void)
pgd_clear_fixmap();
memblock_free(pgd_phys, PAGE_SIZE);
- /* Ensure the zero page is visible to the page table walker */
- dsb(ishst);
-
/*
* We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
* allocated with it.
@@ -604,6 +638,439 @@ void __init paging_init(void)
bootmem_init();
}
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * Allocate one page with PGALLOC_GFP for use as a page table and return
+ * its physical address. A failed allocation or a failed
+ * pgtable_page_ctor() is fatal (BUG()).
+ */
+static phys_addr_t pgd_pgtable_alloc(void)
+{
+ void *ptr = (void *)__get_free_page(PGALLOC_GFP);
+ if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
+ BUG();
+
+ /* Ensure the zeroed page is visible to the page table walker */
+ dsb(ishst);
+ return __pa(ptr);
+}
+
+/*
+ * hotplug_paging() is used by memory hotplug to build new page tables
+ * for hot added memory.
+ *
+ * @start: physical start address of the hot added range
+ * @size: size of the range in bytes
+ *
+ * The new mapping is built in a temporary copy of swapper_pg_dir, which
+ * is then copied back while TTBR1 points at the temporary table.
+ */
+void hotplug_paging(phys_addr_t start, phys_addr_t size)
+{
+ struct page *pg;
+ phys_addr_t pgd_phys;
+ pgd_t *pgd;
+ int cpu;
+
+ /* Isolate every possible CPU except the one running this code. */
+ for_each_possible_cpu(cpu)
+ if (current->cpu != cpu)
+ sched_isolate_cpu(cpu);
+ /* Temporary pgd, accessed through the fixmap. */
+ pgd_phys = pgd_pgtable_alloc();
+ pgd = pgd_set_fixmap(pgd_phys);
+
+ memcpy(pgd, swapper_pg_dir, PAGE_SIZE);
+
+ /* Add the linear mapping of the new range to the temporary copy. */
+ __create_pgd_mapping(pgd, start, __phys_to_virt(start), size,
+ PAGE_KERNEL, pgd_pgtable_alloc);
+
+ /* Run on the temporary table while swapper_pg_dir is overwritten. */
+ cpu_replace_ttbr1(__va(pgd_phys));
+ memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
+ cpu_replace_ttbr1(swapper_pg_dir);
+
+ pgd_clear_fixmap();
+
+ /* The temporary pgd page is no longer needed. */
+ pg = phys_to_page(pgd_phys);
+ pgtable_page_dtor(pg);
+ __free_pages(pg, 0);
+ /* Undo the isolation done on entry. */
+ for_each_possible_cpu(cpu)
+ if (current->cpu != cpu)
+ sched_unisolate_cpu_unlocked(cpu);
+}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+#define PAGE_INUSE 0xFD
+
+/*
+ * Release the 2^@order pages starting at @page.
+ *
+ * Reserved pages are handed back one by one, through put_page_bootmem()
+ * or free_reserved_page() depending on the magic value stored in
+ * page->lru.next. All other pages are returned with free_pages(); when
+ * @direct is true, pgtable_page_dtor() is called on @page first.
+ */
+static void free_pagetable(struct page *page, int order, bool direct)
+{
+ unsigned long magic;
+ unsigned int nr_pages = 1 << order;
+
+ /* bootmem page has reserved flag */
+ if (PageReserved(page)) {
+ __ClearPageReserved(page);
+
+ magic = (unsigned long)page->lru.next;
+ if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+ while (nr_pages--)
+ put_page_bootmem(page++);
+ } else {
+ while (nr_pages--)
+ free_reserved_page(page++);
+ }
+ } else {
+ /*
+ * Only direct pagetable allocation (those allocated via
+ * hotplug) call the pgtable_page_ctor; vmemmap pgtable
+ * allocations don't.
+ */
+ if (direct)
+ pgtable_page_dtor(page);
+
+ free_pages((unsigned long)page_address(page), order);
+ }
+}
+
+/*
+ * Free the PTE table referenced by @pmd and clear @pmd, but only when
+ * every entry of that table is empty.
+ */
+static void free_pte_table(pmd_t *pmd, bool direct)
+{
+ pte_t *pte_start, *pte;
+ struct page *page;
+ int i;
+
+ pte_start = (pte_t *) pmd_page_vaddr(*pmd);
+ /* Bail out if any entry of the PTE table is still in use */
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ pte = pte_start + i;
+ if (!pte_none(*pte))
+ return;
+ }
+
+ page = pmd_page(*pmd);
+
+ free_pagetable(page, 0, direct);
+
+ /*
+ * This spin lock could only be taken in __pte_alloc_kernel
+ * in mm/memory.c and nowhere else (for arm64). Not sure if
+ * the function above can be called concurrently. In doubt,
+ * I am leaving it here for now, but it probably can be removed
+ */
+ spin_lock(&init_mm.page_table_lock);
+ pmd_clear(pmd);
+ spin_unlock(&init_mm.page_table_lock);
+}
+
+/*
+ * Free the PMD table referenced by @pud and clear @pud, but only when
+ * every entry of that table is empty.
+ */
+static void free_pmd_table(pud_t *pud, bool direct)
+{
+ pmd_t *pmd_start, *pmd;
+ struct page *page;
+ int i;
+
+ pmd_start = (pmd_t *) pud_page_vaddr(*pud);
+ /* Bail out if any entry of the PMD table is still in use */
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ pmd = pmd_start + i;
+ if (!pmd_none(*pmd))
+ return;
+ }
+
+ page = pud_page(*pud);
+
+ free_pagetable(page, 0, direct);
+
+ /*
+ * This spin lock could only be taken in __pte_alloc_kernel
+ * in mm/memory.c and nowhere else (for arm64). Not sure if
+ * the function above can be called concurrently. In doubt,
+ * I am leaving it here for now, but it probably can be removed
+ */
+ spin_lock(&init_mm.page_table_lock);
+ pud_clear(pud);
+ spin_unlock(&init_mm.page_table_lock);
+}
+
+/*
+ * When the PUD is folded on the PGD (three levels of paging),
+ * there's no need to free PUDs
+ */
+#if CONFIG_PGTABLE_LEVELS > 3
+/*
+ * Free the PUD table referenced by @pgd and clear @pgd, but only when
+ * every entry of that table is empty.
+ */
+static void free_pud_table(pgd_t *pgd, bool direct)
+{
+ pud_t *pud_start, *pud;
+ struct page *page;
+ int i;
+
+ pud_start = (pud_t *) pgd_page_vaddr(*pgd);
+ /* Bail out if any entry of the PUD table is still in use */
+ for (i = 0; i < PTRS_PER_PUD; i++) {
+ pud = pud_start + i;
+ if (!pud_none(*pud))
+ return;
+ }
+
+ page = pgd_page(*pgd);
+
+ free_pagetable(page, 0, direct);
+
+ /*
+ * This spin lock could only be
+ * taken in __pte_alloc_kernel in
+ * mm/memory.c and nowhere else
+ * (for arm64). Not sure if the
+ * function above can be called
+ * concurrently. In doubt,
+ * I am leaving it here for now,
+ * but it probably can be removed.
+ */
+ spin_lock(&init_mm.page_table_lock);
+ pgd_clear(pgd);
+ spin_unlock(&init_mm.page_table_lock);
+}
+#endif
+
+/*
+ * Walk the PTEs starting at @pte that cover [addr, end) and clear the ones
+ * that are present. Pages behind the entries are freed through
+ * free_pagetable(), except for whole pages of the direct mapping
+ * (@direct true). All TLBs are flushed once the walk is finished.
+ */
+static void remove_pte_table(pte_t *pte, unsigned long addr,
+ unsigned long end, bool direct)
+{
+ unsigned long next;
+ void *page_addr;
+
+ for (; addr < end; addr = next, pte++) {
+ /* Step to the next page boundary, capped at @end. */
+ next = (addr + PAGE_SIZE) & PAGE_MASK;
+ if (next > end)
+ next = end;
+
+ if (!pte_present(*pte))
+ continue;
+
+ if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
+ /*
+ * Do not free direct mapping pages since they were
+ * freed when offlining, or simply not in use.
+ */
+ if (!direct)
+ free_pagetable(pte_page(*pte), 0, direct);
+
+ /*
+ * This spin lock could only be
+ * taken in __pte_alloc_kernel in
+ * mm/memory.c and nowhere else
+ * (for arm64). Not sure if the
+ * function above can be called
+ * concurrently. In doubt,
+ * I am leaving it here for now,
+ * but it probably can be removed.
+ */
+ spin_lock(&init_mm.page_table_lock);
+ pte_clear(&init_mm, addr, pte);
+ spin_unlock(&init_mm.page_table_lock);
+ } else {
+ /*
+ * If we are here, we are freeing vmemmap pages since
+ * direct mapped memory ranges to be freed are aligned.
+ *
+ * If we are not removing the whole page, it means
+ * other page structs in this page are being used and
+ * we cannot remove them. So fill the unused page_structs
+ * with 0xFD, and remove the page when it is wholly
+ * filled with 0xFD.
+ */
+ memset((void *)addr, PAGE_INUSE, next - addr);
+
+ page_addr = page_address(pte_page(*pte));
+ if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
+ free_pagetable(pte_page(*pte), 0, direct);
+
+ /*
+ * This spin lock could only be
+ * taken in __pte_alloc_kernel in
+ * mm/memory.c and nowhere else
+ * (for arm64). Not sure if the
+ * function above can be called
+ * concurrently. In doubt,
+ * I am leaving it here for now,
+ * but it probably can be removed.
+ */
+ spin_lock(&init_mm.page_table_lock);
+ pte_clear(&init_mm, addr, pte);
+ spin_unlock(&init_mm.page_table_lock);
+ }
+ }
+ }
+
+ // I am adding this flush here in symmetry to the x86 code.
+ // Why do I need to call it here and not in remove_p[mu]d
+ flush_tlb_all();
+}
+
+/*
+ * Walk the PMD entries starting at @pmd that cover [addr, end). Section
+ * (block) entries are cleared directly; table entries are descended into
+ * with remove_pte_table() and the PTE table is freed afterwards if it
+ * became empty.
+ *
+ * NOTE(review): the alignment test on section entries is page-granular
+ * (PAGE_ALIGNED), so a page-aligned sub-range of a section clears the
+ * whole section entry - confirm callers only pass section-aligned ranges.
+ */
+static void remove_pmd_table(pmd_t *pmd, unsigned long addr,
+ unsigned long end, bool direct)
+{
+ unsigned long next;
+ void *page_addr;
+ pte_t *pte;
+
+ for (; addr < end; addr = next, pmd++) {
+ next = pmd_addr_end(addr, end);
+
+ if (!pmd_present(*pmd))
+ continue;
+
+ // check if we are using 2MB section mappings
+ if (pmd_sect(*pmd)) {
+ if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
+ /* Direct-mapping memory itself is not freed here. */
+ if (!direct) {
+ free_pagetable(pmd_page(*pmd),
+ get_order(PMD_SIZE), direct);
+ }
+ /*
+ * This spin lock could only be
+ * taken in __pte_alloc_kernel in
+ * mm/memory.c and nowhere else
+ * (for arm64). Not sure if the
+ * function above can be called
+ * concurrently. In doubt,
+ * I am leaving it here for now,
+ * but it probably can be removed.
+ */
+ spin_lock(&init_mm.page_table_lock);
+ pmd_clear(pmd);
+ spin_unlock(&init_mm.page_table_lock);
+ } else {
+ /* If here, we are freeing vmemmap pages. */
+ memset((void *)addr, PAGE_INUSE, next - addr);
+
+ /* Free the block once it is entirely PAGE_INUSE. */
+ page_addr = page_address(pmd_page(*pmd));
+ if (!memchr_inv(page_addr, PAGE_INUSE,
+ PMD_SIZE)) {
+ free_pagetable(pmd_page(*pmd),
+ get_order(PMD_SIZE), direct);
+
+ /*
+ * This spin lock could only be
+ * taken in __pte_alloc_kernel in
+ * mm/memory.c and nowhere else
+ * (for arm64). Not sure if the
+ * function above can be called
+ * concurrently. In doubt,
+ * I am leaving it here for now,
+ * but it probably can be removed.
+ */
+ spin_lock(&init_mm.page_table_lock);
+ pmd_clear(pmd);
+ spin_unlock(&init_mm.page_table_lock);
+ }
+ }
+ continue;
+ }
+
+ BUG_ON(!pmd_table(*pmd));
+
+ pte = pte_offset_map(pmd, addr);
+ remove_pte_table(pte, addr, next, direct);
+ free_pte_table(pmd, direct);
+ }
+}
+
+/*
+ * remove_pud_table() - tear down the PUD entries covering [addr, end).
+ * @pud:    PUD entry that maps @addr
+ * @addr:   first virtual address of the range
+ * @end:    end of the range (exclusive)
+ * @direct: true when the range belongs to the direct mapping; false for
+ *          vmemmap, in which case the backing pages are freed as well
+ *
+ * Non-present entries are skipped. Table entries are handed down to
+ * remove_pmd_table() and the PMD table is then released through
+ * free_pmd_table(). Section entries (1GB mappings with 4K granules) are
+ * handled here:
+ *
+ *  - when the sub-range covers the whole section, the backing memory is
+ *    freed (vmemmap only) and the entry is cleared;
+ *  - otherwise only part of the section is going away, so the removed part
+ *    is filled with PAGE_INUSE and the section is freed and cleared only
+ *    once every byte of it carries that marker.
+ *
+ * Entries are cleared under init_mm.page_table_lock. That lock is only
+ * taken in __pte_alloc_kernel() in mm/memory.c (for arm64), and it is not
+ * clear whether this function can run concurrently with it; the locking is
+ * kept to be safe but can probably be removed.
+ */
+static void remove_pud_table(pud_t *pud, unsigned long addr,
+			     unsigned long end, bool direct)
+{
+	unsigned long next;
+	pmd_t *pmd;
+	void *page_addr;
+
+	for (; addr < end; addr = next, pud++) {
+		next = pud_addr_end(addr, end);
+		if (!pud_present(*pud))
+			continue;
+		/*
+		 * If we are using 4K granules, check if we are using
+		 * 1GB section mapping.
+		 */
+		if (pud_sect(*pud)) {
+			/*
+			 * A section entry maps PUD_SIZE bytes at once, so it
+			 * may only be dropped outright when the range spans
+			 * the whole section. Checking for page alignment
+			 * here would unmap (and free) the complete section
+			 * even when just a part of it is being removed.
+			 */
+			if (IS_ALIGNED(addr, PUD_SIZE) &&
+			    IS_ALIGNED(next, PUD_SIZE)) {
+				if (!direct) {
+					free_pagetable(pud_page(*pud),
+						get_order(PUD_SIZE), direct);
+				}
+
+				spin_lock(&init_mm.page_table_lock);
+				pud_clear(pud);
+				spin_unlock(&init_mm.page_table_lock);
+			} else {
+				/* If here, we are freeing vmemmap pages. */
+				memset((void *)addr, PAGE_INUSE, next - addr);
+
+				page_addr = page_address(pud_page(*pud));
+				if (!memchr_inv(page_addr, PAGE_INUSE,
+						PUD_SIZE)) {
+					free_pagetable(pud_page(*pud),
+						get_order(PUD_SIZE), direct);
+
+					spin_lock(&init_mm.page_table_lock);
+					pud_clear(pud);
+					spin_unlock(&init_mm.page_table_lock);
+				}
+			}
+			continue;
+		}
+
+		BUG_ON(!pud_table(*pud));
+
+		pmd = pmd_offset(pud, addr);
+		remove_pmd_table(pmd, addr, next, direct);
+		free_pmd_table(pud, direct);
+	}
+}
+
+/*
+ * remove_pagetable() - remove the kernel page tables for [start, end).
+ * @start:  first virtual address of the range
+ * @end:    end of the range (exclusive)
+ * @direct: passed unchanged to the per-level helpers; vmemmap_free() calls
+ *          this with false
+ *
+ * Every possible CPU other than the one running this task is passed to
+ * sched_isolate_cpu() before the walk and to sched_unisolate_cpu_unlocked()
+ * after it (presumably so nothing else runs on them while the tables are
+ * torn down -- confirm against the scheduler code). The TLBs are flushed
+ * once the walk has finished.
+ */
+void remove_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+	unsigned long va = start;
+	unsigned long boundary;
+	pgd_t *pgdp;
+	int other;
+
+	for_each_possible_cpu(other) {
+		if (other == current->cpu)
+			continue;
+		sched_isolate_cpu(other);
+	}
+
+	while (va < end) {
+		boundary = pgd_addr_end(va, end);
+		pgdp = pgd_offset_k(va);
+
+		if (!pgd_none(*pgdp)) {
+			remove_pud_table(pud_offset(pgdp, va), va, boundary,
+					 direct);
+			/*
+			 * With three levels of paging the PUD is folded into
+			 * the PGD: free_pmd_table() has already released the
+			 * PMD page and reset the PGD==PUD entry, so there is
+			 * no separate PUD table to free in that case.
+			 */
+#if CONFIG_PGTABLE_LEVELS > 3
+			free_pud_table(pgdp, direct);
+#endif
+		}
+
+		va = boundary;
+	}
+
+	flush_tlb_all();
+
+	for_each_possible_cpu(other) {
+		if (other == current->cpu)
+			continue;
+		sched_unisolate_cpu_unlocked(other);
+	}
+}
+
+
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
/*
* Check whether a kernel address is valid (derived from arch/x86/).
*/
@@ -655,6 +1122,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
+ int ret = 0;
do {
next = pmd_addr_end(addr, end);
@@ -672,19 +1140,30 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
void *p = NULL;
p = vmemmap_alloc_block_buf(PMD_SIZE, node);
- if (!p)
- return -ENOMEM;
+ if (!p) {
+#ifdef CONFIG_MEMORY_HOTPLUG
+ vmemmap_free(start, end);
+#endif
+ ret = -ENOMEM;
+ break;
+ }
set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL));
} else
vmemmap_verify((pte_t *)pmd, node, addr, next);
} while (addr = next, addr != end);
- return 0;
+ if (ret)
+ return vmemmap_populate_basepages(start, end, node);
+ else
+ return ret;
}
#endif /* CONFIG_ARM64_64K_PAGES */
+/*
+ * vmemmap_free() - drop the vmemmap page tables covering [start, end).
+ *
+ * Only does anything when CONFIG_MEMORY_HOTREMOVE is enabled; otherwise the
+ * function body is empty.
+ */
void vmemmap_free(unsigned long start, unsigned long end)
{
+#ifdef CONFIG_MEMORY_HOTREMOVE
+ /* direct == false: this is a vmemmap range, not the direct mapping. */
+ remove_pagetable(start, end, false);
+#endif
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index ca6d268e3313..6ea71387ee12 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -70,6 +70,13 @@ static int change_memory_common(unsigned long addr, int numpages,
WARN_ON_ONCE(1);
}
+ if (!IS_ENABLED(CONFIG_FORCE_PAGES)) {
+ if (start < MODULES_VADDR || start >= MODULES_END)
+ return -EINVAL;
+
+ if (end < MODULES_VADDR || end >= MODULES_END)
+ return -EINVAL;
+ }
/*
* Kernel VA mappings are always live, and splitting live section
* mappings into page mappings may cause TLB conflicts. This means
@@ -139,4 +146,43 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
__pgprot(0),
__pgprot(PTE_VALID));
}
-#endif
+#ifdef CONFIG_HIBERNATION
+/*
+ * kernel_page_present() - is @page currently mapped valid in the linear map?
+ *
+ * With both CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION enabled, this
+ * tells whether CONFIG_DEBUG_PAGEALLOC has marked a linear map page as
+ * not-valid. The kernel page table is walked for the page's address and the
+ * PTE_VALID bit of the final entry is tested. The walk is modelled on
+ * kern_addr_valid(), which does nearly the same thing.
+ *
+ * The function is only ever called on the kernel linear map, where
+ * p?d_sect() implies p?d_present(), so a section entry is reported as
+ * present right away. Section mappings are disabled when debug_pagealloc
+ * is enabled.
+ *
+ * Returns true if the page is mapped valid, false otherwise.
+ */
+bool kernel_page_present(struct page *page)
+{
+	unsigned long va = (unsigned long)page_address(page);
+	pgd_t *pgdp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+
+	pgdp = pgd_offset_k(va);
+	if (pgd_none(*pgdp))
+		return false;
+
+	pudp = pud_offset(pgdp, va);
+	if (pud_none(*pudp))
+		return false;
+	if (pud_sect(*pudp))
+		return true;
+
+	pmdp = pmd_offset(pudp, va);
+	if (pmd_none(*pmdp))
+		return false;
+	if (pmd_sect(*pmdp))
+		return true;
+
+	/* Last level: the answer is the valid bit of the PTE itself. */
+	return pte_valid(*pte_offset_kernel(pmdp, va));
+}
+#endif /* CONFIG_HIBERNATION */
+#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index bc0f0a6c9c23..b78688806652 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -24,6 +24,7 @@
#include <asm/asm-offsets.h>
#include <asm/hwcap.h>
#include <asm/pgtable.h>
+#include <asm/pgtable-hwdef.h>
#include <asm/cpufeature.h>
#include <asm/alternative.h>
@@ -43,6 +44,52 @@
#define MAIR(attr, mt) ((attr) << ((mt) * 8))
/*
+ * cpu_cache_off()
+ *
+ * Turn the CPU D-cache off.
+ */
+ENTRY(cpu_cache_off)
+ mrs x0, sctlr_el1 // x0 = current SCTLR_EL1 (x0 is clobbered)
+ bic x0, x0, #1 << 2 // clear SCTLR.C
+ msr sctlr_el1, x0 // write the modified value back
+ isb // synchronize the SCTLR_EL1 update before returning
+ ret
+ENDPROC(cpu_cache_off)
+
+/*
+ * cpu_reset(loc)
+ *
+ * Perform a soft reset of the system. Put the CPU into the same state
+ * as it would be if it had been reset, and branch to what would be the
+ * reset vector. It must be executed with the flat identity mapping.
+ *
+ * - loc - location to jump to for soft reset
+ */
+ .align 5
+ENTRY(cpu_reset)
+ mrs x1, sctlr_el1 // x1 = current SCTLR_EL1 (x1 is clobbered)
+ bic x1, x1, #1 // clear bit 0 of the value
+ msr sctlr_el1, x1 // disable the MMU
+ isb // synchronize the SCTLR_EL1 update before branching
+ ret x0 // branch to loc, the address passed in x0
+ENDPROC(cpu_reset)
+
+/*
+ * cpu_soft_restart(x0, x1)
+ *
+ * Turn the D-cache off, flush the caches, then branch to the address that
+ * was passed in x0, with the value that was passed in x1 placed in x0.
+ *
+ * - x0 - address to branch to (address of cpu_reset(), per the comment below)
+ * - x1 - reset address, handed on to the branch target in x0
+ *
+ * x19 and x20 are overwritten and not restored, and the two bl instructions
+ * overwrite the link register.
+ */
+ENTRY(cpu_soft_restart)
+ /* Save address of cpu_reset() and reset address */
+ mov x19, x0
+ mov x20, x1
+
+ /* Turn D-cache off */
+ bl cpu_cache_off
+
+ /* Push out all dirty data, and ensure cache is empty */
+ bl flush_cache_all
+
+ mov x0, x20 // reset address becomes the argument of the target
+ ret x19 // branch to the address saved from x0
+ENDPROC(cpu_soft_restart)
+
+/*
* cpu_do_idle()
*
* Idle the processor (wait for interrupt).
@@ -63,58 +110,49 @@ ENTRY(cpu_do_suspend)
mrs x2, tpidr_el0
mrs x3, tpidrro_el0
mrs x4, contextidr_el1
- mrs x5, mair_el1
- mrs x6, cpacr_el1
- mrs x7, ttbr1_el1
- mrs x8, tcr_el1
- mrs x9, vbar_el1
- mrs x10, mdscr_el1
- mrs x11, oslsr_el1
- mrs x12, sctlr_el1
- mrs x13, tpidr_el1
- mrs x14, sp_el0
+ mrs x5, cpacr_el1
+ mrs x6, tcr_el1
+ mrs x7, vbar_el1
+ mrs x8, mdscr_el1
+ mrs x9, oslsr_el1
+ mrs x10, sctlr_el1
+ mrs x11, tpidr_el1
+ mrs x12, sp_el0
stp x2, x3, [x0]
- stp x4, x5, [x0, #16]
- stp x6, x7, [x0, #32]
- stp x8, x9, [x0, #48]
- stp x10, x11, [x0, #64]
- stp x12, x13, [x0, #80]
- str x14, [x0, #96]
+ stp x4, xzr, [x0, #16]
+ stp x5, x6, [x0, #32]
+ stp x7, x8, [x0, #48]
+ stp x9, x10, [x0, #64]
+ stp x11, x12, [x0, #80]
ret
ENDPROC(cpu_do_suspend)
/**
* cpu_do_resume - restore CPU register context
*
- * x0: Physical address of context pointer
- * x1: ttbr0_el1 to be restored
- *
- * Returns:
- * sctlr_el1 value in x0
+ * x0: Address of context pointer
*/
+ .pushsection ".idmap.text", "ax"
ENTRY(cpu_do_resume)
- /*
- * Invalidate local tlb entries before turning on MMU
- */
- tlbi vmalle1
ldp x2, x3, [x0]
ldp x4, x5, [x0, #16]
- ldp x6, x7, [x0, #32]
- ldp x8, x9, [x0, #48]
- ldp x10, x11, [x0, #64]
- ldp x12, x13, [x0, #80]
- ldr x14, [x0, #96]
+ ldp x6, x8, [x0, #32]
+ ldp x9, x10, [x0, #48]
+ ldp x11, x12, [x0, #64]
+ ldp x13, x14, [x0, #80]
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
- msr mair_el1, x5
msr cpacr_el1, x6
- msr ttbr0_el1, x1
- msr ttbr1_el1, x7
- tcr_set_idmap_t0sz x8, x7
+
+ /* Don't change t0sz here, mask those bits when restoring */
+ mrs x5, tcr_el1
+ bfi x8, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
+
msr tcr_el1, x8
msr vbar_el1, x9
msr mdscr_el1, x10
+ msr sctlr_el1, x12
msr tpidr_el1, x13
msr sp_el0, x14
/*
@@ -123,11 +161,10 @@ ENTRY(cpu_do_resume)
ubfx x11, x11, #1, #1
msr oslar_el1, x11
reset_pmuserenr_el0 x0 // Disable PMU access from EL0
- mov x0, x12
- dsb nsh // Make sure local tlb invalidation completed
isb
ret
ENDPROC(cpu_do_resume)
+ .popsection
#endif
/*
@@ -185,6 +222,7 @@ ENDPROC(idmap_cpu_replace_ttbr1)
* Initialise the processor for turning the MMU on. Return in x0 the
* value of the SCTLR_EL1 register.
*/
+ .pushsection ".idmap.text", "ax"
ENTRY(__cpu_setup)
tlbi vmalle1 // Invalidate local TLB
dsb nsh
@@ -273,5 +311,17 @@ ENDPROC(__cpu_setup)
*/
.type crval, #object
crval:
+#ifdef CONFIG_ARM64_ICACHE_DISABLE
+#define CR_IBIT 0
+#else
+#define CR_IBIT 0x1000
+#endif
+
+#ifdef CONFIG_ARM64_DCACHE_DISABLE
+#define CR_CBIT 0
+#else
+#define CR_CBIT 0x4
+#endif
.word 0xfcffffff // clear
- .word 0x34d5d91d // set
+ .word 0x34d5d91d | CR_IBIT | CR_CBIT // set
+ .popsection