Diffstat (limited to 'arch/arm64/mm/mmu.c')
 arch/arm64/mm/mmu.c | 519
 1 file changed, 499 insertions(+), 20 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index ddfe90299048..e8b8590f553a 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -29,6 +29,8 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/stop_machine.h>
+#include <linux/dma-contiguous.h>
+#include <linux/cma.h>
 #include <linux/mm.h>
 
 #include <asm/barrier.h>
@@ -61,6 +63,8 @@ static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
 static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
 static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
 
+static bool dma_overlap(phys_addr_t start, phys_addr_t end);
+
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
                               unsigned long size, pgprot_t vma_prot)
 {
@@ -210,7 +214,8 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
                 next = pmd_addr_end(addr, end);
                 /* try section mapping first */
                 if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
-                     block_mappings_allowed(pgtable_alloc)) {
+                     block_mappings_allowed(pgtable_alloc) &&
+                     !dma_overlap(phys, phys + next - addr)) {
                         pmd_t old_pmd =*pmd;
                         pmd_set_huge(pmd, phys, prot);
                         /*
@@ -270,7 +275,8 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
                  * For 4K granule only, attempt to put down a 1GB block
                  */
                 if (use_1G_block(addr, next, phys) &&
-                    block_mappings_allowed(pgtable_alloc)) {
+                    block_mappings_allowed(pgtable_alloc) &&
+                    !dma_overlap(phys, phys + next - addr)) {
                         pud_t old_pud = *pud;
                         pud_set_huge(pud, phys, prot);
 
@@ -386,7 +392,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
 
 static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
 {
-        unsigned long kernel_start = __pa_symbol(_stext);
+        unsigned long kernel_start = __pa_symbol(_text);
         unsigned long kernel_end = __pa_symbol(__init_begin);
 
         /*
@@ -403,7 +409,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
         }
 
         /*
-         * This block overlaps the kernel text/rodata mapping.
+         * This block overlaps the kernel text/rodata mappings.
          * Map the portion(s) which don't overlap.
          */
         if (start < kernel_start)
@@ -418,7 +424,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
                              early_pgtable_alloc);
 
         /*
-         * Map the linear alias of the [_stext, __init_begin) interval as
+         * Map the linear alias of the [_text, __init_begin) interval as
          * read-only/non-executable. This makes the contents of the
          * region accessible to subsystems such as hibernate, but
          * protects it from inadvertent modification or execution.
@@ -450,8 +456,8 @@ void mark_rodata_ro(void)
 {
         unsigned long section_size;
 
-        section_size = (unsigned long)_etext - (unsigned long)_stext;
-        create_mapping_late(__pa_symbol(_stext), (unsigned long)_stext,
+        section_size = (unsigned long)_etext - (unsigned long)_text;
+        create_mapping_late(__pa_symbol(_text), (unsigned long)_text,
                             section_size, PAGE_KERNEL_ROX);
         /*
          * mark .rodata as read only. Use __init_begin rather than __end_rodata
@@ -473,8 +479,8 @@ void fixup_init(void)
         unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
 }
 
-static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
-                                    pgprot_t prot, struct vm_struct *vma)
+static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
+                                      pgprot_t prot, struct vm_struct *vma)
 {
         phys_addr_t pa_start = __pa_symbol(va_start);
         unsigned long size = va_end - va_start;
@@ -532,11 +538,11 @@ static void __init map_kernel(pgd_t *pgd)
 {
         static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data;
 
-        map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC, &vmlinux_text);
-        map_kernel_chunk(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata);
-        map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
-                         &vmlinux_init);
-        map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
+        map_kernel_segment(pgd, _text, _etext, PAGE_KERNEL_EXEC, &vmlinux_text);
+        map_kernel_segment(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata);
+        map_kernel_segment(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
+                           &vmlinux_init);
+        map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
 
         if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
                 /*
@@ -564,6 +570,37 @@ static void __init map_kernel(pgd_t *pgd)
         kasan_copy_shadow(pgd);
 }
 
+struct dma_contig_early_reserve {
+        phys_addr_t base;
+        unsigned long size;
+};
+
+static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS];
+
+static int dma_mmu_remap_num;
+
+void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
+{
+        dma_mmu_remap[dma_mmu_remap_num].base = base;
+        dma_mmu_remap[dma_mmu_remap_num].size = size;
+        dma_mmu_remap_num++;
+}
+
+static bool dma_overlap(phys_addr_t start, phys_addr_t end)
+{
+        int i;
+
+        for (i = 0; i < dma_mmu_remap_num; i++) {
+                phys_addr_t dma_base = dma_mmu_remap[i].base;
+                phys_addr_t dma_end = dma_mmu_remap[i].base +
+                        dma_mmu_remap[i].size;
+
+                if ((dma_base < end) && (dma_end > start))
+                        return true;
+        }
+        return false;
+}
+
 /*
  * paging_init() sets up the page tables, initialises the zone memory
  * maps and sets up the zero page.
 */
@@ -591,9 +628,6 @@ void __init paging_init(void)
         pgd_clear_fixmap();
         memblock_free(pgd_phys, PAGE_SIZE);
 
-        /* Ensure the zero page is visible to the page table walker */
-        dsb(ishst);
-
         /*
         * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
         * allocated with it.
@@ -604,6 +638,439 @@ void __init paging_init(void)
         bootmem_init();
 }
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+static phys_addr_t pgd_pgtable_alloc(void)
+{
+        void *ptr = (void *)__get_free_page(PGALLOC_GFP);
+        if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
+                BUG();
+
+        /* Ensure the zeroed page is visible to the page table walker */
+        dsb(ishst);
+        return __pa(ptr);
+}
+
+/*
+ * hotplug_paging() is used by memory hotplug to build new page tables
+ * for hot added memory.
+ */
+void hotplug_paging(phys_addr_t start, phys_addr_t size)
+{
+        struct page *pg;
+        phys_addr_t pgd_phys;
+        pgd_t *pgd;
+        int cpu;
+
+        for_each_possible_cpu(cpu)
+                if (current->cpu != cpu)
+                        sched_isolate_cpu(cpu);
+        pgd_phys = pgd_pgtable_alloc();
+        pgd = pgd_set_fixmap(pgd_phys);
+
+        memcpy(pgd, swapper_pg_dir, PAGE_SIZE);
+
+        __create_pgd_mapping(pgd, start, __phys_to_virt(start), size,
+                             PAGE_KERNEL, pgd_pgtable_alloc);
+
+        cpu_replace_ttbr1(__va(pgd_phys));
+        memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
+        cpu_replace_ttbr1(swapper_pg_dir);
+
+        pgd_clear_fixmap();
+
+        pg = phys_to_page(pgd_phys);
+        pgtable_page_dtor(pg);
+        __free_pages(pg, 0);
+        for_each_possible_cpu(cpu)
+                if (current->cpu != cpu)
+                        sched_unisolate_cpu_unlocked(cpu);
+}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+#define PAGE_INUSE 0xFD
+
+static void free_pagetable(struct page *page, int order, bool direct)
+{
+        unsigned long magic;
+        unsigned int nr_pages = 1 << order;
+
+        /* bootmem page has reserved flag */
+        if (PageReserved(page)) {
+                __ClearPageReserved(page);
+
+                magic = (unsigned long)page->lru.next;
+                if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+                        while (nr_pages--)
+                                put_page_bootmem(page++);
+                } else {
+                        while (nr_pages--)
+                                free_reserved_page(page++);
+                }
+        } else {
+                /*
+                 * Only direct pagetable allocation (those allocated via
+                 * hotplug) call the pgtable_page_ctor; vmemmap pgtable
+                 * allocations don't.
+                 */
+                if (direct)
+                        pgtable_page_dtor(page);
+
+                free_pages((unsigned long)page_address(page), order);
+        }
+}
+
+static void free_pte_table(pmd_t *pmd, bool direct)
+{
+        pte_t *pte_start, *pte;
+        struct page *page;
+        int i;
+
+        pte_start = (pte_t *) pmd_page_vaddr(*pmd);
+        /* Check if there is no valid entry in the PMD */
+        for (i = 0; i < PTRS_PER_PTE; i++) {
+                pte = pte_start + i;
+                if (!pte_none(*pte))
+                        return;
+        }
+
+        page = pmd_page(*pmd);
+
+        free_pagetable(page, 0, direct);
+
+        /*
+         * This spin lock could be only taken in _pte_aloc_kernel
+         * in mm/memory.c and nowhere else (for arm64). Not sure if
+         * the function above can be called concurrently. In doubt,
+         * I am living it here for now, but it probably can be removed
+         */
+        spin_lock(&init_mm.page_table_lock);
+        pmd_clear(pmd);
+        spin_unlock(&init_mm.page_table_lock);
+}
+
+static void free_pmd_table(pud_t *pud, bool direct)
+{
+        pmd_t *pmd_start, *pmd;
+        struct page *page;
+        int i;
+
+        pmd_start = (pmd_t *) pud_page_vaddr(*pud);
+        /* Check if there is no valid entry in the PMD */
+        for (i = 0; i < PTRS_PER_PMD; i++) {
+                pmd = pmd_start + i;
+                if (!pmd_none(*pmd))
+                        return;
+        }
+
+        page = pud_page(*pud);
+
+        free_pagetable(page, 0, direct);
+
+        /*
+         * This spin lock could be only taken in _pte_aloc_kernel
+         * in mm/memory.c and nowhere else (for arm64). Not sure if
+         * the function above can be called concurrently. In doubt,
+         * I am living it here for now, but it probably can be removed
+         */
+        spin_lock(&init_mm.page_table_lock);
+        pud_clear(pud);
+        spin_unlock(&init_mm.page_table_lock);
+}
+
+/*
+ * When the PUD is folded on the PGD (three levels of paging),
+ * there's no need to free PUDs
+ */
+#if CONFIG_PGTABLE_LEVELS > 3
+static void free_pud_table(pgd_t *pgd, bool direct)
+{
+        pud_t *pud_start, *pud;
+        struct page *page;
+        int i;
+
+        pud_start = (pud_t *) pgd_page_vaddr(*pgd);
+        /* Check if there is no valid entry in the PUD */
+        for (i = 0; i < PTRS_PER_PUD; i++) {
+                pud = pud_start + i;
+                if (!pud_none(*pud))
+                        return;
+        }
+
+        page = pgd_page(*pgd);
+
+        free_pagetable(page, 0, direct);
+
+        /*
+         * This spin lock could be only
+         * taken in _pte_aloc_kernel in
+         * mm/memory.c and nowhere else
+         * (for arm64). Not sure if the
+         * function above can be called
+         * concurrently. In doubt,
+         * I am living it here for now,
+         * but it probably can be removed.
+         */
+        spin_lock(&init_mm.page_table_lock);
+        pgd_clear(pgd);
+        spin_unlock(&init_mm.page_table_lock);
+}
+#endif
+
+static void remove_pte_table(pte_t *pte, unsigned long addr,
+        unsigned long end, bool direct)
+{
+        unsigned long next;
+        void *page_addr;
+
+        for (; addr < end; addr = next, pte++) {
+                next = (addr + PAGE_SIZE) & PAGE_MASK;
+                if (next > end)
+                        next = end;
+
+                if (!pte_present(*pte))
+                        continue;
+
+                if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
+                        /*
+                         * Do not free direct mapping pages since they were
+                         * freed when offlining, or simplely not in use.
+                         */
+                        if (!direct)
+                                free_pagetable(pte_page(*pte), 0, direct);
+
+                        /*
+                         * This spin lock could be only
+                         * taken in _pte_aloc_kernel in
+                         * mm/memory.c and nowhere else
+                         * (for arm64). Not sure if the
+                         * function above can be called
+                         * concurrently. In doubt,
+                         * I am living it here for now,
+                         * but it probably can be removed.
+                         */
+                        spin_lock(&init_mm.page_table_lock);
+                        pte_clear(&init_mm, addr, pte);
+                        spin_unlock(&init_mm.page_table_lock);
+                } else {
+                        /*
+                         * If we are here, we are freeing vmemmap pages since
+                         * direct mapped memory ranges to be freed are aligned.
+                         *
+                         * If we are not removing the whole page, it means
+                         * other page structs in this page are being used and
+                         * we canot remove them. So fill the unused page_structs
+                         * with 0xFD, and remove the page when it is wholly
+                         * filled with 0xFD.
+                         */
+                        memset((void *)addr, PAGE_INUSE, next - addr);
+
+                        page_addr = page_address(pte_page(*pte));
+                        if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
+                                free_pagetable(pte_page(*pte), 0, direct);
+
+                                /*
+                                 * This spin lock could be only
+                                 * taken in _pte_aloc_kernel in
+                                 * mm/memory.c and nowhere else
+                                 * (for arm64). Not sure if the
+                                 * function above can be called
+                                 * concurrently. In doubt,
+                                 * I am living it here for now,
+                                 * but it probably can be removed.
+                                 */
+                                spin_lock(&init_mm.page_table_lock);
+                                pte_clear(&init_mm, addr, pte);
+                                spin_unlock(&init_mm.page_table_lock);
+                        }
+                }
+        }
+
+        // I am adding this flush here in simmetry to the x86 code.
+        // Why do I need to call it here and not in remove_p[mu]d
+        flush_tlb_all();
+}
+
+static void remove_pmd_table(pmd_t *pmd, unsigned long addr,
+        unsigned long end, bool direct)
+{
+        unsigned long next;
+        void *page_addr;
+        pte_t *pte;
+
+        for (; addr < end; addr = next, pmd++) {
+                next = pmd_addr_end(addr, end);
+
+                if (!pmd_present(*pmd))
+                        continue;
+
+                // check if we are using 2MB section mappings
+                if (pmd_sect(*pmd)) {
+                        if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
+                                if (!direct) {
+                                        free_pagetable(pmd_page(*pmd),
+                                                get_order(PMD_SIZE), direct);
+                                }
+                                /*
+                                 * This spin lock could be only
+                                 * taken in _pte_aloc_kernel in
+                                 * mm/memory.c and nowhere else
+                                 * (for arm64). Not sure if the
+                                 * function above can be called
+                                 * concurrently. In doubt,
+                                 * I am living it here for now,
+                                 * but it probably can be removed.
+                                 */
+                                spin_lock(&init_mm.page_table_lock);
+                                pmd_clear(pmd);
+                                spin_unlock(&init_mm.page_table_lock);
+                        } else {
+                                /* If here, we are freeing vmemmap pages. */
+                                memset((void *)addr, PAGE_INUSE, next - addr);
+
+                                page_addr = page_address(pmd_page(*pmd));
+                                if (!memchr_inv(page_addr, PAGE_INUSE,
+                                                PMD_SIZE)) {
+                                        free_pagetable(pmd_page(*pmd),
+                                                get_order(PMD_SIZE), direct);
+
+                                        /*
+                                         * This spin lock could be only
+                                         * taken in _pte_aloc_kernel in
+                                         * mm/memory.c and nowhere else
+                                         * (for arm64). Not sure if the
+                                         * function above can be called
+                                         * concurrently. In doubt,
+                                         * I am living it here for now,
+                                         * but it probably can be removed.
+                                         */
+                                        spin_lock(&init_mm.page_table_lock);
+                                        pmd_clear(pmd);
+                                        spin_unlock(&init_mm.page_table_lock);
+                                }
+                        }
+                        continue;
+                }
+
+                BUG_ON(!pmd_table(*pmd));
+
+                pte = pte_offset_map(pmd, addr);
+                remove_pte_table(pte, addr, next, direct);
+                free_pte_table(pmd, direct);
+        }
+}
+
+static void remove_pud_table(pud_t *pud, unsigned long addr,
+        unsigned long end, bool direct)
+{
+        unsigned long next;
+        pmd_t *pmd;
+        void *page_addr;
+
+        for (; addr < end; addr = next, pud++) {
+                next = pud_addr_end(addr, end);
+                if (!pud_present(*pud))
+                        continue;
+                /*
+                 * If we are using 4K granules, check if we are using
+                 * 1GB section mapping.
+                 */
+                if (pud_sect(*pud)) {
+                        if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
+                                if (!direct) {
+                                        free_pagetable(pud_page(*pud),
+                                                get_order(PUD_SIZE), direct);
+                                }
+
+                                /*
+                                 * This spin lock could be only
+                                 * taken in _pte_aloc_kernel in
+                                 * mm/memory.c and nowhere else
+                                 * (for arm64). Not sure if the
+                                 * function above can be called
+                                 * concurrently. In doubt,
+                                 * I am living it here for now,
+                                 * but it probably can be removed.
+                                 */
+                                spin_lock(&init_mm.page_table_lock);
+                                pud_clear(pud);
+                                spin_unlock(&init_mm.page_table_lock);
+                        } else {
+                                /* If here, we are freeing vmemmap pages. */
+                                memset((void *)addr, PAGE_INUSE, next - addr);
+
+                                page_addr = page_address(pud_page(*pud));
+                                if (!memchr_inv(page_addr, PAGE_INUSE,
+                                                PUD_SIZE)) {
+
+                                        free_pagetable(pud_page(*pud),
+                                                get_order(PUD_SIZE), direct);
+
+                                        /*
+                                         * This spin lock could be only
+                                         * taken in _pte_aloc_kernel in
+                                         * mm/memory.c and nowhere else
+                                         * (for arm64). Not sure if the
+                                         * function above can be called
+                                         * concurrently. In doubt,
+                                         * I am living it here for now,
+                                         * but it probably can be removed.
+                                         */
+                                        spin_lock(&init_mm.page_table_lock);
+                                        pud_clear(pud);
+                                        spin_unlock(&init_mm.page_table_lock);
+                                }
+                        }
+                        continue;
+                }
+
+                BUG_ON(!pud_table(*pud));
+
+                pmd = pmd_offset(pud, addr);
+                remove_pmd_table(pmd, addr, next, direct);
+                free_pmd_table(pud, direct);
+        }
+}
+
+void remove_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+        unsigned long next;
+        unsigned long addr;
+        pgd_t *pgd;
+        pud_t *pud;
+        int cpu;
+
+        for_each_possible_cpu(cpu)
+                if (current->cpu != cpu)
+                        sched_isolate_cpu(cpu);
+        for (addr = start; addr < end; addr = next) {
+                next = pgd_addr_end(addr, end);
+
+                pgd = pgd_offset_k(addr);
+                if (pgd_none(*pgd))
+                        continue;
+
+                pud = pud_offset(pgd, addr);
+                remove_pud_table(pud, addr, next, direct);
+                /*
+                 * When the PUD is folded on the PGD (three levels of paging),
+                 * I did already clear the PMD page in free_pmd_table,
+                 * and reset the corresponding PGD==PUD entry.
+                 */
+#if CONFIG_PGTABLE_LEVELS > 3
+                free_pud_table(pgd, direct);
+#endif
+        }
+
+        flush_tlb_all();
+        for_each_possible_cpu(cpu)
+                if (current->cpu != cpu)
+                        sched_unisolate_cpu_unlocked(cpu);
+}
+
+
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
 /*
  * Check whether a kernel address is valid (derived from arch/x86/).
  */
@@ -655,6 +1122,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
         pgd_t *pgd;
         pud_t *pud;
         pmd_t *pmd;
+        int ret = 0;
 
         do {
                 next = pmd_addr_end(addr, end);
@@ -672,19 +1140,30 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
                         void *p = NULL;
 
                         p = vmemmap_alloc_block_buf(PMD_SIZE, node);
-                        if (!p)
-                                return -ENOMEM;
+                        if (!p) {
+#ifdef CONFIG_MEMORY_HOTPLUG
+                                vmemmap_free(start, end);
+#endif
+                                ret = -ENOMEM;
+                                break;
+                        }
 
                         set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL));
                 } else
                         vmemmap_verify((pte_t *)pmd, node, addr, next);
         } while (addr = next, addr != end);
 
-        return 0;
+        if (ret)
+                return vmemmap_populate_basepages(start, end, node);
+        else
+                return ret;
 }
 #endif  /* CONFIG_ARM64_64K_PAGES */
 
 void vmemmap_free(unsigned long start, unsigned long end)
 {
+#ifdef CONFIG_MEMORY_HOTREMOVE
+        remove_pagetable(start, end, false);
+#endif
 }
 #endif  /* CONFIG_SPARSEMEM_VMEMMAP */
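Editor's note: the `!dma_overlap()` checks added to `alloc_init_pmd()` and `alloc_init_pud()` stop the linear map from using 2MB/1GB block mappings over memory that a CMA area will later need to remap with different attributes; `dma_contiguous_early_fixup()` records each area so the check can veto the block. The standalone program below is a minimal model of that bookkeeping (userspace integer types instead of `phys_addr_t`, hypothetical addresses), illustrating the half-open interval test only — it is not kernel code from this patch.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MAX_CMA_AREAS 8

    struct early_reserve { uint64_t base; uint64_t size; };

    static struct early_reserve dma_mmu_remap[MAX_CMA_AREAS];
    static int dma_mmu_remap_num;

    /* Mirrors dma_contiguous_early_fixup(): remember each CMA area. */
    static void record_cma_area(uint64_t base, uint64_t size)
    {
            dma_mmu_remap[dma_mmu_remap_num].base = base;
            dma_mmu_remap[dma_mmu_remap_num].size = size;
            dma_mmu_remap_num++;
    }

    /* Mirrors dma_overlap(): true if [start, end) intersects any recorded area. */
    static bool dma_overlap(uint64_t start, uint64_t end)
    {
            for (int i = 0; i < dma_mmu_remap_num; i++) {
                    uint64_t dma_base = dma_mmu_remap[i].base;
                    uint64_t dma_end = dma_mmu_remap[i].base + dma_mmu_remap[i].size;

                    if (dma_base < end && dma_end > start)
                            return true;
            }
            return false;
    }

    int main(void)
    {
            /* Hypothetical CMA area at 0x8000_0000, 64 MiB. */
            record_cma_area(0x80000000ULL, 64 << 20);

            /* A 2 MiB block candidate that clips the area is rejected (prints 1)... */
            printf("%d\n", dma_overlap(0x7FE00000ULL, 0x80000000ULL + (2 << 20)));
            /* ...while one that ends exactly at the area's base is allowed (prints 0). */
            printf("%d\n", dma_overlap(0x7FC00000ULL, 0x80000000ULL));
            return 0;
    }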
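`hotplug_paging()` builds the linear mapping for a hot-added range in a scratch PGD (a copy of `swapper_pg_dir`), briefly switches TTBR1 to it, copies the updated entries back into `swapper_pg_dir`, then switches back, with the other CPUs isolated while the swap is in flight. A typical caller would be an `arch_add_memory()` hook; the fragment below is only an assumed sketch of such a hook following a ~v4.9-era hotplug API (`zone_for_memory()`/`__add_pages()` prototypes differ between kernel versions and are not part of this diff).

    /* Hypothetical sketch: wiring hotplug_paging() into memory hotplug. */
    int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
    {
            unsigned long start_pfn = start >> PAGE_SHIFT;
            unsigned long nr_pages = size >> PAGE_SHIFT;
            pg_data_t *pgdat = NODE_DATA(nid);
            struct zone *zone = pgdat->node_zones +
                    zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);

            /* Extend the kernel linear map to cover the new physical range
             * before the core code touches any struct pages in it. */
            hotplug_paging(start, size);

            /* Then hand the range to the generic hotplug code. */
            return __add_pages(nid, zone, start_pfn, nr_pages);
    }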
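On the removal side, the diff only wires `remove_pagetable()` into `vmemmap_free()` (with `direct = false`). Tearing down the linear map of a removed range would be the `direct = true` case, invoked from an `arch_remove_memory()`-style hook that this patch does not add; the fragment below is a hypothetical sketch of that counterpart, again with version-dependent prototypes.

    /* Hypothetical sketch: unmapping a hot-removed range with remove_pagetable(). */
    #ifdef CONFIG_MEMORY_HOTREMOVE
    int arch_remove_memory(u64 start, u64 size)
    {
            unsigned long start_pfn = start >> PAGE_SHIFT;
            unsigned long nr_pages = size >> PAGE_SHIFT;
            struct zone *zone = page_zone(pfn_to_page(start_pfn));
            int ret;

            /* Release struct pages and zone resources first. */
            ret = __remove_pages(zone, start_pfn, nr_pages);
            if (ret)
                    return ret;

            /* Then unmap the range from the kernel linear map
             * (direct == true: the mapped memory itself is not freed here). */
            remove_pagetable((unsigned long)__va(start),
                             (unsigned long)__va(start + size), true);
            return 0;
    }
    #endif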
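`remove_pte_table()` can be asked to free a range that covers only part of a vmemmap page, while other `struct page` entries in the same page are still live. The patch handles this by poisoning the unused sub-range with `PAGE_INUSE` (0xFD) and freeing the page only once `memchr_inv()` reports it is poisoned end to end. The standalone program below models that decision with a plain buffer and a local `memchr_inv()` substitute; sizes and values are illustrative only.

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    #define PAGE_SIZE  4096
    #define PAGE_INUSE 0xFD

    /* Userspace stand-in for the kernel's memchr_inv():
     * returns NULL when every byte of the buffer equals 'c'. */
    static void *memchr_inv_model(const void *s, int c, size_t n)
    {
            const unsigned char *p = s;
            for (size_t i = 0; i < n; i++)
                    if (p[i] != (unsigned char)c)
                            return (void *)(p + i);
            return NULL;
    }

    /* Poison [off, off + len) of the page; free it only when fully poisoned. */
    static bool release_partial(unsigned char *page, size_t off, size_t len)
    {
            memset(page + off, PAGE_INUSE, len);
            return memchr_inv_model(page, PAGE_INUSE, PAGE_SIZE) == NULL;
    }

    int main(void)
    {
            static unsigned char page[PAGE_SIZE];   /* pretend vmemmap page, in use */

            /* First half freed: the page must be kept (prints 0). */
            printf("%d\n", release_partial(page, 0, PAGE_SIZE / 2));
            /* Second half freed too: the whole page can now go (prints 1). */
            printf("%d\n", release_partial(page, PAGE_SIZE / 2, PAGE_SIZE / 2));
            return 0;
    }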
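`free_pte_table()`, `free_pmd_table()` and `free_pud_table()` all follow the same pattern: scan every slot of the lower-level table and bail out if any entry is still valid, so the table page is only returned once the last mapping beneath it is gone. A tiny standalone model of that scan, with 0 standing in for a "none" entry and an arbitrary table size:

    #include <stdbool.h>
    #include <stdio.h>

    #define PTRS_PER_PTE 512        /* 4K-granule value, for illustration */

    /* Return true when the table holds no valid entries and may be freed. */
    static bool table_is_empty(const unsigned long long *table)
    {
            for (int i = 0; i < PTRS_PER_PTE; i++)
                    if (table[i] != 0)      /* model of !pte_none() */
                            return false;
            return true;
    }

    int main(void)
    {
            static unsigned long long pte_table[PTRS_PER_PTE];

            pte_table[7] = 0x80000000ULL | 3;       /* one live entry */
            printf("empty? %d\n", table_is_empty(pte_table));

            pte_table[7] = 0;                       /* last mapping cleared */
            printf("empty? %d\n", table_is_empty(pte_table));
            return 0;
    }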
