Diffstat (limited to 'arch/arm64/mm/mmu.c')
| -rw-r--r-- | arch/arm64/mm/mmu.c | 701 |
1 files changed, 343 insertions, 358 deletions
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 62096a7e047a..e82aabb3c5e2 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -32,10 +32,8 @@
 #include <linux/dma-contiguous.h>
 #include <linux/cma.h>
 
-#include <asm/barrier.h>
 #include <asm/cputype.h>
 #include <asm/fixmap.h>
-#include <asm/kasan.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
@@ -48,21 +46,14 @@
 
 u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
 
-u64 kimage_voffset __read_mostly;
-EXPORT_SYMBOL(kimage_voffset);
-
 /*
  * Empty_zero_page is a special page that is used for zero-initialized data
  * and COW.
  */
-unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
+struct page *empty_zero_page;
 EXPORT_SYMBOL(empty_zero_page);
 
-static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
-static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
-static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
-
-static bool dma_overlap(phys_addr_t start, phys_addr_t end);
+static bool __init dma_overlap(phys_addr_t start, phys_addr_t end);
 
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 			      unsigned long size, pgprot_t vma_prot)
@@ -75,30 +66,16 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 }
 EXPORT_SYMBOL(phys_mem_access_prot);
 
-static phys_addr_t __init early_pgtable_alloc(void)
+static void __init *early_alloc(unsigned long sz)
 {
 	phys_addr_t phys;
 	void *ptr;
 
-	phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+	phys = memblock_alloc(sz, sz);
 	BUG_ON(!phys);
-
-	/*
-	 * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
-	 * slot will be free, so we can (ab)use the FIX_PTE slot to initialise
-	 * any level of table.
-	 */
-	ptr = pte_set_fixmap(phys);
-
-	memset(ptr, 0, PAGE_SIZE);
-
-	/*
-	 * Implicit barriers also ensure the zeroed page is visible to the page
-	 * table walker
-	 */
-	pte_clear_fixmap();
-
-	return phys;
+	ptr = __va(phys);
+	memset(ptr, 0, sz);
+	return ptr;
 }
 
 /*
@@ -122,30 +99,24 @@ static void split_pmd(pmd_t *pmd, pte_t *pte)
 static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 				  unsigned long end, unsigned long pfn,
 				  pgprot_t prot,
-				  phys_addr_t (*pgtable_alloc)(void))
+				  void *(*alloc)(unsigned long size))
 {
 	pte_t *pte;
 
 	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
-		phys_addr_t pte_phys;
-		BUG_ON(!pgtable_alloc);
-		pte_phys = pgtable_alloc();
-		pte = pte_set_fixmap(pte_phys);
+		pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
 		if (pmd_sect(*pmd))
 			split_pmd(pmd, pte);
-		__pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
+		__pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
 		flush_tlb_all();
-		pte_clear_fixmap();
 	}
 	BUG_ON(pmd_bad(*pmd));
 
-	pte = pte_set_fixmap_offset(pmd, addr);
+	pte = pte_offset_kernel(pmd, addr);
 	do {
 		set_pte(pte, pfn_pte(pfn, prot));
 		pfn++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
-
-	pte_clear_fixmap();
 }
 
 static void split_pud(pud_t *old_pud, pmd_t *pmd)
@@ -160,29 +131,10 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd)
 	} while (pmd++, i++, i < PTRS_PER_PMD);
 }
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
-static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
-{
-
-	/*
-	 * If debug_page_alloc is enabled we must map the linear map
-	 * using pages. However, other mappings created by
-	 * create_mapping_noalloc must use sections in some cases. Allow
-	 * sections to be used in those cases, where no pgtable_alloc
-	 * function is provided.
-	 */
-	return !pgtable_alloc || !debug_pagealloc_enabled();
-}
-#else
-static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
-{
-	return true;
-}
-#endif
-
-static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
+static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
+				  unsigned long addr, unsigned long end,
 				  phys_addr_t phys, pgprot_t prot,
-				  phys_addr_t (*pgtable_alloc)(void))
+				  void *(*alloc)(unsigned long size), bool pages)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -191,10 +143,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 	 * Check for initial section mappings in the pgd/pud and remove them.
 	 */
 	if (pud_none(*pud) || pud_sect(*pud)) {
-		phys_addr_t pmd_phys;
-		BUG_ON(!pgtable_alloc);
-		pmd_phys = pgtable_alloc();
-		pmd = pmd_set_fixmap(pmd_phys);
+		pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t));
 		if (pud_sect(*pud)) {
 			/*
 			 * need to have the 1G of mappings continue to be
@@ -202,21 +151,19 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 			 */
 			split_pud(pud, pmd);
 		}
-		__pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
+		pud_populate(mm, pud, pmd);
 		flush_tlb_all();
-		pmd_clear_fixmap();
 	}
 	BUG_ON(pud_bad(*pud));
 
-	pmd = pmd_set_fixmap_offset(pud, addr);
+	pmd = pmd_offset(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
 		/* try section mapping first */
-		if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
-		      block_mappings_allowed(pgtable_alloc) &&
-		      !dma_overlap(phys, phys + next - addr)) {
+		if (!pages && ((addr | next | phys) & ~SECTION_MASK) == 0) {
 			pmd_t old_pmd =*pmd;
-			pmd_set_huge(pmd, phys, prot);
+			set_pmd(pmd, __pmd(phys |
+					   pgprot_val(mk_sect_prot(prot))));
 			/*
 			 * Check for previous table entries created during
 			 * boot (__create_page_tables) and flush them.
@@ -224,19 +171,17 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 			if (!pmd_none(old_pmd)) {
 				flush_tlb_all();
 				if (pmd_table(old_pmd)) {
-					phys_addr_t table = pmd_page_paddr(old_pmd);
+					phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0));
 					if (!WARN_ON_ONCE(slab_is_available()))
 						memblock_free(table, PAGE_SIZE);
 				}
 			}
 		} else {
 			alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
-				       prot, pgtable_alloc);
+				       prot, alloc);
 		}
 		phys += next - addr;
 	} while (pmd++, addr = next, addr != end);
-
-	pmd_clear_fixmap();
 }
 
 static inline bool use_1G_block(unsigned long addr, unsigned long next,
@@ -251,22 +196,21 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
 	return true;
 }
 
-static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
+static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
+				  unsigned long addr, unsigned long end,
 				  phys_addr_t phys, pgprot_t prot,
-				  phys_addr_t (*pgtable_alloc)(void))
+				  void *(*alloc)(unsigned long size), bool force_pages)
 {
 	pud_t *pud;
 	unsigned long next;
 
 	if (pgd_none(*pgd)) {
-		phys_addr_t pud_phys;
-		BUG_ON(!pgtable_alloc);
-		pud_phys = pgtable_alloc();
-		__pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
+		pud = alloc(PTRS_PER_PUD * sizeof(pud_t));
+		pgd_populate(mm, pgd, pud);
 	}
 	BUG_ON(pgd_bad(*pgd));
 
-	pud = pud_set_fixmap_offset(pgd, addr);
+	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
 
@@ -274,10 +218,12 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 		 * For 4K granule only, attempt to put down a 1GB block
 		 */
 		if (use_1G_block(addr, next, phys) &&
-		    block_mappings_allowed(pgtable_alloc) &&
-		    !dma_overlap(phys, phys + next - addr)) {
+		    !force_pages &&
+		    !dma_overlap(phys, phys + next - addr) &&
+		    !IS_ENABLED(CONFIG_FORCE_PAGES)) {
 			pud_t old_pud = *pud;
-			pud_set_huge(pud, phys, prot);
+			set_pud(pud, __pud(phys |
+					   pgprot_val(mk_sect_prot(prot))));
 
 			/*
 			 * If we have an old value for a pud, it will
@@ -289,274 +235,359 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 			if (!pud_none(old_pud)) {
 				flush_tlb_all();
 				if (pud_table(old_pud)) {
-					phys_addr_t table = pud_page_paddr(old_pud);
+					phys_addr_t table = __pa(pmd_offset(&old_pud, 0));
 					if (!WARN_ON_ONCE(slab_is_available()))
 						memblock_free(table, PAGE_SIZE);
 				}
 			}
 		} else {
-			alloc_init_pmd(pud, addr, next, phys, prot,
-				       pgtable_alloc);
+			alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc, force_pages);
 		}
 		phys += next - addr;
 	} while (pud++, addr = next, addr != end);
-
-	pud_clear_fixmap();
 }
 
 /*
  * Create the page directory entries and any necessary page tables for the
  * mapping specified by 'md'.
  */
-static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt,
+static void __create_mapping(struct mm_struct *mm, pgd_t *pgd,
+				    phys_addr_t phys, unsigned long virt,
 				    phys_addr_t size, pgprot_t prot,
-				    phys_addr_t (*pgtable_alloc)(void))
+				    void *(*alloc)(unsigned long size), bool force_pages)
 {
 	unsigned long addr, length, end, next;
 
-	/*
-	 * If the virtual and physical address don't have the same offset
-	 * within a page, we cannot map the region as the caller expects.
-	 */
-	if (WARN_ON((phys ^ virt) & ~PAGE_MASK))
-		return;
-
-	phys &= PAGE_MASK;
 	addr = virt & PAGE_MASK;
 	length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));
 
 	end = addr + length;
 	do {
 		next = pgd_addr_end(addr, end);
-		alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc);
+		alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc, force_pages);
 		phys += next - addr;
 	} while (pgd++, addr = next, addr != end);
 }
 
-static phys_addr_t late_pgtable_alloc(void)
+static void *late_alloc(unsigned long size)
 {
-	void *ptr = (void *)__get_free_page(PGALLOC_GFP);
-	BUG_ON(!ptr);
-
-	/* Ensure the zeroed page is visible to the page table walker */
-	dsb(ishst);
-	return __pa(ptr);
-}
+	void *ptr;
 
-static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
-				 unsigned long virt, phys_addr_t size,
-				 pgprot_t prot,
-				 phys_addr_t (*alloc)(void))
-{
-	init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc);
+	BUG_ON(size > PAGE_SIZE);
+	ptr = (void *)__get_free_page(PGALLOC_GFP);
+	BUG_ON(!ptr);
+	return ptr;
 }
 
-/*
- * This function can only be used to modify existing table entries,
- * without allocating new levels of table. Note that this permits the
- * creation of new section or page entries.
- */
-static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
-				  phys_addr_t size, pgprot_t prot)
+static void __init create_mapping(phys_addr_t phys, unsigned long virt,
+				  phys_addr_t size, pgprot_t prot, bool force_pages)
 {
 	if (virt < VMALLOC_START) {
 		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
 			&phys, virt);
 		return;
 	}
-	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
-			     NULL);
+	__create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt,
+			 size, prot, early_alloc, force_pages);
 }
 
 void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
 			       unsigned long virt, phys_addr_t size,
 			       pgprot_t prot)
 {
-	__create_pgd_mapping(mm->pgd, phys, virt, size, prot,
-			     late_pgtable_alloc);
+	__create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot,
+			 late_alloc, false);
 }
 
-static void create_mapping_late(phys_addr_t phys, unsigned long virt,
-				  phys_addr_t size, pgprot_t prot)
+static inline pmd_t *pmd_off_k(unsigned long virt)
 {
-	if (virt < VMALLOC_START) {
-		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
-			&phys, virt);
-		return;
-	}
-
-	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
-			     late_pgtable_alloc);
+	return pmd_offset(pud_offset(pgd_offset_k(virt), virt), virt);
 }
 
-static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
+void __init remap_as_pages(unsigned long start, unsigned long size)
 {
-	unsigned long kernel_start = __pa(_stext);
-	unsigned long kernel_end = __pa(_etext);
+	unsigned long addr;
+	unsigned long end = start + size;
 
 	/*
-	 * Take care not to create a writable alias for the
-	 * read-only text and rodata sections of the kernel image.
+	 * Make start and end PMD_SIZE aligned, observing memory
+	 * boundaries
 	 */
+	if (memblock_is_memory(start & PMD_MASK))
+		start = start & PMD_MASK;
+	if (memblock_is_memory(ALIGN(end, PMD_SIZE)))
+		end = ALIGN(end, PMD_SIZE);
 
-	/* No overlap with the kernel text */
-	if (end < kernel_start || start >= kernel_end) {
-		__create_pgd_mapping(pgd, start, __phys_to_virt(start),
-				     end - start, PAGE_KERNEL,
-				     early_pgtable_alloc);
-		return;
-	}
+	size = end - start;
 
 	/*
-	 * This block overlaps the kernel text mapping.
-	 * Map the portion(s) which don't overlap.
+	 * Clear previous low-memory mapping
 	 */
-	if (start < kernel_start)
-		__create_pgd_mapping(pgd, start,
-				     __phys_to_virt(start),
-				     kernel_start - start, PAGE_KERNEL,
-				     early_pgtable_alloc);
-	if (kernel_end < end)
-		__create_pgd_mapping(pgd, kernel_end,
-				     __phys_to_virt(kernel_end),
-				     end - kernel_end, PAGE_KERNEL,
-				     early_pgtable_alloc);
+	for (addr = __phys_to_virt(start); addr < __phys_to_virt(end);
+	     addr += PMD_SIZE) {
+		pmd_t *pmd;
+		pmd = pmd_off_k(addr);
+		if (pmd_bad(*pmd) || pmd_sect(*pmd))
+			pmd_clear(pmd);
+	}
 
-	/*
-	 * Map the linear alias of the [_stext, _etext) interval as
-	 * read-only/non-executable. This makes the contents of the
-	 * region accessible to subsystems such as hibernate, but
-	 * protects it from inadvertent modification or execution.
-	 */
-	__create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start),
-			     kernel_end - kernel_start, PAGE_KERNEL_RO,
-			     early_pgtable_alloc);
+	create_mapping(start, __phys_to_virt(start), size, PAGE_KERNEL, true);
 }
 
-static void __init map_mem(pgd_t *pgd)
+struct dma_contig_early_reserve {
+	phys_addr_t base;
+	unsigned long size;
+};
+
+static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata;
+
+static int dma_mmu_remap_num __initdata;
+
+void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
 {
-	struct memblock_region *reg;
+	dma_mmu_remap[dma_mmu_remap_num].base = base;
+	dma_mmu_remap[dma_mmu_remap_num].size = size;
+	dma_mmu_remap_num++;
+}
 
-	/* map all the memory banks */
-	for_each_memblock(memory, reg) {
-		phys_addr_t start = reg->base;
-		phys_addr_t end = start + reg->size;
+static bool __init dma_overlap(phys_addr_t start, phys_addr_t end)
+{
+	int i;
 
-		if (start >= end)
-			break;
+	for (i = 0; i < dma_mmu_remap_num; i++) {
+		phys_addr_t dma_base = dma_mmu_remap[i].base;
+		phys_addr_t dma_end = dma_mmu_remap[i].base +
+			dma_mmu_remap[i].size;
 
-		__map_memblock(pgd, start, end);
+		if ((dma_base < end) && (dma_end > start))
+			return true;
 	}
+	return false;
 }
 
-void mark_rodata_ro(void)
+static void __init dma_contiguous_remap(void)
 {
-	if (!IS_ENABLED(CONFIG_DEBUG_RODATA))
+	int i;
+	for (i = 0; i < dma_mmu_remap_num; i++)
+		remap_as_pages(dma_mmu_remap[i].base,
+			       dma_mmu_remap[i].size);
+}
+
+static void create_mapping_late(phys_addr_t phys, unsigned long virt,
+				  phys_addr_t size, pgprot_t prot)
+{
+	if (virt < VMALLOC_START) {
+		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
+			&phys, virt);
 		return;
+	}
 
-	create_mapping_late(__pa(_stext), (unsigned long)_stext,
-				(unsigned long)_etext - (unsigned long)_stext,
-				PAGE_KERNEL_ROX);
+	return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK),
+				phys, virt, size, prot, late_alloc,
+				IS_ENABLED(CONFIG_FORCE_PAGES));
 }
 
-void fixup_init(void)
+#ifdef CONFIG_DEBUG_RODATA
+static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
 {
 	/*
-	 * Unmap the __init region but leave the VM area in place. This
-	 * prevents the region from being reused for kernel modules, which
-	 * is not supported by kallsyms.
+	 * Set up the executable regions using the existing section mappings
+	 * for now. This will get more fine grained later once all memory
+	 * is mapped
 	 */
-	unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
+	unsigned long kernel_x_start = round_down(__pa(_stext), SWAPPER_BLOCK_SIZE);
+	unsigned long kernel_x_end = round_up(__pa(__init_end), SWAPPER_BLOCK_SIZE);
+
+	if (end < kernel_x_start) {
+		create_mapping(start, __phys_to_virt(start),
+			end - start, PAGE_KERNEL, false);
+	} else if (start >= kernel_x_end) {
+		create_mapping(start, __phys_to_virt(start),
+			end - start, PAGE_KERNEL, false);
+	} else {
+		if (start < kernel_x_start)
+			create_mapping(start, __phys_to_virt(start),
+				kernel_x_start - start,
+				PAGE_KERNEL, false);
+		create_mapping(kernel_x_start,
+				__phys_to_virt(kernel_x_start),
+				kernel_x_end - kernel_x_start,
+				PAGE_KERNEL_EXEC, false);
+		if (kernel_x_end < end)
+			create_mapping(kernel_x_end,
+				__phys_to_virt(kernel_x_end),
+				end - kernel_x_end,
+				PAGE_KERNEL, false);
+	}
 }
+#else
+static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
+{
+	create_mapping(start, __phys_to_virt(start), end - start,
+			PAGE_KERNEL_EXEC, false);
+}
+#endif
 
-static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
-				    pgprot_t prot, struct vm_struct *vma)
+static void __init map_mem(void)
 {
-	phys_addr_t pa_start = __pa(va_start);
-	unsigned long size = va_end - va_start;
+	struct memblock_region *reg;
+	phys_addr_t limit;
 
-	BUG_ON(!PAGE_ALIGNED(pa_start));
-	BUG_ON(!PAGE_ALIGNED(size));
+	/*
+	 * Temporarily limit the memblock range. We need to do this as
+	 * create_mapping requires puds, pmds and ptes to be allocated from
+	 * memory addressable from the initial direct kernel mapping.
+	 *
+	 * The initial direct kernel mapping, located at swapper_pg_dir, gives
+	 * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps,
+	 * memory starting from PHYS_OFFSET (which must be aligned to 2MB as
+	 * per Documentation/arm64/booting.txt).
+	 */
+	limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE;
+	memblock_set_current_limit(limit);
+
+	/* map all the memory banks */
+	for_each_memblock(memory, reg) {
+		phys_addr_t start = reg->base;
+		phys_addr_t end = start + reg->size;
 
-	__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
-			     early_pgtable_alloc);
+		if (start >= end)
+			break;
 
-	vma->addr = va_start;
-	vma->phys_addr = pa_start;
-	vma->size = size;
-	vma->flags = VM_MAP;
-	vma->caller = __builtin_return_address(0);
+		if (ARM64_SWAPPER_USES_SECTION_MAPS) {
+			/*
+			 * For the first memory bank align the start address and
+			 * current memblock limit to prevent create_mapping() from
+			 * allocating pte page tables from unmapped memory. With
+			 * the section maps, if the first block doesn't end on section
+			 * size boundary, create_mapping() will try to allocate a pte
+			 * page, which may be returned from an unmapped area.
+			 * When section maps are not used, the pte page table for the
+			 * current limit is already present in swapper_pg_dir.
+			 */
+			if (start < limit)
+				start = ALIGN(start, SECTION_SIZE);
+			if (end < limit) {
+				limit = end & SECTION_MASK;
+				memblock_set_current_limit(limit);
+			}
+		}
+		__map_memblock(start, end);
+	}
 
-	vm_area_add_early(vma);
+	/* Limit no longer required. */
+	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
 }
-
-/*
- * Create fine-grained mappings for the kernel.
- */
-static void __init map_kernel(pgd_t *pgd)
+#ifdef CONFIG_FORCE_PAGES
+static noinline void __init split_and_set_pmd(pmd_t *pmd, unsigned long addr,
+				unsigned long end, unsigned long pfn)
 {
-	static struct vm_struct vmlinux_text, vmlinux_init, vmlinux_data;
+	pte_t *pte, *start_pte;
 
-	map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC, &vmlinux_text);
-	map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
-			 &vmlinux_init);
-	map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
+	start_pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t));
+	pte = start_pte;
 
-	if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
-		/*
-		 * The fixmap falls in a separate pgd to the kernel, and doesn't
-		 * live in the carveout for the swapper_pg_dir. We can simply
-		 * re-use the existing dir for the fixmap.
-		 */
-		set_pgd(pgd_offset_raw(pgd, FIXADDR_START),
-			*pgd_offset_k(FIXADDR_START));
-	} else if (CONFIG_PGTABLE_LEVELS > 3) {
-		/*
-		 * The fixmap shares its top level pgd entry with the kernel
-		 * mapping. This can really only occur when we are running
-		 * with 16k/4 levels, so we can simply reuse the pud level
-		 * entry instead.
-		 */
-		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
-		set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START),
-			__pud(__pa(bm_pmd) | PUD_TYPE_TABLE));
-		pud_clear_fixmap();
-	} else {
-		BUG();
-	}
+	do {
+		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+		pfn++;
+	} while (pte++, addr += PAGE_SIZE, addr != end);
 
-	kasan_copy_shadow(pgd);
+	set_pmd(pmd, __pmd((__pa(start_pte)) | PMD_TYPE_TABLE));
 }
 
-struct dma_contig_early_reserve {
-	phys_addr_t base;
-	unsigned long size;
-};
+static noinline void __init remap_pages(void)
+{
+	struct memblock_region *reg;
 
-static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS];
+	for_each_memblock(memory, reg) {
+		phys_addr_t phys_pgd = reg->base;
+		phys_addr_t phys_end = reg->base + reg->size;
+		unsigned long addr_pgd = (unsigned long)__va(phys_pgd);
+		unsigned long end = (unsigned long)__va(phys_end);
+		pmd_t *pmd = NULL;
+		pud_t *pud = NULL;
+		pgd_t *pgd = NULL;
+		unsigned long next_pud, next_pmd, next_pgd;
+		unsigned long addr_pmd, addr_pud;
+		phys_addr_t phys_pud, phys_pmd;
+
+		if (phys_pgd >= phys_end)
+			break;
 
-static int dma_mmu_remap_num;
+		pgd = pgd_offset(&init_mm, addr_pgd);
+		do {
+			next_pgd = pgd_addr_end(addr_pgd, end);
+			pud = pud_offset(pgd, addr_pgd);
+			addr_pud = addr_pgd;
+			phys_pud = phys_pgd;
+			do {
+				next_pud = pud_addr_end(addr_pud, next_pgd);
+				pmd = pmd_offset(pud, addr_pud);
+				addr_pmd = addr_pud;
+				phys_pmd = phys_pud;
+				do {
+					next_pmd = pmd_addr_end(addr_pmd,
+								next_pud);
+					if (pmd_none(*pmd) || pmd_bad(*pmd))
+						split_and_set_pmd(pmd, addr_pmd,
+							next_pmd, __phys_to_pfn(phys_pmd));
+					pmd++;
+					phys_pmd += next_pmd - addr_pmd;
+				} while (addr_pmd = next_pmd,
+					addr_pmd < next_pud);
+				phys_pud += next_pud - addr_pud;
+			} while (pud++, addr_pud = next_pud,
+				addr_pud < next_pgd);
+			phys_pgd += next_pgd - addr_pgd;
+		} while (pgd++, addr_pgd = next_pgd, addr_pgd < end);
+	}
+}
 
-void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
+#else
+static void __init remap_pages(void)
 {
-	dma_mmu_remap[dma_mmu_remap_num].base = base;
-	dma_mmu_remap[dma_mmu_remap_num].size = size;
-	dma_mmu_remap_num++;
+
 }
+#endif
 
-static bool dma_overlap(phys_addr_t start, phys_addr_t end)
+static void __init fixup_executable(void)
 {
-	int i;
+#ifdef CONFIG_DEBUG_RODATA
+	/* now that we are actually fully mapped, make the start/end more fine grained */
+	if (!IS_ALIGNED((unsigned long)_stext, SWAPPER_BLOCK_SIZE)) {
+		unsigned long aligned_start = round_down(__pa(_stext),
+							 SWAPPER_BLOCK_SIZE);
 
-	for (i = 0; i < dma_mmu_remap_num; i++) {
-		phys_addr_t dma_base = dma_mmu_remap[i].base;
-		phys_addr_t dma_end = dma_mmu_remap[i].base +
-			dma_mmu_remap[i].size;
+		create_mapping(aligned_start, __phys_to_virt(aligned_start),
+				__pa(_stext) - aligned_start,
+				PAGE_KERNEL, false);
+	}
 
-		if ((dma_base < end) && (dma_end > start))
-			return true;
+	if (!IS_ALIGNED((unsigned long)__init_end, SWAPPER_BLOCK_SIZE)) {
+		unsigned long aligned_end = round_up(__pa(__init_end),
+						     SWAPPER_BLOCK_SIZE);
+		create_mapping(__pa(__init_end), (unsigned long)__init_end,
+				aligned_end - __pa(__init_end),
+				PAGE_KERNEL, false);
 	}
-	return false;
+#endif
+}
+
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void)
+{
+	create_mapping_late(__pa(_stext), (unsigned long)_stext,
+				(unsigned long)_etext - (unsigned long)_stext,
+				PAGE_KERNEL_ROX);
+
+}
+#endif
+
+void fixup_init(void)
+{
+	create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin,
+			    (unsigned long)__init_end - (unsigned long)__init_begin,
+			    PAGE_KERNEL);
 }
 
 /*
@@ -565,35 +596,39 @@ static bool dma_overlap(phys_addr_t start, phys_addr_t end)
  */
 void __init paging_init(void)
 {
-	phys_addr_t pgd_phys = early_pgtable_alloc();
-	pgd_t *pgd = pgd_set_fixmap(pgd_phys);
+	void *zero_page;
 
-	map_kernel(pgd);
-	map_mem(pgd);
+	map_mem();
+	fixup_executable();
+	dma_contiguous_remap();
+	remap_pages();
 
 	/*
-	 * We want to reuse the original swapper_pg_dir so we don't have to
-	 * communicate the new address to non-coherent secondaries in
-	 * secondary_entry, and so cpu_switch_mm can generate the address with
-	 * adrp+add rather than a load from some global variable.
-	 *
-	 * To do this we need to go via a temporary pgd.
+	 * Finally flush the caches and tlb to ensure that we're in a
+	 * consistent state.
 	 */
-	cpu_replace_ttbr1(__va(pgd_phys));
-	memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
-	cpu_replace_ttbr1(swapper_pg_dir);
+	flush_tlb_all();
+
+	/* allocate the zero page. */
+	zero_page = early_alloc(PAGE_SIZE);
 
-	pgd_clear_fixmap();
-	memblock_free(pgd_phys, PAGE_SIZE);
+	bootmem_init();
+
+	empty_zero_page = virt_to_page(zero_page);
+
+	/* Ensure the zero page is visible to the page table walker */
+	dsb(ishst);
 
 	/*
-	 * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
-	 * allocated with it.
+	 * TTBR0 is only used for the identity mapping at this stage. Make it
+	 * point to zero page to avoid speculatively fetching new entries.
 	 */
-	memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE,
-		      SWAPPER_DIR_SIZE - PAGE_SIZE);
-
-	bootmem_init();
+	cpu_set_reserved_ttbr0();
+	local_flush_tlb_all();
+	cpu_set_default_tcr_t0sz();
+	flush_tlb_all();
+	set_kernel_text_ro();
+	flush_tlb_all();
 }
 
 /*
@@ -680,13 +715,21 @@ void vmemmap_free(unsigned long start, unsigned long end)
 }
 #endif	/* CONFIG_SPARSEMEM_VMEMMAP */
 
+static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
+#if CONFIG_PGTABLE_LEVELS > 2
+static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
+#endif
+#if CONFIG_PGTABLE_LEVELS > 3
+static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
+#endif
+
 static inline pud_t * fixmap_pud(unsigned long addr)
 {
 	pgd_t *pgd = pgd_offset_k(addr);
 
 	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
 
-	return pud_offset_kimg(pgd, addr);
+	return pud_offset(pgd, addr);
 }
 
 static inline pmd_t * fixmap_pmd(unsigned long addr)
@@ -695,12 +738,16 @@ static inline pmd_t * fixmap_pmd(unsigned long addr)
 	BUG_ON(pud_none(*pud) || pud_bad(*pud));
 
-	return pmd_offset_kimg(pud, addr);
+	return pmd_offset(pud, addr);
 }
 
 static inline pte_t * fixmap_pte(unsigned long addr)
 {
-	return &bm_pte[pte_index(addr)];
+	pmd_t *pmd = fixmap_pmd(addr);
+
+	BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
+
+	return pte_offset_kernel(pmd, addr);
 }
 
 void __init early_fixmap_init(void)
@@ -711,26 +758,15 @@ void __init early_fixmap_init(void)
 	unsigned long addr = FIXADDR_START;
 
 	pgd = pgd_offset_k(addr);
-	if (CONFIG_PGTABLE_LEVELS > 3 &&
-	    !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa(bm_pud))) {
-		/*
-		 * We only end up here if the kernel mapping and the fixmap
-		 * share the top level pgd entry, which should only happen on
-		 * 16k/4 levels configurations.
-		 */
-		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
-		pud = pud_offset_kimg(pgd, addr);
-	} else {
-		pgd_populate(&init_mm, pgd, bm_pud);
-		pud = fixmap_pud(addr);
-	}
+	pgd_populate(&init_mm, pgd, bm_pud);
+	pud = pud_offset(pgd, addr);
 	pud_populate(&init_mm, pud, bm_pmd);
-	pmd = fixmap_pmd(addr);
+	pmd = pmd_offset(pud, addr);
 	pmd_populate_kernel(&init_mm, pmd, bm_pte);
 
 	/*
 	 * The boot-ioremap range spans multiple pmds, for which
-	 * we are not prepared:
+	 * we are not preparted:
 	 */
 	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
 		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
@@ -769,10 +805,11 @@ void __set_fixmap(enum fixed_addresses idx,
 	}
 }
 
-void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
+void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 {
 	const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
-	int offset;
+	pgprot_t prot = PAGE_KERNEL_RO;
+	int size, offset;
 	void *dt_virt;
 
 	/*
@@ -789,7 +826,7 @@
 	 * Make sure that the FDT region can be mapped without the need to
 	 * allocate additional translation table pages, so that it is safe
-	 * to call create_mapping_noalloc() this early.
+	 * to call create_mapping() this early.
 	 *
 	 * On 64k pages, the FDT will be mapped using PTEs, so we need to
 	 * be in the same PMD as the rest of the fixmap.
@@ -805,73 +842,21 @@
 	dt_virt = (void *)dt_virt_base + offset;
 
 	/* map the first chunk so we can read the size from the header */
-	create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE),
-			dt_virt_base, SWAPPER_BLOCK_SIZE, prot);
+	create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+		       SWAPPER_BLOCK_SIZE, prot, false);
 
 	if (fdt_check_header(dt_virt) != 0)
 		return NULL;
 
-	*size = fdt_totalsize(dt_virt);
-	if (*size > MAX_FDT_SIZE)
+	size = fdt_totalsize(dt_virt);
+	if (size > MAX_FDT_SIZE)
 		return NULL;
 
-	if (offset + *size > SWAPPER_BLOCK_SIZE)
-		create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
-			       round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot);
-
-	return dt_virt;
-}
-
-void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
-{
-	void *dt_virt;
-	int size;
-
-	dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO);
-	if (!dt_virt)
-		return NULL;
+	if (offset + size > SWAPPER_BLOCK_SIZE)
+		create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+			       round_up(offset + size, SWAPPER_BLOCK_SIZE), prot, false);
 
 	memblock_reserve(dt_phys, size);
-	return dt_virt;
-}
-
-int __init arch_ioremap_pud_supported(void)
-{
-	/* only 4k granule supports level 1 block mappings */
-	return IS_ENABLED(CONFIG_ARM64_4K_PAGES);
-}
-
-int __init arch_ioremap_pmd_supported(void)
-{
-	return 1;
-}
-
-int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
-{
-	BUG_ON(phys & ~PUD_MASK);
-	set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
-	return 1;
-}
-
-int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
-{
-	BUG_ON(phys & ~PMD_MASK);
-	set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
-	return 1;
-}
-int pud_clear_huge(pud_t *pud)
-{
-	if (!pud_sect(*pud))
-		return 0;
-	pud_clear(pud);
-	return 1;
-}
-
-int pmd_clear_huge(pmd_t *pmd)
-{
-	if (!pmd_sect(*pmd))
-		return 0;
-	pmd_clear(pmd);
-	return 1;
+	return dt_virt;
 }
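The section-vs-page decision in alloc_init_pud() above hinges on dma_overlap(), which treats every region recorded by dma_contiguous_early_fixup() as a half-open physical interval. A minimal, userspace-only C sketch of the same interval test follows; the table contents, type aliases, and function names here are illustrative stand-ins, not part of the patch.

/*
 * Standalone illustration (not kernel code) of the overlap predicate used
 * by dma_overlap() in the diff above.  The reserve table mirrors the role
 * of dma_mmu_remap[]; addresses are made up for the example.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t phys_addr_t;

struct reserve {
	phys_addr_t base;
	unsigned long size;
};

/* Plays the role of dma_mmu_remap[] filled in by dma_contiguous_early_fixup(). */
static const struct reserve remap_table[] = {
	{ 0x80000000ULL, 0x10000000UL },	/* hypothetical CMA carve-out */
};

static bool overlaps(phys_addr_t start, phys_addr_t end)
{
	for (size_t i = 0; i < sizeof(remap_table) / sizeof(remap_table[0]); i++) {
		phys_addr_t base = remap_table[i].base;
		phys_addr_t rend = base + remap_table[i].size;

		/* Same test as the patch: (dma_base < end) && (dma_end > start). */
		if (base < end && rend > start)
			return true;
	}
	return false;
}

int main(void)
{
	/* A block straddling the carve-out must fall back to page mappings... */
	assert(overlaps(0x8FF00000ULL, 0x90100000ULL));
	/* ...while a block ending exactly at its base may stay a section mapping. */
	assert(!overlaps(0x7FE00000ULL, 0x80000000ULL));
	puts("overlap checks behave as expected");
	return 0;
}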
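Similarly, __map_memblock() and fixup_executable() widen the executable window to whole SWAPPER_BLOCK_SIZE blocks and later remap the leading and trailing slack at page granularity. A small sketch of that rounding arithmetic, assuming 4K pages with 2MB section maps and hypothetical physical addresses (round_down/round_up are re-defined locally for the example):

/*
 * Standalone illustration (not kernel code) of the block rounding used by
 * fixup_executable() in the diff above.  BLOCK_SIZE stands in for
 * SWAPPER_BLOCK_SIZE; the two addresses are invented for the example.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define BLOCK_SIZE		(2ULL * 1024 * 1024)
#define round_down(x, y)	((x) & ~((y) - 1))
#define round_up(x, y)		((((x) - 1) | ((y) - 1)) + 1)

int main(void)
{
	uint64_t text_pa = 0x40081000ULL;	/* hypothetical __pa(_stext) */
	uint64_t init_end_pa = 0x40a2c000ULL;	/* hypothetical __pa(__init_end) */

	uint64_t x_start = round_down(text_pa, BLOCK_SIZE);
	uint64_t x_end = round_up(init_end_pa, BLOCK_SIZE);

	/* The executable window is widened to whole 2MB blocks. */
	assert(x_start == 0x40000000ULL);
	assert(x_end == 0x40c00000ULL);

	/* The slack on either side is what fixup_executable() later remaps
	 * with PAGE_KERNEL once the rest of memory is mapped. */
	printf("head slack: %llu KB, tail slack: %llu KB\n",
	       (unsigned long long)(text_pa - x_start) / 1024,
	       (unsigned long long)(x_end - init_end_pa) / 1024);
	return 0;
}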
