 arch/arm64/Kconfig           |  4
 arch/arm64/include/asm/mmu.h |  3
 arch/arm64/mm/init.c         | 77
 arch/arm64/mm/mmu.c          | 45
 include/linux/memblock.h     |  1
 mm/memblock.c                | 10
 6 files changed, 120 insertions(+), 20 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 07090b418129..104588d55777 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -653,9 +653,7 @@ config HOTPLUG_CPU
 	  can be controlled through /sys/devices/system/cpu.
 
 config ARCH_ENABLE_MEMORY_HOTPLUG
-	def_bool y
-
-config ARCH_ENABLE_MEMORY_HOTREMOVE
+	depends on !NUMA
 	def_bool y
 
 # The GPIO number here must be sorted by descending number. In case of
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 990124a67eeb..0944bfc04f5b 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -35,5 +35,8 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
 			       unsigned long virt, phys_addr_t size,
 			       pgprot_t prot);
 extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
+#ifdef CONFIG_MEMORY_HOTPLUG
+extern void hotplug_paging(phys_addr_t start, phys_addr_t size);
+#endif
 
 #endif
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 1d4dcd57ac85..bc45677d9ce6 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -503,37 +503,80 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 	struct zone *zone;
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
+	unsigned long end_pfn = start_pfn + nr_pages;
+	unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
+	unsigned long pfn;
 	int ret;
 
+	if (end_pfn > max_sparsemem_pfn) {
+		pr_err("end_pfn too big\n");
+		return -1;
+	}
+	hotplug_paging(start, size);
+
+	/*
+	 * Mark the first page in the range as unusable. This is needed
+	 * because __add_section (within __add_pages) wants pfn_valid
+	 * of it to be false, and on arm64 pfn_valid is implemented by
+	 * just checking the nomap flag of existing blocks.
+	 *
+	 * A small trick here is that __add_section() requires only
+	 * phys_start_pfn (that is, the first pfn of a section) to be
+	 * invalid. Regardless of whether it was assumed (by the
+	 * function author) that all pfns within a section are either
+	 * all valid or all invalid, this allows us to avoid looping
+	 * twice (once here, and again when memblock_clear_nomap() is
+	 * called) through all the pfns of the section, and to modify
+	 * only one pfn. Thanks to that, further down in __add_zone()
+	 * only this very first pfn is skipped and the corresponding
+	 * page is not flagged reserved. Therefore it is enough to
+	 * correct this setup only for it.
+	 *
+	 * When arch_add_memory() returns, walk_memory_range() is
+	 * called with the online_memory_block() callback, whose
+	 * execution finally reaches memory_block_action(), where
+	 * again only the first pfn of a memory block is checked to
+	 * be reserved. Above it was the first pfn of a section, here
+	 * it is a block, but
+	 * (drivers/base/memory.c):
+	 *     sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+	 * (include/linux/memory.h):
+	 *     #define MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS)
+	 * so we can treat blocks and sections equivalently.
+	 */
+	memblock_mark_nomap(start, 1<<PAGE_SHIFT);
+
 	pgdat = NODE_DATA(nid);
 
 	zone = pgdat->node_zones +
 		zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
 	ret = __add_pages(nid, zone, start_pfn, nr_pages);
 
-	if (ret)
-		pr_warn("%s: Problem encountered in __add_pages() ret=%d\n",
-			__func__, ret);
-
-	return ret;
-}
+	/*
+	 * Make the pages usable after they have been added.
+	 * This will make pfn_valid return true.
+	 */
+	memblock_clear_nomap(start, 1<<PAGE_SHIFT);
 
-#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
-{
-	unsigned long start_pfn = start >> PAGE_SHIFT;
-	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct zone *zone;
-	int ret;
+	/*
+	 * This is a hack to avoid having to mix arch-specific code
+	 * into arch-independent code. SetPageReserved is supposed
+	 * to be called by __add_zone (within __add_section, within
+	 * __add_pages). However, when it is called there, it assumes
+	 * that pfn_valid returns true. Given the way pfn_valid is
+	 * implemented on arm64 (a check on the nomap flag), the only
+	 * way to make it evaluate true inside __add_zone would be to
+	 * clear the nomap flags of blocks in architecture-independent
+	 * code.
+	 *
+	 * To avoid this, we set the Reserved flag here, after we
+	 * cleared the nomap flag in the lines above.
+	 */
+	SetPageReserved(pfn_to_page(start_pfn));
 
-	zone = page_zone(pfn_to_page(start_pfn));
-	ret = __remove_pages(zone, start_pfn, nr_pages);
 	if (ret)
-		pr_warn("%s: Problem encountered in __remove_pages() ret=%d\n",
+		pr_warn("%s: Problem encountered in __add_pages() ret=%d\n",
 			__func__, ret);
 
 	return ret;
 }
 #endif
-#endif
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 6c444d968323..cdb9338568f4 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1,3 +1,4 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 /*
  * Based on arch/arm/mm/mmu.c
  *
@@ -130,6 +131,7 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 		phys_addr_t pte_phys;
 		BUG_ON(!pgtable_alloc);
 		pte_phys = pgtable_alloc();
+		pr_debug("Allocating PTE at %pK\n", __va(pte_phys));
 		pte = pte_set_fixmap(pte_phys);
 		if (pmd_sect(*pmd))
 			split_pmd(pmd, pte);
@@ -194,6 +196,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 		phys_addr_t pmd_phys;
 		BUG_ON(!pgtable_alloc);
 		pmd_phys = pgtable_alloc();
+		pr_debug("Allocating PMD at %pK\n", __va(pmd_phys));
 		pmd = pmd_set_fixmap(pmd_phys);
 		if (pud_sect(*pud)) {
 			/*
@@ -262,6 +265,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 		phys_addr_t pud_phys;
 		BUG_ON(!pgtable_alloc);
 		pud_phys = pgtable_alloc();
+		pr_debug("Allocating PUD at %pK\n", __va(pud_phys));
 		__pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
 	}
 	BUG_ON(pgd_bad(*pgd));
@@ -605,6 +609,47 @@ void __init paging_init(void)
 	bootmem_init();
 }
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+static phys_addr_t pgd_pgtable_alloc(void)
+{
+	void *ptr = (void *)__get_free_page(PGALLOC_GFP);
+	if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
+		BUG();
+
+	/* Ensure the zeroed page is visible to the page table walker */
+	dsb(ishst);
+	return __pa(ptr);
+}
+
+/*
+ * hotplug_paging() is used by memory hotplug to build new page tables
+ * for hot-added memory.
+ */
+void hotplug_paging(phys_addr_t start, phys_addr_t size)
+{
+
+	struct page *pg;
+	phys_addr_t pgd_phys = pgd_pgtable_alloc();
+	pgd_t *pgd = pgd_set_fixmap(pgd_phys);
+
+	memcpy(pgd, swapper_pg_dir, PAGE_SIZE);
+
+	__create_pgd_mapping(pgd, start, __phys_to_virt(start), size,
+		PAGE_KERNEL, pgd_pgtable_alloc);
+
+	cpu_replace_ttbr1(__va(pgd_phys));
+	memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
+	cpu_replace_ttbr1(swapper_pg_dir);
+
+	pgd_clear_fixmap();
+
+	pg = phys_to_page(pgd_phys);
+	pgtable_page_dtor(pg);
+	__free_pages(pg, 0);
+}
+
+#endif
+
 /*
  * Check whether a kernel address is valid (derived from arch/x86/).
  */
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 2d79ec1496e5..d3f41bfe05f1 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -84,6 +84,7 @@ int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
 int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
 int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
 int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
+int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
 ulong choose_memblock_flags(void);
 unsigned long memblock_region_resize_late_begin(void);
 void memblock_region_resize_late_end(unsigned long);
diff --git a/mm/memblock.c b/mm/memblock.c
index 241225579f3a..fb63a9cc00fd 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -840,6 +840,16 @@ int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
 }
 
 /**
+ * memblock_clear_nomap - Clear the MEMBLOCK_NOMAP flag on a memory region
+ * @base: the base phys addr of the region
+ * @size: the size of the region
+ */
+int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
+{
+	return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP);
+}
+
+/**
  * __next_reserved_mem_region - next function for for_each_reserved_region()
  * @idx: pointer to u64 loop variable
  * @out_start: ptr to phys_addr_t for start address of the region, can be %NULL
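
For context on why the MEMBLOCK_NOMAP dance in arch_add_memory() works at all: arm64 selects HAVE_ARCH_PFN_VALID and implements pfn_valid() as a memblock lookup rather than a sparsemem section check, so toggling the nomap flag on a region directly flips the result of pfn_valid() for its pfns. A simplified sketch of the pre-existing arm64 definition this patch relies on (shown for reference only, not part of the diff):

	/* arch/arm64/mm/init.c (pre-existing code, simplified) */
	#ifdef CONFIG_HAVE_ARCH_PFN_VALID
	int pfn_valid(unsigned long pfn)
	{
		/* a pfn is valid iff its backing memblock region is mapped */
		return memblock_is_map_memory(pfn << PAGE_SHIFT);
	}
	EXPORT_SYMBOL(pfn_valid);
	#endif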
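A note on the hotplug_paging() flow in mmu.c: it reuses the same trick paging_init() already plays at boot. The new mappings are built in a scratch copy of the pgd while TTBR1 still points at swapper_pg_dir; TTBR1 is then switched to the scratch pgd so that swapper_pg_dir itself can be updated with a plain memcpy, and finally TTBR1 is switched back. This avoids modifying live page tables in place, which on arm64 risks TLB conflict aborts when cached translations disagree with a table that is being rewritten.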
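memblock_clear_nomap() is deliberately the mirror image of memblock_mark_nomap(): both are one-line wrappers around the existing memblock_setclr_flag() helper, differing only in the set/clear argument. Roughly, that helper isolates the requested range onto region boundaries, updates the flag on each affected region, and re-merges neighbours. A simplified sketch of the existing helper (from memory of mm/memblock.c in this era; details may differ from the exact base tree):

	static int __init_memblock memblock_setclr_flag(phys_addr_t base,
					phys_addr_t size, int set, int flag)
	{
		struct memblock_type *type = &memblock.memory;
		int i, ret, start_rgn, end_rgn;

		/* split regions so [base, base + size) lies on region boundaries */
		ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
		if (ret)
			return ret;

		/* set or clear the flag on every region inside the range */
		for (i = start_rgn; i < end_rgn; i++)
			if (set)
				type->regions[i].flags |= flag;
			else
				type->regions[i].flags &= ~flag;

		/* coalesce adjacent regions that now carry identical flags */
		memblock_merge_regions(type);
		return 0;
	}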
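Finally, for anyone wanting to exercise this path: arch_add_memory() is not called directly but through the generic hotplug entry point add_memory() (or via /sys/devices/system/memory/probe where ARCH_MEMORY_PROBE is available). A hypothetical test snippet, with the node id, base address, and size made up purely for illustration:

	#include <linux/init.h>
	#include <linux/memory_hotplug.h>
	#include <linux/sizes.h>

	static int __init hotadd_example(void)
	{
		/*
		 * Hypothetical: hot-add 1 GiB of physical memory at
		 * 0x880000000 on node 0. add_memory() ends up in
		 * arch_add_memory(), which now calls hotplug_paging()
		 * to extend the kernel page tables first.
		 */
		return add_memory(0, 0x880000000ULL, SZ_1G);
	}
	late_initcall(hotadd_example);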
