From 66a20757214d94b915f2d2aada1384dead9ab18d Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Tue, 21 Jan 2014 15:49:20 -0800 Subject: memblock, numa: introduce flags field into memblock There is no flag in memblock to describe what type the memory is. Sometimes, we may use memblock to reserve some memory for special usage. And we want to know what kind of memory it is. So we need a way to differentiate memory for different usage. In hotplug environment, we want to reserve hotpluggable memory so the kernel won't be able to use it. And when the system is up, we have to free this hotpluggable memory to buddy. So we need to mark this memory first. In order to do so, we need to mark out this special memory in memblock. In this patch, we introduce a new "flags" member into memblock_region: struct memblock_region { phys_addr_t base; phys_addr_t size; unsigned long flags; /* This is new. */ #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP int nid; #endif }; This patch does the following things: 1) Add "flags" member to memblock_region. 2) Modify the following APIs' prototype: memblock_add_region() memblock_insert_region() 3) Add memblock_reserve_region() to support reserving memory with flags, and keep memblock_reserve()'s prototype unmodified. 4) Modify other APIs to support flags, but keep their prototype unmodified. The idea is from Wen Congyang and Liu Jiang. Suggested-by: Wen Congyang Suggested-by: Liu Jiang Signed-off-by: Tang Chen Reviewed-by: Zhang Yanfei Cc: "H. Peter Anvin" Cc: "Rafael J . 
Wysocki" Cc: Chen Tang Cc: Gong Chen Cc: Ingo Molnar Cc: Jiang Liu Cc: Johannes Weiner Cc: Lai Jiangshan Cc: Larry Woodman Cc: Len Brown Cc: Mel Gorman Cc: Michal Nazarewicz Cc: Minchan Kim Cc: Prarit Bhargava Cc: Rik van Riel Cc: Taku Izumi Cc: Tejun Heo Cc: Thomas Gleixner Cc: Thomas Renninger Cc: Toshi Kani Cc: Vasilis Liaskovitis Cc: Wanpeng Li Cc: Yasuaki Ishimatsu Cc: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/memblock.h') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 77c60e52939d..9a805ec6e794 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -22,6 +22,7 @@ struct memblock_region { phys_addr_t base; phys_addr_t size; + unsigned long flags; #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP int nid; #endif -- cgit v1.2.3 From 66b16edf9eafc3291cabb2253d0f342a847656b7 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Tue, 21 Jan 2014 15:49:23 -0800 Subject: memblock, mem_hotplug: introduce MEMBLOCK_HOTPLUG flag to mark hotpluggable regions In find_hotpluggable_memory, once we find out a memory region which is hotpluggable, we want to mark it in memblock.memory. So that we could control memblock allocator not to allocate hotpluggable memory for the kernel later. To achieve this goal, we introduce MEMBLOCK_HOTPLUG flag to indicate the hotpluggable memory regions in memblock and a function memblock_mark_hotplug() to mark hotpluggable memory if we find one. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Tang Chen Reviewed-by: Zhang Yanfei Cc: "H. Peter Anvin" Cc: "Rafael J . 
Wysocki" Cc: Chen Tang Cc: Gong Chen Cc: Ingo Molnar Cc: Jiang Liu Cc: Johannes Weiner Cc: Lai Jiangshan Cc: Larry Woodman Cc: Len Brown Cc: Liu Jiang Cc: Mel Gorman Cc: Michal Nazarewicz Cc: Minchan Kim Cc: Prarit Bhargava Cc: Rik van Riel Cc: Taku Izumi Cc: Tejun Heo Cc: Thomas Gleixner Cc: Thomas Renninger Cc: Toshi Kani Cc: Vasilis Liaskovitis Cc: Wanpeng Li Cc: Wen Congyang Cc: Yasuaki Ishimatsu Cc: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux/memblock.h') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 9a805ec6e794..b788faa71563 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -19,6 +19,9 @@ #define INIT_MEMBLOCK_REGIONS 128 +/* Definition of memblock flags. */ +#define MEMBLOCK_HOTPLUG 0x1 /* hotpluggable region */ + struct memblock_region { phys_addr_t base; phys_addr_t size; @@ -60,6 +63,8 @@ int memblock_remove(phys_addr_t base, phys_addr_t size); int memblock_free(phys_addr_t base, phys_addr_t size); int memblock_reserve(phys_addr_t base, phys_addr_t size); void memblock_trim_memory(phys_addr_t align); +int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); +int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, @@ -122,6 +127,18 @@ void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start, i != (u64)ULLONG_MAX; \ __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid)) +static inline void memblock_set_region_flags(struct memblock_region *r, + unsigned long flags) +{ + r->flags |= flags; +} + +static inline void memblock_clear_region_flags(struct memblock_region *r, + unsigned long flags) +{ + r->flags &= ~flags; +} + #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid); -- cgit 
v1.2.3 From e7e8de5918dd6a07cbddae559600d7765ad6a56e Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Tue, 21 Jan 2014 15:49:26 -0800 Subject: memblock: make memblock_set_node() support different memblock_type [sfr@canb.auug.org.au: fix powerpc build] Signed-off-by: Tang Chen Reviewed-by: Zhang Yanfei Cc: "H. Peter Anvin" Cc: "Rafael J . Wysocki" Cc: Chen Tang Cc: Gong Chen Cc: Ingo Molnar Cc: Jiang Liu Cc: Johannes Weiner Cc: Lai Jiangshan Cc: Larry Woodman Cc: Len Brown Cc: Liu Jiang Cc: Mel Gorman Cc: Michal Nazarewicz Cc: Minchan Kim Cc: Prarit Bhargava Cc: Rik van Riel Cc: Taku Izumi Cc: Tejun Heo Cc: Thomas Gleixner Cc: Thomas Renninger Cc: Toshi Kani Cc: Vasilis Liaskovitis Cc: Wanpeng Li Cc: Wen Congyang Cc: Yasuaki Ishimatsu Cc: Yinghai Lu Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/memblock.h') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b788faa71563..97480d392e40 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -140,7 +140,8 @@ static inline void memblock_clear_region_flags(struct memblock_region *r, } #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP -int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid); +int memblock_set_node(phys_addr_t base, phys_addr_t size, + struct memblock_type *type, int nid); static inline void memblock_set_region_node(struct memblock_region *r, int nid) { -- cgit v1.2.3 From 55ac590c2fadad785d60dd70c12d62823bc2cd39 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Tue, 21 Jan 2014 15:49:35 -0800 Subject: memblock, mem_hotplug: make memblock skip hotpluggable regions if needed Linux kernel cannot migrate pages used by the kernel. As a result, hotpluggable memory used by the kernel won't be able to be hot-removed. 
To solve this problem, the basic idea is to prevent memblock from allocating hotpluggable memory for the kernel at early time, and arrange all hotpluggable memory in ACPI SRAT(System Resource Affinity Table) as ZONE_MOVABLE when initializing zones. In the previous patches, we have marked hotpluggable memory regions with MEMBLOCK_HOTPLUG flag in memblock.memory. In this patch, we make memblock skip these hotpluggable memory regions in the default top-down allocation function if movable_node boot option is specified. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Tang Chen Signed-off-by: Zhang Yanfei Cc: "H. Peter Anvin" Cc: "Rafael J . Wysocki" Cc: Chen Tang Cc: Gong Chen Cc: Ingo Molnar Cc: Jiang Liu Cc: Johannes Weiner Cc: Lai Jiangshan Cc: Larry Woodman Cc: Len Brown Cc: Liu Jiang Cc: Mel Gorman Cc: Michal Nazarewicz Cc: Minchan Kim Cc: Prarit Bhargava Cc: Rik van Riel Cc: Taku Izumi Cc: Tejun Heo Cc: Thomas Gleixner Cc: Thomas Renninger Cc: Toshi Kani Cc: Vasilis Liaskovitis Cc: Wanpeng Li Cc: Wen Congyang Cc: Yasuaki Ishimatsu Cc: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux/memblock.h') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 97480d392e40..2f52c8c492bd 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -47,6 +47,10 @@ struct memblock { extern struct memblock memblock; extern int memblock_debug; +#ifdef CONFIG_MOVABLE_NODE +/* If movable_node boot option specified */ +extern bool movable_node_enabled; +#endif /* CONFIG_MOVABLE_NODE */ #define memblock_dbg(fmt, ...) 
\ if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) @@ -65,6 +69,26 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); void memblock_trim_memory(phys_addr_t align); int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); +#ifdef CONFIG_MOVABLE_NODE +static inline bool memblock_is_hotpluggable(struct memblock_region *m) +{ + return m->flags & MEMBLOCK_HOTPLUG; +} + +static inline bool movable_node_is_enabled(void) +{ + return movable_node_enabled; +} +#else +static inline bool memblock_is_hotpluggable(struct memblock_region *m) +{ + return false; +} +static inline bool movable_node_is_enabled(void) +{ + return false; +} +#endif #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, -- cgit v1.2.3 From 87029ee9390b2297dae699d5fb135b77992116e5 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Tue, 21 Jan 2014 15:50:14 -0800 Subject: mm/memblock: reorder parameters of memblock_find_in_range_node Reorder parameters of memblock_find_in_range_node to be consistent with other memblock APIs. The change was suggested by Tejun Heo . Signed-off-by: Grygorii Strashko Signed-off-by: Santosh Shilimkar Cc: Yinghai Lu Cc: Tejun Heo Cc: "Rafael J. Wysocki" Cc: Arnd Bergmann Cc: Christoph Lameter Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: Konrad Rzeszutek Wilk Cc: Michal Hocko Cc: Paul Walmsley Cc: Pavel Machek Cc: Russell King Cc: Tony Lindgren Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux/memblock.h') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 2f52c8c492bd..11c31590cc49 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -55,8 +55,9 @@ extern bool movable_node_enabled; #define memblock_dbg(fmt, ...) 
\ if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) -phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end, - phys_addr_t size, phys_addr_t align, int nid); +phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align, + phys_addr_t start, phys_addr_t end, + int nid); phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr); -- cgit v1.2.3 From b115423357e0cda6d8f45d0c81df537d7b004020 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Tue, 21 Jan 2014 15:50:16 -0800 Subject: mm/memblock: switch to use NUMA_NO_NODE instead of MAX_NUMNODES It's recommended to use NUMA_NO_NODE everywhere to select "process any node" behavior or to indicate that "no node id specified". Hence, update __next_free_mem_range*() APIs to accept both NUMA_NO_NODE and MAX_NUMNODES, but emit warning once on MAX_NUMNODES, and correct corresponding API's documentation to describe new behavior. Also, update other memblock/nobootmem APIs where MAX_NUMNODES is used directly. The change was suggested by Tejun Heo. Signed-off-by: Grygorii Strashko Signed-off-by: Santosh Shilimkar Cc: Yinghai Lu Cc: Tejun Heo Cc: "Rafael J. Wysocki" Cc: Arnd Bergmann Cc: Christoph Lameter Cc: Greg Kroah-Hartman Cc: H. 
Peter Anvin Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: Konrad Rzeszutek Wilk Cc: Michal Hocko Cc: Paul Walmsley Cc: Pavel Machek Cc: Russell King Cc: Tony Lindgren Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/memblock.h') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 11c31590cc49..cd0274bebd4c 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -118,7 +118,7 @@ void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start, /** * for_each_free_mem_range - iterate through free memblock areas * @i: u64 used as loop variable - * @nid: node selector, %MAX_NUMNODES for all nodes + * @nid: node selector, %NUMA_NO_NODE for all nodes * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL * @p_nid: ptr to int for nid of the range, can be %NULL @@ -138,7 +138,7 @@ void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start, /** * for_each_free_mem_range_reverse - rev-iterate through free memblock areas * @i: u64 used as loop variable - * @nid: node selector, %MAX_NUMNODES for all nodes + * @nid: node selector, %NUMA_NO_NODE for all nodes * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL * @p_nid: ptr to int for nid of the range, can be %NULL -- cgit v1.2.3 From 5e270e254885893f8c82ab9b91caa648af3690df Mon Sep 17 00:00:00 2001 From: Philipp Hachtmann Date: Thu, 23 Jan 2014 15:53:11 -0800 Subject: mm: free memblock.memory in free_all_bootmem When calling free_all_bootmem() the free areas under memblock's control are released to the buddy allocator. Additionally the reserved list is freed if it was reallocated by memblock. The same should apply for the memory list. 
Signed-off-by: Philipp Hachtmann Reviewed-by: Tejun Heo Cc: Joonsoo Kim Cc: Johannes Weiner Cc: Tang Chen Cc: Toshi Kani Cc: Jianguo Wu Cc: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/memblock.h') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index cd0274bebd4c..1ef66360f0b0 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -61,6 +61,7 @@ phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align, phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr); +phys_addr_t get_allocated_memblock_memory_regions_info(phys_addr_t *addr); void memblock_allow_resize(void); int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid); int memblock_add(phys_addr_t base, phys_addr_t size); -- cgit v1.2.3