author		Vinayak Menon <vinmenon@codeaurora.org>	2016-06-28 09:48:42 +0530
committer	Vinayak Menon <vinmenon@codeaurora.org>	2016-08-25 11:49:45 +0530
commit		47b41f43ee8d38cbceb8b1513b712e4924e4e2e7 (patch)
tree		557fa5e419cbb9722eac246f8edc855d3bf2e939 /mm/zbud.c
parent		e97b6a0e0217f7c072fdad6c50673cd7a64348e1 (diff)
mm: zbud: fix the locking scenarios with zcache
With zcache using zbud, strange locking scenarios are
observed. The first problem seen is:
Core 2 waiting on mapping->tree_lock which is taken by core 6
do_raw_spin_lock
raw_spin_lock_irq
atomic_cmpxchg
page_freeze_refs
__remove_mapping
shrink_page_list
Core 6 after taking mapping->tree_lock is waiting on zbud pool lock
which is held by core 5
zbud_alloc
zcache_store_page
__cleancache_put_page
cleancache_put_page
__delete_from_page_cache
spin_unlock_irq
__remove_mapping
shrink_page_list
shrink_inactive_list
Core 5, after taking the zbud pool lock in zbud_free, received an IRQ; after
IRQ exit, softirqs were run and end_page_writeback tried to take
mapping->tree_lock, which was already held by core 6. Deadlock.
do_raw_spin_lock
raw_spin_lock_irqsave
test_clear_page_writeba
end_page_writeback
ext4_finish_bio
ext4_end_bio
bio_endio
blk_update_request
end_clone_bio
bio_endio
blk_update_request
blk_update_bidi_request
blk_end_bidi_request
blk_end_request
mmc_blk_cmdq_complete_r
mmc_cmdq_softirq_done
blk_done_softirq
static_key_count
static_key_false
trace_softirq_exit
__do_softirq()
tick_irq_exit
irq_exit()
set_irq_regs
__handle_domain_irq
gic_handle_irq
el1_irq
exception
__list_del_entry
list_del
zbud_free
zcache_load_page
__cleancache_get_page(?
This shows that allowing softirqs to run while holding the zbud pool lock
can result in deadlocks. To fix this, commit 6a1fdaa36272
("mm: zbud: prevent softirq during zbud alloc, free and reclaim")
took spin_lock_bh during zbud_free, zbud_alloc and
zbud_reclaim. But this resulted in another deadlock.
spin_bug()
do_raw_spin_lock()
_raw_spin_lock_irqsave()
test_clear_page_writeback()
end_page_writeback()
ext4_finish_bio()
ext4_end_bio()
bio_endio()
blk_update_request()
end_clone_bio()
bio_endio()
blk_update_request()
blk_update_bidi_request()
blk_end_request()
mmc_blk_cmdq_complete_rq()
mmc_cmdq_softirq_done()
blk_done_softirq()
__do_softirq()
do_softirq()
__local_bh_enable_ip()
_raw_spin_unlock_bh()
zbud_alloc()
zcache_store_page()
__cleancache_put_page()
__delete_from_page_cache()
__remove_mapping()
shrink_page_list()
Here, the spin_unlock_bh resulted in an explicit invocation of do_softirq,
which led to an attempt to acquire mapping->tree_lock, which was already
taken by __remove_mapping.
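In other words, condensing the call chain from the stack above (only the
lock and unlock calls matter here; this is an illustration, not the exact
code, and the tree_lock locking shown is the usual __remove_mapping pattern):

	/* shrink_page_list() -> __remove_mapping(): tree_lock taken, IRQs off */
	spin_lock_irq(&mapping->tree_lock);
	...
	/* __delete_from_page_cache() -> zcache_store_page() -> zbud_alloc() */
	spin_lock_bh(&pool->lock);
	...
	spin_unlock_bh(&pool->lock);	/* __local_bh_enable_ip() sees a pending
					 * softirq and runs do_softirq() here */
	/* blk_done_softirq() -> end_page_writeback() then spins on
	 * mapping->tree_lock, which this CPU already holds: deadlock */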
The new fix considers the following facts.
1) zcache_store_page is always called from __delete_from_page_cache
with mapping->tree_lock held and interrupts disabled. Thus zbud_alloc,
which is called only from zcache_store_page, is always called with
interrupts disabled.
2) zbud_free and zbud_reclaim_page can be called from zcache with or
without interrupts disabled. So an interrupt received while holding the
zbud pool lock can result in do_softirq and the acquisition of
mapping->tree_lock.
(1) implies zbud_alloc need not explicitly disable bh. But disable
interrupts anyway, to make sure zbud_alloc stays safe with zcache
irrespective of future changes. This fixes the second scenario.
(2) implies zbud_free and zbud_reclaim_page should use spin_lock_irqsave,
so that interrupts are not serviced and, in turn, softirqs are not run.
spin_lock_bh can't be used because spin_unlock_bh can run softirqs
even in a context where interrupts are disabled. This fixes the first scenario.
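The resulting locking pattern (a minimal sketch of the change below, using
the zbud_pool lock as in mm/zbud.c):

	unsigned long flags;

	spin_lock_irqsave(&pool->lock, flags);	/* IRQs off on this CPU, so no
						 * softirq can run here */
	/* ... manipulate pool->unbuddied, pool->lru, pool->pages_nr ... */
	spin_unlock_irqrestore(&pool->lock, flags);	/* restores the caller's IRQ
							 * state and, unlike
							 * spin_unlock_bh(), never
							 * forces a do_softirq() */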
Change-Id: Ibc810525dddf97614db41643642fec7472bd6a2c
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
Diffstat (limited to 'mm/zbud.c')
-rw-r--r--	mm/zbud.c	29
1 file changed, 16 insertions, 13 deletions
diff --git a/mm/zbud.c b/mm/zbud.c
index 04359b845ead..09ab957e2b10 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -357,6 +357,7 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
 	struct zbud_header *zhdr = NULL;
 	enum buddy bud;
 	struct page *page;
+	unsigned long flags;
 	int found = 0;
 
 	if (!size || (gfp & __GFP_HIGHMEM))
@@ -364,7 +365,7 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
 	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
 		return -ENOSPC;
 	chunks = size_to_chunks(size);
-	spin_lock_bh(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 
 	/* First, try to find an unbuddied zbud page. */
 	zhdr = NULL;
@@ -383,11 +384,11 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
 	}
 
 	/* Couldn't find unbuddied zbud page, create new one */
-	spin_unlock_bh(&pool->lock);
+	spin_unlock_irqrestore(&pool->lock, flags);
 	page = alloc_page(gfp);
 	if (!page)
 		return -ENOMEM;
-	spin_lock_bh(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 	pool->pages_nr++;
 	zhdr = init_zbud_page(page);
 	bud = FIRST;
@@ -415,7 +416,7 @@ found:
 	*handle = encode_handle(zhdr, bud);
 	if ((gfp & __GFP_ZERO) && found)
 		memset((void *)*handle, 0, size);
-	spin_unlock_bh(&pool->lock);
+	spin_unlock_irqrestore(&pool->lock, flags);
 
 	return 0;
 }
@@ -434,8 +435,9 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
 {
 	struct zbud_header *zhdr;
 	int freechunks;
+	unsigned long flags;
 
-	spin_lock_bh(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 	zhdr = handle_to_zbud_header(handle);
 
 	/* If first buddy, handle will be page aligned */
@@ -446,7 +448,7 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
 
 	if (zhdr->under_reclaim) {
 		/* zbud page is under reclaim, reclaim will free */
-		spin_unlock_bh(&pool->lock);
+		spin_unlock_irqrestore(&pool->lock, flags);
 		return;
 	}
 
@@ -464,7 +466,7 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
 		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
 	}
 
-	spin_unlock_bh(&pool->lock);
+	spin_unlock_irqrestore(&pool->lock, flags);
 }
 
 #define list_tail_entry(ptr, type, member) \
@@ -509,12 +511,13 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 {
 	int i, ret, freechunks;
 	struct zbud_header *zhdr;
+	unsigned long flags;
 	unsigned long first_handle = 0, last_handle = 0;
 
-	spin_lock_bh(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 	if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
 			retries == 0) {
-		spin_unlock_bh(&pool->lock);
+		spin_unlock_irqrestore(&pool->lock, flags);
 		return -EINVAL;
 	}
 	for (i = 0; i < retries; i++) {
@@ -533,7 +536,7 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 			first_handle = encode_handle(zhdr, FIRST);
 		if (zhdr->last_chunks)
 			last_handle = encode_handle(zhdr, LAST);
-		spin_unlock_bh(&pool->lock);
+		spin_unlock_irqrestore(&pool->lock, flags);
 
 		/* Issue the eviction callback(s) */
 		if (first_handle) {
@@ -547,7 +550,7 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 				goto next;
 		}
 next:
-		spin_lock_bh(&pool->lock);
+		spin_lock_irqsave(&pool->lock, flags);
 		zhdr->under_reclaim = false;
 		if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
 			/*
@@ -556,7 +559,7 @@ next:
 			 */
 			free_zbud_page(zhdr);
 			pool->pages_nr--;
-			spin_unlock_bh(&pool->lock);
+			spin_unlock_irqrestore(&pool->lock, flags);
 			return 0;
 		} else if (zhdr->first_chunks == 0 ||
 				zhdr->last_chunks == 0) {
@@ -571,7 +574,7 @@ next:
 			/* add to beginning of LRU */
 			list_add(&zhdr->lru, &pool->lru);
 		}
-		spin_unlock_bh(&pool->lock);
+		spin_unlock_irqrestore(&pool->lock, flags);
 		return -EAGAIN;
 }