author		Vinayak Menon <vinmenon@codeaurora.org>	2016-06-28 09:48:42 +0530
committer	Vinayak Menon <vinmenon@codeaurora.org>	2016-08-25 11:49:45 +0530
commit		47b41f43ee8d38cbceb8b1513b712e4924e4e2e7 (patch)
tree		557fa5e419cbb9722eac246f8edc855d3bf2e939 /mm/zbud.c
parent		e97b6a0e0217f7c072fdad6c50673cd7a64348e1 (diff)
mm: zbud: fix the locking scenarios with zcache
With zcache using zbud, strange locking scenarios are
observed. The first problem seen is:
Core 2 waiting on mapping->tree_lock which is taken by core 6
do_raw_spin_lock
raw_spin_lock_irq
atomic_cmpxchg
page_freeze_refs
__remove_mapping
shrink_page_list
Core 6 after taking mapping->tree_lock is waiting on zbud pool lock
which is held by core 5
zbud_alloc
zcache_store_page
__cleancache_put_page
cleancache_put_page
__delete_from_page_cache
spin_unlock_irq
__remove_mapping
shrink_page_list
shrink_inactive_list
Core 5, after taking the zbud pool lock in zbud_free, received an IRQ; after
IRQ exit, softirqs were run and end_page_writeback tried to take
mapping->tree_lock, which was already held by core 6. Deadlock.
do_raw_spin_lock
raw_spin_lock_irqsave
test_clear_page_writeba
end_page_writeback
ext4_finish_bio
ext4_end_bio
bio_endio
blk_update_request
end_clone_bio
bio_endio
blk_update_request
blk_update_bidi_request
blk_end_bidi_request
blk_end_request
mmc_blk_cmdq_complete_r
mmc_cmdq_softirq_done
blk_done_softirq
static_key_count
static_key_false
trace_softirq_exit
__do_softirq()
tick_irq_exit
irq_exit()
set_irq_regs
__handle_domain_irq
gic_handle_irq
el1_irq
exception
__list_del_entry
list_del
zbud_free
zcache_load_page
__cleancache_get_page(?
This shows that allowing softirqs to run while holding the zbud pool lock
can result in deadlocks. To fix this, commit 6a1fdaa36272
("mm: zbud: prevent softirq during zbud alloc, free and reclaim")
took spin_lock_bh during zbud_free, zbud_alloc and
zbud_reclaim. But this resulted in another deadlock.
spin_bug()
do_raw_spin_lock()
_raw_spin_lock_irqsave()
test_clear_page_writeback()
end_page_writeback()
ext4_finish_bio()
ext4_end_bio()
bio_endio()
blk_update_request()
end_clone_bio()
bio_endio()
blk_update_request()
blk_update_bidi_request()
blk_end_request()
mmc_blk_cmdq_complete_rq()
mmc_cmdq_softirq_done()
blk_done_softirq()
__do_softirq()
do_softirq()
__local_bh_enable_ip()
_raw_spin_unlock_bh()
zbud_alloc()
zcache_store_page()
__cleancache_put_page()
__delete_from_page_cache()
__remove_mapping()
shrink_page_list()
Here, the spin_unlock_bh resulted in an explicit invocation of do_softirq,
which led to an attempt to acquire mapping->tree_lock, which was already
taken by __remove_mapping.
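In other words, condensing the call chain from the stack above (only the
lock and unlock calls matter here; this is an illustration, not the exact
code, and the tree_lock locking shown is the usual __remove_mapping pattern):

	/* shrink_page_list() -> __remove_mapping(): tree_lock taken, IRQs off */
	spin_lock_irq(&mapping->tree_lock);
	...
	/* __delete_from_page_cache() -> zcache_store_page() -> zbud_alloc() */
	spin_lock_bh(&pool->lock);
	...
	spin_unlock_bh(&pool->lock);	/* __local_bh_enable_ip() sees a pending
					 * softirq and runs do_softirq() here */
	/* blk_done_softirq() -> end_page_writeback() then spins on
	 * mapping->tree_lock, which this CPU already holds: deadlock */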
The new fix considers the following facts.
1) zcache_store_page is always called from __delete_from_page_cache
with mapping->tree_lock held and interrupts disabled. Thus zbud_alloc,
which is called only from zcache_store_page, is always called with
interrupts disabled.
2) zbud_free and zbud_reclaim_page can be called from zcache with or
without interrupts disabled. So an interrupt received while holding the
zbud pool lock can result in do_softirq and the acquisition of
mapping->tree_lock.
(1) implies zbud_alloc need not explicitly disable bh. But disable
interrupts anyway, to make sure zbud_alloc stays safe with zcache
irrespective of future changes. This fixes the second scenario.
(2) implies zbud_free and zbud_reclaim_page should use spin_lock_irqsave,
so that interrupts are not serviced and, in turn, softirqs are not run.
spin_lock_bh can't be used because spin_unlock_bh can run softirqs
even in a context where interrupts are disabled. This fixes the first scenario.
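The resulting locking pattern (a minimal sketch of the change below, using
the zbud_pool lock as in mm/zbud.c):

	unsigned long flags;

	spin_lock_irqsave(&pool->lock, flags);	/* IRQs off on this CPU, so no
						 * softirq can run here */
	/* ... manipulate pool->unbuddied, pool->lru, pool->pages_nr ... */
	spin_unlock_irqrestore(&pool->lock, flags);	/* restores the caller's IRQ
							 * state and, unlike
							 * spin_unlock_bh(), never
							 * forces a do_softirq() */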
Change-Id: Ibc810525dddf97614db41643642fec7472bd6a2c
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
Diffstat (limited to 'mm/zbud.c')
-rw-r--r--	mm/zbud.c	29
1 file changed, 16 insertions, 13 deletions
diff --git a/mm/zbud.c b/mm/zbud.c
index 04359b845ead..09ab957e2b10 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -357,6 +357,7 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
 	struct zbud_header *zhdr = NULL;
 	enum buddy bud;
 	struct page *page;
+	unsigned long flags;
 	int found = 0;
 
 	if (!size || (gfp & __GFP_HIGHMEM))
@@ -364,7 +365,7 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
 	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
 		return -ENOSPC;
 	chunks = size_to_chunks(size);
-	spin_lock_bh(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 
 	/* First, try to find an unbuddied zbud page. */
 	zhdr = NULL;
@@ -383,11 +384,11 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
 	}
 
 	/* Couldn't find unbuddied zbud page, create new one */
-	spin_unlock_bh(&pool->lock);
+	spin_unlock_irqrestore(&pool->lock, flags);
 	page = alloc_page(gfp);
 	if (!page)
 		return -ENOMEM;
-	spin_lock_bh(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 	pool->pages_nr++;
 	zhdr = init_zbud_page(page);
 	bud = FIRST;
@@ -415,7 +416,7 @@ found:
 	*handle = encode_handle(zhdr, bud);
 	if ((gfp & __GFP_ZERO) && found)
 		memset((void *)*handle, 0, size);
-	spin_unlock_bh(&pool->lock);
+	spin_unlock_irqrestore(&pool->lock, flags);
 
 	return 0;
 }
@@ -434,8 +435,9 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
 {
 	struct zbud_header *zhdr;
 	int freechunks;
+	unsigned long flags;
 
-	spin_lock_bh(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 	zhdr = handle_to_zbud_header(handle);
 
 	/* If first buddy, handle will be page aligned */
@@ -446,7 +448,7 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
 
 	if (zhdr->under_reclaim) {
 		/* zbud page is under reclaim, reclaim will free */
-		spin_unlock_bh(&pool->lock);
+		spin_unlock_irqrestore(&pool->lock, flags);
 		return;
 	}
 
@@ -464,7 +466,7 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
 		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
 	}
 
-	spin_unlock_bh(&pool->lock);
+	spin_unlock_irqrestore(&pool->lock, flags);
 }
 
 #define list_tail_entry(ptr, type, member) \
@@ -509,12 +511,13 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 {
 	int i, ret, freechunks;
 	struct zbud_header *zhdr;
+	unsigned long flags;
 	unsigned long first_handle = 0, last_handle = 0;
 
-	spin_lock_bh(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 	if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
 			retries == 0) {
-		spin_unlock_bh(&pool->lock);
+		spin_unlock_irqrestore(&pool->lock, flags);
 		return -EINVAL;
 	}
 	for (i = 0; i < retries; i++) {
@@ -533,7 +536,7 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 			first_handle = encode_handle(zhdr, FIRST);
 		if (zhdr->last_chunks)
 			last_handle = encode_handle(zhdr, LAST);
-		spin_unlock_bh(&pool->lock);
+		spin_unlock_irqrestore(&pool->lock, flags);
 
 		/* Issue the eviction callback(s) */
 		if (first_handle) {
@@ -547,7 +550,7 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 				goto next;
 		}
 next:
-		spin_lock_bh(&pool->lock);
+		spin_lock_irqsave(&pool->lock, flags);
 		zhdr->under_reclaim = false;
 		if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
 			/*
@@ -556,7 +559,7 @@ next:
 			 */
 			free_zbud_page(zhdr);
 			pool->pages_nr--;
-			spin_unlock_bh(&pool->lock);
+			spin_unlock_irqrestore(&pool->lock, flags);
 			return 0;
 		} else if (zhdr->first_chunks == 0 ||
 				zhdr->last_chunks == 0) {
@@ -571,7 +574,7 @@ next:
 			/* add to beginning of LRU */
 			list_add(&zhdr->lru, &pool->lru);
 		}
-		spin_unlock_bh(&pool->lock);
+		spin_unlock_irqrestore(&pool->lock, flags);
 		return -EAGAIN;
 }