diff options
Diffstat (limited to 'drivers/md')
| -rw-r--r-- | drivers/md/Kconfig | 16 | ||||
| -rw-r--r-- | drivers/md/bcache/bcache.h | 4 | ||||
| -rw-r--r-- | drivers/md/bcache/btree.c | 40 | ||||
| -rw-r--r-- | drivers/md/bcache/btree.h | 3 | ||||
| -rw-r--r-- | drivers/md/bcache/request.c | 4 | ||||
| -rw-r--r-- | drivers/md/bcache/super.c | 2 | ||||
| -rw-r--r-- | drivers/md/dm-cache-target.c | 6 | ||||
| -rw-r--r-- | drivers/md/dm-crypt.c | 7 | ||||
| -rw-r--r-- | drivers/md/dm-flakey.c | 2 | ||||
| -rw-r--r-- | drivers/md/dm-stats.c | 1 | ||||
| -rw-r--r-- | drivers/md/dm-verity-target.c | 9 | ||||
| -rw-r--r-- | drivers/md/dm.c | 55 | ||||
| -rw-r--r-- | drivers/md/linear.c | 39 | ||||
| -rw-r--r-- | drivers/md/linear.h | 1 | ||||
| -rw-r--r-- | drivers/md/md.c | 2 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-space-map-metadata.c | 14 | ||||
| -rw-r--r-- | drivers/md/raid5.c | 9 |
17 files changed, 167 insertions, 47 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 7d5aa2c5c81d..36ca4e4cbfb7 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -475,6 +475,21 @@ config DM_VERITY If unsure, say N. +config DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 + bool "Prefetch size 128" + +config DM_VERITY_HASH_PREFETCH_MIN_SIZE + int "Verity hash prefetch minimum size" + depends on DM_VERITY + range 1 4096 + default 128 if DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 + default 1 + ---help--- + This sets minimum number of hash blocks to prefetch for dm-verity. + For devices like eMMC, having larger prefetch size like 128 can improve + performance with increased memory consumption for keeping more hashes + in RAM. + config DM_VERITY_FEC bool "Verity forward error correction support" depends on DM_VERITY @@ -527,6 +542,7 @@ config DM_ANDROID_VERITY depends on ASYMMETRIC_KEY_TYPE depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE depends on MD_LINEAR + select DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 ---help--- This device-mapper target is virtually a VERITY target. This target is setup by reading the metadata contents piggybacked diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 6b420a55c745..c3ea03c9a1a8 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -425,7 +425,7 @@ struct cache { * until a gc finishes - otherwise we could pointlessly burn a ton of * cpu */ - unsigned invalidate_needs_gc:1; + unsigned invalidate_needs_gc; bool discard; /* Get rid of? */ @@ -593,8 +593,8 @@ struct cache_set { /* Counts how many sectors bio_insert has added to the cache */ atomic_t sectors_to_gc; + wait_queue_head_t gc_wait; - wait_queue_head_t moving_gc_wait; struct keybuf moving_gc_keys; /* Number of moving GC bios in flight */ struct semaphore moving_in_flight; diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 22b9e34ceb75..5b815e64c1c9 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1762,33 +1762,34 @@ static void bch_btree_gc(struct cache_set *c) bch_moving_gc(c); } -static int bch_gc_thread(void *arg) +static bool gc_should_run(struct cache_set *c) { - struct cache_set *c = arg; struct cache *ca; unsigned i; - while (1) { -again: - bch_btree_gc(c); + for_each_cache(ca, c, i) + if (ca->invalidate_needs_gc) + return true; - set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) - break; + if (atomic_read(&c->sectors_to_gc) < 0) + return true; - mutex_lock(&c->bucket_lock); + return false; +} - for_each_cache(ca, c, i) - if (ca->invalidate_needs_gc) { - mutex_unlock(&c->bucket_lock); - set_current_state(TASK_RUNNING); - goto again; - } +static int bch_gc_thread(void *arg) +{ + struct cache_set *c = arg; - mutex_unlock(&c->bucket_lock); + while (1) { + wait_event_interruptible(c->gc_wait, + kthread_should_stop() || gc_should_run(c)); - try_to_freeze(); - schedule(); + if (kthread_should_stop()) + break; + + set_gc_sectors(c); + bch_btree_gc(c); } return 0; @@ -1796,11 +1797,10 @@ again: int bch_gc_thread_start(struct cache_set *c) { - c->gc_thread = kthread_create(bch_gc_thread, c, "bcache_gc"); + c->gc_thread = kthread_run(bch_gc_thread, c, "bcache_gc"); if (IS_ERR(c->gc_thread)) return PTR_ERR(c->gc_thread); - set_task_state(c->gc_thread, TASK_INTERRUPTIBLE); return 0; } diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 5c391fa01bed..9b80417cd547 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -260,8 +260,7 @@ void bch_initial_mark_key(struct cache_set *, int, struct bkey *); static inline void wake_up_gc(struct cache_set *c) { - if (c->gc_thread) - wake_up_process(c->gc_thread); + wake_up(&c->gc_wait); } #define MAP_DONE 0 diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 25fa8445bb24..2410df1c2a05 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -196,10 +196,8 @@ static void bch_data_insert_start(struct closure *cl) struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); struct bio *bio = op->bio, *n; - if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) { - set_gc_sectors(op->c); + if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) wake_up_gc(op->c); - } if (op->bypass) return bch_data_invalidate(cl); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 3d5c0ba13181..7b5880b8874c 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1489,6 +1489,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) mutex_init(&c->bucket_lock); init_waitqueue_head(&c->btree_cache_wait); init_waitqueue_head(&c->bucket_wait); + init_waitqueue_head(&c->gc_wait); sema_init(&c->uuid_write_mutex, 1); spin_lock_init(&c->btree_gc_time.lock); @@ -1547,6 +1548,7 @@ static void run_cache_set(struct cache_set *c) for_each_cache(ca, c, i) c->nbuckets += ca->sb.nbuckets; + set_gc_sectors(c); if (CACHE_SYNC(&c->sb)) { LIST_HEAD(journal); diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index bb9b92ebbf8e..0da5efaad85c 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -248,7 +248,7 @@ struct cache { /* * Fields for converting from sectors to blocks. */ - uint32_t sectors_per_block; + sector_t sectors_per_block; int sectors_per_block_shift; spinlock_t lock; @@ -3544,11 +3544,11 @@ static void cache_status(struct dm_target *ti, status_type_t type, residency = policy_residency(cache->policy); - DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ", + DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ", (unsigned)DM_CACHE_METADATA_BLOCK_SIZE, (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), (unsigned long long)nr_blocks_metadata, - cache->sectors_per_block, + (unsigned long long)cache->sectors_per_block, (unsigned long long) from_cblock(residency), (unsigned long long) from_cblock(cache->cache_size), (unsigned) atomic_read(&cache->stats.read_hit), diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index e540b7942eba..799b9a5ad4f5 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1500,12 +1500,15 @@ static int crypt_set_key(struct crypt_config *cc, char *key) if (!cc->key_size && strcmp(key, "-")) goto out; + /* clear the flag since following operations may invalidate previously valid key */ + clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); + if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0) goto out; - set_bit(DM_CRYPT_KEY_VALID, &cc->flags); - r = crypt_setkey_allcpus(cc); + if (!r) + set_bit(DM_CRYPT_KEY_VALID, &cc->flags); out: /* Hex key string not needed after here, so wipe it. */ diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 8e9e928dafba..78f403b45ab3 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -200,11 +200,13 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (!(fc->up_interval + fc->down_interval)) { ti->error = "Total (up + down) interval is zero"; + r = -EINVAL; goto bad; } if (fc->up_interval + fc->down_interval < fc->up_interval) { ti->error = "Interval overflow"; + r = -EINVAL; goto bad; } diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 8289804ccd99..d5ea9f28ae70 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -175,6 +175,7 @@ static void dm_stat_free(struct rcu_head *head) int cpu; struct dm_stat *s = container_of(head, struct dm_stat, rcu_head); + kfree(s->histogram_boundaries); kfree(s->program_id); kfree(s->aux_data); for_each_possible_cpu(cpu) { diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 9d3d4b297201..c7e97cf6e7fb 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -501,6 +501,7 @@ static void verity_prefetch_io(struct work_struct *work) container_of(work, struct dm_verity_prefetch_work, work); struct dm_verity *v = pw->v; int i; + sector_t prefetch_size; for (i = v->levels - 2; i >= 0; i--) { sector_t hash_block_start; @@ -523,8 +524,14 @@ static void verity_prefetch_io(struct work_struct *work) hash_block_end = v->hash_blocks - 1; } no_prefetch_cluster: + // for emmc, it is more efficient to send bigger read + prefetch_size = max((sector_t)CONFIG_DM_VERITY_HASH_PREFETCH_MIN_SIZE, + hash_block_end - hash_block_start + 1); + if ((hash_block_start + prefetch_size) >= (v->hash_start + v->hash_blocks)) { + prefetch_size = hash_block_end - hash_block_start + 1; + } dm_bufio_prefetch(v->bufio, hash_block_start, - hash_block_end - hash_block_start + 1); + prefetch_size); } kfree(pw); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index dd1cde5458b8..e9b34de2319e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1467,11 +1467,62 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) } EXPORT_SYMBOL_GPL(dm_accept_partial_bio); +/* + * Flush current->bio_list when the target map method blocks. + * This fixes deadlocks in snapshot and possibly in other targets. + */ +struct dm_offload { + struct blk_plug plug; + struct blk_plug_cb cb; +}; + +static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule) +{ + struct dm_offload *o = container_of(cb, struct dm_offload, cb); + struct bio_list list; + struct bio *bio; + + INIT_LIST_HEAD(&o->cb.list); + + if (unlikely(!current->bio_list)) + return; + + list = *current->bio_list; + bio_list_init(current->bio_list); + + while ((bio = bio_list_pop(&list))) { + struct bio_set *bs = bio->bi_pool; + if (unlikely(!bs) || bs == fs_bio_set) { + bio_list_add(current->bio_list, bio); + continue; + } + + spin_lock(&bs->rescue_lock); + bio_list_add(&bs->rescue_list, bio); + queue_work(bs->rescue_workqueue, &bs->rescue_work); + spin_unlock(&bs->rescue_lock); + } +} + +static void dm_offload_start(struct dm_offload *o) +{ + blk_start_plug(&o->plug); + o->cb.callback = flush_current_bio_list; + list_add(&o->cb.list, ¤t->plug->cb_list); +} + +static void dm_offload_end(struct dm_offload *o) +{ + list_del(&o->cb.list); + blk_finish_plug(&o->plug); +} + static void __map_bio(struct dm_target_io *tio) { int r; sector_t sector; struct mapped_device *md; + struct dm_offload o; struct bio *clone = &tio->clone; struct dm_target *ti = tio->ti; @@ -1484,7 +1535,11 @@ static void __map_bio(struct dm_target_io *tio) */ atomic_inc(&tio->io->io_count); sector = clone->bi_iter.bi_sector; + + dm_offload_start(&o); r = ti->type->map(ti, clone); + dm_offload_end(&o); + if (r == DM_MAPIO_REMAPPED) { /* the bio has been remapped so dispatch it */ diff --git a/drivers/md/linear.c b/drivers/md/linear.c index b7fe7e9fc777..6ba3227e29b2 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -52,18 +52,26 @@ static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector) return conf->disks + lo; } +/* + * In linear_congested() conf->raid_disks is used as a copy of + * mddev->raid_disks to iterate conf->disks[], because conf->raid_disks + * and conf->disks[] are created in linear_conf(), they are always + * consitent with each other, but mddev->raid_disks does not. + */ static int linear_congested(struct mddev *mddev, int bits) { struct linear_conf *conf; int i, ret = 0; - conf = mddev->private; + rcu_read_lock(); + conf = rcu_dereference(mddev->private); - for (i = 0; i < mddev->raid_disks && !ret ; i++) { + for (i = 0; i < conf->raid_disks && !ret ; i++) { struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev); ret |= bdi_congested(&q->backing_dev_info, bits); } + rcu_read_unlock(); return ret; } @@ -143,6 +151,19 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) conf->disks[i-1].end_sector + conf->disks[i].rdev->sectors; + /* + * conf->raid_disks is copy of mddev->raid_disks. The reason to + * keep a copy of mddev->raid_disks in struct linear_conf is, + * mddev->raid_disks may not be consistent with pointers number of + * conf->disks[] when it is updated in linear_add() and used to + * iterate old conf->disks[] earray in linear_congested(). + * Here conf->raid_disks is always consitent with number of + * pointers in conf->disks[] array, and mddev->private is updated + * with rcu_assign_pointer() in linear_addr(), such race can be + * avoided. + */ + conf->raid_disks = raid_disks; + return conf; out: @@ -195,15 +216,23 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev) if (!newconf) return -ENOMEM; + /* newconf->raid_disks already keeps a copy of * the increased + * value of mddev->raid_disks, WARN_ONCE() is just used to make + * sure of this. It is possible that oldconf is still referenced + * in linear_congested(), therefore kfree_rcu() is used to free + * oldconf until no one uses it anymore. + */ mddev_suspend(mddev); - oldconf = mddev->private; + oldconf = rcu_dereference(mddev->private); mddev->raid_disks++; - mddev->private = newconf; + WARN_ONCE(mddev->raid_disks != newconf->raid_disks, + "copied raid_disks doesn't match mddev->raid_disks"); + rcu_assign_pointer(mddev->private, newconf); md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); set_capacity(mddev->gendisk, mddev->array_sectors); mddev_resume(mddev); revalidate_disk(mddev->gendisk); - kfree(oldconf); + kfree_rcu(oldconf, rcu); return 0; } diff --git a/drivers/md/linear.h b/drivers/md/linear.h index b685ddd7d7f7..8d392e6098b3 100644 --- a/drivers/md/linear.h +++ b/drivers/md/linear.h @@ -10,6 +10,7 @@ struct linear_conf { struct rcu_head rcu; sector_t array_sectors; + int raid_disks; /* a copy of mddev->raid_disks */ struct dev_info disks[0]; }; #endif diff --git a/drivers/md/md.c b/drivers/md/md.c index c1c7d4fb4b77..eff554a12fb4 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6771,7 +6771,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, /* need to ensure recovery thread has run */ wait_event_interruptible_timeout(mddev->sb_wait, !test_bit(MD_RECOVERY_NEEDED, - &mddev->flags), + &mddev->recovery), msecs_to_jiffies(5000)); if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) { /* Need to flush page cache, and ensure no-one else opens diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 7e44005595c1..20557e2c60c6 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -775,17 +775,15 @@ int dm_sm_metadata_create(struct dm_space_map *sm, memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); r = sm_ll_new_metadata(&smm->ll, tm); + if (!r) { + if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) + nr_blocks = DM_SM_METADATA_MAX_BLOCKS; + r = sm_ll_extend(&smm->ll, nr_blocks); + } + memcpy(&smm->sm, &ops, sizeof(smm->sm)); if (r) return r; - if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) - nr_blocks = DM_SM_METADATA_MAX_BLOCKS; - r = sm_ll_extend(&smm->ll, nr_blocks); - if (r) - return r; - - memcpy(&smm->sm, &ops, sizeof(smm->sm)); - /* * Now we need to update the newly created data structures with the * allocated blocks that they were built from. diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 10ce885445f6..7af976934441 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6980,6 +6980,15 @@ static int run(struct mddev *mddev) stripe = (stripe | (stripe-1)) + 1; mddev->queue->limits.discard_alignment = stripe; mddev->queue->limits.discard_granularity = stripe; + + /* + * We use 16-bit counter of active stripes in bi_phys_segments + * (minus one for over-loaded initialization) + */ + blk_queue_max_hw_sectors(mddev->queue, 0xfffe * STRIPE_SECTORS); + blk_queue_max_discard_sectors(mddev->queue, + 0xfffe * STRIPE_SECTORS); + /* * unaligned part of discard request will be ignored, so can't * guarantee discard_zeroes_data |
