Diffstat (limited to 'drivers/md')
-rw-r--r--   drivers/md/bcache/alloc.c        19
-rw-r--r--   drivers/md/bcache/super.c        17
-rw-r--r--   drivers/md/dm-bufio.c             5
-rw-r--r--   drivers/md/dm-io.c                1
-rw-r--r--   drivers/md/dm-ioctl.c             4
-rw-r--r--   drivers/md/dm-verity-target.c    65
-rw-r--r--   drivers/md/dm-verity.h            1
-rw-r--r--   drivers/md/dm.c                   3
-rw-r--r--   drivers/md/md-cluster.c           4
-rw-r--r--   drivers/md/md.c                  10
-rw-r--r--   drivers/md/raid10.c               7
-rw-r--r--   drivers/md/raid5.c               30
12 files changed, 135 insertions, 31 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 4d46f2ce606f..aa84fcfd59fc 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -514,15 +514,21 @@ struct open_bucket {
 
 /*
  * We keep multiple buckets open for writes, and try to segregate different
- * write streams for better cache utilization: first we look for a bucket where
- * the last write to it was sequential with the current write, and failing that
- * we look for a bucket that was last used by the same task.
+ * write streams for better cache utilization: first we try to segregate flash
+ * only volume write streams from cached devices, secondly we look for a bucket
+ * where the last write to it was sequential with the current write, and
+ * failing that we look for a bucket that was last used by the same task.
  *
  * The ideas is if you've got multiple tasks pulling data into the cache at the
  * same time, you'll get better cache utilization if you try to segregate their
  * data and preserve locality.
  *
- * For example, say you've starting Firefox at the same time you're copying a
+ * For example, dirty sectors of flash only volume is not reclaimable, if their
+ * dirty sectors mixed with dirty sectors of cached device, such buckets will
+ * be marked as dirty and won't be reclaimed, though the dirty data of cached
+ * device have been written back to backend device.
+ *
+ * And say you've starting Firefox at the same time you're copying a
  * bunch of files. Firefox will likely end up being fairly hot and stay in the
  * cache awhile, but the data you copied might not be; if you wrote all that
  * data to the same buckets it'd get invalidated at the same time.
@@ -539,7 +545,10 @@ static struct open_bucket *pick_data_bucket(struct cache_set *c,
         struct open_bucket *ret, *ret_task = NULL;
 
         list_for_each_entry_reverse(ret, &c->data_buckets, list)
-                if (!bkey_cmp(&ret->key, search))
+                if (UUID_FLASH_ONLY(&c->uuids[KEY_INODE(&ret->key)]) !=
+                    UUID_FLASH_ONLY(&c->uuids[KEY_INODE(search)]))
+                        continue;
+                else if (!bkey_cmp(&ret->key, search))
                         goto found;
                 else if (ret->last_write_point == write_point)
                         ret_task = ret;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index c2248b75f2da..b9a526271f02 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -890,6 +890,12 @@ static void cached_dev_detach_finish(struct work_struct *w)
 
         mutex_lock(&bch_register_lock);
 
+        cancel_delayed_work_sync(&dc->writeback_rate_update);
+        if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
+                kthread_stop(dc->writeback_thread);
+                dc->writeback_thread = NULL;
+        }
+
         memset(&dc->sb.set_uuid, 0, 16);
         SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE);
 
@@ -935,6 +941,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
         uint32_t rtime = cpu_to_le32(get_seconds());
         struct uuid_entry *u;
         char buf[BDEVNAME_SIZE];
+        struct cached_dev *exist_dc, *t;
 
         bdevname(dc->bdev, buf);
 
@@ -958,6 +965,16 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
                 return -EINVAL;
         }
 
+        /* Check whether already attached */
+        list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) {
+                if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) {
+                        pr_err("Tried to attach %s but duplicate UUID already attached",
+                               buf);
+
+                        return -EINVAL;
+                }
+        }
+
         u = uuid_find(c, dc->sb.uuid);
 
         if (u &&
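The three-level policy in pick_data_bucket() is easier to follow outside the kernel. Below is a minimal userspace C sketch of the same selection order; struct open_bucket here is a simplified stand-in (flash_only, next_sector, write_point) and pick_bucket() is a hypothetical helper, not the real bcache types or API. A bucket whose stream class differs is skipped outright, a sequential match wins immediately, and a same-task bucket is kept as a fallback.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel structures (hypothetical). */
struct open_bucket {
        bool flash_only;           /* stream class of the last write  */
        unsigned long next_sector; /* sector one past the last write  */
        unsigned long write_point; /* task that last used this bucket */
};

/*
 * Pick a bucket for a new write, mirroring the three-level policy:
 * 1. never mix flash-only and cached-device streams in one bucket,
 * 2. prefer a bucket whose last write is sequential with this one,
 * 3. else reuse a bucket last used by the same task.
 */
static struct open_bucket *pick_bucket(struct open_bucket *b, size_t n,
                                       bool flash_only,
                                       unsigned long sector,
                                       unsigned long write_point)
{
        struct open_bucket *same_task = NULL;

        for (size_t i = 0; i < n; i++) {
                if (b[i].flash_only != flash_only)
                        continue;               /* rule 1: segregate streams */
                if (b[i].next_sector == sector)
                        return &b[i];           /* rule 2: sequential write  */
                if (b[i].write_point == write_point)
                        same_task = &b[i];      /* rule 3: remember fallback */
        }
        return same_task;                       /* may be NULL: open new one */
}

int main(void)
{
        struct open_bucket buckets[] = {
                { .flash_only = true,  .next_sector = 100, .write_point = 2 },
                { .flash_only = false, .next_sector = 100, .write_point = 2 },
        };
        /* A cached-device write at sector 100 must not land in bucket 0. */
        struct open_bucket *ob = pick_bucket(buckets, 2, false, 100, 2);
        printf("picked bucket %ld\n", ob ? (long)(ob - buckets) : -1L);
        return 0;
}

The segregation matters for the reason the new comment gives: dirty flash-only-volume sectors are never reclaimable, so a bucket that mixes them with cached-device data stays pinned even after the cached-device data has been written back.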
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 969c815c90b6..d566c32e222a 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -818,7 +818,8 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client
          * dm-bufio is resistant to allocation failures (it just keeps
          * one buffer reserved in cases all the allocations fail).
          * So set flags to not try too hard:
-         *      GFP_NOIO: don't recurse into the I/O layer
+         *      GFP_NOWAIT: don't wait; if we need to sleep we'll release our
+         *                  mutex and wait ourselves.
          *      __GFP_NORETRY: don't retry and rather return failure
          *      __GFP_NOMEMALLOC: don't use emergency reserves
          *      __GFP_NOWARN: don't print a warning in case of failure
@@ -828,7 +829,7 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client
          */
         while (1) {
                 if (dm_bufio_cache_size_latch != 1) {
-                        b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+                        b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
                         if (b)
                                 return b;
                 }
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 81c5e1a1f363..1b84d2890fbf 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -300,6 +300,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
         else if (rw & REQ_WRITE_SAME)
                 special_cmd_max_sectors = q->limits.max_write_same_sectors;
         if ((rw & (REQ_DISCARD | REQ_WRITE_SAME)) && special_cmd_max_sectors == 0) {
+                atomic_inc(&io->count);
                 dec_count(io, region, -EOPNOTSUPP);
                 return;
         }
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 7baeeafa059d..065d7cee0d21 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1773,12 +1773,12 @@ static int validate_params(uint cmd, struct dm_ioctl *param)
             cmd == DM_LIST_VERSIONS_CMD)
                 return 0;
 
-        if ((cmd == DM_DEV_CREATE_CMD)) {
+        if (cmd == DM_DEV_CREATE_CMD) {
                 if (!*param->name) {
                         DMWARN("name not supplied when creating device");
                         return -EINVAL;
                 }
-        } else if ((*param->uuid && *param->name)) {
+        } else if (*param->uuid && *param->name) {
                 DMWARN("only supply one of name or uuid, cmd(%u)", cmd);
                 return -EINVAL;
         }
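The GFP_NOIO to GFP_NOWAIT switch in dm-bufio encodes a common pattern: while the client mutex is held, only an opportunistic non-sleeping allocation is attempted, and the caller sleeps only after dropping the lock, so reclaim never blocks other users of the client. A rough pthreads sketch of that pattern, with try_alloc_nowait(), wait_for_reclaim(), and client_lock as illustrative stand-ins rather than dm-bufio's actual API:

#include <pthread.h>
#include <stdlib.h>

/* Hypothetical buffer type and client lock, standing in for dm-bufio. */
struct buffer { char data[4096]; };

static pthread_mutex_t client_lock = PTHREAD_MUTEX_INITIALIZER;

/* Model of alloc_buffer(): may fail instead of blocking. */
static struct buffer *try_alloc_nowait(void)
{
        return malloc(sizeof(struct buffer)); /* pretend this can fail */
}

static void wait_for_reclaim(void)
{
        /* in dm-bufio this waits for another buffer to be freed */
}

/*
 * Called with client_lock held: try a non-sleeping allocation first
 * (like GFP_NOWAIT); on failure, release the mutex before waiting.
 */
static struct buffer *alloc_buffer_locked(void)
{
        for (;;) {
                struct buffer *b = try_alloc_nowait();
                if (b)
                        return b;
                pthread_mutex_unlock(&client_lock); /* drop the mutex ... */
                wait_for_reclaim();                 /* ... then sleep     */
                pthread_mutex_lock(&client_lock);
        }
}

int main(void)
{
        pthread_mutex_lock(&client_lock);
        struct buffer *b = alloc_buffer_locked();
        pthread_mutex_unlock(&client_lock);
        free(b);
        return 0;
}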
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index e34cf53bd068..ceff074b3b74 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -19,6 +19,7 @@
 
 #include <linux/module.h>
 #include <linux/reboot.h>
+#include <linux/vmalloc.h>
 
 #define DM_MSG_PREFIX                   "verity"
 
@@ -32,6 +33,7 @@
 #define DM_VERITY_OPT_LOGGING           "ignore_corruption"
 #define DM_VERITY_OPT_RESTART           "restart_on_corruption"
 #define DM_VERITY_OPT_IGN_ZEROES        "ignore_zero_blocks"
+#define DM_VERITY_OPT_AT_MOST_ONCE      "check_at_most_once"
 
 #define DM_VERITY_OPTS_MAX              (2 + DM_VERITY_OPTS_FEC)
 
@@ -399,6 +401,18 @@ static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io,
 }
 
 /*
+ * Moves the bio iter one data block forward.
+ */
+static inline void verity_bv_skip_block(struct dm_verity *v,
+                                        struct dm_verity_io *io,
+                                        struct bvec_iter *iter)
+{
+        struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size);
+
+        bio_advance_iter(bio, iter, 1 << v->data_dev_block_bits);
+}
+
+/*
  * Verify one "dm_verity_io" structure.
  */
 static int verity_verify_io(struct dm_verity_io *io)
@@ -410,9 +424,16 @@ static int verity_verify_io(struct dm_verity_io *io)
 
         for (b = 0; b < io->n_blocks; b++) {
                 int r;
+                sector_t cur_block = io->block + b;
                 struct shash_desc *desc = verity_io_hash_desc(v, io);
 
-                r = verity_hash_for_block(v, io, io->block + b,
+                if (v->validated_blocks &&
+                    likely(test_bit(cur_block, v->validated_blocks))) {
+                        verity_bv_skip_block(v, io, &io->iter);
+                        continue;
+                }
+
+                r = verity_hash_for_block(v, io, cur_block,
                                           verity_io_want_digest(v, io),
                                           &is_zero);
                 if (unlikely(r < 0))
@@ -445,13 +466,16 @@
                         return r;
 
                 if (likely(memcmp(verity_io_real_digest(v, io),
-                                  verity_io_want_digest(v, io), v->digest_size) == 0))
+                                  verity_io_want_digest(v, io), v->digest_size) == 0)) {
+                        if (v->validated_blocks)
+                                set_bit(cur_block, v->validated_blocks);
                         continue;
+                }
                 else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA,
-                                           io->block + b, NULL, &start) == 0)
+                                           cur_block, NULL, &start) == 0)
                         continue;
                 else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
-                                           io->block + b))
+                                           cur_block))
                         return -EIO;
         }
 
@@ -645,6 +669,8 @@ void verity_status(struct dm_target *ti, status_type_t type,
                         args += DM_VERITY_OPTS_FEC;
                 if (v->zero_digest)
                         args++;
+                if (v->validated_blocks)
+                        args++;
                 if (!args)
                         return;
                 DMEMIT(" %u", args);
@@ -663,6 +689,8 @@ void verity_status(struct dm_target *ti, status_type_t type,
                 }
                 if (v->zero_digest)
                         DMEMIT(" " DM_VERITY_OPT_IGN_ZEROES);
+                if (v->validated_blocks)
+                        DMEMIT(" " DM_VERITY_OPT_AT_MOST_ONCE);
                 sz = verity_fec_status_table(v, sz, result, maxlen);
                 break;
         }
@@ -716,6 +744,7 @@ void verity_dtr(struct dm_target *ti)
         if (v->bufio)
                 dm_bufio_client_destroy(v->bufio);
 
+        vfree(v->validated_blocks);
         kfree(v->salt);
         kfree(v->root_digest);
         kfree(v->zero_digest);
@@ -737,6 +766,26 @@ void verity_dtr(struct dm_target *ti)
 }
 EXPORT_SYMBOL_GPL(verity_dtr);
 
+static int verity_alloc_most_once(struct dm_verity *v)
+{
+        struct dm_target *ti = v->ti;
+
+        /* the bitset can only handle INT_MAX blocks */
+        if (v->data_blocks > INT_MAX) {
+                ti->error = "device too large to use check_at_most_once";
+                return -E2BIG;
+        }
+
+        v->validated_blocks = vzalloc(BITS_TO_LONGS(v->data_blocks) *
+                                      sizeof(unsigned long));
+        if (!v->validated_blocks) {
+                ti->error = "failed to allocate bitset for check_at_most_once";
+                return -ENOMEM;
+        }
+
+        return 0;
+}
+
 static int verity_alloc_zero_digest(struct dm_verity *v)
 {
         int r = -ENOMEM;
@@ -806,6 +855,12 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v)
                         }
                         continue;
 
+                } else if (!strcasecmp(arg_name, DM_VERITY_OPT_AT_MOST_ONCE)) {
+                        r = verity_alloc_most_once(v);
+                        if (r)
+                                return r;
+                        continue;
+
                 } else if (verity_is_fec_opt_arg(arg_name)) {
                         r = verity_fec_parse_opt_args(as, v, &argc, arg_name);
                         if (r)
@@ -1074,7 +1129,7 @@ EXPORT_SYMBOL_GPL(verity_ctr);
 
 static struct target_type verity_target = {
         .name           = "verity",
-        .version        = {1, 3, 0},
+        .version        = {1, 4, 0},
         .module         = THIS_MODULE,
         .ctr            = verity_ctr,
         .dtr            = verity_dtr,
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index a90d1d416107..d216fc76d350 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -63,6 +63,7 @@ struct dm_verity {
         sector_t hash_level_block[DM_VERITY_MAX_LEVELS];
 
         struct dm_verity_fec *fec;      /* forward error correction */
+        unsigned long *validated_blocks;        /* bitset blocks validated */
 };
 
 struct dm_verity_io {
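check_at_most_once trades verification strength for read latency: a data block's hash is checked only the first time the block is read, and one bit per block in validated_blocks remembers that. A self-contained sketch of the bookkeeping, using non-atomic stand-ins for the kernel's test_bit()/set_bit(); verity_state and verify_block() are hypothetical simplifications, not the dm-verity types:

#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>

#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* Non-atomic equivalents of the kernel bitops, for illustration. */
static bool test_bit(unsigned long nr, const unsigned long *map)
{
        return (map[nr / BITS_PER_LONG] >> (nr % BITS_PER_LONG)) & 1;
}

static void set_bit(unsigned long nr, unsigned long *map)
{
        map[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
}

/* One bit per data block: set once the block has verified cleanly. */
struct verity_state {
        unsigned long *validated_blocks;
        unsigned long data_blocks;
};

static bool verify_block(struct verity_state *v, unsigned long block)
{
        if (v->validated_blocks && test_bit(block, v->validated_blocks))
                return true;            /* already verified once: skip */
        /* ... hash the block and compare against the hash tree here ... */
        if (v->validated_blocks)
                set_bit(block, v->validated_blocks);
        return true;
}

int main(void)
{
        struct verity_state v = { .data_blocks = 1024 };

        v.validated_blocks = calloc(BITS_TO_LONGS(v.data_blocks),
                                    sizeof(unsigned long));
        if (!v.validated_blocks)
                return 1;
        verify_block(&v, 7);    /* first read: hashed and recorded   */
        verify_block(&v, 7);    /* second read: skipped via the bit  */
        free(v.validated_blocks);
        return 0;
}

The bitmap costs about one byte per eight blocks, so a 1 TiB device with 4 KiB blocks (2^28 blocks) needs roughly 32 MiB, which is why verity_alloc_most_once() uses vzalloc() and caps the block count.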
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f7f560f5f056..f002d2ce9c9f 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -974,7 +974,8 @@ static void dec_pending(struct dm_io *io, int error)
                 } else {
                         /* done with normal IO or empty flush */
                         trace_block_bio_complete(md->queue, bio, io_error);
-                        bio->bi_error = io_error;
+                        if (io_error)
+                                bio->bi_error = io_error;
                         bio_endio(bio);
                 }
         }
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 494d01d0e92a..a7a561af05c9 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -945,8 +945,10 @@ static int add_new_disk(struct mddev *mddev, struct md_rdev *rdev)
         cmsg.raid_slot = cpu_to_le32(rdev->desc_nr);
         lock_comm(cinfo);
         ret = __sendmsg(cinfo, &cmsg);
-        if (ret)
+        if (ret) {
+                unlock_comm(cinfo);
                 return ret;
+        }
         cinfo->no_new_dev_lockres->flags |= DLM_LKF_NOQUEUE;
         ret = dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_EX);
         cinfo->no_new_dev_lockres->flags &= ~DLM_LKF_NOQUEUE;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1cd819202553..3a9685fe115c 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1028,8 +1028,9 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
          * (not needed for Linear and RAID0 as metadata doesn't
          * record this size)
          */
-        if (rdev->sectors >= (2ULL << 32) && sb->level >= 1)
-                rdev->sectors = (2ULL << 32) - 2;
+        if (IS_ENABLED(CONFIG_LBDAF) && (u64)rdev->sectors >= (2ULL << 32) &&
+            sb->level >= 1)
+                rdev->sectors = (sector_t)(2ULL << 32) - 2;
 
         if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
                 /* "this cannot possibly happen" ... */
@@ -1322,8 +1323,9 @@ super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
         /* Limit to 4TB as metadata cannot record more than that.
          * 4TB == 2^32 KB, or 2*2^32 sectors.
          */
-        if (num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
-                num_sectors = (2ULL << 32) - 2;
+        if (IS_ENABLED(CONFIG_LBDAF) && (u64)num_sectors >= (2ULL << 32) &&
+            rdev->mddev->level >= 1)
+                num_sectors = (sector_t)(2ULL << 32) - 2;
         md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
                        rdev->sb_page);
         md_super_wait(rdev->mddev);
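The md.c clamps come from v0.90 superblock arithmetic: the metadata stores the device size as a 32-bit count of kilobytes, so anything at or above 2^32 KB (4 TB, i.e. 2*2^32 sectors of 512 bytes) cannot be recorded and is clamped to (2ULL << 32) - 2 sectors, which is exactly 2^32 - 1 KB and still fits the field. The IS_ENABLED(CONFIG_LBDAF) guard compiles the clamp out on 32-bit kernels without large-block-device support, where sector_t is 32 bits and the comparison (without the u64 cast) could never be true. A short standalone check of the numbers, plain arithmetic rather than kernel code:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t max_kb      = 1ULL << 32;  /* 32-bit KB field: 2^32 KB = 4 TB */
        uint64_t max_sectors = 2ULL << 32;  /* two 512-byte sectors per KB     */
        uint64_t clamped     = max_sectors - 2; /* = 2^32 - 1 KB, representable */

        printf("limit : %" PRIu64 " KB (%" PRIu64 " bytes)\n",
               max_kb, max_kb * 1024);
        printf("clamp : %" PRIu64 " sectors = %" PRIu64 " KB\n",
               clamped, clamped / 2);
        return 0;
}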
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index a67e1a36733f..45e7a47e5f7b 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2698,6 +2698,11 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
                 list_add(&r10_bio->retry_list, &conf->bio_end_io_list);
                 conf->nr_queued++;
                 spin_unlock_irq(&conf->device_lock);
+                /*
+                 * In case freeze_array() is waiting for condition
+                 * nr_pending == nr_queued + extra to be true.
+                 */
+                wake_up(&conf->wait_barrier);
                 md_wakeup_thread(conf->mddev->thread);
         } else {
                 if (test_bit(R10BIO_WriteError,
@@ -3633,6 +3638,7 @@ static int run(struct mddev *mddev)
 
                 if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
                         discard_supported = true;
+                first = 0;
         }
 
         if (mddev->queue) {
@@ -4039,6 +4045,7 @@ static int raid10_start_reshape(struct mddev *mddev)
                                 diff = 0;
                         if (first || diff < min_offset_diff)
                                 min_offset_diff = diff;
+                        first = 0;
                 }
         }
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 77403228e098..9284acea4f7b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -110,8 +110,7 @@ static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
 static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
 {
         int i;
-        local_irq_disable();
-        spin_lock(conf->hash_locks);
+        spin_lock_irq(conf->hash_locks);
         for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)
                 spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks);
         spin_lock(&conf->device_lock);
@@ -121,9 +120,9 @@ static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
 {
         int i;
         spin_unlock(&conf->device_lock);
-        for (i = NR_STRIPE_HASH_LOCKS; i; i--)
-                spin_unlock(conf->hash_locks + i - 1);
-        local_irq_enable();
+        for (i = NR_STRIPE_HASH_LOCKS - 1; i; i--)
+                spin_unlock(conf->hash_locks + i);
+        spin_unlock_irq(conf->hash_locks);
 }
 
 /* bio's attached to a stripe+device for I/O are linked together in bi_sector
@@ -726,12 +725,11 @@ static bool is_full_stripe_write(struct stripe_head *sh)
 
 static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 {
-        local_irq_disable();
         if (sh1 > sh2) {
-                spin_lock(&sh2->stripe_lock);
+                spin_lock_irq(&sh2->stripe_lock);
                 spin_lock_nested(&sh1->stripe_lock, 1);
         } else {
-                spin_lock(&sh1->stripe_lock);
+                spin_lock_irq(&sh1->stripe_lock);
                 spin_lock_nested(&sh2->stripe_lock, 1);
         }
 }
@@ -739,8 +737,7 @@ static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 {
         spin_unlock(&sh1->stripe_lock);
-        spin_unlock(&sh2->stripe_lock);
-        local_irq_enable();
+        spin_unlock_irq(&sh2->stripe_lock);
 }
 
 /* Only freshly new full stripe normal write stripe can be added to a batch list */
@@ -3372,9 +3369,20 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
                 BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
                 BUG_ON(test_bit(R5_Wantread, &dev->flags));
                 BUG_ON(sh->batch_head);
+
+                /*
+                 * In the raid6 case if the only non-uptodate disk is P
+                 * then we already trusted P to compute the other failed
+                 * drives. It is safe to compute rather than re-read P.
+                 * In other cases we only compute blocks from failed
+                 * devices, otherwise check/repair might fail to detect
+                 * a real inconsistency.
+                 */
+
                 if ((s->uptodate == disks - 1) &&
+                    ((sh->qd_idx >= 0 && sh->pd_idx == disk_idx) ||
                      (s->failed && (disk_idx == s->failed_num[0] ||
-                                    disk_idx == s->failed_num[1]))) {
+                                    disk_idx == s->failed_num[1])))) {
                         /* have disk failed, and we're requested to fetch it;
                          * do compute it
                          */
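The raid5 lock_two_stripes() rework keeps working because deadlock avoidance comes from lock ordering, not from disabling interrupts globally: the two stripe locks are always taken lower-address first, so two CPUs locking the same pair can never each hold one lock while waiting on the other. A minimal pthreads model of that ordering; the kernel additionally uses spin_lock_nested() to tell lockdep the second acquisition is intentional, and pthread mutexes stand in for spinlocks here:

#include <pthread.h>

struct stripe_head {
        pthread_mutex_t stripe_lock;
};

/*
 * Acquire both locks in a global order (by address, as the kernel
 * code does), so concurrent callers on the same pair cannot deadlock.
 */
static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
{
        if (sh1 > sh2) {
                pthread_mutex_lock(&sh2->stripe_lock); /* lower address first */
                pthread_mutex_lock(&sh1->stripe_lock);
        } else {
                pthread_mutex_lock(&sh1->stripe_lock);
                pthread_mutex_lock(&sh2->stripe_lock);
        }
}

static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
{
        /* release order does not matter for deadlock avoidance */
        pthread_mutex_unlock(&sh1->stripe_lock);
        pthread_mutex_unlock(&sh2->stripe_lock);
}

int main(void)
{
        struct stripe_head a = { .stripe_lock = PTHREAD_MUTEX_INITIALIZER };
        struct stripe_head b = { .stripe_lock = PTHREAD_MUTEX_INITIALIZER };

        lock_two_stripes(&a, &b);
        unlock_two_stripes(&a, &b);
        return 0;
}

Replacing the local_irq_disable()/spin_lock() pairs with spin_lock_irq() on the first lock preserves this ordering while making the irq-disable window explicit and lockdep-visible.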
