Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--  drivers/md/raid5.c | 83
1 file changed, 59 insertions(+), 24 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 10ce885445f6..d59b861764a1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -110,8 +110,7 @@ static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
{
int i;
- local_irq_disable();
- spin_lock(conf->hash_locks);
+ spin_lock_irq(conf->hash_locks);
for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)
spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks);
spin_lock(&conf->device_lock);
@@ -121,9 +120,9 @@ static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
{
int i;
spin_unlock(&conf->device_lock);
- for (i = NR_STRIPE_HASH_LOCKS; i; i--)
- spin_unlock(conf->hash_locks + i - 1);
- local_irq_enable();
+ for (i = NR_STRIPE_HASH_LOCKS - 1; i; i--)
+ spin_unlock(conf->hash_locks + i);
+ spin_unlock_irq(conf->hash_locks);
}
/* bio's attached to a stripe+device for I/O are linked together in bi_sector
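A note on the pairing in the two hunks above: interrupts are now disabled by taking hash_locks[0] via spin_lock_irq(), so the unlock path releases hash_locks[NR-1]..[1] plainly and finishes with spin_unlock_irq() on hash_locks[0]. A minimal userspace analog of the ordered-acquire / reverse-release pattern, with pthread mutexes standing in for the spinlocks (only the acquire order is required for deadlock avoidance; the reverse release simply mirrors the kernel code):

#include <pthread.h>

#define NR_LOCKS 8

static pthread_mutex_t locks[NR_LOCKS] = {
	[0 ... NR_LOCKS - 1] = PTHREAD_MUTEX_INITIALIZER	/* GNU range init */
};

static void lock_all(void)
{
	/* always acquire in ascending index order */
	for (int i = 0; i < NR_LOCKS; i++)
		pthread_mutex_lock(&locks[i]);
}

static void unlock_all(void)
{
	/* release in reverse, ending with locks[0] (the "irq" lock above) */
	for (int i = NR_LOCKS - 1; i >= 0; i--)
		pthread_mutex_unlock(&locks[i]);
}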
@@ -726,12 +725,11 @@ static bool is_full_stripe_write(struct stripe_head *sh)
static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
{
- local_irq_disable();
if (sh1 > sh2) {
- spin_lock(&sh2->stripe_lock);
+ spin_lock_irq(&sh2->stripe_lock);
spin_lock_nested(&sh1->stripe_lock, 1);
} else {
- spin_lock(&sh1->stripe_lock);
+ spin_lock_irq(&sh1->stripe_lock);
spin_lock_nested(&sh2->stripe_lock, 1);
}
}
@@ -739,8 +737,7 @@ static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
{
spin_unlock(&sh1->stripe_lock);
- spin_unlock(&sh2->stripe_lock);
- local_irq_enable();
+ spin_unlock_irq(&sh2->stripe_lock);
}
/* Only a freshly created, full-stripe normal write can be added to a batch list */
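The address comparison in lock_two_stripes() above is the classic deadlock-avoidance idiom: every caller locks the lower-addressed stripe first, so two threads passing the same pair in opposite argument order cannot deadlock. Sketched in userspace (hypothetical struct, a pthread mutex for stripe_lock):

#include <pthread.h>
#include <stdint.h>

struct stripe {				/* stand-in for struct stripe_head */
	pthread_mutex_t lock;
};

static void lock_two(struct stripe *a, struct stripe *b)
{
	/* order by address so all callers agree who goes first */
	if ((uintptr_t)a > (uintptr_t)b) {
		struct stripe *t = a;
		a = b;
		b = t;
	}
	pthread_mutex_lock(&a->lock);
	pthread_mutex_lock(&b->lock);
}

static void unlock_two(struct stripe *a, struct stripe *b)
{
	/* unlock order is irrelevant for deadlock avoidance */
	pthread_mutex_unlock(&a->lock);
	pthread_mutex_unlock(&b->lock);
}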
@@ -818,6 +815,14 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
spin_unlock(&head->batch_head->batch_lock);
goto unlock_out;
}
+ /*
* We must assign this stripe's batch_head while holding
* batch_lock; otherwise clear_batch_ready() on the batch-head
* stripe could clear this stripe's BATCH_READY bit before
* sh->batch_head is assigned, which would confuse
* clear_batch_ready() for this stripe
*/
+ sh->batch_head = head->batch_head;
/*
* at this point, head's BATCH_READY could be cleared, but we
@@ -825,8 +830,6 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
*/
list_add(&sh->batch_list, &head->batch_list);
spin_unlock(&head->batch_head->batch_lock);
-
- sh->batch_head = head->batch_head;
} else {
head->batch_head = head;
sh->batch_head = head->batch_head;
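The bug this hunk fixes is a publish-after-unlock: once batch_lock is dropped, clear_batch_ready() on the head can already see this stripe on batch_list while sh->batch_head is still NULL. The safe shape, reduced to a generic sketch (illustrative names, a pthread mutex for batch_lock, list handling elided):

#include <pthread.h>

struct node {
	struct node *batch_head;
	pthread_mutex_t batch_lock;
	/* list linkage omitted */
};

static void add_to_batch(struct node *sh, struct node *head)
{
	pthread_mutex_lock(&head->batch_lock);
	/* publish while still holding the lock: anyone who takes
	 * batch_lock after us and finds sh in the batch also sees
	 * sh->batch_head set */
	sh->batch_head = head->batch_head;
	/* ... list_add(&sh->batch_list, &head->batch_list) ... */
	pthread_mutex_unlock(&head->batch_lock);
}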
@@ -1675,8 +1678,11 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
struct r5dev *dev = &sh->dev[i];
if (dev->written || i == pd_idx || i == qd_idx) {
- if (!discard && !test_bit(R5_SkipCopy, &dev->flags))
+ if (!discard && !test_bit(R5_SkipCopy, &dev->flags)) {
set_bit(R5_UPTODATE, &dev->flags);
+ if (test_bit(STRIPE_EXPAND_READY, &sh->state))
+ set_bit(R5_Expanded, &dev->flags);
+ }
if (fua)
set_bit(R5_WantFUA, &dev->flags);
if (sync)
@@ -2022,15 +2028,16 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
static int grow_stripes(struct r5conf *conf, int num)
{
struct kmem_cache *sc;
+ size_t namelen = sizeof(conf->cache_name[0]);
int devs = max(conf->raid_disks, conf->previous_raid_disks);
if (conf->mddev->gendisk)
- sprintf(conf->cache_name[0],
+ snprintf(conf->cache_name[0], namelen,
"raid%d-%s", conf->level, mdname(conf->mddev));
else
- sprintf(conf->cache_name[0],
+ snprintf(conf->cache_name[0], namelen,
"raid%d-%p", conf->level, conf->mddev);
- sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]);
+ snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]);
conf->active_name = 0;
sc = kmem_cache_create(conf->cache_name[conf->active_name],
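cache_name is a fixed 32-byte buffer in struct r5conf, so a long mdname() could overflow it under sprintf(). The "%.27s" precision keeps the "-alt" variant within bounds: 27 + strlen("-alt") + NUL = 32. A quick userspace check of that arithmetic (buffer size assumed to be 32, matching the kernel struct):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char name0[32], name1[32];

	/* deliberately oversized source name */
	snprintf(name0, sizeof(name0), "raid%d-%s", 6,
		 "an-unreasonably-long-md-device-name");
	/* %.27s caps the prefix so "-alt" plus the NUL always fits */
	snprintf(name1, sizeof(name1), "%.27s-alt", name0);

	printf("'%s' (%zu chars)\n", name0, strlen(name0));
	printf("'%s' (%zu chars)\n", name1, strlen(name1));
	return 0;
}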
@@ -2232,6 +2239,10 @@ static int resize_stripes(struct r5conf *conf, int newsize)
err = -ENOMEM;
mutex_unlock(&conf->cache_size_mutex);
+
+ conf->slab_cache = sc;
+ conf->active_name = 1-conf->active_name;
+
/* Step 4, return new stripes to service */
while(!list_empty(&newstripes)) {
nsh = list_entry(newstripes.next, struct stripe_head, lru);
@@ -2249,8 +2260,6 @@ static int resize_stripes(struct r5conf *conf, int newsize)
}
/* critical section passed, GFP_NOIO no longer needed */
- conf->slab_cache = sc;
- conf->active_name = 1-conf->active_name;
if (!err)
conf->pool_size = newsize;
return err;
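Why the two assignments move above step 4: once the new stripes are returned to service they can immediately be freed again (e.g. by the shrinker), and the free path releases into conf->slab_cache, so slab_cache must already point at the new cache sc before any stripe allocated from it becomes reachable. A toy illustration of that ordering invariant (illustrative names only):

#include <stdio.h>

struct cache { const char *name; };
struct obj   { struct cache *home; };	/* cache it was allocated from */

static struct cache *slab_cache;	/* cache the free path uses */

static void obj_free(struct obj *o)
{
	if (slab_cache != o->home)
		printf("BUG: freeing into %s, allocated from %s\n",
		       slab_cache->name, o->home->name);
}

int main(void)
{
	struct cache oldc = { "old" }, newc = { "new" };
	struct obj o = { &newc };

	slab_cache = &oldc;
	obj_free(&o);		/* the pre-patch hazard: wrong cache */
	slab_cache = &newc;	/* publish the allocator first ... */
	obj_free(&o);		/* ... then objects may be freed safely */
	return 0;
}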
@@ -3361,9 +3370,20 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
BUG_ON(test_bit(R5_Wantread, &dev->flags));
BUG_ON(sh->batch_head);
+
+ /*
+ * In the raid6 case if the only non-uptodate disk is P
+ * then we already trusted P to compute the other failed
+ * drives. It is safe to compute rather than re-read P.
+ * In other cases we only compute blocks from failed
+ * devices, otherwise check/repair might fail to detect
+ * a real inconsistency.
+ */
+
if ((s->uptodate == disks - 1) &&
+ ((sh->qd_idx >= 0 && sh->pd_idx == disk_idx) ||
(s->failed && (disk_idx == s->failed_num[0] ||
- disk_idx == s->failed_num[1]))) {
+ disk_idx == s->failed_num[1])))) {
/* a disk has failed and we're asked to fetch its block;
* compute it instead
*/
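Rewritten as a standalone predicate, the new condition says: with exactly one block not up to date, compute it if it is P on a raid6 array (qd_idx >= 0), or if it belongs to one of the failed devices. A sketch of that logic (hypothetical helper mirroring the fields used above):

#include <stdbool.h>

static bool want_compute(int disks, int uptodate, int failed,
			 int disk_idx, int pd_idx, int qd_idx,
			 const int failed_num[2])
{
	if (uptodate != disks - 1)
		return false;		/* need every other block in memory */
	if (qd_idx >= 0 && disk_idx == pd_idx)
		return true;		/* raid6 and only P missing: compute P */
	return failed && (disk_idx == failed_num[0] ||
			  disk_idx == failed_num[1]);
}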
@@ -4256,7 +4276,8 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
(1 << STRIPE_PREREAD_ACTIVE) |
- (1 << STRIPE_DEGRADED)),
+ (1 << STRIPE_DEGRADED) |
+ (1 << STRIPE_ON_UNPLUG_LIST)),
head_sh->state & (1 << STRIPE_INSYNC));
sh->check_state = head_sh->check_state;
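For reference, set_mask_bits(ptr, mask, bits) atomically computes *ptr = (*ptr & ~mask) | bits, so passing the complement ~(flags) as the mask clears every state bit except those flags; this hunk adds STRIPE_ON_UNPLUG_LIST to the preserved set. A userspace rendering with C11 atomics (the kernel macro uses a cmpxchg loop; this is an analogy, not the kernel source):

#include <stdatomic.h>

/* atomically: *p = (*p & ~mask) | bits, returning the new value */
static unsigned long set_mask_bits(_Atomic unsigned long *p,
				   unsigned long mask, unsigned long bits)
{
	unsigned long old = atomic_load(p), new;

	do {
		new = (old & ~mask) | bits;
	} while (!atomic_compare_exchange_weak(p, &old, new));

	return new;
}

/* usage: clear everything except KEEP, optionally setting INSYNC:
 *	set_mask_bits(&state, ~KEEP, head_state & INSYNC);
 */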
@@ -5277,12 +5298,15 @@ static void make_request(struct mddev *mddev, struct bio * bi)
* userspace, we want an interruptible
* wait.
*/
- flush_signals(current);
prepare_to_wait(&conf->wait_for_overlap,
&w, TASK_INTERRUPTIBLE);
if (logical_sector >= mddev->suspend_lo &&
logical_sector < mddev->suspend_hi) {
+ sigset_t full, old;
+ sigfillset(&full);
+ sigprocmask(SIG_BLOCK, &full, &old);
schedule();
+ sigprocmask(SIG_SETMASK, &old, NULL);
do_prepare = true;
}
goto retry;
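The old flush_signals() simply discarded pending signals, which can lose a signal the process legitimately wants delivered after the array is resumed. Blocking all signals across the sleep and restoring the previous mask leaves them pending instead. The same POSIX pattern in userspace, with sleep() standing in for schedule():

#include <signal.h>
#include <unistd.h>

static void wait_with_signals_held(void)
{
	sigset_t full, old;

	sigfillset(&full);
	/* hold (not discard) all signals for the duration of the wait */
	sigprocmask(SIG_BLOCK, &full, &old);
	sleep(1);			/* stand-in for schedule() */
	/* restore the caller's mask; anything raised meanwhile is
	 * still pending and gets delivered now */
	sigprocmask(SIG_SETMASK, &old, NULL);
}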
@@ -5816,6 +5840,10 @@ static void raid5_do_work(struct work_struct *work)
pr_debug("%d stripes handled\n", handled);
spin_unlock_irq(&conf->device_lock);
+
+ r5l_flush_stripe_to_raid(conf->log);
+
+ async_tx_issue_pending_all();
blk_finish_plug(&plug);
pr_debug("--- raid5worker inactive\n");
@@ -6980,6 +7008,15 @@ static int run(struct mddev *mddev)
stripe = (stripe | (stripe-1)) + 1;
mddev->queue->limits.discard_alignment = stripe;
mddev->queue->limits.discard_granularity = stripe;
+
+ /*
* We use a 16-bit counter of active stripes in bi_phys_segments
* (minus one value reserved for the overloaded initialization)
+ */
+ blk_queue_max_hw_sectors(mddev->queue, 0xfffe * STRIPE_SECTORS);
+ blk_queue_max_discard_sectors(mddev->queue,
+ 0xfffe * STRIPE_SECTORS);
+
/*
* unaligned part of discard request will be ignored, so can't
* guarantee discard_zeroes_data
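Spelled out: STRIPE_SECTORS is 8 (one 4 KiB stripe page in 512-byte sectors), so the cap is 0xfffe * 8 = 524272 sectors, just under 256 MiB per request; 0xffff is excluded because the 16-bit active-stripe counter reserves one value for its overloaded initialization. The arithmetic as a runnable check:

#include <stdio.h>

#define STRIPE_SECTORS 8ULL	/* 4096-byte stripe page / 512-byte sectors */

int main(void)
{
	unsigned long long sectors = 0xfffe * STRIPE_SECTORS;
	unsigned long long bytes = sectors * 512;

	printf("max request: %llu sectors = %llu bytes (~%llu MiB)\n",
	       sectors, bytes, bytes >> 20);
	return 0;
}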
@@ -7517,12 +7554,10 @@ static void end_reshape(struct r5conf *conf)
{
if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
- struct md_rdev *rdev;
spin_lock_irq(&conf->device_lock);
conf->previous_raid_disks = conf->raid_disks;
- rdev_for_each(rdev, conf->mddev)
- rdev->data_offset = rdev->new_data_offset;
+ md_finish_reshape(conf->mddev);
smp_wmb();
conf->reshape_progress = MaxSector;
conf->mddev->reshape_position = MaxSector;
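The smp_wmb() above orders the geometry update (previous_raid_disks, rdev offsets via md_finish_reshape()) before the store that publishes reshape_progress = MaxSector, so a reader that sees the reshape finished also sees the new geometry. In portable C11 the same one-way ordering could be written as a release store (an analogy for the kernel barrier, not its implementation):

#include <stdatomic.h>

#define MAX_SECTOR (~0ULL)

struct conf {
	int previous_raid_disks;		/* plain data */
	_Atomic unsigned long long reshape_progress;
};

static void finish_reshape(struct conf *c, int raid_disks)
{
	c->previous_raid_disks = raid_disks;
	/* release: the write above is visible to any thread whose
	 * acquire load of reshape_progress observes MAX_SECTOR */
	atomic_store_explicit(&c->reshape_progress, MAX_SECTOR,
			      memory_order_release);
}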