From b47490c9bc73d0b34e4c194db40de183e592e446 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 6 Feb 2008 01:39:50 -0800 Subject: md: Update md bitmap during resync. Currently an md array with a write-intent bitmap does not updated that bitmap to reflect successful partial resync. Rather the entire bitmap is updated when the resync completes. This is because there is no guarentee that resync requests will complete in order, and tracking each request individually is unnecessarily burdensome. However there is value in regularly updating the bitmap, so add code to periodically pause while all pending sync requests complete, then update the bitmap. Doing this only every few seconds (the same as the bitmap update time) does not notciably affect resync performance. [snitzer@gmail.com: export bitmap_cond_end_sync] Signed-off-by: Neil Brown Cc: "Mike Snitzer" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/bitmap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/raid') diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h index 306a1d1a5af0..e51b531cd0b2 100644 --- a/include/linux/raid/bitmap.h +++ b/include/linux/raid/bitmap.h @@ -244,6 +244,8 @@ struct bitmap { */ unsigned long daemon_lastrun; /* jiffies of last run */ unsigned long daemon_sleep; /* how many seconds between updates? */ + unsigned long last_end_sync; /* when we lasted called end_sync to + * update bitmap with resync progress */ atomic_t pending_writes; /* pending writes to the bitmap file */ wait_queue_head_t write_wait; @@ -275,6 +277,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int degraded); void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted); void bitmap_close_sync(struct bitmap *bitmap); +void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector); void bitmap_unplug(struct bitmap *bitmap); void bitmap_daemon_work(struct bitmap *bitmap); -- cgit v1.2.3 From e691063a61f7f72a7d2882eb744b07a520cde23b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 6 Feb 2008 01:39:51 -0800 Subject: md: support 'external' metadata for md arrays - Add a state flag 'external' to indicate that the metadata is managed externally (by user-space) so important changes need to be left of user-space to handle. Alternates are non-persistant ('none') where there is no stable metadata - after the array is stopped there is no record of it's status - and internal which can be version 0.90 or version 1.x These are selected by writing to the 'metadata' attribute. - move the updating of superblocks (sync_sbs) to after we have checked if there are any superblocks or not. - New array state 'write_pending'. This means that the metadata records the array as 'clean', but a write has been requested, so the metadata has to be updated to record a 'dirty' array before the write can continue. This change is reported to md by writing 'active' to the array_state attribute. - tidy up marking of sb_dirty: - don't set sb_dirty when resync finishes as md_check_recovery calls md_update_sb when the sync thread finishes anyway. - Don't set sb_dirty in multipath_run as the array might not be dirty. - don't mark superblock dirty when switching to 'clean' if there is no internal superblock (if external, userspace can choose to update the superblock whenever it chooses to). Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md_k.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/raid') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index dcb729244f47..b579cc628303 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -130,6 +130,9 @@ struct mddev_s minor_version, patch_version; int persistent; + int external; /* metadata is + * managed externally */ + char metadata_type[17]; /* externally set*/ int chunk_size; time_t ctime, utime; int level, layout; -- cgit v1.2.3 From c620727779f7cc8ea96efb71f0651a26349e59c1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 6 Feb 2008 01:39:52 -0800 Subject: md: allow a maximum extent to be set for resyncing This allows userspace to control resync/reshape progress and synchronise it with other activities, such as shared access in a SAN, or backing up critical sections during a tricky reshape. Writing a number of sectors (which must be a multiple of the chunk size if such is meaningful) causes a resync to pause when it gets to that point. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md_k.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/raid') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index b579cc628303..c77dca3221ed 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -219,6 +219,8 @@ struct mddev_s atomic_t recovery_active; /* blocks scheduled, but not written */ wait_queue_head_t recovery_wait; sector_t recovery_cp; + sector_t resync_max; /* resync should pause + * when it gets here */ spinlock_t write_lock; wait_queue_head_t sb_wait; /* for waiting on superblock updates */ -- cgit v1.2.3 From c5d79adba7ced41d7ac097c2ab74759d10522dd5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 6 Feb 2008 01:39:54 -0800 Subject: md: allow devices to be shared between md arrays Currently, a given device is "claimed" by a particular array so that it cannot be used by other arrays. This is not ideal for DDF and other metadata schemes which have their own partitioning concept. So for externally managed metadata, just claim the device for md in general, require that "offset" and "size" are set properly for each device, and make sure that if a device is included in different arrays then the active sections do not overlap. This involves adding another flag to the rdev which makes it awkward to set "->flags = 0" to clear certain flags. So now clear flags explicitly by name when we want to clear things. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md_k.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/raid') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index c77dca3221ed..5b2102e40286 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -81,6 +81,8 @@ struct mdk_rdev_s #define In_sync 2 /* device is in_sync with rest of array */ #define WriteMostly 4 /* Avoid reading if at all possible */ #define BarriersNotsupp 5 /* BIO_RW_BARRIER is not supported */ +#define AllReserved 6 /* If whole device is reserved for + * one array */ int desc_nr; /* descriptor index in the superblock */ int raid_disk; /* role of device in array */ -- cgit v1.2.3 From d089c6af10c2be5988f03667d6d22fe6085fbe5e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 6 Feb 2008 01:39:59 -0800 Subject: md: change ITERATE_RDEV to rdev_for_each As this is more in line with common practice in the kernel. Also swap the args around to be more like list_for_each. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md_k.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/raid') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 5b2102e40286..9c19555f314b 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -322,7 +322,7 @@ static inline char * mdname (mddev_t * mddev) /* * iterates through the 'same array disks' ringlist */ -#define ITERATE_RDEV(mddev,rdev,tmp) \ +#define rdev_for_each(rdev, tmp, mddev) \ ITERATE_RDEV_GENERIC((mddev)->disks,rdev,tmp) /* -- cgit v1.2.3 From 73c34431c7119d0bc7d3436abfad75fe47b2c51f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 6 Feb 2008 01:39:59 -0800 Subject: md: change ITERATE_RDEV_GENERIC to rdev_for_each_list, and remove ITERATE_RDEV_PENDING. Finish ITERATE_ to for_each conversion. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md_k.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'include/linux/raid') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 9c19555f314b..85a068bab625 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -313,23 +313,17 @@ static inline char * mdname (mddev_t * mddev) * iterates through some rdev ringlist. It's safe to remove the * current 'rdev'. Dont touch 'tmp' though. */ -#define ITERATE_RDEV_GENERIC(head,rdev,tmp) \ +#define rdev_for_each_list(rdev, tmp, list) \ \ - for ((tmp) = (head).next; \ + for ((tmp) = (list).next; \ (rdev) = (list_entry((tmp), mdk_rdev_t, same_set)), \ - (tmp) = (tmp)->next, (tmp)->prev != &(head) \ + (tmp) = (tmp)->next, (tmp)->prev != &(list) \ ; ) /* * iterates through the 'same array disks' ringlist */ #define rdev_for_each(rdev, tmp, mddev) \ - ITERATE_RDEV_GENERIC((mddev)->disks,rdev,tmp) - -/* - * Iterates through 'pending RAID disks' - */ -#define ITERATE_RDEV_PENDING(rdev,tmp) \ - ITERATE_RDEV_GENERIC(pending_raid_disks,rdev,tmp) + rdev_for_each_list(rdev, tmp, (mddev)->disks) typedef struct mdk_thread_s { void (*run) (mddev_t *mddev); -- cgit v1.2.3