summaryrefslogtreecommitdiff
path: root/drivers/md/dm-cache-target.c
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2013-12-19 15:08:03 +0100
committerJiri Kosina <jkosina@suse.cz>2013-12-19 15:08:32 +0100
commite23c34bb41da65f354fb7eee04300c56ee48f60c (patch)
tree549fbe449d55273b81ef104a9755109bf4ae7817 /drivers/md/dm-cache-target.c
parentb481c2cb3534c85dca625973b33eba15f9af3e4c (diff)
parent319e2e3f63c348a9b66db4667efa73178e18b17d (diff)
Merge branch 'master' into for-next
Sync with Linus' tree to be able to apply fixes on top of newer things in tree (efi-stub). Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Diffstat (limited to 'drivers/md/dm-cache-target.c')
-rw-r--r--drivers/md/dm-cache-target.c742
1 files changed, 626 insertions, 116 deletions
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 0df3ec085ebb..1b1469ebe5cb 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -61,27 +61,80 @@ static void free_bitset(unsigned long *bits)
/*----------------------------------------------------------------*/
+/*
+ * There are a couple of places where we let a bio run, but want to do some
+ * work before calling its endio function. We do this by temporarily
+ * changing the endio fn.
+ */
+struct dm_hook_info {
+ bio_end_io_t *bi_end_io;
+ void *bi_private;
+};
+
+static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
+ bio_end_io_t *bi_end_io, void *bi_private)
+{
+ h->bi_end_io = bio->bi_end_io;
+ h->bi_private = bio->bi_private;
+
+ bio->bi_end_io = bi_end_io;
+ bio->bi_private = bi_private;
+}
+
+static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
+{
+ bio->bi_end_io = h->bi_end_io;
+ bio->bi_private = h->bi_private;
+}
+
+/*----------------------------------------------------------------*/
+
#define PRISON_CELLS 1024
#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10
/*
- * The block size of the device holding cache data must be >= 32KB
+ * The block size of the device holding cache data must be
+ * between 32KB and 1GB.
*/
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
+#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
/*
* FIXME: the cache is read/write for the time being.
*/
-enum cache_mode {
+enum cache_metadata_mode {
CM_WRITE, /* metadata may be changed */
CM_READ_ONLY, /* metadata may not be changed */
};
+enum cache_io_mode {
+ /*
+ * Data is written to cached blocks only. These blocks are marked
+ * dirty. If you lose the cache device you will lose data.
+ * Potential performance increase for both reads and writes.
+ */
+ CM_IO_WRITEBACK,
+
+ /*
+ * Data is written to both cache and origin. Blocks are never
+ * dirty. Potential performance benfit for reads only.
+ */
+ CM_IO_WRITETHROUGH,
+
+ /*
+ * A degraded mode useful for various cache coherency situations
+ * (eg, rolling back snapshots). Reads and writes always go to the
+ * origin. If a write goes to a cached oblock, then the cache
+ * block is invalidated.
+ */
+ CM_IO_PASSTHROUGH
+};
+
struct cache_features {
- enum cache_mode mode;
- bool write_through:1;
+ enum cache_metadata_mode mode;
+ enum cache_io_mode io_mode;
};
struct cache_stats {
@@ -97,10 +150,31 @@ struct cache_stats {
atomic_t discard_count;
};
+/*
+ * Defines a range of cblocks, begin to (end - 1) are in the range. end is
+ * the one-past-the-end value.
+ */
+struct cblock_range {
+ dm_cblock_t begin;
+ dm_cblock_t end;
+};
+
+struct invalidation_request {
+ struct list_head list;
+ struct cblock_range *cblocks;
+
+ atomic_t complete;
+ int err;
+
+ wait_queue_head_t result_wait;
+};
+
struct cache {
struct dm_target *ti;
struct dm_target_callbacks callbacks;
+ struct dm_cache_metadata *cmd;
+
/*
* Metadata is written to this device.
*/
@@ -117,11 +191,6 @@ struct cache {
struct dm_dev *cache_dev;
/*
- * Cache features such as write-through.
- */
- struct cache_features features;
-
- /*
* Size of the origin device in _complete_ blocks and native sectors.
*/
dm_oblock_t origin_blocks;
@@ -138,8 +207,6 @@ struct cache {
uint32_t sectors_per_block;
int sectors_per_block_shift;
- struct dm_cache_metadata *cmd;
-
spinlock_t lock;
struct bio_list deferred_bios;
struct bio_list deferred_flush_bios;
@@ -148,8 +215,12 @@ struct cache {
struct list_head completed_migrations;
struct list_head need_commit_migrations;
sector_t migration_threshold;
- atomic_t nr_migrations;
wait_queue_head_t migration_wait;
+ atomic_t nr_migrations;
+
+ wait_queue_head_t quiescing_wait;
+ atomic_t quiescing;
+ atomic_t quiescing_ack;
/*
* cache_size entries, dirty if set
@@ -160,9 +231,16 @@ struct cache {
/*
* origin_blocks entries, discarded if set.
*/
- uint32_t discard_block_size; /* a power of 2 times sectors per block */
dm_dblock_t discard_nr_blocks;
unsigned long *discard_bitset;
+ uint32_t discard_block_size; /* a power of 2 times sectors per block */
+
+ /*
+ * Rather than reconstructing the table line for the status we just
+ * save it and regurgitate.
+ */
+ unsigned nr_ctr_args;
+ const char **ctr_args;
struct dm_kcopyd_client *copier;
struct workqueue_struct *wq;
@@ -182,19 +260,23 @@ struct cache {
bool need_tick_bio:1;
bool sized:1;
- bool quiescing:1;
+ bool invalidate:1;
bool commit_requested:1;
bool loaded_mappings:1;
bool loaded_discards:1;
+ /*
+ * Cache features such as write-through.
+ */
+ struct cache_features features;
+
struct cache_stats stats;
/*
- * Rather than reconstructing the table line for the status we just
- * save it and regurgitate.
+ * Invalidation fields.
*/
- unsigned nr_ctr_args;
- const char **ctr_args;
+ spinlock_t invalidation_lock;
+ struct list_head invalidation_requests;
};
struct per_bio_data {
@@ -209,7 +291,7 @@ struct per_bio_data {
*/
struct cache *cache;
dm_cblock_t cblock;
- bio_end_io_t *saved_bi_end_io;
+ struct dm_hook_info hook_info;
struct dm_bio_details bio_details;
};
@@ -226,6 +308,8 @@ struct dm_cache_migration {
bool writeback:1;
bool demote:1;
bool promote:1;
+ bool requeue_holder:1;
+ bool invalidate:1;
struct dm_bio_prison_cell *old_ocell;
struct dm_bio_prison_cell *new_ocell;
@@ -531,9 +615,24 @@ static void save_stats(struct cache *cache)
#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))
+static bool writethrough_mode(struct cache_features *f)
+{
+ return f->io_mode == CM_IO_WRITETHROUGH;
+}
+
+static bool writeback_mode(struct cache_features *f)
+{
+ return f->io_mode == CM_IO_WRITEBACK;
+}
+
+static bool passthrough_mode(struct cache_features *f)
+{
+ return f->io_mode == CM_IO_PASSTHROUGH;
+}
+
static size_t get_per_bio_data_size(struct cache *cache)
{
- return cache->features.write_through ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
+ return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
}
static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
@@ -603,6 +702,7 @@ static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
dm_oblock_t oblock, dm_cblock_t cblock)
{
+ check_if_tick_bio_needed(cache, bio);
remap_to_cache(cache, bio, cblock);
if (bio_data_dir(bio) == WRITE) {
set_dirty(cache, oblock, cblock);
@@ -660,7 +760,8 @@ static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
static void writethrough_endio(struct bio *bio, int err)
{
struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
- bio->bi_end_io = pb->saved_bi_end_io;
+
+ dm_unhook_bio(&pb->hook_info, bio);
if (err) {
bio_endio(bio, err);
@@ -691,9 +792,8 @@ static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
pb->cache = cache;
pb->cblock = cblock;
- pb->saved_bi_end_io = bio->bi_end_io;
+ dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL);
dm_bio_record(&pb->bio_details, bio);
- bio->bi_end_io = writethrough_endio;
remap_to_origin_clear_discard(pb->cache, bio, oblock);
}
@@ -746,8 +846,9 @@ static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
static void cleanup_migration(struct dm_cache_migration *mg)
{
- dec_nr_migrations(mg->cache);
+ struct cache *cache = mg->cache;
free_migration(mg);
+ dec_nr_migrations(cache);
}
static void migration_failure(struct dm_cache_migration *mg)
@@ -763,13 +864,13 @@ static void migration_failure(struct dm_cache_migration *mg)
DMWARN_LIMIT("demotion failed; couldn't copy block");
policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock);
- cell_defer(cache, mg->old_ocell, mg->promote ? 0 : 1);
+ cell_defer(cache, mg->old_ocell, mg->promote ? false : true);
if (mg->promote)
- cell_defer(cache, mg->new_ocell, 1);
+ cell_defer(cache, mg->new_ocell, true);
} else {
DMWARN_LIMIT("promotion failed; couldn't copy block");
policy_remove_mapping(cache->policy, mg->new_oblock);
- cell_defer(cache, mg->new_ocell, 1);
+ cell_defer(cache, mg->new_ocell, true);
}
cleanup_migration(mg);
@@ -821,7 +922,7 @@ static void migration_success_post_commit(struct dm_cache_migration *mg)
return;
} else if (mg->demote) {
- cell_defer(cache, mg->old_ocell, mg->promote ? 0 : 1);
+ cell_defer(cache, mg->old_ocell, mg->promote ? false : true);
if (mg->promote) {
mg->demote = false;
@@ -830,11 +931,19 @@ static void migration_success_post_commit(struct dm_cache_migration *mg)
list_add_tail(&mg->list, &cache->quiesced_migrations);
spin_unlock_irqrestore(&cache->lock, flags);
- } else
+ } else {
+ if (mg->invalidate)
+ policy_remove_mapping(cache->policy, mg->old_oblock);
cleanup_migration(mg);
+ }
} else {
- cell_defer(cache, mg->new_ocell, true);
+ if (mg->requeue_holder)
+ cell_defer(cache, mg->new_ocell, true);
+ else {
+ bio_endio(mg->new_ocell->holder, 0);
+ cell_defer(cache, mg->new_ocell, false);
+ }
clear_dirty(cache, mg->new_oblock, mg->cblock);
cleanup_migration(mg);
}
@@ -879,8 +988,46 @@ static void issue_copy_real(struct dm_cache_migration *mg)
r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg);
}
- if (r < 0)
+ if (r < 0) {
+ DMERR_LIMIT("issuing migration failed");
migration_failure(mg);
+ }
+}
+
+static void overwrite_endio(struct bio *bio, int err)
+{
+ struct dm_cache_migration *mg = bio->bi_private;
+ struct cache *cache = mg->cache;
+ size_t pb_data_size = get_per_bio_data_size(cache);
+ struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
+ unsigned long flags;
+
+ if (err)
+ mg->err = true;
+
+ spin_lock_irqsave(&cache->lock, flags);
+ list_add_tail(&mg->list, &cache->completed_migrations);
+ dm_unhook_bio(&pb->hook_info, bio);
+ mg->requeue_holder = false;
+ spin_unlock_irqrestore(&cache->lock, flags);
+
+ wake_worker(cache);
+}
+
+static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
+{
+ size_t pb_data_size = get_per_bio_data_size(mg->cache);
+ struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
+
+ dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
+ remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);
+ generic_make_request(bio);
+}
+
+static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
+{
+ return (bio_data_dir(bio) == WRITE) &&
+ (bio->bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
}
static void avoid_copy(struct dm_cache_migration *mg)
@@ -897,9 +1044,17 @@ static void issue_copy(struct dm_cache_migration *mg)
if (mg->writeback || mg->demote)
avoid = !is_dirty(cache, mg->cblock) ||
is_discarded_oblock(cache, mg->old_oblock);
- else
+ else {
+ struct bio *bio = mg->new_ocell->holder;
+
avoid = is_discarded_oblock(cache, mg->new_oblock);
+ if (!avoid && bio_writes_complete_block(cache, bio)) {
+ issue_overwrite(mg, bio);
+ return;
+ }
+ }
+
avoid ? avoid_copy(mg) : issue_copy_real(mg);
}
@@ -989,6 +1144,8 @@ static void promote(struct cache *cache, struct prealloc *structs,
mg->writeback = false;
mg->demote = false;
mg->promote = true;
+ mg->requeue_holder = true;
+ mg->invalidate = false;
mg->cache = cache;
mg->new_oblock = oblock;
mg->cblock = cblock;
@@ -1010,6 +1167,8 @@ static void writeback(struct cache *cache, struct prealloc *structs,
mg->writeback = true;
mg->demote = false;
mg->promote = false;
+ mg->requeue_holder = true;
+ mg->invalidate = false;
mg->cache = cache;
mg->old_oblock = oblock;
mg->cblock = cblock;
@@ -1033,6 +1192,8 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs,
mg->writeback = false;
mg->demote = true;
mg->promote = true;
+ mg->requeue_holder = true;
+ mg->invalidate = false;
mg->cache = cache;
mg->old_oblock = old_oblock;
mg->new_oblock = new_oblock;
@@ -1045,6 +1206,33 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs,
quiesce_migration(mg);
}
+/*
+ * Invalidate a cache entry. No writeback occurs; any changes in the cache
+ * block are thrown away.
+ */
+static void invalidate(struct cache *cache, struct prealloc *structs,
+ dm_oblock_t oblock, dm_cblock_t cblock,
+ struct dm_bio_prison_cell *cell)
+{
+ struct dm_cache_migration *mg = prealloc_get_migration(structs);
+
+ mg->err = false;
+ mg->writeback = false;
+ mg->demote = true;
+ mg->promote = false;
+ mg->requeue_holder = true;
+ mg->invalidate = true;
+ mg->cache = cache;
+ mg->old_oblock = oblock;
+ mg->cblock = cblock;
+ mg->old_ocell = cell;
+ mg->new_ocell = NULL;
+ mg->start_jiffies = jiffies;
+
+ inc_nr_migrations(cache);
+ quiesce_migration(mg);
+}
+
/*----------------------------------------------------------------
* bio processing
*--------------------------------------------------------------*/
@@ -1107,13 +1295,6 @@ static bool spare_migration_bandwidth(struct cache *cache)
return current_volume < cache->migration_threshold;
}
-static bool is_writethrough_io(struct cache *cache, struct bio *bio,
- dm_cblock_t cblock)
-{
- return bio_data_dir(bio) == WRITE &&
- cache->features.write_through && !is_dirty(cache, cblock);
-}
-
static void inc_hit_counter(struct cache *cache, struct bio *bio)
{
atomic_inc(bio_data_dir(bio) == READ ?
@@ -1126,6 +1307,15 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio)
&cache->stats.read_miss : &cache->stats.write_miss);
}
+static void issue_cache_bio(struct cache *cache, struct bio *bio,
+ struct per_bio_data *pb,
+ dm_oblock_t oblock, dm_cblock_t cblock)
+{
+ pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+ remap_to_cache_dirty(cache, bio, oblock, cblock);
+ issue(cache, bio);
+}
+
static void process_bio(struct cache *cache, struct prealloc *structs,
struct bio *bio)
{
@@ -1137,7 +1327,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
size_t pb_data_size = get_per_bio_data_size(cache);
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
bool discarded_block = is_discarded_oblock(cache, block);
- bool can_migrate = discarded_block || spare_migration_bandwidth(cache);
+ bool passthrough = passthrough_mode(&cache->features);
+ bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
/*
* Check to see if that block is currently migrating.
@@ -1158,15 +1349,39 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
switch (lookup_result.op) {
case POLICY_HIT:
- inc_hit_counter(cache, bio);
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+ if (passthrough) {
+ inc_miss_counter(cache, bio);
- if (is_writethrough_io(cache, bio, lookup_result.cblock))
- remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
- else
- remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+ /*
+ * Passthrough always maps to the origin,
+ * invalidating any cache blocks that are written
+ * to.
+ */
+
+ if (bio_data_dir(bio) == WRITE) {
+ atomic_inc(&cache->stats.demotion);
+ invalidate(cache, structs, block, lookup_result.cblock, new_ocell);
+ release_cell = false;
+
+ } else {
+ /* FIXME: factor out issue_origin() */
+ pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+ remap_to_origin_clear_discard(cache, bio, block);
+ issue(cache, bio);
+ }
+ } else {
+ inc_hit_counter(cache, bio);
+
+ if (bio_data_dir(bio) == WRITE &&
+ writethrough_mode(&cache->features) &&
+ !is_dirty(cache, lookup_result.cblock)) {
+ pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+ remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
+ issue(cache, bio);
+ } else
+ issue_cache_bio(cache, bio, pb, block, lookup_result.cblock);
+ }
- issue(cache, bio);
break;
case POLICY_MISS:
@@ -1225,15 +1440,17 @@ static int need_commit_due_to_time(struct cache *cache)
static int commit_if_needed(struct cache *cache)
{
- if (dm_cache_changed_this_transaction(cache->cmd) &&
- (cache->commit_requested || need_commit_due_to_time(cache))) {
+ int r = 0;
+
+ if ((cache->commit_requested || need_commit_due_to_time(cache)) &&
+ dm_cache_changed_this_transaction(cache->cmd)) {
atomic_inc(&cache->stats.commit_count);
- cache->last_commit_jiffies = jiffies;
cache->commit_requested = false;
- return dm_cache_commit(cache->cmd, false);
+ r = dm_cache_commit(cache->cmd, false);
+ cache->last_commit_jiffies = jiffies;
}
- return 0;
+ return r;
}
static void process_deferred_bios(struct cache *cache)
@@ -1342,36 +1559,88 @@ static void writeback_some_dirty_blocks(struct cache *cache)
}
/*----------------------------------------------------------------
- * Main worker loop
+ * Invalidations.
+ * Dropping something from the cache *without* writing back.
*--------------------------------------------------------------*/
-static void start_quiescing(struct cache *cache)
+
+static void process_invalidation_request(struct cache *cache, struct invalidation_request *req)
{
- unsigned long flags;
+ int r = 0;
+ uint64_t begin = from_cblock(req->cblocks->begin);
+ uint64_t end = from_cblock(req->cblocks->end);
- spin_lock_irqsave(&cache->lock, flags);
- cache->quiescing = 1;
- spin_unlock_irqrestore(&cache->lock, flags);
+ while (begin != end) {
+ r = policy_remove_cblock(cache->policy, to_cblock(begin));
+ if (!r) {
+ r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin));
+ if (r)
+ break;
+
+ } else if (r == -ENODATA) {
+ /* harmless, already unmapped */
+ r = 0;
+
+ } else {
+ DMERR("policy_remove_cblock failed");
+ break;
+ }
+
+ begin++;
+ }
+
+ cache->commit_requested = true;
+
+ req->err = r;
+ atomic_set(&req->complete, 1);
+
+ wake_up(&req->result_wait);
}
-static void stop_quiescing(struct cache *cache)
+static void process_invalidation_requests(struct cache *cache)
{
- unsigned long flags;
+ struct list_head list;
+ struct invalidation_request *req, *tmp;
- spin_lock_irqsave(&cache->lock, flags);
- cache->quiescing = 0;
- spin_unlock_irqrestore(&cache->lock, flags);
+ INIT_LIST_HEAD(&list);
+ spin_lock(&cache->invalidation_lock);
+ list_splice_init(&cache->invalidation_requests, &list);
+ spin_unlock(&cache->invalidation_lock);
+
+ list_for_each_entry_safe (req, tmp, &list, list)
+ process_invalidation_request(cache, req);
}
+/*----------------------------------------------------------------
+ * Main worker loop
+ *--------------------------------------------------------------*/
static bool is_quiescing(struct cache *cache)
{
- int r;
- unsigned long flags;
+ return atomic_read(&cache->quiescing);
+}
- spin_lock_irqsave(&cache->lock, flags);
- r = cache->quiescing;
- spin_unlock_irqrestore(&cache->lock, flags);
+static void ack_quiescing(struct cache *cache)
+{
+ if (is_quiescing(cache)) {
+ atomic_inc(&cache->quiescing_ack);
+ wake_up(&cache->quiescing_wait);
+ }
+}
- return r;
+static void wait_for_quiescing_ack(struct cache *cache)
+{
+ wait_event(cache->quiescing_wait, atomic_read(&cache->quiescing_ack));
+}
+
+static void start_quiescing(struct cache *cache)
+{
+ atomic_inc(&cache->quiescing);
+ wait_for_quiescing_ack(cache);
+}
+
+static void stop_quiescing(struct cache *cache)
+{
+ atomic_set(&cache->quiescing, 0);
+ atomic_set(&cache->quiescing_ack, 0);
}
static void wait_for_migrations(struct cache *cache)
@@ -1410,7 +1679,8 @@ static int more_work(struct cache *cache)
!bio_list_empty(&cache->deferred_writethrough_bios) ||
!list_empty(&cache->quiesced_migrations) ||
!list_empty(&cache->completed_migrations) ||
- !list_empty(&cache->need_commit_migrations);
+ !list_empty(&cache->need_commit_migrations) ||
+ cache->invalidate;
}
static void do_worker(struct work_struct *ws)
@@ -1418,16 +1688,16 @@ static void do_worker(struct work_struct *ws)
struct cache *cache = container_of(ws, struct cache, worker);
do {
- if (!is_quiescing(cache))
+ if (!is_quiescing(cache)) {
+ writeback_some_dirty_blocks(cache);
+ process_deferred_writethrough_bios(cache);
process_deferred_bios(cache);
+ process_invalidation_requests(cache);
+ }
process_migrations(cache, &cache->quiesced_migrations, issue_copy);
process_migrations(cache, &cache->completed_migrations, complete_migration);
- writeback_some_dirty_blocks(cache);
-
- process_deferred_writethrough_bios(cache);
-
if (commit_if_needed(cache)) {
process_deferred_flush_bios(cache, false);
@@ -1440,6 +1710,9 @@ static void do_worker(struct work_struct *ws)
process_migrations(cache, &cache->need_commit_migrations,
migration_success_post_commit);
}
+
+ ack_quiescing(cache);
+
} while (more_work(cache));
}
@@ -1687,24 +1960,25 @@ static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
char **error)
{
- unsigned long tmp;
+ unsigned long block_size;
if (!at_least_one_arg(as, error))
return -EINVAL;
- if (kstrtoul(dm_shift_arg(as), 10, &tmp) || !tmp ||
- tmp < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
- tmp & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
+ if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
+ block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
+ block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
+ block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
*error = "Invalid data block size";
return -EINVAL;
}
- if (tmp > ca->cache_sectors) {
+ if (block_size > ca->cache_sectors) {
*error = "Data block size is larger than the cache device";
return -EINVAL;
}
- ca->block_size = tmp;
+ ca->block_size = block_size;
return 0;
}
@@ -1712,7 +1986,7 @@ static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
static void init_features(struct cache_features *cf)
{
cf->mode = CM_WRITE;
- cf->write_through = false;
+ cf->io_mode = CM_IO_WRITEBACK;
}
static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
@@ -1737,10 +2011,13 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
arg = dm_shift_arg(as);
if (!strcasecmp(arg, "writeback"))
- cf->write_through = false;
+ cf->io_mode = CM_IO_WRITEBACK;
else if (!strcasecmp(arg, "writethrough"))
- cf->write_through = true;
+ cf->io_mode = CM_IO_WRITETHROUGH;
+
+ else if (!strcasecmp(arg, "passthrough"))
+ cf->io_mode = CM_IO_PASSTHROUGH;
else {
*error = "Unrecognised cache feature requested";
@@ -1869,14 +2146,15 @@ static int set_config_values(struct cache *cache, int argc, const char **argv)
static int create_cache_policy(struct cache *cache, struct cache_args *ca,
char **error)
{
- cache->policy = dm_cache_policy_create(ca->policy_name,
- cache->cache_size,
- cache->origin_sectors,
- cache->sectors_per_block);
- if (!cache->policy) {
+ struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name,
+ cache->cache_size,
+ cache->origin_sectors,
+ cache->sectors_per_block);
+ if (IS_ERR(p)) {
*error = "Error creating cache's policy";
- return -ENOMEM;
+ return PTR_ERR(p);
}
+ cache->policy = p;
return 0;
}
@@ -1992,6 +2270,22 @@ static int cache_create(struct cache_args *ca, struct cache **result)
}
cache->cmd = cmd;
+ if (passthrough_mode(&cache->features)) {
+ bool all_clean;
+
+ r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
+ if (r) {
+ *error = "dm_cache_metadata_all_clean() failed";
+ goto bad;
+ }
+
+ if (!all_clean) {
+ *error = "Cannot enter passthrough mode unless all blocks are clean";
+ r = -EINVAL;
+ goto bad;
+ }
+ }
+
spin_lock_init(&cache->lock);
bio_list_init(&cache->deferred_bios);
bio_list_init(&cache->deferred_flush_bios);
@@ -2002,6 +2296,10 @@ static int cache_create(struct cache_args *ca, struct cache **result)
atomic_set(&cache->nr_migrations, 0);
init_waitqueue_head(&cache->migration_wait);
+ init_waitqueue_head(&cache->quiescing_wait);
+ atomic_set(&cache->quiescing, 0);
+ atomic_set(&cache->quiescing_ack, 0);
+
r = -ENOMEM;
cache->nr_dirty = 0;
cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
@@ -2061,7 +2359,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
cache->need_tick_bio = true;
cache->sized = false;
- cache->quiescing = false;
+ cache->invalidate = false;
cache->commit_requested = false;
cache->loaded_mappings = false;
cache->loaded_discards = false;
@@ -2075,6 +2373,9 @@ static int cache_create(struct cache_args *ca, struct cache **result)
atomic_set(&cache->stats.commit_count, 0);
atomic_set(&cache->stats.discard_count, 0);
+ spin_lock_init(&cache->invalidation_lock);
+ INIT_LIST_HEAD(&cache->invalidation_requests);
+
*result = cache;
return 0;
@@ -2204,17 +2505,37 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_SUBMITTED;
}
+ r = DM_MAPIO_REMAPPED;
switch (lookup_result.op) {
case POLICY_HIT:
- inc_hit_counter(cache, bio);
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+ if (passthrough_mode(&cache->features)) {
+ if (bio_data_dir(bio) == WRITE) {
+ /*
+ * We need to invalidate this block, so
+ * defer for the worker thread.
+ */
+ cell_defer(cache, cell, true);
+ r = DM_MAPIO_SUBMITTED;
+
+ } else {
+ pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+ inc_miss_counter(cache, bio);
+ remap_to_origin_clear_discard(cache, bio, block);
+
+ cell_defer(cache, cell, false);
+ }
- if (is_writethrough_io(cache, bio, lookup_result.cblock))
- remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
- else
- remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+ } else {
+ inc_hit_counter(cache, bio);
- cell_defer(cache, cell, false);
+ if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
+ !is_dirty(cache, lookup_result.cblock))
+ remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
+ else
+ remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+
+ cell_defer(cache, cell, false);
+ }
break;
case POLICY_MISS:
@@ -2239,10 +2560,10 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
(unsigned) lookup_result.op);
bio_io_error(bio);
- return DM_MAPIO_SUBMITTED;
+ r = DM_MAPIO_SUBMITTED;
}
- return DM_MAPIO_REMAPPED;
+ return r;
}
static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
@@ -2403,26 +2724,71 @@ static int load_discard(void *context, sector_t discard_block_size,
return 0;
}
+static dm_cblock_t get_cache_dev_size(struct cache *cache)
+{
+ sector_t size = get_dev_size(cache->cache_dev);
+ (void) sector_div(size, cache->sectors_per_block);
+ return to_cblock(size);
+}
+
+static bool can_resize(struct cache *cache, dm_cblock_t new_size)
+{
+ if (from_cblock(new_size) > from_cblock(cache->cache_size))
+ return true;
+
+ /*
+ * We can't drop a dirty block when shrinking the cache.
+ */
+ while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
+ new_size = to_cblock(from_cblock(new_size) + 1);
+ if (is_dirty(cache, new_size)) {
+ DMERR("unable to shrink cache; cache block %llu is dirty",
+ (unsigned long long) from_cblock(new_size));
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
+{
+ int r;
+
+ r = dm_cache_resize(cache->cmd, new_size);
+ if (r) {
+ DMERR("could not resize cache metadata");
+ return r;
+ }
+
+ cache->cache_size = new_size;
+
+ return 0;
+}
+
static int cache_preresume(struct dm_target *ti)
{
int r = 0;
struct cache *cache = ti->private;
- sector_t actual_cache_size = get_dev_size(cache->cache_dev);
- (void) sector_div(actual_cache_size, cache->sectors_per_block);
+ dm_cblock_t csize = get_cache_dev_size(cache);
/*
* Check to see if the cache has resized.
*/
- if (from_cblock(cache->cache_size) != actual_cache_size || !cache->sized) {
- cache->cache_size = to_cblock(actual_cache_size);
-
- r = dm_cache_resize(cache->cmd, cache->cache_size);
- if (r) {
- DMERR("could not resize cache metadata");
+ if (!cache->sized) {
+ r = resize_cache_dev(cache, csize);
+ if (r)
return r;
- }
cache->sized = true;
+
+ } else if (csize != cache->cache_size) {
+ if (!can_resize(cache, csize))
+ return -EINVAL;
+
+ r = resize_cache_dev(cache, csize);
+ if (r)
+ return r;
}
if (!cache->loaded_mappings) {
@@ -2515,10 +2881,19 @@ static void cache_status(struct dm_target *ti, status_type_t type,
(unsigned long long) from_cblock(residency),
cache->nr_dirty);
- if (cache->features.write_through)
+ if (writethrough_mode(&cache->features))
DMEMIT("1 writethrough ");
- else
- DMEMIT("0 ");
+
+ else if (passthrough_mode(&cache->features))
+ DMEMIT("1 passthrough ");
+
+ else if (writeback_mode(&cache->features))
+ DMEMIT("1 writeback ");
+
+ else {
+ DMERR("internal error: unknown io mode: %d", (int) cache->features.io_mode);
+ goto err;
+ }
DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
if (sz < maxlen) {
@@ -2550,7 +2925,128 @@ err:
}
/*
- * Supports <key> <value>.
+ * A cache block range can take two forms:
+ *
+ * i) A single cblock, eg. '3456'
+ * ii) A begin and end cblock with dots between, eg. 123-234
+ */
+static int parse_cblock_range(struct cache *cache, const char *str,
+ struct cblock_range *result)
+{
+ char dummy;
+ uint64_t b, e;
+ int r;
+
+ /*
+ * Try and parse form (ii) first.
+ */
+ r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
+ if (r < 0)
+ return r;
+
+ if (r == 2) {
+ result->begin = to_cblock(b);
+ result->end = to_cblock(e);
+ return 0;
+ }
+
+ /*
+ * That didn't work, try form (i).
+ */
+ r = sscanf(str, "%llu%c", &b, &dummy);
+ if (r < 0)
+ return r;
+
+ if (r == 1) {
+ result->begin = to_cblock(b);
+ result->end = to_cblock(from_cblock(result->begin) + 1u);
+ return 0;
+ }
+
+ DMERR("invalid cblock range '%s'", str);
+ return -EINVAL;
+}
+
+static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
+{
+ uint64_t b = from_cblock(range->begin);
+ uint64_t e = from_cblock(range->end);
+ uint64_t n = from_cblock(cache->cache_size);
+
+ if (b >= n) {
+ DMERR("begin cblock out of range: %llu >= %llu", b, n);
+ return -EINVAL;
+ }
+
+ if (e > n) {
+ DMERR("end cblock out of range: %llu > %llu", e, n);
+ return -EINVAL;
+ }
+
+ if (b >= e) {
+ DMERR("invalid cblock range: %llu >= %llu", b, e);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int request_invalidation(struct cache *cache, struct cblock_range *range)
+{
+ struct invalidation_request req;
+
+ INIT_LIST_HEAD(&req.list);
+ req.cblocks = range;
+ atomic_set(&req.complete, 0);
+ req.err = 0;
+ init_waitqueue_head(&req.result_wait);
+
+ spin_lock(&cache->invalidation_lock);
+ list_add(&req.list, &cache->invalidation_requests);
+ spin_unlock(&cache->invalidation_lock);
+ wake_worker(cache);
+
+ wait_event(req.result_wait, atomic_read(&req.complete));
+ return req.err;
+}
+
+static int process_invalidate_cblocks_message(struct cache *cache, unsigned count,
+ const char **cblock_ranges)
+{
+ int r = 0;
+ unsigned i;
+ struct cblock_range range;
+
+ if (!passthrough_mode(&cache->features)) {
+ DMERR("cache has to be in passthrough mode for invalidation");
+ return -EPERM;
+ }
+
+ for (i = 0; i < count; i++) {
+ r = parse_cblock_range(cache, cblock_ranges[i], &range);
+ if (r)
+ break;
+
+ r = validate_cblock_range(cache, &range);
+ if (r)
+ break;
+
+ /*
+ * Pass begin and end origin blocks to the worker and wake it.
+ */
+ r = request_invalidation(cache, &range);
+ if (r)
+ break;
+ }
+
+ return r;
+}
+
+/*
+ * Supports
+ * "<key> <value>"
+ * and
+ * "invalidate_cblocks [(<begin>)|(<begin>-<end>)]*
*
* The key migration_threshold is supported by the cache target core.
*/
@@ -2558,6 +3054,12 @@ static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
{
struct cache *cache = ti->private;
+ if (!argc)
+ return -EINVAL;
+
+ if (!strcasecmp(argv[0], "invalidate_cblocks"))
+ return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);
+
if (argc != 2)
return -EINVAL;
@@ -2609,9 +3111,17 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
struct cache *cache = ti->private;
+ uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
- blk_limits_io_min(limits, 0);
- blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
+ /*
+ * If the system-determined stacked limits are compatible with the
+ * cache's blocksize (io_opt is a factor) do not override them.
+ */
+ if (io_opt_sectors < cache->sectors_per_block ||
+ do_div(io_opt_sectors, cache->sectors_per_block)) {
+ blk_limits_io_min(limits, 0);
+ blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
+ }
set_discard_limits(cache, limits);
}
@@ -2619,7 +3129,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type cache_target = {
.name = "cache",
- .version = {1, 1, 1},
+ .version = {1, 2, 0},
.module = THIS_MODULE,
.ctr = cache_ctr,
.dtr = cache_dtr,