diff options
Diffstat (limited to 'mm/zcache.c')
| -rw-r--r-- | mm/zcache.c | 1169 | 
1 files changed, 1169 insertions, 0 deletions
| diff --git a/mm/zcache.c b/mm/zcache.c new file mode 100644 index 000000000000..01473566ed0b --- /dev/null +++ b/mm/zcache.c @@ -0,0 +1,1169 @@ +/* + * linux/mm/zcache.c + * + * A cleancache backend for file pages compression. + * Concepts based on original zcache by Dan Magenheimer. + * Copyright (C) 2013  Bob Liu <bob.liu@xxxxxxxxxx> + * + * With zcache, active file pages can be compressed in memory during page + * reclaiming. When their data is needed again the I/O reading operation is + * avoided. This results in a significant performance gain under memory pressure + * for systems with many file pages. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. 
+*/ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/atomic.h> +#include <linux/cleancache.h> +#include <linux/cpu.h> +#include <linux/crypto.h> +#include <linux/page-flags.h> +#include <linux/pagemap.h> +#include <linux/highmem.h> +#include <linux/mm_types.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/radix-tree.h> +#include <linux/rbtree.h> +#include <linux/types.h> +#include <linux/zbud.h> + +/* + * Enable/disable zcache (disabled by default) + */ +static bool zcache_enabled __read_mostly; +module_param_named(enabled, zcache_enabled, bool, 0); + +/* + * Compressor to be used by zcache + */ +#define ZCACHE_COMPRESSOR_DEFAULT "lzo" +static char *zcache_compressor = ZCACHE_COMPRESSOR_DEFAULT; +module_param_named(compressor, zcache_compressor, charp, 0); + +/* + * The maximum percentage of memory that the compressed pool can occupy. + */ +static unsigned int zcache_max_pool_percent = 10; +module_param_named(max_pool_percent, zcache_max_pool_percent, uint, 0644); + +/* + * When the number of file pages falls below this percentage of total RAM, + * zcache_is_full() reports the pool as full and, if the pool is at least as + * large as the file page count, zcache_scan() tries to reclaim the whole pool. + */ +static unsigned int zcache_clear_percent = 4; +module_param_named(clear_percent, zcache_clear_percent, uint, 0644); +/* + * zcache statistics (exported through debugfs when CONFIG_DEBUG_FS is set) + */ +static u64 zcache_pool_limit_hit; +static u64 zcache_dup_entry; +static u64 zcache_zbud_alloc_fail; +static u64 zcache_evict_zpages; +static u64 zcache_evict_filepages; +static u64 zcache_inactive_pages_refused; +static u64 zcache_reclaim_fail; +static u64 zcache_pool_shrink; +static u64 zcache_pool_shrink_fail; +static u64 zcache_pool_shrink_pages; +static u64 zcache_store_failed; +static atomic_t zcache_stored_pages = ATOMIC_INIT(0); +static atomic_t zcache_stored_zero_pages = ATOMIC_INIT(0); + +/* + * Allocation flags used for both zcache_rbnode and zbud allocations. + */ +#define GFP_ZCACHE \ +	(__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | \ +		__GFP_NOMEMALLOC | __GFP_ZERO) + +/* + * Make sure this is different from radix tree + * indirect ptr or exceptional entry. 
+ */ +#define ZERO_HANDLE	((void *)~(~0UL >> 1)) + +/* + * Zcache receives pages for compression through the Cleancache API and is able + * to evict pages from its own compressed pool on an LRU basis in the case that + * the compressed pool is full. + * + * Zcache makes use of zbud for managing the compressed memory pool. Each + * allocation in zbud is not directly accessible by address.  Rather, a handle + * (zaddr) is returned by the allocation routine and that handle (zaddr) must + * be mapped before being accessed. The compressed memory pool grows on demand + * and shrinks as compressed pages are freed. + * + * When a file page is passed from cleancache to zcache, zcache maintains a + * mapping of the <pool_id, inode_number, page_index> to the zbud + * address that references that compressed file page. This mapping is achieved + * with a red-black tree per zcache pool, plus a radix tree per red-black + * node. + * + * A zcache pool with pool_id as the index is created when a filesystem is + * mounted. Each zcache pool has a red-black tree, the inode number (rb_index) + * is the search key. Each red-black tree node has a radix tree which uses + * page->index (ra_index) as the index. Each radix tree slot points to the zbud + * address combined with some extra information (zcache_ra_handle). + */ +#define MAX_ZCACHE_POOLS 32 +/* + * One zcache_pool per (cleancache aware) filesystem mount instance + */ +struct zcache_pool { +	struct rb_root rbtree; +	rwlock_t rb_lock;		/* Protects rbtree */ +	u64 size;			/* Cached zbud_get_pool_size() value */ +	struct zbud_pool *pool;         /* Zbud pool used */ +}; + +/* + * Manage all zcache pools + */ +struct _zcache { +	struct zcache_pool *pools[MAX_ZCACHE_POOLS]; +	u32 num_pools;			/* Current no. of zcache pools */ +	spinlock_t pool_lock;		/* Protects pools[] and num_pools */ +}; +struct _zcache zcache; + +/* + * Redblack tree node, each node has a page index radix-tree. + * Indexed by inode number. 
+ */ +struct zcache_rbnode { +	struct rb_node rb_node; +	int rb_index; +	struct radix_tree_root ratree; /* Page radix tree per inode rbtree */ +	spinlock_t ra_lock;		/* Protects radix tree */ +	struct kref refcount; +}; + +/* + * Radix-tree leaf, indexed by page->index + */ +struct zcache_ra_handle { +	int rb_index;			/* Redblack tree index */ +	int ra_index;			/* Radix tree index */ +	int zlen;			/* Compressed page size */ +	struct zcache_pool *zpool;	/* Finding zcache_pool during evict */ +}; + +u64 zcache_pages(void) +{ +	int i; +	u64 count = 0; + +	for (i = 0; (i < MAX_ZCACHE_POOLS) && zcache.pools[i]; i++) +		count += zcache.pools[i]->size; + +	return count; +} + +static struct kmem_cache *zcache_rbnode_cache; +static int zcache_rbnode_cache_create(void) +{ +	zcache_rbnode_cache = KMEM_CACHE(zcache_rbnode, 0); +	return zcache_rbnode_cache == NULL; +} +static void zcache_rbnode_cache_destroy(void) +{ +	kmem_cache_destroy(zcache_rbnode_cache); +} + +static unsigned long zcache_count(struct shrinker *s, +				  struct shrink_control *sc) +{ +	unsigned long active_file; +	long file_gap; + +	active_file = global_page_state(NR_ACTIVE_FILE); +	file_gap = zcache_pages() - active_file; +	if (file_gap < 0) +		file_gap = 0; +	return file_gap; +} + +static unsigned long zcache_scan(struct shrinker *s, struct shrink_control *sc) +{ +	unsigned long active_file; +	unsigned long file; +	long file_gap; +	unsigned long freed = 0; +	unsigned long pool; +	static bool running; +	int i = 0; +	int retries; + +	if (running) +		goto end; + +	running = true; +	active_file = global_page_state(NR_ACTIVE_FILE); +	file = global_page_state(NR_FILE_PAGES); +	pool = zcache_pages(); + +	file_gap = pool - file; + +	if ((file_gap >= 0) && +		(totalram_pages * zcache_clear_percent / 100 > file)) { +		file_gap = pool; +		zcache_pool_shrink++; +		goto reclaim; +	} + +	/* +	 * file_gap == 0 means that the number of pages +	 * stored by zcache is around twice as many as the +	 * number of active file 
pages. +	 */ +	file_gap = pool - active_file; +	if (file_gap < 0) +		file_gap = 0; +	else +		zcache_pool_shrink++; + +reclaim: +	retries = file_gap; +	while ((file_gap > 0) && retries) { +		struct zcache_pool *zpool = +			zcache.pools[i++ % MAX_ZCACHE_POOLS]; +		if (!zpool || !zpool->size) +			continue; +		if (zbud_reclaim_page(zpool->pool, 8)) { +			zcache_pool_shrink_fail++; +			retries--; +			continue; +		} +		freed++; +		file_gap--; +	} + +	zcache_pool_shrink_pages += freed; +	for (i = 0; (i < MAX_ZCACHE_POOLS) && zcache.pools[i]; i++) +		zcache.pools[i]->size = +			zbud_get_pool_size(zcache.pools[i]->pool); + +	running = false; +end: +	return freed; +} + +static struct shrinker zcache_shrinker = { +	.scan_objects = zcache_scan, +	.count_objects = zcache_count, +	.seeks = DEFAULT_SEEKS * 16 +}; + +/* + * Compression functions + * (Below functions are copyed from zswap!) + */ +static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms; + +enum comp_op { +	ZCACHE_COMPOP_COMPRESS, +	ZCACHE_COMPOP_DECOMPRESS +}; + +static int zcache_comp_op(enum comp_op op, const u8 *src, unsigned int slen, +				u8 *dst, unsigned int *dlen) +{ +	struct crypto_comp *tfm; +	int ret; + +	tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, get_cpu()); +	switch (op) { +	case ZCACHE_COMPOP_COMPRESS: +		ret = crypto_comp_compress(tfm, src, slen, dst, dlen); +		break; +	case ZCACHE_COMPOP_DECOMPRESS: +		ret = crypto_comp_decompress(tfm, src, slen, dst, dlen); +		break; +	default: +		ret = -EINVAL; +	} + +	put_cpu(); +	return ret; +} + +static int __init zcache_comp_init(void) +{ +	if (!crypto_has_comp(zcache_compressor, 0, 0)) { +		pr_info("%s compressor not available\n", zcache_compressor); +		/* fall back to default compressor */ +		zcache_compressor = ZCACHE_COMPRESSOR_DEFAULT; +		if (!crypto_has_comp(zcache_compressor, 0, 0)) +			/* can't even load the default compressor */ +			return -ENODEV; +	} +	pr_info("using %s compressor\n", zcache_compressor); + +	/* alloc percpu transforms */ +	
zcache_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *); +	if (!zcache_comp_pcpu_tfms) +		return -ENOMEM; +	return 0; +} + +static void zcache_comp_exit(void) +{ +	/* free percpu transforms */ +	if (zcache_comp_pcpu_tfms) +		free_percpu(zcache_comp_pcpu_tfms); +} + +/* + * Per-cpu code + * (Below functions are also copyed from zswap!) + */ +static DEFINE_PER_CPU(u8 *, zcache_dstmem); + +static int __zcache_cpu_notifier(unsigned long action, unsigned long cpu) +{ +	struct crypto_comp *tfm; +	u8 *dst; + +	switch (action) { +	case CPU_UP_PREPARE: +		tfm = crypto_alloc_comp(zcache_compressor, 0, 0); +		if (IS_ERR(tfm)) { +			pr_err("can't allocate compressor transform\n"); +			return NOTIFY_BAD; +		} +		*per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = tfm; +		dst = kmalloc(PAGE_SIZE * 2, GFP_KERNEL); +		if (!dst) { +			pr_err("can't allocate compressor buffer\n"); +			crypto_free_comp(tfm); +			*per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL; +			return NOTIFY_BAD; +		} +		per_cpu(zcache_dstmem, cpu) = dst; +		break; +	case CPU_DEAD: +	case CPU_UP_CANCELED: +		tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu); +		if (tfm) { +			crypto_free_comp(tfm); +			*per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL; +		} +		dst = per_cpu(zcache_dstmem, cpu); +		kfree(dst); +		per_cpu(zcache_dstmem, cpu) = NULL; +		break; +	default: +		break; +	} +	return NOTIFY_OK; +} + +static int zcache_cpu_notifier(struct notifier_block *nb, +				unsigned long action, void *pcpu) +{ +	unsigned long cpu = (unsigned long)pcpu; + +	return __zcache_cpu_notifier(action, cpu); +} + +static struct notifier_block zcache_cpu_notifier_block = { +	.notifier_call = zcache_cpu_notifier +}; + +static int zcache_cpu_init(void) +{ +	unsigned long cpu; + +	get_online_cpus(); +	for_each_online_cpu(cpu) +		if (__zcache_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK) +			goto cleanup; +	register_cpu_notifier(&zcache_cpu_notifier_block); +	put_online_cpus(); +	return 0; + +cleanup: +	for_each_online_cpu(cpu) +		
__zcache_cpu_notifier(CPU_UP_CANCELED, cpu); +	put_online_cpus(); +	return -ENOMEM; +} + +/* + * Zcache helpers + */ +static bool zcache_is_full(void) +{ +	long file = global_page_state(NR_FILE_PAGES); + +	return ((totalram_pages * zcache_max_pool_percent / 100 < +			zcache_pages()) || +			(totalram_pages * zcache_clear_percent / 100 > +			file)); +} + +/* + * The caller must hold zpool->rb_lock at least + */ +static struct zcache_rbnode *zcache_find_rbnode(struct rb_root *rbtree, +	int index, struct rb_node **rb_parent, struct rb_node ***rb_link) +{ +	struct zcache_rbnode *entry; +	struct rb_node **__rb_link, *__rb_parent, *rb_prev; + +	__rb_link = &rbtree->rb_node; +	rb_prev = __rb_parent = NULL; + +	while (*__rb_link) { +		__rb_parent = *__rb_link; +		entry = rb_entry(__rb_parent, struct zcache_rbnode, rb_node); +		if (entry->rb_index > index) +			__rb_link = &__rb_parent->rb_left; +		else if (entry->rb_index < index) { +			rb_prev = __rb_parent; +			__rb_link = &__rb_parent->rb_right; +		} else +			return entry; +	} + +	if (rb_parent) +		*rb_parent = __rb_parent; +	if (rb_link) +		*rb_link = __rb_link; +	return NULL; +} + +static struct zcache_rbnode *zcache_find_get_rbnode(struct zcache_pool *zpool, +					int rb_index) +{ +	unsigned long flags; +	struct zcache_rbnode *rbnode; + +	read_lock_irqsave(&zpool->rb_lock, flags); +	rbnode = zcache_find_rbnode(&zpool->rbtree, rb_index, 0, 0); +	if (rbnode) +		kref_get(&rbnode->refcount); +	read_unlock_irqrestore(&zpool->rb_lock, flags); +	return rbnode; +} + +/* + * kref_put callback for zcache_rbnode. + * + * The rbnode must have been isolated from rbtree already. + */ +static void zcache_rbnode_release(struct kref *kref) +{ +	struct zcache_rbnode *rbnode; + +	rbnode = container_of(kref, struct zcache_rbnode, refcount); +	BUG_ON(rbnode->ratree.rnode); +	kmem_cache_free(zcache_rbnode_cache, rbnode); +} + +/* + * Check whether the radix-tree of this rbnode is empty. 
+ * If that's true, then we can delete this zcache_rbnode from + * zcache_pool->rbtree + * + * Caller must hold zcache_rbnode->ra_lock + */ +static int zcache_rbnode_empty(struct zcache_rbnode *rbnode) +{ +	return rbnode->ratree.rnode == NULL; +} + +/* + * Remove zcache_rbnode from zpool->rbtree + * + * holded_rblock - whether the caller has holded zpool->rb_lock + */ +static void zcache_rbnode_isolate(struct zcache_pool *zpool, +		struct zcache_rbnode *rbnode, bool holded_rblock) +{ +	unsigned long flags; + +	if (!holded_rblock) +		write_lock_irqsave(&zpool->rb_lock, flags); +	/* +	 * Someone can get reference on this rbnode before we could +	 * acquire write lock above. +	 * We want to remove it from zpool->rbtree when only the caller and +	 * corresponding ratree holds a reference to this rbnode. +	 * Below check ensures that a racing zcache put will not end up adding +	 * a page to an isolated node and thereby losing that memory. +	 */ +	if (atomic_read(&rbnode->refcount.refcount) == 2) { +		rb_erase(&rbnode->rb_node, &zpool->rbtree); +		RB_CLEAR_NODE(&rbnode->rb_node); +		kref_put(&rbnode->refcount, zcache_rbnode_release); +	} +	if (!holded_rblock) +		write_unlock_irqrestore(&zpool->rb_lock, flags); +} + +/* + * Store zaddr which allocated by zbud_alloc() to the hierarchy rbtree-ratree. 
+ */ +static int zcache_store_zaddr(struct zcache_pool *zpool, +		int ra_index, int rb_index, unsigned long zaddr) +{ +	unsigned long flags; +	struct zcache_rbnode *rbnode, *tmp; +	struct rb_node **link = NULL, *parent = NULL; +	int ret; +	void *dup_zaddr; + +	rbnode = zcache_find_get_rbnode(zpool, rb_index); +	if (!rbnode) { +		/* alloc and init a new rbnode */ +		rbnode = kmem_cache_alloc(zcache_rbnode_cache, +			GFP_ZCACHE); +		if (!rbnode) +			return -ENOMEM; + +		INIT_RADIX_TREE(&rbnode->ratree, GFP_ATOMIC|__GFP_NOWARN); +		spin_lock_init(&rbnode->ra_lock); +		rbnode->rb_index = rb_index; +		kref_init(&rbnode->refcount); +		RB_CLEAR_NODE(&rbnode->rb_node); + +		/* add that rbnode to rbtree */ +		write_lock_irqsave(&zpool->rb_lock, flags); +		tmp = zcache_find_rbnode(&zpool->rbtree, rb_index, +				&parent, &link); +		if (tmp) { +			/* somebody else allocated new rbnode */ +			kmem_cache_free(zcache_rbnode_cache, rbnode); +			rbnode = tmp; +		} else { +			rb_link_node(&rbnode->rb_node, parent, link); +			rb_insert_color(&rbnode->rb_node, &zpool->rbtree); +		} + +		/* Inc the reference of this zcache_rbnode */ +		kref_get(&rbnode->refcount); +		write_unlock_irqrestore(&zpool->rb_lock, flags); +	} + +	/* Succfully got a zcache_rbnode when arriving here */ +	spin_lock_irqsave(&rbnode->ra_lock, flags); +	dup_zaddr = radix_tree_delete(&rbnode->ratree, ra_index); +	if (unlikely(dup_zaddr)) { +		WARN_ON("duplicated, will be replaced!\n"); +		if (dup_zaddr == ZERO_HANDLE) { +			atomic_dec(&zcache_stored_zero_pages); +		} else { +			zbud_free(zpool->pool, (unsigned long)dup_zaddr); +			atomic_dec(&zcache_stored_pages); +			zpool->size = zbud_get_pool_size(zpool->pool); +		} +		zcache_dup_entry++; +	} + +	/* Insert zcache_ra_handle to ratree */ +	ret = radix_tree_insert(&rbnode->ratree, ra_index, +				(void *)zaddr); +	spin_unlock_irqrestore(&rbnode->ra_lock, flags); +	if (unlikely(ret)) { +		write_lock_irqsave(&zpool->rb_lock, flags); +		spin_lock(&rbnode->ra_lock); + +		
if (zcache_rbnode_empty(rbnode)) +			zcache_rbnode_isolate(zpool, rbnode, 1); + +		spin_unlock(&rbnode->ra_lock); +		write_unlock_irqrestore(&zpool->rb_lock, flags); +	} + +	kref_put(&rbnode->refcount, zcache_rbnode_release); +	return ret; +} + +/* + * Load zaddr and delete it from radix tree. + * If the radix tree of the corresponding rbnode is empty, delete the rbnode + * from zpool->rbtree also. + */ +static void *zcache_load_delete_zaddr(struct zcache_pool *zpool, +				int rb_index, int ra_index) +{ +	struct zcache_rbnode *rbnode; +	void *zaddr = NULL; +	unsigned long flags; + +	rbnode = zcache_find_get_rbnode(zpool, rb_index); +	if (!rbnode) +		goto out; + +	BUG_ON(rbnode->rb_index != rb_index); + +	spin_lock_irqsave(&rbnode->ra_lock, flags); +	zaddr = radix_tree_delete(&rbnode->ratree, ra_index); +	spin_unlock_irqrestore(&rbnode->ra_lock, flags); + +	/* rb_lock and ra_lock must be taken again in the given sequence */ +	write_lock_irqsave(&zpool->rb_lock, flags); +	spin_lock(&rbnode->ra_lock); +	if (zcache_rbnode_empty(rbnode)) +		zcache_rbnode_isolate(zpool, rbnode, 1); +	spin_unlock(&rbnode->ra_lock); +	write_unlock_irqrestore(&zpool->rb_lock, flags); + +	kref_put(&rbnode->refcount, zcache_rbnode_release); +out: +	return zaddr; +} + +static bool zero_page(struct page *page) +{ +	unsigned long *ptr = kmap_atomic(page); +	int i; +	bool ret = false; + +	for (i = 0; i < PAGE_SIZE / sizeof(*ptr); i++) { +		if (ptr[i]) +			goto out; +	} +	ret = true; +out: +	kunmap_atomic(ptr); +	return ret; +} + +static void zcache_store_page(int pool_id, struct cleancache_filekey key, +		pgoff_t index, struct page *page) +{ +	struct zcache_ra_handle *zhandle; +	u8 *zpage, *src, *dst; +	/* Address of zhandle + compressed data(zpage) */ +	unsigned long zaddr = 0; +	unsigned int zlen = PAGE_SIZE; +	bool zero = 0; +	int ret; + +	struct zcache_pool *zpool = zcache.pools[pool_id]; + +	/* +	 * Zcache will be ineffective if the compressed memory pool is full with +	 * compressed 
inactive file pages and most of them will never be used +	 * again. +	 * So we refuse to compress pages that are not from active file list. +	 */ +	if (!PageWasActive(page)) { +		zcache_inactive_pages_refused++; +		return; +	} + +	zero = zero_page(page); +	if (zero) +		goto zero; + +	if (zcache_is_full()) { +		zcache_pool_limit_hit++; +		if (zbud_reclaim_page(zpool->pool, 8)) { +			zcache_reclaim_fail++; +			return; +		} +		/* +		 * Continue if reclaimed a page frame succ. +		 */ +		zcache_evict_filepages++; +		zpool->size = zbud_get_pool_size(zpool->pool); +	} + +	/* compress */ +	dst = get_cpu_var(zcache_dstmem); +	src = kmap_atomic(page); +	ret = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, src, PAGE_SIZE, dst, +			&zlen); +	kunmap_atomic(src); +	if (ret) { +		pr_err("zcache compress error ret %d\n", ret); +		put_cpu_var(zcache_dstmem); +		return; +	} + +	/* store zcache handle together with compressed page data */ +	ret = zbud_alloc(zpool->pool, zlen + sizeof(struct zcache_ra_handle), +			GFP_ZCACHE, &zaddr); +	if (ret) { +		zcache_zbud_alloc_fail++; +		put_cpu_var(zcache_dstmem); +		return; +	} + +	zhandle = (struct zcache_ra_handle *)zbud_map(zpool->pool, zaddr); + +	/* Compressed page data stored at the end of zcache_ra_handle */ +	zpage = (u8 *)(zhandle + 1); +	memcpy(zpage, dst, zlen); +	zbud_unmap(zpool->pool, zaddr); +	put_cpu_var(zcache_dstmem); + +zero: +	if (zero) +		zaddr = (unsigned long)ZERO_HANDLE; + +	/* store zcache handle */ +	ret = zcache_store_zaddr(zpool, index, key.u.ino, zaddr); +	if (ret) { +		zcache_store_failed++; +		if (!zero) +			zbud_free(zpool->pool, zaddr); +		return; +	} + +	/* update stats */ +	if (zero) { +		atomic_inc(&zcache_stored_zero_pages); +	} else { +		zhandle->ra_index = index; +		zhandle->rb_index = key.u.ino; +		zhandle->zlen = zlen; +		zhandle->zpool = zpool; +		atomic_inc(&zcache_stored_pages); +		zpool->size = zbud_get_pool_size(zpool->pool); +	} + +	return; +} + +static int zcache_load_page(int pool_id, struct 
cleancache_filekey key, +			pgoff_t index, struct page *page) +{ +	int ret = 0; +	u8 *src, *dst; +	void *zaddr; +	unsigned int dlen = PAGE_SIZE; +	struct zcache_ra_handle *zhandle; +	struct zcache_pool *zpool = zcache.pools[pool_id]; + +	zaddr = zcache_load_delete_zaddr(zpool, key.u.ino, index); +	if (!zaddr) +		return -ENOENT; +	else if (zaddr == ZERO_HANDLE) +		goto map; + +	zhandle = (struct zcache_ra_handle *)zbud_map(zpool->pool, +			(unsigned long)zaddr); +	/* Compressed page data stored at the end of zcache_ra_handle */ +	src = (u8 *)(zhandle + 1); + +	/* decompress */ +map: +	dst = kmap_atomic(page); +	if (zaddr != ZERO_HANDLE) { +		ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, src, +				zhandle->zlen, dst, &dlen); +	} else { +		memset(dst, 0, PAGE_SIZE); +		kunmap_atomic(dst); +		flush_dcache_page(page); +		atomic_dec(&zcache_stored_zero_pages); +		goto out; +	} +	kunmap_atomic(dst); +	zbud_unmap(zpool->pool, (unsigned long)zaddr); +	zbud_free(zpool->pool, (unsigned long)zaddr); + +	BUG_ON(ret); +	BUG_ON(dlen != PAGE_SIZE); + +	/* update stats */ +	atomic_dec(&zcache_stored_pages); +	zpool->size = zbud_get_pool_size(zpool->pool); +out: +	SetPageWasActive(page); +	return ret; +} + +static void zcache_flush_page(int pool_id, struct cleancache_filekey key, +			pgoff_t index) +{ +	struct zcache_pool *zpool = zcache.pools[pool_id]; +	void *zaddr = NULL; + +	zaddr = zcache_load_delete_zaddr(zpool, key.u.ino, index); +	if (zaddr && (zaddr != ZERO_HANDLE)) { +		zbud_free(zpool->pool, (unsigned long)zaddr); +		atomic_dec(&zcache_stored_pages); +		zpool->size = zbud_get_pool_size(zpool->pool); +	} else if (zaddr == ZERO_HANDLE) { +		atomic_dec(&zcache_stored_zero_pages); +	} +} + +#define FREE_BATCH 16 +/* + * Callers must hold the lock + */ +static void zcache_flush_ratree(struct zcache_pool *zpool, +		struct zcache_rbnode *rbnode) +{ +	unsigned long index = 0; +	int count, i; +	struct zcache_ra_handle *zhandle; +	void *zaddr = NULL; + +	do { +		void 
*zaddrs[FREE_BATCH]; +		unsigned long indices[FREE_BATCH]; + +		count = radix_tree_gang_lookup_index(&rbnode->ratree, +				(void **)zaddrs, indices, +				index, FREE_BATCH); + +		for (i = 0; i < count; i++) { +			if (zaddrs[i] == ZERO_HANDLE) { +				zaddr = radix_tree_delete(&rbnode->ratree, +					indices[i]); +				if (zaddr) +					atomic_dec(&zcache_stored_zero_pages); +				continue; +			} +			zhandle = (struct zcache_ra_handle *)zbud_map( +					zpool->pool, (unsigned long)zaddrs[i]); +			index = zhandle->ra_index; +			zaddr = radix_tree_delete(&rbnode->ratree, index); +			if (!zaddr) +				continue; +			zbud_unmap(zpool->pool, (unsigned long)zaddrs[i]); +			zbud_free(zpool->pool, (unsigned long)zaddrs[i]); +			atomic_dec(&zcache_stored_pages); +			zpool->size = zbud_get_pool_size(zpool->pool); +		} + +		index++; +	} while (count == FREE_BATCH); +} + +static void zcache_flush_inode(int pool_id, struct cleancache_filekey key) +{ +	struct zcache_rbnode *rbnode; +	unsigned long flags1, flags2; +	struct zcache_pool *zpool = zcache.pools[pool_id]; + +	/* +	 * Refuse new pages added in to the same rbinode, so get rb_lock at +	 * first. 
+	 */ +	write_lock_irqsave(&zpool->rb_lock, flags1); +	rbnode = zcache_find_rbnode(&zpool->rbtree, key.u.ino, 0, 0); +	if (!rbnode) { +		write_unlock_irqrestore(&zpool->rb_lock, flags1); +		return; +	} + +	kref_get(&rbnode->refcount); +	spin_lock_irqsave(&rbnode->ra_lock, flags2); + +	zcache_flush_ratree(zpool, rbnode); +	if (zcache_rbnode_empty(rbnode)) +		/* When arrvied here, we already hold rb_lock */ +		zcache_rbnode_isolate(zpool, rbnode, 1); + +	spin_unlock_irqrestore(&rbnode->ra_lock, flags2); +	write_unlock_irqrestore(&zpool->rb_lock, flags1); +	kref_put(&rbnode->refcount, zcache_rbnode_release); +} + +static void zcache_destroy_pool(struct zcache_pool *zpool); +static void zcache_flush_fs(int pool_id) +{ +	struct zcache_rbnode *z_rbnode = NULL; +	struct rb_node *rbnode; +	unsigned long flags1, flags2; +	struct zcache_pool *zpool; + +	if (pool_id < 0) +		return; + +	zpool = zcache.pools[pool_id]; +	if (!zpool) +		return; + +	/* +	 * Refuse new pages added in, so get rb_lock at first. +	 */ +	write_lock_irqsave(&zpool->rb_lock, flags1); + +	rbnode = rb_first(&zpool->rbtree); +	while (rbnode) { +		z_rbnode = rb_entry(rbnode, struct zcache_rbnode, rb_node); +		rbnode = rb_next(rbnode); +		if (z_rbnode) { +			kref_get(&z_rbnode->refcount); +			spin_lock_irqsave(&z_rbnode->ra_lock, flags2); +			zcache_flush_ratree(zpool, z_rbnode); +			if (zcache_rbnode_empty(z_rbnode)) +				zcache_rbnode_isolate(zpool, z_rbnode, 1); +			spin_unlock_irqrestore(&z_rbnode->ra_lock, flags2); +			kref_put(&z_rbnode->refcount, zcache_rbnode_release); +		} +	} + +	write_unlock_irqrestore(&zpool->rb_lock, flags1); +	zcache_destroy_pool(zpool); +} + +/* + * Evict compressed pages from zcache pool on an LRU basis after the compressed + * pool is full. 
+ */ +static int zcache_evict_zpage(struct zbud_pool *pool, unsigned long zaddr) +{ +	struct zcache_pool *zpool; +	struct zcache_ra_handle *zhandle; +	void *zaddr_intree; + +	BUG_ON(zaddr == (unsigned long)ZERO_HANDLE); + +	zhandle = (struct zcache_ra_handle *)zbud_map(pool, zaddr); + +	zpool = zhandle->zpool; +	/* There can be a race with zcache store */ +	if (!zpool) +		return -EINVAL; + +	BUG_ON(pool != zpool->pool); + +	zaddr_intree = zcache_load_delete_zaddr(zpool, zhandle->rb_index, +			zhandle->ra_index); +	if (zaddr_intree) { +		BUG_ON((unsigned long)zaddr_intree != zaddr); +		zbud_unmap(pool, zaddr); +		zbud_free(pool, zaddr); +		atomic_dec(&zcache_stored_pages); +		zpool->size = zbud_get_pool_size(pool); +		zcache_evict_zpages++; +	} +	return 0; +} + +static struct zbud_ops zcache_zbud_ops = { +	.evict = zcache_evict_zpage +}; + +/* Return pool id */ +static int zcache_create_pool(void) +{ +	int ret; +	struct zcache_pool *zpool; + +	zpool = kzalloc(sizeof(*zpool), GFP_KERNEL); +	if (!zpool) { +		ret = -ENOMEM; +		goto out; +	} + +	zpool->pool = zbud_create_pool(GFP_KERNEL, &zcache_zbud_ops); +	if (!zpool->pool) { +		kfree(zpool); +		ret = -ENOMEM; +		goto out; +	} + +	spin_lock(&zcache.pool_lock); +	if (zcache.num_pools == MAX_ZCACHE_POOLS) { +		pr_err("Cannot create new pool (limit:%u)\n", MAX_ZCACHE_POOLS); +		zbud_destroy_pool(zpool->pool); +		kfree(zpool); +		ret = -EPERM; +		goto out_unlock; +	} + +	rwlock_init(&zpool->rb_lock); +	zpool->rbtree = RB_ROOT; +	/* Add to pool list */ +	for (ret = 0; ret < MAX_ZCACHE_POOLS; ret++) +		if (!zcache.pools[ret]) +			break; +	zcache.pools[ret] = zpool; +	zcache.num_pools++; +	pr_info("New pool created id:%d\n", ret); + +out_unlock: +	spin_unlock(&zcache.pool_lock); +out: +	return ret; +} + +static void zcache_destroy_pool(struct zcache_pool *zpool) +{ +	int i; + +	if (!zpool) +		return; + +	spin_lock(&zcache.pool_lock); +	zcache.num_pools--; +	for (i = 0; i < MAX_ZCACHE_POOLS; i++) +		if (zcache.pools[i] == 
zpool) +			break; +	zcache.pools[i] = NULL; +	spin_unlock(&zcache.pool_lock); + +	if (!RB_EMPTY_ROOT(&zpool->rbtree)) +		WARN_ON("Memory leak detected. Freeing non-empty pool!\n"); + +	zbud_destroy_pool(zpool->pool); +	kfree(zpool); +} + +static int zcache_init_fs(size_t pagesize) +{ +	int ret; + +	if (pagesize != PAGE_SIZE) { +		pr_info("Unsupported page size: %zu", pagesize); +		ret = -EINVAL; +		goto out; +	} + +	ret = zcache_create_pool(); +	if (ret < 0) { +		pr_info("Failed to create new pool\n"); +		ret = -ENOMEM; +		goto out; +	} +out: +	return ret; +} + +static int zcache_init_shared_fs(char *uuid, size_t pagesize) +{ +	/* shared pools are unsupported and map to private */ +	return zcache_init_fs(pagesize); +} + +static struct cleancache_ops zcache_ops = { +	.put_page = zcache_store_page, +	.get_page = zcache_load_page, +	.invalidate_page = zcache_flush_page, +	.invalidate_inode = zcache_flush_inode, +	.invalidate_fs = zcache_flush_fs, +	.init_shared_fs = zcache_init_shared_fs, +	.init_fs = zcache_init_fs +}; + +/* + * Debugfs functions + */ +#ifdef CONFIG_DEBUG_FS +#include <linux/debugfs.h> + +static int pool_pages_get(void *_data, u64 *val) +{ +	*val = zcache_pages(); +	return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(pool_page_fops, pool_pages_get, NULL, "%llu\n"); + +static struct dentry *zcache_debugfs_root; + +static int __init zcache_debugfs_init(void) +{ +	if (!debugfs_initialized()) +		return -ENODEV; + +	zcache_debugfs_root = debugfs_create_dir("zcache", NULL); +	if (!zcache_debugfs_root) +		return -ENOMEM; + +	debugfs_create_u64("pool_limit_hit", S_IRUGO, zcache_debugfs_root, +			&zcache_pool_limit_hit); +	debugfs_create_u64("reject_alloc_fail", S_IRUGO, zcache_debugfs_root, +			&zcache_zbud_alloc_fail); +	debugfs_create_u64("duplicate_entry", S_IRUGO, zcache_debugfs_root, +			&zcache_dup_entry); +	debugfs_create_file("pool_pages", S_IRUGO, zcache_debugfs_root, NULL, +			&pool_page_fops); +	debugfs_create_atomic_t("stored_pages", S_IRUGO, 
zcache_debugfs_root, +			&zcache_stored_pages); +	debugfs_create_atomic_t("stored_zero_pages", S_IRUGO, +			zcache_debugfs_root, &zcache_stored_zero_pages); +	debugfs_create_u64("evicted_zpages", S_IRUGO, zcache_debugfs_root, +			&zcache_evict_zpages); +	debugfs_create_u64("evicted_filepages", S_IRUGO, zcache_debugfs_root, +			&zcache_evict_filepages); +	debugfs_create_u64("reclaim_fail", S_IRUGO, zcache_debugfs_root, +			&zcache_reclaim_fail); +	debugfs_create_u64("inactive_pages_refused", S_IRUGO, +			zcache_debugfs_root, &zcache_inactive_pages_refused); +	debugfs_create_u64("pool_shrink_count", S_IRUGO, +			zcache_debugfs_root, &zcache_pool_shrink); +	debugfs_create_u64("pool_shrink_fail", S_IRUGO, +			zcache_debugfs_root, &zcache_pool_shrink_fail); +	debugfs_create_u64("pool_shrink_pages", S_IRUGO, +			zcache_debugfs_root, &zcache_pool_shrink_pages); +	debugfs_create_u64("store_fail", S_IRUGO, +			zcache_debugfs_root, &zcache_store_failed); +	return 0; +} + +static void __exit zcache_debugfs_exit(void) +{ +	debugfs_remove_recursive(zcache_debugfs_root); +} +#else +static int __init zcache_debugfs_init(void) +{ +	return 0; +} +static void __exit zcache_debugfs_exit(void) +{ +} +#endif + +/* + * zcache init and exit + */ +static int __init init_zcache(void) +{ +	if (!zcache_enabled) +		return 0; + +	pr_info("loading zcache..\n"); +	if (zcache_rbnode_cache_create()) { +		pr_err("entry cache creation failed\n"); +		goto error; +	} + +	if (zcache_comp_init()) { +		pr_err("compressor initialization failed\n"); +		goto compfail; +	} +	if (zcache_cpu_init()) { +		pr_err("per-cpu initialization failed\n"); +		goto pcpufail; +	} + +	spin_lock_init(&zcache.pool_lock); +	cleancache_register_ops(&zcache_ops); + +	if (zcache_debugfs_init()) +		pr_warn("debugfs initialization failed\n"); +	register_shrinker(&zcache_shrinker); +	return 0; +pcpufail: +	zcache_comp_exit(); +compfail: +	zcache_rbnode_cache_destroy(); +error: +	return -ENOMEM; +} + +/* must be late so crypto has 
time to come up */ +late_initcall(init_zcache); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Bob Liu <bob.liu@xxxxxxxxxx>"); +MODULE_DESCRIPTION("Compressed cache for clean file pages"); + | 
