Diffstat (limited to 'mm/zcache.c'):
 mm/zcache.c | 1169 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1169 insertions(+), 0 deletions(-)
diff --git a/mm/zcache.c b/mm/zcache.c
new file mode 100644
index 000000000000..01473566ed0b
--- /dev/null
+++ b/mm/zcache.c
@@ -0,0 +1,1169 @@
+/*
+ * linux/mm/zcache.c
+ *
+ * A cleancache backend that compresses file pages.
+ * Concepts based on original zcache by Dan Magenheimer.
+ * Copyright (C) 2013 Bob Liu <bob.liu@xxxxxxxxxx>
+ *
+ * With zcache, active file pages can be compressed in memory during page
+ * reclaim. When their data is needed again, the read I/O is avoided. This
+ * results in a significant performance gain under memory pressure for
+ * systems with many file pages.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/atomic.h>
+#include <linux/cleancache.h>
+#include <linux/cpu.h>
+#include <linux/crypto.h>
+#include <linux/page-flags.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/mm_types.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/radix-tree.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+#include <linux/zbud.h>
+
+/*
+ * Enable/disable zcache (disabled by default)
+ */
+static bool zcache_enabled __read_mostly;
+module_param_named(enabled, zcache_enabled, bool, 0);
+
+/*
+ * Compressor to be used by zcache
+ */
+#define ZCACHE_COMPRESSOR_DEFAULT "lzo"
+static char *zcache_compressor = ZCACHE_COMPRESSOR_DEFAULT;
+module_param_named(compressor, zcache_compressor, charp, 0);
+
+/*
+ * The maximum percentage of memory that the compressed pool can occupy.
+ */
+static unsigned int zcache_max_pool_percent = 10;
+module_param_named(max_pool_percent, zcache_max_pool_percent, uint, 0644);
+
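+/*
+ * When the number of file pages falls below this percentage of total RAM,
+ * zcache stops accepting new pages and the shrinker drains the compressed
+ * pool (see zcache_is_full() and zcache_scan() below).
+ */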
+static unsigned int zcache_clear_percent = 4;
+module_param_named(clear_percent, zcache_clear_percent, uint, 0644);
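
For reference, module parameters of built-in code can be set on the kernel command line as <module>.<parameter>. A minimal, purely illustrative example (the values are not recommendations from this patch):

    zcache.enabled=1 zcache.max_pool_percent=20 zcache.clear_percent=4
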
+/*
+ * zcache statistics
+ */
+static u64 zcache_pool_limit_hit;
+static u64 zcache_dup_entry;
+static u64 zcache_zbud_alloc_fail;
+static u64 zcache_evict_zpages;
+static u64 zcache_evict_filepages;
+static u64 zcache_inactive_pages_refused;
+static u64 zcache_reclaim_fail;
+static u64 zcache_pool_shrink;
+static u64 zcache_pool_shrink_fail;
+static u64 zcache_pool_shrink_pages;
+static u64 zcache_store_failed;
+static atomic_t zcache_stored_pages = ATOMIC_INIT(0);
+static atomic_t zcache_stored_zero_pages = ATOMIC_INIT(0);
+
+#define GFP_ZCACHE \
+ (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | \
+ __GFP_NOMEMALLOC | __GFP_ZERO)
+
+/*
+ * Sentinel zaddr used for all-zero pages. Only the top bit is set, so it
+ * cannot collide with a radix-tree indirect pointer or exceptional entry,
+ * which are tagged in the low bits.
+ */
+#define ZERO_HANDLE ((void *)~(~0UL >> 1))
+
+/*
+ * Zcache receives pages for compression through the Cleancache API and is able
+ * to evict pages from its own compressed pool on an LRU basis in the case that
+ * the compressed pool is full.
+ *
+ * Zcache uses zbud to manage the compressed memory pool. An allocation in
+ * zbud is not directly accessible by address; instead, the allocation routine
+ * returns a handle (zaddr), which must be mapped before the data can be
+ * accessed. The compressed memory pool grows on demand and shrinks as
+ * compressed pages are freed.
+ *
+ * When a file page is passed from cleancache to zcache, zcache maintains a
+ * mapping of the <filesystem_type, inode_number, page_index> to the zbud
+ * address that references that compressed file page. This mapping is achieved
+ * with a red-black tree per filesystem type, plus a radix tree per red-black
+ * node.
+ *
+ * A zcache pool, indexed by pool_id, is created when a filesystem is mounted.
+ * Each zcache pool has a red-black tree in which the inode number (rb_index)
+ * is the search key. Each red-black tree node holds a radix tree that uses
+ * page->index (ra_index) as its index. Each radix tree slot points to the
+ * zbud address together with some extra information (struct zcache_ra_handle).
+ */
+#define MAX_ZCACHE_POOLS 32
+/*
+ * One zcache_pool per (cleancache aware) filesystem mount instance
+ */
+struct zcache_pool {
+ struct rb_root rbtree;
+ rwlock_t rb_lock; /* Protects rbtree */
+ u64 size;
+ struct zbud_pool *pool; /* Zbud pool used */
+};
+
+/*
+ * Manage all zcache pools
+ */
+struct _zcache {
+ struct zcache_pool *pools[MAX_ZCACHE_POOLS];
+ u32 num_pools; /* Current no. of zcache pools */
+ spinlock_t pool_lock; /* Protects pools[] and num_pools */
+};
+struct _zcache zcache;
+
+/*
+ * Red-black tree node; each node has a page-index radix tree.
+ * Indexed by inode number.
+ */
+struct zcache_rbnode {
+ struct rb_node rb_node;
+ int rb_index;
+ struct radix_tree_root ratree; /* Page radix tree per inode rbtree */
+ spinlock_t ra_lock; /* Protects radix tree */
+ struct kref refcount;
+};
+
+/*
+ * Radix-tree leaf, indexed by page->index
+ */
+struct zcache_ra_handle {
+ int rb_index; /* Red-black tree index */
+ int ra_index; /* Radix tree index */
+ int zlen; /* Compressed page size */
+ struct zcache_pool *zpool; /* Finding zcache_pool during evict */
+};
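
To make the rb-tree/radix-tree hierarchy described above concrete, here is an illustrative lookup sketch. It is not part of the patch: it reuses zcache_find_get_rbnode() and zcache_rbnode_release(), which are defined further down in this file, and takes ra_lock around the radix-tree lookup the same way the real load path does.

/* Illustrative only: locate the zbud handle for one cached file page. */
static void *zcache_lookup_sketch(struct zcache_pool *zpool,
				  int inode_nr, int page_index)
{
	struct zcache_rbnode *rbnode;
	unsigned long flags;
	void *zaddr;

	/* 1. Per-pool red-black tree, keyed by inode number (rb_index). */
	rbnode = zcache_find_get_rbnode(zpool, inode_nr);
	if (!rbnode)
		return NULL;

	/* 2. Per-inode radix tree, keyed by page->index (ra_index). */
	spin_lock_irqsave(&rbnode->ra_lock, flags);
	zaddr = radix_tree_lookup(&rbnode->ratree, page_index);
	spin_unlock_irqrestore(&rbnode->ra_lock, flags);

	/*
	 * 3. zaddr is either ZERO_HANDLE or a zbud handle; a real handle
	 *    must be mapped with zbud_map() to reach the zcache_ra_handle
	 *    and the compressed data behind it.
	 */
	kref_put(&rbnode->refcount, zcache_rbnode_release);
	return zaddr;
}
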
+
+u64 zcache_pages(void)
+{
+ int i;
+ u64 count = 0;
+
+ for (i = 0; (i < MAX_ZCACHE_POOLS) && zcache.pools[i]; i++)
+ count += zcache.pools[i]->size;
+
+ return count;
+}
+
+static struct kmem_cache *zcache_rbnode_cache;
+static int zcache_rbnode_cache_create(void)
+{
+ zcache_rbnode_cache = KMEM_CACHE(zcache_rbnode, 0);
+ return zcache_rbnode_cache == NULL;
+}
+static void zcache_rbnode_cache_destroy(void)
+{
+ kmem_cache_destroy(zcache_rbnode_cache);
+}
+
+static unsigned long zcache_count(struct shrinker *s,
+ struct shrink_control *sc)
+{
+ unsigned long active_file;
+ long file_gap;
+
+ active_file = global_page_state(NR_ACTIVE_FILE);
+ file_gap = zcache_pages() - active_file;
+ if (file_gap < 0)
+ file_gap = 0;
+ return file_gap;
+}
+
+static unsigned long zcache_scan(struct shrinker *s, struct shrink_control *sc)
+{
+ unsigned long active_file;
+ unsigned long file;
+ long file_gap;
+ unsigned long freed = 0;
+ unsigned long pool;
+ static bool running;
+ int i = 0;
+ int retries;
+
+ if (running)
+ goto end;
+
+ running = true;
+ active_file = global_page_state(NR_ACTIVE_FILE);
+ file = global_page_state(NR_FILE_PAGES);
+ pool = zcache_pages();
+
+ file_gap = pool - file;
+
+ if ((file_gap >= 0) &&
+ (totalram_pages * zcache_clear_percent / 100 > file)) {
+ file_gap = pool;
+ zcache_pool_shrink++;
+ goto reclaim;
+ }
+
+ /*
+ * file_gap == 0 means that the zbud pool occupies as many page frames
+ * as there are active file pages; since each zbud page frame can hold
+ * two compressed pages, zcache may then be storing roughly twice as
+ * many pages as are on the active file list.
+ */
+ file_gap = pool - active_file;
+ if (file_gap < 0)
+ file_gap = 0;
+ else
+ zcache_pool_shrink++;
+
+reclaim:
+ retries = file_gap;
+ while ((file_gap > 0) && retries) {
+ struct zcache_pool *zpool =
+ zcache.pools[i++ % MAX_ZCACHE_POOLS];
+ if (!zpool || !zpool->size)
+ continue;
+ if (zbud_reclaim_page(zpool->pool, 8)) {
+ zcache_pool_shrink_fail++;
+ retries--;
+ continue;
+ }
+ freed++;
+ file_gap--;
+ }
+
+ zcache_pool_shrink_pages += freed;
+ for (i = 0; (i < MAX_ZCACHE_POOLS) && zcache.pools[i]; i++)
+ zcache.pools[i]->size =
+ zbud_get_pool_size(zcache.pools[i]->pool);
+
+ running = false;
+end:
+ return freed;
+}
+
+static struct shrinker zcache_shrinker = {
+ .scan_objects = zcache_scan,
+ .count_objects = zcache_count,
+ .seeks = DEFAULT_SEEKS * 16
+};
+
+/*
+ * Compression functions
+ * (the functions below are copied from zswap)
+ */
+static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms;
+
+enum comp_op {
+ ZCACHE_COMPOP_COMPRESS,
+ ZCACHE_COMPOP_DECOMPRESS
+};
+
+static int zcache_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
+ u8 *dst, unsigned int *dlen)
+{
+ struct crypto_comp *tfm;
+ int ret;
+
+ tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, get_cpu());
+ switch (op) {
+ case ZCACHE_COMPOP_COMPRESS:
+ ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
+ break;
+ case ZCACHE_COMPOP_DECOMPRESS:
+ ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ put_cpu();
+ return ret;
+}
+
+static int __init zcache_comp_init(void)
+{
+ if (!crypto_has_comp(zcache_compressor, 0, 0)) {
+ pr_info("%s compressor not available\n", zcache_compressor);
+ /* fall back to default compressor */
+ zcache_compressor = ZCACHE_COMPRESSOR_DEFAULT;
+ if (!crypto_has_comp(zcache_compressor, 0, 0))
+ /* can't even load the default compressor */
+ return -ENODEV;
+ }
+ pr_info("using %s compressor\n", zcache_compressor);
+
+ /* alloc percpu transforms */
+ zcache_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
+ if (!zcache_comp_pcpu_tfms)
+ return -ENOMEM;
+ return 0;
+}
+
+static void zcache_comp_exit(void)
+{
+ /* free percpu transforms */
+ if (zcache_comp_pcpu_tfms)
+ free_percpu(zcache_comp_pcpu_tfms);
+}
+
+/*
+ * Per-cpu code
+ * (the functions below are also copied from zswap)
+ */
+static DEFINE_PER_CPU(u8 *, zcache_dstmem);
+
+static int __zcache_cpu_notifier(unsigned long action, unsigned long cpu)
+{
+ struct crypto_comp *tfm;
+ u8 *dst;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ tfm = crypto_alloc_comp(zcache_compressor, 0, 0);
+ if (IS_ERR(tfm)) {
+ pr_err("can't allocate compressor transform\n");
+ return NOTIFY_BAD;
+ }
+ *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = tfm;
+ dst = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
+ if (!dst) {
+ pr_err("can't allocate compressor buffer\n");
+ crypto_free_comp(tfm);
+ *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
+ return NOTIFY_BAD;
+ }
+ per_cpu(zcache_dstmem, cpu) = dst;
+ break;
+ case CPU_DEAD:
+ case CPU_UP_CANCELED:
+ tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu);
+ if (tfm) {
+ crypto_free_comp(tfm);
+ *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
+ }
+ dst = per_cpu(zcache_dstmem, cpu);
+ kfree(dst);
+ per_cpu(zcache_dstmem, cpu) = NULL;
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static int zcache_cpu_notifier(struct notifier_block *nb,
+ unsigned long action, void *pcpu)
+{
+ unsigned long cpu = (unsigned long)pcpu;
+
+ return __zcache_cpu_notifier(action, cpu);
+}
+
+static struct notifier_block zcache_cpu_notifier_block = {
+ .notifier_call = zcache_cpu_notifier
+};
+
+static int zcache_cpu_init(void)
+{
+ unsigned long cpu;
+
+ get_online_cpus();
+ for_each_online_cpu(cpu)
+ if (__zcache_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
+ goto cleanup;
+ register_cpu_notifier(&zcache_cpu_notifier_block);
+ put_online_cpus();
+ return 0;
+
+cleanup:
+ for_each_online_cpu(cpu)
+ __zcache_cpu_notifier(CPU_UP_CANCELED, cpu);
+ put_online_cpus();
+ return -ENOMEM;
+}
+
+/*
+ * Zcache helpers
+ */
+static bool zcache_is_full(void)
+{
+ long file = global_page_state(NR_FILE_PAGES);
+
+ return ((totalram_pages * zcache_max_pool_percent / 100 <
+ zcache_pages()) ||
+ (totalram_pages * zcache_clear_percent / 100 >
+ file));
+}
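
As a worked example of the two thresholds above, with the default parameters on a machine with 4 GiB of RAM (about 1,048,576 4 KiB pages): zcache_is_full() reports the cache as full once the compressed pool exceeds roughly 104,857 page frames (max_pool_percent = 10, about 400 MiB), or once fewer than roughly 41,943 file pages remain in the page cache (clear_percent = 4, about 160 MiB).
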
+
+/*
+ * The caller must hold zpool->rb_lock, at least for reading.
+ */
+static struct zcache_rbnode *zcache_find_rbnode(struct rb_root *rbtree,
+ int index, struct rb_node **rb_parent, struct rb_node ***rb_link)
+{
+ struct zcache_rbnode *entry;
+ struct rb_node **__rb_link, *__rb_parent, *rb_prev;
+
+ __rb_link = &rbtree->rb_node;
+ rb_prev = __rb_parent = NULL;
+
+ while (*__rb_link) {
+ __rb_parent = *__rb_link;
+ entry = rb_entry(__rb_parent, struct zcache_rbnode, rb_node);
+ if (entry->rb_index > index)
+ __rb_link = &__rb_parent->rb_left;
+ else if (entry->rb_index < index) {
+ rb_prev = __rb_parent;
+ __rb_link = &__rb_parent->rb_right;
+ } else
+ return entry;
+ }
+
+ if (rb_parent)
+ *rb_parent = __rb_parent;
+ if (rb_link)
+ *rb_link = __rb_link;
+ return NULL;
+}
+
+static struct zcache_rbnode *zcache_find_get_rbnode(struct zcache_pool *zpool,
+ int rb_index)
+{
+ unsigned long flags;
+ struct zcache_rbnode *rbnode;
+
+ read_lock_irqsave(&zpool->rb_lock, flags);
+ rbnode = zcache_find_rbnode(&zpool->rbtree, rb_index, NULL, NULL);
+ if (rbnode)
+ kref_get(&rbnode->refcount);
+ read_unlock_irqrestore(&zpool->rb_lock, flags);
+ return rbnode;
+}
+
+/*
+ * kref_put callback for zcache_rbnode.
+ *
+ * The rbnode must have been isolated from rbtree already.
+ */
+static void zcache_rbnode_release(struct kref *kref)
+{
+ struct zcache_rbnode *rbnode;
+
+ rbnode = container_of(kref, struct zcache_rbnode, refcount);
+ BUG_ON(rbnode->ratree.rnode);
+ kmem_cache_free(zcache_rbnode_cache, rbnode);
+}
+
+/*
+ * Check whether the radix-tree of this rbnode is empty.
+ * If that's true, then we can delete this zcache_rbnode from
+ * zcache_pool->rbtree
+ *
+ * Caller must hold zcache_rbnode->ra_lock
+ */
+static int zcache_rbnode_empty(struct zcache_rbnode *rbnode)
+{
+ return rbnode->ratree.rnode == NULL;
+}
+
+/*
+ * Remove zcache_rbnode from zpool->rbtree
+ *
+ * holded_rblock - whether the caller already holds zpool->rb_lock
+ */
+static void zcache_rbnode_isolate(struct zcache_pool *zpool,
+ struct zcache_rbnode *rbnode, bool holded_rblock)
+{
+ unsigned long flags;
+
+ if (!holded_rblock)
+ write_lock_irqsave(&zpool->rb_lock, flags);
+ /*
+ * Someone may have taken a reference on this rbnode before we could
+ * acquire the write lock above.
+ * We only want to remove it from zpool->rbtree when the caller and
+ * the corresponding ratree hold the only references to this rbnode.
+ * The check below ensures that a racing zcache put will not end up
+ * adding a page to an isolated node and thereby losing that memory.
+ */
+ if (atomic_read(&rbnode->refcount.refcount) == 2) {
+ rb_erase(&rbnode->rb_node, &zpool->rbtree);
+ RB_CLEAR_NODE(&rbnode->rb_node);
+ kref_put(&rbnode->refcount, zcache_rbnode_release);
+ }
+ if (!holded_rblock)
+ write_unlock_irqrestore(&zpool->rb_lock, flags);
+}
+
+/*
+ * Store a zaddr allocated by zbud_alloc() into the rbtree/radix-tree hierarchy.
+ */
+static int zcache_store_zaddr(struct zcache_pool *zpool,
+ int ra_index, int rb_index, unsigned long zaddr)
+{
+ unsigned long flags;
+ struct zcache_rbnode *rbnode, *tmp;
+ struct rb_node **link = NULL, *parent = NULL;
+ int ret;
+ void *dup_zaddr;
+
+ rbnode = zcache_find_get_rbnode(zpool, rb_index);
+ if (!rbnode) {
+ /* alloc and init a new rbnode */
+ rbnode = kmem_cache_alloc(zcache_rbnode_cache,
+ GFP_ZCACHE);
+ if (!rbnode)
+ return -ENOMEM;
+
+ INIT_RADIX_TREE(&rbnode->ratree, GFP_ATOMIC|__GFP_NOWARN);
+ spin_lock_init(&rbnode->ra_lock);
+ rbnode->rb_index = rb_index;
+ kref_init(&rbnode->refcount);
+ RB_CLEAR_NODE(&rbnode->rb_node);
+
+ /* add that rbnode to rbtree */
+ write_lock_irqsave(&zpool->rb_lock, flags);
+ tmp = zcache_find_rbnode(&zpool->rbtree, rb_index,
+ &parent, &link);
+ if (tmp) {
+ /* somebody else allocated new rbnode */
+ kmem_cache_free(zcache_rbnode_cache, rbnode);
+ rbnode = tmp;
+ } else {
+ rb_link_node(&rbnode->rb_node, parent, link);
+ rb_insert_color(&rbnode->rb_node, &zpool->rbtree);
+ }
+
+ /* Inc the reference of this zcache_rbnode */
+ kref_get(&rbnode->refcount);
+ write_unlock_irqrestore(&zpool->rb_lock, flags);
+ }
+
+ /* We have successfully obtained a zcache_rbnode at this point */
+ spin_lock_irqsave(&rbnode->ra_lock, flags);
+ dup_zaddr = radix_tree_delete(&rbnode->ratree, ra_index);
+ if (unlikely(dup_zaddr)) {
+ WARN(1, "duplicated entry, will be replaced!\n");
+ if (dup_zaddr == ZERO_HANDLE) {
+ atomic_dec(&zcache_stored_zero_pages);
+ } else {
+ zbud_free(zpool->pool, (unsigned long)dup_zaddr);
+ atomic_dec(&zcache_stored_pages);
+ zpool->size = zbud_get_pool_size(zpool->pool);
+ }
+ zcache_dup_entry++;
+ }
+
+ /* Insert zcache_ra_handle to ratree */
+ ret = radix_tree_insert(&rbnode->ratree, ra_index,
+ (void *)zaddr);
+ spin_unlock_irqrestore(&rbnode->ra_lock, flags);
+ if (unlikely(ret)) {
+ write_lock_irqsave(&zpool->rb_lock, flags);
+ spin_lock(&rbnode->ra_lock);
+
+ if (zcache_rbnode_empty(rbnode))
+ zcache_rbnode_isolate(zpool, rbnode, 1);
+
+ spin_unlock(&rbnode->ra_lock);
+ write_unlock_irqrestore(&zpool->rb_lock, flags);
+ }
+
+ kref_put(&rbnode->refcount, zcache_rbnode_release);
+ return ret;
+}
+
+/*
+ * Load zaddr and delete it from radix tree.
+ * If the radix tree of the corresponding rbnode is empty, delete the rbnode
+ * from zpool->rbtree also.
+ */
+static void *zcache_load_delete_zaddr(struct zcache_pool *zpool,
+ int rb_index, int ra_index)
+{
+ struct zcache_rbnode *rbnode;
+ void *zaddr = NULL;
+ unsigned long flags;
+
+ rbnode = zcache_find_get_rbnode(zpool, rb_index);
+ if (!rbnode)
+ goto out;
+
+ BUG_ON(rbnode->rb_index != rb_index);
+
+ spin_lock_irqsave(&rbnode->ra_lock, flags);
+ zaddr = radix_tree_delete(&rbnode->ratree, ra_index);
+ spin_unlock_irqrestore(&rbnode->ra_lock, flags);
+
+ /* rb_lock and ra_lock must be taken again in the given sequence */
+ write_lock_irqsave(&zpool->rb_lock, flags);
+ spin_lock(&rbnode->ra_lock);
+ if (zcache_rbnode_empty(rbnode))
+ zcache_rbnode_isolate(zpool, rbnode, 1);
+ spin_unlock(&rbnode->ra_lock);
+ write_unlock_irqrestore(&zpool->rb_lock, flags);
+
+ kref_put(&rbnode->refcount, zcache_rbnode_release);
+out:
+ return zaddr;
+}
+
+static bool zero_page(struct page *page)
+{
+ unsigned long *ptr = kmap_atomic(page);
+ int i;
+ bool ret = false;
+
+ for (i = 0; i < PAGE_SIZE / sizeof(*ptr); i++) {
+ if (ptr[i])
+ goto out;
+ }
+ ret = true;
+out:
+ kunmap_atomic(ptr);
+ return ret;
+}
+
+static void zcache_store_page(int pool_id, struct cleancache_filekey key,
+ pgoff_t index, struct page *page)
+{
+ struct zcache_ra_handle *zhandle;
+ u8 *zpage, *src, *dst;
+ /* Address of zhandle + compressed data(zpage) */
+ unsigned long zaddr = 0;
+ unsigned int zlen = PAGE_SIZE;
+ bool zero = false;
+ int ret;
+
+ struct zcache_pool *zpool = zcache.pools[pool_id];
+
+ /*
+ * Zcache would be ineffective if the compressed memory pool filled up
+ * with compressed inactive file pages, most of which would never be
+ * used again.
+ * So we refuse to compress pages that are not on the active file list.
+ */
+ if (!PageWasActive(page)) {
+ zcache_inactive_pages_refused++;
+ return;
+ }
+
+ zero = zero_page(page);
+ if (zero)
+ goto zero;
+
+ if (zcache_is_full()) {
+ zcache_pool_limit_hit++;
+ if (zbud_reclaim_page(zpool->pool, 8)) {
+ zcache_reclaim_fail++;
+ return;
+ }
+ /*
+ * Continue if a page frame was reclaimed successfully.
+ */
+ zcache_evict_filepages++;
+ zpool->size = zbud_get_pool_size(zpool->pool);
+ }
+
+ /* compress */
+ dst = get_cpu_var(zcache_dstmem);
+ src = kmap_atomic(page);
+ ret = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, src, PAGE_SIZE, dst,
+ &zlen);
+ kunmap_atomic(src);
+ if (ret) {
+ pr_err("zcache compress error ret %d\n", ret);
+ put_cpu_var(zcache_dstmem);
+ return;
+ }
+
+ /* store zcache handle together with compressed page data */
+ ret = zbud_alloc(zpool->pool, zlen + sizeof(struct zcache_ra_handle),
+ GFP_ZCACHE, &zaddr);
+ if (ret) {
+ zcache_zbud_alloc_fail++;
+ put_cpu_var(zcache_dstmem);
+ return;
+ }
+
+ zhandle = (struct zcache_ra_handle *)zbud_map(zpool->pool, zaddr);
+
+ /* Compressed page data stored at the end of zcache_ra_handle */
+ zpage = (u8 *)(zhandle + 1);
+ memcpy(zpage, dst, zlen);
+ zbud_unmap(zpool->pool, zaddr);
+ put_cpu_var(zcache_dstmem);
+
+zero:
+ if (zero)
+ zaddr = (unsigned long)ZERO_HANDLE;
+
+ /* store zcache handle */
+ ret = zcache_store_zaddr(zpool, index, key.u.ino, zaddr);
+ if (ret) {
+ zcache_store_failed++;
+ if (!zero)
+ zbud_free(zpool->pool, zaddr);
+ return;
+ }
+
+ /* update stats */
+ if (zero) {
+ atomic_inc(&zcache_stored_zero_pages);
+ } else {
+ zhandle->ra_index = index;
+ zhandle->rb_index = key.u.ino;
+ zhandle->zlen = zlen;
+ zhandle->zpool = zpool;
+ atomic_inc(&zcache_stored_pages);
+ zpool->size = zbud_get_pool_size(zpool->pool);
+ }
+}
+
+static int zcache_load_page(int pool_id, struct cleancache_filekey key,
+ pgoff_t index, struct page *page)
+{
+ int ret = 0;
+ u8 *src, *dst;
+ void *zaddr;
+ unsigned int dlen = PAGE_SIZE;
+ struct zcache_ra_handle *zhandle;
+ struct zcache_pool *zpool = zcache.pools[pool_id];
+
+ zaddr = zcache_load_delete_zaddr(zpool, key.u.ino, index);
+ if (!zaddr)
+ return -ENOENT;
+ else if (zaddr == ZERO_HANDLE)
+ goto map;
+
+ zhandle = (struct zcache_ra_handle *)zbud_map(zpool->pool,
+ (unsigned long)zaddr);
+ /* Compressed page data stored at the end of zcache_ra_handle */
+ src = (u8 *)(zhandle + 1);
+
+ /* decompress */
+map:
+ dst = kmap_atomic(page);
+ if (zaddr != ZERO_HANDLE) {
+ ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, src,
+ zhandle->zlen, dst, &dlen);
+ } else {
+ memset(dst, 0, PAGE_SIZE);
+ kunmap_atomic(dst);
+ flush_dcache_page(page);
+ atomic_dec(&zcache_stored_zero_pages);
+ goto out;
+ }
+ kunmap_atomic(dst);
+ zbud_unmap(zpool->pool, (unsigned long)zaddr);
+ zbud_free(zpool->pool, (unsigned long)zaddr);
+
+ BUG_ON(ret);
+ BUG_ON(dlen != PAGE_SIZE);
+
+ /* update stats */
+ atomic_dec(&zcache_stored_pages);
+ zpool->size = zbud_get_pool_size(zpool->pool);
+out:
+ SetPageWasActive(page);
+ return ret;
+}
+
+static void zcache_flush_page(int pool_id, struct cleancache_filekey key,
+ pgoff_t index)
+{
+ struct zcache_pool *zpool = zcache.pools[pool_id];
+ void *zaddr = NULL;
+
+ zaddr = zcache_load_delete_zaddr(zpool, key.u.ino, index);
+ if (zaddr && (zaddr != ZERO_HANDLE)) {
+ zbud_free(zpool->pool, (unsigned long)zaddr);
+ atomic_dec(&zcache_stored_pages);
+ zpool->size = zbud_get_pool_size(zpool->pool);
+ } else if (zaddr == ZERO_HANDLE) {
+ atomic_dec(&zcache_stored_zero_pages);
+ }
+}
+
+#define FREE_BATCH 16
+/*
+ * Callers must hold rbnode->ra_lock
+ */
+static void zcache_flush_ratree(struct zcache_pool *zpool,
+ struct zcache_rbnode *rbnode)
+{
+ unsigned long index = 0;
+ int count, i;
+ struct zcache_ra_handle *zhandle;
+ void *zaddr = NULL;
+
+ do {
+ void *zaddrs[FREE_BATCH];
+ unsigned long indices[FREE_BATCH];
+
+ count = radix_tree_gang_lookup_index(&rbnode->ratree,
+ (void **)zaddrs, indices,
+ index, FREE_BATCH);
+
+ for (i = 0; i < count; i++) {
+ if (zaddrs[i] == ZERO_HANDLE) {
+ zaddr = radix_tree_delete(&rbnode->ratree,
+ indices[i]);
+ if (zaddr)
+ atomic_dec(&zcache_stored_zero_pages);
+ continue;
+ }
+ zhandle = (struct zcache_ra_handle *)zbud_map(
+ zpool->pool, (unsigned long)zaddrs[i]);
+ index = zhandle->ra_index;
+ zaddr = radix_tree_delete(&rbnode->ratree, index);
+ if (!zaddr)
+ continue;
+ zbud_unmap(zpool->pool, (unsigned long)zaddrs[i]);
+ zbud_free(zpool->pool, (unsigned long)zaddrs[i]);
+ atomic_dec(&zcache_stored_pages);
+ zpool->size = zbud_get_pool_size(zpool->pool);
+ }
+
+ index++;
+ } while (count == FREE_BATCH);
+}
+
+static void zcache_flush_inode(int pool_id, struct cleancache_filekey key)
+{
+ struct zcache_rbnode *rbnode;
+ unsigned long flags1, flags2;
+ struct zcache_pool *zpool = zcache.pools[pool_id];
+
+ /*
+ * Prevent new pages from being added to the same rbnode, so take
+ * rb_lock first.
+ */
+ write_lock_irqsave(&zpool->rb_lock, flags1);
+ rbnode = zcache_find_rbnode(&zpool->rbtree, key.u.ino, NULL, NULL);
+ if (!rbnode) {
+ write_unlock_irqrestore(&zpool->rb_lock, flags1);
+ return;
+ }
+
+ kref_get(&rbnode->refcount);
+ spin_lock_irqsave(&rbnode->ra_lock, flags2);
+
+ zcache_flush_ratree(zpool, rbnode);
+ if (zcache_rbnode_empty(rbnode))
+ /* We already hold rb_lock at this point */
+ zcache_rbnode_isolate(zpool, rbnode, 1);
+
+ spin_unlock_irqrestore(&rbnode->ra_lock, flags2);
+ write_unlock_irqrestore(&zpool->rb_lock, flags1);
+ kref_put(&rbnode->refcount, zcache_rbnode_release);
+}
+
+static void zcache_destroy_pool(struct zcache_pool *zpool);
+static void zcache_flush_fs(int pool_id)
+{
+ struct zcache_rbnode *z_rbnode = NULL;
+ struct rb_node *rbnode;
+ unsigned long flags1, flags2;
+ struct zcache_pool *zpool;
+
+ if (pool_id < 0)
+ return;
+
+ zpool = zcache.pools[pool_id];
+ if (!zpool)
+ return;
+
+ /*
+ * Prevent new pages from being added, so take rb_lock first.
+ */
+ write_lock_irqsave(&zpool->rb_lock, flags1);
+
+ rbnode = rb_first(&zpool->rbtree);
+ while (rbnode) {
+ z_rbnode = rb_entry(rbnode, struct zcache_rbnode, rb_node);
+ rbnode = rb_next(rbnode);
+ if (z_rbnode) {
+ kref_get(&z_rbnode->refcount);
+ spin_lock_irqsave(&z_rbnode->ra_lock, flags2);
+ zcache_flush_ratree(zpool, z_rbnode);
+ if (zcache_rbnode_empty(z_rbnode))
+ zcache_rbnode_isolate(zpool, z_rbnode, 1);
+ spin_unlock_irqrestore(&z_rbnode->ra_lock, flags2);
+ kref_put(&z_rbnode->refcount, zcache_rbnode_release);
+ }
+ }
+
+ write_unlock_irqrestore(&zpool->rb_lock, flags1);
+ zcache_destroy_pool(zpool);
+}
+
+/*
+ * Evict compressed pages from the zcache pool on an LRU basis when the
+ * compressed pool is full.
+ */
+static int zcache_evict_zpage(struct zbud_pool *pool, unsigned long zaddr)
+{
+ struct zcache_pool *zpool;
+ struct zcache_ra_handle *zhandle;
+ void *zaddr_intree;
+
+ BUG_ON(zaddr == (unsigned long)ZERO_HANDLE);
+
+ zhandle = (struct zcache_ra_handle *)zbud_map(pool, zaddr);
+
+ zpool = zhandle->zpool;
+ /* There can be a race with zcache store */
+ if (!zpool)
+ return -EINVAL;
+
+ BUG_ON(pool != zpool->pool);
+
+ zaddr_intree = zcache_load_delete_zaddr(zpool, zhandle->rb_index,
+ zhandle->ra_index);
+ if (zaddr_intree) {
+ BUG_ON((unsigned long)zaddr_intree != zaddr);
+ zbud_unmap(pool, zaddr);
+ zbud_free(pool, zaddr);
+ atomic_dec(&zcache_stored_pages);
+ zpool->size = zbud_get_pool_size(pool);
+ zcache_evict_zpages++;
+ }
+ return 0;
+}
+
+static struct zbud_ops zcache_zbud_ops = {
+ .evict = zcache_evict_zpage
+};
+
+/* Returns the new pool id on success, or a negative errno on failure */
+static int zcache_create_pool(void)
+{
+ int ret;
+ struct zcache_pool *zpool;
+
+ zpool = kzalloc(sizeof(*zpool), GFP_KERNEL);
+ if (!zpool) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ zpool->pool = zbud_create_pool(GFP_KERNEL, &zcache_zbud_ops);
+ if (!zpool->pool) {
+ kfree(zpool);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ spin_lock(&zcache.pool_lock);
+ if (zcache.num_pools == MAX_ZCACHE_POOLS) {
+ pr_err("Cannot create new pool (limit:%u)\n", MAX_ZCACHE_POOLS);
+ zbud_destroy_pool(zpool->pool);
+ kfree(zpool);
+ ret = -EPERM;
+ goto out_unlock;
+ }
+
+ rwlock_init(&zpool->rb_lock);
+ zpool->rbtree = RB_ROOT;
+ /* Add to pool list */
+ for (ret = 0; ret < MAX_ZCACHE_POOLS; ret++)
+ if (!zcache.pools[ret])
+ break;
+ zcache.pools[ret] = zpool;
+ zcache.num_pools++;
+ pr_info("New pool created id:%d\n", ret);
+
+out_unlock:
+ spin_unlock(&zcache.pool_lock);
+out:
+ return ret;
+}
+
+static void zcache_destroy_pool(struct zcache_pool *zpool)
+{
+ int i;
+
+ if (!zpool)
+ return;
+
+ spin_lock(&zcache.pool_lock);
+ zcache.num_pools--;
+ for (i = 0; i < MAX_ZCACHE_POOLS; i++)
+ if (zcache.pools[i] == zpool)
+ break;
+ zcache.pools[i] = NULL;
+ spin_unlock(&zcache.pool_lock);
+
+ if (!RB_EMPTY_ROOT(&zpool->rbtree))
+ WARN(1, "Memory leak detected. Freeing non-empty pool!\n");
+
+ zbud_destroy_pool(zpool->pool);
+ kfree(zpool);
+}
+
+static int zcache_init_fs(size_t pagesize)
+{
+ int ret;
+
+ if (pagesize != PAGE_SIZE) {
+ pr_info("Unsupported page size: %zu\n", pagesize);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = zcache_create_pool();
+ if (ret < 0) {
+ pr_info("Failed to create new pool\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+out:
+ return ret;
+}
+
+static int zcache_init_shared_fs(char *uuid, size_t pagesize)
+{
+ /* shared pools are unsupported and map to private */
+ return zcache_init_fs(pagesize);
+}
+
+static struct cleancache_ops zcache_ops = {
+ .put_page = zcache_store_page,
+ .get_page = zcache_load_page,
+ .invalidate_page = zcache_flush_page,
+ .invalidate_inode = zcache_flush_inode,
+ .invalidate_fs = zcache_flush_fs,
+ .init_shared_fs = zcache_init_shared_fs,
+ .init_fs = zcache_init_fs
+};
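
For orientation, a rough sketch of how a cleancache frontend exercises the ops registered above. It is illustrative only (the real call sites live in the core page cache and filesystem read paths, not in this file) and simply spells out the calling convention that zcache_store_page() and zcache_load_page() implement: get_page() returns 0 when the backend filled the page and -ENOENT on a miss.

/* Illustrative only: how a cleancache frontend drives a backend like zcache. */
static void cleancache_flow_sketch(struct cleancache_ops *ops, int pool_id,
				   struct cleancache_filekey key,
				   pgoff_t index, struct page *page)
{
	/* Reclaim/truncate path: offer a clean file page to the backend. */
	ops->put_page(pool_id, key, index, page);

	/* Read path: ask the backend before issuing disk I/O. */
	if (ops->get_page(pool_id, key, index, page) == 0)
		return;		/* hit: the read I/O is avoided */

	/* Invalidation paths: drop stale copies on truncate or unmount. */
	ops->invalidate_page(pool_id, key, index);
	ops->invalidate_inode(pool_id, key);
	ops->invalidate_fs(pool_id);
}
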
+
+/*
+ * Debugfs functions
+ */
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+
+static int pool_pages_get(void *_data, u64 *val)
+{
+ *val = zcache_pages();
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(pool_page_fops, pool_pages_get, NULL, "%llu\n");
+
+static struct dentry *zcache_debugfs_root;
+
+static int __init zcache_debugfs_init(void)
+{
+ if (!debugfs_initialized())
+ return -ENODEV;
+
+ zcache_debugfs_root = debugfs_create_dir("zcache", NULL);
+ if (!zcache_debugfs_root)
+ return -ENOMEM;
+
+ debugfs_create_u64("pool_limit_hit", S_IRUGO, zcache_debugfs_root,
+ &zcache_pool_limit_hit);
+ debugfs_create_u64("reject_alloc_fail", S_IRUGO, zcache_debugfs_root,
+ &zcache_zbud_alloc_fail);
+ debugfs_create_u64("duplicate_entry", S_IRUGO, zcache_debugfs_root,
+ &zcache_dup_entry);
+ debugfs_create_file("pool_pages", S_IRUGO, zcache_debugfs_root, NULL,
+ &pool_page_fops);
+ debugfs_create_atomic_t("stored_pages", S_IRUGO, zcache_debugfs_root,
+ &zcache_stored_pages);
+ debugfs_create_atomic_t("stored_zero_pages", S_IRUGO,
+ zcache_debugfs_root, &zcache_stored_zero_pages);
+ debugfs_create_u64("evicted_zpages", S_IRUGO, zcache_debugfs_root,
+ &zcache_evict_zpages);
+ debugfs_create_u64("evicted_filepages", S_IRUGO, zcache_debugfs_root,
+ &zcache_evict_filepages);
+ debugfs_create_u64("reclaim_fail", S_IRUGO, zcache_debugfs_root,
+ &zcache_reclaim_fail);
+ debugfs_create_u64("inactive_pages_refused", S_IRUGO,
+ zcache_debugfs_root, &zcache_inactive_pages_refused);
+ debugfs_create_u64("pool_shrink_count", S_IRUGO,
+ zcache_debugfs_root, &zcache_pool_shrink);
+ debugfs_create_u64("pool_shrink_fail", S_IRUGO,
+ zcache_debugfs_root, &zcache_pool_shrink_fail);
+ debugfs_create_u64("pool_shrink_pages", S_IRUGO,
+ zcache_debugfs_root, &zcache_pool_shrink_pages);
+ debugfs_create_u64("store_fail", S_IRUGO,
+ zcache_debugfs_root, &zcache_store_failed);
+ return 0;
+}
+
+static void __exit zcache_debugfs_exit(void)
+{
+ debugfs_remove_recursive(zcache_debugfs_root);
+}
+#else
+static int __init zcache_debugfs_init(void)
+{
+ return 0;
+}
+static void __exit zcache_debugfs_exit(void)
+{
+}
+#endif
+
+/*
+ * zcache init and exit
+ */
+static int __init init_zcache(void)
+{
+ if (!zcache_enabled)
+ return 0;
+
+ pr_info("loading zcache..\n");
+ if (zcache_rbnode_cache_create()) {
+ pr_err("entry cache creation failed\n");
+ goto error;
+ }
+
+ if (zcache_comp_init()) {
+ pr_err("compressor initialization failed\n");
+ goto compfail;
+ }
+ if (zcache_cpu_init()) {
+ pr_err("per-cpu initialization failed\n");
+ goto pcpufail;
+ }
+
+ spin_lock_init(&zcache.pool_lock);
+ cleancache_register_ops(&zcache_ops);
+
+ if (zcache_debugfs_init())
+ pr_warn("debugfs initialization failed\n");
+ register_shrinker(&zcache_shrinker);
+ return 0;
+pcpufail:
+ zcache_comp_exit();
+compfail:
+ zcache_rbnode_cache_destroy();
+error:
+ return -ENOMEM;
+}
+
+/* must be late so crypto has time to come up */
+late_initcall(init_zcache);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Bob Liu <bob.liu@xxxxxxxxxx>");
+MODULE_DESCRIPTION("Compressed cache for clean file pages");
+