author    Deepak Kumar <dkumar@codeaurora.org>	2016-04-29 09:16:51 -0600
committer Jeevan Shriram <jshriram@codeaurora.org>	2016-05-05 15:05:55 -0700
commit    658f4b0454659b6a190a3e34a81b2a81d3341afd (patch)
tree      a3263d7bd69b11fb205a425258686f7c9a21ee67 /drivers/gpu/msm/kgsl_sharedmem.c
parent    436e7913d3e30609cbd04cfaac4d69c7b7f05838 (diff)
msm: kgsl: Use a page pool to reduce allocation time
Use a mempool to reduce the allocation time. When memory allocated
from pages is freed, put it in a page pool. At allocation time, try
to allocate from the page pool before getting pages from the system.
Make sure that the pool does not grow too big by enforcing a maximum
limit.

Change-Id: Icac7fb4355ee1fd07e7127ea5c2721665e279272
Signed-off-by: Deepak Kumar <dkumar@codeaurora.org>
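In outline, the pool is just a capped cache of free pages that sits in front of the system allocator: freeing pushes pages onto a list instead of returning them, and allocation pops from that list before calling alloc_page(). The sketch below only illustrates that idea with hypothetical demo_* names; the actual implementation lives in kgsl_pool.c, which is not part of this file's diff.

#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/spinlock.h>

/* Hypothetical sketch of the pool idea; not the kgsl_pool.c implementation. */
struct demo_page_pool {
	spinlock_t lock;
	struct list_head page_list;	/* cached free pages */
	unsigned int count;		/* pages currently cached */
	unsigned int max_count;		/* enforced upper limit on the pool */
};

/* Free path: cache the page unless the pool is already at its limit. */
static void demo_pool_free_page(struct demo_page_pool *pool, struct page *p)
{
	spin_lock(&pool->lock);
	if (pool->count < pool->max_count) {
		list_add_tail(&p->lru, &pool->page_list);
		pool->count++;
		spin_unlock(&pool->lock);
		return;
	}
	spin_unlock(&pool->lock);

	/* Pool is full: give the page back to the system allocator. */
	__free_page(p);
}

/* Alloc path: try the pool first, fall back to the system allocator. */
static struct page *demo_pool_alloc_page(struct demo_page_pool *pool)
{
	struct page *p = NULL;

	spin_lock(&pool->lock);
	if (!list_empty(&pool->page_list)) {
		p = list_first_entry(&pool->page_list, struct page, lru);
		list_del(&p->lru);
		pool->count--;
	}
	spin_unlock(&pool->lock);

	return p ? p : alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
}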
Diffstat (limited to 'drivers/gpu/msm/kgsl_sharedmem.c')
-rw-r--r--  drivers/gpu/msm/kgsl_sharedmem.c | 171
1 file changed, 77 insertions(+), 94 deletions(-)
diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c
index 893e31852ccb..cc5e921ce920 100644
--- a/drivers/gpu/msm/kgsl_sharedmem.c
+++ b/drivers/gpu/msm/kgsl_sharedmem.c
@@ -27,6 +27,7 @@
#include "kgsl_device.h"
#include "kgsl_log.h"
#include "kgsl_mmu.h"
+#include "kgsl_pool.h"
/*
* The user can set this from debugfs to force failed memory allocations to
@@ -424,9 +425,6 @@ done:
static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc)
{
- unsigned int i = 0;
- struct scatterlist *sg;
-
kgsl_page_alloc_unmap_kernel(memdesc);
/* we certainly do not expect the hostptr to still be mapped */
BUG_ON(memdesc->hostptr);
@@ -451,28 +449,15 @@ static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc)
atomic_long_sub(memdesc->size, &kgsl_driver.stats.page_alloc);
}
- for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) {
- /*
- * sg_alloc_table_from_pages() will collapse any physically
- * adjacent pages into a single scatterlist entry. We cannot
- * just call __free_pages() on the entire set since we cannot
- * ensure that the size is a whole order. Instead, free each
- * page or compound page group individually.
- */
- struct page *p = sg_page(sg), *next;
- unsigned int j = 0, count;
- while (j < (sg->length/PAGE_SIZE)) {
- if (memdesc->priv & KGSL_MEMDESC_TZ_LOCKED)
- ClearPagePrivate(p);
-
- count = 1 << compound_order(p);
- next = nth_page(p, count);
- __free_pages(p, compound_order(p));
- p = next;
- j += count;
+ if (memdesc->priv & KGSL_MEMDESC_TZ_LOCKED) {
+ struct sg_page_iter sg_iter;
- }
+ for_each_sg_page(memdesc->sgt->sgl, &sg_iter,
+ memdesc->sgt->nents, 0)
+ ClearPagePrivate(sg_page_iter_page(&sg_iter));
}
+
+ kgsl_pool_free_sgt(memdesc->sgt);
}
/*
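kgsl_pool_free_sgt() now takes over the per-page walk that the removed loop performed. Since kgsl_pool.c is not shown in this diff, the sketch below only illustrates the expected shape of such a helper: walk each scatterlist entry, split it back into single or compound page groups (sg_alloc_table_from_pages() may have merged physically adjacent pages), and hand each group to the pool. demo_pool_free_pages() is a hypothetical stand-in, not the real kgsl_pool API.

#include <linux/mm.h>
#include <linux/scatterlist.h>

/* Hypothetical stand-in for handing a (possibly compound) page group
 * back to the pool; not the real kgsl_pool API. */
static void demo_pool_free_pages(struct page *p, unsigned int order);

static void demo_pool_free_sgt(struct sg_table *sgt)
{
	struct scatterlist *sg;
	unsigned int i;

	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
		/*
		 * One sg entry may cover several merged allocations, so
		 * walk it and release each page or compound page group
		 * individually, just as the removed loop above did.
		 */
		struct page *p = sg_page(sg);
		unsigned int j = 0;

		while (j < (sg->length >> PAGE_SHIFT)) {
			unsigned int count = 1 << compound_order(p);

			demo_pool_free_pages(p, compound_order(p));
			p = nth_page(p, count);
			j += count;
		}
	}
}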
@@ -681,19 +666,67 @@ static inline int get_page_size(size_t size, unsigned int align)
}
#endif
+static void kgsl_zero_pages(struct page **pages, unsigned int pcount)
+{
+ unsigned int j;
+ unsigned int step = ((VMALLOC_END - VMALLOC_START)/8) >> PAGE_SHIFT;
+ pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL);
+ void *ptr;
+
+ /*
+ * All memory that goes to the user has to be zeroed out before it gets
+ * exposed to userspace. This means that the memory has to be mapped in
+ * the kernel, zeroed (memset) and then unmapped. This also means that
+ * the dcache has to be flushed to ensure coherency between the kernel
+ * and user pages. We used to pass __GFP_ZERO to alloc_page, which mapped,
+ * zeroed, and unmapped each individual page, and then we had to turn
+ * around and call flush_dcache_page() on that page to clear the caches.
+ * This was killing us for performance. Instead, we found it is much
+ * faster to allocate the pages without GFP_ZERO, map a chunk of the
+ * range ('step' pages), memset it, flush it and then unmap
+ * - this results in a factor of 4 improvement for speed for large
+ * buffers. There is a small decrease in speed for small buffers,
+ * but only on the order of a few microseconds at best. The 'step'
+ * size is based on a guess at the amount of free vmalloc space, but
+ * will scale down if there's not enough free space.
+ */
+ for (j = 0; j < pcount; j += step) {
+ step = min(step, pcount - j);
+
+ ptr = vmap(&pages[j], step, VM_IOREMAP, page_prot);
+
+ if (ptr != NULL) {
+ memset(ptr, 0, step * PAGE_SIZE);
+ dmac_flush_range(ptr, ptr + step * PAGE_SIZE);
+ vunmap(ptr);
+ } else {
+ int k;
+ /* Very, very, very slow path */
+
+ for (k = j; k < j + step; k++) {
+ ptr = kmap_atomic(pages[k]);
+ memset(ptr, 0, PAGE_SIZE);
+ dmac_flush_range(ptr, ptr + PAGE_SIZE);
+ kunmap_atomic(ptr);
+ }
+ /* scale down the step size to avoid this path */
+ if (step > 1)
+ step >>= 1;
+ }
+ }
+}
+
static int
kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
struct kgsl_pagetable *pagetable,
uint64_t size)
{
int ret = 0;
- unsigned int j, pcount = 0, page_size, len_alloc;
+ unsigned int j, page_size, len_alloc;
+ unsigned int pcount = 0;
size_t len;
struct page **pages = NULL;
- pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL);
- void *ptr;
unsigned int align;
- unsigned int step = ((VMALLOC_END - VMALLOC_START)/8) >> PAGE_SHIFT;
size = PAGE_ALIGN(size);
if (size == 0 || size > UINT_MAX)
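For a sense of scale, the 'step' used by kgsl_zero_pages() above is sized from the vmalloc range. With purely illustrative numbers (the range is platform dependent), assume roughly 240 MB of vmalloc address space and 4 KB pages:

step = ((VMALLOC_END - VMALLOC_START) / 8) >> PAGE_SHIFT
     ~ (240 MB / 8) / 4 KB
     = 7680 pages, i.e. about 30 MB mapped per vmap/memset/flush pass

If vmap() fails for a chunk, the per-page kmap_atomic() fallback handles that chunk and 'step' is halved, so later passes need progressively less contiguous vmalloc space.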
@@ -740,30 +773,16 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
len = size;
while (len > 0) {
- struct page *page;
- gfp_t gfp_mask = __GFP_HIGHMEM;
- int j;
+ int page_count;
/* don't waste space at the end of the allocation*/
if (len < page_size)
page_size = PAGE_SIZE;
- /*
- * Don't do some of the more aggressive memory recovery
- * techniques for large order allocations
- */
- if (page_size != PAGE_SIZE) {
- gfp_mask |= __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN;
- gfp_mask &= ~__GFP_RECLAIM;
- } else
- gfp_mask |= GFP_KERNEL;
-
- if (sharedmem_noretry_flag == true)
- gfp_mask |= __GFP_NORETRY | __GFP_NOWARN;
+ page_count = kgsl_pool_alloc_page(page_size,
+ pages + pcount, len_alloc - pcount);
- page = alloc_pages(gfp_mask, get_order(page_size));
-
- if (page == NULL) {
+ if (page_count <= 0) {
if (page_size != PAGE_SIZE) {
page_size = PAGE_SIZE;
continue;
@@ -785,9 +804,7 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
goto done;
}
- for (j = 0; j < page_size >> PAGE_SHIFT; j++)
- pages[pcount++] = nth_page(page, j);
-
+ pcount += page_count;
len -= page_size;
memdesc->size += page_size;
}
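The allocation loop above leans on a contract that this hunk only implies: kgsl_pool_alloc_page() receives the requested page_size, the next free slots of the pages array, and the number of slots still available; it performs one allocation (from the pool when possible, otherwise from the system), fills in the resulting 4 KB page pointers, and returns how many it wrote, with a non-positive return telling the caller to drop down to PAGE_SIZE or give up. The declaration below is an assumption reconstructed from the call site; kgsl_pool.h is not part of this diff, so the exact names and types are a guess.

/*
 * Assumed prototype, inferred only from the call
 *     kgsl_pool_alloc_page(page_size, pages + pcount, len_alloc - pcount);
 * Returns the number of PAGE_SIZE pages written into @pages, or <= 0 on
 * failure.
 */
int kgsl_pool_alloc_page(unsigned int page_size, struct page **pages,
			unsigned int pages_len);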
@@ -824,57 +841,23 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
goto done;
}
- /*
- * All memory that goes to the user has to be zeroed out before it gets
- * exposed to userspace. This means that the memory has to be mapped in
- * the kernel, zeroed (memset) and then unmapped. This also means that
- * the dcache has to be flushed to ensure coherency between the kernel
- * and user pages. We used to pass __GFP_ZERO to alloc_page which mapped
- * zeroed and unmaped each individual page, and then we had to turn
- * around and call flush_dcache_page() on that page to clear the caches.
- * This was killing us for performance. Instead, we found it is much
- * faster to allocate the pages without GFP_ZERO, map a chunk of the
- * range ('step' pages), memset it, flush it and then unmap
- * - this results in a factor of 4 improvement for speed for large
- * buffers. There is a small decrease in speed for small buffers,
- * but only on the order of a few microseconds at best. The 'step'
- * size is based on a guess at the amount of free vmalloc space, but
- * will scale down if there's not enough free space.
- */
- for (j = 0; j < pcount; j += step) {
- step = min(step, pcount - j);
-
- ptr = vmap(&pages[j], step, VM_IOREMAP, page_prot);
-
- if (ptr != NULL) {
- memset(ptr, 0, step * PAGE_SIZE);
- dmac_flush_range(ptr, ptr + step * PAGE_SIZE);
- vunmap(ptr);
- } else {
- int k;
- /* Very, very, very slow path */
-
- for (k = j; k < j + step; k++) {
- ptr = kmap_atomic(pages[k]);
- memset(ptr, 0, PAGE_SIZE);
- dmac_flush_range(ptr, ptr + PAGE_SIZE);
- kunmap_atomic(ptr);
- }
- /* scale down the step size to avoid this path */
- if (step > 1)
- step >>= 1;
- }
- }
-
KGSL_STATS_ADD(memdesc->size, &kgsl_driver.stats.page_alloc,
&kgsl_driver.stats.page_alloc_max);
+ /*
+ * Zero out the pages.
+ */
+ kgsl_zero_pages(pages, pcount);
+
done:
if (ret) {
- unsigned int count = 1;
- for (j = 0; j < pcount; j += count) {
- count = 1 << compound_order(pages[j]);
- __free_pages(pages[j], compound_order(pages[j]));
+ if (pages) {
+ unsigned int count = 1;
+
+ for (j = 0; j < pcount; j += count) {
+ count = 1 << compound_order(pages[j]);
+ kgsl_pool_free_page(pages[j]);
+ }
}
kfree(memdesc->sgt);