author    | Deepak Kumar <dkumar@codeaurora.org>     | 2016-04-29 09:16:51 -0600
committer | Jeevan Shriram <jshriram@codeaurora.org> | 2016-05-05 15:05:55 -0700
commit    | 658f4b0454659b6a190a3e34a81b2a81d3341afd
tree      | a3263d7bd69b11fb205a425258686f7c9a21ee67 /drivers/gpu/msm/kgsl_sharedmem.c
parent    | 436e7913d3e30609cbd04cfaac4d69c7b7f05838
msm: kgsl: Use a page pool to reduce allocation time
Use a mempool to reduce the allocation time. When memory allocated
from pages is freed, put it in a page pool. At allocation time, try
to allocate from the page pool before getting pages from the system.
Make sure that the pool does not grow too big by enforcing a
maximum limit.
Change-Id: Icac7fb4355ee1fd07e7127ea5c2721665e279272
Signed-off-by: Deepak Kumar <dkumar@codeaurora.org>
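The sketch below illustrates the general pattern the commit message describes: keep freed pages on a capped free list and satisfy new requests from that list before falling back to the system allocator. It is a simplified, userspace-only illustration of the idea, not the kgsl_pool code added by this series; the names (demo_pool, demo_pool_get_page, demo_pool_put_page) and the cap value are invented for the example.

    /*
     * Simplified, userspace-only illustration of a capped page pool.
     * Not the kgsl_pool implementation; names and sizes are made up.
     */
    #include <stdio.h>
    #include <stdlib.h>

    #define DEMO_PAGE_SIZE  4096
    #define DEMO_POOL_MAX   64      /* cap so the pool cannot grow unbounded */

    struct demo_pool {
            void *pages[DEMO_POOL_MAX];
            int count;
    };

    /* Allocation path: try the pool first, fall back to the system allocator. */
    static void *demo_pool_get_page(struct demo_pool *pool)
    {
            if (pool->count > 0)
                    return pool->pages[--pool->count];   /* fast path: reuse a pooled page */

            return aligned_alloc(DEMO_PAGE_SIZE, DEMO_PAGE_SIZE);   /* slow path */
    }

    /* Free path: return the page to the pool unless the cap has been reached. */
    static void demo_pool_put_page(struct demo_pool *pool, void *page)
    {
            if (pool->count < DEMO_POOL_MAX)
                    pool->pages[pool->count++] = page;
            else
                    free(page);                          /* pool full: hand it back to the system */
    }

    int main(void)
    {
            struct demo_pool pool = { .count = 0 };

            void *a = demo_pool_get_page(&pool);         /* comes from the system allocator */
            demo_pool_put_page(&pool, a);                /* goes into the pool */
            void *b = demo_pool_get_page(&pool);         /* reused from the pool, no allocator call */

            printf("reused pooled page: %s\n", (a == b) ? "yes" : "no");
            demo_pool_put_page(&pool, b);

            /* drain the pool before exit */
            while (pool.count > 0)
                    free(pool.pages[--pool.count]);

            return 0;
    }

The real driver deals in struct page pointers and multiple page orders, but the allocate-from-pool-first / free-to-pool-with-a-cap structure is the same basic idea.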
Diffstat (limited to 'drivers/gpu/msm/kgsl_sharedmem.c')
-rw-r--r-- | drivers/gpu/msm/kgsl_sharedmem.c | 171
1 file changed, 77 insertions(+), 94 deletions(-)
diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c
index 893e31852ccb..cc5e921ce920 100644
--- a/drivers/gpu/msm/kgsl_sharedmem.c
+++ b/drivers/gpu/msm/kgsl_sharedmem.c
@@ -27,6 +27,7 @@
 #include "kgsl_device.h"
 #include "kgsl_log.h"
 #include "kgsl_mmu.h"
+#include "kgsl_pool.h"

 /*
  * The user can set this from debugfs to force failed memory allocations to
@@ -424,9 +425,6 @@ done:

 static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc)
 {
-        unsigned int i = 0;
-        struct scatterlist *sg;
-
         kgsl_page_alloc_unmap_kernel(memdesc);
         /* we certainly do not expect the hostptr to still be mapped */
         BUG_ON(memdesc->hostptr);
@@ -451,28 +449,15 @@ static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc)
                 atomic_long_sub(memdesc->size, &kgsl_driver.stats.page_alloc);
         }

-        for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) {
-                /*
-                 * sg_alloc_table_from_pages() will collapse any physically
-                 * adjacent pages into a single scatterlist entry. We cannot
-                 * just call __free_pages() on the entire set since we cannot
-                 * ensure that the size is a whole order. Instead, free each
-                 * page or compound page group individually.
-                 */
-                struct page *p = sg_page(sg), *next;
-                unsigned int j = 0, count;
-                while (j < (sg->length/PAGE_SIZE)) {
-                        if (memdesc->priv & KGSL_MEMDESC_TZ_LOCKED)
-                                ClearPagePrivate(p);
-
-                        count = 1 << compound_order(p);
-                        next = nth_page(p, count);
-                        __free_pages(p, compound_order(p));
-                        p = next;
-                        j += count;
+        if (memdesc->priv & KGSL_MEMDESC_TZ_LOCKED) {
+                struct sg_page_iter sg_iter;

-                }
+                for_each_sg_page(memdesc->sgt->sgl, &sg_iter,
+                                memdesc->sgt->nents, 0)
+                        ClearPagePrivate(sg_page_iter_page(&sg_iter));
         }
+
+        kgsl_pool_free_sgt(memdesc->sgt);
 }

 /*
@@ -681,19 +666,67 @@ static inline int get_page_size(size_t size, unsigned int align)
 }
 #endif

+static void kgsl_zero_pages(struct page **pages, unsigned int pcount)
+{
+        unsigned int j;
+        unsigned int step = ((VMALLOC_END - VMALLOC_START)/8) >> PAGE_SHIFT;
+        pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL);
+        void *ptr;
+
+        /*
+         * All memory that goes to the user has to be zeroed out before it gets
+         * exposed to userspace. This means that the memory has to be mapped in
+         * the kernel, zeroed (memset) and then unmapped. This also means that
+         * the dcache has to be flushed to ensure coherency between the kernel
+         * and user pages. We used to pass __GFP_ZERO to alloc_page which mapped
+         * zeroed and unmaped each individual page, and then we had to turn
+         * around and call flush_dcache_page() on that page to clear the caches.
+         * This was killing us for performance. Instead, we found it is much
+         * faster to allocate the pages without GFP_ZERO, map a chunk of the
+         * range ('step' pages), memset it, flush it and then unmap
+         * - this results in a factor of 4 improvement for speed for large
+         * buffers. There is a small decrease in speed for small buffers,
+         * but only on the order of a few microseconds at best. The 'step'
+         * size is based on a guess at the amount of free vmalloc space, but
+         * will scale down if there's not enough free space.
+         */
+        for (j = 0; j < pcount; j += step) {
+                step = min(step, pcount - j);
+
+                ptr = vmap(&pages[j], step, VM_IOREMAP, page_prot);
+
+                if (ptr != NULL) {
+                        memset(ptr, 0, step * PAGE_SIZE);
+                        dmac_flush_range(ptr, ptr + step * PAGE_SIZE);
+                        vunmap(ptr);
+                } else {
+                        int k;
+                        /* Very, very, very slow path */
+
+                        for (k = j; k < j + step; k++) {
+                                ptr = kmap_atomic(pages[k]);
+                                memset(ptr, 0, PAGE_SIZE);
+                                dmac_flush_range(ptr, ptr + PAGE_SIZE);
+                                kunmap_atomic(ptr);
+                        }
+                        /* scale down the step size to avoid this path */
+                        if (step > 1)
+                                step >>= 1;
+                }
+        }
+}
+
 static int
 kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
                         struct kgsl_pagetable *pagetable,
                         uint64_t size)
 {
         int ret = 0;
-        unsigned int j, pcount = 0, page_size, len_alloc;
+        unsigned int j, page_size, len_alloc;
+        unsigned int pcount = 0;
         size_t len;
         struct page **pages = NULL;
-        pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL);
-        void *ptr;
         unsigned int align;
-        unsigned int step = ((VMALLOC_END - VMALLOC_START)/8) >> PAGE_SHIFT;

         size = PAGE_ALIGN(size);
         if (size == 0 || size > UINT_MAX)
@@ -740,30 +773,16 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,

         len = size;
         while (len > 0) {
-                struct page *page;
-                gfp_t gfp_mask = __GFP_HIGHMEM;
-                int j;
+                int page_count;

                 /* don't waste space at the end of the allocation*/
                 if (len < page_size)
                         page_size = PAGE_SIZE;

-                /*
-                 * Don't do some of the more aggressive memory recovery
-                 * techniques for large order allocations
-                 */
-                if (page_size != PAGE_SIZE) {
-                        gfp_mask |= __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN;
-                        gfp_mask &= ~__GFP_RECLAIM;
-                } else
-                        gfp_mask |= GFP_KERNEL;
-
-                if (sharedmem_noretry_flag == true)
-                        gfp_mask |= __GFP_NORETRY | __GFP_NOWARN;
+                page_count = kgsl_pool_alloc_page(page_size,
+                                        pages + pcount, len_alloc - pcount);

-                page = alloc_pages(gfp_mask, get_order(page_size));
-
-                if (page == NULL) {
+                if (page_count <= 0) {
                         if (page_size != PAGE_SIZE) {
                                 page_size = PAGE_SIZE;
                                 continue;
@@ -785,9 +804,7 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
                         goto done;
                 }

-                for (j = 0; j < page_size >> PAGE_SHIFT; j++)
-                        pages[pcount++] = nth_page(page, j);
-
+                pcount += page_count;
                 len -= page_size;
                 memdesc->size += page_size;
         }
@@ -824,57 +841,23 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
                 goto done;
         }

-        /*
-         * All memory that goes to the user has to be zeroed out before it gets
-         * exposed to userspace. This means that the memory has to be mapped in
-         * the kernel, zeroed (memset) and then unmapped. This also means that
-         * the dcache has to be flushed to ensure coherency between the kernel
-         * and user pages. We used to pass __GFP_ZERO to alloc_page which mapped
-         * zeroed and unmaped each individual page, and then we had to turn
-         * around and call flush_dcache_page() on that page to clear the caches.
-         * This was killing us for performance. Instead, we found it is much
-         * faster to allocate the pages without GFP_ZERO, map a chunk of the
-         * range ('step' pages), memset it, flush it and then unmap
-         * - this results in a factor of 4 improvement for speed for large
-         * buffers. There is a small decrease in speed for small buffers,
-         * but only on the order of a few microseconds at best. The 'step'
-         * size is based on a guess at the amount of free vmalloc space, but
-         * will scale down if there's not enough free space.
-         */
-        for (j = 0; j < pcount; j += step) {
-                step = min(step, pcount - j);
-
-                ptr = vmap(&pages[j], step, VM_IOREMAP, page_prot);
-
-                if (ptr != NULL) {
-                        memset(ptr, 0, step * PAGE_SIZE);
-                        dmac_flush_range(ptr, ptr + step * PAGE_SIZE);
-                        vunmap(ptr);
-                } else {
-                        int k;
-                        /* Very, very, very slow path */
-
-                        for (k = j; k < j + step; k++) {
-                                ptr = kmap_atomic(pages[k]);
-                                memset(ptr, 0, PAGE_SIZE);
-                                dmac_flush_range(ptr, ptr + PAGE_SIZE);
-                                kunmap_atomic(ptr);
-                        }
-                        /* scale down the step size to avoid this path */
-                        if (step > 1)
-                                step >>= 1;
-                }
-        }
-
         KGSL_STATS_ADD(memdesc->size, &kgsl_driver.stats.page_alloc,
                 &kgsl_driver.stats.page_alloc_max);

+        /*
+         * Zero out the pages.
+         */
+        kgsl_zero_pages(pages, pcount);
+
 done:
         if (ret) {
-                unsigned int count = 1;
-                for (j = 0; j < pcount; j += count) {
-                        count = 1 << compound_order(pages[j]);
-                        __free_pages(pages[j], compound_order(pages[j]));
+                if (pages) {
+                        unsigned int count = 1;
+
+                        for (j = 0; j < pcount; j += count) {
+                                count = 1 << compound_order(pages[j]);
+                                kgsl_pool_free_page(pages[j]);
+                        }
                 }

                 kfree(memdesc->sgt);
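The long comment moved into kgsl_zero_pages() above explains the zeroing strategy: map a chunk of 'step' pages at once, memset and flush the chunk, and halve 'step' whenever the chunk mapping fails so later iterations can still take the fast path. The standalone sketch below models only that control flow in plain userspace C so the step-scaling logic is easier to follow; the names (zero_pages, map_chunk_ok), the initial step, and the simulated failure threshold are invented for the illustration and do not come from the driver.

    /*
     * Userspace model of the chunk-then-fallback zeroing loop: clear pages
     * in chunks of 'step', and halve 'step' whenever a chunk "mapping"
     * fails. In the driver the failure is vmap() returning NULL when
     * vmalloc space is scarce; here it is simulated by an arbitrary limit.
     */
    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define PAGE_SIZE 4096u

    /* Stand-in for vmap() failing: pretend mappings larger than 8 pages fail. */
    static bool map_chunk_ok(unsigned int step)
    {
            return step <= 8;
    }

    static void zero_pages(unsigned char pages[][PAGE_SIZE], unsigned int pcount)
    {
            unsigned int step = 32;   /* initial guess, like the vmalloc-based guess */
            unsigned int j;

            for (j = 0; j < pcount; j += step) {
                    if (step > pcount - j)
                            step = pcount - j;

                    printf("clearing %u page(s) starting at page %u\n", step, j);

                    if (map_chunk_ok(step)) {
                            /* fast path: clear the whole chunk at once */
                            memset(pages[j], 0, (size_t)step * PAGE_SIZE);
                    } else {
                            unsigned int k;

                            /* slow path: clear page by page ... */
                            for (k = j; k < j + step; k++)
                                    memset(pages[k], 0, PAGE_SIZE);

                            /* ... and shrink the step so later chunks can use the fast path */
                            if (step > 1)
                                    step >>= 1;
                    }
            }
    }

    int main(void)
    {
            static unsigned char pages[40][PAGE_SIZE];

            zero_pages(pages, 40);
            return 0;
    }

As in the kernel code, the step is only ever shrunk, never grown back, so a single shortage of mapping space makes the rest of the buffer proceed in smaller, safer chunks.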