From f2902f9065c5ec4f475367e5d39bdfff0d41fda6 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Fri, 18 Apr 2014 14:07:28 -0700 Subject: prctl: adds PR_SET_TIMERSLACK_PID for setting timer slack of an arbitrary thread. Second argument is similar to PR_SET_TIMERSLACK, if non-zero then the slack is set to that value otherwise sets it to the default for the thread. Takes PID of the thread as the third argument. This allows power/performance management software to set timer slack for other threads according to its policy for the thread (such as when the thread is designated foreground vs. background activity) Change-Id: I744d451ff4e60dae69f38f53948ff36c51c14a3f Signed-off-by: Ruchi Kandoi --- kernel/sys.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index 6af9212ab5aa..fd0680992912 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2076,6 +2076,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { struct task_struct *me = current; + struct task_struct *tsk; unsigned char comm[sizeof(me->comm)]; long error; @@ -2199,6 +2200,23 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, else return -EINVAL; break; + case PR_SET_TIMERSLACK_PID: + rcu_read_lock(); + tsk = find_task_by_pid_ns((pid_t)arg3, &init_pid_ns); + if (tsk == NULL) { + rcu_read_unlock(); + return -EINVAL; + } + get_task_struct(tsk); + rcu_read_unlock(); + if (arg2 <= 0) + tsk->timer_slack_ns = + tsk->default_timer_slack_ns; + else + tsk->timer_slack_ns = arg2; + put_task_struct(tsk); + error = 0; + break; default: return -EINVAL; } -- cgit v1.2.3 From 2476d3c24159f3b7113d2a36685ba81ae5209648 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Fri, 13 Jun 2014 17:03:01 -0700 Subject: prctl: adds the capable(CAP_SYS_NICE) check to PR_SET_TIMERSLACK_PID. 
Adds a capable() check to make sure that arbitrary apps do not change the timer slack for other apps. Bug: 15000427 Change-Id: I558a2551a0e3579c7f7e7aae54b28aa9d982b209 Signed-off-by: Ruchi Kandoi --- kernel/sys.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index fd0680992912..d448bad88823 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2201,6 +2201,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return -EINVAL; break; case PR_SET_TIMERSLACK_PID: + if (current->pid != (pid_t)arg3 && + !capable(CAP_SYS_NICE)) + return -EPERM; rcu_read_lock(); tsk = find_task_by_pid_ns((pid_t)arg3, &init_pid_ns); if (tsk == NULL) { -- cgit v1.2.3 From 18f42f60bedb93c9e90641bf9a551f9c0caf0a40 Mon Sep 17 00:00:00 2001 From: Micha Kalfon Date: Tue, 13 Jan 2015 11:47:20 +0200 Subject: prctl: fix misplaced PR_SET_TIMERSLACK_PID case The case clause for the PR_SET_TIMERSLACK_PID option was placed inside an internal switch statement for PR_MCE_KILL (see commits 37a591d4 and 8ae872f1). This commit moves it to the right place. 
Change-Id: I63251669d7e2f2aa843d1b0900e7df61518c3dea Signed-off-by: Micha Kalfon Acked-by: Oren Laadan --- kernel/sys.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index d448bad88823..0f45fb855fe3 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2200,26 +2200,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, else return -EINVAL; break; - case PR_SET_TIMERSLACK_PID: - if (current->pid != (pid_t)arg3 && - !capable(CAP_SYS_NICE)) - return -EPERM; - rcu_read_lock(); - tsk = find_task_by_pid_ns((pid_t)arg3, &init_pid_ns); - if (tsk == NULL) { - rcu_read_unlock(); - return -EINVAL; - } - get_task_struct(tsk); - rcu_read_unlock(); - if (arg2 <= 0) - tsk->timer_slack_ns = - tsk->default_timer_slack_ns; - else - tsk->timer_slack_ns = arg2; - put_task_struct(tsk); - error = 0; - break; default: return -EINVAL; } @@ -2239,6 +2219,26 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_GET_TID_ADDRESS: error = prctl_get_tid_address(me, (int __user **)arg2); break; + case PR_SET_TIMERSLACK_PID: + if (current->pid != (pid_t)arg3 && + !capable(CAP_SYS_NICE)) + return -EPERM; + rcu_read_lock(); + tsk = find_task_by_pid_ns((pid_t)arg3, &init_pid_ns); + if (tsk == NULL) { + rcu_read_unlock(); + return -EINVAL; + } + get_task_struct(tsk); + rcu_read_unlock(); + if (arg2 <= 0) + tsk->timer_slack_ns = + tsk->default_timer_slack_ns; + else + tsk->timer_slack_ns = arg2; + put_task_struct(tsk); + error = 0; + break; case PR_SET_CHILD_SUBREAPER: me->signal->is_child_subreaper = !!arg2; break; -- cgit v1.2.3 From d4d049c55d0f0fa7fb5521a83c7e0c029c80e7c0 Mon Sep 17 00:00:00 2001 From: Micha Kalfon Date: Tue, 13 Jan 2015 12:12:57 +0200 Subject: prctl: make PR_SET_TIMERSLACK_PID pid namespace aware Make PR_SET_TIMERSLACK_PID consider pid namespace and resolve the target pid in the caller's namespace. 
Otherwise, calls from pid namespace other than init would fail or affect the wrong task. Change-Id: I1da15196abc4096536713ce03714e99d2e63820a Signed-off-by: Micha Kalfon Acked-by: Oren Laadan --- kernel/sys.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index 0f45fb855fe3..66b933a5c061 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2220,11 +2220,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, error = prctl_get_tid_address(me, (int __user **)arg2); break; case PR_SET_TIMERSLACK_PID: - if (current->pid != (pid_t)arg3 && + if (task_pid_vnr(current) != (pid_t)arg3 && !capable(CAP_SYS_NICE)) return -EPERM; rcu_read_lock(); - tsk = find_task_by_pid_ns((pid_t)arg3, &init_pid_ns); + tsk = find_task_by_vpid((pid_t)arg3); if (tsk == NULL) { rcu_read_unlock(); return -EINVAL; -- cgit v1.2.3 From 586278d78bfa926d7a705da2f192325b3469919d Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Tue, 27 Oct 2015 16:42:08 -0700 Subject: mm: add a field to store names for private anonymous memory Userspace processes often have multiple allocators that each do anonymous mmaps to get memory. When examining memory usage of individual processes or systems as a whole, it is useful to be able to break down the various heaps that were allocated by each layer and examine their size, RSS, and physical memory usage. This patch adds a user pointer to the shared union in vm_area_struct that points to a null terminated string inside the user process containing a name for the vma. vmas that point to the same address will be merged, but vmas that point to equivalent strings at different addresses will not be merged. Userspace can set the name for a region of memory by calling prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, start, len, (unsigned long)name); Setting the name to NULL clears it. 
The names of named anonymous vmas are shown in /proc/pid/maps as [anon:<name>] and in /proc/pid/smaps in a new "Name" field that is only present for named vmas. If the userspace pointer is no longer valid all or part of the name will be replaced with "<fault>". The idea to store a userspace pointer to reduce the complexity within mm (at the expense of the complexity of reading /proc/pid/mem) came from Dave Hansen. This results in no runtime overhead in the mm subsystem other than comparing the anon_name pointers when considering vma merging. The pointer is stored in a union with fields that are only used on file-backed mappings, so it does not increase memory usage. Includes fix from Jed Davis for typo in prctl_set_vma_anon_name, which could attempt to set the name across two vmas at the same time due to a typo, which might corrupt the vma list. Fix it to use tmp instead of end to limit the name setting to a single vma at a time. Change-Id: I9aa7b6b5ef536cd780599ba4e2fba8ceebe8b59f Signed-off-by: Dmitry Shmidt --- kernel/sys.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index 66b933a5c061..d0cb632eef8b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -41,6 +41,8 @@ #include #include #include +#include +#include #include #include @@ -2072,6 +2074,153 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) } #endif +#ifdef CONFIG_MMU +static int prctl_update_vma_anon_name(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, unsigned long end, + const char __user *name_addr) +{ + struct mm_struct *mm = vma->vm_mm; + int error = 0; + pgoff_t pgoff; + + if (name_addr == vma_get_anon_name(vma)) { + *prev = vma; + goto out; + } + + pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); + *prev = vma_merge(mm, *prev, start, end, vma->vm_flags, vma->anon_vma, + vma->vm_file, pgoff, vma_policy(vma), + 
name_addr); + if (*prev) { + vma = *prev; + goto success; + } + + *prev = vma; + + if (start != vma->vm_start) { + error = split_vma(mm, vma, start, 1); + if (error) + goto out; + } + + if (end != vma->vm_end) { + error = split_vma(mm, vma, end, 0); + if (error) + goto out; + } + +success: + if (!vma->vm_file) + vma->anon_name = name_addr; + +out: + if (error == -ENOMEM) + error = -EAGAIN; + return error; +} + +static int prctl_set_vma_anon_name(unsigned long start, unsigned long end, + unsigned long arg) +{ + unsigned long tmp; + struct vm_area_struct *vma, *prev; + int unmapped_error = 0; + int error = -EINVAL; + + /* + * If the interval [start,end) covers some unmapped address + * ranges, just ignore them, but return -ENOMEM at the end. + * - this matches the handling in madvise. + */ + vma = find_vma_prev(current->mm, start, &prev); + if (vma && start > vma->vm_start) + prev = vma; + + for (;;) { + /* Still start < end. */ + error = -ENOMEM; + if (!vma) + return error; + + /* Here start < (end|vma->vm_end). */ + if (start < vma->vm_start) { + unmapped_error = -ENOMEM; + start = vma->vm_start; + if (start >= end) + return error; + } + + /* Here vma->vm_start <= start < (end|vma->vm_end) */ + tmp = vma->vm_end; + if (end < tmp) + tmp = end; + + /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). 
*/ + error = prctl_update_vma_anon_name(vma, &prev, start, tmp, + (const char __user *)arg); + if (error) + return error; + start = tmp; + if (prev && start < prev->vm_end) + start = prev->vm_end; + error = unmapped_error; + if (start >= end) + return error; + if (prev) + vma = prev->vm_next; + else /* madvise_remove dropped mmap_sem */ + vma = find_vma(current->mm, start); + } +} + +static int prctl_set_vma(unsigned long opt, unsigned long start, + unsigned long len_in, unsigned long arg) +{ + struct mm_struct *mm = current->mm; + int error; + unsigned long len; + unsigned long end; + + if (start & ~PAGE_MASK) + return -EINVAL; + len = (len_in + ~PAGE_MASK) & PAGE_MASK; + + /* Check to see whether len was rounded up from small -ve to zero */ + if (len_in && !len) + return -EINVAL; + + end = start + len; + if (end < start) + return -EINVAL; + + if (end == start) + return 0; + + down_write(&mm->mmap_sem); + + switch (opt) { + case PR_SET_VMA_ANON_NAME: + error = prctl_set_vma_anon_name(start, end, arg); + break; + default: + error = -EINVAL; + } + + up_write(&mm->mmap_sem); + + return error; +} +#else /* CONFIG_MMU */ +static int prctl_set_vma(unsigned long opt, unsigned long start, + unsigned long len_in, unsigned long arg) +{ + return -EINVAL; +} +#endif + SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { @@ -2287,6 +2436,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_GET_FP_MODE: error = GET_FP_MODE(me); break; + case PR_SET_VMA: + error = prctl_set_vma(arg2, arg3, arg4, arg5); + break; default: error = -EINVAL; break; -- cgit v1.2.3 From 29a4f01daa11254debe7aba980681bb838dfff1b Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Thu, 10 Dec 2015 18:24:51 +0530 Subject: mm: private anonymous memory build fixes for 4.4 Update vma_merge() call in private anonymous memory prctl, introduced in AOSP commit ee8c5f78f09a "mm: add a field to store names for 
private anonymous memory", so as to align with changes from upstream commit 19a809afe2fe "userfaultfd: teach vma_merge to merge across vma->vm_userfaultfd_ctx". Signed-off-by: Amit Pundir --- kernel/sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index d0cb632eef8b..11333311cf1c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2092,7 +2092,7 @@ static int prctl_update_vma_anon_name(struct vm_area_struct *vma, pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *prev = vma_merge(mm, *prev, start, end, vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - name_addr); + vma->vm_userfaultfd_ctx, name_addr); if (*prev) { vma = *prev; goto success; -- cgit v1.2.3