summaryrefslogtreecommitdiff
path: root/fs/proc
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/Kconfig7
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/base.c180
-rw-r--r--fs/proc/generic.c1
-rw-r--r--fs/proc/internal.h9
-rw-r--r--fs/proc/kcore.c31
-rw-r--r--fs/proc/meminfo.c5
-rw-r--r--fs/proc/proc_sysctl.c6
-rw-r--r--fs/proc/proc_tty.c3
-rw-r--r--fs/proc/root.c2
-rw-r--r--fs/proc/task_mmu.c105
-rw-r--r--fs/proc/task_nommu.c28
-rw-r--r--fs/proc/uid.c295
13 files changed, 608 insertions, 65 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 1ade1206bb89..08dce22afec1 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -81,3 +81,10 @@ config PROC_CHILDREN
Say Y if you are running any user-space software which takes benefit from
this interface. For example, rkt is such a piece of software.
+
+config PROC_UID
+ bool "Include /proc/uid/ files"
+ default y
+ depends on PROC_FS && RT_MUTEXES
+ help
+ Provides aggregated per-uid information under /proc/uid.
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 7151ea428041..baab1daf7585 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -24,6 +24,7 @@ proc-y += softirqs.o
proc-y += namespaces.o
proc-y += self.o
proc-y += thread_self.o
+proc-$(CONFIG_PROC_UID) += uid.o
proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
proc-$(CONFIG_NET) += proc_net.o
proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d2b8c754f627..d1a675d32320 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -87,6 +87,7 @@
#include <linux/slab.h>
#include <linux/flex_array.h>
#include <linux/posix-timers.h>
+#include <linux/cpufreq_times.h>
#ifdef CONFIG_HARDWALL
#include <asm/hardwall.h>
#endif
@@ -94,6 +95,8 @@
#include "internal.h"
#include "fd.h"
+#include "../../lib/kstrtox.h"
+
/* NOTE:
* Implementing inode permission operations in /proc is almost
* certainly an error. Permission checks need to happen during
@@ -953,6 +956,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
unsigned long src = *ppos;
int ret = 0;
struct mm_struct *mm = file->private_data;
+ unsigned long env_start, env_end;
/* Ensure the process spawned far enough to have an environment. */
if (!mm || !mm->env_end)
@@ -965,19 +969,25 @@ static ssize_t environ_read(struct file *file, char __user *buf,
ret = 0;
if (!atomic_inc_not_zero(&mm->mm_users))
goto free;
+
+ down_read(&mm->mmap_sem);
+ env_start = mm->env_start;
+ env_end = mm->env_end;
+ up_read(&mm->mmap_sem);
+
while (count > 0) {
size_t this_len, max_len;
int retval;
- if (src >= (mm->env_end - mm->env_start))
+ if (src >= (env_end - env_start))
break;
- this_len = mm->env_end - (mm->env_start + src);
+ this_len = env_end - (env_start + src);
max_len = min_t(size_t, PAGE_SIZE, count);
this_len = min(max_len, this_len);
- retval = access_remote_vm(mm, (mm->env_start + src),
+ retval = access_remote_vm(mm, (env_start + src),
page, this_len, 0);
if (retval <= 0) {
@@ -1829,9 +1839,34 @@ end_instantiate:
static int dname_to_vma_addr(struct dentry *dentry,
unsigned long *start, unsigned long *end)
{
- if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2)
+ const char *str = dentry->d_name.name;
+ unsigned long long sval, eval;
+ unsigned int len;
+
+ len = _parse_integer(str, 16, &sval);
+ if (len & KSTRTOX_OVERFLOW)
+ return -EINVAL;
+ if (sval != (unsigned long)sval)
+ return -EINVAL;
+ str += len;
+
+ if (*str != '-')
+ return -EINVAL;
+ str++;
+
+ len = _parse_integer(str, 16, &eval);
+ if (len & KSTRTOX_OVERFLOW)
+ return -EINVAL;
+ if (eval != (unsigned long)eval)
+ return -EINVAL;
+ str += len;
+
+ if (*str != '\0')
return -EINVAL;
+ *start = sval;
+ *end = eval;
+
return 0;
}
@@ -2240,6 +2275,92 @@ static const struct file_operations proc_timers_operations = {
.release = seq_release_private,
};
+static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offset)
+{
+ struct inode *inode = file_inode(file);
+ struct task_struct *p;
+ u64 slack_ns;
+ int err;
+
+ err = kstrtoull_from_user(buf, count, 10, &slack_ns);
+ if (err < 0)
+ return err;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+ if (p != current) {
+ if (!capable(CAP_SYS_NICE)) {
+ count = -EPERM;
+ goto out;
+ }
+
+ err = security_task_setscheduler(p);
+ if (err) {
+ count = err;
+ goto out;
+ }
+ }
+
+ task_lock(p);
+ if (slack_ns == 0)
+ p->timer_slack_ns = p->default_timer_slack_ns;
+ else
+ p->timer_slack_ns = slack_ns;
+ task_unlock(p);
+
+out:
+ put_task_struct(p);
+
+ return count;
+}
+
+static int timerslack_ns_show(struct seq_file *m, void *v)
+{
+ struct inode *inode = m->private;
+ struct task_struct *p;
+ int err = 0;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+ if (p != current) {
+
+ if (!capable(CAP_SYS_NICE)) {
+ err = -EPERM;
+ goto out;
+ }
+ err = security_task_getscheduler(p);
+ if (err)
+ goto out;
+ }
+
+ task_lock(p);
+ seq_printf(m, "%llu\n", p->timer_slack_ns);
+ task_unlock(p);
+
+out:
+ put_task_struct(p);
+
+ return err;
+}
+
+static int timerslack_ns_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, timerslack_ns_show, inode);
+}
+
+static const struct file_operations proc_pid_set_timerslack_ns_operations = {
+ .open = timerslack_ns_open,
+ .read = seq_read,
+ .write = timerslack_ns_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static int proc_pident_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
@@ -2817,6 +2938,10 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_CHECKPOINT_RESTORE
REG("timers", S_IRUGO, proc_timers_operations),
#endif
+ REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations),
+#ifdef CONFIG_CPU_FREQ_TIMES
+ ONE("time_in_state", 0444, proc_time_in_state_show),
+#endif
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3058,6 +3183,8 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
iter.tgid += 1, iter = next_tgid(ns, iter)) {
char name[PROC_NUMBUF];
int len;
+
+ cond_resched();
if (!has_pid_permissions(ns, iter.task, 2))
continue;
@@ -3074,6 +3201,44 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
}
/*
+ * proc_tid_comm_permission is a special permission function exclusively
+ * used for the node /proc/<pid>/task/<tid>/comm.
+ * It bypasses generic permission checks in the case where a task of the same
+ * task group attempts to access the node.
+ * The rationale behind this is that glibc and bionic access this node for
+ * cross thread naming (pthread_set/getname_np(!self)). However, if
+ * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0,
+ * which locks out the cross thread naming implementation.
+ * This function makes sure that the node is always accessible for members of
+ * same thread group.
+ */
+static int proc_tid_comm_permission(struct inode *inode, int mask)
+{
+ bool is_same_tgroup;
+ struct task_struct *task;
+
+ task = get_proc_task(inode);
+ if (!task)
+ return -ESRCH;
+ is_same_tgroup = same_thread_group(current, task);
+ put_task_struct(task);
+
+ if (likely(is_same_tgroup && !(mask & MAY_EXEC))) {
+ /* This file (/proc/<pid>/task/<tid>/comm) can always be
+ * read or written by the members of the corresponding
+ * thread group.
+ */
+ return 0;
+ }
+
+ return generic_permission(inode, mask);
+}
+
+static const struct inode_operations proc_tid_comm_inode_operations = {
+ .permission = proc_tid_comm_permission,
+};
+
+/*
* Tasks
*/
static const struct pid_entry tid_base_stuff[] = {
@@ -3091,7 +3256,9 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
- REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
+ NOD("comm", S_IFREG|S_IRUGO|S_IWUSR,
+ &proc_tid_comm_inode_operations,
+ &proc_pid_set_comm_operations, {}),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
ONE("syscall", S_IRUSR, proc_pid_syscall),
#endif
@@ -3159,6 +3326,9 @@ static const struct pid_entry tid_base_stuff[] = {
REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
+#ifdef CONFIG_CPU_FREQ_TIMES
+ ONE("time_in_state", 0444, proc_time_in_state_show),
+#endif
};
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index ff3ffc76a937..3773335791da 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -469,6 +469,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name)
ent->data = NULL;
ent->proc_fops = NULL;
ent->proc_iops = NULL;
+ parent->nlink++;
if (proc_register(parent, ent) < 0) {
kfree(ent);
parent->nlink--;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index aa2781095bd1..fa4dd182bfaa 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -256,6 +256,15 @@ static inline void sysctl_head_put(struct ctl_table_header *head) { }
#endif
/*
+ * uid.c
+ */
+#ifdef CONFIG_PROC_UID
+extern int proc_uid_init(void);
+#else
+static inline void proc_uid_init(void) { }
+#endif
+
+/*
* proc_tty.c
*/
#ifdef CONFIG_TTY
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 92e6726f6e37..21f198aa0961 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -430,6 +430,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
static ssize_t
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
{
+ char *buf = file->private_data;
ssize_t acc = 0;
size_t size, tsz;
size_t elf_buflen;
@@ -500,23 +501,20 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
if (clear_user(buffer, tsz))
return -EFAULT;
} else if (is_vmalloc_or_module_addr((void *)start)) {
- char * elf_buf;
-
- elf_buf = kzalloc(tsz, GFP_KERNEL);
- if (!elf_buf)
- return -ENOMEM;
- vread(elf_buf, (char *)start, tsz);
+ vread(buf, (char *)start, tsz);
/* we have to zero-fill user buffer even if no read */
- if (copy_to_user(buffer, elf_buf, tsz)) {
- kfree(elf_buf);
+ if (copy_to_user(buffer, buf, tsz))
return -EFAULT;
- }
- kfree(elf_buf);
} else {
if (kern_addr_valid(start)) {
unsigned long n;
- n = copy_to_user(buffer, (char *)start, tsz);
+ /*
+ * Using bounce buffer to bypass the
+ * hardened user copy kernel text checks.
+ */
+ memcpy(buf, (char *) start, tsz);
+ n = copy_to_user(buffer, buf, tsz);
/*
* We cannot distinguish between fault on source
* and fault on destination. When this happens
@@ -549,6 +547,11 @@ static int open_kcore(struct inode *inode, struct file *filp)
{
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
+
+ filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!filp->private_data)
+ return -ENOMEM;
+
if (kcore_need_update)
kcore_update_ram();
if (i_size_read(inode) != proc_root_kcore->size) {
@@ -559,10 +562,16 @@ static int open_kcore(struct inode *inode, struct file *filp)
return 0;
}
+static int release_kcore(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
static const struct file_operations proc_kcore_operations = {
.read = read_kcore,
.open = open_kcore,
+ .release = release_kcore,
.llseek = default_llseek,
};
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 9155a5a0d3b9..df4661abadc4 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -57,11 +57,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
/*
* Estimate the amount of memory available for userspace allocations,
* without causing swapping.
- *
- * Free memory cannot be taken below the low watermark, before the
- * system starts swapping.
*/
- available = i.freeram - wmark_low;
+ available = i.freeram - totalreserve_pages;
/*
* Not all the page cache can be freed, otherwise the system will
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index fe5b6e6c4671..5e1054f028af 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -654,7 +654,10 @@ static bool proc_sys_link_fill_cache(struct file *file,
struct ctl_table *table)
{
bool ret = true;
+
head = sysctl_head_grab(head);
+ if (IS_ERR(head))
+ return false;
if (S_ISLNK(table->mode)) {
/* It is not an error if we can not follow the link ignore it */
@@ -703,7 +706,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx)
ctl_dir = container_of(head, struct ctl_dir, header);
if (!dir_emit_dots(file, ctx))
- return 0;
+ goto out;
pos = 2;
@@ -713,6 +716,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx)
break;
}
}
+out:
sysctl_head_finish(head);
return 0;
}
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 15f327bed8c6..7340c36978a3 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -14,6 +14,7 @@
#include <linux/tty.h>
#include <linux/seq_file.h>
#include <linux/bitops.h>
+#include "internal.h"
/*
* The /proc/tty directory inodes...
@@ -164,7 +165,7 @@ void proc_tty_unregister_driver(struct tty_driver *driver)
if (!ent)
return;
- remove_proc_entry(driver->driver_name, proc_tty_driver);
+ remove_proc_entry(ent->name, proc_tty_driver);
driver->proc_entry = NULL;
}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index ec649c92d270..1d0b6a125563 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -182,7 +182,7 @@ void __init proc_root_init(void)
proc_symlink("mounts", NULL, "self/mounts");
proc_net_init();
-
+ proc_uid_init();
#ifdef CONFIG_SYSVIPC
proc_mkdir("sysvipc", NULL);
#endif
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index d598b9c809c1..fdbb9df83bc5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -116,6 +116,56 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
}
#endif
+static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma)
+{
+ const char __user *name = vma_get_anon_name(vma);
+ struct mm_struct *mm = vma->vm_mm;
+
+ unsigned long page_start_vaddr;
+ unsigned long page_offset;
+ unsigned long num_pages;
+ unsigned long max_len = NAME_MAX;
+ int i;
+
+ page_start_vaddr = (unsigned long)name & PAGE_MASK;
+ page_offset = (unsigned long)name - page_start_vaddr;
+ num_pages = DIV_ROUND_UP(page_offset + max_len, PAGE_SIZE);
+
+ seq_puts(m, "[anon:");
+
+ for (i = 0; i < num_pages; i++) {
+ int len;
+ int write_len;
+ const char *kaddr;
+ long pages_pinned;
+ struct page *page;
+
+ pages_pinned = get_user_pages(current, mm, page_start_vaddr,
+ 1, 0, 0, &page, NULL);
+ if (pages_pinned < 1) {
+ seq_puts(m, "<fault>]");
+ return;
+ }
+
+ kaddr = (const char *)kmap(page);
+ len = min(max_len, PAGE_SIZE - page_offset);
+ write_len = strnlen(kaddr + page_offset, len);
+ seq_write(m, kaddr + page_offset, write_len);
+ kunmap(page);
+ put_page(page);
+
+ /* if strnlen hit a null terminator then we're done */
+ if (write_len != len)
+ break;
+
+ max_len -= len;
+ page_offset = 0;
+ page_start_vaddr += PAGE_SIZE;
+ }
+
+ seq_putc(m, ']');
+}
+
static void vma_stop(struct proc_maps_private *priv)
{
struct mm_struct *mm = priv->mm;
@@ -253,24 +303,15 @@ static int do_maps_open(struct inode *inode, struct file *file,
* /proc/PID/maps that is the stack of the main task.
*/
static int is_stack(struct proc_maps_private *priv,
- struct vm_area_struct *vma, int is_pid)
+ struct vm_area_struct *vma)
{
- int stack = 0;
-
- if (is_pid) {
- stack = vma->vm_start <= vma->vm_mm->start_stack &&
- vma->vm_end >= vma->vm_mm->start_stack;
- } else {
- struct inode *inode = priv->inode;
- struct task_struct *task;
-
- rcu_read_lock();
- task = pid_task(proc_pid(inode), PIDTYPE_PID);
- if (task)
- stack = vma_is_stack_for_task(vma, task);
- rcu_read_unlock();
- }
- return stack;
+ /*
+ * We make no effort to guess what a given thread considers to be
+ * its "stack". It's not even well-defined for programs written
+ * languages like Go.
+ */
+ return vma->vm_start <= vma->vm_mm->start_stack &&
+ vma->vm_end >= vma->vm_mm->start_stack;
}
static void
@@ -295,11 +336,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
/* We don't show the stack guard page in /proc/maps */
start = vma->vm_start;
- if (stack_guard_page_start(vma, start))
- start += PAGE_SIZE;
end = vma->vm_end;
- if (stack_guard_page_end(vma, end))
- end -= PAGE_SIZE;
seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
@@ -341,8 +378,15 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
goto done;
}
- if (is_stack(priv, vma, is_pid))
+ if (is_stack(priv, vma)) {
name = "[stack]";
+ goto done;
+ }
+
+ if (vma_get_anon_name(vma)) {
+ seq_pad(m, ' ');
+ seq_print_vma_name(m, vma);
+ }
}
done:
@@ -667,6 +711,12 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
show_map_vma(m, vma, is_pid);
+ if (vma_get_anon_name(vma)) {
+ seq_puts(m, "Name: ");
+ seq_print_vma_name(m, vma);
+ seq_putc(m, '\n');
+ }
+
seq_printf(m,
"Size: %8lu kB\n"
"Rss: %8lu kB\n"
@@ -803,7 +853,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
- pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
+ pmd_t pmd = *pmdp;
+
+ /* See comment in change_huge_pmd() */
+ pmdp_invalidate(vma, addr, pmdp);
+ if (pmd_dirty(*pmdp))
+ pmd = pmd_mkdirty(pmd);
+ if (pmd_young(*pmdp))
+ pmd = pmd_mkyoung(pmd);
pmd = pmd_wrprotect(pmd);
pmd = pmd_clear_soft_dirty(pmd);
@@ -1557,7 +1614,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
seq_file_path(m, file, "\n\t= ");
} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
seq_puts(m, " heap");
- } else if (is_stack(proc_priv, vma, is_pid)) {
+ } else if (is_stack(proc_priv, vma)) {
seq_puts(m, " stack");
}
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index faacb0c0d857..37175621e890 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -124,25 +124,17 @@ unsigned long task_statm(struct mm_struct *mm,
}
static int is_stack(struct proc_maps_private *priv,
- struct vm_area_struct *vma, int is_pid)
+ struct vm_area_struct *vma)
{
struct mm_struct *mm = vma->vm_mm;
- int stack = 0;
-
- if (is_pid) {
- stack = vma->vm_start <= mm->start_stack &&
- vma->vm_end >= mm->start_stack;
- } else {
- struct inode *inode = priv->inode;
- struct task_struct *task;
-
- rcu_read_lock();
- task = pid_task(proc_pid(inode), PIDTYPE_PID);
- if (task)
- stack = vma_is_stack_for_task(vma, task);
- rcu_read_unlock();
- }
- return stack;
+
+ /*
+ * We make no effort to guess what a given thread considers to be
+ * its "stack". It's not even well-defined for programs written
+ * languages like Go.
+ */
+ return vma->vm_start <= mm->start_stack &&
+ vma->vm_end >= mm->start_stack;
}
/*
@@ -184,7 +176,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
if (file) {
seq_pad(m, ' ');
seq_file_path(m, file, "");
- } else if (mm && is_stack(priv, vma, is_pid)) {
+ } else if (mm && is_stack(priv, vma)) {
seq_pad(m, ' ');
seq_printf(m, "[stack]");
}
diff --git a/fs/proc/uid.c b/fs/proc/uid.c
new file mode 100644
index 000000000000..11f1efc33c59
--- /dev/null
+++ b/fs/proc/uid.c
@@ -0,0 +1,295 @@
+/*
+ * /proc/uid support
+ */
+
+#include <linux/cpufreq_times.h>
+#include <linux/fs.h>
+#include <linux/hashtable.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/rtmutex.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+struct proc_dir_entry *proc_uid;
+
+#define UID_HASH_BITS 10
+
+static DECLARE_HASHTABLE(proc_uid_hash_table, UID_HASH_BITS);
+
+/*
+ * use rt_mutex here to avoid priority inversion between high-priority readers
+ * of these files and tasks calling proc_register_uid().
+ */
+static DEFINE_RT_MUTEX(proc_uid_lock); /* proc_uid_hash_table */
+
+struct uid_hash_entry {
+ uid_t uid;
+ struct hlist_node hash;
+};
+
+/* Caller must hold proc_uid_lock */
+static bool uid_hash_entry_exists_locked(uid_t uid)
+{
+ struct uid_hash_entry *entry;
+
+ hash_for_each_possible(proc_uid_hash_table, entry, hash, uid) {
+ if (entry->uid == uid)
+ return true;
+ }
+ return false;
+}
+
+void proc_register_uid(kuid_t kuid)
+{
+ struct uid_hash_entry *entry;
+ bool exists;
+ uid_t uid = from_kuid_munged(current_user_ns(), kuid);
+
+ rt_mutex_lock(&proc_uid_lock);
+ exists = uid_hash_entry_exists_locked(uid);
+ rt_mutex_unlock(&proc_uid_lock);
+ if (exists)
+ return;
+
+ entry = kzalloc(sizeof(struct uid_hash_entry), GFP_KERNEL);
+ if (!entry)
+ return;
+ entry->uid = uid;
+
+ rt_mutex_lock(&proc_uid_lock);
+ if (uid_hash_entry_exists_locked(uid))
+ kfree(entry);
+ else
+ hash_add(proc_uid_hash_table, &entry->hash, uid);
+ rt_mutex_unlock(&proc_uid_lock);
+}
+
+struct uid_entry {
+ const char *name;
+ int len;
+ umode_t mode;
+ const struct inode_operations *iop;
+ const struct file_operations *fop;
+};
+
+#define NOD(NAME, MODE, IOP, FOP) { \
+ .name = (NAME), \
+ .len = sizeof(NAME) - 1, \
+ .mode = MODE, \
+ .iop = IOP, \
+ .fop = FOP, \
+}
+
+#ifdef CONFIG_CPU_FREQ_TIMES
+const struct file_operations proc_uid_time_in_state_operations = {
+ .open = single_uid_time_in_state_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif
+
+static const struct uid_entry uid_base_stuff[] = {
+#ifdef CONFIG_CPU_FREQ_TIMES
+ NOD("time_in_state", 0444, NULL, &proc_uid_time_in_state_operations),
+#endif
+};
+
+const struct inode_operations proc_uid_def_inode_operations = {
+ .setattr = proc_setattr,
+};
+
+struct inode *proc_uid_make_inode(struct super_block *sb, kuid_t kuid)
+{
+ struct inode *inode;
+
+ inode = new_inode(sb);
+ if (!inode)
+ return NULL;
+
+ inode->i_ino = get_next_ino();
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+ inode->i_op = &proc_uid_def_inode_operations;
+ inode->i_uid = kuid;
+
+ return inode;
+}
+
+static int proc_uident_instantiate(struct inode *dir, struct dentry *dentry,
+ struct task_struct *unused, const void *ptr)
+{
+ const struct uid_entry *u = ptr;
+ struct inode *inode;
+
+ inode = proc_uid_make_inode(dir->i_sb, dir->i_uid);
+ if (!inode)
+ return -ENOENT;
+
+ inode->i_mode = u->mode;
+ if (S_ISDIR(inode->i_mode))
+ set_nlink(inode, 2);
+ if (u->iop)
+ inode->i_op = u->iop;
+ if (u->fop)
+ inode->i_fop = u->fop;
+ d_add(dentry, inode);
+ return 0;
+}
+
+static struct dentry *proc_uid_base_lookup(struct inode *dir,
+ struct dentry *dentry,
+ unsigned int flags)
+{
+ const struct uid_entry *u, *last;
+ unsigned int nents = ARRAY_SIZE(uid_base_stuff);
+
+ if (nents == 0)
+ return ERR_PTR(-ENOENT);
+
+ last = &uid_base_stuff[nents - 1];
+ for (u = uid_base_stuff; u <= last; u++) {
+ if (u->len != dentry->d_name.len)
+ continue;
+ if (!memcmp(dentry->d_name.name, u->name, u->len))
+ break;
+ }
+ if (u > last)
+ return ERR_PTR(-ENOENT);
+
+ return ERR_PTR(proc_uident_instantiate(dir, dentry, NULL, u));
+}
+
+static int proc_uid_base_readdir(struct file *file, struct dir_context *ctx)
+{
+ unsigned int nents = ARRAY_SIZE(uid_base_stuff);
+ const struct uid_entry *u;
+
+ if (!dir_emit_dots(file, ctx))
+ return 0;
+
+ if (ctx->pos >= nents + 2)
+ return 0;
+
+ for (u = uid_base_stuff + (ctx->pos - 2);
+ u < uid_base_stuff + nents; u++) {
+ if (!proc_fill_cache(file, ctx, u->name, u->len,
+ proc_uident_instantiate, NULL, u))
+ break;
+ ctx->pos++;
+ }
+
+ return 0;
+}
+
+static const struct inode_operations proc_uid_base_inode_operations = {
+ .lookup = proc_uid_base_lookup,
+ .setattr = proc_setattr,
+};
+
+static const struct file_operations proc_uid_base_operations = {
+ .read = generic_read_dir,
+ .iterate = proc_uid_base_readdir,
+ .llseek = default_llseek,
+};
+
+static int proc_uid_instantiate(struct inode *dir, struct dentry *dentry,
+ struct task_struct *unused, const void *ptr)
+{
+ unsigned int i, len;
+ nlink_t nlinks;
+ kuid_t *kuid = (kuid_t *)ptr;
+ struct inode *inode = proc_uid_make_inode(dir->i_sb, *kuid);
+
+ if (!inode)
+ return -ENOENT;
+
+ inode->i_mode = S_IFDIR | 0555;
+ inode->i_op = &proc_uid_base_inode_operations;
+ inode->i_fop = &proc_uid_base_operations;
+ inode->i_flags |= S_IMMUTABLE;
+
+ nlinks = 2;
+ len = ARRAY_SIZE(uid_base_stuff);
+ for (i = 0; i < len; ++i) {
+ if (S_ISDIR(uid_base_stuff[i].mode))
+ ++nlinks;
+ }
+ set_nlink(inode, nlinks);
+
+ d_add(dentry, inode);
+
+ return 0;
+}
+
+static int proc_uid_readdir(struct file *file, struct dir_context *ctx)
+{
+ int last_shown, i;
+ unsigned long bkt;
+ struct uid_hash_entry *entry;
+
+ if (!dir_emit_dots(file, ctx))
+ return 0;
+
+ i = 0;
+ last_shown = ctx->pos - 2;
+ rt_mutex_lock(&proc_uid_lock);
+ hash_for_each(proc_uid_hash_table, bkt, entry, hash) {
+ int len;
+ char buf[PROC_NUMBUF];
+
+ if (i < last_shown)
+ continue;
+ len = snprintf(buf, sizeof(buf), "%u", entry->uid);
+ if (!proc_fill_cache(file, ctx, buf, len,
+ proc_uid_instantiate, NULL, &entry->uid))
+ break;
+ i++;
+ ctx->pos++;
+ }
+ rt_mutex_unlock(&proc_uid_lock);
+ return 0;
+}
+
+static struct dentry *proc_uid_lookup(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
+{
+ int result = -ENOENT;
+
+ uid_t uid = name_to_int(&dentry->d_name);
+ bool uid_exists;
+
+ rt_mutex_lock(&proc_uid_lock);
+ uid_exists = uid_hash_entry_exists_locked(uid);
+ rt_mutex_unlock(&proc_uid_lock);
+ if (uid_exists) {
+ kuid_t kuid = make_kuid(current_user_ns(), uid);
+
+ result = proc_uid_instantiate(dir, dentry, NULL, &kuid);
+ }
+ return ERR_PTR(result);
+}
+
+static const struct file_operations proc_uid_operations = {
+ .read = generic_read_dir,
+ .iterate = proc_uid_readdir,
+ .llseek = default_llseek,
+};
+
+static const struct inode_operations proc_uid_inode_operations = {
+ .lookup = proc_uid_lookup,
+ .setattr = proc_setattr,
+};
+
+int __init proc_uid_init(void)
+{
+ proc_uid = proc_mkdir("uid", NULL);
+ if (!proc_uid)
+ return -ENOMEM;
+ proc_uid->proc_iops = &proc_uid_inode_operations;
+ proc_uid->proc_fops = &proc_uid_operations;
+
+ return 0;
+}