diff options
Diffstat (limited to 'fs/proc')
| -rw-r--r-- | fs/proc/Kconfig | 7 | ||||
| -rw-r--r-- | fs/proc/Makefile | 1 | ||||
| -rw-r--r-- | fs/proc/base.c | 180 | ||||
| -rw-r--r-- | fs/proc/generic.c | 1 | ||||
| -rw-r--r-- | fs/proc/internal.h | 9 | ||||
| -rw-r--r-- | fs/proc/kcore.c | 31 | ||||
| -rw-r--r-- | fs/proc/meminfo.c | 5 | ||||
| -rw-r--r-- | fs/proc/proc_sysctl.c | 6 | ||||
| -rw-r--r-- | fs/proc/proc_tty.c | 3 | ||||
| -rw-r--r-- | fs/proc/root.c | 2 | ||||
| -rw-r--r-- | fs/proc/task_mmu.c | 105 | ||||
| -rw-r--r-- | fs/proc/task_nommu.c | 28 | ||||
| -rw-r--r-- | fs/proc/uid.c | 295 |
13 files changed, 608 insertions, 65 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 1ade1206bb89..08dce22afec1 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -81,3 +81,10 @@ config PROC_CHILDREN Say Y if you are running any user-space software which takes benefit from this interface. For example, rkt is such a piece of software. + +config PROC_UID + bool "Include /proc/uid/ files" + default y + depends on PROC_FS && RT_MUTEXES + help + Provides aggregated per-uid information under /proc/uid. diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 7151ea428041..baab1daf7585 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -24,6 +24,7 @@ proc-y += softirqs.o proc-y += namespaces.o proc-y += self.o proc-y += thread_self.o +proc-$(CONFIG_PROC_UID) += uid.o proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o proc-$(CONFIG_NET) += proc_net.o proc-$(CONFIG_PROC_KCORE) += kcore.o diff --git a/fs/proc/base.c b/fs/proc/base.c index d2b8c754f627..d1a675d32320 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -87,6 +87,7 @@ #include <linux/slab.h> #include <linux/flex_array.h> #include <linux/posix-timers.h> +#include <linux/cpufreq_times.h> #ifdef CONFIG_HARDWALL #include <asm/hardwall.h> #endif @@ -94,6 +95,8 @@ #include "internal.h" #include "fd.h" +#include "../../lib/kstrtox.h" + /* NOTE: * Implementing inode permission operations in /proc is almost * certainly an error. Permission checks need to happen during @@ -953,6 +956,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, unsigned long src = *ppos; int ret = 0; struct mm_struct *mm = file->private_data; + unsigned long env_start, env_end; /* Ensure the process spawned far enough to have an environment. */ if (!mm || !mm->env_end) @@ -965,19 +969,25 @@ static ssize_t environ_read(struct file *file, char __user *buf, ret = 0; if (!atomic_inc_not_zero(&mm->mm_users)) goto free; + + down_read(&mm->mmap_sem); + env_start = mm->env_start; + env_end = mm->env_end; + up_read(&mm->mmap_sem); + while (count > 0) { size_t this_len, max_len; int retval; - if (src >= (mm->env_end - mm->env_start)) + if (src >= (env_end - env_start)) break; - this_len = mm->env_end - (mm->env_start + src); + this_len = env_end - (env_start + src); max_len = min_t(size_t, PAGE_SIZE, count); this_len = min(max_len, this_len); - retval = access_remote_vm(mm, (mm->env_start + src), + retval = access_remote_vm(mm, (env_start + src), page, this_len, 0); if (retval <= 0) { @@ -1829,9 +1839,34 @@ end_instantiate: static int dname_to_vma_addr(struct dentry *dentry, unsigned long *start, unsigned long *end) { - if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2) + const char *str = dentry->d_name.name; + unsigned long long sval, eval; + unsigned int len; + + len = _parse_integer(str, 16, &sval); + if (len & KSTRTOX_OVERFLOW) + return -EINVAL; + if (sval != (unsigned long)sval) + return -EINVAL; + str += len; + + if (*str != '-') + return -EINVAL; + str++; + + len = _parse_integer(str, 16, &eval); + if (len & KSTRTOX_OVERFLOW) + return -EINVAL; + if (eval != (unsigned long)eval) + return -EINVAL; + str += len; + + if (*str != '\0') return -EINVAL; + *start = sval; + *end = eval; + return 0; } @@ -2240,6 +2275,92 @@ static const struct file_operations proc_timers_operations = { .release = seq_release_private, }; +static ssize_t timerslack_ns_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + u64 slack_ns; + int err; + + err = kstrtoull_from_user(buf, count, 10, &slack_ns); + if (err < 0) + return err; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + if (p != current) { + if (!capable(CAP_SYS_NICE)) { + count = -EPERM; + goto out; + } + + err = security_task_setscheduler(p); + if (err) { + count = err; + goto out; + } + } + + task_lock(p); + if (slack_ns == 0) + p->timer_slack_ns = p->default_timer_slack_ns; + else + p->timer_slack_ns = slack_ns; + task_unlock(p); + +out: + put_task_struct(p); + + return count; +} + +static int timerslack_ns_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + int err = 0; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + if (p != current) { + + if (!capable(CAP_SYS_NICE)) { + err = -EPERM; + goto out; + } + err = security_task_getscheduler(p); + if (err) + goto out; + } + + task_lock(p); + seq_printf(m, "%llu\n", p->timer_slack_ns); + task_unlock(p); + +out: + put_task_struct(p); + + return err; +} + +static int timerslack_ns_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, timerslack_ns_show, inode); +} + +static const struct file_operations proc_pid_set_timerslack_ns_operations = { + .open = timerslack_ns_open, + .read = seq_read, + .write = timerslack_ns_write, + .llseek = seq_lseek, + .release = single_release, +}; + static int proc_pident_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { @@ -2817,6 +2938,10 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_CHECKPOINT_RESTORE REG("timers", S_IRUGO, proc_timers_operations), #endif + REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations), +#ifdef CONFIG_CPU_FREQ_TIMES + ONE("time_in_state", 0444, proc_time_in_state_show), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) @@ -3058,6 +3183,8 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) iter.tgid += 1, iter = next_tgid(ns, iter)) { char name[PROC_NUMBUF]; int len; + + cond_resched(); if (!has_pid_permissions(ns, iter.task, 2)) continue; @@ -3074,6 +3201,44 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) } /* + * proc_tid_comm_permission is a special permission function exclusively + * used for the node /proc/<pid>/task/<tid>/comm. + * It bypasses generic permission checks in the case where a task of the same + * task group attempts to access the node. + * The rationale behind this is that glibc and bionic access this node for + * cross thread naming (pthread_set/getname_np(!self)). However, if + * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0, + * which locks out the cross thread naming implementation. + * This function makes sure that the node is always accessible for members of + * same thread group. + */ +static int proc_tid_comm_permission(struct inode *inode, int mask) +{ + bool is_same_tgroup; + struct task_struct *task; + + task = get_proc_task(inode); + if (!task) + return -ESRCH; + is_same_tgroup = same_thread_group(current, task); + put_task_struct(task); + + if (likely(is_same_tgroup && !(mask & MAY_EXEC))) { + /* This file (/proc/<pid>/task/<tid>/comm) can always be + * read or written by the members of the corresponding + * thread group. + */ + return 0; + } + + return generic_permission(inode, mask); +} + +static const struct inode_operations proc_tid_comm_inode_operations = { + .permission = proc_tid_comm_permission, +}; + +/* * Tasks */ static const struct pid_entry tid_base_stuff[] = { @@ -3091,7 +3256,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif - REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), + NOD("comm", S_IFREG|S_IRUGO|S_IWUSR, + &proc_tid_comm_inode_operations, + &proc_pid_set_comm_operations, {}), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK ONE("syscall", S_IRUSR, proc_pid_syscall), #endif @@ -3159,6 +3326,9 @@ static const struct pid_entry tid_base_stuff[] = { REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif +#ifdef CONFIG_CPU_FREQ_TIMES + ONE("time_in_state", 0444, proc_time_in_state_show), +#endif }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index ff3ffc76a937..3773335791da 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -469,6 +469,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name) ent->data = NULL; ent->proc_fops = NULL; ent->proc_iops = NULL; + parent->nlink++; if (proc_register(parent, ent) < 0) { kfree(ent); parent->nlink--; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index aa2781095bd1..fa4dd182bfaa 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -256,6 +256,15 @@ static inline void sysctl_head_put(struct ctl_table_header *head) { } #endif /* + * uid.c + */ +#ifdef CONFIG_PROC_UID +extern int proc_uid_init(void); +#else +static inline void proc_uid_init(void) { } +#endif + +/* * proc_tty.c */ #ifdef CONFIG_TTY diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 92e6726f6e37..21f198aa0961 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -430,6 +430,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) static ssize_t read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) { + char *buf = file->private_data; ssize_t acc = 0; size_t size, tsz; size_t elf_buflen; @@ -500,23 +501,20 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) if (clear_user(buffer, tsz)) return -EFAULT; } else if (is_vmalloc_or_module_addr((void *)start)) { - char * elf_buf; - - elf_buf = kzalloc(tsz, GFP_KERNEL); - if (!elf_buf) - return -ENOMEM; - vread(elf_buf, (char *)start, tsz); + vread(buf, (char *)start, tsz); /* we have to zero-fill user buffer even if no read */ - if (copy_to_user(buffer, elf_buf, tsz)) { - kfree(elf_buf); + if (copy_to_user(buffer, buf, tsz)) return -EFAULT; - } - kfree(elf_buf); } else { if (kern_addr_valid(start)) { unsigned long n; - n = copy_to_user(buffer, (char *)start, tsz); + /* + * Using bounce buffer to bypass the + * hardened user copy kernel text checks. + */ + memcpy(buf, (char *) start, tsz); + n = copy_to_user(buffer, buf, tsz); /* * We cannot distinguish between fault on source * and fault on destination. When this happens @@ -549,6 +547,11 @@ static int open_kcore(struct inode *inode, struct file *filp) { if (!capable(CAP_SYS_RAWIO)) return -EPERM; + + filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!filp->private_data) + return -ENOMEM; + if (kcore_need_update) kcore_update_ram(); if (i_size_read(inode) != proc_root_kcore->size) { @@ -559,10 +562,16 @@ static int open_kcore(struct inode *inode, struct file *filp) return 0; } +static int release_kcore(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} static const struct file_operations proc_kcore_operations = { .read = read_kcore, .open = open_kcore, + .release = release_kcore, .llseek = default_llseek, }; diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 9155a5a0d3b9..df4661abadc4 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -57,11 +57,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v) /* * Estimate the amount of memory available for userspace allocations, * without causing swapping. - * - * Free memory cannot be taken below the low watermark, before the - * system starts swapping. */ - available = i.freeram - wmark_low; + available = i.freeram - totalreserve_pages; /* * Not all the page cache can be freed, otherwise the system will diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index fe5b6e6c4671..5e1054f028af 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -654,7 +654,10 @@ static bool proc_sys_link_fill_cache(struct file *file, struct ctl_table *table) { bool ret = true; + head = sysctl_head_grab(head); + if (IS_ERR(head)) + return false; if (S_ISLNK(table->mode)) { /* It is not an error if we can not follow the link ignore it */ @@ -703,7 +706,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx) ctl_dir = container_of(head, struct ctl_dir, header); if (!dir_emit_dots(file, ctx)) - return 0; + goto out; pos = 2; @@ -713,6 +716,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx) break; } } +out: sysctl_head_finish(head); return 0; } diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index 15f327bed8c6..7340c36978a3 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -14,6 +14,7 @@ #include <linux/tty.h> #include <linux/seq_file.h> #include <linux/bitops.h> +#include "internal.h" /* * The /proc/tty directory inodes... @@ -164,7 +165,7 @@ void proc_tty_unregister_driver(struct tty_driver *driver) if (!ent) return; - remove_proc_entry(driver->driver_name, proc_tty_driver); + remove_proc_entry(ent->name, proc_tty_driver); driver->proc_entry = NULL; } diff --git a/fs/proc/root.c b/fs/proc/root.c index ec649c92d270..1d0b6a125563 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -182,7 +182,7 @@ void __init proc_root_init(void) proc_symlink("mounts", NULL, "self/mounts"); proc_net_init(); - + proc_uid_init(); #ifdef CONFIG_SYSVIPC proc_mkdir("sysvipc", NULL); #endif diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index d598b9c809c1..fdbb9df83bc5 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -116,6 +116,56 @@ static void release_task_mempolicy(struct proc_maps_private *priv) } #endif +static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma) +{ + const char __user *name = vma_get_anon_name(vma); + struct mm_struct *mm = vma->vm_mm; + + unsigned long page_start_vaddr; + unsigned long page_offset; + unsigned long num_pages; + unsigned long max_len = NAME_MAX; + int i; + + page_start_vaddr = (unsigned long)name & PAGE_MASK; + page_offset = (unsigned long)name - page_start_vaddr; + num_pages = DIV_ROUND_UP(page_offset + max_len, PAGE_SIZE); + + seq_puts(m, "[anon:"); + + for (i = 0; i < num_pages; i++) { + int len; + int write_len; + const char *kaddr; + long pages_pinned; + struct page *page; + + pages_pinned = get_user_pages(current, mm, page_start_vaddr, + 1, 0, 0, &page, NULL); + if (pages_pinned < 1) { + seq_puts(m, "<fault>]"); + return; + } + + kaddr = (const char *)kmap(page); + len = min(max_len, PAGE_SIZE - page_offset); + write_len = strnlen(kaddr + page_offset, len); + seq_write(m, kaddr + page_offset, write_len); + kunmap(page); + put_page(page); + + /* if strnlen hit a null terminator then we're done */ + if (write_len != len) + break; + + max_len -= len; + page_offset = 0; + page_start_vaddr += PAGE_SIZE; + } + + seq_putc(m, ']'); +} + static void vma_stop(struct proc_maps_private *priv) { struct mm_struct *mm = priv->mm; @@ -253,24 +303,15 @@ static int do_maps_open(struct inode *inode, struct file *file, * /proc/PID/maps that is the stack of the main task. */ static int is_stack(struct proc_maps_private *priv, - struct vm_area_struct *vma, int is_pid) + struct vm_area_struct *vma) { - int stack = 0; - - if (is_pid) { - stack = vma->vm_start <= vma->vm_mm->start_stack && - vma->vm_end >= vma->vm_mm->start_stack; - } else { - struct inode *inode = priv->inode; - struct task_struct *task; - - rcu_read_lock(); - task = pid_task(proc_pid(inode), PIDTYPE_PID); - if (task) - stack = vma_is_stack_for_task(vma, task); - rcu_read_unlock(); - } - return stack; + /* + * We make no effort to guess what a given thread considers to be + * its "stack". It's not even well-defined for programs written + * languages like Go. + */ + return vma->vm_start <= vma->vm_mm->start_stack && + vma->vm_end >= vma->vm_mm->start_stack; } static void @@ -295,11 +336,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) /* We don't show the stack guard page in /proc/maps */ start = vma->vm_start; - if (stack_guard_page_start(vma, start)) - start += PAGE_SIZE; end = vma->vm_end; - if (stack_guard_page_end(vma, end)) - end -= PAGE_SIZE; seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", @@ -341,8 +378,15 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) goto done; } - if (is_stack(priv, vma, is_pid)) + if (is_stack(priv, vma)) { name = "[stack]"; + goto done; + } + + if (vma_get_anon_name(vma)) { + seq_pad(m, ' '); + seq_print_vma_name(m, vma); + } } done: @@ -667,6 +711,12 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) show_map_vma(m, vma, is_pid); + if (vma_get_anon_name(vma)) { + seq_puts(m, "Name: "); + seq_print_vma_name(m, vma); + seq_putc(m, '\n'); + } + seq_printf(m, "Size: %8lu kB\n" "Rss: %8lu kB\n" @@ -803,7 +853,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { - pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp); + pmd_t pmd = *pmdp; + + /* See comment in change_huge_pmd() */ + pmdp_invalidate(vma, addr, pmdp); + if (pmd_dirty(*pmdp)) + pmd = pmd_mkdirty(pmd); + if (pmd_young(*pmdp)) + pmd = pmd_mkyoung(pmd); pmd = pmd_wrprotect(pmd); pmd = pmd_clear_soft_dirty(pmd); @@ -1557,7 +1614,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) seq_file_path(m, file, "\n\t= "); } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { seq_puts(m, " heap"); - } else if (is_stack(proc_priv, vma, is_pid)) { + } else if (is_stack(proc_priv, vma)) { seq_puts(m, " stack"); } diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index faacb0c0d857..37175621e890 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -124,25 +124,17 @@ unsigned long task_statm(struct mm_struct *mm, } static int is_stack(struct proc_maps_private *priv, - struct vm_area_struct *vma, int is_pid) + struct vm_area_struct *vma) { struct mm_struct *mm = vma->vm_mm; - int stack = 0; - - if (is_pid) { - stack = vma->vm_start <= mm->start_stack && - vma->vm_end >= mm->start_stack; - } else { - struct inode *inode = priv->inode; - struct task_struct *task; - - rcu_read_lock(); - task = pid_task(proc_pid(inode), PIDTYPE_PID); - if (task) - stack = vma_is_stack_for_task(vma, task); - rcu_read_unlock(); - } - return stack; + + /* + * We make no effort to guess what a given thread considers to be + * its "stack". It's not even well-defined for programs written + * languages like Go. + */ + return vma->vm_start <= mm->start_stack && + vma->vm_end >= mm->start_stack; } /* @@ -184,7 +176,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, if (file) { seq_pad(m, ' '); seq_file_path(m, file, ""); - } else if (mm && is_stack(priv, vma, is_pid)) { + } else if (mm && is_stack(priv, vma)) { seq_pad(m, ' '); seq_printf(m, "[stack]"); } diff --git a/fs/proc/uid.c b/fs/proc/uid.c new file mode 100644 index 000000000000..11f1efc33c59 --- /dev/null +++ b/fs/proc/uid.c @@ -0,0 +1,295 @@ +/* + * /proc/uid support + */ + +#include <linux/cpufreq_times.h> +#include <linux/fs.h> +#include <linux/hashtable.h> +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/rtmutex.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include "internal.h" + +struct proc_dir_entry *proc_uid; + +#define UID_HASH_BITS 10 + +static DECLARE_HASHTABLE(proc_uid_hash_table, UID_HASH_BITS); + +/* + * use rt_mutex here to avoid priority inversion between high-priority readers + * of these files and tasks calling proc_register_uid(). + */ +static DEFINE_RT_MUTEX(proc_uid_lock); /* proc_uid_hash_table */ + +struct uid_hash_entry { + uid_t uid; + struct hlist_node hash; +}; + +/* Caller must hold proc_uid_lock */ +static bool uid_hash_entry_exists_locked(uid_t uid) +{ + struct uid_hash_entry *entry; + + hash_for_each_possible(proc_uid_hash_table, entry, hash, uid) { + if (entry->uid == uid) + return true; + } + return false; +} + +void proc_register_uid(kuid_t kuid) +{ + struct uid_hash_entry *entry; + bool exists; + uid_t uid = from_kuid_munged(current_user_ns(), kuid); + + rt_mutex_lock(&proc_uid_lock); + exists = uid_hash_entry_exists_locked(uid); + rt_mutex_unlock(&proc_uid_lock); + if (exists) + return; + + entry = kzalloc(sizeof(struct uid_hash_entry), GFP_KERNEL); + if (!entry) + return; + entry->uid = uid; + + rt_mutex_lock(&proc_uid_lock); + if (uid_hash_entry_exists_locked(uid)) + kfree(entry); + else + hash_add(proc_uid_hash_table, &entry->hash, uid); + rt_mutex_unlock(&proc_uid_lock); +} + +struct uid_entry { + const char *name; + int len; + umode_t mode; + const struct inode_operations *iop; + const struct file_operations *fop; +}; + +#define NOD(NAME, MODE, IOP, FOP) { \ + .name = (NAME), \ + .len = sizeof(NAME) - 1, \ + .mode = MODE, \ + .iop = IOP, \ + .fop = FOP, \ +} + +#ifdef CONFIG_CPU_FREQ_TIMES +const struct file_operations proc_uid_time_in_state_operations = { + .open = single_uid_time_in_state_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + +static const struct uid_entry uid_base_stuff[] = { +#ifdef CONFIG_CPU_FREQ_TIMES + NOD("time_in_state", 0444, NULL, &proc_uid_time_in_state_operations), +#endif +}; + +const struct inode_operations proc_uid_def_inode_operations = { + .setattr = proc_setattr, +}; + +struct inode *proc_uid_make_inode(struct super_block *sb, kuid_t kuid) +{ + struct inode *inode; + + inode = new_inode(sb); + if (!inode) + return NULL; + + inode->i_ino = get_next_ino(); + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_op = &proc_uid_def_inode_operations; + inode->i_uid = kuid; + + return inode; +} + +static int proc_uident_instantiate(struct inode *dir, struct dentry *dentry, + struct task_struct *unused, const void *ptr) +{ + const struct uid_entry *u = ptr; + struct inode *inode; + + inode = proc_uid_make_inode(dir->i_sb, dir->i_uid); + if (!inode) + return -ENOENT; + + inode->i_mode = u->mode; + if (S_ISDIR(inode->i_mode)) + set_nlink(inode, 2); + if (u->iop) + inode->i_op = u->iop; + if (u->fop) + inode->i_fop = u->fop; + d_add(dentry, inode); + return 0; +} + +static struct dentry *proc_uid_base_lookup(struct inode *dir, + struct dentry *dentry, + unsigned int flags) +{ + const struct uid_entry *u, *last; + unsigned int nents = ARRAY_SIZE(uid_base_stuff); + + if (nents == 0) + return ERR_PTR(-ENOENT); + + last = &uid_base_stuff[nents - 1]; + for (u = uid_base_stuff; u <= last; u++) { + if (u->len != dentry->d_name.len) + continue; + if (!memcmp(dentry->d_name.name, u->name, u->len)) + break; + } + if (u > last) + return ERR_PTR(-ENOENT); + + return ERR_PTR(proc_uident_instantiate(dir, dentry, NULL, u)); +} + +static int proc_uid_base_readdir(struct file *file, struct dir_context *ctx) +{ + unsigned int nents = ARRAY_SIZE(uid_base_stuff); + const struct uid_entry *u; + + if (!dir_emit_dots(file, ctx)) + return 0; + + if (ctx->pos >= nents + 2) + return 0; + + for (u = uid_base_stuff + (ctx->pos - 2); + u < uid_base_stuff + nents; u++) { + if (!proc_fill_cache(file, ctx, u->name, u->len, + proc_uident_instantiate, NULL, u)) + break; + ctx->pos++; + } + + return 0; +} + +static const struct inode_operations proc_uid_base_inode_operations = { + .lookup = proc_uid_base_lookup, + .setattr = proc_setattr, +}; + +static const struct file_operations proc_uid_base_operations = { + .read = generic_read_dir, + .iterate = proc_uid_base_readdir, + .llseek = default_llseek, +}; + +static int proc_uid_instantiate(struct inode *dir, struct dentry *dentry, + struct task_struct *unused, const void *ptr) +{ + unsigned int i, len; + nlink_t nlinks; + kuid_t *kuid = (kuid_t *)ptr; + struct inode *inode = proc_uid_make_inode(dir->i_sb, *kuid); + + if (!inode) + return -ENOENT; + + inode->i_mode = S_IFDIR | 0555; + inode->i_op = &proc_uid_base_inode_operations; + inode->i_fop = &proc_uid_base_operations; + inode->i_flags |= S_IMMUTABLE; + + nlinks = 2; + len = ARRAY_SIZE(uid_base_stuff); + for (i = 0; i < len; ++i) { + if (S_ISDIR(uid_base_stuff[i].mode)) + ++nlinks; + } + set_nlink(inode, nlinks); + + d_add(dentry, inode); + + return 0; +} + +static int proc_uid_readdir(struct file *file, struct dir_context *ctx) +{ + int last_shown, i; + unsigned long bkt; + struct uid_hash_entry *entry; + + if (!dir_emit_dots(file, ctx)) + return 0; + + i = 0; + last_shown = ctx->pos - 2; + rt_mutex_lock(&proc_uid_lock); + hash_for_each(proc_uid_hash_table, bkt, entry, hash) { + int len; + char buf[PROC_NUMBUF]; + + if (i < last_shown) + continue; + len = snprintf(buf, sizeof(buf), "%u", entry->uid); + if (!proc_fill_cache(file, ctx, buf, len, + proc_uid_instantiate, NULL, &entry->uid)) + break; + i++; + ctx->pos++; + } + rt_mutex_unlock(&proc_uid_lock); + return 0; +} + +static struct dentry *proc_uid_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + int result = -ENOENT; + + uid_t uid = name_to_int(&dentry->d_name); + bool uid_exists; + + rt_mutex_lock(&proc_uid_lock); + uid_exists = uid_hash_entry_exists_locked(uid); + rt_mutex_unlock(&proc_uid_lock); + if (uid_exists) { + kuid_t kuid = make_kuid(current_user_ns(), uid); + + result = proc_uid_instantiate(dir, dentry, NULL, &kuid); + } + return ERR_PTR(result); +} + +static const struct file_operations proc_uid_operations = { + .read = generic_read_dir, + .iterate = proc_uid_readdir, + .llseek = default_llseek, +}; + +static const struct inode_operations proc_uid_inode_operations = { + .lookup = proc_uid_lookup, + .setattr = proc_setattr, +}; + +int __init proc_uid_init(void) +{ + proc_uid = proc_mkdir("uid", NULL); + if (!proc_uid) + return -ENOMEM; + proc_uid->proc_iops = &proc_uid_inode_operations; + proc_uid->proc_fops = &proc_uid_operations; + + return 0; +} |
