From 7e47682ea555e7c1edef1d8fd96e2aa4c12abe59 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 9 Jun 2015 21:32:09 +1000 Subject: cgroup: allow a cgroup subsystem to reject a fork Add a new cgroup subsystem callback can_fork that conditionally states whether or not the fork is accepted or rejected by a cgroup policy. In addition, add a cancel_fork callback so that if an error occurs later in the forking process, any state modified by can_fork can be reverted. Allow for a private opaque pointer to be passed from cgroup_can_fork to cgroup_post_fork, allowing for the fork state to be stored by each subsystem separately. Also add a tagging system for cgroup_subsys.h to allow for CGROUP_ enumerations to be be defined and used. In addition, explicitly add a CGROUP_CANFORK_COUNT macro to make arrays easier to define. This is in preparation for implementing the pids cgroup subsystem. Signed-off-by: Aleksa Sarai Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 10 +++++++++- include/linux/cgroup.h | 15 +++++++++++++-- include/linux/cgroup_subsys.h | 23 +++++++++++++++++++++++ 3 files changed, 45 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 93755a629299..83e37d8c4d80 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -34,12 +34,17 @@ struct seq_file; /* define the enumeration of all cgroup subsystems */ #define SUBSYS(_x) _x ## _cgrp_id, +#define SUBSYS_TAG(_t) CGROUP_ ## _t, \ + __unused_tag_ ## _t = CGROUP_ ## _t - 1, enum cgroup_subsys_id { #include CGROUP_SUBSYS_COUNT, }; +#undef SUBSYS_TAG #undef SUBSYS +#define CGROUP_CANFORK_COUNT (CGROUP_CANFORK_END - CGROUP_CANFORK_START) + /* bits in struct cgroup_subsys_state flags field */ enum { CSS_NO_REF = (1 << 0), /* no reference counting for this css */ @@ -406,7 +411,9 @@ struct cgroup_subsys { struct cgroup_taskset *tset); void (*attach)(struct cgroup_subsys_state *css, struct cgroup_taskset *tset); - void (*fork)(struct task_struct *task); + int (*can_fork)(struct task_struct *task, void **priv_p); + void (*cancel_fork)(struct task_struct *task, void *priv); + void (*fork)(struct task_struct *task, void *priv); void (*exit)(struct cgroup_subsys_state *css, struct cgroup_subsys_state *old_css, struct task_struct *task); @@ -491,6 +498,7 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) #else /* CONFIG_CGROUPS */ +#define CGROUP_CANFORK_COUNT 0 #define CGROUP_SUBSYS_COUNT 0 static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {} diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a593e299162e..a71fe2a3984e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -62,7 +62,12 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); void cgroup_fork(struct task_struct *p); -void cgroup_post_fork(struct task_struct *p); +extern int cgroup_can_fork(struct task_struct *p, + void *ss_priv[CGROUP_CANFORK_COUNT]); +extern void cgroup_cancel_fork(struct task_struct *p, + void *ss_priv[CGROUP_CANFORK_COUNT]); +extern void cgroup_post_fork(struct task_struct *p, + void *old_ss_priv[CGROUP_CANFORK_COUNT]); void cgroup_exit(struct task_struct *p); int cgroup_init_early(void); @@ -524,7 +529,13 @@ static inline int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) { return -EINVAL; } static inline void cgroup_fork(struct task_struct *p) {} -static inline void cgroup_post_fork(struct task_struct *p) {} +static inline int cgroup_can_fork(struct task_struct *p, + void *ss_priv[CGROUP_CANFORK_COUNT]) +{ return 0; } +static inline void cgroup_cancel_fork(struct task_struct *p, + void *ss_priv[CGROUP_CANFORK_COUNT]) {} +static inline void cgroup_post_fork(struct task_struct *p, + void *ss_priv[CGROUP_CANFORK_COUNT]) {} static inline void cgroup_exit(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index e4a96fb14403..ec43bce7e1ea 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -3,6 +3,17 @@ * * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. */ + +/* + * This file *must* be included with SUBSYS() defined. + * SUBSYS_TAG() is a noop if undefined. + */ + +#ifndef SUBSYS_TAG +#define __TMP_SUBSYS_TAG +#define SUBSYS_TAG(_x) +#endif + #if IS_ENABLED(CONFIG_CPUSETS) SUBSYS(cpuset) #endif @@ -47,12 +58,24 @@ SUBSYS(net_prio) SUBSYS(hugetlb) #endif +/* + * Subsystems that implement the can_fork() family of callbacks. + */ +SUBSYS_TAG(CANFORK_START) +SUBSYS_TAG(CANFORK_END) + /* * The following subsystems are not supported on the default hierarchy. */ #if IS_ENABLED(CONFIG_CGROUP_DEBUG) SUBSYS(debug) #endif + +#ifdef __TMP_SUBSYS_TAG +#undef __TMP_SUBSYS_TAG +#undef SUBSYS_TAG +#endif + /* * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. */ -- cgit v1.2.3 From 49b786ea146f69c371df18e81ce0a2d5839f865c Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 9 Jun 2015 21:32:10 +1000 Subject: cgroup: implement the PIDs subsystem Adds a new single-purpose PIDs subsystem to limit the number of tasks that can be forked inside a cgroup. Essentially this is an implementation of RLIMIT_NPROC that applies to a cgroup rather than a process tree. However, it should be noted that organisational operations (adding and removing tasks from a PIDs hierarchy) will *not* be prevented. Rather, the number of tasks in the hierarchy cannot exceed the limit through forking. This is due to the fact that, in the unified hierarchy, attach cannot fail (and it is not possible for a task to overcome its PIDs cgroup policy limit by attaching to a child cgroup -- even if migrating mid-fork it must be able to fork in the parent first). PIDs are fundamentally a global resource, and it is possible to reach PID exhaustion inside a cgroup without hitting any reasonable kmemcg policy. Once you've hit PID exhaustion, you're only in a marginally better state than OOM. This subsystem allows PID exhaustion inside a cgroup to be prevented. Signed-off-by: Aleksa Sarai Signed-off-by: Tejun Heo --- include/linux/cgroup_subsys.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index ec43bce7e1ea..1f36945fd23d 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -62,6 +62,11 @@ SUBSYS(hugetlb) * Subsystems that implement the can_fork() family of callbacks. */ SUBSYS_TAG(CANFORK_START) + +#if IS_ENABLED(CONFIG_CGROUP_PIDS) +SUBSYS(pids) +#endif + SUBSYS_TAG(CANFORK_END) /* -- cgit v1.2.3 From 6abc8ca19df0078de17dc38340db3002ed489ce7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 4 Aug 2015 15:20:55 -0400 Subject: cgroup: define controller file conventions Traditionally, each cgroup controller implemented whatever interface it wanted leading to interfaces which are widely inconsistent. Examining the requirements of the controllers readily yield that there are only a few control schemes shared among all. Two major controllers already had to implement new interface for the unified hierarchy due to significant structural changes. Let's take the chance to establish common conventions throughout all controllers. This patch defines CGROUP_WEIGHT_MIN/DFL/MAX to be used on all weight based control knobs and documents the conventions that controllers should follow on the unified hierarchy. Except for io.weight knob, all existing unified hierarchy knobs are already compliant. A follow-up patch will update io.weight. v2: Added descriptions of min, low and high knobs. Signed-off-by: Tejun Heo Acked-by: Johannes Weiner Cc: Li Zefan Cc: Peter Zijlstra --- include/linux/cgroup.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a593e299162e..c6bf9d30c270 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -22,6 +22,15 @@ #ifdef CONFIG_CGROUPS +/* + * All weight knobs on the default hierarhcy should use the following min, + * default and max values. The default value is the logarithmic center of + * MIN and MAX and allows 100x to be expressed in both directions. + */ +#define CGROUP_WEIGHT_MIN 1 +#define CGROUP_WEIGHT_DFL 100 +#define CGROUP_WEIGHT_MAX 10000 + /* a css_task_iter should be treated as an opaque object */ struct css_task_iter { struct cgroup_subsys *ss; -- cgit v1.2.3 From 731a981e1059dd68c97212ccc9c0e1f169c1a77b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 11 Aug 2015 13:35:42 -0400 Subject: cgroup: make cftype->private a unsigned long It's pretty unusual to have an int as a private data field and it makes it impossible to carray a pointer value through it. Let's make it an unsigned long. AFAICS, this shouldn't break anything. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Johannes Weiner --- include/linux/cgroup-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 93755a629299..8f5770a7d85a 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -318,7 +318,7 @@ struct cftype { * end of cftype array. */ char name[MAX_CFTYPE_NAME]; - int private; + unsigned long private; /* * If not 0, file mode is set to this value, otherwise it will * be figured out automatically -- cgit v1.2.3 From 3e1d2eed39d804e48282931835c7203fa47fe1d9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Aug 2015 13:58:16 -0700 Subject: cgroup: introduce cgroup_subsys->legacy_name This allows cgroup subsystems to use a different name on the unified hierarchy. cgroup_subsys->name is used on the unified hierarchy, ->legacy_name elsewhere. If ->legacy_name is not explicitly set, it's automatically set to ->name and the userland visible behavior remains unchanged. v2: Make parse_cgroupfs_options() only consider ->legacy_name as mount options are used only on legacy hierarchies. Suggested by Li Zefan. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Johannes Weiner Cc: cgroups@vger.kernel.org --- include/linux/cgroup-defs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8f5770a7d85a..7d0bb53e4553 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -434,6 +434,9 @@ struct cgroup_subsys { int id; const char *name; + /* optional, initialized automatically during boot if not set */ + const char *legacy_name; + /* link to parent, protected by cgroup_lock() */ struct cgroup_root *root; -- cgit v1.2.3