From 7e47682ea555e7c1edef1d8fd96e2aa4c12abe59 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 9 Jun 2015 21:32:09 +1000 Subject: cgroup: allow a cgroup subsystem to reject a fork Add a new cgroup subsystem callback can_fork that conditionally states whether or not the fork is accepted or rejected by a cgroup policy. In addition, add a cancel_fork callback so that if an error occurs later in the forking process, any state modified by can_fork can be reverted. Allow for a private opaque pointer to be passed from cgroup_can_fork to cgroup_post_fork, allowing for the fork state to be stored by each subsystem separately. Also add a tagging system for cgroup_subsys.h to allow for CGROUP_ enumerations to be be defined and used. In addition, explicitly add a CGROUP_CANFORK_COUNT macro to make arrays easier to define. This is in preparation for implementing the pids cgroup subsystem. Signed-off-by: Aleksa Sarai Signed-off-by: Tejun Heo --- include/linux/cgroup_subsys.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux/cgroup_subsys.h') diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index e4a96fb14403..ec43bce7e1ea 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -3,6 +3,17 @@ * * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. */ + +/* + * This file *must* be included with SUBSYS() defined. + * SUBSYS_TAG() is a noop if undefined. + */ + +#ifndef SUBSYS_TAG +#define __TMP_SUBSYS_TAG +#define SUBSYS_TAG(_x) +#endif + #if IS_ENABLED(CONFIG_CPUSETS) SUBSYS(cpuset) #endif @@ -47,12 +58,24 @@ SUBSYS(net_prio) SUBSYS(hugetlb) #endif +/* + * Subsystems that implement the can_fork() family of callbacks. + */ +SUBSYS_TAG(CANFORK_START) +SUBSYS_TAG(CANFORK_END) + /* * The following subsystems are not supported on the default hierarchy. */ #if IS_ENABLED(CONFIG_CGROUP_DEBUG) SUBSYS(debug) #endif + +#ifdef __TMP_SUBSYS_TAG +#undef __TMP_SUBSYS_TAG +#undef SUBSYS_TAG +#endif + /* * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. */ -- cgit v1.2.3 From 49b786ea146f69c371df18e81ce0a2d5839f865c Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 9 Jun 2015 21:32:10 +1000 Subject: cgroup: implement the PIDs subsystem Adds a new single-purpose PIDs subsystem to limit the number of tasks that can be forked inside a cgroup. Essentially this is an implementation of RLIMIT_NPROC that applies to a cgroup rather than a process tree. However, it should be noted that organisational operations (adding and removing tasks from a PIDs hierarchy) will *not* be prevented. Rather, the number of tasks in the hierarchy cannot exceed the limit through forking. This is due to the fact that, in the unified hierarchy, attach cannot fail (and it is not possible for a task to overcome its PIDs cgroup policy limit by attaching to a child cgroup -- even if migrating mid-fork it must be able to fork in the parent first). PIDs are fundamentally a global resource, and it is possible to reach PID exhaustion inside a cgroup without hitting any reasonable kmemcg policy. Once you've hit PID exhaustion, you're only in a marginally better state than OOM. This subsystem allows PID exhaustion inside a cgroup to be prevented. Signed-off-by: Aleksa Sarai Signed-off-by: Tejun Heo --- include/linux/cgroup_subsys.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/cgroup_subsys.h') diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index ec43bce7e1ea..1f36945fd23d 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -62,6 +62,11 @@ SUBSYS(hugetlb) * Subsystems that implement the can_fork() family of callbacks. */ SUBSYS_TAG(CANFORK_START) + +#if IS_ENABLED(CONFIG_CGROUP_PIDS) +SUBSYS(pids) +#endif + SUBSYS_TAG(CANFORK_END) /* -- cgit v1.2.3 From c165b3e3c7bb68c2ed55a5ac2623f030d01d9567 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Aug 2015 14:55:29 -0700 Subject: blkcg: rename subsystem name from blkio to io blkio interface has become messy over time and is currently the largest. In addition to the inconsistent naming scheme, it has multiple stat files which report more or less the same thing, a number of debug stat files which expose internal details which shouldn't have been part of the public interface in the first place, recursive and non-recursive stats and leaf and non-leaf knobs. Both recursive vs. non-recursive and leaf vs. non-leaf distinctions don't make any sense on the unified hierarchy as only leaf cgroups can contain processes. cgroups is going through a major interface revision with the unified hierarchy involving significant fundamental usage changes and given that a significant portion of the interface doesn't make sense anymore, it's a good time to reorganize the interface. As the first step, this patch renames the external visible subsystem name from "blkio" to "io". This is more concise, matches the other two major subsystem names, "cpu" and "memory", and better suited as blkcg will be involved in anything writeback related too whether an actual block device is involved or not. As the subsystem legacy_name is set to "blkio", the only userland visible change outside the unified hierarchy is that blkcg is reported as "io" instead of "blkio" in the subsystem initialized message during boot. On the unified hierarchy, blkcg now appears as "io". Signed-off-by: Tejun Heo Cc: Li Zefan Cc: Johannes Weiner Cc: cgroups@vger.kernel.org Signed-off-by: Jens Axboe --- include/linux/cgroup_subsys.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/cgroup_subsys.h') diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index e4a96fb14403..86b5056104df 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -16,7 +16,7 @@ SUBSYS(cpuacct) #endif #if IS_ENABLED(CONFIG_BLK_CGROUP) -SUBSYS(blkio) +SUBSYS(io) #endif #if IS_ENABLED(CONFIG_MEMCG) -- cgit v1.2.3