summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuke Shumaker <lukeshu@lukeshu.com>2017-06-17 16:10:27 -0400
committerLuke Shumaker <lukeshu@lukeshu.com>2017-09-12 12:14:30 -0400
commitee183f27ff304b5d35c4f55fc53b6b83d1625ff9 (patch)
tree6b5ecc1291509da81004c190dc4c580c071df36e
parented7ae30145072051e56e122464c13d4160a03835 (diff)
cgroup-util,nspawn: Add a special "inherit" cgroup mode for nspawn (notsystemd/v232.2)
The "inherit" mode inspects /proc/self/mountinfo to do its best to replicate the cgroup setup of the outer host. It is used by default unless a different, specific cgroup setup is required: either because the user requested one (via $UNIFIED_CGROUP_HIERARCHY), or because pick_cgroup_version() detected that the container runs a version of systemd that doesn't support the outer host's setup. This means that nspawn can now be used when outer_cgver=CGROUP_UNIFIED_UNKNOWN, i.e. when running on a non-systemd host. Unfortunately, without further rework of cgroup-util, the name=systemd v1 hierarchy and the v2 hierarchy can't be meaningfully distinguished from each other, which means that cgroup_setup() can't do anything intelligent with the v2 hierarchy when outer_cgver=CGROUP_UNIFIED_UNKNOWN. So, in the meantime, there is a test in cgroup_decide_mounts_inherit() that bails out if it sees the v2 hierarchy in a place that would cause outer_cgver=CGROUP_UNIFIED_UNKNOWN.
-rw-r--r--Makefile.am2
-rw-r--r--src/basic/cgroup-util.h1
-rw-r--r--src/nspawn/nspawn-cgroup.c114
-rw-r--r--src/nspawn/nspawn.c27
4 files changed, 133 insertions, 11 deletions
diff --git a/Makefile.am b/Makefile.am
index 35ec60f736..4f1d729a74 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -3113,6 +3113,7 @@ systemd_nspawn_CFLAGS = \
$(AM_CFLAGS) \
$(ACL_CFLAGS) \
$(BLKID_CFLAGS) \
+ $(MOUNT_CFLAGS) \
$(SECCOMP_CFLAGS) \
$(SELINUX_CFLAGS)
@@ -3120,6 +3121,7 @@ systemd_nspawn_LDADD = \
libsystemd-shared.la \
$(ACL_LIBS) \
$(BLKID_LIBS) \
+ $(MOUNT_LIBS) \
$(SECCOMP_LIBS) \
$(SELINUX_LIBS)
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
index 0e5b41103e..3406e0b4fb 100644
--- a/src/basic/cgroup-util.h
+++ b/src/basic/cgroup-util.h
@@ -118,6 +118,7 @@ static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) {
#define DEFAULT_USER_TASKS_MAX_PERCENTAGE 33U /* 33% of PIDs, 10813 on default settings */
typedef enum CGroupUnified {
+ CGROUP_UNIFIED_INHERIT = -2, /* special case only used by nspawn */
CGROUP_UNIFIED_UNKNOWN = -1,
CGROUP_UNIFIED_NONE = 0, /* Both systemd and controllers on legacy */
CGROUP_UNIFIED_SYSTEMD = 1, /* Only systemd on unified */
diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c
index 3287aff95b..a226866207 100644
--- a/src/nspawn/nspawn-cgroup.c
+++ b/src/nspawn/nspawn-cgroup.c
@@ -18,8 +18,11 @@
***/
#include <sys/mount.h>
+#include <libmount.h>
#include "alloc-util.h"
+#include "dirent-util.h"
+#include "escape.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
@@ -267,6 +270,13 @@ int cgroup_setup(pid_t pid, CGroupUnified outer_cgver, CGroupUnified inner_cgver
_cleanup_set_free_ Set *peers = NULL;
int r;
+ /* For purposes of version comparison */
+ if (inner_cgver == CGROUP_UNIFIED_INHERIT)
+ inner_cgver = outer_cgver;
+
+ if (outer_cgver == CGROUP_UNIFIED_UNKNOWN)
+ return 0;
+
if ((outer_cgver >= CGROUP_UNIFIED_SYSTEMD) != (inner_cgver >= CGROUP_UNIFIED_SYSTEMD)) {
/* sync the name=systemd hierarchy with the unified hierarchy */
r = sync_cgroup(pid, outer_cgver, inner_cgver, uid_shift);
@@ -316,6 +326,108 @@ int cgroup_setup(pid_t pid, CGroupUnified outer_cgver, CGroupUnified inner_cgver
/********************************************************************/
+static int cgroup_decide_mounts_inherit(CGMounts *ret_mounts) { /* Fill *ret_mounts with the cgroup-related mounts (and tmpfs symlinks) found under /sys/fs/cgroup in /proc/self/mountinfo; returns 0 on success, otherwise the error of the failing step. */
+ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+ _cleanup_(cgroup_free_mounts) CGMounts mounts = {}; /* working list; handed to the caller only on success */
+ int r;
+
+ proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+ if (!proc_self_mountinfo)
+ return -errno;
+
+ for (;;) { /* each successful pass consumes one mountinfo line (the format ends by skipping to the newline) */
+ _cleanup_free_ char *escmountpoint = NULL, *mountpoint = NULL, *fstype = NULL, *superopts = NULL, *fsopts = NULL;
+ char *name;
+ CGMountType type;
+ int k;
+
+ k = fscanf(proc_self_mountinfo,
+ "%*s " /* (1) mount id */
+ "%*s " /* (2) parent id */
+ "%*s " /* (3) major:minor */
+ "%*s " /* (4) root */
+ "%ms " /* (5) mount point */
+ "%*s" /* (6) per-mount options */
+ "%*[^-]" /* (7) optional fields */
+ "- " /* (8) separator */
+ "%ms " /* (9) file system type */
+ "%*s" /* (10) mount source */
+ "%ms" /* (11) per-superblock options */
+ "%*[^\n]", /* some rubbish at the end */
+ &escmountpoint,
+ &fstype,
+ &superopts
+ );
+ if (k != 3) { /* fewer than the three requested fields were converted */
+ if (k == EOF)
+ break; /* nothing left to read: done scanning */
+
+ continue; /* partial match: drop it and go around again */
+ }
+
+ r = cunescape(escmountpoint, UNESCAPE_RELAX, &mountpoint); /* decode the escaped mount point field into mountpoint */
+ if (r < 0)
+ return r;
+
+ name = path_startswith(mountpoint, "/sys/fs/cgroup"); /* NULL means the mount is not under /sys/fs/cgroup; otherwise presumably the path remainder after that prefix (confirm against path_startswith) */
+ if (!name)
+ continue;
+
+ if (!filename_is_valid(name) && !isempty(name)) /* keep only an empty name or one that filename_is_valid() accepts */
+ continue;
+
+ if (streq(fstype, "tmpfs")) /* classify by file system type; any other type is ignored below */
+ type = CGMOUNT_TMPFS;
+ else if(streq(fstype, "cgroup"))
+ type = CGMOUNT_CGROUP1;
+ else if (streq(fstype, "cgroup2")) {
+ if (!isempty(name) && !streq(name, "systemd")) /* cgroup2 is only tolerated at the root or at "systemd" */
+ /* Unfortunately, we need to disable cgroup v2 when outer_cgver=CGROUP_UNIFIED_UNKNOWN
+ * until cgroup_setup() can do something intelligent with it. */
+ return log_error_errno(EINVAL, "Cannot use the cgroup v2 hierarchy unless running on a host with a recognized (systemd or unified) cgroup setup");
+ type = CGMOUNT_CGROUP2;
+ } else
+ continue;
+
+ r = mnt_split_optstr(superopts, NULL, NULL, &fsopts, 0, 0); /* libmount: only the file-system-specific part of the option string is requested (user and VFS outputs are NULL) */
+ if (r < 0)
+ return r;
+
+ if (!cgmount_add(&mounts, type, fsopts, name)) { /* record this mount; a false result is reported as -ENOMEM */
+ return -ENOMEM;
+ }
+
+ if (type == CGMOUNT_TMPFS) { /* for a tmpfs, additionally record the symlinks it contains */
+ _cleanup_closedir_ DIR *dir;
+ struct dirent *entry;
+
+ dir = opendir(mountpoint);
+ if (!dir)
+ return log_error_errno(errno, "Failed to open directory %s: %m", mountpoint);
+
+ FOREACH_DIRENT(entry, dir, break) { /* NOTE(review): third argument looks like the action taken on a read error - confirm in dirent-util.h */
+ _cleanup_free_ char *target = NULL;
+ r = dirent_ensure_type(dir, entry); /* presumably fills in d_type when the file system left it unset - confirm */
+ if (r < 0)
+ return r;
+ if (entry->d_type != DT_LNK) /* only symlinks are of interest here */
+ continue;
+ r = readlinkat_malloc(dirfd(dir), entry->d_name, &target);
+ if (r < 0)
+ return r;
+ if (!cgmount_add(&mounts, CGMOUNT_SYMLINK, target, entry->d_name)) /* symlink entry: link target plus the link's own name */
+ return -ENOMEM;
+ }
+ }
+ }
+
+ *ret_mounts = mounts; /* success: copy the collected list out to the caller */
+ mounts.mounts = NULL; /* then empty the local copy so the cleanup handler declared above has nothing to free */
+ mounts.n = 0;
+
+ return 0;
+}
+
/* Retrieve a list of cgroup v1 hierarchies. */
static int get_v1_hierarchies(Set *subsystems) {
_cleanup_fclose_ FILE *f = NULL;
@@ -522,6 +634,8 @@ int cgroup_decide_mounts(
bool use_cgns) {
switch (inner_cgver) {
+ case CGROUP_UNIFIED_INHERIT:
+ return cgroup_decide_mounts_inherit(ret_mounts);
case CGROUP_UNIFIED_NONE:
case CGROUP_UNIFIED_SYSTEMD:
/* Historically, if use_cgns, then this ran inside the container; if !use_cgns, then it ran outside.
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index b8afd8c325..1de119867b 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -328,11 +328,14 @@ static int pick_cgroup_version(const char *directory, CGroupUnified outer) {
* by checking libsystemd-shared). */
switch (outer) {
default:
+ case CGROUP_UNIFIED_INHERIT:
+ assert_not_reached("Invalid host cgroup version");
+ return -EINVAL;
case CGROUP_UNIFIED_UNKNOWN:
- assert_not_reached("Unknown host cgroup version");
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_INHERIT;
break;
case CGROUP_UNIFIED_NONE: /* cgroup v1-sd */
- arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_INHERIT;
break;
case CGROUP_UNIFIED_ALL: /* cgroup v2 */
/* Unified cgroup hierarchy support was added in 230. Unfortunately libsystemd-shared,
@@ -342,7 +345,7 @@ static int pick_cgroup_version(const char *directory, CGroupUnified outer) {
if (r < 0)
return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m");
if (r > 0)
- arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_ALL;
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_INHERIT;
else
arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
break;
@@ -352,7 +355,7 @@ static int pick_cgroup_version(const char *directory, CGroupUnified outer) {
if (r < 0)
return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m");
if (r > 0)
- arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_SYSTEMD;
+ arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_INHERIT;
else
arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
break;
@@ -4076,6 +4079,9 @@ int main(int argc, char *argv[]) {
log_parse_environment();
log_open();
cg_unified_flush();
+ r = cg_version(&outer_cgver);
+ if (r < 0)
+ outer_cgver = CGROUP_UNIFIED_UNKNOWN;
/* Make sure rename_process() in the stub init process can work */
saved_argv = argv;
@@ -4090,13 +4096,6 @@ int main(int argc, char *argv[]) {
r = -EPERM;
goto finish;
}
-
- r = cg_version(&outer_cgver);
- if (r < 0) {
- log_error_errno(r, "Failed to determine whether the unified cgroups hierarchy is used: %m");
- goto finish;
- }
-
r = determine_names();
if (r < 0)
goto finish;
@@ -4265,6 +4264,12 @@ int main(int argc, char *argv[]) {
goto finish;
}
+ if (outer_cgver == CGROUP_UNIFIED_UNKNOWN &&
+ arg_unified_cgroup_hierarchy != CGROUP_UNIFIED_INHERIT) {
+ r = log_error_errno(EINVAL, "Cannot set cgroup version of container unless running on a host with a recognized (systemd or unified) cgroup setup");
+ goto finish;
+ }
+
interactive =
isatty(STDIN_FILENO) > 0 &&
isatty(STDOUT_FILENO) > 0;