diff options
author | Luke Shumaker <lukeshu@lukeshu.com> | 2017-06-17 16:10:27 -0400 |
---|---|---|
committer | Luke Shumaker <lukeshu@lukeshu.com> | 2017-09-12 12:14:30 -0400 |
commit | ee183f27ff304b5d35c4f55fc53b6b83d1625ff9 (patch) | |
tree | 6b5ecc1291509da81004c190dc4c580c071df36e | |
parent | ed7ae30145072051e56e122464c13d4160a03835 (diff) |
cgroup-util,nspawn: Add a special "inherit" cgroup mode for nspawn (tag: notsystemd/v232.2)
The "inherit" mode inspects /proc/self/mountinfo to do its best to
replicate the cgroup setup of the outer host. It is used by default unless
a different, specific cgroup setup is to be used, either because the user
requested it (via $UNIFIED_CGROUP_HIERARCHY) or because
pick_cgroup_version() sniffed that the container has a version of systemd
that doesn't support the outer host's setup.
This means that nspawn can now be used when
outer_cgver=CGROUP_UNIFIED_UNKNOWN, i.e. when running on a non-systemd host.
Unfortunately, without further rework of cgroup-util, the name=systemd v1
hierarchy and the v2 hierarchy can't be meaningfully distinguished from
each other, which means that cgroup_setup() can't do anything intelligent with
the v2 hierarchy when outer_cgver=CGROUP_UNIFIED_UNKNOWN. So in the
meantime there's a test in cgroup_decide_mounts_inherit() that bails if it
sees the v2 hierarchy in a place that would cause
outer_cgver=CGROUP_UNIFIED_UNKNOWN (i.e. a cgroup2 mount under
/sys/fs/cgroup at any path other than /sys/fs/cgroup itself or
/sys/fs/cgroup/systemd).
-rw-r--r-- | Makefile.am | 2 | ||||
-rw-r--r-- | src/basic/cgroup-util.h | 1 | ||||
-rw-r--r-- | src/nspawn/nspawn-cgroup.c | 114 | ||||
-rw-r--r-- | src/nspawn/nspawn.c | 27 |
4 files changed, 133 insertions, 11 deletions
diff --git a/Makefile.am b/Makefile.am index 35ec60f736..4f1d729a74 100644 --- a/Makefile.am +++ b/Makefile.am @@ -3113,6 +3113,7 @@ systemd_nspawn_CFLAGS = \ $(AM_CFLAGS) \ $(ACL_CFLAGS) \ $(BLKID_CFLAGS) \ + $(MOUNT_CFLAGS) \ $(SECCOMP_CFLAGS) \ $(SELINUX_CFLAGS) @@ -3120,6 +3121,7 @@ systemd_nspawn_LDADD = \ libsystemd-shared.la \ $(ACL_LIBS) \ $(BLKID_LIBS) \ + $(MOUNT_LIBS) \ $(SECCOMP_LIBS) \ $(SELINUX_LIBS) diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 0e5b41103e..3406e0b4fb 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -118,6 +118,7 @@ static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) { #define DEFAULT_USER_TASKS_MAX_PERCENTAGE 33U /* 33% of PIDs, 10813 on default settings */ typedef enum CGroupUnified { + CGROUP_UNIFIED_INHERIT = -2, /* special case only used by nspawn */ CGROUP_UNIFIED_UNKNOWN = -1, CGROUP_UNIFIED_NONE = 0, /* Both systemd and controllers on legacy */ CGROUP_UNIFIED_SYSTEMD = 1, /* Only systemd on unified */ diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c index 3287aff95b..a226866207 100644 --- a/src/nspawn/nspawn-cgroup.c +++ b/src/nspawn/nspawn-cgroup.c @@ -18,8 +18,11 @@ ***/ #include <sys/mount.h> +#include <libmount.h> #include "alloc-util.h" +#include "dirent-util.h" +#include "escape.h" #include "fd-util.h" #include "fileio.h" #include "fs-util.h" @@ -267,6 +270,13 @@ int cgroup_setup(pid_t pid, CGroupUnified outer_cgver, CGroupUnified inner_cgver _cleanup_set_free_ Set *peers = NULL; int r; + /* For purposes of version comparison */ + if (inner_cgver == CGROUP_UNIFIED_INHERIT) + inner_cgver = outer_cgver; + + if (outer_cgver == CGROUP_UNIFIED_UNKNOWN) + return 0; + if ((outer_cgver >= CGROUP_UNIFIED_SYSTEMD) != (inner_cgver >= CGROUP_UNIFIED_SYSTEMD)) { /* sync the name=systemd hierarchy with the unified hierarchy */ r = sync_cgroup(pid, outer_cgver, inner_cgver, uid_shift); @@ -316,6 +326,108 @@ int cgroup_setup(pid_t pid, CGroupUnified outer_cgver, 
CGroupUnified inner_cgver /********************************************************************/ +static int cgroup_decide_mounts_inherit(CGMounts *ret_mounts) { + _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; + _cleanup_(cgroup_free_mounts) CGMounts mounts = {}; + int r; + + proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); + if (!proc_self_mountinfo) + return -errno; + + for (;;) { + _cleanup_free_ char *escmountpoint = NULL, *mountpoint = NULL, *fstype = NULL, *superopts = NULL, *fsopts = NULL; + char *name; + CGMountType type; + int k; + + k = fscanf(proc_self_mountinfo, + "%*s " /* (1) mount id */ + "%*s " /* (2) parent id */ + "%*s " /* (3) major:minor */ + "%*s " /* (4) root */ + "%ms " /* (5) mount point */ + "%*s" /* (6) per-mount options */ + "%*[^-]" /* (7) optional fields */ + "- " /* (8) separator */ + "%ms " /* (9) file system type */ + "%*s" /* (10) mount source */ + "%ms" /* (11) per-superblock options */ + "%*[^\n]", /* some rubbish at the end */ + &escmountpoint, + &fstype, + &superopts + ); + if (k != 3) { + if (k == EOF) + break; + + continue; + } + + r = cunescape(escmountpoint, UNESCAPE_RELAX, &mountpoint); + if (r < 0) + return r; + + name = path_startswith(mountpoint, "/sys/fs/cgroup"); + if (!name) + continue; + + if (!filename_is_valid(name) && !isempty(name)) + continue; + + if (streq(fstype, "tmpfs")) + type = CGMOUNT_TMPFS; + else if(streq(fstype, "cgroup")) + type = CGMOUNT_CGROUP1; + else if (streq(fstype, "cgroup2")) { + if (!isempty(name) && !streq(name, "systemd")) + /* Unfortunately, We need to disable cgroup v2 when outer_cgver=CGROUP_UNIFIED_UNKNOWN + * until cgroup_setup() can do something intelligent with it. 
*/ + return log_error_errno(EINVAL, "Cannot use the cgroup v2 hierarchy unless running on a host with a recognized (systemd or unified) cgroup setup"); + type = CGMOUNT_CGROUP2; + } else + continue; + + r = mnt_split_optstr(superopts, NULL, NULL, &fsopts, 0, 0); + if (r < 0) + return r; + + if (!cgmount_add(&mounts, type, fsopts, name)) { + return -ENOMEM; + } + + if (type == CGMOUNT_TMPFS) { + _cleanup_closedir_ DIR *dir; + struct dirent *entry; + + dir = opendir(mountpoint); + if (!dir) + return log_error_errno(errno, "Failed to open directory %s: %m", mountpoint); + + FOREACH_DIRENT(entry, dir, break) { + _cleanup_free_ char *target = NULL; + r = dirent_ensure_type(dir, entry); + if (r < 0) + return r; + if (entry->d_type != DT_LNK) + continue; + r = readlinkat_malloc(dirfd(dir), entry->d_name, &target); + if (r < 0) + return r; + if (!cgmount_add(&mounts, CGMOUNT_SYMLINK, target, entry->d_name)) + return -ENOMEM; + } + } + } + + *ret_mounts = mounts; + mounts.mounts = NULL; + mounts.n = 0; + + return 0; +} + /* Retrieve a list of cgroup v1 hierarchies. */ static int get_v1_hierarchies(Set *subsystems) { _cleanup_fclose_ FILE *f = NULL; @@ -522,6 +634,8 @@ int cgroup_decide_mounts( bool use_cgns) { switch (inner_cgver) { + case CGROUP_UNIFIED_INHERIT: + return cgroup_decide_mounts_inherit(ret_mounts); case CGROUP_UNIFIED_NONE: case CGROUP_UNIFIED_SYSTEMD: /* Historically, if use_cgns, then this ran inside the container; if !use_cgns, then it ran outside. diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index b8afd8c325..1de119867b 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -328,11 +328,14 @@ static int pick_cgroup_version(const char *directory, CGroupUnified outer) { * by checking libsystemd-shared). 
*/ switch (outer) { default: + case CGROUP_UNIFIED_INHERIT: + assert_not_reached("Invalid host cgroup version"); + return -EINVAL; case CGROUP_UNIFIED_UNKNOWN: - assert_not_reached("Unknown host cgroup version"); + arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_INHERIT; break; case CGROUP_UNIFIED_NONE: /* cgroup v1-sd */ - arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE; + arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_INHERIT; break; case CGROUP_UNIFIED_ALL: /* cgroup v2 */ /* Unified cgroup hierarchy support was added in 230. Unfortunately libsystemd-shared, @@ -342,7 +345,7 @@ static int pick_cgroup_version(const char *directory, CGroupUnified outer) { if (r < 0) return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m"); if (r > 0) - arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_ALL; + arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_INHERIT; else arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE; break; @@ -352,7 +355,7 @@ static int pick_cgroup_version(const char *directory, CGroupUnified outer) { if (r < 0) return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m"); if (r > 0) - arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_SYSTEMD; + arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_INHERIT; else arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE; break; @@ -4076,6 +4079,9 @@ int main(int argc, char *argv[]) { log_parse_environment(); log_open(); cg_unified_flush(); + r = cg_version(&outer_cgver); + if (r < 0) + outer_cgver = CGROUP_UNIFIED_UNKNOWN; /* Make sure rename_process() in the stub init process can work */ saved_argv = argv; @@ -4090,13 +4096,6 @@ int main(int argc, char *argv[]) { r = -EPERM; goto finish; } - - r = cg_version(&outer_cgver); - if (r < 0) { - log_error_errno(r, "Failed to determine whether the unified cgroups hierarchy is used: %m"); - goto finish; - } - r = determine_names(); if (r < 0) goto finish; @@ -4265,6 
+4264,12 @@ int main(int argc, char *argv[]) { goto finish; } + if (outer_cgver == CGROUP_UNIFIED_UNKNOWN && + arg_unified_cgroup_hierarchy != CGROUP_UNIFIED_INHERIT) { + r = log_error_errno(EINVAL, "Cannot set cgroup version of container unless running on a host with a recognized (systemd or unified) cgroup setup"); + goto finish; + } + interactive = isatty(STDIN_FILENO) > 0 && isatty(STDOUT_FILENO) > 0; |