diff options
author | Luke Shumaker <lukeshu@lukeshu.com> | 2017-06-17 16:10:27 -0400 |
---|---|---|
committer | Luke Shumaker <lukeshu@lukeshu.com> | 2017-09-13 14:54:23 -0400 |
commit | 533c6b93ec1f8af64483f31f6adb98306653ece9 (patch) | |
tree | 0deb72f4b70291d65a19aa46e8528dbe90bef5ff | |
parent | 3cca1d4c3b24f1006e81535cc035f0c1c05602ff (diff) |
cgroup-util,nspawn: Add a special "inherit" cgroup mode for nspawn [notsystemd/v234.1]
The "inherit" mode inspects /proc/self/mountinfo to do its best to
replicate the cgroup setup of the outer host. It is used by default unless
a different, specific cgroup setup is to be used, either because the user
requested it (via $UNIFIED_CGROUP_HIERARCHY) or because
pick_cgroup_version() sniffed that the container has a version of systemd
that doesn't support the outer host's setup.
This means that nspawn can now be used when
outer_cgver=CGROUP_UNIFIED_UNKNOWN, i.e. when running on a non-systemd host.
Unfortunately, without further rework of cgroup-util, the name=systemd v1
hierarchy and the v2 hierarchy can't be meaningfully told apart, which
means that cgroup_setup() can't do anything intelligent with the v2
hierarchy when outer_cgver=CGROUP_UNIFIED_UNKNOWN. So in the meantime,
cgroup_decide_mounts_inherit() contains a check that bails out if it sees
the v2 hierarchy mounted in a place that would cause
outer_cgver=CGROUP_UNIFIED_UNKNOWN.
-rw-r--r-- | Makefile.am | 2 | ||||
-rw-r--r-- | meson.build | 1 | ||||
-rw-r--r-- | src/basic/cgroup-util.h | 1 | ||||
-rw-r--r-- | src/nspawn/nspawn-cgroup.c | 114 | ||||
-rw-r--r-- | src/nspawn/nspawn.c | 26 |
5 files changed, 135 insertions, 9 deletions
diff --git a/Makefile.am b/Makefile.am index b95c93bb98..8962e7943c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -3277,6 +3277,7 @@ systemd_nspawn_CFLAGS = \ $(AM_CFLAGS) \ $(ACL_CFLAGS) \ $(BLKID_CFLAGS) \ + $(MOUNT_CFLAGS) \ $(SECCOMP_CFLAGS) \ $(SELINUX_CFLAGS) @@ -3284,6 +3285,7 @@ systemd_nspawn_LDADD = \ libsystemd-shared.la \ $(ACL_LIBS) \ $(BLKID_LIBS) \ + $(MOUNT_LIBS) \ $(SECCOMP_LIBS) \ $(SELINUX_LIBS) diff --git a/meson.build b/meson.build index 1e1677ca7e..098c1e9ada 100644 --- a/meson.build +++ b/meson.build @@ -2161,6 +2161,7 @@ exe = executable('systemd-nspawn', link_with : [libshared], dependencies : [libacl, libblkid, + libmount, libseccomp, libselinux], install_rpath : rootlibexecdir, diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 16edbc580f..a8216c7b6a 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -118,6 +118,7 @@ static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) { #define DEFAULT_USER_TASKS_MAX_PERCENTAGE 33U /* 33% of PIDs, 10813 on default settings */ typedef enum CGroupUnified { + CGROUP_UNIFIED_INHERIT = -2, /* special case only used by nspawn */ CGROUP_UNIFIED_UNKNOWN = -1, CGROUP_UNIFIED_NONE = 0, /* Both systemd and controllers on legacy */ CGROUP_UNIFIED_SYSTEMD232 = 1, /* Only systemd on unified, as done by SD232 */ diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c index 9a873a7d53..2a15f1892c 100644 --- a/src/nspawn/nspawn-cgroup.c +++ b/src/nspawn/nspawn-cgroup.c @@ -18,8 +18,11 @@ ***/ #include <sys/mount.h> +#include <libmount.h> #include "alloc-util.h" +#include "dirent-util.h" +#include "escape.h" #include "fd-util.h" #include "fileio.h" #include "fs-util.h" @@ -267,6 +270,13 @@ int cgroup_setup(pid_t pid, CGroupUnified outer_cgver, CGroupUnified inner_cgver _cleanup_set_free_ Set *peers = NULL; int r; + /* For purposes of version comparison */ + if (inner_cgver == CGROUP_UNIFIED_INHERIT) + inner_cgver = outer_cgver; + + if (outer_cgver == 
CGROUP_UNIFIED_UNKNOWN) + return 0; + if ((outer_cgver >= CGROUP_UNIFIED_SYSTEMD232) != (inner_cgver >= CGROUP_UNIFIED_SYSTEMD232)) { /* sync the name=systemd hierarchy with the unified hierarchy */ r = sync_cgroup(pid, outer_cgver, inner_cgver, uid_shift); @@ -316,6 +326,108 @@ int cgroup_setup(pid_t pid, CGroupUnified outer_cgver, CGroupUnified inner_cgver /********************************************************************/ +static int cgroup_decide_mounts_inherit(CGMounts *ret_mounts) { + _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; + _cleanup_(cgroup_free_mounts) CGMounts mounts = {}; + int r; + + proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); + if (!proc_self_mountinfo) + return -errno; + + for (;;) { + _cleanup_free_ char *escmountpoint = NULL, *mountpoint = NULL, *fstype = NULL, *superopts = NULL, *fsopts = NULL; + char *name; + CGMountType type; + int k; + + k = fscanf(proc_self_mountinfo, + "%*s " /* (1) mount id */ + "%*s " /* (2) parent id */ + "%*s " /* (3) major:minor */ + "%*s " /* (4) root */ + "%ms " /* (5) mount point */ + "%*s" /* (6) per-mount options */ + "%*[^-]" /* (7) optional fields */ + "- " /* (8) separator */ + "%ms " /* (9) file system type */ + "%*s" /* (10) mount source */ + "%ms" /* (11) per-superblock options */ + "%*[^\n]", /* some rubbish at the end */ + &escmountpoint, + &fstype, + &superopts + ); + if (k != 3) { + if (k == EOF) + break; + + continue; + } + + r = cunescape(escmountpoint, UNESCAPE_RELAX, &mountpoint); + if (r < 0) + return r; + + name = path_startswith(mountpoint, "/sys/fs/cgroup"); + if (!name) + continue; + + if (!filename_is_valid(name) && !isempty(name)) + continue; + + if (streq(fstype, "tmpfs")) + type = CGMOUNT_TMPFS; + else if(streq(fstype, "cgroup")) + type = CGMOUNT_CGROUP1; + else if (streq(fstype, "cgroup2")) { + if (!isempty(name) && !streq(name, "systemd")) + /* Unfortunately, We need to disable cgroup v2 when outer_cgver=CGROUP_UNIFIED_UNKNOWN + * until cgroup_setup() can do 
something intelligent with it. */ + return log_error_errno(EINVAL, "Cannot use the cgroup v2 hierarchy unless running on a host with a recognized (systemd or unified) cgroup setup"); + type = CGMOUNT_CGROUP2; + } else + continue; + + r = mnt_split_optstr(superopts, NULL, NULL, &fsopts, 0, 0); + if (r < 0) + return r; + + if (!cgmount_add(&mounts, type, fsopts, name)) { + return -ENOMEM; + } + + if (type == CGMOUNT_TMPFS) { + _cleanup_closedir_ DIR *dir; + struct dirent *entry; + + dir = opendir(mountpoint); + if (!dir) + return log_error_errno(errno, "Failed to open directory %s: %m", mountpoint); + + FOREACH_DIRENT(entry, dir, break) { + _cleanup_free_ char *target = NULL; + r = dirent_ensure_type(dir, entry); + if (r < 0) + return r; + if (entry->d_type != DT_LNK) + continue; + r = readlinkat_malloc(dirfd(dir), entry->d_name, &target); + if (r < 0) + return r; + if (!cgmount_add(&mounts, CGMOUNT_SYMLINK, target, entry->d_name)) + return -ENOMEM; + } + } + } + + *ret_mounts = mounts; + mounts.mounts = NULL; + mounts.n = 0; + + return 0; +} + /* Retrieve a list of cgroup v1 hierarchies. */ static int get_v1_hierarchies(Set *subsystems) { _cleanup_fclose_ FILE *f = NULL; @@ -537,6 +649,8 @@ int cgroup_decide_mounts( default: case CGROUP_UNIFIED_UNKNOWN: assert_not_reached("unknown inner_cgver"); + case CGROUP_UNIFIED_INHERIT: + return cgroup_decide_mounts_inherit(ret_mounts); case CGROUP_UNIFIED_NONE: case CGROUP_UNIFIED_SYSTEMD232: case CGROUP_UNIFIED_SYSTEMD233: diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index b8f8a3dd70..6399a9e882 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -325,8 +325,12 @@ static int pick_cgroup_version(const char *directory, CGroupUnified outer_cgver) * by checking libsystemd-shared). 
*/ switch (outer_cgver) { default: + case CGROUP_UNIFIED_INHERIT: + assert_not_reached("Invalid host cgroup version"); + return -EINVAL; case CGROUP_UNIFIED_UNKNOWN: - assert_not_reached("unknown cgroup version"); + arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_INHERIT; + break; case CGROUP_UNIFIED_ALL: /* Unified cgroup hierarchy support was added in 230. Unfortunately, libsystemd-shared (which we use * to sniff the systemd version) was only added in 231, so we'll have a false negative here for 230. */ @@ -334,7 +338,7 @@ static int pick_cgroup_version(const char *directory, CGroupUnified outer_cgver) if (r < 0) return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m"); if (r > 0) - arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_ALL; + arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_INHERIT; else arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE; break; @@ -344,7 +348,7 @@ static int pick_cgroup_version(const char *directory, CGroupUnified outer_cgver) if (r < 0) return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m"); if (r > 0) - arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_SYSTEMD233; + arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_INHERIT; else arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE; break; @@ -354,12 +358,12 @@ static int pick_cgroup_version(const char *directory, CGroupUnified outer_cgver) if (r < 0) return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m"); if (r > 0) - arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_SYSTEMD232; + arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_INHERIT; else arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE; break; case CGROUP_UNIFIED_NONE: - arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE; + arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_INHERIT; break; } @@ -3629,10 +3633,8 @@ int main(int argc, char *argv[]) { goto 
finish; r = cg_version(&outer_cgver); - if (r < 0) { - log_error_errno(r, "Failed to determine whether the unified cgroups hierarchy is used: %m"); - goto finish; - } + if (r < 0) + outer_cgver = CGROUP_UNIFIED_UNKNOWN; if (geteuid() != 0) { log_error("Need to be root."); @@ -3900,6 +3902,12 @@ int main(int argc, char *argv[]) { goto finish; } + if (outer_cgver == CGROUP_UNIFIED_UNKNOWN && + arg_unified_cgroup_hierarchy != CGROUP_UNIFIED_INHERIT) { + r = log_error_errno(EINVAL, "Cannot set cgroup version of container unless running on a host with a recognized (systemd or unified) cgroup setup"); + goto finish; + } + interactive = isatty(STDIN_FILENO) > 0 && isatty(STDOUT_FILENO) > 0; |