diff options
author | Luke Shumaker <lukeshu@parabola.nu> | 2018-07-23 22:54:04 -0400 |
---|---|---|
committer | Luke Shumaker <lukeshu@parabola.nu> | 2018-08-16 21:55:17 -0400 |
commit | 4312657456f1d253045e3b661cc96568c52f9e50 (patch) | |
tree | c668e9f12ca149697df302dc880ef4db828971c1 | |
parent | 44c65162b10acb9f80254a2a356e22226dcc2c2d (diff) |
cgroup-util,nspawn: Add a special "inherit" cgroup mode for nspawn
The "inherit" mode inspects /proc/self/mountinfo to do its best to
replicate the cgroup setup of the outer host. It is used by default unless
a different, specific cgroup setup is to be used: either because the user
requested it (via $UNIFIED_CGROUP_HIERARCHY), or because
pick_cgroup_version() sniffed that the container has a version of systemd
that doesn't support the outer host's setup.
This means that nspawn can now be used when
outer_cgver=CGROUP_UNIFIED_UNKNOWN, i.e. when running on a non-systemd host.
-rw-r--r-- | doc/ENVIRONMENT.md | 3 | ||||
-rw-r--r-- | src/basic/cgroup-util.h | 1 | ||||
-rw-r--r-- | src/nspawn/meson.build | 1 | ||||
-rw-r--r-- | src/nspawn/nspawn-cgroup.c | 154 | ||||
-rw-r--r-- | src/nspawn/nspawn.c | 24 |
5 files changed, 171 insertions, 12 deletions
diff --git a/doc/ENVIRONMENT.md b/doc/ENVIRONMENT.md index 7afd8127a2..9111835208 100644 --- a/doc/ENVIRONMENT.md +++ b/doc/ENVIRONMENT.md @@ -59,9 +59,10 @@ systemd-nspawn: - "hybrid" or "hybrid-sd233" sets it to systemd-v233+'s cgroup-v1/v2 hybrid mode, - "unified" or truthy values sets it to cgroup-v2 mode, + - "inherit" sets it to inherit the host's cgroup mode Leaving it unset causes it to try to magically sniff the appropriate - cgroup mode from the container's image. + cgroup mode from the container's image, falling back to "inherit". * `$SYSTEMD_NSPAWN_API_VFS_WRITABLE=1` — if set, make /sys and /proc/sys and friends writable in the container. If set to "network", leave only diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 6a840c328f..c07c5ee92b 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -104,6 +104,7 @@ static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) { #define DEFAULT_USER_TASKS_MAX_PERCENTAGE 33U /* 33% of PIDs, 10813 on default settings */ typedef enum CGroupUnified { + CGROUP_UNIFIED_INHERIT = -2, /* special case only used by nspawn */ CGROUP_UNIFIED_UNKNOWN = -1, CGROUP_UNIFIED_NONE = 0, /* Both systemd and controllers on legacy */ CGROUP_UNIFIED_SYSTEMD232 = 1, /* Only systemd on unified, as done by SD232 */ diff --git a/src/nspawn/meson.build b/src/nspawn/meson.build index be54ba36c5..16b0cfed0c 100644 --- a/src/nspawn/meson.build +++ b/src/nspawn/meson.build @@ -37,6 +37,7 @@ libnspawn_core = static_library( libnspawn_core_sources, include_directories : includes, dependencies : [libacl, + libmount, libseccomp, libselinux]) diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c index d2df6ae400..8814bbcdbf 100644 --- a/src/nspawn/nspawn-cgroup.c +++ b/src/nspawn/nspawn-cgroup.c @@ -1,8 +1,11 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ #include <sys/mount.h> +#include <libmount.h> #include "alloc-util.h" +#include "dirent-util.h" +#include "escape.h" #include "fd-util.h" #include 
"fileio.h" #include "fs-util.h" @@ -392,12 +395,55 @@ int cgroup_setup(pid_t pid, CGroupUnified outer_cgver, CGroupUnified inner_cgver _cleanup_free_ char *cg1sd_mountpoint = NULL; int r, q; - assert(outer_cgver != CGROUP_UNIFIED_UNKNOWN); - assert(inner_cgver != CGROUP_UNIFIED_UNKNOWN); + /* For purposes of version comparison */ + if (inner_cgver == CGROUP_UNIFIED_INHERIT) + inner_cgver = outer_cgver; cg1sd_used = inner_cgver == CGROUP_UNIFIED_NONE || inner_cgver == CGROUP_UNIFIED_SYSTEMD233; - cg2_used = inner_cgver >= CGROUP_UNIFIED_SYSTEMD232; - if (cg2_used) { + cg2_used = inner_cgver >= CGROUP_UNIFIED_SYSTEMD232; /* or ... */ + if (inner_cgver == CGROUP_UNIFIED_UNKNOWN) { + _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; + + proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); + if (!proc_self_mountinfo) + return -errno; + + for (;;) { + _cleanup_free_ char *escmountpoint = NULL, *mountpoint = NULL, *fstype = NULL; + int k; + + k = fscanf(proc_self_mountinfo, + "%*s " /* (1) mount id */ + "%*s " /* (2) parent id */ + "%*s " /* (3) major:minor */ + "%*s " /* (4) root */ + "%ms " /* (5) mount point */ + "%*s" /* (6) per-mount options */ + "%*[^-]" /* (7) optional fields */ + "- " /* (8) separator */ + "%ms " /* (9) file system type */ + "%*s" /* (10) mount source */ + "%*s" /* (11) per-superblock options */ + "%*[^\n]", /* some rubbish at the end */ + &escmountpoint, + &fstype + ); + if (k != 2) { + if (k == EOF) + break; + continue; + } + + r = cunescape(escmountpoint, UNESCAPE_RELAX, &mountpoint); + if (r < 0) + return r; + + if (path_startswith(mountpoint, "/sys/fs/cgroup") && streq(fstype, "cgroup2")) { + cg2_used = true; + cg2_mountpoint = strdup(mountpoint); + } + } + } else if (cg2_used) { switch (outer_cgver) { case CGROUP_UNIFIED_SYSTEMD233: cg2_mountpoint = strdup("/sys/fs/cgroup/unified"); break; case CGROUP_UNIFIED_SYSTEMD232: cg2_mountpoint = strdup("/sys/fs/cgroup/systemd"); break; @@ -466,6 +512,104 @@ int cgroup_setup(pid_t pid, 
CGroupUnified outer_cgver, CGroupUnified inner_cgver /* cgroup_decide_mounts *********************************************/ +static int cgroup_decide_mounts_inherit(CGMounts *ret_mounts) { + _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; + _cleanup_(cgroup_free_mounts) CGMounts mounts = {}; + int r; + + proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); + if (!proc_self_mountinfo) + return -errno; + + for (;;) { + _cleanup_free_ char *escmountpoint = NULL, *mountpoint = NULL, *fstype = NULL, *superopts = NULL, *fsopts = NULL; + char *name; + CGMountType type; + int k; + + k = fscanf(proc_self_mountinfo, + "%*s " /* (1) mount id */ + "%*s " /* (2) parent id */ + "%*s " /* (3) major:minor */ + "%*s " /* (4) root */ + "%ms " /* (5) mount point */ + "%*s" /* (6) per-mount options */ + "%*[^-]" /* (7) optional fields */ + "- " /* (8) separator */ + "%ms " /* (9) file system type */ + "%*s" /* (10) mount source */ + "%ms" /* (11) per-superblock options */ + "%*[^\n]", /* some rubbish at the end */ + &escmountpoint, + &fstype, + &superopts + ); + if (k != 3) { + if (k == EOF) + break; + + continue; + } + + r = cunescape(escmountpoint, UNESCAPE_RELAX, &mountpoint); + if (r < 0) + return r; + + name = path_startswith(mountpoint, "/sys/fs/cgroup"); + if (!name) + continue; + + if (!filename_is_valid(name) && !isempty(name)) + continue; + + if (streq(fstype, "tmpfs")) + type = CGMOUNT_TMPFS; + else if(streq(fstype, "cgroup")) + type = CGMOUNT_CGROUP1; + else if (streq(fstype, "cgroup2")) { + type = CGMOUNT_CGROUP2; + } else + continue; + + r = mnt_split_optstr(superopts, NULL, NULL, &fsopts, 0, 0); + if (r < 0) + return r; + + if (!cgmount_add(&mounts, type, fsopts, name)) { + return -ENOMEM; + } + + if (type == CGMOUNT_TMPFS) { + _cleanup_closedir_ DIR *dir; + struct dirent *entry; + + dir = opendir(mountpoint); + if (!dir) + return log_error_errno(errno, "Failed to open directory %s: %m", mountpoint); + + FOREACH_DIRENT(entry, dir, break) { + _cleanup_free_ char 
*target = NULL; + r = dirent_ensure_type(dir, entry); + if (r < 0) + return r; + if (entry->d_type != DT_LNK) + continue; + r = readlinkat_malloc(dirfd(dir), entry->d_name, &target); + if (r < 0) + return r; + if (!cgmount_add(&mounts, CGMOUNT_SYMLINK, target, entry->d_name)) + return -ENOMEM; + } + } + } + + *ret_mounts = mounts; + mounts.mounts = NULL; + mounts.n = 0; + + return 0; +} + /* Retrieve a list of cgroup v1 hierarchies. */ static int get_v1_hierarchies(Set **ret) { _cleanup_set_free_free_ Set *controllers = NULL; @@ -688,6 +832,8 @@ int cgroup_decide_mounts( default: case CGROUP_UNIFIED_UNKNOWN: assert_not_reached("unknown inner_cgver"); + case CGROUP_UNIFIED_INHERIT: + return cgroup_decide_mounts_inherit(ret_mounts); case CGROUP_UNIFIED_NONE: case CGROUP_UNIFIED_SYSTEMD232: case CGROUP_UNIFIED_SYSTEMD233: diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 5ae11ea0ce..ca827844db 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -341,6 +341,8 @@ static void parse_inner_cgver_env(void) { arg_inner_cgver = CGROUP_UNIFIED_SYSTEMD233; else if (streq(e, "unified")) arg_inner_cgver = CGROUP_UNIFIED_ALL; + else if (streq(e, "inherit")) + arg_inner_cgver = CGROUP_UNIFIED_INHERIT; else { r = parse_boolean(e); if (r < 0) { @@ -362,8 +364,12 @@ static int detect_inner_cgver_from_image(const char *directory, CGroupUnified ou * by checking libsystemd-shared). */ switch (outer_cgver) { default: + case CGROUP_UNIFIED_INHERIT: + assert_not_reached("Invalid host cgroup version"); + return -EINVAL; case CGROUP_UNIFIED_UNKNOWN: - assert_not_reached("unknown cgroup version"); + arg_inner_cgver = CGROUP_UNIFIED_INHERIT; + break; case CGROUP_UNIFIED_ALL: /* Unified cgroup hierarchy support was added in 230. Unfortunately, libsystemd-shared (which we use * to sniff the systemd version) was only added in 231, so we'll have a false negative here for 230. 
*/ @@ -371,7 +377,7 @@ static int detect_inner_cgver_from_image(const char *directory, CGroupUnified ou if (r < 0) return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m"); if (r > 0) - arg_inner_cgver = CGROUP_UNIFIED_ALL; + arg_inner_cgver = CGROUP_UNIFIED_INHERIT; else arg_inner_cgver = CGROUP_UNIFIED_NONE; break; @@ -381,7 +387,7 @@ static int detect_inner_cgver_from_image(const char *directory, CGroupUnified ou if (r < 0) return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m"); if (r > 0) - arg_inner_cgver = CGROUP_UNIFIED_SYSTEMD233; + arg_inner_cgver = CGROUP_UNIFIED_INHERIT; else arg_inner_cgver = CGROUP_UNIFIED_NONE; break; @@ -391,12 +397,12 @@ static int detect_inner_cgver_from_image(const char *directory, CGroupUnified ou if (r < 0) return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m"); if (r > 0) - arg_inner_cgver = CGROUP_UNIFIED_SYSTEMD232; + arg_inner_cgver = CGROUP_UNIFIED_INHERIT; else arg_inner_cgver = CGROUP_UNIFIED_NONE; break; case CGROUP_UNIFIED_NONE: - arg_inner_cgver = CGROUP_UNIFIED_NONE; + arg_inner_cgver = CGROUP_UNIFIED_INHERIT; break; } @@ -4275,8 +4281,12 @@ int main(int argc, char *argv[]) { goto finish; r = cg_version(&outer_cgver); - if (r < 0) { - log_error_errno(r, "Failed to determine whether the unified cgroups hierarchy is used: %m"); + if (r < 0) + outer_cgver = CGROUP_UNIFIED_UNKNOWN; + + if (outer_cgver == CGROUP_UNIFIED_UNKNOWN && arg_inner_cgver > CGROUP_UNIFIED_UNKNOWN) { + log_error("Cannot set cgroup version of container unless running on a host with a recognized (systemd or unified) cgroup setup"); + r = -EINVAL; goto finish; } |