summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Luke Shumaker <lukeshu@parabola.nu>, 2018-07-23 22:54:04 -0400
committer: Luke Shumaker <lukeshu@parabola.nu>, 2018-08-16 21:55:17 -0400
commit: 4312657456f1d253045e3b661cc96568c52f9e50 (patch)
tree: c668e9f12ca149697df302dc880ef4db828971c1
parent: 44c65162b10acb9f80254a2a356e22226dcc2c2d (diff)
cgroup-util,nspawn: Add a special "inherit" cgroup mode for nspawn
The "inherit" mode inspects /proc/self/mountinfo to do its best to replicate the cgroup setup of the outer host. It is used by default unless a different specific cgroup setup is to be used; either because the user requested it (via $UNIFIED_CGROUP_HIERARCHY), or because pick_cgroup_version() sniffed that the container has a version of systemd that doesn't support the outer host's setup. This means that nspawn can now be used when outer_cgver=CGROUP_UNIFIED_UNKNOWN; AKA when running on a non-systemd host.
-rw-r--r-- doc/ENVIRONMENT.md | 3
-rw-r--r-- src/basic/cgroup-util.h | 1
-rw-r--r-- src/nspawn/meson.build | 1
-rw-r--r-- src/nspawn/nspawn-cgroup.c | 154
-rw-r--r-- src/nspawn/nspawn.c | 24
5 files changed, 171 insertions, 12 deletions
diff --git a/doc/ENVIRONMENT.md b/doc/ENVIRONMENT.md
index 7afd8127a2..9111835208 100644
--- a/doc/ENVIRONMENT.md
+++ b/doc/ENVIRONMENT.md
@@ -59,9 +59,10 @@ systemd-nspawn:
- "hybrid" or "hybrid-sd233" sets it to systemd-v233+'s
cgroup-v1/v2 hybrid mode,
- "unified" or truthy values sets it to cgroup-v2 mode,
+ - "inherit" sets it to inherit the host's cgroup mode.
Leaving it unset causes it to try to magically sniff the appropriate
- cgroup mode from the container's image.
+ cgroup mode from the container's image, falling back to "inherit".
* `$SYSTEMD_NSPAWN_API_VFS_WRITABLE=1` — if set, make /sys and /proc/sys and
friends writable in the container. If set to "network", leave only
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
index 6a840c328f..c07c5ee92b 100644
--- a/src/basic/cgroup-util.h
+++ b/src/basic/cgroup-util.h
@@ -104,6 +104,7 @@ static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) {
#define DEFAULT_USER_TASKS_MAX_PERCENTAGE 33U /* 33% of PIDs, 10813 on default settings */
typedef enum CGroupUnified {
+ CGROUP_UNIFIED_INHERIT = -2, /* special case only used by nspawn */
CGROUP_UNIFIED_UNKNOWN = -1,
CGROUP_UNIFIED_NONE = 0, /* Both systemd and controllers on legacy */
CGROUP_UNIFIED_SYSTEMD232 = 1, /* Only systemd on unified, as done by SD232 */
diff --git a/src/nspawn/meson.build b/src/nspawn/meson.build
index be54ba36c5..16b0cfed0c 100644
--- a/src/nspawn/meson.build
+++ b/src/nspawn/meson.build
@@ -37,6 +37,7 @@ libnspawn_core = static_library(
libnspawn_core_sources,
include_directories : includes,
dependencies : [libacl,
+ libmount,
libseccomp,
libselinux])
diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c
index d2df6ae400..8814bbcdbf 100644
--- a/src/nspawn/nspawn-cgroup.c
+++ b/src/nspawn/nspawn-cgroup.c
@@ -1,8 +1,11 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include <sys/mount.h>
+#include <libmount.h>
#include "alloc-util.h"
+#include "dirent-util.h"
+#include "escape.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
@@ -392,12 +395,55 @@ int cgroup_setup(pid_t pid, CGroupUnified outer_cgver, CGroupUnified inner_cgver
_cleanup_free_ char *cg1sd_mountpoint = NULL;
int r, q;
- assert(outer_cgver != CGROUP_UNIFIED_UNKNOWN);
- assert(inner_cgver != CGROUP_UNIFIED_UNKNOWN);
+ /* For purposes of version comparison */
+ if (inner_cgver == CGROUP_UNIFIED_INHERIT)
+ inner_cgver = outer_cgver;
cg1sd_used = inner_cgver == CGROUP_UNIFIED_NONE || inner_cgver == CGROUP_UNIFIED_SYSTEMD233;
- cg2_used = inner_cgver >= CGROUP_UNIFIED_SYSTEMD232;
- if (cg2_used) {
+ cg2_used = inner_cgver >= CGROUP_UNIFIED_SYSTEMD232; /* or ... */
+ if (inner_cgver == CGROUP_UNIFIED_UNKNOWN) {
+ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+
+ proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+ if (!proc_self_mountinfo)
+ return -errno;
+
+ for (;;) {
+ _cleanup_free_ char *escmountpoint = NULL, *mountpoint = NULL, *fstype = NULL;
+ int k;
+
+ k = fscanf(proc_self_mountinfo,
+ "%*s " /* (1) mount id */
+ "%*s " /* (2) parent id */
+ "%*s " /* (3) major:minor */
+ "%*s " /* (4) root */
+ "%ms " /* (5) mount point */
+ "%*s" /* (6) per-mount options */
+ "%*[^-]" /* (7) optional fields */
+ "- " /* (8) separator */
+ "%ms " /* (9) file system type */
+ "%*s" /* (10) mount source */
+ "%*s" /* (11) per-superblock options */
+ "%*[^\n]", /* some rubbish at the end */
+ &escmountpoint,
+ &fstype
+ );
+ if (k != 2) {
+ if (k == EOF)
+ break;
+ continue;
+ }
+
+ r = cunescape(escmountpoint, UNESCAPE_RELAX, &mountpoint);
+ if (r < 0)
+ return r;
+
+ if (path_startswith(mountpoint, "/sys/fs/cgroup") && streq(fstype, "cgroup2")) {
+ cg2_used = true;
+ cg2_mountpoint = strdup(mountpoint);
+ }
+ }
+ } else if (cg2_used) {
switch (outer_cgver) {
case CGROUP_UNIFIED_SYSTEMD233: cg2_mountpoint = strdup("/sys/fs/cgroup/unified"); break;
case CGROUP_UNIFIED_SYSTEMD232: cg2_mountpoint = strdup("/sys/fs/cgroup/systemd"); break;
@@ -466,6 +512,104 @@ int cgroup_setup(pid_t pid, CGroupUnified outer_cgver, CGroupUnified inner_cgver
/* cgroup_decide_mounts *********************************************/
+/* Builds the list of cgroup mounts for the "inherit" mode by replicating what is currently mounted at, or
+ * directly below, /sys/fs/cgroup according to /proc/self/mountinfo.
+ *
+ * For every matching mountinfo entry of type tmpfs, cgroup or cgroup2 one entry is recorded with
+ * cgmount_add(), carrying the options that mnt_split_optstr() extracts from the per-superblock options.
+ * For tmpfs entries the mounted directory is additionally scanned and every symlink found in it is
+ * recorded as a CGMOUNT_SYMLINK entry (link target plus link name).
+ *
+ * On success, returns 0 and hands the collected list over to *ret_mounts. On failure, returns a negative
+ * errno-style value and leaves *ret_mounts untouched. */
+static int cgroup_decide_mounts_inherit(CGMounts *ret_mounts) {
+        _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+        _cleanup_(cgroup_free_mounts) CGMounts mounts = {};
+        int r;
+
+        assert(ret_mounts);
+
+        proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+        if (!proc_self_mountinfo)
+                return -errno;
+
+        for (;;) {
+                _cleanup_free_ char *escmountpoint = NULL, *mountpoint = NULL, *fstype = NULL, *superopts = NULL, *fsopts = NULL;
+                char *name;
+                CGMountType type;
+                int k;
+
+                k = fscanf(proc_self_mountinfo,
+                           "%*s "       /* (1) mount id */
+                           "%*s "       /* (2) parent id */
+                           "%*s "       /* (3) major:minor */
+                           "%*s "       /* (4) root */
+                           "%ms "       /* (5) mount point */
+                           "%*s"        /* (6) per-mount options */
+                           "%*[^-]"     /* (7) optional fields */
+                           "- "         /* (8) separator */
+                           "%ms "       /* (9) file system type */
+                           "%*s"        /* (10) mount source */
+                           "%ms"        /* (11) per-superblock options */
+                           "%*[^\n]",   /* some rubbish at the end */
+                           &escmountpoint,
+                           &fstype,
+                           &superopts);
+                if (k != 3) {
+                        if (k == EOF)
+                                break;
+
+                        /* Entry could not be parsed completely, skip it */
+                        continue;
+                }
+
+                /* Mount points in mountinfo are escaped, undo that before comparing paths */
+                r = cunescape(escmountpoint, UNESCAPE_RELAX, &mountpoint);
+                if (r < 0)
+                        return r;
+
+                /* "name" is the part of the mount point following the /sys/fs/cgroup prefix */
+                name = path_startswith(mountpoint, "/sys/fs/cgroup");
+                if (!name)
+                        continue;
+
+                /* Accept only /sys/fs/cgroup itself (empty name) or something that is a valid single
+                 * file name; everything else is ignored */
+                if (!filename_is_valid(name) && !isempty(name))
+                        continue;
+
+                if (streq(fstype, "tmpfs"))
+                        type = CGMOUNT_TMPFS;
+                else if (streq(fstype, "cgroup"))
+                        type = CGMOUNT_CGROUP1;
+                else if (streq(fstype, "cgroup2"))
+                        type = CGMOUNT_CGROUP2;
+                else
+                        continue;
+
+                /* NOTE(review): fsopts presumably stays NULL if superopts contains no file system
+                 * specific options -- confirm that cgmount_add() copes with NULL options. */
+                r = mnt_split_optstr(superopts, NULL, NULL, &fsopts, 0, 0);
+                if (r < 0)
+                        return r;
+
+                if (!cgmount_add(&mounts, type, fsopts, name))
+                        return -ENOMEM;
+
+                if (type == CGMOUNT_TMPFS) {
+                        /* Always initialize variables that carry a cleanup handler */
+                        _cleanup_closedir_ DIR *dir = NULL;
+                        struct dirent *entry;
+
+                        dir = opendir(mountpoint);
+                        if (!dir)
+                                return log_error_errno(errno, "Failed to open directory %s: %m", mountpoint);
+
+                        /* Propagate readdir() failures instead of silently treating a partially read
+                         * directory as complete */
+                        FOREACH_DIRENT(entry, dir, return log_error_errno(errno, "Failed to read directory %s: %m", mountpoint)) {
+                                _cleanup_free_ char *target = NULL;
+
+                                r = dirent_ensure_type(dir, entry);
+                                if (r < 0)
+                                        return r;
+
+                                if (entry->d_type != DT_LNK)
+                                        continue;
+
+                                r = readlinkat_malloc(dirfd(dir), entry->d_name, &target);
+                                if (r < 0)
+                                        return r;
+
+                                if (!cgmount_add(&mounts, CGMOUNT_SYMLINK, target, entry->d_name))
+                                        return -ENOMEM;
+                        }
+                }
+        }
+
+        /* Pass ownership to the caller and disarm the cleanup handler */
+        *ret_mounts = mounts;
+        mounts.mounts = NULL;
+        mounts.n = 0;
+
+        return 0;
+}
+
/* Retrieve a list of cgroup v1 hierarchies. */
static int get_v1_hierarchies(Set **ret) {
_cleanup_set_free_free_ Set *controllers = NULL;
@@ -688,6 +832,8 @@ int cgroup_decide_mounts(
default:
case CGROUP_UNIFIED_UNKNOWN:
assert_not_reached("unknown inner_cgver");
+ case CGROUP_UNIFIED_INHERIT:
+ return cgroup_decide_mounts_inherit(ret_mounts);
case CGROUP_UNIFIED_NONE:
case CGROUP_UNIFIED_SYSTEMD232:
case CGROUP_UNIFIED_SYSTEMD233:
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 5ae11ea0ce..ca827844db 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -341,6 +341,8 @@ static void parse_inner_cgver_env(void) {
arg_inner_cgver = CGROUP_UNIFIED_SYSTEMD233;
else if (streq(e, "unified"))
arg_inner_cgver = CGROUP_UNIFIED_ALL;
+ else if (streq(e, "inherit"))
+ arg_inner_cgver = CGROUP_UNIFIED_INHERIT;
else {
r = parse_boolean(e);
if (r < 0) {
@@ -362,8 +364,12 @@ static int detect_inner_cgver_from_image(const char *directory, CGroupUnified ou
* by checking libsystemd-shared). */
switch (outer_cgver) {
default:
+ case CGROUP_UNIFIED_INHERIT:
+ assert_not_reached("Invalid host cgroup version");
+ return -EINVAL;
case CGROUP_UNIFIED_UNKNOWN:
- assert_not_reached("unknown cgroup version");
+ arg_inner_cgver = CGROUP_UNIFIED_INHERIT;
+ break;
case CGROUP_UNIFIED_ALL:
/* Unified cgroup hierarchy support was added in 230. Unfortunately, libsystemd-shared (which we use
* to sniff the systemd version) was only added in 231, so we'll have a false negative here for 230. */
@@ -371,7 +377,7 @@ static int detect_inner_cgver_from_image(const char *directory, CGroupUnified ou
if (r < 0)
return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m");
if (r > 0)
- arg_inner_cgver = CGROUP_UNIFIED_ALL;
+ arg_inner_cgver = CGROUP_UNIFIED_INHERIT;
else
arg_inner_cgver = CGROUP_UNIFIED_NONE;
break;
@@ -381,7 +387,7 @@ static int detect_inner_cgver_from_image(const char *directory, CGroupUnified ou
if (r < 0)
return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m");
if (r > 0)
- arg_inner_cgver = CGROUP_UNIFIED_SYSTEMD233;
+ arg_inner_cgver = CGROUP_UNIFIED_INHERIT;
else
arg_inner_cgver = CGROUP_UNIFIED_NONE;
break;
@@ -391,12 +397,12 @@ static int detect_inner_cgver_from_image(const char *directory, CGroupUnified ou
if (r < 0)
return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m");
if (r > 0)
- arg_inner_cgver = CGROUP_UNIFIED_SYSTEMD232;
+ arg_inner_cgver = CGROUP_UNIFIED_INHERIT;
else
arg_inner_cgver = CGROUP_UNIFIED_NONE;
break;
case CGROUP_UNIFIED_NONE:
- arg_inner_cgver = CGROUP_UNIFIED_NONE;
+ arg_inner_cgver = CGROUP_UNIFIED_INHERIT;
break;
}
@@ -4275,8 +4281,12 @@ int main(int argc, char *argv[]) {
goto finish;
r = cg_version(&outer_cgver);
- if (r < 0) {
- log_error_errno(r, "Failed to determine whether the unified cgroups hierarchy is used: %m");
+ if (r < 0)
+ outer_cgver = CGROUP_UNIFIED_UNKNOWN;
+
+ if (outer_cgver == CGROUP_UNIFIED_UNKNOWN && arg_inner_cgver > CGROUP_UNIFIED_UNKNOWN) {
+ log_error("Cannot set cgroup version of container unless running on a host with a recognized (systemd or unified) cgroup setup");
+ r = -EINVAL;
goto finish;
}