diff options
author | Luke Shumaker <lukeshu@lukeshu.com> | 2017-10-31 16:41:18 -0400 |
---|---|---|
committer | Luke Shumaker <lukeshu@lukeshu.com> | 2017-10-31 16:41:18 -0400 |
commit | 8668392ed29f4429e6d5e71ee8a82b6e5baf8504 (patch) | |
tree | 5fa1d6b0c8b92234521256d395731e459d98a994 | |
parent | 974437cd53439c816b4631240aa661ec1c196f40 (diff) |
a
-rw-r--r-- | src/nspawn/meson.build | 2 |
-rw-r--r-- | src/nspawn/nspawn-mount.c | 233 |
-rw-r--r-- | src/nspawn/nspawn-mount.h | 35 |
-rw-r--r-- | src/nspawn/nspawn.c | 60 |
4 files changed, 5 insertions, 325 deletions
diff --git a/src/nspawn/meson.build b/src/nspawn/meson.build index 6b332a1b3f..f0dd15c655 100644 --- a/src/nspawn/meson.build +++ b/src/nspawn/meson.build @@ -1,5 +1,3 @@ systemd_nspawn_sources = files(''' nspawn.c - nspawn-mount.c - nspawn-mount.h '''.split()) diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c deleted file mode 100644 index 9dbf8091c3..0000000000 --- a/src/nspawn/nspawn-mount.c +++ /dev/null @@ -1,233 +0,0 @@ -/*** - This file is part of systemd. - - Copyright 2015 Lennart Poettering - - systemd is free software; you can redistribute it and/or modify it - under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or - (at your option) any later version. - - systemd is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with systemd; If not, see <http://www.gnu.org/licenses/>. -***/ - -#include <sys/mount.h> - -#include "fs-util.h" -#include "mount-util.h" -#include "path-util.h" -#include "strv.h" -#include "user-util.h" - -#include "nspawn-mount.h" - -static int tmpfs_patch_options( - const char *options, - bool userns, - uid_t uid_shift, uid_t uid_range, - bool patch_ids, - const char *selinux_apifs_context, - char **ret) { - - char *buf = NULL; - - if ((userns && uid_shift != 0) || patch_ids) { - assert(uid_shift != UID_INVALID); - - if (asprintf(&buf, "%s%suid=" UID_FMT ",gid=" UID_FMT, - options ?: "", options ? "," : "", - uid_shift, uid_shift) < 0) - return -ENOMEM; - - options = buf; - } - -#ifdef HAVE_SELINUX - if (selinux_apifs_context) { - char *t; - - t = strjoin(options ?: "", options ? 
"," : "", - "context=\"", selinux_apifs_context, "\""); - free(buf); - if (!t) - return -ENOMEM; - - buf = t; - } -#endif - - if (!buf && options) { - buf = strdup(options); - if (!buf) - return -ENOMEM; - } - *ret = buf; - - return !!buf; -} - -static int mkdir_userns(const char *path, mode_t mode, MountSettingsMask mask, uid_t uid_shift) { - int r; - - assert(path); - - r = mkdir(path, mode); - if (r < 0 && errno != EEXIST) - return -errno; - - if ((mask & MOUNT_USE_USERNS) == 0) - return 0; - - if (mask & MOUNT_IN_USERNS) - return 0; - - r = lchown(path, uid_shift, uid_shift); - if (r < 0) - return -errno; - - return 0; -} - -static int mkdir_userns_p(const char *prefix, const char *path, mode_t mode, MountSettingsMask mask, uid_t uid_shift) { - const char *p, *e; - int r; - - assert(path); - - if (prefix && !path_startswith(path, prefix)) - return -ENOTDIR; - - /* create every parent directory in the path, except the last component */ - p = path + strspn(path, "/"); - for (;;) { - char t[strlen(path) + 1]; - - e = p + strcspn(p, "/"); - p = e + strspn(e, "/"); - - /* Is this the last component? 
If so, then we're done */ - if (*p == 0) - break; - - memcpy(t, path, e - path); - t[e-path] = 0; - - if (prefix && path_startswith(prefix, t)) - continue; - - r = mkdir_userns(t, mode, mask, uid_shift); - if (r < 0) - return r; - } - - return mkdir_userns(path, mode, mask, uid_shift); -} - -int mount_all(const char *dest, - MountSettingsMask mount_settings, - uid_t uid_shift, uid_t uid_range, - const char *selinux_apifs_context) { - - typedef struct MountPoint { - const char *what; - const char *where; - const char *type; - const char *options; - unsigned long flags; - MountSettingsMask mount_settings; - } MountPoint; - - static const MountPoint mount_table[] = { - /* inner child mounts */ - { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL|MOUNT_IN_USERNS }, - { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ - { "/proc/sys/net", "/proc/sys/net", NULL, NULL, MS_BIND, MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO|MOUNT_APPLY_APIVFS_NETNS }, /* (except for this) */ - { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* ... then, make it r/o */ - { "/proc/sysrq-trigger", "/proc/sysrq-trigger", NULL, NULL, MS_BIND, MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ - { NULL, "/proc/sysrq-trigger", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* ... 
then, make it r/o */ - - /* outer child mounts */ - { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, MOUNT_FATAL }, - { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS }, - - { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, MOUNT_FATAL }, - { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, MOUNT_FATAL }, - { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, MOUNT_FATAL }, -#ifdef HAVE_SELINUX - { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, 0 }, /* Bind mount first */ - { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, 0 }, /* Then, make it r/o */ -#endif - }; - - unsigned k; - int r; - bool use_userns = (mount_settings & MOUNT_USE_USERNS); - bool netns = (mount_settings & MOUNT_APPLY_APIVFS_NETNS); - bool ro = (mount_settings & MOUNT_APPLY_APIVFS_RO); - bool in_userns = (mount_settings & MOUNT_IN_USERNS); - - for (k = 0; k < ELEMENTSOF(mount_table); k++) { - _cleanup_free_ char *where = NULL, *options = NULL; - const char *o; - bool fatal = (mount_table[k].mount_settings & MOUNT_FATAL); - - if (in_userns != (bool)(mount_table[k].mount_settings & MOUNT_IN_USERNS)) - continue; - - if (!netns && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_NETNS)) - continue; - - if (!ro && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_RO)) - continue; - - r = chase_symlinks(mount_table[k].where, dest, CHASE_NONEXISTENT|CHASE_PREFIX_ROOT, &where); - if (r < 0) - return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, mount_table[k].where); - - r = path_is_mount_point(where, NULL, 0); - if (r < 0 && r != -ENOENT) - return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where); - - /* Skip this entry if it is not a remount. 
*/ - if (mount_table[k].what && r > 0) - continue; - - r = mkdir_userns_p(dest, where, 0755, mount_settings, uid_shift); - if (r < 0 && r != -EEXIST) { - if (fatal) - return log_error_errno(r, "Failed to create directory %s: %m", where); - - log_debug_errno(r, "Failed to create directory %s: %m", where); - continue; - } - - o = mount_table[k].options; - if (streq_ptr(mount_table[k].type, "tmpfs")) { - if (in_userns) - r = tmpfs_patch_options(o, use_userns, 0, uid_range, true, selinux_apifs_context, &options); - else - r = tmpfs_patch_options(o, use_userns, uid_shift, uid_range, false, selinux_apifs_context, &options); - if (r < 0) - return log_oom(); - if (r > 0) - o = options; - } - - r = mount_verbose(fatal ? LOG_ERR : LOG_DEBUG, - mount_table[k].what, - where, - mount_table[k].type, - mount_table[k].flags, - o); - if (r < 0 && fatal) - return r; - } - - return 0; -} diff --git a/src/nspawn/nspawn-mount.h b/src/nspawn/nspawn-mount.h deleted file mode 100644 index afa21d82c7..0000000000 --- a/src/nspawn/nspawn-mount.h +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once - -/*** - This file is part of systemd. - - Copyright 2015 Lennart Poettering - - systemd is free software; you can redistribute it and/or modify it - under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or - (at your option) any later version. - - systemd is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with systemd; If not, see <http://www.gnu.org/licenses/>. 
-***/ - -#include <stdbool.h> - -#include "volatile-util.h" - -typedef enum MountSettingsMask { - MOUNT_FATAL = 1 << 0, /* if set, a mount error is considered fatal */ - MOUNT_USE_USERNS = 1 << 1, /* if set, mounts are patched considering uid/gid shifts in a user namespace */ - MOUNT_IN_USERNS = 1 << 2, /* if set, the mount is executed in the inner child, otherwise in the outer child */ - MOUNT_APPLY_APIVFS_RO = 1 << 3, /* if set, /proc/sys, and /sysfs will be mounted read-only, otherwise read-write. */ - MOUNT_APPLY_APIVFS_NETNS = 1 << 4, /* if set, /proc/sys/net will be mounted read-write. - Works only if MOUNT_APPLY_APIVFS_RO is also set. */ -} MountSettingsMask; - -int mount_all(const char *dest, MountSettingsMask mount_settings, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 308f156493..4884435445 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -34,10 +34,6 @@ #include "terminal-util.h" #include "user-util.h" -#include "nspawn-mount.h" - -#define EXIT_FORCE_RESTART 133 - typedef enum ContainerStatus { CONTAINER_TERMINATED, CONTAINER_REBOOTED @@ -45,10 +41,9 @@ typedef enum ContainerStatus { static char *arg_directory = NULL; static bool arg_quiet = false; -static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U; +static uid_t arg_uid_shift = UID_INVALID; static char **arg_parameters = NULL; static unsigned long arg_clone_ns_flags = CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS; -static MountSettingsMask arg_mount_settings = MOUNT_APPLY_APIVFS_RO; /* * Return values: @@ -139,12 +134,7 @@ static int inner_child( assert(barrier); assert(directory); - r = mount_all(NULL, - arg_mount_settings | MOUNT_IN_USERNS, - arg_uid_shift, - arg_uid_range, - NULL); - + r = mount_verbose(LOG_ERR, "proc", "/proc", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL); if (r < 0) return r; @@ -155,14 +145,6 @@ static int inner_child( return -ESRCH; } - /* Let the parent know that we are 
ready and - * wait until the parent is ready with the - * setup, too... */ - if (!barrier_place_and_sync(barrier)) { /* #4 */ - log_error("Parent died too early"); - return -ESRCH; - } - execvp(arg_parameters[0], arg_parameters); r = -errno; @@ -218,14 +200,6 @@ static int outer_child( if (r < 0) return r; - r = mount_all(directory, - arg_mount_settings, - arg_uid_shift, - arg_uid_range, - NULL); - if (r < 0) - return r; - if (chdir(directory) < 0) return log_error_errno(errno, "Failed to chdir: %m"); if (chroot(".") < 0) @@ -352,12 +326,6 @@ static int run(int master, log_debug("Init process invoked as PID "PID_FMT, *pid); - /* Notify the child that the parent is ready with all - * its setup (including cgroup-ification), and that - * the child can now hand over control to the code to - * run inside the container. */ - (void) barrier_place(&barrier); /* #3 */ - /* Block SIGCHLD here, before notifying child. * process_pty() will handle it with the other signals. */ assert_se(sigprocmask(SIG_BLOCK, &mask_chld, NULL) >= 0); @@ -394,31 +362,13 @@ static int run(int master, r = wait_for_container(*pid, &container_status); *pid = 0; - if (r < 0) + if (r < 0) { /* We failed to wait for the container, or the container exited abnormally. */ return r; - if (r > 0 || container_status == CONTAINER_TERMINATED) { - /* r > 0 → The container exited with a non-zero status. - * As a special case, we need to replace 133 with a different value, - * because 133 is special-cased in the service file to reboot the container. - * otherwise → The container exited with zero status and a reboot was not requested. 
- */ - if (r == EXIT_FORCE_RESTART) - r = EXIT_FAILURE; /* replace 133 with the general failure code */ + } else { *ret = r; - return 0; /* finito */ + return 0; } - - /* CONTAINER_REBOOTED, loop again */ - - /* Special handling if we are running as a service: instead of simply - * restarting the machine we want to restart the entire service, so let's - * inform systemd about this with the special exit code 133. The service - * file uses RestartForceExitStatus=133 so that this results in a full - * nspawn restart. This is necessary since we might have cgroup parameters - * set we want to have flushed out. */ - *ret = EXIT_FORCE_RESTART; - return 0; /* finito */ } int main(int argc, char *argv[]) { |