diff options
author | Rich Felker <dalias@aerifal.cx> | 2013-02-03 16:42:40 -0500 |
---|---|---|
committer | Rich Felker <dalias@aerifal.cx> | 2013-02-03 16:42:40 -0500 |
commit | fb6b159d9ec7cf1e037daa974eeeacf3c8b3b3f1 (patch) | |
tree | b2abcc807bf30691c9e65a59f463c77f0e46ff19 | |
parent | 89d3df6e5420878e35a93a481105017a94a02852 (diff) | |
download | musl-fb6b159d9ec7cf1e037daa974eeeacf3c8b3b3f1.tar.gz musl-fb6b159d9ec7cf1e037daa974eeeacf3c8b3b3f1.tar.bz2 musl-fb6b159d9ec7cf1e037daa974eeeacf3c8b3b3f1.tar.xz musl-fb6b159d9ec7cf1e037daa974eeeacf3c8b3b3f1.zip |
overhaul posix_spawn to use CLONE_VM instead of vfork
the proposed change was described in detail in detail previously on
the mailing list. in short, vfork is unsafe because:
1. the compiler could make optimizations that cause the child to
clobber the parent's local vars.
2. strace is buggy and allows the vforking parent to run before the
child execs when run under strace.
the new design uses a close-on-exec pipe instead of vfork semantics to
synchronize the parent and child so that the parent does not return
before the child has finished using its arguments (and now, also its
stack). this also allows reporting exec failures to the caller instead
of giving the caller a child that mysteriously exits with status 127
on exec error.
basic testing has been performed on both the success and failure code
paths. further testing should be done.
-rw-r--r-- | src/process/fdop.h | 2 | ||||
-rw-r--r-- | src/process/posix_spawn.c | 174 | ||||
-rw-r--r-- | src/process/posix_spawn_file_actions_adddup2.c | 4 |
3 files changed, 125 insertions, 55 deletions
diff --git a/src/process/fdop.h b/src/process/fdop.h index 02ff83c5..00b87514 100644 --- a/src/process/fdop.h +++ b/src/process/fdop.h @@ -4,7 +4,7 @@ struct fdop { struct fdop *next, *prev; - int cmd, fd, newfd, oflag; + int cmd, fd, srcfd, oflag; mode_t mode; char path[]; }; diff --git a/src/process/posix_spawn.c b/src/process/posix_spawn.c index 5eb516b0..c55907d3 100644 --- a/src/process/posix_spawn.c +++ b/src/process/posix_spawn.c @@ -1,51 +1,47 @@ +#define _GNU_SOURCE #include <spawn.h> +#include <sched.h> #include <unistd.h> #include <signal.h> -#include <stdint.h> #include <fcntl.h> +#include <sys/wait.h> #include "syscall.h" #include "pthread_impl.h" #include "fdop.h" #include "libc.h" -extern char **environ; - static void dummy_0() { } weak_alias(dummy_0, __acquire_ptc); weak_alias(dummy_0, __release_ptc); -pid_t __vfork(void); - -int __posix_spawnx(pid_t *restrict res, const char *restrict path, - int (*exec)(const char *, char *const *, char *const *), - const posix_spawn_file_actions_t *fa, - const posix_spawnattr_t *restrict attr, - char *const argv[restrict], char *const envp[restrict]) -{ - pid_t pid; +struct args { + int p[2]; sigset_t oldmask; - int i; - posix_spawnattr_t dummy_attr = { 0 }; - - if (!attr) attr = &dummy_attr; - - sigprocmask(SIG_BLOCK, SIGALL_SET, &oldmask); - - __acquire_ptc(); - pid = __vfork(); - - if (pid) { - __release_ptc(); - sigprocmask(SIG_SETMASK, &oldmask, 0); - if (pid < 0) return -pid; - *res = pid; - return 0; - } - - for (i=1; i<=8*__SYSCALL_SSLEN; i++) { - struct sigaction sa; + const char *path; + int (*exec)(const char *, char *const *, char *const *); + const posix_spawn_file_actions_t *fa; + const posix_spawnattr_t *restrict attr; + char *const *argv, *const *envp; +}; + +static int child(void *args_vp) +{ + int i, ret; + struct sigaction sa; + struct args *args = args_vp; + int p = args->p[1]; + const posix_spawn_file_actions_t *fa = args->fa; + const posix_spawnattr_t *restrict attr = args->attr; + + close(args->p[0]); + + /* All signal dispositions must be either SIG_DFL or SIG_IGN + * before signals are unblocked. Otherwise a signal handler + * from the parent might get run in the child while sharing + * memory, with unpredictable and dangerous results. */ + for (i=1; i<_NSIG; i++) { __libc_sigaction(i, 0, &sa); if (sa.sa_handler!=SIG_DFL && (sa.sa_handler!=SIG_IGN || ((attr->__flags & POSIX_SPAWN_SETSIGDEF) @@ -55,50 +51,124 @@ int __posix_spawnx(pid_t *restrict res, const char *restrict path, } } - if ((attr->__flags&POSIX_SPAWN_SETPGROUP) && setpgid(0, attr->__pgrp)) - _exit(127); + if (attr->__flags & POSIX_SPAWN_SETPGROUP) + if ((ret=__syscall(SYS_setpgid, 0, attr->__pgrp))) + goto fail; - /* Use syscalls directly because pthread state is not consistent - * for making calls to the library wrappers... */ - if ((attr->__flags&POSIX_SPAWN_RESETIDS) && ( - __syscall(SYS_setgid, __syscall(SYS_getgid)) || - __syscall(SYS_setuid, __syscall(SYS_getuid)) )) - _exit(127); + /* Use syscalls directly because pthread state because the + * library functions attempt to do a multi-threaded synchronized + * id-change, which would trash the parent's state. */ + if (attr->__flags & POSIX_SPAWN_RESETIDS) + if ((ret=__syscall(SYS_setgid, __syscall(SYS_getgid))) || + (ret=__syscall(SYS_setuid, __syscall(SYS_getuid))) ) + goto fail; if (fa && fa->__actions) { struct fdop *op; - int ret, fd; + int fd; for (op = fa->__actions; op->next; op = op->next); for (; op; op = op->prev) { + /* It's possible that a file operation would clobber + * the pipe fd used for synchronizing with the + * parent. To avoid that, we dup the pipe onto + * an unoccupied fd. */ + if (op->fd == p) { + ret = __syscall(SYS_dup, p); + if (ret < 0) goto fail; + __syscall(SYS_close, p); + p = ret; + } switch(op->cmd) { case FDOP_CLOSE: - ret = __syscall(SYS_close, op->fd); + if ((ret=__syscall(SYS_close, op->fd))) + goto fail; break; case FDOP_DUP2: - ret = __syscall(SYS_dup2, op->fd, op->newfd)<0; + if ((ret=__syscall(SYS_dup2, op->srcfd, op->fd))<0) + goto fail; break; case FDOP_OPEN: fd = __syscall(SYS_open, op->path, op->oflag | O_LARGEFILE, op->mode); - if (fd == op->fd) { - ret = 0; - } else { - ret = __syscall(SYS_dup2, fd, op->fd)<0; + if ((ret=fd) < 0) goto fail; + if (fd != op->fd) { + if ((ret=__syscall(SYS_dup2, fd, op->fd))<0) + goto fail; __syscall(SYS_close, fd); } break; } - if (ret) _exit(127); } } - sigprocmask(SIG_SETMASK, (attr->__flags & POSIX_SPAWN_SETSIGMASK) - ? &attr->__mask : &oldmask, 0); + /* Close-on-exec flag may have been lost if we moved the pipe + * to a different fd. We don't use F_DUPFD_CLOEXEC above because + * it would fail on older kernels and atomicity is not needed -- + * in this process there are no threads or signal handlers. */ + __syscall(SYS_fcntl, p, F_SETFD, FD_CLOEXEC); - exec(path, argv, envp ? envp : environ); + pthread_sigmask(SIG_SETMASK, (attr->__flags & POSIX_SPAWN_SETSIGMASK) + ? &attr->__mask : &args->oldmask, 0); + + args->exec(args->path, args->argv, args->envp); + +fail: + /* Since sizeof errno < PIPE_BUF, the write is atomic. */ + ret = -ret; + if (ret) while (write(p, &ret, sizeof ret) < 0); _exit(127); +} + + +int __posix_spawnx(pid_t *restrict res, const char *restrict path, + int (*exec)(const char *, char *const *, char *const *), + const posix_spawn_file_actions_t *fa, + const posix_spawnattr_t *restrict attr, + char *const argv[restrict], char *const envp[restrict]) +{ + pid_t pid; + char stack[1024]; + int ec=0, cs; + struct args args; + + if (pipe2(args.p, O_CLOEXEC)) + return errno; + + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs); + + args.path = path; + args.exec = exec; + args.fa = fa; + args.attr = attr ? attr : &(const posix_spawnattr_t){0}; + args.argv = argv; + args.envp = envp; + pthread_sigmask(SIG_BLOCK, SIGALL_SET, &args.oldmask); + + /* This lock prevents setuid/setgid operations while the parent + * is sharing memory with the child. Situations where processes + * with different permissions share VM are fundamentally unsafe. */ + __acquire_ptc(); + pid = __clone(child, stack+sizeof stack, CLONE_VM|SIGCHLD, &args); + close(args.p[1]); + + if (pid > 0) { + if (read(args.p[0], &ec, sizeof ec) < sizeof ec) ec = 0; + else waitpid(pid, &(int){0}, 0); + } else { + ec = -pid; + } + + /* At this point, the child has either exited or successfully + * performed exec, so the lock may be released. */ + __release_ptc(); + close(args.p[0]); + + if (!ec) *res = pid; + + pthread_sigmask(SIG_SETMASK, &args.oldmask, 0); + pthread_setcancelstate(cs, 0); - return 0; + return ec; } int posix_spawn(pid_t *restrict res, const char *restrict path, diff --git a/src/process/posix_spawn_file_actions_adddup2.c b/src/process/posix_spawn_file_actions_adddup2.c index 26f2c5cc..0367498f 100644 --- a/src/process/posix_spawn_file_actions_adddup2.c +++ b/src/process/posix_spawn_file_actions_adddup2.c @@ -3,13 +3,13 @@ #include <errno.h> #include "fdop.h" -int posix_spawn_file_actions_adddup2(posix_spawn_file_actions_t *fa, int fd, int newfd) +int posix_spawn_file_actions_adddup2(posix_spawn_file_actions_t *fa, int srcfd, int fd) { struct fdop *op = malloc(sizeof *op); if (!op) return ENOMEM; op->cmd = FDOP_DUP2; + op->srcfd = srcfd; op->fd = fd; - op->newfd = newfd; if ((op->next = fa->__actions)) op->next->prev = op; op->prev = 0; fa->__actions = op; |