mirror of
https://github.com/containers/podman.git
synced 2025-06-20 00:51:16 +08:00
Merge pull request #3188 from giuseppe/fix-join-existing-containers
rootless: new function to join existing conmon processes
This commit is contained in:
@ -4,11 +4,9 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io/ioutil"
|
||||
"log/syslog"
|
||||
"os"
|
||||
"runtime/pprof"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
@ -120,18 +118,10 @@ func setupRootless(cmd *cobra.Command, args []string) error {
|
||||
return errors.Wrapf(err, "could not get pause process pid file path")
|
||||
}
|
||||
|
||||
data, err := ioutil.ReadFile(pausePidPath)
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return errors.Wrapf(err, "cannot read pause process pid file %s", pausePidPath)
|
||||
}
|
||||
if err == nil {
|
||||
pausePid, err := strconv.Atoi(string(data))
|
||||
if _, err := os.Stat(pausePidPath); err == nil {
|
||||
became, ret, err := rootless.TryJoinFromFilePaths("", false, []string{pausePidPath})
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "cannot parse pause pid file %s", pausePidPath)
|
||||
}
|
||||
became, ret, err := rootless.JoinUserAndMountNS(uint(pausePid), "")
|
||||
if err != nil {
|
||||
logrus.Errorf("cannot join pause process pid %d. You may need to remove %s and stop all containers", pausePid, pausePidPath)
|
||||
logrus.Errorf("cannot join pause process. You may need to remove %s and stop all containers", pausePidPath)
|
||||
logrus.Errorf("you can use `system migrate` to recreate the pause process")
|
||||
logrus.Errorf(err.Error())
|
||||
os.Exit(1)
|
||||
@ -154,28 +144,13 @@ func setupRootless(cmd *cobra.Command, args []string) error {
|
||||
logrus.Errorf(err.Error())
|
||||
os.Exit(1)
|
||||
}
|
||||
var became bool
|
||||
var ret int
|
||||
if len(ctrs) == 0 {
|
||||
became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
|
||||
} else {
|
||||
|
||||
paths := []string{}
|
||||
for _, ctr := range ctrs {
|
||||
data, err := ioutil.ReadFile(ctr.Config().ConmonPidFile)
|
||||
if err != nil {
|
||||
logrus.Errorf(err.Error())
|
||||
continue
|
||||
}
|
||||
conmonPid, err := strconv.Atoi(string(data))
|
||||
if err != nil {
|
||||
logrus.Errorf(err.Error())
|
||||
continue
|
||||
}
|
||||
became, ret, err = rootless.JoinUserAndMountNS(uint(conmonPid), pausePidPath)
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
paths = append(paths, ctr.Config().ConmonPidFile)
|
||||
}
|
||||
|
||||
became, ret, err := rootless.TryJoinFromFilePaths(pausePidPath, true, paths)
|
||||
if err != nil {
|
||||
logrus.Errorf(err.Error())
|
||||
os.Exit(1)
|
||||
@ -185,6 +160,7 @@ func setupRootless(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setRLimits() error {
|
||||
rlimits := new(syscall.Rlimit)
|
||||
rlimits.Cur = 1048576
|
||||
|
@ -167,7 +167,7 @@ func (r *Runtime) newContainer(ctx context.Context, rSpec *spec.Spec, options ..
|
||||
}()
|
||||
|
||||
if rootless.IsRootless() && ctr.config.ConmonPidFile == "" {
|
||||
ctr.config.ConmonPidFile = filepath.Join(ctr.config.StaticDir, "conmon.pid")
|
||||
ctr.config.ConmonPidFile = filepath.Join(ctr.state.RunDir, "conmon.pid")
|
||||
}
|
||||
|
||||
// Go through named volumes and add them.
|
||||
|
@ -69,6 +69,19 @@ rootless_gid ()
|
||||
static void
|
||||
do_pause ()
|
||||
{
|
||||
int i;
|
||||
struct sigaction act;
|
||||
int const sig[] =
|
||||
{
|
||||
SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM, SIGPOLL,
|
||||
SIGPROF, SIGVTALRM, SIGXCPU, SIGXFSZ, 0
|
||||
};
|
||||
|
||||
act.sa_handler = SIG_IGN;
|
||||
|
||||
for (i = 0; sig[i]; i++)
|
||||
sigaction (sig[i], &act, NULL);
|
||||
|
||||
prctl (PR_SET_NAME, "podman pause", NULL, NULL, NULL);
|
||||
while (1)
|
||||
pause ();
|
||||
@ -333,6 +346,26 @@ syscall_clone (unsigned long flags, void *child_stack)
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Wait for a child process and translate its termination into an exit code.

   PID is passed straight to waitpid (so -1 waits for any child) and OPTIONS
   is the waitpid options mask (0 for a plain blocking wait).

   Returns the child's exit status if it exited normally, 128 + the signal
   number if it was killed by a signal, and -1 if waitpid failed, if no child
   changed state (possible only when OPTIONS contains WNOHANG), or if the
   reported state change is neither an exit nor a termination by signal.  */
int
reexec_in_user_namespace_wait (int pid, int options)
{
  pid_t p;
  int status;

  /* Retry when the wait is interrupted by a signal handler.  */
  do
    p = waitpid (pid, &status, options);
  while (p < 0 && errno == EINTR);

  /* p < 0: waitpid failed.  p == 0: WNOHANG was requested and no child has
     changed state yet; STATUS was not written, so it must not be read.  */
  if (p <= 0)
    return -1;

  if (WIFEXITED (status))
    return WEXITSTATUS (status);
  if (WIFSIGNALED (status))
    return 128 + WTERMSIG (status);
  return -1;
}
|
||||
|
||||
static int
|
||||
create_pause_process (const char *pause_pid_file_path, char **argv)
|
||||
{
|
||||
@ -356,6 +389,8 @@ create_pause_process (const char *pause_pid_file_path, char **argv)
|
||||
while (r < 0 && errno == EINTR);
|
||||
close (p[0]);
|
||||
|
||||
reexec_in_user_namespace_wait(r, 0);
|
||||
|
||||
return r == 1 && b == '0' ? 0 : -1;
|
||||
}
|
||||
else
|
||||
@ -560,8 +595,51 @@ check_proc_sys_userns_file (const char *path)
|
||||
}
|
||||
}
|
||||
|
||||
/* Stream the whole content of FILE_TO_READ into the descriptor OUTFD.

   The file is read in chunks of up to 512 bytes and every chunk is written
   out completely, retrying short writes.  Reads and writes interrupted by a
   signal (EINTR) are restarted.

   Returns 0 once end-of-file has been reached, or a negative value if the
   file cannot be opened or a read/write fails.  OUTFD is left open; closing
   it is up to the caller.  */
static int
copy_file_to_fd (const char *file_to_read, int outfd)
{
  char chunk[512];
  int result = 0;
  int src = open (file_to_read, O_RDONLY);

  if (src < 0)
    return src;

  while (result == 0)
    {
      const char *cursor = chunk;
      ssize_t pending, written;

      do
        pending = read (src, chunk, sizeof chunk);
      while (pending < 0 && errno == EINTR);

      /* 0 means end-of-file (result stays 0), negative means a read error.  */
      if (pending <= 0)
        {
          result = pending;
          break;
        }

      /* Drain the chunk, advancing past whatever each write accepted.  */
      while (pending > 0)
        {
          do
            written = write (outfd, cursor, pending);
          while (written < 0 && errno == EINTR);

          if (written < 0)
            {
              result = written;
              break;
            }

          cursor += written;
          pending -= written;
        }
    }

  close (src);
  return result;
}
|
||||
|
||||
int
|
||||
reexec_in_user_namespace (int ready, char *pause_pid_file_path)
|
||||
reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_read, int outputfd)
|
||||
{
|
||||
int ret;
|
||||
pid_t pid;
|
||||
@ -574,6 +652,7 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path)
|
||||
char *listen_pid = NULL;
|
||||
bool do_socket_activation = false;
|
||||
char *cwd = getcwd (NULL, 0);
|
||||
sigset_t sigset, oldsigset;
|
||||
|
||||
if (cwd == NULL)
|
||||
{
|
||||
@ -584,11 +663,11 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path)
|
||||
listen_pid = getenv("LISTEN_PID");
|
||||
listen_fds = getenv("LISTEN_FDS");
|
||||
|
||||
if (listen_pid != NULL && listen_fds != NULL) {
|
||||
if (strtol(listen_pid, NULL, 10) == getpid()) {
|
||||
if (listen_pid != NULL && listen_fds != NULL)
|
||||
{
|
||||
if (strtol(listen_pid, NULL, 10) == getpid())
|
||||
do_socket_activation = true;
|
||||
}
|
||||
}
|
||||
|
||||
sprintf (uid, "%d", geteuid ());
|
||||
sprintf (gid, "%d", getegid ());
|
||||
@ -621,6 +700,22 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path)
|
||||
return pid;
|
||||
}
|
||||
|
||||
if (sigfillset (&sigset) < 0)
|
||||
{
|
||||
fprintf (stderr, "cannot fill sigset: %s\n", strerror (errno));
|
||||
_exit (EXIT_FAILURE);
|
||||
}
|
||||
if (sigdelset (&sigset, SIGCHLD) < 0)
|
||||
{
|
||||
fprintf (stderr, "cannot sigdelset(SIGCHLD): %s\n", strerror (errno));
|
||||
_exit (EXIT_FAILURE);
|
||||
}
|
||||
if (sigprocmask (SIG_BLOCK, &sigset, &oldsigset) < 0)
|
||||
{
|
||||
fprintf (stderr, "cannot block signals: %s\n", strerror (errno));
|
||||
_exit (EXIT_FAILURE);
|
||||
}
|
||||
|
||||
argv = get_cmd_line_args (ppid);
|
||||
if (argv == NULL)
|
||||
{
|
||||
@ -628,7 +723,8 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path)
|
||||
_exit (EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (do_socket_activation) {
|
||||
if (do_socket_activation)
|
||||
{
|
||||
char s[32];
|
||||
sprintf (s, "%d", getpid());
|
||||
setenv ("LISTEN_PID", s, true);
|
||||
@ -685,27 +781,20 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path)
|
||||
while (ret < 0 && errno == EINTR);
|
||||
close (ready);
|
||||
|
||||
if (sigprocmask (SIG_SETMASK, &oldsigset, NULL) < 0)
|
||||
{
|
||||
fprintf (stderr, "cannot block signals: %s\n", strerror (errno));
|
||||
_exit (EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (file_to_read && file_to_read[0])
|
||||
{
|
||||
ret = copy_file_to_fd (file_to_read, outputfd);
|
||||
close (outputfd);
|
||||
_exit (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
||||
|
||||
execvp (argv[0], argv);
|
||||
|
||||
_exit (EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* Block until the child PID changes state and convert the result into an
   exit code: the exit status for a normal exit, 128 + signal number for a
   child killed by a signal, and -1 if waitpid fails or reports anything
   else.  Waits interrupted by a signal (EINTR) are restarted.  */
int
reexec_in_user_namespace_wait (int pid)
{
  int wstatus;

  for (;;)
    {
      if (waitpid (pid, &wstatus, 0) >= 0)
        break;
      if (errno != EINTR)
        return -1;
    }

  return WIFEXITED (wstatus) ? WEXITSTATUS (wstatus)
         : WIFSIGNALED (wstatus) ? 128 + WTERMSIG (wstatus)
         : -1;
}
|
||||
|
@ -26,8 +26,8 @@ import (
|
||||
#include <stdlib.h>
|
||||
extern uid_t rootless_uid();
|
||||
extern uid_t rootless_gid();
|
||||
extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path);
|
||||
extern int reexec_in_user_namespace_wait(int pid);
|
||||
extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path, char *file_to_read, int fd);
|
||||
extern int reexec_in_user_namespace_wait(int pid, int options);
|
||||
extern int reexec_userns_join(int userns, int mountns, char *pause_pid_file_path);
|
||||
*/
|
||||
import "C"
|
||||
@ -194,10 +194,24 @@ func getUserNSFirstChild(fd uintptr) (*os.File, error) {
|
||||
}
|
||||
}
|
||||
|
||||
// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount
|
||||
func enableLinger(pausePid string) {
|
||||
if pausePid == "" {
|
||||
return
|
||||
}
|
||||
// If we are trying to write a pause pid file, make sure we can leave processes
|
||||
// running longer than the user session.
|
||||
err := exec.Command("loginctl", "enable-linger", fmt.Sprintf("%d", GetRootlessUID())).Run()
|
||||
if err != nil {
|
||||
logrus.Warnf("cannot run `loginctl enable-linger` for the current user: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// joinUserAndMountNS re-exec podman in a new userNS and join the user and mount
|
||||
// namespace of the specified PID without looking up its parent. Useful to join directly
|
||||
// the conmon process.
|
||||
func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
|
||||
func joinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
|
||||
enableLinger(pausePid)
|
||||
|
||||
if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
|
||||
return false, -1, nil
|
||||
}
|
||||
@ -226,7 +240,7 @@ func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
|
||||
return false, -1, errors.Errorf("cannot re-exec process")
|
||||
}
|
||||
|
||||
ret := C.reexec_in_user_namespace_wait(pidC)
|
||||
ret := C.reexec_in_user_namespace_wait(pidC, 0)
|
||||
if ret < 0 {
|
||||
return false, -1, errors.New("error waiting for the re-exec process")
|
||||
}
|
||||
@ -234,11 +248,7 @@ func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
|
||||
return true, int(ret), nil
|
||||
}
|
||||
|
||||
// BecomeRootInUserNS re-exec podman in a new userNS. It returns whether podman was re-executed
|
||||
// into a new user namespace and the return code from the re-executed podman process.
|
||||
// If podman was re-executed the caller needs to propagate the error code returned by the child
|
||||
// process.
|
||||
func BecomeRootInUserNS(pausePid string) (bool, int, error) {
|
||||
func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (bool, int, error) {
|
||||
if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
|
||||
if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" {
|
||||
return false, 0, runInUser()
|
||||
@ -249,6 +259,13 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) {
|
||||
cPausePid := C.CString(pausePid)
|
||||
defer C.free(unsafe.Pointer(cPausePid))
|
||||
|
||||
cFileToRead := C.CString(fileToRead)
|
||||
defer C.free(unsafe.Pointer(cFileToRead))
|
||||
var fileOutputFD C.int
|
||||
if fileOutput != nil {
|
||||
fileOutputFD = C.int(fileOutput.Fd())
|
||||
}
|
||||
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
|
||||
@ -262,7 +279,7 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) {
|
||||
defer w.Close()
|
||||
defer w.Write([]byte("0"))
|
||||
|
||||
pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid)
|
||||
pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid, cFileToRead, fileOutputFD)
|
||||
pid := int(pidC)
|
||||
if pid < 0 {
|
||||
return false, -1, errors.Errorf("cannot re-exec process")
|
||||
@ -328,6 +345,10 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) {
|
||||
return false, -1, errors.Wrapf(err, "read from sync pipe")
|
||||
}
|
||||
|
||||
if fileOutput != nil {
|
||||
return true, 0, nil
|
||||
}
|
||||
|
||||
if b[0] == '2' {
|
||||
// We have lost the race for writing the PID file, as probably another
|
||||
// process created a namespace and wrote the PID.
|
||||
@ -336,7 +357,7 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) {
|
||||
if err == nil {
|
||||
pid, err := strconv.ParseUint(string(data), 10, 0)
|
||||
if err == nil {
|
||||
return JoinUserAndMountNS(uint(pid), "")
|
||||
return joinUserAndMountNS(uint(pid), "")
|
||||
}
|
||||
}
|
||||
return false, -1, errors.Wrapf(err, "error setting up the process")
|
||||
@ -368,10 +389,96 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) {
|
||||
}
|
||||
}()
|
||||
|
||||
ret := C.reexec_in_user_namespace_wait(pidC)
|
||||
ret := C.reexec_in_user_namespace_wait(pidC, 0)
|
||||
if ret < 0 {
|
||||
return false, -1, errors.New("error waiting for the re-exec process")
|
||||
}
|
||||
|
||||
return true, int(ret), nil
|
||||
}
|
||||
|
||||
// BecomeRootInUserNS re-exec podman in a new userNS. It returns whether podman was re-executed
|
||||
// into a new user namespace and the return code from the re-executed podman process.
|
||||
// If podman was re-executed the caller needs to propagate the error code returned by the child
|
||||
// process.
|
||||
func BecomeRootInUserNS(pausePid string) (bool, int, error) {
|
||||
enableLinger(pausePid)
|
||||
return becomeRootInUserNS(pausePid, "", nil)
|
||||
}
|
||||
|
||||
// TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths.
|
||||
// This is useful when there are already running containers and we
|
||||
// don't have a pause process yet. We can use the paths to the conmon
|
||||
// processes to attempt joining their namespaces.
|
||||
// If needNewNamespace is set, the file is read from a temporary user
|
||||
// namespace, this is useful for containers that are running with a
|
||||
// different uidmap and the unprivileged user has no way to read the
|
||||
// file owned by the root in the container.
|
||||
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
|
||||
if len(paths) == 0 {
|
||||
return BecomeRootInUserNS(pausePidPath)
|
||||
}
|
||||
|
||||
var lastErr error
|
||||
var pausePid int
|
||||
|
||||
for _, path := range paths {
|
||||
if !needNewNamespace {
|
||||
data, err := ioutil.ReadFile(path)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
|
||||
pausePid, err = strconv.Atoi(string(data))
|
||||
if err != nil {
|
||||
lastErr = errors.Wrapf(err, "cannot parse file %s", path)
|
||||
continue
|
||||
}
|
||||
|
||||
lastErr = nil
|
||||
break
|
||||
} else {
|
||||
fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_DGRAM, 0)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
|
||||
r, w := os.NewFile(uintptr(fds[0]), "read file"), os.NewFile(uintptr(fds[1]), "write file")
|
||||
|
||||
defer w.Close()
|
||||
defer r.Close()
|
||||
|
||||
if _, _, err := becomeRootInUserNS("", path, w); err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
|
||||
w.Close()
|
||||
defer func() {
|
||||
r.Close()
|
||||
C.reexec_in_user_namespace_wait(-1, 0)
|
||||
}()
|
||||
|
||||
b := make([]byte, 32)
|
||||
|
||||
n, err := r.Read(b)
|
||||
if err != nil {
|
||||
lastErr = errors.Wrapf(err, "cannot read %s\n", path)
|
||||
continue
|
||||
}
|
||||
|
||||
pausePid, err = strconv.Atoi(string(b[:n]))
|
||||
if err == nil {
|
||||
lastErr = nil
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if lastErr != nil {
|
||||
return false, 0, lastErr
|
||||
}
|
||||
|
||||
return joinUserAndMountNS(uint(pausePid), pausePidPath)
|
||||
}
|
||||
|
@ -29,10 +29,14 @@ func GetRootlessGID() int {
|
||||
return -1
|
||||
}
|
||||
|
||||
// JoinUserAndMountNS re-exec podman in a new userNS and join the user and mount
|
||||
// namespace of the specified PID without looking up its parent. Useful to join directly
|
||||
// the conmon process. It is a convenience function for JoinUserAndMountNSWithOpts
|
||||
// with a default configuration.
|
||||
func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
|
||||
// TryJoinFromFilePaths attempts to join the namespaces of the pid files in
// paths, optionally reading them from a temporary user namespace when
// needNewNamespace is set.
//
// This variant is a stub: it ignores all of its arguments and always returns
// false, -1 and an error stating that the function is not supported on this
// OS.
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
	const (
		became   = false
		exitCode = -1
	)
	return became, exitCode, errors.New("this function is not supported on this os")
}
|
||||
|
Reference in New Issue
Block a user