mirror of
https://github.com/containers/podman.git
synced 2025-12-02 02:58:03 +08:00
Merge pull request #27604 from Luap99/migrate
podman system migrate fixes when pause process and conmon got killed
This commit is contained in:
@@ -4,6 +4,7 @@ package abi
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
@@ -14,6 +15,7 @@ import (
|
||||
"go.podman.io/common/pkg/config"
|
||||
"go.podman.io/common/pkg/systemd"
|
||||
"go.podman.io/storage/pkg/unshare"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// Default path for system runtime state
|
||||
@@ -59,6 +61,8 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// return early as we are already re-exec or root here so no need to join the rootless userns.
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -74,36 +78,41 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool,
|
||||
if became {
|
||||
os.Exit(ret)
|
||||
}
|
||||
if noMoveProcess {
|
||||
return nil
|
||||
}
|
||||
|
||||
// if there is no pid file, try to join existing containers, and create a pause process.
|
||||
ctrs, err := ic.Libpod.GetRunningContainers()
|
||||
if err != nil {
|
||||
logrus.Error(err.Error())
|
||||
os.Exit(1)
|
||||
return err
|
||||
}
|
||||
|
||||
paths := []string{}
|
||||
paths := make([]string, 0, len(ctrs))
|
||||
for _, ctr := range ctrs {
|
||||
paths = append(paths, ctr.ConfigNoCopy().ConmonPidFile)
|
||||
}
|
||||
|
||||
if len(paths) > 0 {
|
||||
became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, paths)
|
||||
} else {
|
||||
became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
|
||||
if err == nil {
|
||||
systemd.MovePauseProcessToScope(pausePidPath)
|
||||
// TryJoinFromFilePaths fails with ESRCH when none of the PIDs are valid anymore.
|
||||
// In this case create a new userns.
|
||||
if errors.Is(err, unix.ESRCH) {
|
||||
logrus.Warnf("Failed to join existing conmon namespace, creating a new rootless podman user namespace. If there are existing container running please stop them with %q to reset the namespace", os.Args[0]+" system migrate")
|
||||
became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
|
||||
}
|
||||
} else {
|
||||
logrus.Info("Creating a new rootless user namespace")
|
||||
became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
logrus.Error(fmt.Errorf("invalid internal status, try resetting the pause process with %q: %w", os.Args[0]+" system migrate", err))
|
||||
os.Exit(1)
|
||||
return fmt.Errorf("fatal error, invalid internal status, unable to create a new pause process: %w. Try running %q and if that doesn't work reboot to recover", err, os.Args[0]+" system migrate")
|
||||
}
|
||||
if !noMoveProcess {
|
||||
systemd.MovePauseProcessToScope(pausePidPath)
|
||||
}
|
||||
if became {
|
||||
os.Exit(ret)
|
||||
}
|
||||
|
||||
logrus.Error("Internal error, failed to re-exec podman into user namespace without error. This should never happen, if you see this please report a bug")
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -384,8 +384,7 @@ can_use_shortcut (char **argv)
|
||||
|| strcmp (argv[argc], "version") == 0
|
||||
|| strcmp (argv[argc], "context") == 0
|
||||
|| strcmp (argv[argc], "search") == 0
|
||||
|| strcmp (argv[argc], "compose") == 0
|
||||
|| (strcmp (argv[argc], "system") == 0 && argv[argc+1] && strcmp (argv[argc+1], "service") != 0))
|
||||
|| strcmp (argv[argc], "compose") == 0)
|
||||
{
|
||||
ret = false;
|
||||
break;
|
||||
|
||||
@@ -149,3 +149,30 @@ function _check_pause_process() {
|
||||
# This used to hang trying to unmount the netns.
|
||||
run_podman rm -f -t0 $cname
|
||||
}
|
||||
|
||||
# Regression test for https://issues.redhat.com/browse/RHEL-130252
# Scenario: a container is running, then both the rootless pause process and
# the container's conmon are killed with SIGKILL. "podman system migrate" is
# then expected to warn that it could not join the existing conmon namespace,
# and the container must still be removable afterwards.
|
||||
@test "podman system migrate works with conmon being killed" {
|
||||
skip_if_not_rootless "pause process is only used as rootless"
|
||||
skip_if_remote "system migrate not supported via remote"
|
||||
|
||||
# Start a detached container so that a conmon process exists for it.
local cname=c-$(safename)
|
||||
run_podman run --name $cname --stop-signal SIGKILL -d $IMAGE sleep 100
|
||||
|
||||
# Remember the conmon PID reported by inspect so it can be killed below.
run_podman inspect --format '{{.State.ConmonPid}}' $cname
|
||||
conmon_pid="$output"
|
||||
|
||||
# check for pause pid and then kill it
# NOTE(review): _check_pause_process is defined outside this view; it
# presumably sets $pause_pid, which is used on the next command - confirm.
|
||||
_check_pause_process
|
||||
kill -9 $pause_pid
|
||||
|
||||
# kill conmon
|
||||
kill -9 $conmon_pid
|
||||
|
||||
# Run podman system migrate now that both the pause process and conmon are
# gone. The "125" argument is presumably the expected exit status per the
# run_podman helper convention - confirm against the helper.
|
||||
run_podman 125 system migrate
|
||||
# The output must contain the warning about failing to join the conmon namespace.
assert "$output" =~ "Failed to join existing conmon namespace" "fallback to userns creating"
|
||||
# NOTE(review): the origin of the "conmon process killed" message is not
# visible in this view.
assert "$output" =~ "conmon process killed"
|
||||
|
||||
# Now the removal command should work fine without errors.
|
||||
run_podman rm $cname
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user