mirror of
https://github.com/containers/podman.git
synced 2025-05-21 00:56:36 +08:00
Merge pull request #23206 from Luap99/rootless-reexec-userns
pkg/rootless: simplify reexec for container code
This commit is contained in:
@ -30,15 +30,16 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
configureCgroup := cgroupMode != "disabled"
|
hasCapSysAdmin, err := unshare.HasCapSysAdmin()
|
||||||
if configureCgroup {
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// check for both euid == 0 and CAP_SYS_ADMIN because we may be running in a container with CAP_SYS_ADMIN set.
|
||||||
|
if os.Geteuid() == 0 && hasCapSysAdmin {
|
||||||
// do it only after podman has already re-execed and running with uid==0.
|
// do it only after podman has already re-execed and running with uid==0.
|
||||||
hasCapSysAdmin, err := unshare.HasCapSysAdmin()
|
configureCgroup := cgroupMode != "disabled"
|
||||||
if err != nil {
|
if configureCgroup {
|
||||||
return err
|
|
||||||
}
|
|
||||||
// check for both euid == 0 and CAP_SYS_ADMIN because we may be running in a container with CAP_SYS_ADMIN set.
|
|
||||||
if os.Geteuid() == 0 && hasCapSysAdmin {
|
|
||||||
ownsCgroup, err := cgroups.UserOwnsCurrentSystemdCgroup()
|
ownsCgroup, err := cgroups.UserOwnsCurrentSystemdCgroup()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.Infof("Failed to detect the owner for the current cgroup: %v", err)
|
logrus.Infof("Failed to detect the owner for the current cgroup: %v", err)
|
||||||
@ -55,8 +56,8 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
pausePidPath, err := util.GetRootlessPauseProcessPidPath()
|
pausePidPath, err := util.GetRootlessPauseProcessPidPath()
|
||||||
@ -88,7 +89,7 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if len(paths) > 0 {
|
if len(paths) > 0 {
|
||||||
became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, true, paths)
|
became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, paths)
|
||||||
} else {
|
} else {
|
||||||
became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
|
became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
|
@ -24,7 +24,7 @@ func TryJoinPauseProcess(pausePidPath string) (bool, int, error) {
|
|||||||
return false, -1, err
|
return false, -1, err
|
||||||
}
|
}
|
||||||
|
|
||||||
became, ret, err := TryJoinFromFilePaths("", false, []string{pausePidPath})
|
became, ret, err := TryJoinFromFilePaths("", []string{pausePidPath})
|
||||||
if err == nil {
|
if err == nil {
|
||||||
return became, ret, nil
|
return became, ret, nil
|
||||||
}
|
}
|
||||||
@ -45,7 +45,7 @@ func TryJoinPauseProcess(pausePidPath string) (bool, int, error) {
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
// Now the pause PID file is locked. Try to join once again in case it changed while it was not locked.
|
// Now the pause PID file is locked. Try to join once again in case it changed while it was not locked.
|
||||||
became, ret, err = TryJoinFromFilePaths("", false, []string{pausePidPath})
|
became, ret, err = TryJoinFromFilePaths("", []string{pausePidPath})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// It is still failing. We can safely remove it.
|
// It is still failing. We can safely remove it.
|
||||||
os.Remove(pausePidPath)
|
os.Remove(pausePidPath)
|
||||||
|
@ -38,11 +38,7 @@ func GetRootlessGID() int {
|
|||||||
// This is useful when there are already running containers and we
|
// This is useful when there are already running containers and we
|
||||||
// don't have a pause process yet. We can use the paths to the conmon
|
// don't have a pause process yet. We can use the paths to the conmon
|
||||||
// processes to attempt joining their namespaces.
|
// processes to attempt joining their namespaces.
|
||||||
// If needNewNamespace is set, the file is read from a temporary user
|
func TryJoinFromFilePaths(pausePidPath string, paths []string) (bool, int, error) {
|
||||||
// namespace, this is useful for containers that are running with a
|
|
||||||
// different uidmap and the unprivileged user has no way to read the
|
|
||||||
// file owned by the root in the container.
|
|
||||||
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
|
|
||||||
return false, -1, errors.New("this function is not supported on this os")
|
return false, -1, errors.New("this function is not supported on this os")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -947,49 +947,8 @@ check_proc_sys_userns_file (const char *path)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
|
||||||
copy_file_to_fd (const char *file_to_read, int outfd)
|
|
||||||
{
|
|
||||||
char buf[512];
|
|
||||||
cleanup_close int fd = -1;
|
|
||||||
|
|
||||||
fd = open (file_to_read, O_RDONLY);
|
|
||||||
if (fd < 0)
|
|
||||||
{
|
|
||||||
fprintf (stderr, "open `%s`: %m\n", file_to_read);
|
|
||||||
return fd;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (;;)
|
|
||||||
{
|
|
||||||
ssize_t r, w, t = 0;
|
|
||||||
|
|
||||||
r = TEMP_FAILURE_RETRY (read (fd, buf, sizeof buf));
|
|
||||||
if (r < 0)
|
|
||||||
{
|
|
||||||
fprintf (stderr, "read from `%s`: %m\n", file_to_read);
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (r == 0)
|
|
||||||
break;
|
|
||||||
|
|
||||||
while (t < r)
|
|
||||||
{
|
|
||||||
w = TEMP_FAILURE_RETRY (write (outfd, &buf[t], r - t));
|
|
||||||
if (w < 0)
|
|
||||||
{
|
|
||||||
fprintf (stderr, "write file to output fd `%s`: %m\n", file_to_read);
|
|
||||||
return w;
|
|
||||||
}
|
|
||||||
t += w;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
int
|
||||||
reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_read, int outputfd)
|
reexec_in_user_namespace (int ready, char *pause_pid_file_path)
|
||||||
{
|
{
|
||||||
cleanup_free char **argv = NULL;
|
cleanup_free char **argv = NULL;
|
||||||
cleanup_free char *argv0 = NULL;
|
cleanup_free char *argv0 = NULL;
|
||||||
@ -1138,13 +1097,6 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_re
|
|||||||
_exit (EXIT_FAILURE);
|
_exit (EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (file_to_read && file_to_read[0])
|
|
||||||
{
|
|
||||||
ret = copy_file_to_fd (file_to_read, outputfd);
|
|
||||||
close (outputfd);
|
|
||||||
_exit (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
execvp ("/proc/self/exe", argv);
|
execvp ("/proc/self/exe", argv);
|
||||||
fprintf (stderr, "failed to reexec: %m\n");
|
fprintf (stderr, "failed to reexec: %m\n");
|
||||||
|
|
||||||
|
@ -32,7 +32,7 @@ import (
|
|||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
extern uid_t rootless_uid();
|
extern uid_t rootless_uid();
|
||||||
extern uid_t rootless_gid();
|
extern uid_t rootless_gid();
|
||||||
extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path, char *file_to_read, int fd);
|
extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path);
|
||||||
extern int reexec_in_user_namespace_wait(int pid, int options);
|
extern int reexec_in_user_namespace_wait(int pid, int options);
|
||||||
extern int reexec_userns_join(int pid, char *pause_pid_file_path);
|
extern int reexec_userns_join(int pid, char *pause_pid_file_path);
|
||||||
extern int is_fd_inherited(int fd);
|
extern int is_fd_inherited(int fd);
|
||||||
@ -213,7 +213,7 @@ func copyMappings(from, to string) error {
|
|||||||
return os.WriteFile(to, content, 0600)
|
return os.WriteFile(to, content, 0600)
|
||||||
}
|
}
|
||||||
|
|
||||||
func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ bool, _ int, retErr error) {
|
func becomeRootInUserNS(pausePid string) (_ bool, _ int, retErr error) {
|
||||||
hasCapSysAdmin, err := unshare.HasCapSysAdmin()
|
hasCapSysAdmin, err := unshare.HasCapSysAdmin()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, 0, err
|
return false, 0, err
|
||||||
@ -249,13 +249,6 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo
|
|||||||
cPausePid := C.CString(pausePid)
|
cPausePid := C.CString(pausePid)
|
||||||
defer C.free(unsafe.Pointer(cPausePid))
|
defer C.free(unsafe.Pointer(cPausePid))
|
||||||
|
|
||||||
cFileToRead := C.CString(fileToRead)
|
|
||||||
defer C.free(unsafe.Pointer(cFileToRead))
|
|
||||||
var fileOutputFD C.int
|
|
||||||
if fileOutput != nil {
|
|
||||||
fileOutputFD = C.int(fileOutput.Fd())
|
|
||||||
}
|
|
||||||
|
|
||||||
runtime.LockOSThread()
|
runtime.LockOSThread()
|
||||||
defer runtime.UnlockOSThread()
|
defer runtime.UnlockOSThread()
|
||||||
|
|
||||||
@ -287,7 +280,7 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid, cFileToRead, fileOutputFD)
|
pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid)
|
||||||
pid = int(pidC)
|
pid = int(pidC)
|
||||||
if pid < 0 {
|
if pid < 0 {
|
||||||
return false, -1, fmt.Errorf("cannot re-exec process")
|
return false, -1, fmt.Errorf("cannot re-exec process")
|
||||||
@ -361,14 +354,6 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo
|
|||||||
return false, -1, fmt.Errorf("read from sync pipe: %w", err)
|
return false, -1, fmt.Errorf("read from sync pipe: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if fileOutput != nil {
|
|
||||||
ret := C.reexec_in_user_namespace_wait(pidC, 0)
|
|
||||||
if ret < 0 {
|
|
||||||
return false, -1, errors.New("waiting for the re-exec process")
|
|
||||||
}
|
|
||||||
return true, 0, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if b[0] == '2' {
|
if b[0] == '2' {
|
||||||
// We have lost the race for writing the PID file, as probably another
|
// We have lost the race for writing the PID file, as probably another
|
||||||
// process created a namespace and wrote the PID.
|
// process created a namespace and wrote the PID.
|
||||||
@ -434,69 +419,27 @@ func waitAndProxySignalsToChild(pid C.int) (bool, int, error) {
|
|||||||
// If podman was re-executed the caller needs to propagate the error code returned by the child
|
// If podman was re-executed the caller needs to propagate the error code returned by the child
|
||||||
// process.
|
// process.
|
||||||
func BecomeRootInUserNS(pausePid string) (bool, int, error) {
|
func BecomeRootInUserNS(pausePid string) (bool, int, error) {
|
||||||
return becomeRootInUserNS(pausePid, "", nil)
|
return becomeRootInUserNS(pausePid)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths.
|
// TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths.
|
||||||
// This is useful when there are already running containers and we
|
// This is useful when there are already running containers and we
|
||||||
// don't have a pause process yet. We can use the paths to the conmon
|
// don't have a pause process yet. We can use the paths to the conmon
|
||||||
// processes to attempt joining their namespaces.
|
// processes to attempt joining their namespaces.
|
||||||
// If needNewNamespace is set, the file is read from a temporary user
|
func TryJoinFromFilePaths(pausePidPath string, paths []string) (bool, int, error) {
|
||||||
// namespace, this is useful for containers that are running with a
|
|
||||||
// different uidmap and the unprivileged user has no way to read the
|
|
||||||
// file owned by the root in the container.
|
|
||||||
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
|
|
||||||
var lastErr error
|
var lastErr error
|
||||||
var pausePid int
|
|
||||||
|
|
||||||
for _, path := range paths {
|
for _, path := range paths {
|
||||||
if !needNewNamespace {
|
data, err := os.ReadFile(path)
|
||||||
data, err := os.ReadFile(path)
|
if err != nil {
|
||||||
if err != nil {
|
lastErr = err
|
||||||
lastErr = err
|
continue
|
||||||
continue
|
}
|
||||||
}
|
|
||||||
|
|
||||||
pausePid, err = strconv.Atoi(string(data))
|
pausePid, err := strconv.Atoi(string(data))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
lastErr = fmt.Errorf("cannot parse file %q: %w", path, err)
|
lastErr = fmt.Errorf("cannot parse file %q: %w", path, err)
|
||||||
continue
|
continue
|
||||||
}
|
|
||||||
} else {
|
|
||||||
r, w, err := os.Pipe()
|
|
||||||
if err != nil {
|
|
||||||
lastErr = err
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
defer errorhandling.CloseQuiet(r)
|
|
||||||
|
|
||||||
if _, _, err := becomeRootInUserNS("", path, w); err != nil {
|
|
||||||
w.Close()
|
|
||||||
lastErr = err
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := w.Close(); err != nil {
|
|
||||||
return false, 0, err
|
|
||||||
}
|
|
||||||
defer func() {
|
|
||||||
C.reexec_in_user_namespace_wait(-1, 0)
|
|
||||||
}()
|
|
||||||
|
|
||||||
b := make([]byte, 32)
|
|
||||||
|
|
||||||
n, err := r.Read(b)
|
|
||||||
if err != nil {
|
|
||||||
lastErr = fmt.Errorf("cannot read %q: %w", path, err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
pausePid, err = strconv.Atoi(string(b[:n]))
|
|
||||||
if err != nil {
|
|
||||||
lastErr = err
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if pausePid > 0 && unix.Kill(pausePid, 0) == nil {
|
if pausePid > 0 && unix.Kill(pausePid, 0) == nil {
|
||||||
|
@ -41,11 +41,7 @@ func GetRootlessGID() int {
|
|||||||
// This is useful when there are already running containers and we
|
// This is useful when there are already running containers and we
|
||||||
// don't have a pause process yet. We can use the paths to the conmon
|
// don't have a pause process yet. We can use the paths to the conmon
|
||||||
// processes to attempt joining their namespaces.
|
// processes to attempt joining their namespaces.
|
||||||
// If needNewNamespace is set, the file is read from a temporary user
|
func TryJoinFromFilePaths(pausePidPath string, paths []string) (bool, int, error) {
|
||||||
// namespace, this is useful for containers that are running with a
|
|
||||||
// different uidmap and the unprivileged user has no way to read the
|
|
||||||
// file owned by the root in the container.
|
|
||||||
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
|
|
||||||
return false, -1, errors.New("this function is not supported on this os")
|
return false, -1, errors.New("this function is not supported on this os")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -119,7 +119,7 @@ function _check_pause_process() {
|
|||||||
|
|
||||||
# First let's run a container in the background to keep the userns active
|
# First let's run a container in the background to keep the userns active
|
||||||
local cname1=c1_$(random_string)
|
local cname1=c1_$(random_string)
|
||||||
run_podman run -d --name $cname1 $IMAGE top
|
run_podman run -d --name $cname1 --uidmap 0:100:100 $IMAGE top
|
||||||
|
|
||||||
run_podman unshare readlink /proc/self/ns/user
|
run_podman unshare readlink /proc/self/ns/user
|
||||||
userns="$output"
|
userns="$output"
|
||||||
@ -136,6 +136,9 @@ function _check_pause_process() {
|
|||||||
|
|
||||||
_test_sigproxy $cname2 $kidpid
|
_test_sigproxy $cname2 $kidpid
|
||||||
|
|
||||||
|
# check pause process again
|
||||||
|
_check_pause_process
|
||||||
|
|
||||||
# our container exits 0 so podman should too
|
# our container exits 0 so podman should too
|
||||||
wait $kidpid || die "podman run exited $? instead of zero"
|
wait $kidpid || die "podman run exited $? instead of zero"
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user