pkg/rootless: simplify reexec for container code

The code currently tried to avoid joining the userns from conmon
directly and rather joined to only read the pid file and then send this
back to use so we could join the userns. From the comment this was done
because we could not read the pid file. However this is no longer true
as of commit 49eb5af301 and file is no always owned by the real user.

This means we can just remove this special logic and join the namespace
directly there. A test has been added to check the rejoin logic with a
custom uidmapping.

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
This commit is contained in:
Paul Holzinger
2024-07-05 16:20:28 +02:00
parent c276b28696
commit 3350cd3eed
7 changed files with 24 additions and 134 deletions

View File

@ -88,7 +88,7 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool,
}
if len(paths) > 0 {
became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, true, paths)
became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, paths)
} else {
became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
if err == nil {

View File

@ -24,7 +24,7 @@ func TryJoinPauseProcess(pausePidPath string) (bool, int, error) {
return false, -1, err
}
became, ret, err := TryJoinFromFilePaths("", false, []string{pausePidPath})
became, ret, err := TryJoinFromFilePaths("", []string{pausePidPath})
if err == nil {
return became, ret, nil
}
@ -45,7 +45,7 @@ func TryJoinPauseProcess(pausePidPath string) (bool, int, error) {
}()
// Now the pause PID file is locked. Try to join once again in case it changed while it was not locked.
became, ret, err = TryJoinFromFilePaths("", false, []string{pausePidPath})
became, ret, err = TryJoinFromFilePaths("", []string{pausePidPath})
if err != nil {
// It is still failing. We can safely remove it.
os.Remove(pausePidPath)

View File

@ -38,11 +38,7 @@ func GetRootlessGID() int {
// This is useful when there are already running containers and we
// don't have a pause process yet. We can use the paths to the conmon
// processes to attempt joining their namespaces.
// If needNewNamespace is set, the file is read from a temporary user
// namespace, this is useful for containers that are running with a
// different uidmap and the unprivileged user has no way to read the
// file owned by the root in the container.
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
func TryJoinFromFilePaths(pausePidPath string, paths []string) (bool, int, error) {
return false, -1, errors.New("this function is not supported on this os")
}

View File

@ -947,49 +947,8 @@ check_proc_sys_userns_file (const char *path)
}
}
static int
copy_file_to_fd (const char *file_to_read, int outfd)
{
char buf[512];
cleanup_close int fd = -1;
fd = open (file_to_read, O_RDONLY);
if (fd < 0)
{
fprintf (stderr, "open `%s`: %m\n", file_to_read);
return fd;
}
for (;;)
{
ssize_t r, w, t = 0;
r = TEMP_FAILURE_RETRY (read (fd, buf, sizeof buf));
if (r < 0)
{
fprintf (stderr, "read from `%s`: %m\n", file_to_read);
return r;
}
if (r == 0)
break;
while (t < r)
{
w = TEMP_FAILURE_RETRY (write (outfd, &buf[t], r - t));
if (w < 0)
{
fprintf (stderr, "write file to output fd `%s`: %m\n", file_to_read);
return w;
}
t += w;
}
}
return 0;
}
int
reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_read, int outputfd)
reexec_in_user_namespace (int ready, char *pause_pid_file_path)
{
cleanup_free char **argv = NULL;
cleanup_free char *argv0 = NULL;
@ -1138,13 +1097,6 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_re
_exit (EXIT_FAILURE);
}
if (file_to_read && file_to_read[0])
{
ret = copy_file_to_fd (file_to_read, outputfd);
close (outputfd);
_exit (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
execvp ("/proc/self/exe", argv);
fprintf (stderr, "failed to reexec: %m\n");

View File

@ -32,7 +32,7 @@ import (
#include <sys/types.h>
extern uid_t rootless_uid();
extern uid_t rootless_gid();
extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path, char *file_to_read, int fd);
extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path);
extern int reexec_in_user_namespace_wait(int pid, int options);
extern int reexec_userns_join(int pid, char *pause_pid_file_path);
extern int is_fd_inherited(int fd);
@ -213,7 +213,7 @@ func copyMappings(from, to string) error {
return os.WriteFile(to, content, 0600)
}
func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ bool, _ int, retErr error) {
func becomeRootInUserNS(pausePid string) (_ bool, _ int, retErr error) {
hasCapSysAdmin, err := unshare.HasCapSysAdmin()
if err != nil {
return false, 0, err
@ -249,13 +249,6 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo
cPausePid := C.CString(pausePid)
defer C.free(unsafe.Pointer(cPausePid))
cFileToRead := C.CString(fileToRead)
defer C.free(unsafe.Pointer(cFileToRead))
var fileOutputFD C.int
if fileOutput != nil {
fileOutputFD = C.int(fileOutput.Fd())
}
runtime.LockOSThread()
defer runtime.UnlockOSThread()
@ -287,7 +280,7 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo
}
}()
pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid, cFileToRead, fileOutputFD)
pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid)
pid = int(pidC)
if pid < 0 {
return false, -1, fmt.Errorf("cannot re-exec process")
@ -361,14 +354,6 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo
return false, -1, fmt.Errorf("read from sync pipe: %w", err)
}
if fileOutput != nil {
ret := C.reexec_in_user_namespace_wait(pidC, 0)
if ret < 0 {
return false, -1, errors.New("waiting for the re-exec process")
}
return true, 0, nil
}
if b[0] == '2' {
// We have lost the race for writing the PID file, as probably another
// process created a namespace and wrote the PID.
@ -434,69 +419,27 @@ func waitAndProxySignalsToChild(pid C.int) (bool, int, error) {
// If podman was re-executed the caller needs to propagate the error code returned by the child
// process.
func BecomeRootInUserNS(pausePid string) (bool, int, error) {
return becomeRootInUserNS(pausePid, "", nil)
return becomeRootInUserNS(pausePid)
}
// TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths.
// This is useful when there are already running containers and we
// don't have a pause process yet. We can use the paths to the conmon
// processes to attempt joining their namespaces.
// If needNewNamespace is set, the file is read from a temporary user
// namespace, this is useful for containers that are running with a
// different uidmap and the unprivileged user has no way to read the
// file owned by the root in the container.
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
func TryJoinFromFilePaths(pausePidPath string, paths []string) (bool, int, error) {
var lastErr error
var pausePid int
for _, path := range paths {
if !needNewNamespace {
data, err := os.ReadFile(path)
if err != nil {
lastErr = err
continue
}
data, err := os.ReadFile(path)
if err != nil {
lastErr = err
continue
}
pausePid, err = strconv.Atoi(string(data))
if err != nil {
lastErr = fmt.Errorf("cannot parse file %q: %w", path, err)
continue
}
} else {
r, w, err := os.Pipe()
if err != nil {
lastErr = err
continue
}
defer errorhandling.CloseQuiet(r)
if _, _, err := becomeRootInUserNS("", path, w); err != nil {
w.Close()
lastErr = err
continue
}
if err := w.Close(); err != nil {
return false, 0, err
}
defer func() {
C.reexec_in_user_namespace_wait(-1, 0)
}()
b := make([]byte, 32)
n, err := r.Read(b)
if err != nil {
lastErr = fmt.Errorf("cannot read %q: %w", path, err)
continue
}
pausePid, err = strconv.Atoi(string(b[:n]))
if err != nil {
lastErr = err
continue
}
pausePid, err := strconv.Atoi(string(data))
if err != nil {
lastErr = fmt.Errorf("cannot parse file %q: %w", path, err)
continue
}
if pausePid > 0 && unix.Kill(pausePid, 0) == nil {

View File

@ -41,11 +41,7 @@ func GetRootlessGID() int {
// This is useful when there are already running containers and we
// don't have a pause process yet. We can use the paths to the conmon
// processes to attempt joining their namespaces.
// If needNewNamespace is set, the file is read from a temporary user
// namespace, this is useful for containers that are running with a
// different uidmap and the unprivileged user has no way to read the
// file owned by the root in the container.
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
func TryJoinFromFilePaths(pausePidPath string, paths []string) (bool, int, error) {
return false, -1, errors.New("this function is not supported on this os")
}

View File

@ -119,7 +119,7 @@ function _check_pause_process() {
# First let's run a container in the background to keep the userns active
local cname1=c1_$(random_string)
run_podman run -d --name $cname1 $IMAGE top
run_podman run -d --name $cname1 --uidmap 0:100:100 $IMAGE top
run_podman unshare readlink /proc/self/ns/user
userns="$output"
@ -136,6 +136,9 @@ function _check_pause_process() {
_test_sigproxy $cname2 $kidpid
# check pause process again
_check_pause_process
# our container exits 0 so podman should too
wait $kidpid || die "podman run exited $? instead of zero"