mirror of
https://github.com/containers/podman.git
synced 2025-05-17 23:26:08 +08:00
Merge pull request #23032 from giuseppe/drop-make-accessible
libpod: do not chmod bind mounts
This commit is contained in:
@ -14,6 +14,8 @@ import (
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
metadata "github.com/checkpoint-restore/checkpointctl/lib"
|
||||
@ -544,16 +546,6 @@ func (c *Container) setupStorage(ctx context.Context) error {
|
||||
c.config.StaticDir = containerInfo.Dir
|
||||
c.state.RunDir = containerInfo.RunDir
|
||||
|
||||
if len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0 {
|
||||
if err := idtools.SafeChown(containerInfo.RunDir, c.RootUID(), c.RootGID()); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := idtools.SafeChown(containerInfo.Dir, c.RootUID(), c.RootGID()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Set the default Entrypoint and Command
|
||||
if containerInfo.Config != nil {
|
||||
// Set CMD in the container to the default configuration only if ENTRYPOINT is not set by the user.
|
||||
@ -2372,6 +2364,75 @@ func (c *Container) setupOCIHooks(ctx context.Context, config *spec.Spec) (map[s
|
||||
return allHooks, nil
|
||||
}
|
||||
|
||||
// getRootPathForOCI returns the root path to use for the OCI runtime.
|
||||
// If the current user is mapped in the container user namespace, then it returns
|
||||
// the container's mountpoint directly from the storage.
|
||||
// Otherwise, it returns an intermediate mountpoint that is accessible to anyone.
|
||||
func (c *Container) getRootPathForOCI() (string, error) {
|
||||
if hasCurrentUserMapped(c) {
|
||||
return c.state.Mountpoint, nil
|
||||
}
|
||||
return c.getIntermediateMountpointUser()
|
||||
}
|
||||
|
||||
var (
|
||||
intermediateMountPoint string
|
||||
intermediateMountPointErr error
|
||||
intermediateMountPointSync sync.Mutex
|
||||
)
|
||||
|
||||
// getIntermediateMountpointUser returns a path that is accessible to everyone. It must be on TMPDIR since
|
||||
// the runroot/tmpdir used by libpod are accessible only to the owner.
|
||||
// To avoid TOCTOU issues, the path must be owned by the current user's UID and GID.
|
||||
// The path can be used by different containers, so a mount must be created only in a private mount namespace.
|
||||
func (c *Container) recreateIntermediateMountpointUser() (string, error) {
|
||||
uid := os.Geteuid()
|
||||
gid := os.Getegid()
|
||||
for i := 0; ; i++ {
|
||||
tmpDir := os.Getenv("TMPDIR")
|
||||
if tmpDir == "" {
|
||||
tmpDir = "/tmp"
|
||||
}
|
||||
dir := filepath.Join(tmpDir, fmt.Sprintf("intermediate-mountpoint-%d.%d", rootless.GetRootlessUID(), i))
|
||||
err := os.Mkdir(dir, 0755)
|
||||
if err != nil {
|
||||
if !errors.Is(err, os.ErrExist) {
|
||||
return "", err
|
||||
}
|
||||
st, err2 := os.Stat(dir)
|
||||
if err2 != nil {
|
||||
return "", err
|
||||
}
|
||||
sys := st.Sys().(*syscall.Stat_t)
|
||||
if !st.IsDir() || sys.Uid != uint32(uid) || sys.Gid != uint32(gid) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
return dir, nil
|
||||
}
|
||||
}
|
||||
|
||||
// getIntermediateMountpointUser returns a path that is accessible to everyone.
|
||||
// To avoid TOCTOU issues, the path must be owned by the current user's UID and GID.
|
||||
// The path can be used by different containers, so a mount must be created only in a private mount namespace.
|
||||
func (c *Container) getIntermediateMountpointUser() (string, error) {
|
||||
intermediateMountPointSync.Lock()
|
||||
defer intermediateMountPointSync.Unlock()
|
||||
|
||||
if intermediateMountPoint == "" || fileutils.Exists(intermediateMountPoint) != nil {
|
||||
return c.recreateIntermediateMountpointUser()
|
||||
}
|
||||
|
||||
// update the timestamp to prevent systemd-tmpfiles from removing it
|
||||
now := time.Now()
|
||||
if err := os.Chtimes(intermediateMountPoint, now, now); err != nil {
|
||||
if !errors.Is(err, os.ErrNotExist) {
|
||||
return c.recreateIntermediateMountpointUser()
|
||||
}
|
||||
}
|
||||
return intermediateMountPoint, intermediateMountPointErr
|
||||
}
|
||||
|
||||
// mount mounts the container's root filesystem
|
||||
func (c *Container) mount() (string, error) {
|
||||
if c.state.State == define.ContainerStateRemoving {
|
||||
|
@ -565,7 +565,11 @@ func (c *Container) generateSpec(ctx context.Context) (s *spec.Spec, cleanupFunc
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
g.SetRootPath(c.state.Mountpoint)
|
||||
rootPath, err := c.getRootPathForOCI()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
g.SetRootPath(rootPath)
|
||||
g.AddAnnotation("org.opencontainers.image.stopSignal", strconv.FormatUint(uint64(c.config.StopSignal), 10))
|
||||
|
||||
if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists {
|
||||
@ -1834,10 +1838,6 @@ func (c *Container) mountIntoRootDirs(mountName string, mountPath string) error
|
||||
|
||||
// Make standard bind mounts to include in the container
|
||||
func (c *Container) makeBindMounts() error {
|
||||
if err := idtools.SafeChown(c.state.RunDir, c.RootUID(), c.RootGID()); err != nil {
|
||||
return fmt.Errorf("cannot chown run directory: %w", err)
|
||||
}
|
||||
|
||||
if c.state.BindMounts == nil {
|
||||
c.state.BindMounts = make(map[string]string)
|
||||
}
|
||||
@ -1917,15 +1917,6 @@ func (c *Container) makeBindMounts() error {
|
||||
return fmt.Errorf("assigning mounts to container %s: %w", c.ID(), err)
|
||||
}
|
||||
}
|
||||
|
||||
if !hasCurrentUserMapped(c) {
|
||||
if err := makeAccessible(resolvPath, c.RootUID(), c.RootGID()); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := makeAccessible(hostsPath, c.RootUID(), c.RootGID()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if !c.config.UseImageResolvConf {
|
||||
if err := c.createResolvConf(); err != nil {
|
||||
|
@ -183,23 +183,11 @@ func hasCurrentUserMapped(ctr *Container) bool {
|
||||
|
||||
// CreateContainer creates a container.
|
||||
func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
|
||||
// always make the run dir accessible to the current user so that the PID files can be read without
|
||||
// being in the rootless user namespace.
|
||||
if err := makeAccessible(ctr.state.RunDir, 0, 0); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if !hasCurrentUserMapped(ctr) {
|
||||
for _, i := range []string{ctr.state.RunDir, ctr.runtime.config.Engine.TmpDir, ctr.config.StaticDir, ctr.state.Mountpoint, ctr.runtime.config.Engine.VolumePath} {
|
||||
if err := makeAccessible(i, ctr.RootUID(), ctr.RootGID()); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
// if we are running a non privileged container, be sure to umount some kernel paths so they are not
|
||||
// bind mounted inside the container at all.
|
||||
if !ctr.config.Privileged && !rootless.IsRootless() {
|
||||
return r.createRootlessContainer(ctr, restoreOptions)
|
||||
}
|
||||
hideFiles := !ctr.config.Privileged && !rootless.IsRootless()
|
||||
return r.createRootlessContainer(ctr, restoreOptions, hideFiles)
|
||||
}
|
||||
return r.createOCIContainer(ctr, restoreOptions)
|
||||
}
|
||||
@ -979,28 +967,6 @@ func (r *ConmonOCIRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntime
|
||||
return &conmon, &ocirt, nil
|
||||
}
|
||||
|
||||
// makeAccessible changes the path permission and each parent directory to have --x--x--x
|
||||
func makeAccessible(path string, uid, gid int) error {
|
||||
for ; path != "/"; path = filepath.Dir(path) {
|
||||
st, err := os.Stat(path)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid {
|
||||
continue
|
||||
}
|
||||
if st.Mode()&0111 != 0111 {
|
||||
if err := os.Chmod(path, st.Mode()|0111); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Wait for a container which has been sent a signal to stop
|
||||
func waitContainerStop(ctr *Container, timeout time.Duration) error {
|
||||
return waitPidStop(ctr.state.PID, timeout)
|
||||
|
@ -8,7 +8,7 @@ import (
|
||||
"os/exec"
|
||||
)
|
||||
|
||||
func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
|
||||
func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions, hideFiles bool) (int64, error) {
|
||||
return -1, errors.New("unsupported (*ConmonOCIRuntime) createRootlessContainer")
|
||||
}
|
||||
|
||||
|
@ -3,7 +3,9 @@
|
||||
package libpod
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
@ -25,7 +27,7 @@ import (
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
|
||||
func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions, hideFiles bool) (int64, error) {
|
||||
type result struct {
|
||||
restoreDuration int64
|
||||
err error
|
||||
@ -40,35 +42,88 @@ func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOption
|
||||
}
|
||||
defer errorhandling.CloseQuiet(fd)
|
||||
|
||||
rootPath, err := ctr.getRootPathForOCI()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// create a new mountns on the current thread
|
||||
if err = unix.Unshare(unix.CLONE_NEWNS); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer func() {
|
||||
if err := unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS); err != nil {
|
||||
logrus.Errorf("Unable to clone new namespace: %q", err)
|
||||
err := unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS)
|
||||
if err == nil {
|
||||
// If we are able to reset the previous mount namespace, unlock the thread and reuse it
|
||||
runtime.UnlockOSThread()
|
||||
} else {
|
||||
// otherwise, leave the thread locked and the Go runtime will terminate it
|
||||
logrus.Errorf("Unable to reset the previous mount namespace: %q", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// don't spread our mounts around. We are setting only /sys to be slave
|
||||
// so that the cleanup process is still able to umount the storage and the
|
||||
// changes are propagated to the host.
|
||||
err = unix.Mount("/sys", "/sys", "none", unix.MS_REC|unix.MS_SLAVE, "")
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("cannot make /sys slave: %w", err)
|
||||
}
|
||||
|
||||
mounts, err := pmount.GetMounts()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
for _, m := range mounts {
|
||||
if !strings.HasPrefix(m.Mountpoint, "/sys/kernel") {
|
||||
continue
|
||||
if rootPath != "" {
|
||||
byMountpoint := make(map[string]*pmount.Info)
|
||||
for _, m := range mounts {
|
||||
byMountpoint[m.Mountpoint] = m
|
||||
}
|
||||
err = unix.Unmount(m.Mountpoint, 0)
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return 0, fmt.Errorf("cannot unmount %s: %w", m.Mountpoint, err)
|
||||
isShared := false
|
||||
var parentMount string
|
||||
for dir := filepath.Dir(rootPath); ; dir = filepath.Dir(dir) {
|
||||
if m, found := byMountpoint[dir]; found {
|
||||
parentMount = dir
|
||||
for _, o := range strings.Split(m.Optional, ",") {
|
||||
opt := strings.Split(o, ":")
|
||||
if opt[0] == "shared" {
|
||||
isShared = true
|
||||
break
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
if dir == "/" {
|
||||
return 0, fmt.Errorf("cannot find mountpoint for the root path")
|
||||
}
|
||||
}
|
||||
|
||||
// do not propagate the bind mount on the parent mount namespace
|
||||
if err := unix.Mount("", parentMount, "", unix.MS_SLAVE, ""); err != nil {
|
||||
return 0, fmt.Errorf("failed to make %s slave: %w", parentMount, err)
|
||||
}
|
||||
|
||||
// bind mount the containers' mount path to the path where the OCI runtime expects it to be
|
||||
if err := unix.Mount(ctr.state.Mountpoint, rootPath, "", unix.MS_BIND, ""); err != nil {
|
||||
return 0, fmt.Errorf("failed to bind mount %s to %s: %w", ctr.state.Mountpoint, rootPath, err)
|
||||
}
|
||||
|
||||
if isShared {
|
||||
// we need to restore the shared propagation of the parent mount so that we don't break -v $SRC:$DST:shared in the container
|
||||
// if $SRC is on the same mount as the root path
|
||||
if err := unix.Mount("", parentMount, "", unix.MS_SHARED, ""); err != nil {
|
||||
return 0, fmt.Errorf("failed to restore MS_SHARED propagation for %s: %w", parentMount, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if hideFiles {
|
||||
// don't spread our mounts around. We are setting only /sys to be slave
|
||||
// so that the cleanup process is still able to umount the storage and the
|
||||
// changes are propagated to the host.
|
||||
err = unix.Mount("/sys", "/sys", "none", unix.MS_REC|unix.MS_SLAVE, "")
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("cannot make /sys slave: %w", err)
|
||||
}
|
||||
for _, m := range mounts {
|
||||
if !strings.HasPrefix(m.Mountpoint, "/sys/kernel") {
|
||||
continue
|
||||
}
|
||||
err = unix.Unmount(m.Mountpoint, 0)
|
||||
if err != nil && !errors.Is(err, fs.ErrNotExist) {
|
||||
return 0, fmt.Errorf("cannot unmount %s: %w", m.Mountpoint, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return r.createOCIContainer(ctr, restoreOptions)
|
||||
|
Reference in New Issue
Block a user