libpod: mount safely subpaths

add a function to securely mount a subpath inside a volume.  We cannot
trust that the subpath is safe since it is beneath a volume that could
be controlled by a separate container.  To avoid TOCTOU races between
when we check the subpath and when the OCI runtime mounts it, we open
the subpath, validate it, bind mount to a temporary directory and use
it instead of the original path.

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
Giuseppe Scrivano
2023-03-30 21:23:49 +02:00
parent 0858fab601
commit 4d56292e7a
7 changed files with 237 additions and 53 deletions

View File

@ -1035,10 +1035,11 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error {
}
// Generate the OCI newSpec
newSpec, err := c.generateSpec(ctx)
newSpec, cleanupFunc, err := c.generateSpec(ctx)
if err != nil {
return err
}
defer cleanupFunc()
// Make sure the workdir exists while initializing container
if err := c.resolveWorkDir(); err != nil {

View File

@ -13,6 +13,7 @@ import (
"os/user"
"path"
"path/filepath"
"runtime"
"strconv"
"strings"
"syscall"
@ -170,7 +171,21 @@ func getOverlayUpperAndWorkDir(options []string) (string, string, error) {
// Generate spec for a container
// Accepts a map of the container's dependencies
func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
func (c *Container) generateSpec(ctx context.Context) (s *spec.Spec, cleanupFuncRet func(), err error) {
var safeMounts []*safeMountInfo
// lock the thread so that the current thread will be kept alive until the mounts are used
runtime.LockOSThread()
cleanupFunc := func() {
runtime.UnlockOSThread()
for _, s := range safeMounts {
s.Close()
}
}
defer func() {
if err != nil {
cleanupFunc()
}
}()
overrides := c.getUserOverrides()
execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, overrides)
if err != nil {
@ -178,7 +193,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
execUser, err = lookupHostUser(c.config.User)
}
if err != nil {
return nil, err
return nil, nil, err
}
}
@ -194,51 +209,57 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
systemdMode = *c.config.Systemd
}
if err := util.AddPrivilegedDevices(&g, systemdMode); err != nil {
return nil, err
return nil, nil, err
}
}
// If network namespace was requested, add it now
if err := c.addNetworkNamespace(&g); err != nil {
return nil, err
return nil, nil, err
}
// Apply AppArmor checks and load the default profile if needed.
if len(c.config.Spec.Process.ApparmorProfile) > 0 {
updatedProfile, err := apparmor.CheckProfileAndLoadDefault(c.config.Spec.Process.ApparmorProfile)
if err != nil {
return nil, err
return nil, nil, err
}
g.SetProcessApparmorProfile(updatedProfile)
}
if err := c.makeBindMounts(); err != nil {
return nil, err
return nil, nil, err
}
if err := c.mountNotifySocket(g); err != nil {
return nil, err
return nil, nil, err
}
// Get host UID and GID based on the container process UID and GID.
hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid))
if err != nil {
return nil, err
return nil, nil, err
}
// Add named volumes
for _, namedVol := range c.config.NamedVolumes {
volume, err := c.runtime.GetVolume(namedVol.Name)
if err != nil {
return nil, fmt.Errorf("retrieving volume %s to add to container %s: %w", namedVol.Name, c.ID(), err)
return nil, nil, fmt.Errorf("retrieving volume %s to add to container %s: %w", namedVol.Name, c.ID(), err)
}
mountPoint, err := volume.MountPoint()
if err != nil {
return nil, err
return nil, nil, err
}
if len(namedVol.SubPath) > 0 {
mountPoint = filepath.Join(mountPoint, namedVol.SubPath)
safeMount, err := c.safeMountSubPath(mountPoint, namedVol.SubPath)
if err != nil {
return nil, nil, err
}
safeMounts = append(safeMounts, safeMount)
mountPoint = safeMount.mountPoint
}
overlayFlag := false
@ -249,7 +270,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
overlayFlag = true
upperDir, workDir, err = getOverlayUpperAndWorkDir(namedVol.Options)
if err != nil {
return nil, err
return nil, nil, err
}
}
}
@ -259,7 +280,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
var overlayOpts *overlay.Options
contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
if err != nil {
return nil, err
return nil, nil, err
}
overlayOpts = &overlay.Options{RootUID: c.RootUID(),
@ -271,17 +292,17 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
overlayMount, err = overlay.MountWithOptions(contentDir, mountPoint, namedVol.Dest, overlayOpts)
if err != nil {
return nil, fmt.Errorf("mounting overlay failed %q: %w", mountPoint, err)
return nil, nil, fmt.Errorf("mounting overlay failed %q: %w", mountPoint, err)
}
for _, o := range namedVol.Options {
if o == "U" {
if err := c.ChangeHostPathOwnership(mountPoint, true, int(hostUID), int(hostGID)); err != nil {
return nil, err
return nil, nil, err
}
if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
return nil, err
return nil, nil, err
}
}
}
@ -305,11 +326,21 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
m := &g.Config.Mounts[i]
var options []string
for _, o := range m.Options {
if strings.HasPrefix(o, "subpath=") {
subpath := strings.Split(o, "=")[1]
safeMount, err := c.safeMountSubPath(m.Source, subpath)
if err != nil {
return nil, nil, err
}
safeMounts = append(safeMounts, safeMount)
m.Source = safeMount.mountPoint
continue
}
if o == "idmap" || strings.HasPrefix(o, "idmap=") {
var err error
m.UIDMappings, m.GIDMappings, err = parseIDMapMountOption(c.config.IDMappings, o)
if err != nil {
return nil, err
return nil, nil, err
}
continue
}
@ -320,14 +351,14 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
} else {
// only chown on initial creation of container
if err := c.ChangeHostPathOwnership(m.Source, true, int(hostUID), int(hostGID)); err != nil {
return nil, err
return nil, nil, err
}
}
case "z":
fallthrough
case "Z":
if err := c.relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil {
return nil, err
return nil, nil, err
}
default:
@ -365,11 +396,11 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
for _, overlayVol := range c.config.OverlayVolumes {
upperDir, workDir, err := getOverlayUpperAndWorkDir(overlayVol.Options)
if err != nil {
return nil, err
return nil, nil, err
}
contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
if err != nil {
return nil, err
return nil, nil, err
}
overlayOpts := &overlay.Options{RootUID: c.RootUID(),
RootGID: c.RootGID(),
@ -380,18 +411,18 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
overlayMount, err := overlay.MountWithOptions(contentDir, overlayVol.Source, overlayVol.Dest, overlayOpts)
if err != nil {
return nil, fmt.Errorf("mounting overlay failed %q: %w", overlayVol.Source, err)
return nil, nil, fmt.Errorf("mounting overlay failed %q: %w", overlayVol.Source, err)
}
// Check overlay volume options
for _, o := range overlayVol.Options {
if o == "U" {
if err := c.ChangeHostPathOwnership(overlayVol.Source, true, int(hostUID), int(hostGID)); err != nil {
return nil, err
return nil, nil, err
}
if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
return nil, err
return nil, nil, err
}
}
}
@ -404,16 +435,16 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
// Mount the specified image.
img, _, err := c.runtime.LibimageRuntime().LookupImage(volume.Source, nil)
if err != nil {
return nil, fmt.Errorf("creating image volume %q:%q: %w", volume.Source, volume.Dest, err)
return nil, nil, fmt.Errorf("creating image volume %q:%q: %w", volume.Source, volume.Dest, err)
}
mountPoint, err := img.Mount(ctx, nil, "")
if err != nil {
return nil, fmt.Errorf("mounting image volume %q:%q: %w", volume.Source, volume.Dest, err)
return nil, nil, fmt.Errorf("mounting image volume %q:%q: %w", volume.Source, volume.Dest, err)
}
contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
if err != nil {
return nil, fmt.Errorf("failed to create TempDir in the %s directory: %w", c.config.StaticDir, err)
return nil, nil, fmt.Errorf("failed to create TempDir in the %s directory: %w", c.config.StaticDir, err)
}
var overlayMount spec.Mount
@ -423,7 +454,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
overlayMount, err = overlay.MountReadOnly(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
}
if err != nil {
return nil, fmt.Errorf("creating overlay mount for image %q failed: %w", volume.Source, err)
return nil, nil, fmt.Errorf("creating overlay mount for image %q failed: %w", volume.Source, err)
}
g.AddMount(overlayMount)
}
@ -449,7 +480,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
if c.config.Umask != "" {
decVal, err := strconv.ParseUint(c.config.Umask, 8, 32)
if err != nil {
return nil, fmt.Errorf("invalid Umask Value: %w", err)
return nil, nil, fmt.Errorf("invalid Umask Value: %w", err)
}
umask := uint32(decVal)
g.Config.Process.User.Umask = &umask
@ -459,7 +490,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
if len(c.config.Groups) > 0 {
gids, err := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, overrides)
if err != nil {
return nil, fmt.Errorf("looking up supplemental groups for container %s: %w", c.ID(), err)
return nil, nil, fmt.Errorf("looking up supplemental groups for container %s: %w", c.ID(), err)
}
for _, gid := range gids {
g.AddProcessAdditionalGid(gid)
@ -467,7 +498,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
}
if err := c.addSystemdMounts(&g); err != nil {
return nil, err
return nil, nil, err
}
// Look up and add groups the user belongs to, if a group wasn't directly specified
@ -482,7 +513,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
// Check whether the current user namespace has enough gids available.
availableGids, err := rootless.GetAvailableGids()
if err != nil {
return nil, fmt.Errorf("cannot read number of available GIDs: %w", err)
return nil, nil, fmt.Errorf("cannot read number of available GIDs: %w", err)
}
gidMappings = []idtools.IDMap{{
ContainerID: 0,
@ -514,7 +545,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
// Add shared namespaces from other containers
if err := c.addSharedNamespaces(&g); err != nil {
return nil, err
return nil, nil, err
}
g.SetRootPath(c.state.Mountpoint)
@ -525,7 +556,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
}
if err := c.setCgroupsPath(&g); err != nil {
return nil, err
return nil, nil, err
}
// Warning: CDI may alter g.Config in place.
@ -538,7 +569,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
}
_, err := registry.InjectDevices(g.Config, c.config.CDIDevices...)
if err != nil {
return nil, fmt.Errorf("setting up CDI devices: %w", err)
return nil, nil, fmt.Errorf("setting up CDI devices: %w", err)
}
}
@ -554,7 +585,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
if m.Type == "tmpfs" {
finalPath, err := securejoin.SecureJoin(c.state.Mountpoint, m.Destination)
if err != nil {
return nil, fmt.Errorf("resolving symlinks for mount destination %s: %w", m.Destination, err)
return nil, nil, fmt.Errorf("resolving symlinks for mount destination %s: %w", m.Destination, err)
}
trimmedPath := strings.TrimPrefix(finalPath, strings.TrimSuffix(c.state.Mountpoint, "/"))
m.Destination = trimmedPath
@ -563,25 +594,25 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
}
if err := c.addRootPropagation(&g, mounts); err != nil {
return nil, err
return nil, nil, err
}
// Warning: precreate hooks may alter g.Config in place.
if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil {
return nil, fmt.Errorf("setting up OCI Hooks: %w", err)
return nil, nil, fmt.Errorf("setting up OCI Hooks: %w", err)
}
if len(c.config.EnvSecrets) > 0 {
manager, err := c.runtime.SecretsManager()
if err != nil {
return nil, err
return nil, nil, err
}
if err != nil {
return nil, err
return nil, nil, err
}
for name, secr := range c.config.EnvSecrets {
_, data, err := manager.LookupSecretData(secr.Name)
if err != nil {
return nil, err
return nil, nil, err
}
g.AddProcessEnv(name, string(data))
}
@ -599,7 +630,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
}
}
return g.Config, nil
return g.Config, cleanupFunc, nil
}
// isWorkDirSymlink returns true if resolved workdir is symlink or a chain of symlinks,

View File

@ -6,6 +6,7 @@ package libpod
import (
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"syscall"
@ -316,3 +317,22 @@ func (c *Container) jailName() string {
return c.ID()
}
}
type safeMountInfo struct {
// mountPoint is the mount point.
mountPoint string
}
// Close releases the resources allocated with the safe mount info.
func (s *safeMountInfo) Close() {
}
// safeMountSubPath securely mounts a subpath inside a volume to a new temporary location.
// The function checks that the subpath is a valid subpath within the volume and that it
// does not escape the boundaries of the mount point (volume).
//
// The caller is responsible for closing the file descriptor and unmounting the subpath
// when it's no longer needed.
func (c *Container) safeMountSubPath(mountPoint, subpath string) (s *safeMountInfo, err error) {
return &safeMountInfo{mountPoint: filepath.Join(mountPoint, subpath)}, nil
}

View File

@ -6,6 +6,7 @@ package libpod
import (
"errors"
"fmt"
"io/fs"
"os"
"path"
"path/filepath"
@ -696,3 +697,81 @@ func (c *Container) getConmonPidFd() int {
}
return -1
}
type safeMountInfo struct {
// file is the open File.
file *os.File
// mountPoint is the mount point.
mountPoint string
}
// Close releases the resources allocated with the safe mount info.
func (s *safeMountInfo) Close() {
_ = unix.Unmount(s.mountPoint, unix.MNT_DETACH)
_ = s.file.Close()
}
// safeMountSubPath securely mounts a subpath inside a volume to a new temporary location.
// The function checks that the subpath is a valid subpath within the volume and that it
// does not escape the boundaries of the mount point (volume).
//
// The caller is responsible for closing the file descriptor and unmounting the subpath
// when it's no longer needed.
func (c *Container) safeMountSubPath(mountPoint, subpath string) (s *safeMountInfo, err error) {
joinedPath := filepath.Clean(filepath.Join(mountPoint, subpath))
fd, err := unix.Open(joinedPath, unix.O_RDONLY|unix.O_PATH, 0)
if err != nil {
return nil, err
}
f := os.NewFile(uintptr(fd), joinedPath)
defer func() {
if err != nil {
f.Close()
}
}()
// Once we got the file descriptor, we need to check that the subpath is a valid. We
// refer to the open FD so there won't be other path lookups (and no risk to follow a symlink).
fdPath := fmt.Sprintf("/proc/%d/fd/%d", os.Getpid(), f.Fd())
p, err := os.Readlink(fdPath)
if err != nil {
return nil, err
}
relPath, err := filepath.Rel(mountPoint, p)
if err != nil {
return nil, err
}
if relPath == ".." || strings.HasPrefix(relPath, "../") {
return nil, fmt.Errorf("subpath %q is outside of the volume %q", subpath, mountPoint)
}
fi, err := os.Stat(fdPath)
if err != nil {
return nil, err
}
npath := ""
switch {
case fi.Mode()&fs.ModeSymlink != 0:
return nil, fmt.Errorf("file %q is a symlink", joinedPath)
case fi.IsDir():
npath, err = os.MkdirTemp(c.state.RunDir, "subpath")
if err != nil {
return nil, err
}
default:
tmp, err := os.CreateTemp(c.state.RunDir, "subpath")
if err != nil {
return nil, err
}
tmp.Close()
npath = tmp.Name()
}
if err := unix.Mount(fdPath, npath, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
return nil, err
}
return &safeMountInfo{
file: f,
mountPoint: npath,
}, nil
}

View File

@ -8,7 +8,6 @@ import (
"math"
"net"
"os"
"path/filepath"
"regexp"
"runtime"
"strconv"
@ -407,10 +406,6 @@ func ToSpecGen(ctx context.Context, opts *CtrSpecGenOptions) (*specgen.SpecGener
}
volume.MountPath = dest
path := volumeSource.Source
if len(volume.SubPath) > 0 {
path = filepath.Join(path, volume.SubPath)
}
switch volumeSource.Type {
case KubeVolumeTypeBindMount:
// If the container has bind mounts, we need to check if
@ -419,17 +414,20 @@ func ToSpecGen(ctx context.Context, opts *CtrSpecGenOptions) (*specgen.SpecGener
// Make sure the z/Z option is not already there (from editing the YAML)
if k == define.BindMountPrefix {
lastIndex := strings.LastIndex(v, ":")
if v[:lastIndex] == path && !cutil.StringInSlice("z", options) && !cutil.StringInSlice("Z", options) {
if v[:lastIndex] == volumeSource.Source && !cutil.StringInSlice("z", options) && !cutil.StringInSlice("Z", options) {
options = append(options, v[lastIndex+1:])
}
}
}
mount := spec.Mount{
Destination: volume.MountPath,
Source: path,
Source: volumeSource.Source,
Type: "bind",
Options: options,
}
if len(volume.SubPath) > 0 {
mount.Options = append(mount.Options, fmt.Sprintf("subpath=%s", volume.SubPath))
}
s.Mounts = append(s.Mounts, mount)
case KubeVolumeTypeNamed:
namedVolume := specgen.NamedVolume{
@ -451,7 +449,7 @@ func ToSpecGen(ctx context.Context, opts *CtrSpecGenOptions) (*specgen.SpecGener
// We are setting the path as hostPath:mountPath to comply with pkg/specgen/generate.DeviceFromPath.
// The type is here just to improve readability as it is not taken into account when the actual device is created.
device := spec.LinuxDevice{
Path: fmt.Sprintf("%s:%s", path, volume.MountPath),
Path: fmt.Sprintf("%s:%s", volumeSource.Source, volume.MountPath),
Type: "c",
}
s.Devices = append(s.Devices, device)
@ -459,7 +457,7 @@ func ToSpecGen(ctx context.Context, opts *CtrSpecGenOptions) (*specgen.SpecGener
// We are setting the path as hostPath:mountPath to comply with pkg/specgen/generate.DeviceFromPath.
// The type is here just to improve readability as it is not taken into account when the actual device is created.
device := spec.LinuxDevice{
Path: fmt.Sprintf("%s:%s", path, volume.MountPath),
Path: fmt.Sprintf("%s:%s", volumeSource.Source, volume.MountPath),
Type: "b",
}
s.Devices = append(s.Devices, device)

View File

@ -44,7 +44,10 @@ func ProcessOptions(options []string, isTmpfs bool, sourcePath string) ([]string
continue
}
}
if strings.HasPrefix(splitOpt[0], "subpath") {
newOptions = append(newOptions, opt)
continue
}
if strings.HasPrefix(splitOpt[0], "idmap") {
if foundIdmap {
return nil, fmt.Errorf("the 'idmap' option can only be set once: %w", ErrDupeMntOption)

View File

@ -5015,6 +5015,58 @@ spec:
Expect(exec.OutputToString()).Should(ContainSubstring("123.txt"))
})
It("podman play kube with unsafe subpaths", func() {
SkipIfRemote("volume export does not exist on remote")
volumeCreate := podmanTest.Podman([]string{"volume", "create", "testvol1"})
volumeCreate.WaitWithDefaultTimeout()
Expect(volumeCreate).Should(Exit(0))
session := podmanTest.Podman([]string{"run", "--volume", "testvol1:/data", ALPINE, "sh", "-c", "mkdir -p /data/testing && ln -s /etc /data/testing/onlythis"})
session.WaitWithDefaultTimeout()
Expect(session).Should(Exit(0))
tar := filepath.Join(podmanTest.TempDir, "out.tar")
session = podmanTest.Podman([]string{"volume", "export", "--output", tar, "testvol1"})
session.WaitWithDefaultTimeout()
Expect(session).Should(Exit(0))
volumeCreate = podmanTest.Podman([]string{"volume", "create", "testvol"})
volumeCreate.WaitWithDefaultTimeout()
Expect(volumeCreate).Should(Exit(0))
volumeImp := podmanTest.Podman([]string{"volume", "import", "testvol", filepath.Join(podmanTest.TempDir, "out.tar")})
volumeImp.WaitWithDefaultTimeout()
Expect(volumeImp).Should(Exit(0))
err = writeYaml(subpathTestNamedVolume, kubeYaml)
Expect(err).ToNot(HaveOccurred())
playKube := podmanTest.Podman([]string{"play", "kube", kubeYaml})
playKube.WaitWithDefaultTimeout()
Expect(playKube).Should(Exit(125))
Expect(playKube.OutputToString()).Should(ContainSubstring("is outside"))
})
It("podman play kube with unsafe hostPath subpaths", func() {
if !Containerized() {
Skip("something is wrong with file permissions in CI or in the yaml creation. cannot ls or cat the fs unless in a container")
}
hostPathLocation := podmanTest.TempDir
Expect(os.MkdirAll(filepath.Join(hostPathLocation, "testing"), 0755)).To(Succeed())
Expect(os.Symlink("/", filepath.Join(hostPathLocation, "testing", "symlink"))).To(Succeed())
pod := getPod(withPodName("testpod"), withCtr(getCtr(withImage(ALPINE), withName("testctr"), withCmd([]string{"top"}), withVolumeMount("/foo", "testing/symlink", false))), withVolume(getHostPathVolume("DirectoryOrCreate", hostPathLocation)))
err = generateKubeYaml("pod", pod, kubeYaml)
Expect(err).To(Not(HaveOccurred()))
playKube := podmanTest.Podman([]string{"play", "kube", kubeYaml})
playKube.WaitWithDefaultTimeout()
Expect(playKube).Should(Exit(125))
Expect(playKube.OutputToString()).Should(ContainSubstring("is outside"))
})
It("podman play kube with configMap subpaths", func() {
volumeName := "cmVol"
cm := getConfigMap(withConfigMapName(volumeName), withConfigMapData("FOO", "foobar"))