Files
podman/pkg/util/utils_linux.go
Martin Roukala (né Peres) 70057c8b47 Make rootless privileged containers share the same tty devices as rootfull ones
Until Podman v4.3, privileged rootfull containers would expose all the
host devices to the container while rootless ones would exclude
`/dev/ptmx` and `/dev/tty*`.

When 5a2405ae1b3a ("Don't mount /dev/tty* inside privileged containers
running systemd") landed, rootfull containers started excluding all the
`/dev/tty*` devices when the container would be running in systemd
mode, reducing the disparity between rootless and rootfull containers
when running in this mode.

However, this commit regressed some legitimate use cases: exposing
non-virtual-terminal tty devices (modems, arduinos, serial
consoles, ...) to the container, and the regression was addressed in
f4c81b0aa5fd ("Only prevent VTs to be mounted inside privileged
systemd containers").

This now calls into question why all tty devices were historically
prevented from being shared to the rootless non-privileged containers.
A look at the podman git history reveals that the code was introduced
as part of ba430bfe5ef6 ("podman v2 remove bloat v2"), and obviously
was copy-pasted from some other code I couldn't find.

In any case, we can easily guess that this check was put for the same
reason 5a2405ae1b3a was introduced: to prevent breaking the host
environment's consoles. This also means that excluding *all* tty
devices is overbearing, and should instead be limited to just virtual
terminals like we do on the rootfull path.

This is what this commit does, thus making the rootless codepath behave
like the rootfull one when in systemd mode.

This leaves `/dev/ptmx` as the main difference between the two
codepath. Based on the blog post from the then-runC maintainer[1] and
this Red Hat bug[2], I believe that this is intentional and a needed
difference for the rootless path.

Closes: #16925
Suggested-by: Fabian Holler <mail@fholler.de>
Signed-off-by: Martin Roukala (né Peres) <martin.roukala@mupuf.org>

[1]: https://www.cyphar.com/blog/post/20160627-rootless-containers-with-runc
[2]: https://bugzilla.redhat.com/show_bug.cgi?id=501718
2023-01-16 16:23:53 +02:00

243 lines
6.6 KiB
Go

package util
import (
"errors"
"fmt"
"io/fs"
"os"
"path"
"path/filepath"
"syscall"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/pkg/rootless"
"github.com/containers/psgo"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
var (
errNotADevice = errors.New("not a device node")
)
// GetContainerPidInformationDescriptors returns a string slice of all supported
// format descriptors of GetContainerPidInformation.
func GetContainerPidInformationDescriptors() ([]string, error) {
return psgo.ListDescriptors(), nil
}
// FindDeviceNodes parses /dev/ into a set of major:minor -> path, where
// [major:minor] is the device's major and minor numbers formatted as, for
// example, 2:0 and path is the path to the device node.
// Symlinks to nodes are ignored.
func FindDeviceNodes() (map[string]string, error) {
nodes := make(map[string]string)
err := filepath.WalkDir("/dev", func(path string, d fs.DirEntry, err error) error {
if err != nil {
logrus.Warnf("Error descending into path %s: %v", path, err)
return filepath.SkipDir
}
// If we aren't a device node, do nothing.
if d.Type()&(os.ModeDevice|os.ModeCharDevice) == 0 {
return nil
}
info, err := d.Info()
if err != nil {
return err
}
// We are a device node. Get major/minor.
sysstat, ok := info.Sys().(*syscall.Stat_t)
if !ok {
return errors.New("could not convert stat output for use")
}
// We must typeconvert sysstat.Rdev from uint64->int to avoid constant overflow
rdev := int(sysstat.Rdev)
major := ((rdev >> 8) & 0xfff) | ((rdev >> 32) & ^0xfff)
minor := (rdev & 0xff) | ((rdev >> 12) & ^0xff)
nodes[fmt.Sprintf("%d:%d", major, minor)] = path
return nil
})
if err != nil {
return nil, err
}
return nodes, nil
}
func isVirtualConsoleDevice(device string) bool {
/*
Virtual consoles are of the form `/dev/tty\d+`, any other device such as
/dev/tty, ttyUSB0, or ttyACM0 should not be matched.
See `man 4 console` for more information.
NOTE: Matching is done using path.Match even though a regular expression
would have been more accurate. This is because a regular
expression would have required pre-compilation, which would have
increase the startup time needlessly or made the code more complex
than needed.
*/
matched, _ := path.Match("/dev/tty[0-9]*", device)
return matched
}
func AddPrivilegedDevices(g *generate.Generator, systemdMode bool) error {
hostDevices, err := getDevices("/dev")
if err != nil {
return err
}
g.ClearLinuxDevices()
if rootless.IsRootless() {
mounts := make(map[string]interface{})
for _, m := range g.Mounts() {
mounts[m.Destination] = true
}
newMounts := []spec.Mount{}
for _, d := range hostDevices {
devMnt := spec.Mount{
Destination: d.Path,
Type: define.TypeBind,
Source: d.Path,
Options: []string{"slave", "nosuid", "noexec", "rw", "rbind"},
}
/* The following devices should not be mounted in rootless containers:
*
* /dev/ptmx: The host-provided /dev/ptmx should not be shared to
* the rootless containers for security reasons, and
* the container runtime will create it for us
* anyway (ln -s /dev/pts/ptmx /dev/ptmx);
* /dev/tty[0-9]+: Prevent the container from taking over the host's
* virtual consoles, even when not in systemd mode
* for backwards compatibility.
*/
if d.Path == "/dev/ptmx" || isVirtualConsoleDevice(d.Path) {
continue
}
if _, found := mounts[d.Path]; found {
continue
}
newMounts = append(newMounts, devMnt)
}
g.Config.Mounts = append(newMounts, g.Config.Mounts...)
if g.Config.Linux.Resources != nil {
g.Config.Linux.Resources.Devices = nil
}
} else {
for _, d := range hostDevices {
/* Restrict access to the virtual consoles *only* when running
* in systemd mode to improve backwards compatibility. See
* https://github.com/containers/podman/issues/15878.
*
* NOTE: May need revisiting in the future to drop the systemd
* condition if more use cases end up breaking the virtual terminals
* of people who specifically disable the systemd mode. It would
* also provide a more consistent behaviour between rootless and
* rootfull containers.
*/
if systemdMode && isVirtualConsoleDevice(d.Path) {
continue
}
g.AddDevice(d)
}
// Add resources device - need to clear the existing one first.
if g.Config.Linux.Resources != nil {
g.Config.Linux.Resources.Devices = nil
}
g.AddLinuxResourcesDevice(true, "", nil, nil, "rwm")
}
return nil
}
// based on getDevices from runc (libcontainer/devices/devices.go)
func getDevices(path string) ([]spec.LinuxDevice, error) {
files, err := os.ReadDir(path)
if err != nil {
if rootless.IsRootless() && os.IsPermission(err) {
return nil, nil
}
return nil, err
}
out := []spec.LinuxDevice{}
for _, f := range files {
switch {
case f.IsDir():
switch f.Name() {
// ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825
case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts":
continue
default:
sub, err := getDevices(filepath.Join(path, f.Name()))
if err != nil {
return nil, err
}
if sub != nil {
out = append(out, sub...)
}
continue
}
case f.Name() == "console":
continue
case f.Type()&os.ModeSymlink != 0:
continue
}
device, err := DeviceFromPath(filepath.Join(path, f.Name()))
if err != nil {
if err == errNotADevice {
continue
}
if os.IsNotExist(err) {
continue
}
return nil, err
}
out = append(out, *device)
}
return out, nil
}
// Copied from github.com/opencontainers/runc/libcontainer/devices
// Given the path to a device look up the information about a linux device
func DeviceFromPath(path string) (*spec.LinuxDevice, error) {
var stat unix.Stat_t
err := unix.Lstat(path, &stat)
if err != nil {
return nil, err
}
var (
devType string
mode = stat.Mode
devNumber = uint64(stat.Rdev) //nolint: unconvert
m = os.FileMode(mode)
)
switch {
case mode&unix.S_IFBLK == unix.S_IFBLK:
devType = "b"
case mode&unix.S_IFCHR == unix.S_IFCHR:
devType = "c"
case mode&unix.S_IFIFO == unix.S_IFIFO:
devType = "p"
default:
return nil, errNotADevice
}
return &spec.LinuxDevice{
Type: devType,
Path: path,
FileMode: &m,
UID: &stat.Uid,
GID: &stat.Gid,
Major: int64(unix.Major(devNumber)),
Minor: int64(unix.Minor(devNumber)),
}, nil
}