mirror of
https://github.com/containers/podman.git
synced 2025-08-06 19:44:14 +08:00

Until Podman v4.3, privileged rootfull containers would expose all the host devices to the container while rootless ones would exclude `/dev/ptmx` and `/dev/tty*`. When 5a2405ae1b3a ("Don't mount /dev/tty* inside privileged containers running systemd") landed, rootfull containers started excluding all the `/dev/tty*` devices when the container would be running in systemd mode, reducing the disparity between rootless and rootfull containers when running in this mode. However, this commit regressed some legitimate use cases: exposing non-virtual-terminal tty devices (modems, arduinos, serial consoles, ...) to the container, and the regression was addressed in f4c81b0aa5fd ("Only prevent VTs to be mounted inside privileged systemd containers"). This now calls into question why all tty devices were historically prevented from being shared with the rootless non-privileged containers. A look at the podman git history reveals that the code was introduced as part of ba430bfe5ef6 ("podman v2 remove bloat v2"), and obviously was copy-pasted from some other code I couldn't find. In any case, we can easily guess that this check was put in place for the same reason 5a2405ae1b3a was introduced: to prevent breaking the host environment's consoles. This also means that excluding *all* tty devices is overbearing, and should instead be limited to just virtual terminals like we do on the rootfull path. This is what this commit does, thus making the rootless codepath behave like the rootfull one when in systemd mode. This leaves `/dev/ptmx` as the main difference between the two codepaths. Based on the blog post from the then-runC maintainer[1] and this Red Hat bug[2], I believe that this is intentional and a needed difference for the rootless path. 
Closes: #16925 Suggested-by: Fabian Holler <mail@fholler.de> Signed-off-by: Martin Roukala (né Peres) <martin.roukala@mupuf.org> [1]: https://www.cyphar.com/blog/post/20160627-rootless-containers-with-runc [2]: https://bugzilla.redhat.com/show_bug.cgi?id=501718
243 lines
6.6 KiB
Go
243 lines
6.6 KiB
Go
package util
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"io/fs"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"syscall"
|
|
|
|
"github.com/containers/podman/v4/libpod/define"
|
|
"github.com/containers/podman/v4/pkg/rootless"
|
|
"github.com/containers/psgo"
|
|
spec "github.com/opencontainers/runtime-spec/specs-go"
|
|
"github.com/opencontainers/runtime-tools/generate"
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
// errNotADevice is the sentinel error DeviceFromPath returns when the path it
// was given is neither a block device, a character device nor a FIFO.
var errNotADevice = errors.New("not a device node")
|
|
|
|
// GetContainerPidInformationDescriptors returns a string slice of all supported
|
|
// format descriptors of GetContainerPidInformation.
|
|
func GetContainerPidInformationDescriptors() ([]string, error) {
|
|
return psgo.ListDescriptors(), nil
|
|
}
|
|
|
|
// FindDeviceNodes parses /dev/ into a set of major:minor -> path, where
|
|
// [major:minor] is the device's major and minor numbers formatted as, for
|
|
// example, 2:0 and path is the path to the device node.
|
|
// Symlinks to nodes are ignored.
|
|
func FindDeviceNodes() (map[string]string, error) {
|
|
nodes := make(map[string]string)
|
|
err := filepath.WalkDir("/dev", func(path string, d fs.DirEntry, err error) error {
|
|
if err != nil {
|
|
logrus.Warnf("Error descending into path %s: %v", path, err)
|
|
return filepath.SkipDir
|
|
}
|
|
|
|
// If we aren't a device node, do nothing.
|
|
if d.Type()&(os.ModeDevice|os.ModeCharDevice) == 0 {
|
|
return nil
|
|
}
|
|
|
|
info, err := d.Info()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// We are a device node. Get major/minor.
|
|
sysstat, ok := info.Sys().(*syscall.Stat_t)
|
|
if !ok {
|
|
return errors.New("could not convert stat output for use")
|
|
}
|
|
// We must typeconvert sysstat.Rdev from uint64->int to avoid constant overflow
|
|
rdev := int(sysstat.Rdev)
|
|
major := ((rdev >> 8) & 0xfff) | ((rdev >> 32) & ^0xfff)
|
|
minor := (rdev & 0xff) | ((rdev >> 12) & ^0xff)
|
|
|
|
nodes[fmt.Sprintf("%d:%d", major, minor)] = path
|
|
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return nodes, nil
|
|
}
|
|
|
|
// isVirtualConsoleDevice reports whether device is the path of a virtual
// console.
//
// Virtual consoles are of the form `/dev/tty\d+`; any other device such as
// /dev/tty, ttyUSB0, or ttyACM0 is not matched.
// See `man 4 console` for more information.
//
// NOTE: The check is done by hand instead of with a regular expression (which
// would need pre-compilation and so increase the startup time) or with
// path.Match (whose `*` wildcard would also accept arbitrary trailing
// characters, e.g. /dev/tty1abc).
func isVirtualConsoleDevice(device string) bool {
	const prefix = "/dev/tty"

	// The prefix must be followed by at least one character.
	if len(device) <= len(prefix) || device[:len(prefix)] != prefix {
		return false
	}
	// Everything after the prefix has to be an ASCII decimal digit.
	for i := len(prefix); i < len(device); i++ {
		if c := device[i]; c < '0' || c > '9' {
			return false
		}
	}
	return true
}
|
|
|
|
func AddPrivilegedDevices(g *generate.Generator, systemdMode bool) error {
|
|
hostDevices, err := getDevices("/dev")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
g.ClearLinuxDevices()
|
|
|
|
if rootless.IsRootless() {
|
|
mounts := make(map[string]interface{})
|
|
for _, m := range g.Mounts() {
|
|
mounts[m.Destination] = true
|
|
}
|
|
newMounts := []spec.Mount{}
|
|
for _, d := range hostDevices {
|
|
devMnt := spec.Mount{
|
|
Destination: d.Path,
|
|
Type: define.TypeBind,
|
|
Source: d.Path,
|
|
Options: []string{"slave", "nosuid", "noexec", "rw", "rbind"},
|
|
}
|
|
|
|
/* The following devices should not be mounted in rootless containers:
|
|
*
|
|
* /dev/ptmx: The host-provided /dev/ptmx should not be shared to
|
|
* the rootless containers for security reasons, and
|
|
* the container runtime will create it for us
|
|
* anyway (ln -s /dev/pts/ptmx /dev/ptmx);
|
|
* /dev/tty[0-9]+: Prevent the container from taking over the host's
|
|
* virtual consoles, even when not in systemd mode
|
|
* for backwards compatibility.
|
|
*/
|
|
if d.Path == "/dev/ptmx" || isVirtualConsoleDevice(d.Path) {
|
|
continue
|
|
}
|
|
if _, found := mounts[d.Path]; found {
|
|
continue
|
|
}
|
|
newMounts = append(newMounts, devMnt)
|
|
}
|
|
g.Config.Mounts = append(newMounts, g.Config.Mounts...)
|
|
if g.Config.Linux.Resources != nil {
|
|
g.Config.Linux.Resources.Devices = nil
|
|
}
|
|
} else {
|
|
for _, d := range hostDevices {
|
|
/* Restrict access to the virtual consoles *only* when running
|
|
* in systemd mode to improve backwards compatibility. See
|
|
* https://github.com/containers/podman/issues/15878.
|
|
*
|
|
* NOTE: May need revisiting in the future to drop the systemd
|
|
* condition if more use cases end up breaking the virtual terminals
|
|
* of people who specifically disable the systemd mode. It would
|
|
* also provide a more consistent behaviour between rootless and
|
|
* rootfull containers.
|
|
*/
|
|
if systemdMode && isVirtualConsoleDevice(d.Path) {
|
|
continue
|
|
}
|
|
g.AddDevice(d)
|
|
}
|
|
// Add resources device - need to clear the existing one first.
|
|
if g.Config.Linux.Resources != nil {
|
|
g.Config.Linux.Resources.Devices = nil
|
|
}
|
|
g.AddLinuxResourcesDevice(true, "", nil, nil, "rwm")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// based on getDevices from runc (libcontainer/devices/devices.go)
|
|
func getDevices(path string) ([]spec.LinuxDevice, error) {
|
|
files, err := os.ReadDir(path)
|
|
if err != nil {
|
|
if rootless.IsRootless() && os.IsPermission(err) {
|
|
return nil, nil
|
|
}
|
|
return nil, err
|
|
}
|
|
out := []spec.LinuxDevice{}
|
|
for _, f := range files {
|
|
switch {
|
|
case f.IsDir():
|
|
switch f.Name() {
|
|
// ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825
|
|
case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts":
|
|
continue
|
|
default:
|
|
sub, err := getDevices(filepath.Join(path, f.Name()))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if sub != nil {
|
|
out = append(out, sub...)
|
|
}
|
|
continue
|
|
}
|
|
case f.Name() == "console":
|
|
continue
|
|
case f.Type()&os.ModeSymlink != 0:
|
|
continue
|
|
}
|
|
|
|
device, err := DeviceFromPath(filepath.Join(path, f.Name()))
|
|
if err != nil {
|
|
if err == errNotADevice {
|
|
continue
|
|
}
|
|
if os.IsNotExist(err) {
|
|
continue
|
|
}
|
|
return nil, err
|
|
}
|
|
out = append(out, *device)
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// Copied from github.com/opencontainers/runc/libcontainer/devices
|
|
// Given the path to a device look up the information about a linux device
|
|
func DeviceFromPath(path string) (*spec.LinuxDevice, error) {
|
|
var stat unix.Stat_t
|
|
err := unix.Lstat(path, &stat)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var (
|
|
devType string
|
|
mode = stat.Mode
|
|
devNumber = uint64(stat.Rdev) //nolint: unconvert
|
|
m = os.FileMode(mode)
|
|
)
|
|
|
|
switch {
|
|
case mode&unix.S_IFBLK == unix.S_IFBLK:
|
|
devType = "b"
|
|
case mode&unix.S_IFCHR == unix.S_IFCHR:
|
|
devType = "c"
|
|
case mode&unix.S_IFIFO == unix.S_IFIFO:
|
|
devType = "p"
|
|
default:
|
|
return nil, errNotADevice
|
|
}
|
|
|
|
return &spec.LinuxDevice{
|
|
Type: devType,
|
|
Path: path,
|
|
FileMode: &m,
|
|
UID: &stat.Uid,
|
|
GID: &stat.Gid,
|
|
Major: int64(unix.Major(devNumber)),
|
|
Minor: int64(unix.Minor(devNumber)),
|
|
}, nil
|
|
}
|