update github.com/opencontainers/runc to v1.2.0-rc.3

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
This commit is contained in:
Paul Holzinger
2024-09-03 18:31:28 +02:00
parent 77702d73ef
commit beffd05d5a
8 changed files with 544 additions and 5 deletions

2
go.mod
View File

@ -55,7 +55,7 @@ require (
github.com/onsi/gomega v1.34.2
github.com/opencontainers/go-digest v1.0.0
github.com/opencontainers/image-spec v1.1.0
github.com/opencontainers/runc v1.2.0-rc.2.0.20240801140032-ad5b481dace5
github.com/opencontainers/runc v1.2.0-rc.3
github.com/opencontainers/runtime-spec v1.2.0
github.com/opencontainers/runtime-tools v0.9.1-0.20230914150019-408c51e934dc
github.com/opencontainers/selinux v1.11.0

4
go.sum
View File

@ -394,8 +394,8 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
github.com/opencontainers/runc v1.2.0-rc.2.0.20240801140032-ad5b481dace5 h1:VqTLG6pS4DlCwEAiwoYoQ3kXnhYCEeHB85vsYeM5ico=
github.com/opencontainers/runc v1.2.0-rc.2.0.20240801140032-ad5b481dace5/go.mod h1:H8njh/SD+WY9bYMmVsEEWDJgJdviOSDjNeXMjeNbYCE=
github.com/opencontainers/runc v1.2.0-rc.3 h1:5vQhejBp4S5w1DwFZ7L3CSOQX9cmcc8JKFy/mOBTJlo=
github.com/opencontainers/runc v1.2.0-rc.3/go.mod h1:HADgqJU4nqAmOpe+uYBTJ4ZRvjks3ptCjKXp1pHqmCc=
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-tools v0.9.1-0.20230914150019-408c51e934dc h1:d2hUh5O6MRBvStV55MQ8we08t42zSTqBbscoQccWmMc=

View File

@ -0,0 +1,257 @@
//go:build linux
package system
import (
"fmt"
"io"
"os"
"runtime"
"strconv"
"strings"
"syscall"
"unsafe"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
type ParentDeathSignal int
func (p ParentDeathSignal) Restore() error {
if p == 0 {
return nil
}
current, err := GetParentDeathSignal()
if err != nil {
return err
}
if p == current {
return nil
}
return p.Set()
}
func (p ParentDeathSignal) Set() error {
return SetParentDeathSignal(uintptr(p))
}
func Exec(cmd string, args []string, env []string) error {
for {
err := unix.Exec(cmd, args, env)
if err != unix.EINTR {
return &os.PathError{Op: "exec", Path: cmd, Err: err}
}
}
}
func execveat(fd uintptr, pathname string, args []string, env []string, flags int) error {
pathnamep, err := syscall.BytePtrFromString(pathname)
if err != nil {
return err
}
argvp, err := syscall.SlicePtrFromStrings(args)
if err != nil {
return err
}
envp, err := syscall.SlicePtrFromStrings(env)
if err != nil {
return err
}
_, _, errno := syscall.Syscall6(
unix.SYS_EXECVEAT,
fd,
uintptr(unsafe.Pointer(pathnamep)),
uintptr(unsafe.Pointer(&argvp[0])),
uintptr(unsafe.Pointer(&envp[0])),
uintptr(flags),
0,
)
return errno
}
func Fexecve(fd uintptr, args []string, env []string) error {
var err error
for {
err = execveat(fd, "", args, env, unix.AT_EMPTY_PATH)
if err != unix.EINTR { // nolint:errorlint // unix errors are bare
break
}
}
if err == unix.ENOSYS { // nolint:errorlint // unix errors are bare
// Fallback to classic /proc/self/fd/... exec.
return Exec("/proc/self/fd/"+strconv.Itoa(int(fd)), args, env)
}
return os.NewSyscallError("execveat", err)
}
func SetParentDeathSignal(sig uintptr) error {
if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil {
return err
}
return nil
}
func GetParentDeathSignal() (ParentDeathSignal, error) {
var sig int
if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil {
return -1, err
}
return ParentDeathSignal(sig), nil
}
func SetKeepCaps() error {
if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil {
return err
}
return nil
}
func ClearKeepCaps() error {
if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil {
return err
}
return nil
}
func Setctty() error {
if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil {
return err
}
return nil
}
// SetSubreaper sets the value i as the subreaper setting for the calling process
func SetSubreaper(i int) error {
return unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
}
// GetSubreaper returns the subreaper setting for the calling process
func GetSubreaper() (int, error) {
var i uintptr
if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil {
return -1, err
}
return int(i), nil
}
func ExecutableMemfd(comment string, flags int) (*os.File, error) {
// Try to use MFD_EXEC first. On pre-6.3 kernels we get -EINVAL for this
// flag. On post-6.3 kernels, with vm.memfd_noexec=1 this ensures we get an
// executable memfd. For vm.memfd_noexec=2 this is a bit more complicated.
// The original vm.memfd_noexec=2 implementation incorrectly silently
// allowed MFD_EXEC[1] -- this should be fixed in 6.6. On 6.6 and newer
// kernels, we will get -EACCES if we try to use MFD_EXEC with
// vm.memfd_noexec=2 (for 6.3-6.5, -EINVAL was the intended return value).
//
// The upshot is we only need to retry without MFD_EXEC on -EINVAL because
// it just so happens that passing MFD_EXEC bypasses vm.memfd_noexec=2 on
// kernels where -EINVAL is actually a security denial.
memfd, err := unix.MemfdCreate(comment, flags|unix.MFD_EXEC)
if err == unix.EINVAL {
memfd, err = unix.MemfdCreate(comment, flags)
}
if err != nil {
if err == unix.EACCES {
logrus.Info("memfd_create(MFD_EXEC) failed, possibly due to vm.memfd_noexec=2 -- falling back to less secure O_TMPFILE")
}
err := os.NewSyscallError("memfd_create", err)
return nil, fmt.Errorf("failed to create executable memfd: %w", err)
}
return os.NewFile(uintptr(memfd), "/memfd:"+comment), nil
}
// Copy is like io.Copy except it uses sendfile(2) if the source and sink are
// both (*os.File) as an optimisation to make copies faster.
func Copy(dst io.Writer, src io.Reader) (copied int64, err error) {
dstFile, _ := dst.(*os.File)
srcFile, _ := src.(*os.File)
if dstFile != nil && srcFile != nil {
fi, err := srcFile.Stat()
if err != nil {
goto fallback
}
size := fi.Size()
for size > 0 {
n, err := unix.Sendfile(int(dstFile.Fd()), int(srcFile.Fd()), nil, int(size))
if n > 0 {
size -= int64(n)
copied += int64(n)
}
if err == unix.EINTR {
continue
}
if err != nil {
if copied == 0 {
// If we haven't copied anything so far, we can safely just
// fallback to io.Copy. We could always do the fallback but
// it's safer to error out in the case of a partial copy
// followed by an error (which should never happen).
goto fallback
}
return copied, fmt.Errorf("partial sendfile copy: %w", err)
}
}
return copied, nil
}
fallback:
return io.Copy(dst, src)
}
// SetLinuxPersonality sets the Linux execution personality. For more information see the personality syscall documentation.
// checkout getLinuxPersonalityFromStr() from libcontainer/specconv/spec_linux.go for type conversion.
func SetLinuxPersonality(personality int) error {
_, _, errno := unix.Syscall(unix.SYS_PERSONALITY, uintptr(personality), 0, 0)
if errno != 0 {
return &os.SyscallError{Syscall: "set_personality", Err: errno}
}
return nil
}
func prepareAt(dir *os.File, path string) (int, string) {
if dir == nil {
return unix.AT_FDCWD, path
}
// Rather than just filepath.Join-ing path here, do it manually so the
// error and handle correctly indicate cases like path=".." as being
// relative to the correct directory. The handle.Name() might end up being
// wrong but because this is (currently) only used in MkdirAllInRoot, that
// isn't a problem.
dirName := dir.Name()
if !strings.HasSuffix(dirName, "/") {
dirName += "/"
}
fullPath := dirName + path
return int(dir.Fd()), fullPath
}
func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) {
dirFd, fullPath := prepareAt(dir, path)
fd, err := unix.Openat(dirFd, path, flags, mode)
if err != nil {
return nil, &os.PathError{Op: "openat", Path: fullPath, Err: err}
}
runtime.KeepAlive(dir)
return os.NewFile(uintptr(fd), fullPath), nil
}
func Mkdirat(dir *os.File, path string, mode uint32) error {
dirFd, fullPath := prepareAt(dir, path)
err := unix.Mkdirat(dirFd, path, mode)
if err != nil {
err = &os.PathError{Op: "mkdirat", Path: fullPath, Err: err}
}
runtime.KeepAlive(dir)
return err
}

View File

@ -0,0 +1,127 @@
package system
import (
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
)
// State is the status of a process.
type State rune
const ( // Only values for Linux 3.14 and later are listed here
Dead State = 'X'
DiskSleep State = 'D'
Running State = 'R'
Sleeping State = 'S'
Stopped State = 'T'
TracingStop State = 't'
Zombie State = 'Z'
Parked State = 'P'
Idle State = 'I'
)
// String forms of the state from proc(5)'s documentation for
// /proc/[pid]/status' "State" field.
func (s State) String() string {
switch s {
case Dead:
return "dead"
case DiskSleep:
return "disk sleep"
case Running:
return "running"
case Sleeping:
return "sleeping"
case Stopped:
return "stopped"
case TracingStop:
return "tracing stop"
case Zombie:
return "zombie"
case Parked:
return "parked"
case Idle:
return "idle" // kernel thread
default:
return fmt.Sprintf("unknown (%c)", s)
}
}
// Stat_t represents the information from /proc/[pid]/stat, as
// described in proc(5) with names based on the /proc/[pid]/status
// fields.
type Stat_t struct {
// Name is the command run by the process.
Name string
// State is the state of the process.
State State
// StartTime is the number of clock ticks after system boot (since
// Linux 2.6).
StartTime uint64
}
// Stat returns a Stat_t instance for the specified process.
func Stat(pid int) (stat Stat_t, err error) {
bytes, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
if err != nil {
return stat, err
}
return parseStat(string(bytes))
}
func parseStat(data string) (stat Stat_t, err error) {
// Example:
// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
// The fields are space-separated, see full description in proc(5).
//
// We are only interested in:
// * field 2: process name. It is the only field enclosed into
// parenthesis, as it can contain spaces (and parenthesis) inside.
// * field 3: process state, a single character (%c)
// * field 22: process start time, a long unsigned integer (%llu).
// 1. Look for the first '(' and the last ')' first, what's in between is Name.
// We expect at least 20 fields and a space after the last one.
const minAfterName = 20*2 + 1 // the min field is '0 '.
first := strings.IndexByte(data, '(')
if first < 0 || first+minAfterName >= len(data) {
return stat, fmt.Errorf("invalid stat data (no comm or too short): %q", data)
}
last := strings.LastIndexByte(data, ')')
if last <= first || last+minAfterName >= len(data) {
return stat, fmt.Errorf("invalid stat data (no comm or too short): %q", data)
}
stat.Name = data[first+1 : last]
// 2. Remove fields 1 and 2 and a space after. State is right after.
data = data[last+2:]
stat.State = State(data[0])
// 3. StartTime is field 22, data is at field 3 now, so we need to skip 19 spaces.
skipSpaces := 22 - 3
for first = 0; skipSpaces > 0 && first < len(data); first++ {
if data[first] == ' ' {
skipSpaces--
}
}
// Now first points to StartTime; look for space right after.
i := strings.IndexByte(data[first:], ' ')
if i < 0 {
return stat, fmt.Errorf("invalid stat data (too short): %q", data)
}
stat.StartTime, err = strconv.ParseUint(data[first:first+i], 10, 64)
if err != nil {
return stat, fmt.Errorf("invalid stat data (bad start time): %w", err)
}
return stat, nil
}

View File

@ -0,0 +1,15 @@
//go:build go1.23
package system
import (
"syscall"
)
// ClearRlimitNofileCache clears go runtime's nofile rlimit cache. The argument
// is process RLIMIT_NOFILE values. Relies on go.dev/cl/588076.
func ClearRlimitNofileCache(lim *syscall.Rlimit) {
// Ignore the return values since we only need to clean the cache,
// the limit is going to be set via unix.Prlimit elsewhere.
_ = syscall.Setrlimit(syscall.RLIMIT_NOFILE, lim)
}

View File

@ -0,0 +1,27 @@
//go:build !go1.23
// TODO: remove this file once go 1.22 is no longer supported.
package system
import (
"sync/atomic"
"syscall"
_ "unsafe" // Needed for go:linkname to work.
)
//go:linkname syscallOrigRlimitNofile syscall.origRlimitNofile
var syscallOrigRlimitNofile atomic.Pointer[syscall.Rlimit]
// ClearRlimitNofileCache clears go runtime's nofile rlimit cache.
// The argument is process RLIMIT_NOFILE values.
func ClearRlimitNofileCache(_ *syscall.Rlimit) {
// As reported in issue #4195, the new version of go runtime(since 1.19)
// will cache rlimit-nofile. Before executing execve, the rlimit-nofile
// of the process will be restored with the cache. In runc, this will
// cause the rlimit-nofile setting by the parent process for the container
// to become invalid. It can be solved by clearing this cache. But
// unfortunately, go stdlib doesn't provide such function, so we need to
// link to the private var `origRlimitNofile` in package syscall to hack.
syscallOrigRlimitNofile.Store(nil)
}

View File

@ -3,6 +3,7 @@
package utils
import (
"errors"
"fmt"
"math"
"os"
@ -13,6 +14,8 @@ import (
"sync"
_ "unsafe" // for go:linkname
"github.com/opencontainers/runc/libcontainer/system"
securejoin "github.com/cyphar/filepath-securejoin"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
@ -275,3 +278,112 @@ func IsLexicallyInRoot(root, path string) bool {
}
return strings.HasPrefix(path, root)
}
// MkdirAllInRootOpen attempts to make
//
// path, _ := securejoin.SecureJoin(root, unsafePath)
// os.MkdirAll(path, mode)
// os.Open(path)
//
// safer against attacks where components in the path are changed between
// SecureJoin returning and MkdirAll (or Open) being called. In particular, we
// try to detect any symlink components in the path while we are doing the
// MkdirAll.
//
// NOTE: Unlike os.MkdirAll, mode is not Go's os.FileMode, it is the unix mode
// (the suid/sgid/sticky bits are not the same as for os.FileMode).
//
// NOTE: If unsafePath is a subpath of root, we assume that you have already
// called SecureJoin and so we use the provided path verbatim without resolving
// any symlinks (this is done in a way that avoids symlink-exchange races).
// This means that the path also must not contain ".." elements, otherwise an
// error will occur.
//
// This is a somewhat less safe alternative to
// <https://github.com/cyphar/filepath-securejoin/pull/13>, but it should
// detect attempts to trick us into creating directories outside of the root.
// We should migrate to securejoin.MkdirAll once it is merged.
func MkdirAllInRootOpen(root, unsafePath string, mode uint32) (_ *os.File, Err error) {
// If the path is already "within" the root, use it verbatim.
fullPath := unsafePath
if !IsLexicallyInRoot(root, unsafePath) {
var err error
fullPath, err = securejoin.SecureJoin(root, unsafePath)
if err != nil {
return nil, err
}
}
subPath, err := filepath.Rel(root, fullPath)
if err != nil {
return nil, err
}
// Check for any silly mode bits.
if mode&^0o7777 != 0 {
return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode)
}
currentDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, fmt.Errorf("open root handle: %w", err)
}
defer func() {
if Err != nil {
currentDir.Close()
}
}()
for _, part := range strings.Split(subPath, string(filepath.Separator)) {
switch part {
case "", ".":
// Skip over no-op components.
continue
case "..":
return nil, fmt.Errorf("possible breakout detected: found %q component in SecureJoin subpath %s", part, subPath)
}
nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
switch {
case err == nil:
// Update the currentDir.
_ = currentDir.Close()
currentDir = nextDir
case errors.Is(err, unix.ENOTDIR):
// This might be a symlink or some other random file. Either way,
// error out.
return nil, fmt.Errorf("cannot mkdir in %s/%s: %w", currentDir.Name(), part, unix.ENOTDIR)
case errors.Is(err, os.ErrNotExist):
// Luckily, mkdirat will not follow trailing symlinks, so this is
// safe to do as-is.
if err := system.Mkdirat(currentDir, part, mode); err != nil {
return nil, err
}
// Open the new directory. There is a race here where an attacker
// could swap the directory with a different directory, but
// MkdirAll's fuzzy semantics mean we don't care about that.
nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
if err != nil {
return nil, fmt.Errorf("open newly created directory: %w", err)
}
// Update the currentDir.
_ = currentDir.Close()
currentDir = nextDir
default:
return nil, err
}
}
return currentDir, nil
}
// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the
// returned handle, for callers that don't need to use it.
func MkdirAllInRoot(root, unsafePath string, mode uint32) error {
f, err := MkdirAllInRootOpen(root, unsafePath, mode)
if err == nil {
_ = f.Close()
}
return err
}

5
vendor/modules.txt vendored
View File

@ -886,8 +886,8 @@ github.com/opencontainers/go-digest
## explicit; go 1.18
github.com/opencontainers/image-spec/specs-go
github.com/opencontainers/image-spec/specs-go/v1
# github.com/opencontainers/runc v1.2.0-rc.2.0.20240801140032-ad5b481dace5
## explicit; go 1.21
# github.com/opencontainers/runc v1.2.0-rc.3
## explicit; go 1.22
github.com/opencontainers/runc/libcontainer/apparmor
github.com/opencontainers/runc/libcontainer/cgroups
github.com/opencontainers/runc/libcontainer/cgroups/fs
@ -895,6 +895,7 @@ github.com/opencontainers/runc/libcontainer/cgroups/fs2
github.com/opencontainers/runc/libcontainer/cgroups/fscommon
github.com/opencontainers/runc/libcontainer/configs
github.com/opencontainers/runc/libcontainer/devices
github.com/opencontainers/runc/libcontainer/system
github.com/opencontainers/runc/libcontainer/user
github.com/opencontainers/runc/libcontainer/userns
github.com/opencontainers/runc/libcontainer/utils