//go:build linux package system import ( "fmt" "io" "os" "runtime" "strconv" "strings" "syscall" "unsafe" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) type ParentDeathSignal int func (p ParentDeathSignal) Restore() error { if p == 0 { return nil } current, err := GetParentDeathSignal() if err != nil { return err } if p == current { return nil } return p.Set() } func (p ParentDeathSignal) Set() error { return SetParentDeathSignal(uintptr(p)) } func Exec(cmd string, args []string, env []string) error { for { err := unix.Exec(cmd, args, env) if err != unix.EINTR { return &os.PathError{Op: "exec", Path: cmd, Err: err} } } } func execveat(fd uintptr, pathname string, args []string, env []string, flags int) error { pathnamep, err := syscall.BytePtrFromString(pathname) if err != nil { return err } argvp, err := syscall.SlicePtrFromStrings(args) if err != nil { return err } envp, err := syscall.SlicePtrFromStrings(env) if err != nil { return err } _, _, errno := syscall.Syscall6( unix.SYS_EXECVEAT, fd, uintptr(unsafe.Pointer(pathnamep)), uintptr(unsafe.Pointer(&argvp[0])), uintptr(unsafe.Pointer(&envp[0])), uintptr(flags), 0, ) return errno } func Fexecve(fd uintptr, args []string, env []string) error { var err error for { err = execveat(fd, "", args, env, unix.AT_EMPTY_PATH) if err != unix.EINTR { // nolint:errorlint // unix errors are bare break } } if err == unix.ENOSYS { // nolint:errorlint // unix errors are bare // Fallback to classic /proc/self/fd/... exec. return Exec("/proc/self/fd/"+strconv.Itoa(int(fd)), args, env) } return os.NewSyscallError("execveat", err) } func SetParentDeathSignal(sig uintptr) error { if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil { return err } return nil } func GetParentDeathSignal() (ParentDeathSignal, error) { var sig int if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil { return -1, err } return ParentDeathSignal(sig), nil } func SetKeepCaps() error { if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil { return err } return nil } func ClearKeepCaps() error { if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil { return err } return nil } func Setctty() error { if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil { return err } return nil } // SetSubreaper sets the value i as the subreaper setting for the calling process func SetSubreaper(i int) error { return unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0) } // GetSubreaper returns the subreaper setting for the calling process func GetSubreaper() (int, error) { var i uintptr if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil { return -1, err } return int(i), nil } func ExecutableMemfd(comment string, flags int) (*os.File, error) { // Try to use MFD_EXEC first. On pre-6.3 kernels we get -EINVAL for this // flag. On post-6.3 kernels, with vm.memfd_noexec=1 this ensures we get an // executable memfd. For vm.memfd_noexec=2 this is a bit more complicated. // The original vm.memfd_noexec=2 implementation incorrectly silently // allowed MFD_EXEC[1] -- this should be fixed in 6.6. On 6.6 and newer // kernels, we will get -EACCES if we try to use MFD_EXEC with // vm.memfd_noexec=2 (for 6.3-6.5, -EINVAL was the intended return value). // // The upshot is we only need to retry without MFD_EXEC on -EINVAL because // it just so happens that passing MFD_EXEC bypasses vm.memfd_noexec=2 on // kernels where -EINVAL is actually a security denial. memfd, err := unix.MemfdCreate(comment, flags|unix.MFD_EXEC) if err == unix.EINVAL { memfd, err = unix.MemfdCreate(comment, flags) } if err != nil { if err == unix.EACCES { logrus.Info("memfd_create(MFD_EXEC) failed, possibly due to vm.memfd_noexec=2 -- falling back to less secure O_TMPFILE") } err := os.NewSyscallError("memfd_create", err) return nil, fmt.Errorf("failed to create executable memfd: %w", err) } return os.NewFile(uintptr(memfd), "/memfd:"+comment), nil } // Copy is like io.Copy except it uses sendfile(2) if the source and sink are // both (*os.File) as an optimisation to make copies faster. func Copy(dst io.Writer, src io.Reader) (copied int64, err error) { dstFile, _ := dst.(*os.File) srcFile, _ := src.(*os.File) if dstFile != nil && srcFile != nil { fi, err := srcFile.Stat() if err != nil { goto fallback } size := fi.Size() for size > 0 { n, err := unix.Sendfile(int(dstFile.Fd()), int(srcFile.Fd()), nil, int(size)) if n > 0 { size -= int64(n) copied += int64(n) } if err == unix.EINTR { continue } if err != nil { if copied == 0 { // If we haven't copied anything so far, we can safely just // fallback to io.Copy. We could always do the fallback but // it's safer to error out in the case of a partial copy // followed by an error (which should never happen). goto fallback } return copied, fmt.Errorf("partial sendfile copy: %w", err) } } return copied, nil } fallback: return io.Copy(dst, src) } // SetLinuxPersonality sets the Linux execution personality. For more information see the personality syscall documentation. // checkout getLinuxPersonalityFromStr() from libcontainer/specconv/spec_linux.go for type conversion. func SetLinuxPersonality(personality int) error { _, _, errno := unix.Syscall(unix.SYS_PERSONALITY, uintptr(personality), 0, 0) if errno != 0 { return &os.SyscallError{Syscall: "set_personality", Err: errno} } return nil } func prepareAt(dir *os.File, path string) (int, string) { if dir == nil { return unix.AT_FDCWD, path } // Rather than just filepath.Join-ing path here, do it manually so the // error and handle correctly indicate cases like path=".." as being // relative to the correct directory. The handle.Name() might end up being // wrong but because this is (currently) only used in MkdirAllInRoot, that // isn't a problem. dirName := dir.Name() if !strings.HasSuffix(dirName, "/") { dirName += "/" } fullPath := dirName + path return int(dir.Fd()), fullPath } func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) { dirFd, fullPath := prepareAt(dir, path) fd, err := unix.Openat(dirFd, path, flags, mode) if err != nil { return nil, &os.PathError{Op: "openat", Path: fullPath, Err: err} } runtime.KeepAlive(dir) return os.NewFile(uintptr(fd), fullPath), nil } func Mkdirat(dir *os.File, path string, mode uint32) error { dirFd, fullPath := prepareAt(dir, path) err := unix.Mkdirat(dirFd, path, mode) if err != nil { err = &os.PathError{Op: "mkdirat", Path: fullPath, Err: err} } runtime.KeepAlive(dir) return err }