vendor: bump c/storage

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
Giuseppe Scrivano
2024-07-17 23:05:01 +02:00
parent c804f10686
commit 90411b2290
27 changed files with 939 additions and 301 deletions

46
vendor/github.com/Microsoft/hcsshim/hnsaccelnet.go generated vendored Normal file
View File

@ -0,0 +1,46 @@
//go:build windows
package hcsshim
import (
"errors"
"github.com/Microsoft/hcsshim/internal/hns"
)
// HNSNnvManagementMacAddress represents management mac address
// which needs to be excluded from VF reassignment
type HNSNnvManagementMacAddress = hns.HNSNnvManagementMacAddress
// HNSNnvManagementMacList represents a list of management
// mac addresses for exclusion from VF reassignment
type HNSNnvManagementMacList = hns.HNSNnvManagementMacList
var (
ErrorEmptyMacAddressList = errors.New("management mac_address list is empty")
)
// SetNnvManagementMacAddresses sets a list of
// management mac addresses in hns for exclusion from VF reassignment.
func SetNnvManagementMacAddresses(managementMacAddresses []string) (*HNSNnvManagementMacList, error) {
if len(managementMacAddresses) == 0 {
return nil, ErrorEmptyMacAddressList
}
nnvManagementMacList := &HNSNnvManagementMacList{}
for _, mac := range managementMacAddresses {
nnvManagementMacList.MacAddressList = append(nnvManagementMacList.MacAddressList, HNSNnvManagementMacAddress{MacAddress: mac})
}
return nnvManagementMacList.Set()
}
// GetNnvManagementMacAddresses retrieves a list of
// management mac addresses in hns for exclusion from VF reassignment.
func GetNnvManagementMacAddresses() (*HNSNnvManagementMacList, error) {
return hns.GetNnvManagementMacAddressList()
}
// DeleteNnvManagementMacAddresses delete list of
// management mac addresses in hns which are excluded from VF reassignment.
func DeleteNnvManagementMacAddresses() (*HNSNnvManagementMacList, error) {
return hns.DeleteNnvManagementMacAddressList()
}

View File

@ -0,0 +1,60 @@
//go:build windows
package hns
import (
"encoding/json"
"github.com/sirupsen/logrus"
)
// HNSNnvManagementMacAddress represents management mac address
// which needs to be excluded from VF reassignment
type HNSNnvManagementMacAddress struct {
MacAddress string `json:",omitempty"`
}
// HNSNnvManagementMacList represents a list of management
// mac addresses for exclusion from VF reassignment
type HNSNnvManagementMacList struct {
MacAddressList []HNSNnvManagementMacAddress `json:",omitempty"`
}
// HNSNnvManagementMacRequest makes a HNS call to modify/query NnvManagementMacList
func HNSNnvManagementMacRequest(method, path, request string) (*HNSNnvManagementMacList, error) {
nnvManagementMacList := &HNSNnvManagementMacList{}
err := hnsCall(method, "/accelnet/"+path, request, &nnvManagementMacList)
if err != nil {
return nil, err
}
return nnvManagementMacList, nil
}
// Set ManagementMacAddressList by sending "POST" NnvManagementMacRequest to HNS.
func (nnvManagementMacList *HNSNnvManagementMacList) Set() (*HNSNnvManagementMacList, error) {
operation := "Set"
title := "hcsshim::nnvManagementMacList::" + operation
logrus.Debugf(title+" id=%s", nnvManagementMacList.MacAddressList)
jsonString, err := json.Marshal(nnvManagementMacList)
if err != nil {
return nil, err
}
return HNSNnvManagementMacRequest("POST", "", string(jsonString))
}
// Get ManagementMacAddressList by sending "GET" NnvManagementMacRequest to HNS.
func GetNnvManagementMacAddressList() (*HNSNnvManagementMacList, error) {
operation := "Get"
title := "hcsshim::nnvManagementMacList::" + operation
logrus.Debugf(title)
return HNSNnvManagementMacRequest("GET", "", "")
}
// Delete ManagementMacAddressList by sending "DELETE" NnvManagementMacRequest to HNS.
func DeleteNnvManagementMacAddressList() (*HNSNnvManagementMacList, error) {
operation := "Delete"
title := "hcsshim::nnvManagementMacList::" + operation
logrus.Debugf(title)
return HNSNnvManagementMacRequest("DELETE", "", "")
}

View File

@ -10,6 +10,28 @@ import (
"github.com/sirupsen/logrus"
)
// EndpointState represents the states of an HNS Endpoint lifecycle.
type EndpointState uint16
// EndpointState const
// The lifecycle of an Endpoint goes through created, attached, AttachedSharing - endpoint is being shared with other containers,
// detached, after being attached, degraded and finally destroyed.
// Note: This attribute is used by calico to define stale containers and is dependent on HNS v1 api, if we move to HNS v2 api we will need
// to update the current calico code and cordinate the change with calico. Reach out to Microsoft to facilate the change via HNS.
const (
Uninitialized EndpointState = iota
Created EndpointState = 1
Attached EndpointState = 2
AttachedSharing EndpointState = 3
Detached EndpointState = 4
Degraded EndpointState = 5
Destroyed EndpointState = 6
)
func (es EndpointState) String() string {
return [...]string{"Uninitialized", "Attached", "AttachedSharing", "Detached", "Degraded", "Destroyed"}[es]
}
// HNSEndpoint represents a network endpoint in HNS
type HNSEndpoint struct {
Id string `json:"ID,omitempty"`
@ -34,6 +56,7 @@ type HNSEndpoint struct {
Namespace *Namespace `json:",omitempty"`
EncapOverhead uint16 `json:",omitempty"`
SharedContainers []string `json:",omitempty"`
State EndpointState `json:",omitempty"`
}
// SystemType represents the type of the system on which actions are done

View File

@ -8,6 +8,7 @@ import (
"encoding/binary"
"errors"
"fmt"
"io/fs"
"os"
"os/exec"
"path/filepath"
@ -56,7 +57,7 @@ func generateComposeFsBlob(verityDigests map[string]string, toc interface{}, com
fd, err := unix.Openat(unix.AT_FDCWD, destFile, unix.O_WRONLY|unix.O_CREAT|unix.O_TRUNC|unix.O_EXCL|unix.O_CLOEXEC, 0o644)
if err != nil {
return fmt.Errorf("failed to open output file %q: %w", destFile, err)
return &fs.PathError{Op: "openat", Path: destFile, Err: err}
}
outFd := os.NewFile(uintptr(fd), "outFd")
@ -117,7 +118,7 @@ func hasACL(path string) (bool, error) {
fd, err := unix.Openat(unix.AT_FDCWD, path, unix.O_RDONLY|unix.O_CLOEXEC, 0)
if err != nil {
return false, err
return false, &fs.PathError{Op: "openat", Path: path, Err: err}
}
defer unix.Close(fd)
// do not worry about checking the magic number, if the file is invalid
@ -125,7 +126,7 @@ func hasACL(path string) (bool, error) {
flags := make([]byte, 4)
nread, err := unix.Pread(fd, flags, 8)
if err != nil {
return false, err
return false, fmt.Errorf("pread %q: %w", path, err)
}
if nread != 4 {
return false, fmt.Errorf("failed to read flags from %q", path)
@ -150,5 +151,8 @@ func mountComposefsBlob(dataDir, mountPoint string) error {
mountOpts += ",noacl"
}
return unix.Mount(loop.Name(), mountPoint, "erofs", unix.MS_RDONLY, mountOpts)
if err := unix.Mount(loop.Name(), mountPoint, "erofs", unix.MS_RDONLY, mountOpts); err != nil {
return fmt.Errorf("failed to mount erofs image at %q: %w", mountPoint, err)
}
return nil
}

View File

@ -848,14 +848,14 @@ func (d *Driver) Status() [][2]string {
// Metadata returns meta data about the overlay driver such as
// LowerDir, UpperDir, WorkDir and MergeDir used to store data.
func (d *Driver) Metadata(id string) (map[string]string, error) {
dir := d.dir(id)
dir, _, inAdditionalStore := d.dir2(id, false)
if err := fileutils.Exists(dir); err != nil {
return nil, err
}
metadata := map[string]string{
"WorkDir": path.Join(dir, "work"),
"MergedDir": path.Join(dir, "merged"),
"MergedDir": d.getMergedDir(id, dir, inAdditionalStore),
"UpperDir": path.Join(dir, "diff"),
}
@ -1703,10 +1703,10 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
}
}
mergedDir := path.Join(dir, "merged")
mergedDir := d.getMergedDir(id, dir, inAdditionalStore)
// Attempt to create the merged dir only if it doesn't exist.
if err := fileutils.Exists(mergedDir); err != nil && os.IsNotExist(err) {
if err := idtools.MkdirAs(mergedDir, 0o700, rootUID, rootGID); err != nil && !os.IsExist(err) {
if err := idtools.MkdirAllAs(mergedDir, 0o700, rootUID, rootGID); err != nil && !os.IsExist(err) {
return "", err
}
}
@ -1856,7 +1856,9 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
mountFunc = func(source string, target string, mType string, flags uintptr, label string) error {
return mountOverlayFrom(d.home, source, target, mType, flags, label)
}
mountTarget = path.Join(id, "merged")
if !inAdditionalStore {
mountTarget = path.Join(id, "merged")
}
}
// overlay has a check in place to prevent mounting the same file system twice
@ -1875,13 +1877,26 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
return mergedDir, nil
}
// getMergedDir returns the directory path that should be used as the mount point for the overlayfs.
func (d *Driver) getMergedDir(id, dir string, inAdditionalStore bool) string {
// If the layer is in an additional store, the lock we might hold only a reading lock. To prevent
// races with other processes, use a private directory under the main store rundir. At this point, the
// current process is holding an exclusive lock on the store, and since the rundir cannot be shared for
// different stores, it is safe to assume the current process has exclusive access to it.
if inAdditionalStore {
return path.Join(d.runhome, id, "merged")
}
return path.Join(dir, "merged")
}
// Put unmounts the mount path created for the give id.
func (d *Driver) Put(id string) error {
dir, _, inAdditionalStore := d.dir2(id, false)
if err := fileutils.Exists(dir); err != nil {
return err
}
mountpoint := path.Join(dir, "merged")
mountpoint := d.getMergedDir(id, dir, inAdditionalStore)
if count := d.ctr.Decrement(mountpoint); count > 0 {
return nil
}
@ -1938,7 +1953,15 @@ func (d *Driver) Put(id string) error {
}
}
if !inAdditionalStore {
if inAdditionalStore {
// check the base name for extra safety
if strings.HasPrefix(mountpoint, d.runhome) && filepath.Base(mountpoint) == "merged" {
err := os.RemoveAll(filepath.Dir(mountpoint))
if err != nil {
logrus.Warningf("Failed to remove mountpoint %s overlay: %s: %v", id, mountpoint, err)
}
}
} else {
uid, gid := int(0), int(0)
fi, err := os.Stat(mountpoint)
if err != nil {
@ -1955,7 +1978,7 @@ func (d *Driver) Put(id string) error {
// rename(2) can be used on an empty directory, as it is the mountpoint after umount, and it retains
// its atomic semantic. In this way the "merged" directory is never removed.
if err := unix.Rename(tmpMountpoint, mountpoint); err != nil {
logrus.Debugf("Failed to replace mountpoint %s overlay: %s - %v", id, mountpoint, err)
logrus.Debugf("Failed to replace mountpoint %s overlay: %s: %v", id, mountpoint, err)
return fmt.Errorf("replacing mount point %q: %w", mountpoint, err)
}
}
@ -2410,14 +2433,18 @@ func (d *Driver) Changes(id string, idMappings *idtools.IDMappings, parent strin
// layers.
diffPath, err := d.getDiffPath(id)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to get diff path: %w", err)
}
layers, err := d.getLowerDiffPaths(id)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to get lower diff path: %w", err)
}
return archive.OverlayChanges(layers, diffPath)
c, err := archive.OverlayChanges(layers, diffPath)
if err != nil {
return nil, fmt.Errorf("computing changes: %w", err)
}
return c, nil
}
// AdditionalImageStores returns additional image stores supported by the driver

View File

@ -316,7 +316,11 @@ func parseDirent(buf []byte, names []nameIno) (consumed int, newnames []nameIno)
// with respect to the parent layers
func OverlayChanges(layers []string, rw string) ([]Change, error) {
dc := func(root, path string, fi os.FileInfo) (string, error) {
return overlayDeletedFile(layers, root, path, fi)
r, err := overlayDeletedFile(layers, root, path, fi)
if err != nil {
return "", fmt.Errorf("overlay deleted file query: %w", err)
}
return r, nil
}
return changes(layers, rw, dc, nil, overlayLowerContainsWhiteout)
}
@ -351,7 +355,7 @@ func overlayDeletedFile(layers []string, root, path string, fi os.FileInfo) (str
// If the directory isn't marked as opaque, then it's just a normal directory.
opaque, err := system.Lgetxattr(filepath.Join(root, path), getOverlayOpaqueXattrName())
if err != nil {
return "", err
return "", fmt.Errorf("failed querying overlay opaque xattr: %w", err)
}
if len(opaque) != 1 || opaque[0] != 'y' {
return "", err

View File

@ -26,6 +26,19 @@ func getFilterPath(name string) string {
return path.(string)
}
type errorRecordingReader struct {
r io.Reader
err error
}
func (r *errorRecordingReader) Read(p []byte) (int, error) {
n, err := r.r.Read(p)
if r.err == nil && err != io.EOF {
r.err = err
}
return n, err
}
// tryProcFilter tries to run the command specified in args, passing input to its stdin and returning its stdout.
// cleanup() is a caller provided function that will be called when the command finishes running, regardless of
// whether it succeeds or fails.
@ -38,22 +51,20 @@ func tryProcFilter(args []string, input io.Reader, cleanup func()) (io.ReadClose
var stderrBuf bytes.Buffer
inputWithError := &errorRecordingReader{r: input}
r, w := io.Pipe()
cmd := exec.Command(path, args[1:]...)
cmd.Stdin = input
cmd.Stdin = inputWithError
cmd.Stdout = w
cmd.Stderr = &stderrBuf
go func() {
err := cmd.Run()
if err != nil && stderrBuf.Len() > 0 {
b := make([]byte, 1)
// if there is an error reading from input, prefer to return that error
_, errRead := input.Read(b)
if errRead != nil && errRead != io.EOF {
err = errRead
} else {
err = fmt.Errorf("%s: %w", strings.TrimRight(stderrBuf.String(), "\n"), err)
}
// if there is an error reading from input, prefer to return that error
if inputWithError.err != nil {
err = inputWithError.err
} else if err != nil && stderrBuf.Len() > 0 {
err = fmt.Errorf("%s: %w", strings.TrimRight(stderrBuf.String(), "\n"), err)
}
w.CloseWithError(err) // CloseWithErr(nil) == Close()
cleanup()

View File

@ -287,6 +287,13 @@ func (c *layersCache) load() error {
newLayers = append(newLayers, l)
continue
}
if r.ReadOnly {
// if the layer is coming from a read-only store, do not attempt
// to write to it.
continue
}
// the cache file is either not present or broken. Try to generate it from the TOC.
l, err = c.createCacheFileFromTOC(r.ID)
if err != nil {

View File

@ -5,13 +5,16 @@ import (
"errors"
"fmt"
"io"
"maps"
"strconv"
"time"
"github.com/containers/storage/pkg/chunked/internal"
"github.com/klauspost/compress/zstd"
"github.com/klauspost/pgzip"
digest "github.com/opencontainers/go-digest"
"github.com/vbatts/tar-split/archive/tar"
expMaps "golang.org/x/exp/maps"
)
var typesToTar = map[string]byte{
@ -221,6 +224,12 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Di
if err != nil {
return nil, nil, nil, 0, fmt.Errorf("validating and decompressing tar-split: %w", err)
}
// We use the TOC for creating on-disk files, but the tar-split for creating metadata
// when exporting the layer contents. Ensure the two match, otherwise local inspection of a container
// might be misleading about the exported contents.
if err := ensureTOCMatchesTarSplit(toc, decodedTarSplit); err != nil {
return nil, nil, nil, 0, fmt.Errorf("tar-split and TOC data is inconsistent: %w", err)
}
} else if tarSplitChunk.Offset > 0 {
// We must ignore the tar-split when the digest is not present in the TOC, because we cant authenticate it.
//
@ -234,6 +243,131 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Di
return decodedBlob, toc, decodedTarSplit, int64(manifestChunk.Offset), err
}
// ensureTOCMatchesTarSplit validates that toc and tarSplit contain _exactly_ the same entries.
func ensureTOCMatchesTarSplit(toc *internal.TOC, tarSplit []byte) error {
pendingFiles := map[string]*internal.FileMetadata{} // Name -> an entry in toc.Entries
for i := range toc.Entries {
e := &toc.Entries[i]
if e.Type != internal.TypeChunk {
if _, ok := pendingFiles[e.Name]; ok {
return fmt.Errorf("TOC contains duplicate entries for path %q", e.Name)
}
pendingFiles[e.Name] = e
}
}
if err := iterateTarSplit(tarSplit, func(hdr *tar.Header) error {
e, ok := pendingFiles[hdr.Name]
if !ok {
return fmt.Errorf("tar-split contains an entry for %q missing in TOC", hdr.Name)
}
delete(pendingFiles, hdr.Name)
expected, err := internal.NewFileMetadata(hdr)
if err != nil {
return fmt.Errorf("determining expected metadata for %q: %w", hdr.Name, err)
}
if err := ensureFileMetadataAttributesMatch(e, &expected); err != nil {
return fmt.Errorf("TOC and tar-split metadata doesnt match: %w", err)
}
return nil
}); err != nil {
return err
}
if len(pendingFiles) != 0 {
remaining := expMaps.Keys(pendingFiles)
if len(remaining) > 5 {
remaining = remaining[:5] // Just to limit the size of the output.
}
return fmt.Errorf("TOC contains entries not present in tar-split, incl. %q", remaining)
}
return nil
}
// ensureTimePointersMatch ensures that a and b are equal
func ensureTimePointersMatch(a, b *time.Time) error {
// We didnt always use “timeIfNotZero” when creating the TOC, so treat time.IsZero the same as nil.
// The archive/tar code turns time.IsZero() timestamps into an Unix timestamp of 0 when writing, but turns an Unix timestamp of 0
// when writing into a (local-timezone) Jan 1 1970, which is not IsZero(). So, treat that the same as IsZero as well.
unixZero := time.Unix(0, 0)
if a != nil && (a.IsZero() || a.Equal(unixZero)) {
a = nil
}
if b != nil && (b.IsZero() || b.Equal(unixZero)) {
b = nil
}
switch {
case a == nil && b == nil:
return nil
case a == nil:
return fmt.Errorf("nil != %v", *b)
case b == nil:
return fmt.Errorf("%v != nil", *a)
default:
if a.Equal(*b) {
return nil
}
return fmt.Errorf("%v != %v", *a, *b)
}
}
// ensureFileMetadataAttributesMatch ensures that a and b match in file attributes (it ignores entries relevant to locating data
// in the tar stream or matching contents)
func ensureFileMetadataAttributesMatch(a, b *internal.FileMetadata) error {
// Keep this in sync with internal.FileMetadata!
if a.Type != b.Type {
return fmt.Errorf("mismatch of Type: %q != %q", a.Type, b.Type)
}
if a.Name != b.Name {
return fmt.Errorf("mismatch of Name: %q != %q", a.Name, b.Name)
}
if a.Linkname != b.Linkname {
return fmt.Errorf("mismatch of Linkname: %q != %q", a.Linkname, b.Linkname)
}
if a.Mode != b.Mode {
return fmt.Errorf("mismatch of Mode: %q != %q", a.Mode, b.Mode)
}
if a.Size != b.Size {
return fmt.Errorf("mismatch of Size: %q != %q", a.Size, b.Size)
}
if a.UID != b.UID {
return fmt.Errorf("mismatch of UID: %q != %q", a.UID, b.UID)
}
if a.GID != b.GID {
return fmt.Errorf("mismatch of GID: %q != %q", a.GID, b.GID)
}
if err := ensureTimePointersMatch(a.ModTime, b.ModTime); err != nil {
return fmt.Errorf("mismatch of ModTime: %w", err)
}
if err := ensureTimePointersMatch(a.AccessTime, b.AccessTime); err != nil {
return fmt.Errorf("mismatch of AccessTime: %w", err)
}
if err := ensureTimePointersMatch(a.ChangeTime, b.ChangeTime); err != nil {
return fmt.Errorf("mismatch of ChangeTime: %w", err)
}
if a.Devmajor != b.Devmajor {
return fmt.Errorf("mismatch of Devmajor: %q != %q", a.Devmajor, b.Devmajor)
}
if a.Devminor != b.Devminor {
return fmt.Errorf("mismatch of Devminor: %q != %q", a.Devminor, b.Devminor)
}
if !maps.Equal(a.Xattrs, b.Xattrs) {
return fmt.Errorf("mismatch of Xattrs: %q != %q", a.Xattrs, b.Xattrs)
}
// Digest is not compared
// Offset is not compared
// EndOffset is not compared
// ChunkSize is not compared
// ChunkOffset is not compared
// ChunkDigest is not compared
// ChunkType is not compared
return nil
}
func decodeAndValidateBlob(blob []byte, lengthUncompressed uint64, expectedCompressedChecksum string) ([]byte, error) {
d, err := digest.Parse(expectedCompressedChecksum)
if err != nil {

View File

@ -7,12 +7,8 @@ package compressor
import (
"bufio"
"bytes"
"encoding/base64"
"io"
"strings"
"time"
"github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/chunked/internal"
"github.com/containers/storage/pkg/ioutils"
"github.com/klauspost/compress/zstd"
@ -234,14 +230,6 @@ func newTarSplitData(level int) (*tarSplitData, error) {
}, nil
}
// timeIfNotZero returns a pointer to the time.Time if it is not zero, otherwise it returns nil.
func timeIfNotZero(t *time.Time) *time.Time {
if t == nil || t.IsZero() {
return nil
}
return t
}
func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, reader io.Reader, level int) error {
// total written so far. Used to retrieve partial offsets in the file
dest := ioutils.NewWriteCounter(destFile)
@ -380,38 +368,14 @@ func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, r
}
}
typ, err := internal.GetType(hdr.Typeflag)
mainEntry, err := internal.NewFileMetadata(hdr)
if err != nil {
return err
}
xattrs := make(map[string]string)
for k, v := range hdr.PAXRecords {
xattrKey, ok := strings.CutPrefix(k, archive.PaxSchilyXattr)
if !ok {
continue
}
xattrs[xattrKey] = base64.StdEncoding.EncodeToString([]byte(v))
}
entries := []internal.FileMetadata{
{
Type: typ,
Name: hdr.Name,
Linkname: hdr.Linkname,
Mode: hdr.Mode,
Size: hdr.Size,
UID: hdr.Uid,
GID: hdr.Gid,
ModTime: timeIfNotZero(&hdr.ModTime),
AccessTime: timeIfNotZero(&hdr.AccessTime),
ChangeTime: timeIfNotZero(&hdr.ChangeTime),
Devmajor: hdr.Devmajor,
Devminor: hdr.Devminor,
Xattrs: xattrs,
Digest: checksum,
Offset: startOffset,
EndOffset: lastOffset,
},
}
mainEntry.Digest = checksum
mainEntry.Offset = startOffset
mainEntry.EndOffset = lastOffset
entries := []internal.FileMetadata{mainEntry}
for i := 1; i < len(chunks); i++ {
entries = append(entries, internal.FileMetadata{
Type: internal.TypeChunk,

View File

@ -4,6 +4,7 @@ package dump
import (
"bufio"
"encoding/base64"
"fmt"
"io"
"path/filepath"
@ -22,12 +23,12 @@ const (
ESCAPE_LONE_DASH
)
func escaped(val string, escape int) string {
func escaped(val []byte, escape int) string {
noescapeSpace := escape&NOESCAPE_SPACE != 0
escapeEqual := escape&ESCAPE_EQUAL != 0
escapeLoneDash := escape&ESCAPE_LONE_DASH != 0
if escapeLoneDash && val == "-" {
if escapeLoneDash && len(val) == 1 && val[0] == '-' {
return fmt.Sprintf("\\x%.2x", val[0])
}
@ -75,8 +76,8 @@ func escaped(val string, escape int) string {
return result
}
func escapedOptional(val string, escape int) string {
if val == "" {
func escapedOptional(val []byte, escape int) string {
if len(val) == 0 {
return "-"
}
return escaped(val, escape)
@ -136,7 +137,7 @@ func dumpNode(out io.Writer, added map[string]*internal.FileMetadata, links map[
}
added[path] = entry
if _, err := fmt.Fprint(out, escaped(path, ESCAPE_STANDARD)); err != nil {
if _, err := fmt.Fprint(out, escaped([]byte(path), ESCAPE_STANDARD)); err != nil {
return err
}
@ -180,7 +181,7 @@ func dumpNode(out io.Writer, added map[string]*internal.FileMetadata, links map[
}
}
if _, err := fmt.Fprint(out, escapedOptional(payload, ESCAPE_LONE_DASH)); err != nil {
if _, err := fmt.Fprint(out, escapedOptional([]byte(payload), ESCAPE_LONE_DASH)); err != nil {
return err
}
@ -194,14 +195,18 @@ func dumpNode(out io.Writer, added map[string]*internal.FileMetadata, links map[
return err
}
digest := verityDigests[payload]
if _, err := fmt.Fprint(out, escapedOptional(digest, ESCAPE_LONE_DASH)); err != nil {
if _, err := fmt.Fprint(out, escapedOptional([]byte(digest), ESCAPE_LONE_DASH)); err != nil {
return err
}
for k, v := range entry.Xattrs {
name := escaped(k, ESCAPE_EQUAL)
value := escaped(v, ESCAPE_EQUAL)
for k, vEncoded := range entry.Xattrs {
v, err := base64.StdEncoding.DecodeString(vEncoded)
if err != nil {
return fmt.Errorf("decode xattr %q: %w", k, err)
}
name := escaped([]byte(k), ESCAPE_EQUAL)
value := escaped(v, ESCAPE_EQUAL)
if _, err := fmt.Fprintf(out, " %s=%s", name, value); err != nil {
return err
}

View File

@ -5,16 +5,19 @@ package internal
// larger software like the graph drivers.
import (
"archive/tar"
"bytes"
"encoding/base64"
"encoding/binary"
"fmt"
"io"
"strings"
"time"
"github.com/containers/storage/pkg/archive"
jsoniter "github.com/json-iterator/go"
"github.com/klauspost/compress/zstd"
"github.com/opencontainers/go-digest"
"github.com/vbatts/tar-split/archive/tar"
)
// TOC is short for Table of Contents and is used by the zstd:chunked
@ -36,10 +39,22 @@ type TOC struct {
// that duplicates what can found in the tar header (and should match), but
// also special/custom content (see below).
//
// Regular files may optionally be represented as a sequence of “chunks”,
// which may be ChunkTypeData or ChunkTypeZeros (and ChunkTypeData boundaries
// are heuristically determined to increase chance of chunk matching / reuse
// similar to rsync). In that case, the regular file is represented
// as an initial TypeReg entry (with all metadata for the file as a whole)
// immediately followed by zero or more TypeChunk entries (containing only Type,
// Name and Chunk* fields); if there is at least one TypeChunk entry, the Chunk*
// fields are relevant in all of these entries, including the initial
// TypeReg one.
//
// Note that the metadata here, when fetched by a zstd:chunked aware client,
// is used instead of that in the tar stream. The contents of the tar stream
// are not used in this scenario.
type FileMetadata struct {
// If you add any fields, update ensureFileMetadataMatches as well!
// The metadata below largely duplicates that in the tar headers.
Type string `json:"type"`
Name string `json:"name"`
@ -267,3 +282,43 @@ func footerDataToBlob(footer ZstdChunkedFooterData) []byte {
return manifestDataLE
}
// timeIfNotZero returns a pointer to the time.Time if it is not zero, otherwise it returns nil.
func timeIfNotZero(t *time.Time) *time.Time {
if t == nil || t.IsZero() {
return nil
}
return t
}
// NewFileMetadata creates a basic FileMetadata entry for hdr.
// The caller must set DigestOffset/EndOffset, and the Chunk* values, separately.
func NewFileMetadata(hdr *tar.Header) (FileMetadata, error) {
typ, err := GetType(hdr.Typeflag)
if err != nil {
return FileMetadata{}, err
}
xattrs := make(map[string]string)
for k, v := range hdr.PAXRecords {
xattrKey, ok := strings.CutPrefix(k, archive.PaxSchilyXattr)
if !ok {
continue
}
xattrs[xattrKey] = base64.StdEncoding.EncodeToString([]byte(v))
}
return FileMetadata{
Type: typ,
Name: hdr.Name,
Linkname: hdr.Linkname,
Mode: hdr.Mode,
Size: hdr.Size,
UID: hdr.Uid,
GID: hdr.Gid,
ModTime: timeIfNotZero(&hdr.ModTime),
AccessTime: timeIfNotZero(&hdr.AccessTime),
ChangeTime: timeIfNotZero(&hdr.ChangeTime),
Devmajor: hdr.Devmajor,
Devminor: hdr.Devminor,
Xattrs: xattrs,
}, nil
}

View File

@ -0,0 +1,68 @@
package chunked
import (
"bytes"
"fmt"
"io"
"github.com/vbatts/tar-split/archive/tar"
"github.com/vbatts/tar-split/tar/storage"
)
// iterateTarSplit calls handler for each tar header in tarSplit
func iterateTarSplit(tarSplit []byte, handler func(hdr *tar.Header) error) error {
// This, strictly speaking, hard-codes undocumented assumptions about how github.com/vbatts/tar-split/tar/asm.NewInputTarStream
// forms the tar-split contents. Pragmatically, NewInputTarStream should always produce storage.FileType entries at least
// for every non-empty file, which constraints it basically to the output we expect.
//
// Specifically, we assume:
// - There is a separate SegmentType entry for every tar header, but only one SegmentType entry for the full header incl. any extensions
// - (There is a FileType entry for every tar header, we ignore it)
// - Trailing padding of a file, if any, is included in the next SegmentType entry
// - At the end, there may be SegmentType entries just for the terminating zero blocks.
unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit))
for {
tsEntry, err := unpacker.Next()
if err != nil {
if err == io.EOF {
return nil
}
return fmt.Errorf("reading tar-split entries: %w", err)
}
switch tsEntry.Type {
case storage.SegmentType:
payload := tsEntry.Payload
// This is horrible, but we dont know how much padding to skip. (It can be computed from the previous hdr.Size for non-sparse
// files, but for sparse files that is set to the logical size.)
//
// First, assume that all padding is zero bytes.
// A tar header starts with a file name, which might in principle be empty, but
// at least https://github.com/opencontainers/image-spec/blob/main/layer.md#populate-initial-filesystem suggests that
// the tar name should never be empty (it should be ".", or maybe "./").
//
// This will cause us to skip all zero bytes in the trailing blocks, but thats fine.
i := 0
for i < len(payload) && payload[i] == 0 {
i++
}
payload = payload[i:]
tr := tar.NewReader(bytes.NewReader(payload))
hdr, err := tr.Next()
if err != nil {
if err == io.EOF { // Probably the last entry, but lets let the unpacker drive that.
break
}
return fmt.Errorf("decoding a tar header from a tar-split entry: %w", err)
}
if err := handler(hdr); err != nil {
return err
}
case storage.FileType:
// Nothing
default:
return fmt.Errorf("unexpected tar-split entry type %q", tsEntry.Type)
}
}
}

View File

@ -6,10 +6,12 @@ package loopback
import (
"errors"
"fmt"
"io/fs"
"os"
"syscall"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// Loopback related errors
@ -39,7 +41,7 @@ func getNextFreeLoopbackIndex() (int, error) {
return index, err
}
func openNextAvailableLoopback(index int, sparseName string, sparseFile *os.File) (loopFile *os.File, err error) {
func openNextAvailableLoopback(sparseName string, sparseFile *os.File) (loopFile *os.File, err error) {
// Read information about the loopback file.
var st syscall.Stat_t
err = syscall.Fstat(int(sparseFile.Fd()), &st)
@ -48,31 +50,51 @@ func openNextAvailableLoopback(index int, sparseName string, sparseFile *os.File
return nil, ErrAttachLoopbackDevice
}
// upper bound to avoid infinite loop
remaining := 1000
// Start looking for a free /dev/loop
for {
target := fmt.Sprintf("/dev/loop%d", index)
index++
fi, err := os.Stat(target)
if err != nil {
if os.IsNotExist(err) {
logrus.Error("There are no more loopback devices available.")
}
if remaining == 0 {
logrus.Errorf("No free loopback devices available")
return nil, ErrAttachLoopbackDevice
}
remaining--
if fi.Mode()&os.ModeDevice != os.ModeDevice {
logrus.Errorf("Loopback device %s is not a block device.", target)
continue
index, err := getNextFreeLoopbackIndex()
if err != nil {
logrus.Debugf("Error retrieving the next available loopback: %s", err)
return nil, err
}
target := fmt.Sprintf("/dev/loop%d", index)
// OpenFile adds O_CLOEXEC
loopFile, err = os.OpenFile(target, os.O_RDWR, 0o644)
if err != nil {
// The kernel returns ENXIO when opening a device that is in the "deleting" or "rundown" state, so
// just treat ENXIO as if the device does not exist.
if errors.Is(err, fs.ErrNotExist) || errors.Is(err, unix.ENXIO) {
// Another process could have taken the loopback device in the meantime. So repeat
// the process with the next loopback device.
continue
}
logrus.Errorf("Opening loopback device: %s", err)
return nil, ErrAttachLoopbackDevice
}
fi, err := loopFile.Stat()
if err != nil {
loopFile.Close()
logrus.Errorf("Stat loopback device: %s", err)
return nil, ErrAttachLoopbackDevice
}
if fi.Mode()&os.ModeDevice != os.ModeDevice {
loopFile.Close()
logrus.Errorf("Loopback device %s is not a block device.", target)
continue
}
// Try to attach to the loop file
if err := ioctlLoopSetFd(loopFile.Fd(), sparseFile.Fd()); err != nil {
loopFile.Close()
@ -124,14 +146,6 @@ func AttachLoopDeviceRO(sparseName string) (loop *os.File, err error) {
}
func attachLoopDevice(sparseName string, readonly bool) (loop *os.File, err error) {
// Try to retrieve the next available loopback device via syscall.
// If it fails, we discard error and start looping for a
// loopback from index 0.
startIndex, err := getNextFreeLoopbackIndex()
if err != nil {
logrus.Debugf("Error retrieving the next available loopback: %s", err)
}
var sparseFile *os.File
// OpenFile adds O_CLOEXEC
@ -146,7 +160,7 @@ func attachLoopDevice(sparseName string, readonly bool) (loop *os.File, err erro
}
defer sparseFile.Close()
loopFile, err := openNextAvailableLoopback(startIndex, sparseName, sparseFile)
loopFile, err := openNextAvailableLoopback(sparseName, sparseFile)
if err != nil {
return nil, err
}

View File

@ -2846,7 +2846,7 @@ func (s *store) mount(id string, options drivers.MountOpts) (string, error) {
exists := store.Exists(id)
store.stopReading()
if exists {
return "", fmt.Errorf("mounting read/only store images is not allowed: %w", ErrLayerUnknown)
return "", fmt.Errorf("mounting read/only store images is not allowed: %w", ErrStoreIsReadOnly)
}
}
@ -2930,14 +2930,40 @@ func (s *store) Unmount(id string, force bool) (bool, error) {
}
func (s *store) Changes(from, to string) ([]archive.Change, error) {
if res, done, err := readAllLayerStores(s, func(store roLayerStore) ([]archive.Change, bool, error) {
// NaiveDiff could cause mounts to happen without a lock, so be safe
// and treat the .Diff operation as a Mount.
// We need to make sure the home mount is present when the Mount is done, which happens by possibly reinitializing the graph driver
// in startUsingGraphDriver().
if err := s.startUsingGraphDriver(); err != nil {
return nil, err
}
defer s.stopUsingGraphDriver()
rlstore, lstores, err := s.bothLayerStoreKindsLocked()
if err != nil {
return nil, err
}
if err := rlstore.startWriting(); err != nil {
return nil, err
}
if rlstore.Exists(to) {
res, err := rlstore.Changes(from, to)
rlstore.stopWriting()
return res, err
}
rlstore.stopWriting()
for _, s := range lstores {
store := s
if err := store.startReading(); err != nil {
return nil, err
}
if store.Exists(to) {
res, err := store.Changes(from, to)
return res, true, err
store.stopReading()
return res, err
}
return nil, false, nil
}); done {
return res, err
store.stopReading()
}
return nil, ErrLayerUnknown
}
@ -2968,12 +2994,30 @@ func (s *store) Diff(from, to string, options *DiffOptions) (io.ReadCloser, erro
}
defer s.stopUsingGraphDriver()
layerStores, err := s.allLayerStoresLocked()
rlstore, lstores, err := s.bothLayerStoreKindsLocked()
if err != nil {
return nil, err
}
for _, s := range layerStores {
if err := rlstore.startWriting(); err != nil {
return nil, err
}
if rlstore.Exists(to) {
rc, err := rlstore.Diff(from, to, options)
if rc != nil && err == nil {
wrapped := ioutils.NewReadCloserWrapper(rc, func() error {
err := rc.Close()
rlstore.stopWriting()
return err
})
return wrapped, nil
}
rlstore.stopWriting()
return rc, err
}
rlstore.stopWriting()
for _, s := range lstores {
store := s
if err := store.startReading(); err != nil {
return nil, err

View File

@ -0,0 +1,138 @@
# Changelog #
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).
## [Unreleased] ##
## [0.3.1] - 2024-07-23 ##
### Changed ###
- By allowing `Open(at)InRoot` to opt-out of the extra work done by `MkdirAll`
to do the necessary "partial lookups", `Open(at)InRoot` now does less work
for both implementations (resulting in a many-fold decrease in the number of
operations for `openat2`, and a modest improvement for non-`openat2`) and is
far more guaranteed to match the correct `openat2(RESOLVE_IN_ROOT)`
behaviour.
- We now use `readlinkat(fd, "")` where possible. For `Open(at)InRoot` this
effectively just means that we no longer risk getting spurious errors during
rename races. However, for our hardened procfs handler, this in theory should
prevent mount attacks from tricking us when doing magic-link readlinks (even
when using the unsafe host `/proc` handle). Unfortunately `Reopen` is still
potentially vulnerable to those kinds of somewhat-esoteric attacks.
Technically this [will only work on post-2.6.39 kernels][linux-readlinkat-emptypath]
but it seems incredibly unlikely anyone is using `filepath-securejoin` on a
pre-2011 kernel.
### Fixed ###
- Several improvements were made to the errors returned by `Open(at)InRoot` and
`MkdirAll` when dealing with invalid paths under the emulated (ie.
non-`openat2`) implementation. Previously, some paths would return the wrong
error (`ENOENT` when the last component was a non-directory), and other paths
would be returned as though they were acceptable (trailing-slash components
after a non-directory would be ignored by `Open(at)InRoot`).
These changes were done to match `openat2`'s behaviour and purely is a
consistency fix (most users are going to be using `openat2` anyway).
[linux-readlinkat-emptypath]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=65cfc6722361570bfe255698d9cd4dccaf47570d
## [0.3.0] - 2024-07-11 ##
### Added ###
- A new set of `*os.File`-based APIs have been added. These are adapted from
[libpathrs][] and we strongly suggest using them if possible (as they provide
far more protection against attacks than `SecureJoin`):
- `Open(at)InRoot` resolves a path inside a rootfs and returns an `*os.File`
handle to the path. Note that the handle returned is an `O_PATH` handle,
which cannot be used for reading or writing (as well as some other
operations -- [see open(2) for more details][open.2])
- `Reopen` takes an `O_PATH` file handle and safely re-opens it to upgrade
it to a regular handle. This can also be used with non-`O_PATH` handles,
but `O_PATH` is the most obvious application.
- `MkdirAll` is an implementation of `os.MkdirAll` that is safe to use to
create a directory tree within a rootfs.
As these are new APIs, they may change in the future. However, they should be
safe to start migrating to as we have extensive tests ensuring they behave
correctly and are safe against various races and other attacks.
[libpathrs]: https://github.com/openSUSE/libpathrs
[open.2]: https://www.man7.org/linux/man-pages/man2/open.2.html
## [0.2.5] - 2024-05-03 ##
### Changed ###
- Some minor changes were made to how lexical components (like `..` and `.`)
are handled during path generation in `SecureJoin`. There is no behaviour
change as a result of this fix (the resulting paths are the same).
### Fixed ###
- The error returned when we hit a symlink loop now references the correct
path. (#10)
## [0.2.4] - 2023-09-06 ##
### Security ###
- This release fixes a potential security issue in filepath-securejoin when
used on Windows ([GHSA-6xv5-86q9-7xr8][], which could be used to generate
paths outside of the provided rootfs in certain cases), as well as improving
the overall behaviour of filepath-securejoin when dealing with Windows paths
that contain volume names. Thanks to Paulo Gomes for discovering and fixing
these issues.
### Fixed ###
- Switch to GitHub Actions for CI so we can test on Windows as well as Linux
and MacOS.
[GHSA-6xv5-86q9-7xr8]: https://github.com/advisories/GHSA-6xv5-86q9-7xr8
## [0.2.3] - 2021-06-04 ##
### Changed ###
- Switch to Go 1.13-style `%w` error wrapping, letting us drop the dependency
on `github.com/pkg/errors`.
## [0.2.2] - 2018-09-05 ##
### Changed ###
- Use `syscall.ELOOP` as the base error for symlink loops, rather than our own
(internal) error. This allows callers to more easily use `errors.Is` to check
for this case.
## [0.2.1] - 2018-09-05 ##
### Fixed ###
- Use our own `IsNotExist` implementation, which lets us handle `ENOTDIR`
properly within `SecureJoin`.
## [0.2.0] - 2017-07-19 ##
We now have 100% test coverage!
### Added ###
- Add a `SecureJoinVFS` API that can be used for mocking (as we do in our new
tests) or for implementing custom handling of lookup operations (such as for
rootless containers, where work is necessary to access directories with weird
modes because we don't have `CAP_DAC_READ_SEARCH` or `CAP_DAC_OVERRIDE`).
## 0.1.0 - 2017-07-19
This is our first release of `github.com/cyphar/filepath-securejoin`,
containing a full implementation with a coverage of 93.5% (the only missing
cases are the error cases, which are hard to mocktest at the moment).
[Unreleased]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.1...HEAD
[0.3.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.0...v0.3.1
[0.3.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.5...v0.3.0
[0.2.5]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.4...v0.2.5
[0.2.4]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.3...v0.2.4
[0.2.3]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.2...v0.2.3
[0.2.2]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.1...v0.2.2
[0.2.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.0...v0.2.1
[0.2.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.1.0...v0.2.0

View File

@ -1 +1 @@
0.3.0
0.3.1

View File

@ -40,16 +40,18 @@ func (se symlinkStackEntry) Close() {
type symlinkStack []*symlinkStackEntry
func (s symlinkStack) IsEmpty() bool {
return len(s) == 0
func (s *symlinkStack) IsEmpty() bool {
return s == nil || len(*s) == 0
}
func (s *symlinkStack) Close() {
for _, link := range *s {
link.Close()
if s != nil {
for _, link := range *s {
link.Close()
}
// TODO: Switch to clear once we switch to Go 1.21.
*s = nil
}
// TODO: Switch to clear once we switch to Go 1.21.
*s = nil
}
var (
@ -58,11 +60,16 @@ var (
)
func (s *symlinkStack) popPart(part string) error {
if s.IsEmpty() {
if s == nil || s.IsEmpty() {
// If there is nothing in the symlink stack, then the part was from the
// real path provided by the user, and this is a no-op.
return errEmptyStack
}
if part == "." {
// "." components are no-ops -- we drop them when doing SwapLink.
return nil
}
tailEntry := (*s)[len(*s)-1]
// Double-check that we are popping the component we expect.
@ -102,17 +109,13 @@ func (s *symlinkStack) PopPart(part string) error {
}
func (s *symlinkStack) push(dir *os.File, remainingPath, linkTarget string) error {
if s == nil {
return nil
}
// Split the link target and clean up any "" parts.
linkTargetParts := slices.DeleteFunc(
strings.Split(linkTarget, "/"),
func(part string) bool { return part == "" })
// Don't add a no-op link to the stack. You can't create a no-op link
// symlink, but if the symlink is /, partialLookupInRoot has already jumped to the
// root and so there's nothing more to do.
if len(linkTargetParts) == 0 {
return nil
}
func(part string) bool { return part == "" || part == "." })
// Copy the directory so the caller doesn't close our copy.
dirCopy, err := dupFile(dir)
@ -145,7 +148,7 @@ func (s *symlinkStack) SwapLink(linkPart string, dir *os.File, remainingPath, li
}
func (s *symlinkStack) PopTopSymlink() (*os.File, string, bool) {
if s.IsEmpty() {
if s == nil || s.IsEmpty() {
return nil, "", false
}
tailEntry := (*s)[0]
@ -157,7 +160,22 @@ func (s *symlinkStack) PopTopSymlink() (*os.File, string, bool) {
// within the provided root (a-la RESOLVE_IN_ROOT) and opens the final existing
// component of the requested path, returning a file handle to the final
// existing component and a string containing the remaining path components.
func partialLookupInRoot(root *os.File, unsafePath string) (_ *os.File, _ string, Err error) {
func partialLookupInRoot(root *os.File, unsafePath string) (*os.File, string, error) {
return lookupInRoot(root, unsafePath, true)
}
func completeLookupInRoot(root *os.File, unsafePath string) (*os.File, error) {
handle, remainingPath, err := lookupInRoot(root, unsafePath, false)
if remainingPath != "" && err == nil {
// should never happen
err = fmt.Errorf("[bug] non-empty remaining path when doing a non-partial lookup: %q", remainingPath)
}
// lookupInRoot(partial=false) will always close the handle if an error is
// returned, so no need to double-check here.
return handle, err
}
func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.File, _ string, _ error) {
unsafePath = filepath.ToSlash(unsafePath) // noop
// This is very similar to SecureJoin, except that we operate on the
@ -166,7 +184,7 @@ func partialLookupInRoot(root *os.File, unsafePath string) (_ *os.File, _ string
// Try to use openat2 if possible.
if hasOpenat2() {
return partialLookupOpenat2(root, unsafePath)
return lookupOpenat2(root, unsafePath, partial)
}
// Get the "actual" root path from /proc/self/fd. This is necessary if the
@ -183,7 +201,8 @@ func partialLookupInRoot(root *os.File, unsafePath string) (_ *os.File, _ string
return nil, "", fmt.Errorf("clone root fd: %w", err)
}
defer func() {
if Err != nil {
// If a handle is not returned, close the internal handle.
if Handle == nil {
_ = currentDir.Close()
}
}()
@ -200,8 +219,11 @@ func partialLookupInRoot(root *os.File, unsafePath string) (_ *os.File, _ string
// Note that the stack is ONLY used for book-keeping. All of the actual
// path walking logic is still based on currentPath/remainingPath and
// currentDir (as in SecureJoin).
var symlinkStack symlinkStack
defer symlinkStack.Close()
var symStack *symlinkStack
if partial {
symStack = new(symlinkStack)
defer symStack.Close()
}
var (
linksWalked int
@ -220,9 +242,11 @@ func partialLookupInRoot(root *os.File, unsafePath string) (_ *os.File, _ string
} else {
part, remainingPath = remainingPath[:i], remainingPath[i+1:]
}
// Skip any "//" components.
// If we hit an empty component, we need to treat it as though it is
// "." so that trailing "/" and "//" components on a non-directory
// correctly return the right error code.
if part == "" {
continue
part = "."
}
// Apply the component lexically to the path we are building.
@ -233,7 +257,7 @@ func partialLookupInRoot(root *os.File, unsafePath string) (_ *os.File, _ string
// If we logically hit the root, just clone the root rather than
// opening the part and doing all of the other checks.
if nextPath == "/" {
if err := symlinkStack.PopPart(part); err != nil {
if err := symStack.PopPart(part); err != nil {
return nil, "", fmt.Errorf("walking into root with part %q failed: %w", part, err)
}
// Jump to root.
@ -258,14 +282,49 @@ func partialLookupInRoot(root *os.File, unsafePath string) (_ *os.File, _ string
}
switch st.Mode() & os.ModeType {
case os.ModeDir:
case os.ModeSymlink:
// readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See
// Linux commit 65cfc6722361 ("readlinkat(), fchownat() and
// fstatat() with empty relative pathnames").
linkDest, err := readlinkatFile(nextDir, "")
// We don't need the handle anymore.
_ = nextDir.Close()
if err != nil {
return nil, "", err
}
linksWalked++
if linksWalked > maxSymlinkLimit {
return nil, "", &os.PathError{Op: "securejoin.lookupInRoot", Path: logicalRootPath + "/" + unsafePath, Err: unix.ELOOP}
}
// Swap out the symlink's component for the link entry itself.
if err := symStack.SwapLink(part, currentDir, oldRemainingPath, linkDest); err != nil {
return nil, "", fmt.Errorf("walking into symlink %q failed: push symlink: %w", part, err)
}
// Update our logical remaining path.
remainingPath = linkDest + "/" + remainingPath
// Absolute symlinks reset any work we've already done.
if path.IsAbs(linkDest) {
// Jump to root.
rootClone, err := dupFile(root)
if err != nil {
return nil, "", fmt.Errorf("clone root fd: %w", err)
}
_ = currentDir.Close()
currentDir = rootClone
currentPath = "/"
}
default:
// If we are dealing with a directory, simply walk into it.
_ = currentDir.Close()
currentDir = nextDir
currentPath = nextPath
// The part was real, so drop it from the symlink stack.
if err := symlinkStack.PopPart(part); err != nil {
if err := symStack.PopPart(part); err != nil {
return nil, "", fmt.Errorf("walking into directory %q failed: %w", part, err)
}
@ -286,95 +345,45 @@ func partialLookupInRoot(root *os.File, unsafePath string) (_ *os.File, _ string
return nil, "", fmt.Errorf("walking into %q had unexpected result: %w", part, err)
}
}
case os.ModeSymlink:
// We don't need the handle anymore.
_ = nextDir.Close()
// Unfortunately, we cannot readlink through our handle and so
// we need to do a separate readlinkat (which could race to
// give us an error if the attacker swapped the symlink with a
// non-symlink).
linkDest, err := readlinkatFile(currentDir, part)
if err != nil {
if errors.Is(err, unix.EINVAL) {
// The part was not a symlink, so assume that it's a
// regular file. It is possible for it to be a
// directory (if an attacker is swapping a directory
// and non-directory at this subpath) but erroring out
// here is better anyway.
err = fmt.Errorf("%w: path component %q is invalid: %w", errPossibleAttack, part, unix.ENOTDIR)
}
return nil, "", err
}
linksWalked++
if linksWalked > maxSymlinkLimit {
return nil, "", &os.PathError{Op: "partialLookupInRoot", Path: logicalRootPath + "/" + unsafePath, Err: unix.ELOOP}
}
// Swap out the symlink's component for the link entry itself.
if err := symlinkStack.SwapLink(part, currentDir, oldRemainingPath, linkDest); err != nil {
return nil, "", fmt.Errorf("walking into symlink %q failed: push symlink: %w", part, err)
}
// Update our logical remaining path.
remainingPath = linkDest + "/" + remainingPath
// Absolute symlinks reset any work we've already done.
if path.IsAbs(linkDest) {
// Jump to root.
rootClone, err := dupFile(root)
if err != nil {
return nil, "", fmt.Errorf("clone root fd: %w", err)
}
_ = currentDir.Close()
currentDir = rootClone
currentPath = "/"
}
default:
// For any other file type, we can't walk further and so we've
// hit the end of the lookup. The handling is very similar to
// ENOENT from openat(2), except that we return a handle to the
// component we just walked into (and we drop the component
// from the symlink stack).
_ = currentDir.Close()
// The part existed, so drop it from the symlink stack.
if err := symlinkStack.PopPart(part); err != nil {
return nil, "", fmt.Errorf("walking into non-directory %q failed: %w", part, err)
}
// If there are any remaining components in the symlink stack,
// we are still within a symlink resolution and thus we hit a
// dangling symlink. So pretend that the first symlink in the
// stack we hit was an ENOENT (to match openat2).
if oldDir, remainingPath, ok := symlinkStack.PopTopSymlink(); ok {
_ = nextDir.Close()
return oldDir, remainingPath, nil
}
// The current component exists, so return it.
return nextDir, remainingPath, nil
}
case errors.Is(err, os.ErrNotExist):
default:
if !partial {
return nil, "", err
}
// If there are any remaining components in the symlink stack, we
// are still within a symlink resolution and thus we hit a dangling
// symlink. So pretend that the first symlink in the stack we hit
// was an ENOENT (to match openat2).
if oldDir, remainingPath, ok := symlinkStack.PopTopSymlink(); ok {
if oldDir, remainingPath, ok := symStack.PopTopSymlink(); ok {
_ = currentDir.Close()
return oldDir, remainingPath, nil
return oldDir, remainingPath, err
}
// We have hit a final component that doesn't exist, so we have our
// partial open result. Note that we have to use the OLD remaining
// path, since the lookup failed.
return currentDir, oldRemainingPath, nil
default:
return nil, "", err
return currentDir, oldRemainingPath, err
}
}
// If the unsafePath had a trailing slash, we need to make sure we try to
// do a relative "." open so that we will correctly return an error when
// the final component is a non-directory (to match openat2). In the
// context of openat2, a trailing slash and a trailing "/." are completely
// equivalent.
if strings.HasSuffix(unsafePath, "/") {
nextDir, err := openatFile(currentDir, ".", unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
if err != nil {
if !partial {
_ = currentDir.Close()
currentDir = nil
}
return currentDir, "", err
}
_ = currentDir.Close()
currentDir = nextDir
}
// All of the components existed!
return currentDir, "", nil
}

View File

@ -49,14 +49,14 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err
// Try to open as much of the path as possible.
currentDir, remainingPath, err := partialLookupInRoot(root, unsafePath)
if err != nil {
return nil, fmt.Errorf("find existing subpath of %q: %w", unsafePath, err)
}
defer func() {
if Err != nil {
_ = currentDir.Close()
}
}()
if err != nil && !errors.Is(err, unix.ENOENT) {
return nil, fmt.Errorf("find existing subpath of %q: %w", unsafePath, err)
}
// If there is an attacker deleting directories as we walk into them,
// detect this proactively. Note this is guaranteed to detect if the
@ -82,6 +82,7 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err
} else if err != nil {
return nil, fmt.Errorf("re-opening handle to %q: %w", currentDir.Name(), err)
} else {
_ = currentDir.Close()
currentDir = reopenDir
}

View File

@ -9,6 +9,7 @@ package securejoin
import (
"fmt"
"os"
"strconv"
"golang.org/x/sys/unix"
)
@ -16,13 +17,9 @@ import (
// OpenatInRoot is equivalent to OpenInRoot, except that the root is provided
// using an *os.File handle, to ensure that the correct root directory is used.
func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error) {
handle, remainingPath, err := partialLookupInRoot(root, unsafePath)
handle, err := completeLookupInRoot(root, unsafePath)
if err != nil {
return nil, err
}
if remainingPath != "" {
_ = handle.Close()
return nil, &os.PathError{Op: "securejoin.OpenInRoot", Path: unsafePath, Err: unix.ENOENT}
return nil, &os.PathError{Op: "securejoin.OpenInRoot", Path: unsafePath, Err: err}
}
return handle, nil
}
@ -69,15 +66,36 @@ func Reopen(handle *os.File, flags int) (*os.File, error) {
return nil, err
}
// We can't operate on /proc/thread-self/fd/$n directly when doing a
// re-open, so we need to open /proc/thread-self/fd and then open a single
// final component.
procFdDir, closer, err := procThreadSelf(procRoot, "fd/")
if err != nil {
return nil, fmt.Errorf("get safe /proc/thread-self/fd handle: %w", err)
}
defer procFdDir.Close()
defer closer()
// Try to detect if there is a mount on top of the magic-link we are about
// to open. If we are using unsafeHostProcRoot(), this could change after
// we check it (and there's nothing we can do about that) but for
// privateProcRoot() this should be guaranteed to be safe (at least since
// Linux 5.12[1], when anonymous mount namespaces were completely isolated
// from external mounts including mount propagation events).
//
// [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts
// onto targets that reside on shared mounts").
fdStr := strconv.Itoa(int(handle.Fd()))
if err := checkSymlinkOvermount(procRoot, procFdDir, fdStr); err != nil {
return nil, fmt.Errorf("check safety of /proc/thread-self/fd/%s magiclink: %w", fdStr, err)
}
flags |= unix.O_CLOEXEC
fdPath := fmt.Sprintf("fd/%d", handle.Fd())
return doProcSelfMagiclink(procRoot, fdPath, func(procDirHandle *os.File, base string) (*os.File, error) {
// Rather than just wrapping openatFile, open-code it so we can copy
// handle.Name().
reopenFd, err := unix.Openat(int(procDirHandle.Fd()), base, flags, 0)
if err != nil {
return nil, fmt.Errorf("reopen fd %d: %w", handle.Fd(), err)
}
return os.NewFile(uintptr(reopenFd), handle.Name()), nil
})
// Rather than just wrapping openatFile, open-code it so we can copy
// handle.Name().
reopenFd, err := unix.Openat(int(procFdDir.Fd()), fdStr, flags, 0)
if err != nil {
return nil, fmt.Errorf("reopen fd %d: %w", handle.Fd(), err)
}
return os.NewFile(uintptr(reopenFd), handle.Name()), nil
}

View File

@ -87,6 +87,17 @@ func openat2File(dir *os.File, path string, how *unix.OpenHow) (*os.File, error)
return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: errPossibleAttack}
}
func lookupOpenat2(root *os.File, unsafePath string, partial bool) (*os.File, string, error) {
if !partial {
file, err := openat2File(root, unsafePath, &unix.OpenHow{
Flags: unix.O_PATH | unix.O_CLOEXEC,
Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS,
})
return file, "", err
}
return partialLookupOpenat2(root, unsafePath)
}
// partialLookupOpenat2 is an alternative implementation of
// partialLookupInRoot, using openat2(RESOLVE_IN_ROOT) to more safely get a
// handle to the deepest existing child of the requested path within the root.
@ -95,6 +106,7 @@ func partialLookupOpenat2(root *os.File, unsafePath string) (*os.File, string, e
unsafePath = filepath.ToSlash(unsafePath) // noop
endIdx := len(unsafePath)
var lastError error
for endIdx > 0 {
subpath := unsafePath[:endIdx]
@ -108,11 +120,12 @@ func partialLookupOpenat2(root *os.File, unsafePath string) (*os.File, string, e
endIdx += 1
}
// We found a subpath!
return handle, unsafePath[endIdx:], nil
return handle, unsafePath[endIdx:], lastError
}
if errors.Is(err, unix.ENOENT) || errors.Is(err, unix.ENOTDIR) {
// That path doesn't exist, let's try the next directory up.
endIdx = strings.LastIndexByte(subpath, '/')
lastError = err
continue
}
return nil, "", fmt.Errorf("open subpath: %w", err)
@ -124,5 +137,5 @@ func partialLookupOpenat2(root *os.File, unsafePath string) (*os.File, string, e
if err != nil {
return nil, "", err
}
return rootClone, unsafePath, nil
return rootClone, unsafePath, lastError
}

View File

@ -10,7 +10,6 @@ import (
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"strconv"
"sync"
@ -160,7 +159,7 @@ func clonePrivateProcMount() (_ *os.File, Err error) {
}
func privateProcRoot() (*os.File, error) {
if !hasNewMountApi() {
if !hasNewMountApi() || testingForceGetProcRootUnsafe() {
return nil, fmt.Errorf("new mount api: %w", unix.ENOTSUP)
}
// Try to create a new procfs mount from scratch if we can. This ensures we
@ -199,7 +198,7 @@ func unsafeHostProcRoot() (_ *os.File, Err error) {
func doGetProcRoot() (*os.File, error) {
procRoot, err := privateProcRoot()
if err != nil || testingForceGetProcRootUnsafe(procRoot) {
if err != nil {
// Fall back to using a /proc handle if making a private mount failed.
// If we have openat2, at least we can avoid some kinds of over-mount
// attacks, but without openat2 there's not much we can do.
@ -286,14 +285,14 @@ func procThreadSelf(procRoot *os.File, subpath string) (_ *os.File, _ procThread
// procSelfFdReadlink to clean up the returned f.Name() if we use
// RESOLVE_IN_ROOT (which would lead to an infinite recursion).
handle, err = openat2File(procRoot, threadSelf+subpath, &unix.OpenHow{
Flags: unix.O_PATH | unix.O_CLOEXEC,
Flags: unix.O_PATH | unix.O_NOFOLLOW | unix.O_CLOEXEC,
Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_MAGICLINKS,
})
if err != nil {
return nil, nil, fmt.Errorf("%w: %w", errUnsafeProcfs, err)
}
} else {
handle, err = openatFile(procRoot, threadSelf+subpath, unix.O_PATH|unix.O_CLOEXEC, 0)
handle, err = openatFile(procRoot, threadSelf+subpath, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
if err != nil {
return nil, nil, fmt.Errorf("%w: %w", errUnsafeProcfs, err)
}
@ -333,10 +332,10 @@ func hasStatxMountId() bool {
return hasStatxMountIdBool
}
func checkSymlinkOvermount(dir *os.File, path string) error {
func getMountId(dir *os.File, path string) (uint64, error) {
// If we don't have statx(STATX_MNT_ID*) support, we can't do anything.
if !hasStatxMountId() {
return nil
return 0, nil
}
var (
@ -346,31 +345,29 @@ func checkSymlinkOvermount(dir *os.File, path string) error {
wantStxMask uint32 = unix.STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID
)
err := unix.Statx(int(dir.Fd()), path, unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW, int(wantStxMask), &stx)
if stx.Mask&wantStxMask == 0 {
// It's not a kernel limitation, for some reason we couldn't get a
// mount ID. Assume it's some kind of attack.
err = fmt.Errorf("%w: could not get mount id", errUnsafeProcfs)
}
if err != nil {
return 0, &os.PathError{Op: "statx(STATX_MNT_ID_...)", Path: dir.Name() + "/" + path, Err: err}
}
return stx.Mnt_id, nil
}
func checkSymlinkOvermount(procRoot *os.File, dir *os.File, path string) error {
// Get the mntId of our procfs handle.
err := unix.Statx(int(dir.Fd()), "", unix.AT_EMPTY_PATH, int(wantStxMask), &stx)
expectedMountId, err := getMountId(procRoot, "")
if err != nil {
return &os.PathError{Op: "statx", Path: dir.Name(), Err: err}
return err
}
if stx.Mask&wantStxMask == 0 {
// It's not a kernel limitation, for some reason we couldn't get a
// mount ID. Assume it's some kind of attack.
return fmt.Errorf("%w: could not get mnt id of dir %s", errUnsafeProcfs, dir.Name())
}
expectedMountId := stx.Mnt_id
// Get the mntId of the target symlink.
stx = unix.Statx_t{}
err = unix.Statx(int(dir.Fd()), path, unix.AT_SYMLINK_NOFOLLOW, int(wantStxMask), &stx)
// Get the mntId of the target magic-link.
gotMountId, err := getMountId(dir, path)
if err != nil {
return &os.PathError{Op: "statx", Path: dir.Name() + "/" + path, Err: err}
return err
}
if stx.Mask&wantStxMask == 0 {
// It's not a kernel limitation, for some reason we couldn't get a
// mount ID. Assume it's some kind of attack.
return fmt.Errorf("%w: could not get mnt id of symlink %s", errUnsafeProcfs, path)
}
gotMountId := stx.Mnt_id
// As long as the directory mount is alive, even with wrapping mount IDs,
// we would expect to see a different mount ID here. (Of course, if we're
// using unsafeHostProcRoot() then an attaker could change this after we
@ -381,37 +378,33 @@ func checkSymlinkOvermount(dir *os.File, path string) error {
return nil
}
func doProcSelfMagiclink[T any](procRoot *os.File, subPath string, fn func(procDirHandle *os.File, base string) (T, error)) (T, error) {
// We cannot operate on the magic-link directly with a handle, we need to
// create a handle to the parent of the magic-link and then do
// single-component operations on it.
dir, base := filepath.Dir(subPath), filepath.Base(subPath)
procDirHandle, closer, err := procThreadSelf(procRoot, dir)
func doRawProcSelfFdReadlink(procRoot *os.File, fd int) (string, error) {
fdPath := fmt.Sprintf("fd/%d", fd)
procFdLink, closer, err := procThreadSelf(procRoot, fdPath)
if err != nil {
return *new(T), fmt.Errorf("get safe /proc/thread-self/%s handle: %w", dir, err)
return "", fmt.Errorf("get safe /proc/thread-self/%s handle: %w", fdPath, err)
}
defer procDirHandle.Close()
defer procFdLink.Close()
defer closer()
// Try to detect if there is a mount on top of the symlink we are about to
// read. If we are using unsafeHostProcRoot(), this could change after we
// check it (and there's nothing we can do about that) but for
// privateProcRoot() this should be guaranteed to be safe (at least since
// Linux 5.12[1], when anonymous mount namespaces were completely isolated
// from external mounts including mount propagation events).
// Try to detect if there is a mount on top of the magic-link. Since we use the handle directly
// provide to the closure. If the closure uses the handle directly, this
// should be safe in general (a mount on top of the path afterwards would
// not affect the handle itself) and will definitely be safe if we are
// using privateProcRoot() (at least since Linux 5.12[1], when anonymous
// mount namespaces were completely isolated from external mounts including
// mount propagation events).
//
// [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts
// onto targets that reside on shared mounts").
if err := checkSymlinkOvermount(procDirHandle, base); err != nil {
return *new(T), fmt.Errorf("check safety of %s proc magiclink: %w", subPath, err)
if err := checkSymlinkOvermount(procRoot, procFdLink, ""); err != nil {
return "", fmt.Errorf("check safety of /proc/thread-self/fd/%d magiclink: %w", fd, err)
}
return fn(procDirHandle, base)
}
func doRawProcSelfFdReadlink(procRoot *os.File, fd int) (string, error) {
fdPath := fmt.Sprintf("fd/%d", fd)
return doProcSelfMagiclink(procRoot, fdPath, readlinkatFile)
// readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See Linux commit
// 65cfc6722361 ("readlinkat(), fchownat() and fstatat() with empty
// relative pathnames").
return readlinkatFile(procFdLink, "")
}
func rawProcSelfFdReadlink(fd int) (string, error) {

View File

@ -42,9 +42,9 @@ func testingForcePrivateProcRootOpenTreeAtRecursive(f *os.File) bool {
testingCheckClose(*testingForceGetProcRoot >= forceGetProcRootOpenTreeAtRecursive, f)
}
func testingForceGetProcRootUnsafe(f *os.File) bool {
func testingForceGetProcRootUnsafe() bool {
return testing.Testing() && testingForceGetProcRoot != nil &&
testingCheckClose(*testingForceGetProcRoot >= forceGetProcRootUnsafe, f)
*testingForceGetProcRoot >= forceGetProcRootUnsafe
}
type forceProcThreadSelfLevel int

View File

@ -51,7 +51,7 @@ func mountedByOpenat2(path string) (bool, error) {
Resolve: unix.RESOLVE_NO_XDEV,
})
_ = unix.Close(dirfd)
switch err { //nolint:errorlint // unix errors are bare
switch err {
case nil: // definitely not a mount
_ = unix.Close(fd)
return false, nil