Use storage that better supports rootless overlayfs

overlayfs -- the kernel's version, not fuse-overlayfs -- recently learned
(as of linux 5.16.0, I believe) how to support rootless users. Previously,
rootless users had to use these storage.conf(5) settings:

* storage.driver=vfs          (aka STORAGE_DRIVER=vfs), or
* storage.driver=overlay      (aka STORAGE_DRIVER=overlay),
  storage.options.overlay.mount_program=/usr/bin/fuse-overlayfs
                              (aka STORAGE_OPTS=/usr/bin/fuse-overlayfs)

Now that a third backend is available, setting only:

* storage.driver=overlay      (aka STORAGE_DRIVER=overlay)

https://github.com/containers/podman/issues/13123 reported EXDEV errors
during the normal operation of their container. Tracing it out, the
problem turned out to be that their container was being mounted without
'userxattr'; I don't fully understand why, but mount(8) mentions this is
needed for rootless users:

> userxattr
>
>   Use the "user.overlay." xattr namespace instead of "trusted.overlay.".
>   This is useful for unprivileged mounting of overlayfs.

https://github.com/containers/storage/pull/1156 found and fixed the issue
in podman, and this just pulls in that via

    go get github.com/containers/storage@ebc90ab
    go mod vendor
    make vendor

Closes https://github.com/containers/podman/issues/13123

Signed-off-by: Nick Guenther <nick.guenther@polymtl.ca>
This commit is contained in:
Nick Guenther
2022-02-28 12:54:09 -05:00
parent 8bdda91ab7
commit 572e6464f6
31 changed files with 383 additions and 217 deletions

View File

@ -50,11 +50,14 @@ func chownByMapsMain() {
if len(toHost.UIDs()) == 0 && len(toHost.GIDs()) == 0 {
toHost = nil
}
chowner := newLChowner()
chown := func(path string, info os.FileInfo, _ error) error {
if path == "." {
return nil
}
return platformLChown(path, info, toHost, toContainer)
return chowner.LChown(path, info, toHost, toContainer)
}
if err := pwalk.Walk(".", chown); err != nil {
fmt.Fprintf(os.Stderr, "error during chown: %v", err)

View File

@ -1,3 +1,4 @@
//go:build !windows
// +build !windows
package graphdriver
@ -6,17 +7,50 @@ import (
"errors"
"fmt"
"os"
"sync"
"syscall"
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/system"
)
func platformLChown(path string, info os.FileInfo, toHost, toContainer *idtools.IDMappings) error {
type inode struct {
Dev uint64
Ino uint64
}
type platformChowner struct {
mutex sync.Mutex
inodes map[inode]bool
}
func newLChowner() *platformChowner {
return &platformChowner{
inodes: make(map[inode]bool),
}
}
func (c *platformChowner) LChown(path string, info os.FileInfo, toHost, toContainer *idtools.IDMappings) error {
st, ok := info.Sys().(*syscall.Stat_t)
if !ok {
return nil
}
i := inode{
Dev: uint64(st.Dev),
Ino: uint64(st.Ino),
}
c.mutex.Lock()
_, found := c.inodes[i]
if !found {
c.inodes[i] = true
}
c.mutex.Unlock()
if found {
return nil
}
// Map an on-disk UID/GID pair from host to container
// using the first map, then back to the host using the
// second map. Skip that first step if they're 0, to

View File

@ -1,3 +1,4 @@
//go:build windows
// +build windows
package graphdriver
@ -9,6 +10,13 @@ import (
"github.com/containers/storage/pkg/idtools"
)
func platformLChown(path string, info os.FileInfo, toHost, toContainer *idtools.IDMappings) error {
type platformChowner struct {
}
func newLChowner() *platformChowner {
return &platformChowner{}
}
func (c *platformChowner) LChown(path string, info os.FileInfo, toHost, toContainer *idtools.IDMappings) error {
return &os.PathError{"lchown", path, syscall.EWINDOWS}
}

View File

@ -920,7 +920,9 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disable
defer func() {
// Clean up on failure
if retErr != nil {
os.RemoveAll(dir)
if err2 := os.RemoveAll(dir); err2 != nil {
logrus.Errorf("While recovering from a failure creating a layer, error deleting %#v: %v", dir, err2)
}
}
}()
@ -1253,6 +1255,8 @@ func (d *Driver) recreateSymlinks() error {
linkFile := filepath.Join(d.dir(targetID), "link")
data, err := ioutil.ReadFile(linkFile)
if err != nil || string(data) != link.Name() {
// NOTE: If two or more links point to the same target, we will update linkFile
// with every value of link.Name(), and set madeProgress = true every time.
if err := ioutil.WriteFile(linkFile, []byte(link.Name()), 0644); err != nil {
errs = multierror.Append(errs, errors.Wrapf(err, "correcting link for layer %s", targetID))
continue
@ -1458,6 +1462,21 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
workdir := path.Join(dir, "work")
if d.options.mountProgram == "" && unshare.IsRootless() {
optsList = append(optsList, "userxattr")
}
if options.Volatile && !hasVolatileOption(optsList) {
supported, err := d.getSupportsVolatile()
if err != nil {
return "", err
}
// If "volatile" is not supported by the file system, just ignore the request
if supported {
optsList = append(optsList, "volatile")
}
}
var opts string
if readWrite {
opts = fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", strings.Join(absLowers, ":"), diffDir, workdir)
@ -1465,22 +1484,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
opts = fmt.Sprintf("lowerdir=%s:%s", diffDir, strings.Join(absLowers, ":"))
}
if len(optsList) > 0 {
opts = fmt.Sprintf("%s,%s", strings.Join(optsList, ","), opts)
}
if d.options.mountProgram == "" && unshare.IsRootless() {
opts = fmt.Sprintf("%s,userxattr", opts)
}
// If "volatile" is not supported by the file system, just ignore the request
if options.Volatile && !hasVolatileOption(strings.Split(opts, ",")) {
supported, err := d.getSupportsVolatile()
if err != nil {
return "", err
}
if supported {
opts = fmt.Sprintf("%s,volatile", opts)
}
opts = fmt.Sprintf("%s,%s", opts, strings.Join(optsList, ","))
}
mountData := label.FormatMountLabel(opts, options.MountLabel)
@ -1489,10 +1493,6 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
pageSize := unix.Getpagesize()
// Use relative paths and mountFrom when the mount data has exceeded
// the page size. The mount syscall fails if the mount data cannot
// fit within a page and relative links make the mount data much
// smaller at the expense of requiring a fork exec to chroot.
if d.options.mountProgram != "" {
mountFunc = func(source string, target string, mType string, flags uintptr, label string) error {
if !disableShifting {
@ -1519,6 +1519,11 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
return nil
}
} else if len(mountData) > pageSize {
// Use relative paths and mountFrom when the mount data has exceeded
// the page size. The mount syscall fails if the mount data cannot
// fit within a page and relative links make the mount data much
// smaller at the expense of requiring a fork exec to chroot.
workdir = path.Join(id, "work")
//FIXME: We need to figure out to get this to work with additional stores
if readWrite {
@ -1527,6 +1532,9 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
} else {
opts = fmt.Sprintf("lowerdir=%s", strings.Join(absLowers, ":"))
}
if len(optsList) > 0 {
opts = fmt.Sprintf("%s,%s", opts, strings.Join(optsList, ","))
}
mountData = label.FormatMountLabel(opts, options.MountLabel)
if len(mountData) > pageSize {
return "", fmt.Errorf("cannot mount layer, mount label %q too large %d > page size %d", options.MountLabel, len(mountData), pageSize)

View File

@ -4,15 +4,15 @@ module github.com/containers/storage
require (
github.com/BurntSushi/toml v1.0.0
github.com/Microsoft/go-winio v0.5.1
github.com/Microsoft/go-winio v0.5.2
github.com/Microsoft/hcsshim v0.9.2
github.com/containerd/stargz-snapshotter/estargz v0.11.0
github.com/containerd/stargz-snapshotter/estargz v0.11.1
github.com/cyphar/filepath-securejoin v0.2.3
github.com/docker/go-units v0.4.0
github.com/google/go-intervals v0.0.2
github.com/hashicorp/go-multierror v1.1.1
github.com/json-iterator/go v1.1.12
github.com/klauspost/compress v1.14.2
github.com/klauspost/compress v1.14.4
github.com/klauspost/pgzip v1.2.5
github.com/mattn/go-shellwords v1.0.12
github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible

View File

@ -47,8 +47,8 @@ github.com/Microsoft/go-winio v0.4.16/go.mod h1:XB6nPKklQyQ7GC9LdcBEcBl8PF76WugX
github.com/Microsoft/go-winio v0.4.17-0.20210211115548-6eac466e5fa3/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84=
github.com/Microsoft/go-winio v0.4.17-0.20210324224401-5516f17a5958/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84=
github.com/Microsoft/go-winio v0.4.17/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84=
github.com/Microsoft/go-winio v0.5.1 h1:aPJp2QD7OOrhO5tQXqQoGSJc+DjDtWTGLOmNyAm6FgY=
github.com/Microsoft/go-winio v0.5.1/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84=
github.com/Microsoft/go-winio v0.5.2 h1:a9IhgEQBCUEk6QCdml9CiJGhAws+YwffDHEMp1VMrpA=
github.com/Microsoft/go-winio v0.5.2/go.mod h1:WpS1mjBmmwHBEWmogvA2mj8546UReBk4v8QkMxJ6pZY=
github.com/Microsoft/hcsshim v0.8.6/go.mod h1:Op3hHsoHPAvb6lceZHDtd9OkTew38wNoXnJs8iY7rUg=
github.com/Microsoft/hcsshim v0.8.7-0.20190325164909-8abdbb8205e4/go.mod h1:Op3hHsoHPAvb6lceZHDtd9OkTew38wNoXnJs8iY7rUg=
github.com/Microsoft/hcsshim v0.8.7/go.mod h1:OHd7sQqRFrYd3RmSgbgji+ctCwkbq2wbEYNSzOYtcBQ=
@ -176,8 +176,8 @@ github.com/containerd/nri v0.0.0-20201007170849-eb1350a75164/go.mod h1:+2wGSDGFY
github.com/containerd/nri v0.0.0-20210316161719-dbaa18c31c14/go.mod h1:lmxnXF6oMkbqs39FiCt1s0R2HSMhcLel9vNL3m4AaeY=
github.com/containerd/nri v0.1.0/go.mod h1:lmxnXF6oMkbqs39FiCt1s0R2HSMhcLel9vNL3m4AaeY=
github.com/containerd/stargz-snapshotter/estargz v0.4.1/go.mod h1:x7Q9dg9QYb4+ELgxmo4gBUeJB0tl5dqH1Sdz0nJU1QM=
github.com/containerd/stargz-snapshotter/estargz v0.11.0 h1:t0IW5kOmY7AXDAWRUs2uVzDhijAUOAYVr/dyRhOQvBg=
github.com/containerd/stargz-snapshotter/estargz v0.11.0/go.mod h1:/KsZXsJRllMbTKFfG0miFQWViQKdI9+9aSXs+HN0+ac=
github.com/containerd/stargz-snapshotter/estargz v0.11.1 h1:mNQqxcAWmDrV6d6yUvzFhfY8puNzoQz9v4diW+Pmei4=
github.com/containerd/stargz-snapshotter/estargz v0.11.1/go.mod h1:6VoPcf4M1wvnogWxqc4TqBWWErCS+R+ucnPZId2VbpQ=
github.com/containerd/ttrpc v0.0.0-20190828154514-0e0f228740de/go.mod h1:PvCDdDGpgqzQIzDW1TphrGLssLDZp2GuS+X5DkEJB8o=
github.com/containerd/ttrpc v0.0.0-20190828172938-92c8520ef9f8/go.mod h1:PvCDdDGpgqzQIzDW1TphrGLssLDZp2GuS+X5DkEJB8o=
github.com/containerd/ttrpc v0.0.0-20191028202541-4f1b8fe65a5c/go.mod h1:LPm1u0xBw8r8NOKoOdNMeVHSawSsltak+Ihv+etqsE8=
@ -424,8 +424,9 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.14.2 h1:S0OHlFk/Gbon/yauFJ4FfJJF5V0fc5HbBTJazi28pRw=
github.com/klauspost/compress v1.14.2/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/klauspost/compress v1.14.3/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/klauspost/compress v1.14.4 h1:eijASRJcobkVtSt81Olfh7JX43osYLwy5krOJo6YEu4=
github.com/klauspost/compress v1.14.4/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=

View File

@ -425,37 +425,36 @@ func (r *imageStore) Create(id string, names []string, layer, metadata string, c
if created.IsZero() {
created = time.Now().UTC()
}
if err == nil {
image = &Image{
ID: id,
Digest: searchableDigest,
Digests: nil,
Names: names,
TopLayer: layer,
Metadata: metadata,
BigDataNames: []string{},
BigDataSizes: make(map[string]int64),
BigDataDigests: make(map[string]digest.Digest),
Created: created,
Flags: make(map[string]interface{}),
}
err := image.recomputeDigests()
if err != nil {
return nil, errors.Wrapf(err, "error validating digests for new image")
}
r.images = append(r.images, image)
r.idindex.Add(id)
r.byid[id] = image
for _, name := range names {
r.byname[name] = image
}
for _, digest := range image.Digests {
list := r.bydigest[digest]
r.bydigest[digest] = append(list, image)
}
err = r.Save()
image = copyImage(image)
image = &Image{
ID: id,
Digest: searchableDigest,
Digests: nil,
Names: names,
TopLayer: layer,
Metadata: metadata,
BigDataNames: []string{},
BigDataSizes: make(map[string]int64),
BigDataDigests: make(map[string]digest.Digest),
Created: created,
Flags: make(map[string]interface{}),
}
err = image.recomputeDigests()
if err != nil {
return nil, errors.Wrapf(err, "error validating digests for new image")
}
r.images = append(r.images, image)
r.idindex.Add(id)
r.byid[id] = image
for _, name := range names {
r.byname[name] = image
}
for _, digest := range image.Digests {
list := r.bydigest[digest]
r.bydigest[digest] = append(list, image)
}
err = r.Save()
image = copyImage(image)
return image, err
}

View File

@ -399,14 +399,13 @@ func (r *layerStore) Load() error {
if layer.Flags == nil {
layer.Flags = make(map[string]interface{})
}
if cleanup, ok := layer.Flags[incompleteFlag]; ok {
if b, ok := cleanup.(bool); ok && b {
err = r.deleteInternal(layer.ID)
if err != nil {
break
}
shouldSave = true
if layerHasIncompleteFlag(layer) {
logrus.Warnf("Found incomplete layer %#v, deleting it", layer.ID)
err = r.deleteInternal(layer.ID)
if err != nil {
break
}
shouldSave = true
}
}
}
@ -742,26 +741,17 @@ func (r *layerStore) Put(id string, parentLayer *Layer, names []string, mountLab
}
if moreOptions.TemplateLayer != "" {
if err = r.driver.CreateFromTemplate(id, moreOptions.TemplateLayer, templateIDMappings, parent, parentMappings, &opts, writeable); err != nil {
if id != "" {
return nil, -1, errors.Wrapf(err, "error creating copy of template layer %q with ID %q", moreOptions.TemplateLayer, id)
}
return nil, -1, errors.Wrapf(err, "error creating copy of template layer %q", moreOptions.TemplateLayer)
return nil, -1, errors.Wrapf(err, "error creating copy of template layer %q with ID %q", moreOptions.TemplateLayer, id)
}
oldMappings = templateIDMappings
} else {
if writeable {
if err = r.driver.CreateReadWrite(id, parent, &opts); err != nil {
if id != "" {
return nil, -1, errors.Wrapf(err, "error creating read-write layer with ID %q", id)
}
return nil, -1, errors.Wrapf(err, "error creating read-write layer")
return nil, -1, errors.Wrapf(err, "error creating read-write layer with ID %q", id)
}
} else {
if err = r.driver.Create(id, parent, &opts); err != nil {
if id != "" {
return nil, -1, errors.Wrapf(err, "error creating layer with ID %q", id)
}
return nil, -1, errors.Wrapf(err, "error creating layer")
return nil, -1, errors.Wrapf(err, "error creating layer with ID %q", id)
}
}
oldMappings = parentMappings
@ -770,7 +760,9 @@ func (r *layerStore) Put(id string, parentLayer *Layer, names []string, mountLab
if err = r.driver.UpdateLayerIDMap(id, oldMappings, idMappings, mountLabel); err != nil {
// We don't have a record of this layer, but at least
// try to clean it up underneath us.
r.driver.Remove(id)
if err2 := r.driver.Remove(id); err2 != nil {
logrus.Errorf("While recovering from a failure creating in UpdateLayerIDMap, error deleting layer %#v: %v", id, err2)
}
return nil, -1, err
}
}
@ -795,21 +787,26 @@ func (r *layerStore) Put(id string, parentLayer *Layer, names []string, mountLab
for flag, value := range flags {
layer.Flags[flag] = value
}
savedIncompleteLayer := false
if diff != nil {
layer.Flags[incompleteFlag] = true
err = r.Save()
if err != nil {
// We don't have a record of this layer, but at least
// try to clean it up underneath us.
r.driver.Remove(id)
if err2 := r.driver.Remove(id); err2 != nil {
logrus.Errorf("While recovering from a failure saving incomplete layer metadata, error deleting layer %#v: %v", id, err2)
}
return nil, -1, err
}
savedIncompleteLayer = true
size, err = r.applyDiffWithOptions(layer.ID, moreOptions, diff)
if err != nil {
if r.Delete(layer.ID) != nil {
if err2 := r.Delete(layer.ID); err2 != nil {
// Either a driver error or an error saving.
// We now have a layer that's been marked for
// deletion but which we failed to remove.
logrus.Errorf("While recovering from a failure applying layer diff, error deleting layer %#v: %v", layer.ID, err2)
}
return nil, -1, err
}
@ -817,9 +814,20 @@ func (r *layerStore) Put(id string, parentLayer *Layer, names []string, mountLab
}
err = r.Save()
if err != nil {
// We don't have a record of this layer, but at least
// try to clean it up underneath us.
r.driver.Remove(id)
if savedIncompleteLayer {
if err2 := r.Delete(layer.ID); err2 != nil {
// Either a driver error or an error saving.
// We now have a layer that's been marked for
// deletion but which we failed to remove.
logrus.Errorf("While recovering from a failure saving finished layer metadata, error deleting layer %#v: %v", layer.ID, err2)
}
} else {
// We don't have a record of this layer, but at least
// try to clean it up underneath us.
if err2 := r.driver.Remove(id); err2 != nil {
logrus.Errorf("While recovering from a failure saving finished layer metadata, error deleting layer %#v in graph driver: %v", id, err2)
}
}
return nil, -1, err
}
layer = copyLayer(layer)
@ -1149,6 +1157,17 @@ func (r *layerStore) tspath(id string) string {
return filepath.Join(r.layerdir, id+tarSplitSuffix)
}
// layerHasIncompleteFlag returns true if layer.Flags contains an incompleteFlag set to true
func layerHasIncompleteFlag(layer *Layer) bool {
// layer.Flags[…] is defined to succeed and return ok == false if Flags == nil
if flagValue, ok := layer.Flags[incompleteFlag]; ok {
if b, ok := flagValue.(bool); ok && b {
return true
}
}
return false
}
func (r *layerStore) deleteInternal(id string) error {
if !r.IsReadWrite() {
return errors.Wrapf(ErrStoreIsReadOnly, "not allowed to delete layers at %q", r.layerspath())
@ -1157,6 +1176,18 @@ func (r *layerStore) deleteInternal(id string) error {
if !ok {
return ErrLayerUnknown
}
// Ensure that if we are interrupted, the layer will be cleaned up.
if !layerHasIncompleteFlag(layer) {
if layer.Flags == nil {
layer.Flags = make(map[string]interface{})
}
layer.Flags[incompleteFlag] = true
if err := r.Save(); err != nil {
return err
}
}
// We never unset incompleteFlag; below, we remove the entire object from r.layers.
id = layer.ID
err := r.driver.Remove(id)
if err != nil {

View File

@ -1248,7 +1248,7 @@ func (d whiteoutHandler) Mknod(path string, mode uint32, dev int) error {
func checkChownErr(err error, name string, uid, gid int) error {
if errors.Is(err, syscall.EINVAL) {
return fmt.Errorf("potentially insufficient UIDs or GIDs available in user namespace (requested %d:%d for %s): Check /etc/subuid and /etc/subgid if configured locally: %w", uid, gid, name, err)
return fmt.Errorf("potentially insufficient UIDs or GIDs available in user namespace (requested %d:%d for %s): Check /etc/subuid and /etc/subgid if configured locally and run podman-system-migrate: %w", uid, gid, name, err)
}
return err
}

View File

@ -297,7 +297,7 @@ func parseSubidFile(path, username string) (ranges, error) {
func checkChownErr(err error, name string, uid, gid int) error {
if e, ok := err.(*os.PathError); ok && e.Err == syscall.EINVAL {
return errors.Wrapf(err, "potentially insufficient UIDs or GIDs available in user namespace (requested %d:%d for %s): Check /etc/subuid and /etc/subgid if configured locally", uid, gid, name)
return errors.Wrapf(err, "potentially insufficient UIDs or GIDs available in user namespace (requested %d:%d for %s): Check /etc/subuid and /etc/subgid if configured locally and run podman-system-migrate", uid, gid, name)
}
return err
}

View File

@ -2532,17 +2532,12 @@ func (s *store) DeleteContainer(id string) error {
}()
var errors []error
for {
select {
case err, ok := <-errChan:
if !ok {
return multierror.Append(nil, errors...).ErrorOrNil()
}
if err != nil {
errors = append(errors, err)
}
for err := range errChan {
if err != nil {
errors = append(errors, err)
}
}
return multierror.Append(nil, errors...).ErrorOrNil()
}
}
return ErrNotAContainer