mirror of
https://github.com/containers/podman.git
synced 2025-05-20 00:27:03 +08:00

This will appease the higher-level quota logic. Basically, to find a free quota ID to prevent reuse, we will iterate through the contents of the directory and check the quota IDs of all subdirectories, then use the first free ID found that is larger than the base ID (the one set on the base directory). Problem: our volumes use a two-tier directory structure, where the volume has an outer directory (with the name of the actual volume) and an inner directory (always named _data). We were only setting the quota on _data, meaning the outer directory did not have an ID, and the ID-choosing logic thus never detected that any IDs had been allocated and always chose the same ID. Setting the ID on the outer directory with PROJINHERIT set makes the ID allocation logic work properly, and guarantees children inherit the ID - so _data and all contents of the volume get the ID as we'd expect. No tests as we don't have a filesystem in our CI that supports XFS quotas (setting it on / needs kernel flags added). Fixes https://issues.redhat.com/browse/RHEL-18038 Signed-off-by: Matt Heon <mheon@redhat.com>
500 lines
18 KiB
Go
500 lines
18 KiB
Go
//go:build !remote && (linux || freebsd)
|
|
|
|
package libpod
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/containers/podman/v5/libpod/define"
|
|
"github.com/containers/podman/v5/libpod/events"
|
|
volplugin "github.com/containers/podman/v5/libpod/plugin"
|
|
"github.com/containers/storage"
|
|
"github.com/containers/storage/drivers/quota"
|
|
"github.com/containers/storage/pkg/fileutils"
|
|
"github.com/containers/storage/pkg/idtools"
|
|
"github.com/containers/storage/pkg/stringid"
|
|
pluginapi "github.com/docker/go-plugins-helpers/volume"
|
|
"github.com/opencontainers/selinux/go-selinux"
|
|
"github.com/sirupsen/logrus"
|
|
)
|
|
|
|
// volumeSuffix is appended to a volume's name to build the c/storage
// container name (StorageName) backing an image-driver volume.
const volumeSuffix = "+volume"
|
|
|
|
// NewVolume creates a new empty volume
|
|
func (r *Runtime) NewVolume(ctx context.Context, options ...VolumeCreateOption) (*Volume, error) {
|
|
if !r.valid {
|
|
return nil, define.ErrRuntimeStopped
|
|
}
|
|
return r.newVolume(ctx, false, options...)
|
|
}
|
|
|
|
// newVolume creates a new empty volume with the given options.
|
|
// The createPluginVolume can be set to true to make it not create the volume in the volume plugin,
|
|
// this is required for the UpdateVolumePlugins() function. If you are not sure, set this to false.
|
|
func (r *Runtime) newVolume(ctx context.Context, noCreatePluginVolume bool, options ...VolumeCreateOption) (_ *Volume, deferredErr error) {
|
|
volume := newVolume(r)
|
|
for _, option := range options {
|
|
if err := option(volume); err != nil {
|
|
return nil, fmt.Errorf("running volume create option: %w", err)
|
|
}
|
|
}
|
|
|
|
if volume.config.Name == "" {
|
|
volume.config.Name = stringid.GenerateRandomID()
|
|
}
|
|
if volume.config.Driver == "" {
|
|
volume.config.Driver = define.VolumeDriverLocal
|
|
}
|
|
volume.config.CreatedTime = time.Now()
|
|
|
|
// Check if volume with given name exists.
|
|
exists, err := r.state.HasVolume(volume.config.Name)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("checking if volume with name %s exists: %w", volume.config.Name, err)
|
|
}
|
|
if exists {
|
|
if volume.ignoreIfExists {
|
|
existingVolume, err := r.state.Volume(volume.config.Name)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("reading volume from state: %w", err)
|
|
}
|
|
return existingVolume, nil
|
|
} else {
|
|
return nil, fmt.Errorf("volume with name %s already exists: %w", volume.config.Name, define.ErrVolumeExists)
|
|
}
|
|
}
|
|
|
|
// Plugin can be nil if driver is local, but that's OK - superfluous
|
|
// assignment doesn't hurt much.
|
|
plugin, err := r.getVolumePlugin(volume.config)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("volume %s uses volume plugin %s but it could not be retrieved: %w", volume.config.Name, volume.config.Driver, err)
|
|
}
|
|
volume.plugin = plugin
|
|
|
|
if volume.config.Driver == define.VolumeDriverLocal {
|
|
logrus.Debugf("Validating options for local driver")
|
|
// Validate options
|
|
for key, val := range volume.config.Options {
|
|
switch strings.ToLower(key) {
|
|
case "device":
|
|
if strings.ToLower(volume.config.Options["type"]) == define.TypeBind {
|
|
if err := fileutils.Exists(val); err != nil {
|
|
return nil, fmt.Errorf("invalid volume option %s for driver 'local': %w", key, err)
|
|
}
|
|
}
|
|
case "o", "type", "uid", "gid", "size", "inodes", "noquota", "copy", "nocopy":
|
|
// Do nothing, valid keys
|
|
default:
|
|
return nil, fmt.Errorf("invalid mount option %s for driver 'local': %w", key, define.ErrInvalidArg)
|
|
}
|
|
}
|
|
} else if volume.config.Driver == define.VolumeDriverImage && !volume.UsesVolumeDriver() {
|
|
logrus.Debugf("Creating image-based volume")
|
|
var imgString string
|
|
// Validate options
|
|
for key, val := range volume.config.Options {
|
|
switch strings.ToLower(key) {
|
|
case "image":
|
|
imgString = val
|
|
default:
|
|
return nil, fmt.Errorf("invalid mount option %s for driver 'image': %w", key, define.ErrInvalidArg)
|
|
}
|
|
}
|
|
|
|
if imgString == "" {
|
|
return nil, fmt.Errorf("must provide an image name when creating a volume with the image driver: %w", define.ErrInvalidArg)
|
|
}
|
|
|
|
// Look up the image
|
|
image, _, err := r.libimageRuntime.LookupImage(imgString, nil)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("looking up image %s to create volume failed: %w", imgString, err)
|
|
}
|
|
|
|
// Generate a c/storage name and ID for the volume.
|
|
// Use characters Podman does not allow for the name, to ensure
|
|
// no collision with containers.
|
|
volume.config.StorageID = stringid.GenerateRandomID()
|
|
volume.config.StorageName = volume.config.Name + volumeSuffix
|
|
volume.config.StorageImageID = image.ID()
|
|
|
|
// Create a backing container in c/storage.
|
|
storageConfig := storage.ContainerOptions{
|
|
LabelOpts: []string{"filetype:container_file_t", "level:s0"},
|
|
}
|
|
if len(volume.config.MountLabel) > 0 {
|
|
context, err := selinux.NewContext(volume.config.MountLabel)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get SELinux context from %s: %w", volume.config.MountLabel, err)
|
|
}
|
|
storageConfig.LabelOpts = []string{fmt.Sprintf("filetype:%s", context["type"])}
|
|
}
|
|
if _, err := r.storageService.CreateContainerStorage(ctx, r.imageContext, imgString, image.ID(), volume.config.StorageName, volume.config.StorageID, storageConfig); err != nil {
|
|
return nil, fmt.Errorf("creating backing storage for image driver: %w", err)
|
|
}
|
|
defer func() {
|
|
if deferredErr != nil {
|
|
if err := r.storageService.DeleteContainer(volume.config.StorageID); err != nil {
|
|
logrus.Errorf("Error removing volume %s backing storage: %v", volume.config.Name, err)
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
|
|
// Now we get conditional: we either need to make the volume in the
|
|
// volume plugin, or on disk if not using a plugin.
|
|
if volume.plugin != nil && !noCreatePluginVolume {
|
|
// We can't chown, or relabel, or similar the path the volume is
|
|
// using, because it's not managed by us.
|
|
// TODO: reevaluate this once we actually have volume plugins in
|
|
// use in production - it may be safe, but I can't tell without
|
|
// knowing what the actual plugin does...
|
|
if err := makeVolumeInPluginIfNotExist(volume.config.Name, volume.config.Options, volume.plugin); err != nil {
|
|
return nil, err
|
|
}
|
|
} else {
|
|
// Create the mountpoint of this volume
|
|
volPathRoot := filepath.Join(r.config.Engine.VolumePath, volume.config.Name)
|
|
if err := os.MkdirAll(volPathRoot, 0700); err != nil {
|
|
return nil, fmt.Errorf("creating volume directory %q: %w", volPathRoot, err)
|
|
}
|
|
if err := idtools.SafeChown(volPathRoot, volume.config.UID, volume.config.GID); err != nil {
|
|
return nil, fmt.Errorf("chowning volume directory %q to %d:%d: %w", volPathRoot, volume.config.UID, volume.config.GID, err)
|
|
}
|
|
fullVolPath := filepath.Join(volPathRoot, "_data")
|
|
if err := os.MkdirAll(fullVolPath, 0755); err != nil {
|
|
return nil, fmt.Errorf("creating volume directory %q: %w", fullVolPath, err)
|
|
}
|
|
if err := idtools.SafeChown(fullVolPath, volume.config.UID, volume.config.GID); err != nil {
|
|
return nil, fmt.Errorf("chowning volume directory %q to %d:%d: %w", fullVolPath, volume.config.UID, volume.config.GID, err)
|
|
}
|
|
if err := LabelVolumePath(fullVolPath, volume.config.MountLabel); err != nil {
|
|
return nil, err
|
|
}
|
|
switch {
|
|
case volume.config.DisableQuota:
|
|
if volume.config.Size > 0 || volume.config.Inodes > 0 {
|
|
return nil, errors.New("volume options size and inodes cannot be used without quota")
|
|
}
|
|
case volume.config.Options["type"] == define.TypeTmpfs:
|
|
// tmpfs only supports Size
|
|
if volume.config.Inodes > 0 {
|
|
return nil, errors.New("volume option inodes not supported on tmpfs filesystem")
|
|
}
|
|
case volume.config.Inodes > 0 || volume.config.Size > 0:
|
|
projectQuotaSupported := false
|
|
q, err := quota.NewControl(r.config.Engine.VolumePath)
|
|
if err == nil {
|
|
projectQuotaSupported = true
|
|
}
|
|
if !projectQuotaSupported {
|
|
return nil, errors.New("volume options size and inodes not supported. Filesystem does not support Project Quota")
|
|
}
|
|
quota := quota.Quota{
|
|
Inodes: volume.config.Inodes,
|
|
Size: volume.config.Size,
|
|
}
|
|
// Must use volPathRoot not fullVolPath, as we need the
|
|
// base path for the volume - without the `_data`
|
|
// subdirectory - so the quota ID assignment logic works
|
|
// properly.
|
|
if err := q.SetQuota(volPathRoot, quota); err != nil {
|
|
return nil, fmt.Errorf("failed to set size quota size=%d inodes=%d for volume directory %q: %w", volume.config.Size, volume.config.Inodes, fullVolPath, err)
|
|
}
|
|
}
|
|
|
|
volume.config.MountPoint = fullVolPath
|
|
}
|
|
|
|
lock, err := r.lockManager.AllocateLock()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("allocating lock for new volume: %w", err)
|
|
}
|
|
volume.lock = lock
|
|
volume.config.LockID = volume.lock.ID()
|
|
|
|
defer func() {
|
|
if deferredErr != nil {
|
|
if err := volume.lock.Free(); err != nil {
|
|
logrus.Errorf("Freeing volume lock after failed creation: %v", err)
|
|
}
|
|
}
|
|
}()
|
|
|
|
volume.valid = true
|
|
|
|
// Add the volume to state
|
|
if err := r.state.AddVolume(volume); err != nil {
|
|
return nil, fmt.Errorf("adding volume to state: %w", err)
|
|
}
|
|
defer volume.newVolumeEvent(events.Create)
|
|
return volume, nil
|
|
}
|
|
|
|
// UpdateVolumePlugins reads all volumes from all configured volume plugins and
|
|
// imports them into the libpod db. It also checks if existing libpod volumes
|
|
// are removed in the plugin, in this case we try to remove it from libpod.
|
|
// On errors we continue and try to do as much as possible. all errors are
|
|
// returned as array in the returned struct.
|
|
// This function has many race conditions, it is best effort but cannot guarantee
|
|
// a perfect state since plugins can be modified from the outside at any time.
|
|
func (r *Runtime) UpdateVolumePlugins(ctx context.Context) *define.VolumeReload {
|
|
var (
|
|
added []string
|
|
removed []string
|
|
errs []error
|
|
allPluginVolumes = map[string]struct{}{}
|
|
)
|
|
|
|
for driverName, socket := range r.config.Engine.VolumePlugins {
|
|
driver, err := volplugin.GetVolumePlugin(driverName, socket, nil, r.config)
|
|
if err != nil {
|
|
errs = append(errs, err)
|
|
continue
|
|
}
|
|
vols, err := driver.ListVolumes()
|
|
if err != nil {
|
|
errs = append(errs, fmt.Errorf("failed to read volumes from plugin %q: %w", driverName, err))
|
|
continue
|
|
}
|
|
for _, vol := range vols {
|
|
allPluginVolumes[vol.Name] = struct{}{}
|
|
if _, err := r.newVolume(ctx, true, WithVolumeName(vol.Name), WithVolumeDriver(driverName)); err != nil {
|
|
// If the volume exists this is not an error, just ignore it and log. It is very likely
|
|
// that the volume from the plugin was already in our db.
|
|
if !errors.Is(err, define.ErrVolumeExists) {
|
|
errs = append(errs, err)
|
|
continue
|
|
}
|
|
logrus.Infof("Volume %q already exists: %v", vol.Name, err)
|
|
continue
|
|
}
|
|
added = append(added, vol.Name)
|
|
}
|
|
}
|
|
|
|
libpodVolumes, err := r.state.AllVolumes()
|
|
if err != nil {
|
|
errs = append(errs, fmt.Errorf("cannot delete dangling plugin volumes: failed to read libpod volumes: %w", err))
|
|
}
|
|
for _, vol := range libpodVolumes {
|
|
if vol.UsesVolumeDriver() {
|
|
if _, ok := allPluginVolumes[vol.Name()]; !ok {
|
|
// The volume is no longer in the plugin. Let's remove it from the libpod db.
|
|
if err := r.removeVolume(ctx, vol, false, nil, true); err != nil {
|
|
if errors.Is(err, define.ErrVolumeBeingUsed) {
|
|
// Volume is still used by at least one container. This is very bad,
|
|
// the plugin no longer has this but we still need it.
|
|
errs = append(errs, fmt.Errorf("volume was removed from the plugin %q but containers still require it: %w", vol.config.Driver, err))
|
|
continue
|
|
}
|
|
if errors.Is(err, define.ErrNoSuchVolume) || errors.Is(err, define.ErrVolumeRemoved) || errors.Is(err, define.ErrMissingPlugin) {
|
|
// Volume was already removed, no problem just ignore it and continue.
|
|
continue
|
|
}
|
|
|
|
// some other error
|
|
errs = append(errs, err)
|
|
continue
|
|
}
|
|
// Volume was successfully removed
|
|
removed = append(removed, vol.Name())
|
|
}
|
|
}
|
|
}
|
|
|
|
return &define.VolumeReload{
|
|
Added: added,
|
|
Removed: removed,
|
|
Errors: errs,
|
|
}
|
|
}
|
|
|
|
// makeVolumeInPluginIfNotExist makes a volume in the given volume plugin if it
|
|
// does not already exist.
|
|
func makeVolumeInPluginIfNotExist(name string, options map[string]string, plugin *volplugin.VolumePlugin) error {
|
|
// Ping the volume plugin to see if it exists first.
|
|
// If it does, use the existing volume in the plugin.
|
|
// Options may not match exactly, but not much we can do about
|
|
// that. Not complaining avoids a lot of the sync issues we see
|
|
// with c/storage and libpod DB.
|
|
needsCreate := true
|
|
getReq := new(pluginapi.GetRequest)
|
|
getReq.Name = name
|
|
if resp, err := plugin.GetVolume(getReq); err == nil {
|
|
// TODO: What do we do if we get a 200 response, but the
|
|
// Volume is nil? The docs on the Plugin API are very
|
|
// nonspecific, so I don't know if this is valid or
|
|
// not...
|
|
if resp != nil {
|
|
needsCreate = false
|
|
logrus.Infof("Volume %q already exists in plugin %q, using existing volume", name, plugin.Name)
|
|
}
|
|
}
|
|
if needsCreate {
|
|
createReq := new(pluginapi.CreateRequest)
|
|
createReq.Name = name
|
|
createReq.Options = options
|
|
if err := plugin.CreateVolume(createReq); err != nil {
|
|
return fmt.Errorf("creating volume %q in plugin %s: %w", name, plugin.Name, err)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// removeVolume removes the specified volume from state as well tears down its mountpoint and storage.
|
|
// ignoreVolumePlugin is used to only remove the volume from the db and not the plugin,
|
|
// this is required when the volume was already removed from the plugin, i.e. in UpdateVolumePlugins().
|
|
func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool, timeout *uint, ignoreVolumePlugin bool) error {
|
|
if !v.valid {
|
|
if ok, _ := r.state.HasVolume(v.Name()); !ok {
|
|
return nil
|
|
}
|
|
return define.ErrVolumeRemoved
|
|
}
|
|
|
|
// DANGEROUS: Do not lock here yet because we might needed to remove containers first.
|
|
// In general we must always acquire the ctr lock before a volume lock so we cannot lock.
|
|
// THIS MUST BE DONE to prevent ABBA deadlocks.
|
|
// It also means the are several races around creating containers with volumes and removing
|
|
// them in parallel. However that problem exists regadless of taking the lock here or not.
|
|
|
|
deps, err := r.state.VolumeInUse(v)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if len(deps) != 0 {
|
|
depsStr := strings.Join(deps, ", ")
|
|
if !force {
|
|
return fmt.Errorf("volume %s is being used by the following container(s): %s: %w", v.Name(), depsStr, define.ErrVolumeBeingUsed)
|
|
}
|
|
|
|
// We need to remove all containers using the volume
|
|
for _, dep := range deps {
|
|
ctr, err := r.state.Container(dep)
|
|
if err != nil {
|
|
// If the container's removed, no point in
|
|
// erroring.
|
|
if errors.Is(err, define.ErrNoSuchCtr) || errors.Is(err, define.ErrCtrRemoved) {
|
|
continue
|
|
}
|
|
|
|
return fmt.Errorf("removing container %s that depends on volume %s: %w", dep, v.Name(), err)
|
|
}
|
|
|
|
logrus.Debugf("Removing container %s (depends on volume %q)", ctr.ID(), v.Name())
|
|
|
|
opts := ctrRmOpts{
|
|
Force: force,
|
|
Timeout: timeout,
|
|
}
|
|
if _, _, err := r.removeContainer(ctx, ctr, opts); err != nil {
|
|
return fmt.Errorf("removing container %s that depends on volume %s: %w", ctr.ID(), v.Name(), err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Ok now we are good to lock.
|
|
v.lock.Lock()
|
|
defer v.lock.Unlock()
|
|
|
|
// Update volume status to pick up a potential removal from state
|
|
if err := v.update(); err != nil {
|
|
return err
|
|
}
|
|
|
|
// If the volume is still mounted - force unmount it
|
|
if err := v.unmount(true); err != nil {
|
|
if force {
|
|
// If force is set, evict the volume, even if errors
|
|
// occur. Otherwise we'll never be able to get rid of
|
|
// them.
|
|
logrus.Errorf("Unmounting volume %s: %v", v.Name(), err)
|
|
} else {
|
|
return fmt.Errorf("unmounting volume %s: %w", v.Name(), err)
|
|
}
|
|
}
|
|
|
|
// Set volume as invalid so it can no longer be used
|
|
v.valid = false
|
|
|
|
var removalErr error
|
|
|
|
// If we use a volume plugin, we need to remove from the plugin.
|
|
if v.UsesVolumeDriver() && !ignoreVolumePlugin {
|
|
canRemove := true
|
|
|
|
// Do we have a volume driver?
|
|
if v.plugin == nil {
|
|
canRemove = false
|
|
removalErr = fmt.Errorf("cannot remove volume %s from plugin %s, but it has been removed from Podman: %w", v.Name(), v.Driver(), define.ErrMissingPlugin)
|
|
} else {
|
|
// Ping the plugin first to verify the volume still
|
|
// exists.
|
|
// We're trying to be very tolerant of missing volumes
|
|
// in the backend, to avoid the problems we see with
|
|
// sync between c/storage and the Libpod DB.
|
|
getReq := new(pluginapi.GetRequest)
|
|
getReq.Name = v.Name()
|
|
if _, err := v.plugin.GetVolume(getReq); err != nil {
|
|
canRemove = false
|
|
removalErr = fmt.Errorf("volume %s could not be retrieved from plugin %s, but it has been removed from Podman: %w", v.Name(), v.Driver(), err)
|
|
}
|
|
}
|
|
if canRemove {
|
|
req := new(pluginapi.RemoveRequest)
|
|
req.Name = v.Name()
|
|
if err := v.plugin.RemoveVolume(req); err != nil {
|
|
return fmt.Errorf("volume %s could not be removed from plugin %s: %w", v.Name(), v.Driver(), err)
|
|
}
|
|
}
|
|
} else if v.config.Driver == define.VolumeDriverImage {
|
|
if err := v.runtime.storageService.DeleteContainer(v.config.StorageID); err != nil {
|
|
// Storage container is already gone, no problem.
|
|
if !(errors.Is(err, storage.ErrNotAContainer) || errors.Is(err, storage.ErrContainerUnknown)) {
|
|
return fmt.Errorf("removing volume %s storage: %w", v.Name(), err)
|
|
}
|
|
logrus.Infof("Storage for volume %s already removed", v.Name())
|
|
}
|
|
}
|
|
|
|
// Remove the volume from the state
|
|
if err := r.state.RemoveVolume(v); err != nil {
|
|
if removalErr != nil {
|
|
logrus.Errorf("Removing volume %s from plugin %s: %v", v.Name(), v.Driver(), removalErr)
|
|
}
|
|
return fmt.Errorf("removing volume %s: %w", v.Name(), err)
|
|
}
|
|
|
|
// Free the volume's lock
|
|
if err := v.lock.Free(); err != nil {
|
|
if removalErr == nil {
|
|
removalErr = fmt.Errorf("freeing lock for volume %s: %w", v.Name(), err)
|
|
} else {
|
|
logrus.Errorf("Freeing lock for volume %q: %v", v.Name(), err)
|
|
}
|
|
}
|
|
|
|
// Delete the mountpoint path of the volume, that is delete the volume
|
|
// from /var/lib/containers/storage/volumes
|
|
if err := v.teardownStorage(); err != nil {
|
|
if removalErr == nil {
|
|
removalErr = fmt.Errorf("cleaning up volume storage for %q: %w", v.Name(), err)
|
|
} else {
|
|
logrus.Errorf("Cleaning up volume storage for volume %q: %v", v.Name(), err)
|
|
}
|
|
}
|
|
|
|
defer v.newVolumeEvent(events.Remove)
|
|
logrus.Debugf("Removed volume %s", v.Name())
|
|
return removalErr
|
|
}
|