//go:build linux

package buildah

import (
	"context"
	"errors"
	"fmt"
	"maps"
	"os"
	"path/filepath"
	"slices"
	"strings"
	"sync"
	"syscall"

	"github.com/containers/buildah/bind"
	"github.com/containers/buildah/chroot"
	"github.com/containers/buildah/copier"
	"github.com/containers/buildah/define"
	"github.com/containers/buildah/internal"
	"github.com/containers/buildah/internal/tmpdir"
	"github.com/containers/buildah/internal/volumes"
	"github.com/containers/buildah/pkg/binfmt"
	"github.com/containers/buildah/pkg/overlay"
	"github.com/containers/buildah/pkg/parse"
	butil "github.com/containers/buildah/pkg/util"
	"github.com/containers/buildah/util"
	"github.com/docker/go-units"
	"github.com/opencontainers/runtime-spec/specs-go"
	"github.com/opencontainers/runtime-tools/generate"
	"github.com/sirupsen/logrus"
	"go.podman.io/common/libnetwork/etchosts"
	"go.podman.io/common/libnetwork/pasta"
	"go.podman.io/common/libnetwork/resolvconf"
	"go.podman.io/common/libnetwork/slirp4netns"
	nettypes "go.podman.io/common/libnetwork/types"
	netUtil "go.podman.io/common/libnetwork/util"
	"go.podman.io/common/pkg/capabilities"
	"go.podman.io/common/pkg/chown"
	"go.podman.io/common/pkg/config"
	"go.podman.io/common/pkg/hooks"
	hooksExec "go.podman.io/common/pkg/hooks/exec"
	"go.podman.io/image/v5/types"
	"go.podman.io/storage/pkg/fileutils"
	"go.podman.io/storage/pkg/idtools"
	"go.podman.io/storage/pkg/ioutils"
	"go.podman.io/storage/pkg/lockfile"
	"go.podman.io/storage/pkg/mount"
	"go.podman.io/storage/pkg/stringid"
	"go.podman.io/storage/pkg/unshare"
	"golang.org/x/sys/unix"
	"tags.cncf.io/container-device-interface/pkg/cdi"
	"tags.cncf.io/container-device-interface/pkg/parser"
)

// binfmtRegistered makes sure we only try to register binfmt_misc
// interpreters once per process.  Run() triggers the attempt the first time
// it is called with the "container" environment variable set and with a
// target platform that differs from the current default platform.
var binfmtRegistered sync.Once

// setChildProcess asks the kernel, via prctl(PR_SET_CHILD_SUBREAPER), to make
// the calling process a child subreaper.  On failure the error is written to
// stderr and also returned to the caller.
func setChildProcess() error {
	err := unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(1), 0, 0, 0)
	if err == nil {
		return nil
	}
	fmt.Fprintf(os.Stderr, "prctl(PR_SET_CHILD_SUBREAPER, 1): %v\n", err)
	return err
}

// cdiSetupDevicesInSpec hands the CDI-qualified entries of deviceSpecs to the
// CDI default registry so that it can edit spec for them, and returns the
// device specs that the caller still has to handle itself.
//
// The directories scanned for CDI configuration are, in order, the ones from
// containers.conf, b.CDIConfigDir, and configDir (empty values are skipped).
//
// The returned slice holds every qualified name that CDI did not take care
// of, followed by every unqualified entry, each exactly once.  If there are
// no directories to scan, or no qualified names were given, deviceSpecs is
// returned unchanged.  An error is returned if the default config can't be
// read, if the registry can't be configured, or if device injection fails.
func (b *Builder) cdiSetupDevicesInSpec(deviceSpecs []string, configDir string, spec *specs.Spec) ([]string, error) {
	defConfig, err := config.Default()
	if err != nil {
		return nil, fmt.Errorf("failed to get container config: %w", err)
	}
	// The CDI cache prioritizes entries from directories that are later in
	// the list of ones it scans, so start with our general config, then
	// append values passed to us through API layers.
	configDirs := slices.Clone(defConfig.Engine.CdiSpecDirs.Get())
	if b.CDIConfigDir != "" {
		configDirs = append(configDirs, b.CDIConfigDir)
	}
	if configDir != "" {
		configDirs = append(configDirs, configDir)
	}
	if len(configDirs) == 0 {
		// No directories to scan for CDI configuration means that CDI
		// won't have any details for setting up any devices, so we
		// don't need to be doing anything here.
		return deviceSpecs, nil
	}
	var qualifiedDeviceSpecs, unqualifiedDeviceSpecs []string
	for _, deviceSpec := range deviceSpecs {
		if parser.IsQualifiedName(deviceSpec) {
			qualifiedDeviceSpecs = append(qualifiedDeviceSpecs, deviceSpec)
		} else {
			unqualifiedDeviceSpecs = append(unqualifiedDeviceSpecs, deviceSpec)
		}
	}
	if len(qualifiedDeviceSpecs) == 0 {
		// None of the specified devices were in the form that would be
		// handled by CDI, so we don't need to do anything here.
		return deviceSpecs, nil
	}
	if err := cdi.Configure(cdi.WithSpecDirs(configDirs...)); err != nil {
		return nil, fmt.Errorf("CDI default registry ignored configured directories %v: %w", configDirs, err)
	}
	// Until CDI tells us otherwise, every qualified name is still
	// unhandled.  Only the qualified names belong here: the unqualified
	// ones are appended separately below, and starting from the full list
	// would hand them back to the caller twice when the refresh fails.
	leftoverDevices := slices.Clone(qualifiedDeviceSpecs)
	if err := cdi.Refresh(); err != nil {
		logrus.Warnf("CDI default registry refresh: %v", err)
	} else {
		leftoverDevices, err = cdi.InjectDevices(spec, qualifiedDeviceSpecs...)
		if err != nil {
			return nil, fmt.Errorf("CDI device injection (leftover devices: %v): %w", leftoverDevices, err)
		}
	}
	// For the debug log, work out which qualified names CDI took off our hands.
	handled := slices.DeleteFunc(slices.Clone(qualifiedDeviceSpecs), func(t string) bool { return slices.Contains(leftoverDevices, t) })
	logrus.Debugf("CDI taking care of devices %v, leaving devices %v, skipped %v", handled, leftoverDevices, unqualifiedDeviceSpecs)
	return append(leftoverDevices, unqualifiedDeviceSpecs...), nil
}

// separateDevicesFromRuntimeSpec pulls the device list out of the generator's
// spec and returns it in buildah's own representation, so that we can still
// try to make the devices work if we're running rootless and can't just
// mknod() the device nodes.  The generator's device list is cleared before
// returning.
func separateDevicesFromRuntimeSpec(g *generate.Generator) define.ContainerDevices {
	var extracted define.ContainerDevices
	if g.Config == nil || g.Config.Linux == nil {
		// Nothing to extract, but still let the generator reset its list.
		g.ClearLinuxDevices()
		return extracted
	}
	for i := range g.Config.Linux.Devices {
		specDevice := &g.Config.Linux.Devices[i]
		var converted define.BuildahDevice
		// The spec's path doubles as the source and destination of the device.
		converted.Path = specDevice.Path
		converted.Source = specDevice.Path
		converted.Destination = specDevice.Path
		// Unrecognized type strings leave the type at its zero value.
		switch specDevice.Type {
		case "b":
			converted.Type = 'b'
		case "c":
			converted.Type = 'c'
		case "u":
			converted.Type = 'u'
		case "p":
			converted.Type = 'p'
		}
		converted.Major = specDevice.Major
		converted.Minor = specDevice.Minor
		// Optional fields keep their zero values when the spec omits them.
		if mode := specDevice.FileMode; mode != nil {
			converted.FileMode = *mode
		}
		if uid := specDevice.UID; uid != nil {
			converted.Uid = *uid
		}
		if gid := specDevice.GID; gid != nil {
			converted.Gid = *gid
		}
		extracted = append(extracted, converted)
	}
	g.ClearLinuxDevices()
	return extracted
}

// Run runs the specified command in the container's root filesystem.
//
// It creates a temporary bundle directory, builds a runtime spec from the
// builder's settings and the supplied options (environment, resource limits,
// working directory, devices, namespaces, user, seccomp, bind-mounted
// /etc/hosts, /etc/hostname, resolv.conf and /run/.containerenv, OCI hooks
// and mounts), and then runs the command using the selected isolation
// mechanism.  The bundle directory, the container mount, any mount targets
// that had to be created, and the run mounts are cleaned up by deferred calls
// before Run returns.
//
// If options.ExternalImageMounts is non-empty, those images are unmounted
// before returning, even when Run fails early.
//
// The returned error is either a setup error or whatever the selected
// isolation's runner returned.
func (b *Builder) Run(command []string, options RunOptions) error {
	var runArtifacts *runMountArtifacts
	if len(options.ExternalImageMounts) > 0 {
		defer func() {
			if runArtifacts == nil {
				// we didn't add ExternalImageMounts to the
				// list of images that we're going to unmount
				// yet and make a deferred call that cleans
				// them up, but the caller is expecting us to
				// unmount these for them because we offered to
				for _, image := range options.ExternalImageMounts {
					if _, err := b.store.UnmountImage(image, false); err != nil {
						logrus.Debugf("umounting image %q: %v", image, err)
					}
				}
			}
		}()
	}

	// If the "container" environment variable is set and the target
	// platform isn't the current default one, make a one-time attempt to
	// register binfmt_misc interpreters.
	if os.Getenv("container") != "" {
		hostOS, arch, variant, err := parse.Platform("")
		if err != nil {
			return fmt.Errorf("reading the current default platform: %w", err)
		}
		platform := b.OCIv1.Platform
		if hostOS != platform.OS || arch != platform.Architecture || variant != platform.Variant {
			binfmtRegistered.Do(func() {
				if err := binfmt.Register(nil); err != nil {
					logrus.Warnf("registering binfmt_misc interpreters: %v", err)
				}
			})
		}
	}

	p, err := os.MkdirTemp(tmpdir.GetTempDir(), define.Package)
	if err != nil {
		return err
	}
	// On some hosts like AH, /tmp is a symlink and we need an
	// absolute path.
	path, err := filepath.EvalSymlinks(p)
	if err != nil {
		// The deferred cleanup below hasn't been registered yet, so
		// remove the directory we just created before bailing out.
		if err2 := os.RemoveAll(p); err2 != nil {
			logrus.Debugf("removing %q: %v", p, err2)
		}
		return err
	}
	logrus.Debugf("using %q to hold bundle data", path)
	defer func() {
		if err2 := os.RemoveAll(path); err2 != nil {
			options.Logger.Error(err2)
		}
	}()

	gp, err := generate.New("linux")
	if err != nil {
		return fmt.Errorf("generating new 'linux' runtime spec: %w", err)
	}
	g := &gp

	// Per-run isolation wins over the builder's setting, which wins over
	// the default; fall back to OCI if nothing more specific is known.
	isolation := options.Isolation
	if isolation == define.IsolationDefault {
		isolation = b.Isolation
		if isolation == define.IsolationDefault {
			isolation, err = parse.IsolationOption("")
			if err != nil {
				logrus.Debugf("got %v while trying to determine default isolation, guessing OCI", err)
				isolation = IsolationOCI
			} else if isolation == IsolationDefault {
				isolation = IsolationOCI
			}
		}
	}
	if err := checkAndOverrideIsolationOptions(isolation, &options); err != nil {
		return err
	}

	// hardwire the environment to match docker build to avoid subtle and hard-to-debug differences due to containers.conf
	b.configureEnvironment(g, options, []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"})

	if b.CommonBuildOpts == nil {
		return fmt.Errorf("invalid format on container you must recreate the container")
	}

	if err := addCommonOptsToSpec(b.CommonBuildOpts, g); err != nil {
		return err
	}

	workDir := b.WorkDir()
	if options.WorkingDir != "" {
		g.SetProcessCwd(options.WorkingDir)
		workDir = options.WorkingDir
	} else if b.WorkDir() != "" {
		g.SetProcessCwd(b.WorkDir())
		workDir = b.WorkDir()
	}
	if workDir == "" {
		workDir = string(os.PathSeparator)
	}
	setupSelinux(g, b.ProcessLabel, b.MountLabel)
	mountPoint, err := b.Mount(b.MountLabel)
	if err != nil {
		return fmt.Errorf("mounting container %q: %w", b.ContainerID, err)
	}
	defer func() {
		if err := b.Unmount(); err != nil {
			options.Logger.Errorf("error unmounting container: %v", err)
		}
	}()
	g.SetRootPath(mountPoint)
	if len(command) > 0 {
		command = runLookupPath(g, command)
		g.SetProcessArgs(command)
	} else {
		g.SetProcessArgs(nil)
	}

	// Combine the working container's set of devices with the ones for just this run.
	deviceSpecs := slices.Concat(options.DeviceSpecs, b.DeviceSpecs)
	deviceSpecs, err = b.cdiSetupDevicesInSpec(deviceSpecs, options.CDIConfigDir, g.Config) // makes changes to more than just the device list
	if err != nil {
		return err
	}
	devices := separateDevicesFromRuntimeSpec(g)
	for _, deviceSpec := range deviceSpecs {
		device, err := parse.DeviceFromPath(deviceSpec)
		if err != nil {
			return fmt.Errorf("setting up device %q: %w", deviceSpec, err)
		}
		devices = append(devices, device...)
	}
	devices = append(append(devices, options.Devices...), b.Devices...)

	// Mount devices, if any, and if we're rootless attempt to work around not
	// being able to create device nodes by bind-mounting them from the host, like podman does.
	if unshare.IsRootless() {
		// We are going to create bind mounts for devices
		// but we need to make sure that we don't override
		// anything which is already in OCI spec.
		mounts := make(map[string]any)
		for _, m := range g.Mounts() {
			mounts[m.Destination] = true
		}
		newMounts := []specs.Mount{}
		for _, d := range devices {
			// Default permission is read-only.
			perm := "ro"
			// Get permission configured for this device but only process `write`
			// permission in rootless since `mknod` is not supported anyways.
			if strings.Contains(string(d.Rule.Permissions), "w") {
				perm = "rw"
			}
			devMnt := specs.Mount{
				Destination: d.Destination,
				Type:        parse.TypeBind,
				Source:      d.Source,
				Options:     []string{"slave", "nosuid", "noexec", perm, "rbind"},
			}
			// Podman parity: podman skips these two devices hence we do the same.
			if d.Path == "/dev/ptmx" || strings.HasPrefix(d.Path, "/dev/tty") {
				continue
			}
			// Device is already in OCI spec do not re-mount.
			if _, found := mounts[d.Path]; found {
				continue
			}
			newMounts = append(newMounts, devMnt)
		}
		g.Config.Mounts = append(newMounts, g.Config.Mounts...)
	} else {
		for _, d := range devices {
			sDev := specs.LinuxDevice{
				Type:     string(d.Type),
				Path:     d.Path,
				Major:    d.Major,
				Minor:    d.Minor,
				FileMode: &d.FileMode,
				UID:      &d.Uid,
				GID:      &d.Gid,
			}
			g.AddDevice(sDev)
			g.AddLinuxResourcesDevice(true, string(d.Type), &d.Major, &d.Minor, string(d.Permissions))
		}
	}

	setupMaskedPaths(g, b.CommonBuildOpts)
	setupReadOnlyPaths(g)

	setupTerminal(g, options.Terminal, options.TerminalSize)

	configureNetwork, networkString, err := b.configureNamespaces(g, &options)
	if err != nil {
		return err
	}

	homeDir, err := b.configureUIDGID(g, mountPoint, options)
	if err != nil {
		return err
	}

	g.SetProcessNoNewPrivileges(b.CommonBuildOpts.NoNewPrivileges)

	g.SetProcessApparmorProfile(b.CommonBuildOpts.ApparmorProfile)

	// Now grab the spec from the generator.  Set the generator to nil so that future contributors
	// will quickly be able to tell that they're supposed to be modifying the spec directly from here.
	spec := g.Config
	g = nil

	// Set the seccomp configuration using the specified profile name.  Some syscalls are
	// allowed if certain capabilities are to be granted (example: CAP_SYS_CHROOT and chroot),
	// so we sorted out the capabilities lists first.
	if err = setupSeccomp(spec, b.CommonBuildOpts.SeccompProfilePath); err != nil {
		return err
	}

	// Translate the process's in-container IDs to host IDs so that the
	// working directory we create below gets the right owner.
	uid, gid := spec.Process.User.UID, spec.Process.User.GID
	if spec.Linux != nil {
		uid, gid, err = util.GetHostIDs(spec.Linux.UIDMappings, spec.Linux.GIDMappings, uid, gid)
		if err != nil {
			return err
		}
	}

	idPair := &idtools.IDPair{UID: int(uid), GID: int(gid)}

	mode := os.FileMode(0o755)
	coptions := copier.MkdirOptions{
		ChownNew: idPair,
		ChmodNew: &mode,
	}
	if err := copier.Mkdir(mountPoint, filepath.Join(mountPoint, spec.Process.Cwd), coptions); err != nil {
		return err
	}

	bindFiles := make(map[string]string)
	volumes := b.Volumes()

	// Figure out who owns files that will appear to be owned by UID/GID 0 in the container.
	rootUID, rootGID, err := util.GetHostRootIDs(spec)
	if err != nil {
		return err
	}
	rootIDPair := &idtools.IDPair{UID: int(rootUID), GID: int(rootGID)}

	hostsFile := ""
	if !options.NoHosts && !slices.Contains(volumes, config.DefaultHostsFile) && options.ConfigureNetwork != define.NetworkDisabled {
		hostsFile, err = b.createHostsFile(path, rootIDPair)
		if err != nil {
			return err
		}
		bindFiles[config.DefaultHostsFile] = hostsFile

		// Only add entries here if we do not have to do setup network,
		// if we do we have to do it much later after the network setup.
		if !configureNetwork {
			var entries etchosts.HostEntries
			isHost := true
			if spec.Linux != nil {
				for _, ns := range spec.Linux.Namespaces {
					if ns.Type == specs.NetworkNamespace {
						isHost = false
						break
					}
				}
			}
			// add host entry for local ip when running in host network
			if spec.Hostname != "" && isHost {
				ip := netUtil.GetLocalIP()
				if ip != "" {
					entries = append(entries, etchosts.HostEntry{
						Names: []string{spec.Hostname},
						IP:    ip,
					})
				}
			}
			err = b.addHostsEntries(hostsFile, mountPoint, entries, nil, "")
			if err != nil {
				return err
			}
		}
	}

	if !options.NoHostname && !(slices.Contains(volumes, "/etc/hostname")) {
		hostnameFile, err := b.generateHostname(path, spec.Hostname, rootIDPair)
		if err != nil {
			return err
		}
		// Bind /etc/hostname
		bindFiles["/etc/hostname"] = hostnameFile
	}

	resolvFile := ""
	if !slices.Contains(volumes, resolvconf.DefaultResolvConf) && options.ConfigureNetwork != define.NetworkDisabled && (len(b.CommonBuildOpts.DNSServers) != 1 || strings.ToLower(b.CommonBuildOpts.DNSServers[0]) != "none") {
		resolvFile, err = b.createResolvConf(path, rootIDPair)
		if err != nil {
			return err
		}
		bindFiles[resolvconf.DefaultResolvConf] = resolvFile

		// Only add entries here if we do not have to do setup network,
		// if we do we have to do it much later after the network setup.
		if !configureNetwork {
			err = b.addResolvConfEntries(resolvFile, nil, spec, false, true)
			if err != nil {
				return err
			}
		}
	}
	// Empty file, so no need to recreate if it exists
	if _, ok := bindFiles["/run/.containerenv"]; !ok {
		containerenvPath := filepath.Join(path, "/run/.containerenv")
		if err = os.MkdirAll(filepath.Dir(containerenvPath), 0o755); err != nil {
			return err
		}

		rootless := 0
		if unshare.IsRootless() {
			rootless = 1
		}
		// Populate the .containerenv with container information
		containerenv := fmt.Sprintf(`
engine="buildah-%s"
name=%q
id=%q
image=%q
imageid=%q
rootless=%d
`, define.Version, b.Container, b.ContainerID, b.FromImage, b.FromImageID, rootless)

		if err = ioutils.AtomicWriteFile(containerenvPath, []byte(containerenv), 0o755); err != nil {
			return err
		}
		if err := relabel(containerenvPath, b.MountLabel, false); err != nil {
			return err
		}

		bindFiles["/run/.containerenv"] = containerenvPath
	}

	// Setup OCI hooks
	_, err = b.setupOCIHooks(spec, (len(options.Mounts) > 0 || len(volumes) > 0))
	if err != nil {
		return fmt.Errorf("unable to setup OCI hooks: %w", err)
	}

	runMountInfo := runMountInfo{
		WorkDir:          workDir,
		ContextDir:       options.ContextDir,
		Secrets:          options.Secrets,
		SSHSources:       options.SSHSources,
		StageMountPoints: options.StageMountPoints,
		SystemContext:    options.SystemContext,
	}

	runArtifacts, err = b.setupMounts(mountPoint, spec, path, options.Mounts, bindFiles, volumes, options.CompatBuiltinVolumes, b.CommonBuildOpts.Volumes, options.RunMounts, runMountInfo)
	if err != nil {
		return fmt.Errorf("resolving mountpoints for container %q: %w", b.ContainerID, err)
	}
	if runArtifacts.SSHAuthSock != "" {
		sshenv := "SSH_AUTH_SOCK=" + runArtifacts.SSHAuthSock
		spec.Process.Env = append(spec.Process.Env, sshenv)
	}

	// Create any mount points that we need that aren't already present in
	// the rootfs.
	createdMountTargets, err := b.createMountTargets(spec)
	if err != nil {
		return fmt.Errorf("ensuring mount targets for container %q: %w", b.ContainerID, err)
	}
	defer func() {
		// Attempt to clean up mount targets for the sake of builds
		// that don't commit and rebase at each step, and people using
		// `buildah run` more than once, who don't expect empty mount
		// points to stick around.  They'll still get filtered out at
		// commit-time if another concurrent Run() is keeping something
		// busy.
		if _, err := copier.ConditionalRemove(mountPoint, mountPoint, copier.ConditionalRemoveOptions{
			UIDMap: b.store.UIDMap(),
			GIDMap: b.store.GIDMap(),
			Paths:  createdMountTargets,
		}); err != nil {
			options.Logger.Errorf("unable to cleanup run mount targets %v", err)
		}
	}()

	// following run was called from `buildah run`
	// and some images were mounted for this run
	// add them to cleanup artifacts
	if len(options.ExternalImageMounts) > 0 {
		runArtifacts.MountedImages = append(runArtifacts.MountedImages, options.ExternalImageMounts...)
	}

	defer func() {
		if err := b.cleanupRunMounts(runArtifacts); err != nil {
			options.Logger.Errorf("unable to cleanup run mounts %v", err)
		}
	}()

	// Handle mount flags that request that the source locations for "bind" mountpoints be
	// relabeled, and filter those flags out of the list of mount options we pass to the
	// runtime.
	for i := range spec.Mounts {
		switch spec.Mounts[i].Type {
		default:
			continue
		case "bind", "rbind":
			// all good, keep going
		}
		// If both flags are present, the last one listed wins.
		zflag := ""
		for _, opt := range spec.Mounts[i].Options {
			if opt == "z" || opt == "Z" {
				zflag = opt
			}
		}
		if zflag == "" {
			continue
		}
		spec.Mounts[i].Options = slices.DeleteFunc(spec.Mounts[i].Options, func(opt string) bool {
			return opt == "z" || opt == "Z"
		})
		if err := relabel(spec.Mounts[i].Source, b.MountLabel, zflag == "z"); err != nil {
			return fmt.Errorf("setting file label %q on %q: %w", b.MountLabel, spec.Mounts[i].Source, err)
		}
	}

	switch isolation {
	case define.IsolationOCI:
		var moreCreateArgs []string
		if options.NoPivot {
			moreCreateArgs = append(moreCreateArgs, "--no-pivot")
		}
		err = b.runUsingRuntimeSubproc(isolation, options, configureNetwork, networkString, moreCreateArgs, spec,
			mountPoint, path, define.Package+"-"+filepath.Base(path), b.Container, hostsFile, resolvFile)
	case IsolationChroot:
		err = chroot.RunUsingChroot(spec, path, homeDir, options.Stdin, options.Stdout, options.Stderr, options.NoPivot)
	case IsolationOCIRootless:
		moreCreateArgs := []string{"--no-new-keyring"}
		if options.NoPivot {
			moreCreateArgs = append(moreCreateArgs, "--no-pivot")
		}
		err = b.runUsingRuntimeSubproc(isolation, options, configureNetwork, networkString, moreCreateArgs, spec,
			mountPoint, path, define.Package+"-"+filepath.Base(path), b.Container, hostsFile, resolvFile)
	default:
		err = errors.New("don't know how to run this command")
	}
	return err
}

// setupOCIHooks loads the OCI hooks that apply to config and runs the
// "precreate" ones against it.
//
// When b.CommonBuildOpts.OCIHooksDir lists directories, hooks are read from
// those.  Otherwise, rootless callers get (nil, nil) right away, and everyone
// else has the implicit default and override directories consulted, with a
// deprecation warning logged for any of them that contributes hooks.
//
// On success the collected hooks are returned, keyed by stage name.
func (b *Builder) setupOCIHooks(config *specs.Spec, hasVolumes bool) (map[string][]specs.Hook, error) {
	ctx := context.Background()
	allHooks := make(map[string][]specs.Hook)

	if explicitDirs := b.CommonBuildOpts.OCIHooksDir; len(explicitDirs) != 0 {
		hookManager, err := hooks.New(ctx, explicitDirs, []string{})
		if err != nil {
			return nil, err
		}
		if allHooks, err = hookManager.Hooks(config, b.ImageAnnotations, hasVolumes); err != nil {
			return nil, err
		}
	} else {
		if unshare.IsRootless() {
			return nil, nil
		}
		for _, implicitDir := range []string{hooks.DefaultDir, hooks.OverrideDir} {
			hookManager, err := hooks.New(ctx, []string{implicitDir}, []string{})
			if err != nil {
				// A missing implicit directory is not a problem, just move on.
				if errors.Is(err, os.ErrNotExist) {
					continue
				}
				return nil, err
			}
			found, err := hookManager.Hooks(config, b.ImageAnnotations, hasVolumes)
			if err != nil {
				return nil, err
			}
			if len(found) > 0 || config.Hooks != nil {
				logrus.Warnf("Implicit hook directories are deprecated; set --hooks-dir=%q explicitly to continue to load ociHooks from this directory", implicitDir)
			}
			maps.Copy(allHooks, found)
		}
	}

	hookErr, err := hooksExec.RuntimeConfigFilter(ctx, allHooks["precreate"], config, hooksExec.DefaultPostKillTimeout) //nolint:staticcheck
	if err == nil {
		return allHooks, nil
	}
	logrus.Warnf("Container: precreate hook: %v", err)
	if hookErr != nil && hookErr != err {
		logrus.Debugf("container: precreate hook (hook error): %v", hookErr)
	}
	return nil, err
}

// addCommonOptsToSpec copies resource-related settings from commonOpts into
// the runtime spec being built by g: CPU and memory limits, the cgroup
// parent, and process resource limits.  Settings left at their zero value
// are not applied.  An error is returned if the default container
// configuration can't be loaded or if the resource limits can't be added.
func addCommonOptsToSpec(commonOpts *define.CommonBuildOptions, g *generate.Generator) error {
	// CPU limits.
	if period := commonOpts.CPUPeriod; period != 0 {
		g.SetLinuxResourcesCPUPeriod(period)
	}
	if quota := commonOpts.CPUQuota; quota != 0 {
		g.SetLinuxResourcesCPUQuota(quota)
	}
	if shares := commonOpts.CPUShares; shares != 0 {
		g.SetLinuxResourcesCPUShares(shares)
	}
	if cpus := commonOpts.CPUSetCPUs; cpus != "" {
		g.SetLinuxResourcesCPUCpus(cpus)
	}
	if mems := commonOpts.CPUSetMems; mems != "" {
		g.SetLinuxResourcesCPUMems(mems)
	}

	// Memory limits.
	if limit := commonOpts.Memory; limit != 0 {
		g.SetLinuxResourcesMemoryLimit(limit)
	}
	if swap := commonOpts.MemorySwap; swap != 0 {
		g.SetLinuxResourcesMemorySwap(swap)
	}

	// Which cgroup the container should be placed under.
	if parent := commonOpts.CgroupParent; parent != "" {
		g.SetLinuxCgroupsPath(parent)
	}

	// Process resource limits: the requested ones, plus the defaults from
	// the container configuration.
	containersConf, err := config.Default()
	if err != nil {
		return fmt.Errorf("failed to get container config: %w", err)
	}
	defaultUlimits := containersConf.Containers.DefaultUlimits.Get()
	if err := addRlimits(commonOpts.Ulimit, g, defaultUlimits); err != nil {
		return err
	}

	logrus.Debugf("Resources: %#v", commonOpts)
	return nil
}

// setupSlirp4netnsNetwork starts slirp4netns for the network namespace at
// netns, passing options along as extra options.  It returns a teardown
// function that kills the slirp4netns process and waits for it, along with a
// netResult carrying a hosts entry that maps hostnames to the address derived
// from the slirp4netns subnet, and the subnet's DNS server address.
func setupSlirp4netnsNetwork(config *config.Config, netns, cid string, options, hostnames []string) (func(), *netResult, error) {
	// we need the TmpDir for the slirp4netns code
	if err := os.MkdirAll(config.Engine.TmpDir, 0o751); err != nil {
		return nil, nil, fmt.Errorf("failed to create tempdir: %w", err)
	}

	setupOpts := slirp4netns.SetupOptions{
		Config:       config,
		ContainerID:  cid,
		Netns:        netns,
		ExtraOptions: options,
		Pdeathsig:    syscall.SIGKILL,
	}
	res, err := slirp4netns.Setup(&setupOpts)
	if err != nil {
		return nil, nil, err
	}

	containerIP, err := slirp4netns.GetIP(res.Subnet)
	if err != nil {
		return nil, nil, fmt.Errorf("get slirp4netns ip: %w", err)
	}
	dnsIP, err := slirp4netns.GetDNS(res.Subnet)
	if err != nil {
		return nil, nil, fmt.Errorf("get slirp4netns dns ip: %w", err)
	}

	// Stop the helper process and wait for it.
	teardown := func() {
		var status syscall.WaitStatus
		syscall.Kill(res.Pid, syscall.SIGKILL)  //nolint:errcheck
		syscall.Wait4(res.Pid, &status, 0, nil) //nolint:errcheck
	}

	hostEntry := etchosts.HostEntry{IP: containerIP.String(), Names: hostnames}
	result := &netResult{
		entries:           etchosts.HostEntries{hostEntry},
		dnsServers:        []string{dnsIP.String()},
		ipv6:              res.IPv6,
		keepHostResolvers: true,
	}
	return teardown, result, nil
}

// setupPasta starts pasta for the network namespace at netns, passing options
// along as extra options.  The returned netResult carries the DNS forwarders
// and addresses that pasta reported; if it reported at least one address, the
// first one is used for a hosts entry naming hostnames, and the first mapped
// guest address (if any) is recorded as the preferred
// host.containers.internal address.  No teardown function is returned.
func setupPasta(config *config.Config, netns string, options, hostnames []string) (func(), *netResult, error) {
	setupOpts := pasta.SetupOptions{
		Config:       config,
		Netns:        netns,
		ExtraOptions: options,
	}
	res, err := pasta.Setup(&setupOpts)
	if err != nil {
		return nil, nil, err
	}

	result := &netResult{
		dnsServers:        res.DNSForwardIPs,
		excludeIPs:        res.IPAddresses,
		ipv6:              res.IPv6,
		keepHostResolvers: true,
	}
	// Only the first reported address gets a hosts entry.
	if len(res.IPAddresses) > 0 {
		result.entries = etchosts.HostEntries{{IP: res.IPAddresses[0].String(), Names: hostnames}}
	}
	if len(res.MapGuestAddrIPs) > 0 {
		result.preferredHostContainersInternalIP = res.MapGuestAddrIPs[0]
	}

	return nil, result, nil
}

// runConfigureNetwork sets up networking for the network namespace of the
// process with the given pid.
//
// network has the form "name[:opt,opt,...]".  A name of slirp4netns or pasta
// selects that helper; for rootless OCI isolation an empty name selects the
// helper named by default_rootless_network_cmd in the container
// configuration.  Any other non-empty value is treated as a comma-separated
// list of network names to be set up through b.NetworkInterface, which is not
// possible with rootless OCI isolation.  If no network is named, the network
// interface's default network is used.
//
// It returns a teardown function (which may be nil) to call once the
// container is done, and the results of the network setup.
func (b *Builder) runConfigureNetwork(pid int, isolation define.Isolation, options RunOptions, network, containerName string, hostnames []string) (func(), *netResult, error) {
	netns := fmt.Sprintf("/proc/%d/ns/net", pid)
	var configureNetworks []string
	defConfig, err := config.Default()
	if err != nil {
		return nil, nil, fmt.Errorf("failed to get container config: %w", err)
	}

	name, networkOpts, hasOpts := strings.Cut(network, ":")
	var netOpts []string
	if hasOpts {
		netOpts = strings.Split(networkOpts, ",")
	}
	if isolation == IsolationOCIRootless && name == "" {
		switch defConfig.Network.DefaultRootlessNetworkCmd {
		case slirp4netns.BinaryName, "":
			name = slirp4netns.BinaryName
		case pasta.BinaryName:
			name = pasta.BinaryName
		default:
			return nil, nil, fmt.Errorf("invalid default_rootless_network_cmd option %q",
				defConfig.Network.DefaultRootlessNetworkCmd)
		}
	}

	switch {
	case name == slirp4netns.BinaryName:
		return setupSlirp4netnsNetwork(defConfig, netns, containerName, netOpts, hostnames)
	case name == pasta.BinaryName:
		return setupPasta(defConfig, netns, netOpts, hostnames)

	// Basically default case except we make sure to not split an empty
	// name as this would return a slice with one empty string which is
	// not a valid network name.
	case len(network) > 0:
		// old syntax allow comma separated network names
		configureNetworks = strings.Split(network, ",")
	}

	if isolation == IsolationOCIRootless {
		return nil, nil, errors.New("cannot use networks as rootless")
	}

	if len(configureNetworks) == 0 {
		configureNetworks = []string{b.NetworkInterface.DefaultNetworkName()}
	}

	// Make sure we can access the container's network namespace,
	// even after it exits, to successfully tear down the
	// interfaces.  Ensure this by opening a handle to the network
	// namespace, and using our copy to both configure and
	// deconfigure it.
	netFD, err := unix.Open(netns, unix.O_RDONLY, 0)
	if err != nil {
		return nil, nil, fmt.Errorf("opening network namespace: %w", err)
	}
	// Release our handle once we're done with it, so that repeated runs in
	// one process don't accumulate descriptors that keep namespaces alive.
	// Marking it as closed keeps a second call from closing an unrelated
	// descriptor that happens to reuse the number.
	closeNetFD := func() {
		if netFD < 0 {
			return
		}
		if err := unix.Close(netFD); err != nil {
			logrus.Debugf("closing network namespace handle: %v", err)
		}
		netFD = -1
	}
	mynetns := fmt.Sprintf("/proc/%d/fd/%d", unix.Getpid(), netFD)

	networks := make(map[string]nettypes.PerNetworkOptions, len(configureNetworks))
	for i, networkName := range configureNetworks {
		networks[networkName] = nettypes.PerNetworkOptions{
			InterfaceName: fmt.Sprintf("eth%d", i),
		}
	}

	opts := nettypes.NetworkOptions{
		ContainerID:   containerName,
		ContainerName: containerName,
		Networks:      networks,
	}
	netStatus, err := b.NetworkInterface.Setup(mynetns, nettypes.SetupOptions{NetworkOptions: opts})
	if err != nil {
		// No teardown function is handed back on failure, so nobody
		// else will get the chance to close the handle.
		closeNetFD()
		return nil, nil, err
	}

	teardown := func() {
		err := b.NetworkInterface.Teardown(mynetns, nettypes.TeardownOptions{NetworkOptions: opts})
		if err != nil {
			options.Logger.Errorf("failed to cleanup network: %v", err)
		}
		// The handle was only needed to keep mynetns usable up to here.
		closeNetFD()
	}

	return teardown, netStatusToNetResult(netStatus, hostnames), nil
}

// runMakeStdioPipe creates pipes to use for relaying stdio.
//
// The result holds three [read, write] descriptor pairs, indexed by the
// standard descriptor number that each one relays (stdin, stdout, stderr).
// The read end of the stdin pipe and the write ends of the stdout and stderr
// pipes have their ownership set to uid:gid.
//
// If anything fails, every descriptor created so far is closed before the
// error is returned, so a failed call doesn't leak descriptors.
func runMakeStdioPipe(uid, gid int) ([][]int, error) {
	stdioPipe := make([][]int, 0, 3)
	// closePipes releases both ends of every pipe created so far.
	closePipes := func() {
		for _, pipe := range stdioPipe {
			unix.Close(pipe[0]) //nolint:errcheck
			unix.Close(pipe[1]) //nolint:errcheck
		}
	}
	for i := 0; i < 3; i++ {
		pipe := make([]int, 2)
		if err := unix.Pipe(pipe); err != nil {
			closePipes()
			return nil, fmt.Errorf("creating pipe for container FD %d: %w", i, err)
		}
		stdioPipe = append(stdioPipe, pipe)
	}
	if err := unix.Fchown(stdioPipe[unix.Stdin][0], uid, gid); err != nil {
		closePipes()
		return nil, fmt.Errorf("setting owner of stdin pipe descriptor: %w", err)
	}
	if err := unix.Fchown(stdioPipe[unix.Stdout][1], uid, gid); err != nil {
		closePipes()
		return nil, fmt.Errorf("setting owner of stdout pipe descriptor: %w", err)
	}
	if err := unix.Fchown(stdioPipe[unix.Stderr][1], uid, gid); err != nil {
		closePipes()
		return nil, fmt.Errorf("setting owner of stderr pipe descriptor: %w", err)
	}
	return stdioPipe, nil
}

// setupNamespaces applies namespaceOptions and idmapOptions to the runtime
// spec being built by g.
//
// For each namespace option, the namespace is either removed from the spec
// (Host set) or added/replaced with the given path.  Sysctls from
// containers.conf are added for the IPC ("fs.mqueue"), UTS
// ("kernel.hostname", "kernel.domainname") and network ("net") namespaces
// when those are not shared with the host.  A network namespace "path" that
// isn't absolute is not a path at all: it is returned as networkString and a
// new namespace is created instead.
//
// If ID mappings were supplied or a new user namespace was requested, a user
// namespace is configured with those mappings (or with mappings derived from
// the current process's own mappings when none were supplied for a given ID
// type), and, unless the network namespace was explicitly specified, a new
// network namespace is created as well.  Otherwise, the user namespace is
// removed from the spec, along with the network namespace if it wasn't
// explicitly specified.
//
// It reports whether the caller needs to configure the network (never the
// case when policy is define.NetworkDisabled), the network string described
// above, and whether the caller should set a hostname in a new UTS namespace.
func setupNamespaces(_ *logrus.Logger, g *generate.Generator, namespaceOptions define.NamespaceOptions, idmapOptions define.IDMappingOptions, policy define.NetworkConfigurationPolicy) (configureNetwork bool, networkString string, configureUTS bool, err error) {
	defaultContainerConfig, err := config.Default()
	if err != nil {
		return false, "", false, fmt.Errorf("failed to get container config: %w", err)
	}

	// addSysctl adds the sysctls from containers.conf whose names start
	// with one of the prefixes to the spec.
	addSysctl := func(prefixes []string) error {
		for _, sysctl := range defaultContainerConfig.Sysctls() {
			name, value, hasValue := strings.Cut(sysctl, "=")
			for _, prefix := range prefixes {
				if !strings.HasPrefix(name, prefix) {
					continue
				}
				// An entry that we'd apply needs a value; report
				// a malformed one instead of crashing on it.
				if !hasValue {
					return fmt.Errorf("sysctl %q defined in containers.conf must be formatted name=value", sysctl)
				}
				g.AddLinuxSysctl(name, value)
			}
		}
		return nil
	}

	// Set namespace options in the container configuration.
	configureUserns := false
	specifiedNetwork := false
	for _, namespaceOption := range namespaceOptions {
		switch namespaceOption.Name {
		case string(specs.IPCNamespace):
			if !namespaceOption.Host {
				if err := addSysctl([]string{"fs.mqueue"}); err != nil {
					return false, "", false, err
				}
			}
		case string(specs.UserNamespace):
			configureUserns = false
			if !namespaceOption.Host && namespaceOption.Path == "" {
				configureUserns = true
			}
		case string(specs.NetworkNamespace):
			specifiedNetwork = true
			configureNetwork = false
			if !namespaceOption.Host && (namespaceOption.Path == "" || !filepath.IsAbs(namespaceOption.Path)) {
				if namespaceOption.Path != "" && !filepath.IsAbs(namespaceOption.Path) {
					// Not a path to an existing namespace: hand it
					// back to the caller and create a new namespace.
					networkString = namespaceOption.Path
					namespaceOption.Path = ""
				}
				configureNetwork = (policy != define.NetworkDisabled)
			}
		case string(specs.UTSNamespace):
			configureUTS = false
			if !namespaceOption.Host {
				if namespaceOption.Path == "" {
					configureUTS = true
				}
				if err := addSysctl([]string{"kernel.hostname", "kernel.domainname"}); err != nil {
					return false, "", false, err
				}
			}
		}
		if namespaceOption.Host {
			if err := g.RemoveLinuxNamespace(namespaceOption.Name); err != nil {
				return false, "", false, fmt.Errorf("removing %q namespace for run: %w", namespaceOption.Name, err)
			}
		} else if err := g.AddOrReplaceLinuxNamespace(namespaceOption.Name, namespaceOption.Path); err != nil {
			if namespaceOption.Path == "" {
				return false, "", false, fmt.Errorf("adding new %q namespace for run: %w", namespaceOption.Name, err)
			}
			return false, "", false, fmt.Errorf("adding %q namespace %q for run: %w", namespaceOption.Name, namespaceOption.Path, err)
		}
	}

	// If we've got mappings, we're going to have to create a user namespace.
	if len(idmapOptions.UIDMap) > 0 || len(idmapOptions.GIDMap) > 0 || configureUserns {
		if err := g.AddOrReplaceLinuxNamespace(string(specs.UserNamespace), ""); err != nil {
			return false, "", false, fmt.Errorf("adding new %q namespace for run: %w", string(specs.UserNamespace), err)
		}
		hostUidmap, hostGidmap, err := unshare.GetHostIDMappings("")
		if err != nil {
			return false, "", false, err
		}
		for _, m := range idmapOptions.UIDMap {
			g.AddLinuxUIDMapping(m.HostID, m.ContainerID, m.Size)
		}
		if len(idmapOptions.UIDMap) == 0 {
			// No UID mappings were supplied: map each range that the
			// current process has onto itself.
			for _, m := range hostUidmap {
				g.AddLinuxUIDMapping(m.ContainerID, m.ContainerID, m.Size)
			}
		}
		for _, m := range idmapOptions.GIDMap {
			g.AddLinuxGIDMapping(m.HostID, m.ContainerID, m.Size)
		}
		if len(idmapOptions.GIDMap) == 0 {
			// Likewise for GIDs.
			for _, m := range hostGidmap {
				g.AddLinuxGIDMapping(m.ContainerID, m.ContainerID, m.Size)
			}
		}
		if !specifiedNetwork {
			if err := g.AddOrReplaceLinuxNamespace(string(specs.NetworkNamespace), ""); err != nil {
				return false, "", false, fmt.Errorf("adding new %q namespace for run: %w", string(specs.NetworkNamespace), err)
			}
			configureNetwork = (policy != define.NetworkDisabled)
		}
	} else {
		if err := g.RemoveLinuxNamespace(string(specs.UserNamespace)); err != nil {
			return false, "", false, fmt.Errorf("removing %q namespace for run: %w", string(specs.UserNamespace), err)
		}
		if !specifiedNetwork {
			if err := g.RemoveLinuxNamespace(string(specs.NetworkNamespace)); err != nil {
				return false, "", false, fmt.Errorf("removing %q namespace for run: %w", string(specs.NetworkNamespace), err)
			}
		}
	}
	if configureNetwork {
		if err := addSysctl([]string{"net"}); err != nil {
			return false, "", false, err
		}
	}
	return configureNetwork, networkString, configureUTS, nil
}

// configureNamespaces decides which namespaces the command will run in and
// records them, along with the hostname, in the spec being generated by g.
//
// Namespace options are layered: the package defaults come first, then the
// builder's options, then the options supplied for this particular run.  When
// the run does not specify a network policy, the builder's policy is used.
//
// It returns the "configure network" flag and the network string reported by
// setupNamespaces, or an error.
func (b *Builder) configureNamespaces(g *generate.Generator, options *RunOptions) (bool, string, error) {
	namespaceOptions, err := DefaultNamespaceOptions()
	if err != nil {
		return false, "", err
	}
	namespaceOptions.AddOrReplace(b.NamespaceOptions...)
	namespaceOptions.AddOrReplace(options.NamespaceOptions...)

	policy := options.ConfigureNetwork
	if policy == NetworkDefault {
		// The run didn't ask for anything in particular, so fall back to
		// whatever the builder was configured with.
		policy = b.ConfigureNetwork
		if policy == NetworkDisabled {
			// options.ConfigureNetwork is treated as the source of truth
			// later on, so make sure it reflects the inherited decision to
			// disable networking.
			options.ConfigureNetwork = policy
		}
	}
	if policy == NetworkDisabled {
		namespaceOptions.AddOrReplace(define.NamespaceOptions{{Name: string(specs.NetworkNamespace), Host: false}}...)
	}

	configureNetwork, networkString, configureUTS, err := setupNamespaces(options.Logger, g, namespaceOptions, b.IDMappingOptions, policy)
	if err != nil {
		return false, "", err
	}

	// Pick a hostname: the run's, then the builder's, then one derived from
	// the container (its truncated ID, or its name if containers.conf asks
	// for that).  If setupNamespaces told us not to configure UTS, clear it.
	switch {
	case !configureUTS:
		g.SetHostname("")
	case options.Hostname != "":
		g.SetHostname(options.Hostname)
	case b.Hostname() != "":
		g.SetHostname(b.Hostname())
	default:
		fallback := stringid.TruncateID(b.ContainerID)
		defConfig, err := config.Default()
		if err != nil {
			return false, "", fmt.Errorf("failed to get container config: %w", err)
		}
		if defConfig.Containers.ContainerNameAsHostName {
			if fromName := mapContainerNameToHostname(b.Container); fromName != "" {
				fallback = fromName
			}
		}
		g.SetHostname(fallback)
	}

	// Export the hostname to the process unless its environment already
	// carries a HOSTNAME entry.
	spec := g.Config
	alreadySet := slices.ContainsFunc(spec.Process.Env, func(entry string) bool {
		return strings.HasPrefix(entry, "HOSTNAME=")
	})
	if !alreadySet {
		spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("HOSTNAME=%s", spec.Hostname))
	}

	return configureNetwork, networkString, nil
}

// runSetupBoundFiles converts bindFiles, a map from destination path to
// source path, into "bind" mounts carrying the "rbind" option.  A source whose
// path starts with bundlePath additionally gets bind.NoBindOption.  Because
// the map is ranged over directly, the order of the returned mounts is not
// specified.
func runSetupBoundFiles(bundlePath string, bindFiles map[string]string) (mounts []specs.Mount) {
	for target, source := range bindFiles {
		boundFile := specs.Mount{
			Source:      source,
			Destination: target,
			Type:        "bind",
			Options:     []string{"rbind"},
		}
		if strings.HasPrefix(source, bundlePath) {
			boundFile.Options = append(boundFile.Options, bind.NoBindOption)
		}
		mounts = append(mounts, boundFile)
	}
	return mounts
}

// addRlimits sets the process resource limits in the spec being generated.
//
// The entries of defaultUlimits are applied first and those of ulimit after
// them; each entry must be parseable by butil.ParseUlimit (name=SOFT:HARD).
// If none of the entries names NOFILE or NPROC, a default is added for the
// missing one: define.RLimitDefaultValue, or the current hard limit of this
// process if that is larger or if we are running rootless.
//
// Neither input slice is modified.
func addRlimits(ulimit []string, g *generate.Generator, defaultUlimits []string) error {
	// Build the combined list in a slice of our own.  Appending directly to
	// defaultUlimits could write into spare capacity of the caller's backing
	// array and clobber data that other slices still refer to.
	combined := make([]string, 0, len(defaultUlimits)+len(ulimit))
	combined = append(combined, defaultUlimits...)
	combined = append(combined, ulimit...)

	var nofileSet, nprocSet bool
	for _, u := range combined {
		ul, err := butil.ParseUlimit(u)
		if err != nil {
			return fmt.Errorf("ulimit option %q requires name=SOFT:HARD, failed to be parsed: %w", u, err)
		}

		name := strings.ToUpper(ul.Name)
		switch name {
		case "NOFILE":
			nofileSet = true
		case "NPROC":
			nprocSet = true
		}
		g.AddProcessRlimits("RLIMIT_"+name, uint64(ul.Hard), uint64(ul.Soft))
	}

	// fallbackLimit picks the value to use for a limit nobody configured.
	// Failing to read the current limit is not fatal: we warn and keep the
	// built-in default.
	fallbackLimit := func(resource int, name string) uint64 {
		lim := define.RLimitDefaultValue
		var rlimit unix.Rlimit
		if err := unix.Getrlimit(resource, &rlimit); err != nil {
			logrus.Warnf("Failed to return %s ulimit %q", name, err)
			return lim
		}
		if lim < rlimit.Max || unshare.IsRootless() {
			lim = rlimit.Max
		}
		return lim
	}
	if !nofileSet {
		lim := fallbackLimit(unix.RLIMIT_NOFILE, "RLIMIT_NOFILE")
		g.AddProcessRlimits("RLIMIT_NOFILE", lim, lim)
	}
	if !nprocSet {
		lim := fallbackLimit(unix.RLIMIT_NPROC, "RLIMIT_NPROC")
		g.AddProcessRlimits("RLIMIT_NPROC", lim, lim)
	}

	return nil
}

// runSetupVolumeMounts builds the runtime-spec mounts for the mounts requested
// for this particular run (optionMounts) and for the volumes given when the
// container was created (volumeMounts, colon-separated strings of the form
// host:container[:comma,separated,options]).
//
// Some options are instructions to us rather than mount flags and are acted
// on here: "z" and "Z" relabel the source using mountLabel, "U" changes the
// ownership of the source to the process UID/GID from idMaps, and "O" sets up
// an overlay over the source with its temporary content directory under the
// container's directory.  "z", "Z" and "U" are removed from the options of the
// resulting mount.
//
// It returns the mounts (option mounts first, then volume mounts) and the
// overlay content directories which were created.  The Options slices of the
// mounts in optionMounts are left untouched.
func (b *Builder) runSetupVolumeMounts(mountLabel string, volumeMounts []string, optionMounts []specs.Mount, idMaps IDMaps) (mounts []specs.Mount, overlayDirs []string, Err error) {
	// Make sure the overlay directory is clean before running
	containerDir, err := b.store.ContainerDirectory(b.ContainerID)
	if err != nil {
		return nil, nil, fmt.Errorf("looking up container directory for %s: %w", b.ContainerID, err)
	}
	if err := overlay.CleanupContent(containerDir); err != nil {
		return nil, nil, fmt.Errorf("cleaning up overlay content for %s: %w", b.ContainerID, err)
	}

	parseMount := func(mountType, host, container string, options []string) (specs.Mount, error) {
		// Work on a private copy.  The slices.DeleteFunc and append calls
		// below operate in place and would otherwise rewrite the slice that
		// the caller handed us.
		options = slices.Clone(options)

		var foundrw, foundro, foundz, foundZ, foundO, foundU bool
		var rootProp, upperDir, workDir string
		for _, opt := range options {
			switch opt {
			case "rw":
				foundrw = true
			case "ro":
				foundro = true
			case "z":
				foundz = true
			case "Z":
				foundZ = true
			case "O":
				foundO = true
			case "U":
				foundU = true
			case "private", "rprivate", "slave", "rslave", "shared", "rshared":
				rootProp = opt
			}

			// upperdir=... and workdir=... only matter for overlays; an
			// option without "=" leaves the corresponding value alone.
			if strings.HasPrefix(opt, "upperdir") {
				if _, value, ok := strings.Cut(opt, "="); ok {
					upperDir = value
				}
			}
			if strings.HasPrefix(opt, "workdir") {
				if _, value, ok := strings.Cut(opt, "="); ok {
					workDir = value
				}
			}
		}
		if !foundrw && !foundro {
			options = append(options, "rw")
		}
		if foundz {
			if err := relabel(host, mountLabel, true); err != nil {
				return specs.Mount{}, err
			}
			options = slices.DeleteFunc(options, func(o string) bool { return o == "z" })
		}
		if foundZ {
			if err := relabel(host, mountLabel, false); err != nil {
				return specs.Mount{}, err
			}
			options = slices.DeleteFunc(options, func(o string) bool { return o == "Z" })
		}
		if foundU {
			if err := chown.ChangeHostPathOwnership(host, true, idMaps.processUID, idMaps.processGID); err != nil {
				return specs.Mount{}, err
			}
			options = slices.DeleteFunc(options, func(o string) bool { return o == "U" })
		}
		if foundO {
			if (upperDir != "" && workDir == "") || (workDir != "" && upperDir == "") {
				return specs.Mount{}, errors.New("if specifying upperdir then workdir must be specified or vice versa")
			}

			containerDir, err := b.store.ContainerDirectory(b.ContainerID)
			if err != nil {
				return specs.Mount{}, err
			}

			contentDir, err := overlay.TempDir(containerDir, idMaps.rootUID, idMaps.rootGID)
			if err != nil {
				return specs.Mount{}, fmt.Errorf("failed to create TempDir in the %s directory: %w", containerDir, err)
			}

			overlayOpts := overlay.Options{
				RootUID:                idMaps.rootUID,
				RootGID:                idMaps.rootGID,
				UpperDirOptionFragment: upperDir,
				WorkDirOptionFragment:  workDir,
				GraphOpts:              slices.Clone(b.store.GraphOptions()),
			}

			overlayMount, err := overlay.MountWithOptions(contentDir, host, container, &overlayOpts)
			if err == nil {
				// Only report content directories for overlays which were
				// actually set up.
				overlayDirs = append(overlayDirs, contentDir)
			}

			// If chown true, add correct ownership to the overlay temp directories.
			if err == nil && foundU {
				if err := chown.ChangeHostPathOwnership(contentDir, true, idMaps.processUID, idMaps.processGID); err != nil {
					return specs.Mount{}, err
				}
			}

			return overlayMount, err
		}
		if rootProp == "" {
			options = append(options, "private")
		}
		if mountType != "tmpfs" {
			mountType = "bind"
			options = append(options, "rbind")
		}
		return specs.Mount{
			Destination: container,
			Type:        mountType,
			Source:      host,
			Options:     options,
		}, nil
	}

	// Bind mount volumes specified for this particular Run() invocation
	for _, optionMount := range optionMounts {
		logrus.Debugf("setting up mounted volume at %q", optionMount.Destination)
		m, err := parseMount(optionMount.Type, optionMount.Source, optionMount.Destination, optionMount.Options)
		if err != nil {
			return nil, nil, err
		}
		mounts = append(mounts, m)
	}
	// Bind mount volumes given by the user when the container was created
	for _, volume := range volumeMounts {
		fields := parse.SplitStringWithColonEscape(volume)
		if len(fields) < 2 {
			// Without both a source and a destination there is nothing we
			// can mount; report it instead of indexing out of range.
			return nil, nil, fmt.Errorf("invalid volume %q: expected host-dir:container-dir[:options]", volume)
		}
		var options []string
		if len(fields) > 2 {
			options = strings.Split(fields[2], ",")
		}
		options = append(options, "rbind")
		m, err := parseMount("bind", fields[0], fields[1], options)
		if err != nil {
			return nil, nil, err
		}
		mounts = append(mounts, m)
	}
	return mounts, overlayDirs, nil
}

// setupMaskedPaths adds the default masked paths, followed by opts.Masks, to
// the spec being generated, skipping every path that matches one of the
// opts.Unmasks patterns.  If "all" is one of the unmask values, nothing is
// added at all.
func setupMaskedPaths(g *generate.Generator, opts *define.CommonBuildOptions) {
	if slices.Contains(opts.Unmasks, "all") {
		return
	}

	// unmasked reports whether path matches any of the unmask patterns.  A
	// pattern which filepath.Match rejects is warned about and skipped.
	unmasked := func(path string) bool {
		for _, pattern := range opts.Unmasks {
			matched, err := filepath.Match(pattern, path)
			switch {
			case err != nil:
				logrus.Warnf("Invalid unmask pattern %q: %v", pattern, err)
			case matched:
				return true
			}
		}
		return false
	}

	candidates := append(config.DefaultMaskedPaths(), opts.Masks...)
	for _, path := range candidates {
		if !unmasked(path) {
			g.AddLinuxMaskedPaths(path)
		}
	}
}

// setupReadOnlyPaths adds every entry of config.DefaultReadOnlyPaths to the
// read-only paths of the spec being generated.
func setupReadOnlyPaths(g *generate.Generator) {
	defaults := config.DefaultReadOnlyPaths
	for _, path := range defaults {
		g.AddLinuxReadonlyPaths(path)
	}
}

// setupCapAdd adds each of the named capabilities to the process's bounding,
// effective and permitted sets, in that order.  It stops at, and returns, the
// first error.
func setupCapAdd(g *generate.Generator, caps ...string) error {
	steps := []struct {
		add    func(string) error
		format string
	}{
		{g.AddProcessCapabilityBounding, "adding %q to the bounding capability set: %w"},
		{g.AddProcessCapabilityEffective, "adding %q to the effective capability set: %w"},
		{g.AddProcessCapabilityPermitted, "adding %q to the permitted capability set: %w"},
	}
	for _, name := range caps {
		for _, step := range steps {
			if err := step.add(name); err != nil {
				return fmt.Errorf(step.format, name, err)
			}
		}
	}
	return nil
}

// setupCapDrop removes each of the named capabilities from the process's
// bounding, effective and permitted sets, in that order.  It stops at, and
// returns, the first error.
func setupCapDrop(g *generate.Generator, caps ...string) error {
	steps := []struct {
		drop   func(string) error
		format string
	}{
		{g.DropProcessCapabilityBounding, "removing %q from the bounding capability set: %w"},
		{g.DropProcessCapabilityEffective, "removing %q from the effective capability set: %w"},
		{g.DropProcessCapabilityPermitted, "removing %q from the permitted capability set: %w"},
	}
	for _, name := range caps {
		for _, step := range steps {
			if err := step.drop(name); err != nil {
				return fmt.Errorf(step.format, name, err)
			}
		}
	}
	return nil
}

// setupCapabilities resets the process capabilities in the spec and rebuilds
// them: defaultCapabilities are added first, then adds, and finally drops are
// removed.  The value "all" (in any letter case) is special in both lists: in
// adds it stands for everything returned by capabilities.AllCapabilities(),
// and in drops it clears every capability and ends processing.
func setupCapabilities(g *generate.Generator, defaultCapabilities, adds, drops []string) error {
	isAll := func(name string) bool { return strings.ToLower(name) == "all" }

	g.ClearProcessCapabilities()
	if err := setupCapAdd(g, defaultCapabilities...); err != nil {
		return err
	}
	if slices.ContainsFunc(adds, isAll) {
		adds = capabilities.AllCapabilities()
	}
	if slices.ContainsFunc(drops, isAll) {
		// Dropping everything wins over anything that was to be added.
		g.ClearProcessCapabilities()
		return nil
	}
	if err := setupCapAdd(g, adds...); err != nil {
		return err
	}
	return setupCapDrop(g, drops...)
}

// addOrReplaceMount returns mounts with mount in it.  If an entry with the
// same Destination is already present, the first such entry is overwritten in
// place; otherwise mount is appended.
func addOrReplaceMount(mounts []specs.Mount, mount specs.Mount) []specs.Mount {
	idx := slices.IndexFunc(mounts, func(existing specs.Mount) bool {
		return existing.Destination == mount.Destination
	})
	if idx < 0 {
		return append(mounts, mount)
	}
	mounts[idx] = mount
	return mounts
}

// setupSpecialMountSpecChanges adjusts the spec's mount list for /sys,
// /dev/pts, /dev/mqueue, /proc, /sys/fs/cgroup and /dev/shm, depending on
// which namespace types the spec lists and on whether we are running rootless.
// Each special mount replaces an existing mount with the same destination, or
// is appended if there is none.
//
// shmSize, if not empty, becomes the size= option of a tmpfs mounted at
// /dev/shm in the case where /dev/shm is not a candidate for being bind
// mounted from the host.
//
// The updated mount list is returned.  spec.Mounts is not reassigned, but
// entries which get replaced are overwritten in its backing array.
//
// The logic is taken from podman and adapted for buildah:
// https://github.com/containers/podman/blob/4ba71f955a944790edda6e007e6d074009d437a7/pkg/specgen/generate/oci.go#L178
func setupSpecialMountSpecChanges(spec *specs.Spec, shmSize string) ([]specs.Mount, error) {
	mounts := spec.Mounts
	isRootless := unshare.IsRootless()
	isNewUserns := false
	isNetns := false
	isPidns := false
	isIpcns := false

	// Note which namespace types appear in the spec.
	for _, namespace := range spec.Linux.Namespaces {
		switch namespace.Type {
		case specs.NetworkNamespace:
			isNetns = true
		case specs.UserNamespace:
			isNewUserns = true
		case specs.PIDNamespace:
			isPidns = true
		case specs.IPCNamespace:
			isIpcns = true
		}
	}

	addCgroup := true
	// mount sys when root and no userns or when a new netns is created
	canMountSys := (!isRootless && !isNewUserns) || isNetns
	if !canMountSys {
		// Fall back to a read-only recursive bind of /sys, and skip the
		// cgroup mount below.
		addCgroup = false
		sys := "/sys"
		sysMnt := specs.Mount{
			Destination: sys,
			Type:        "bind",
			Source:      sys,
			Options:     []string{bind.NoBindOption, "rprivate", "nosuid", "noexec", "nodev", "ro", "rbind"},
		}
		mounts = addOrReplaceMount(mounts, sysMnt)
	}

	// Work out whether GID 5 is covered by the ID mappings: the host's
	// mappings when rootless, and then the spec's own GID mappings if it has
	// any.
	gid5Available := true
	if isRootless {
		_, gids, err := unshare.GetHostIDMappings("")
		if err != nil {
			return nil, err
		}
		gid5Available = checkIDsGreaterThan5(gids)
	}
	if gid5Available && len(spec.Linux.GIDMappings) > 0 {
		gid5Available = checkIDsGreaterThan5(spec.Linux.GIDMappings)
	}
	if !gid5Available {
		// GID 5 is not mapped, so the gid=5 default option would fail; use a
		// /dev/pts mount whose options do not include it.
		devPts := specs.Mount{
			Destination: "/dev/pts",
			Type:        "devpts",
			Source:      "devpts",
			Options:     []string{"rprivate", "nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620"},
		}
		mounts = addOrReplaceMount(mounts, devPts)
	}

	// Running rootless is treated the same as having a user namespace in the
	// spec for the remaining decisions.
	isUserns := isNewUserns || isRootless

	// userns without an IPC namespace in the spec: bind mount /dev/mqueue
	if isUserns && !isIpcns {
		devMqueue := "/dev/mqueue"
		devMqueueMnt := specs.Mount{
			Destination: devMqueue,
			Type:        "bind",
			Source:      devMqueue,
			Options:     []string{bind.NoBindOption, "bind", "nosuid", "noexec", "nodev"},
		}
		mounts = addOrReplaceMount(mounts, devMqueueMnt)
	}
	// userns without a PID namespace in the spec: bind mount /proc
	if isUserns && !isPidns {
		proc := "/proc"
		procMount := specs.Mount{
			Destination: proc,
			Type:        "bind",
			Source:      proc,
			Options:     []string{bind.NoBindOption, "rbind", "nosuid", "noexec", "nodev"},
		}
		mounts = addOrReplaceMount(mounts, procMount)
	}

	// Only add the cgroup mount when /sys was not bind mounted above.
	if addCgroup {
		cgroupMnt := specs.Mount{
			Destination: "/sys/fs/cgroup",
			Type:        "cgroup",
			Source:      "cgroup",
			Options:     []string{"rprivate", "nosuid", "noexec", "nodev", "relatime", "rw"},
		}
		mounts = addOrReplaceMount(mounts, cgroupMnt)
	}

	// if userns and host ipc bind mount shm
	if isUserns && !isIpcns {
		// bind mount /dev/shm when it exists
		if err := fileutils.Exists("/dev/shm"); err == nil {
			shmMount := specs.Mount{
				Source:      "/dev/shm",
				Type:        "bind",
				Destination: "/dev/shm",
				Options:     []string{bind.NoBindOption, "rbind", "nosuid", "noexec", "nodev"},
			}
			mounts = addOrReplaceMount(mounts, shmMount)
		}
	} else if shmSize != "" {
		// otherwise, mount a tmpfs of the requested size at /dev/shm
		shmMount := specs.Mount{
			Source:      "shm",
			Destination: "/dev/shm",
			Type:        "tmpfs",
			Options:     []string{"private", "nodev", "noexec", "nosuid", "mode=1777", "size=" + shmSize},
		}
		mounts = addOrReplaceMount(mounts, shmMount)
	}

	return mounts, nil
}

// checkIDsGreaterThan5 reports whether container ID 5 is covered by at least
// one of the given mappings, i.e. whether it lies in the range
// [ContainerID, ContainerID+Size) of one of them.  Despite its name, it does
// not look for IDs greater than 5.
func checkIDsGreaterThan5(ids []specs.LinuxIDMapping) bool {
	for _, r := range ids {
		// Compare the offset of 5 within the range against Size instead of
		// computing ContainerID+Size: that sum is unsigned 32-bit arithmetic
		// and wraps around for very large sizes.  ContainerID <= 5 is checked
		// first, so the subtraction cannot underflow.
		if r.ContainerID <= 5 && 5-r.ContainerID < r.Size {
			return true
		}
	}
	return false
}

// getCacheMount resolves the cache mount described by tokens.  On success it
// returns, in order: a Mount to add to the runtime spec's list of mounts, the
// ID of an image, the path to a mounted filesystem, the path to an overlay
// filesystem, and an optional lock.  On failure only the error is set, and
// anything that had been set up along the way is torn down again before
// returning.
//
// After the command which uses the mount exits, the caller is expected to
// clean up the overlay filesystem (if one was returned), unmount the mounted
// filesystem (if the path to its mountpoint was returned) and remove its
// mountpoint, unmount the image (if one was mounted), and release the lock (if
// one was taken).
func (b *Builder) getCacheMount(tokens []string, sys *types.SystemContext, stageMountPoints map[string]internal.StageMountDetails, idMaps IDMaps, workDir, tmpDir string) (*specs.Mount, string, string, string, *lockfile.LockFile, error) {
	cacheMount, mountedImage, intermediateMount, overlayMount, targetLock, err := volumes.GetCacheMount(sys, tokens, b.store, b.MountLabel, stageMountPoints, idMaps.uidmap, idMaps.gidmap, workDir, tmpDir)
	if err != nil {
		return nil, "", "", "", nil, err
	}

	// Until we know that we're handing everything over to the caller, undo
	// it all on the way out.  Cleanup failures are only logged.
	succeeded := false
	defer func() {
		if succeeded {
			return
		}
		if overlayMount != "" {
			if err := overlay.RemoveTemp(overlayMount); err != nil {
				b.Logger.Debug(err.Error())
			}
		}
		if intermediateMount != "" {
			if err := mount.Unmount(intermediateMount); err != nil {
				b.Logger.Debugf("unmounting %q: %v", intermediateMount, err)
			}
			if err := os.Remove(intermediateMount); err != nil {
				b.Logger.Debugf("removing should-be-empty directory %q: %v", intermediateMount, err)
			}
		}
		if mountedImage != "" {
			if _, err := b.store.UnmountImage(mountedImage, false); err != nil {
				b.Logger.Debugf("unmounting image %q: %v", mountedImage, err)
			}
		}
		if targetLock != nil {
			targetLock.Unlock()
		}
	}()

	// Run the mount through the same option handling as any other mount
	// requested for a run.
	resolved, overlayDirs, err := b.runSetupVolumeMounts(b.MountLabel, nil, []specs.Mount{cacheMount}, idMaps)
	if err != nil {
		return nil, "", "", "", nil, err
	}
	if len(overlayDirs) != 0 {
		return nil, "", "", "", nil, errors.New("internal error: did not expect a resolved cache mount to use the O flag")
	}

	succeeded = true
	return &resolved[0], mountedImage, intermediateMount, overlayMount, targetLock, nil
}