use rootless netns from c/common

Use the new rootlessnetns logic from c/common, drop the podman code here and make use of the new much simpler API. ref: https://github.com/containers/common/pull/1761 [NO NEW TESTS NEEDED] Signed-off-by: Paul Holzinger <pholzing@redhat.com>
2025-07-18 01:57:24 +08:00 · 2023-11-24 18:00:24 +01:00
parent 605a29a714
commit a687c38860
38 changed files with 1171 additions and 1072 deletions
--- a/vendor/github.com/containers/common/libimage/copier.go
+++ b/vendor/github.com/containers/common/libimage/copier.go
@ -364,11 +364,13 @@ func (c *copier) copy(ctx context.Context, source, destination types.ImageRefere
 		defer cancel()
 		defer timer.Stop()

-		fmt.Fprintf(c.imageCopyOptions.ReportWriter,
-			"Pulling image %s inside systemd: setting pull timeout to %s\n",
-			source.StringWithinTransport(),
-			time.Duration(numExtensions)*extension,
-		)
+		if c.imageCopyOptions.ReportWriter != nil {
+			fmt.Fprintf(c.imageCopyOptions.ReportWriter,
+				"Pulling image %s inside systemd: setting pull timeout to %s\n",
+				source.StringWithinTransport(),
+				time.Duration(numExtensions)*extension,
+			)
+		}

 		// From `man systemd.service(5)`:
 		//
--- a/vendor/github.com/containers/common/libnetwork/cni/cni_exec.go
+++ b/vendor/github.com/containers/common/libnetwork/cni/cni_exec.go
@ -26,8 +26,10 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"os"
 	"os/exec"
 	"path/filepath"
+	"strings"

 	"github.com/containernetworking/cni/pkg/invoke"
 	"github.com/containernetworking/cni/pkg/version"
@ -80,6 +82,16 @@ func (e *cniExec) ExecPlugin(ctx context.Context, pluginPath string, stdinData [
 		c.Env = append(c.Env, "XDG_RUNTIME_DIR=")
 	}

+	// The CNI plugins need access to iptables in $PATH. As it turns out debian doesn't put
+	// /usr/sbin in $PATH for rootless users. This will break rootless networking completely.
+	// We might break existing users and we cannot expect everyone to change their $PATH so
+	// let's add /usr/sbin to $PATH ourselves.
+	path := os.Getenv("PATH")
+	if !strings.Contains(path, "/usr/sbin") {
+		path += ":/usr/sbin"
+		c.Env = append(c.Env, "PATH="+path)
+	}
+
 	err := c.Run()
 	if err != nil {
 		return nil, annotatePluginError(err, pluginPath, stdout.Bytes(), stderr.Bytes())
--- a/vendor/github.com/containers/common/libnetwork/cni/network.go
+++ b/vendor/github.com/containers/common/libnetwork/cni/network.go
@ -16,6 +16,7 @@ import (
 	"time"

 	"github.com/containernetworking/cni/libcni"
+	"github.com/containers/common/libnetwork/internal/rootlessnetns"
 	"github.com/containers/common/libnetwork/types"
 	"github.com/containers/common/pkg/config"
 	"github.com/containers/common/pkg/version"
@ -53,6 +54,9 @@ type cniNetwork struct {

 	// networks is a map with loaded networks, the key is the network name
 	networks map[string]*network
+
+	// rootlessNetns is used for the rootless network setup/teardown
+	rootlessNetns *rootlessnetns.Netns
 }

 type network struct {
@ -65,21 +69,14 @@ type network struct {
 type InitConfig struct {
 	// CNIConfigDir is directory where the cni config files are stored.
 	CNIConfigDir string
-	// CNIPluginDirs is a list of directories where cni should look for the plugins.
-	CNIPluginDirs []string
 	// RunDir is a directory where temporary files can be stored.
 	RunDir string

-	// DefaultNetwork is the name for the default network.
-	DefaultNetwork string
-	// DefaultSubnet is the default subnet for the default network.
-	DefaultSubnet string
-
-	// DefaultsubnetPools contains the subnets which must be used to allocate a free subnet by network create
-	DefaultsubnetPools []config.SubnetPool
-
 	// IsMachine describes whenever podman runs in a podman machine environment.
 	IsMachine bool
+
+	// Config containers.conf options
+	Config *config.Config
 }

 // NewCNINetworkInterface creates the ContainerNetwork interface for the CNI backend.
@ -96,12 +93,12 @@ func NewCNINetworkInterface(conf *InitConfig) (types.ContainerNetwork, error) {
 		return nil, err
 	}

-	defaultNetworkName := conf.DefaultNetwork
+	defaultNetworkName := conf.Config.Network.DefaultNetwork
 	if defaultNetworkName == "" {
 		defaultNetworkName = types.DefaultNetworkName
 	}

-	defaultSubnet := conf.DefaultSubnet
+	defaultSubnet := conf.Config.Network.DefaultSubnet
 	if defaultSubnet == "" {
 		defaultSubnet = types.DefaultSubnet
 	}
@ -110,21 +107,30 @@ func NewCNINetworkInterface(conf *InitConfig) (types.ContainerNetwork, error) {
 		return nil, fmt.Errorf("failed to parse default subnet: %w", err)
 	}

-	defaultSubnetPools := conf.DefaultsubnetPools
+	defaultSubnetPools := conf.Config.Network.DefaultSubnetPools
 	if defaultSubnetPools == nil {
 		defaultSubnetPools = config.DefaultSubnetPools
 	}

-	cni := libcni.NewCNIConfig(conf.CNIPluginDirs, &cniExec{})
+	var netns *rootlessnetns.Netns
+	if unshare.IsRootless() {
+		netns, err = rootlessnetns.New(conf.RunDir, rootlessnetns.CNI, conf.Config)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	cni := libcni.NewCNIConfig(conf.Config.Network.CNIPluginDirs.Values, &cniExec{})
 	n := &cniNetwork{
 		cniConfigDir:       conf.CNIConfigDir,
-		cniPluginDirs:      conf.CNIPluginDirs,
+		cniPluginDirs:      conf.Config.Network.CNIPluginDirs.Get(),
 		cniConf:            cni,
 		defaultNetwork:     defaultNetworkName,
 		defaultSubnet:      defaultNet,
 		defaultsubnetPools: defaultSubnetPools,
 		isMachine:          conf.IsMachine,
 		lock:               lock,
+		rootlessNetns:      netns,
 	}

 	return n, nil
--- a/vendor/github.com/containers/common/libnetwork/cni/run.go
+++ b/vendor/github.com/containers/common/libnetwork/cni/run.go
@ -39,61 +39,71 @@ func (n *cniNetwork) Setup(namespacePath string, options types.SetupOptions) (ma
 		return nil, fmt.Errorf("failed to set the loopback adapter up: %w", err)
 	}

-	var retErr error
-	teardownOpts := options
-	teardownOpts.Networks = map[string]types.PerNetworkOptions{}
-	// make sure to teardown the already connected networks on error
-	defer func() {
-		if retErr != nil {
-			if len(teardownOpts.Networks) > 0 {
-				err := n.teardown(namespacePath, types.TeardownOptions(teardownOpts))
-				if err != nil {
-					logrus.Warn(err)
+	results := make(map[string]types.StatusBlock, len(options.Networks))
+
+	setup := func() error {
+		var retErr error
+		teardownOpts := options
+		teardownOpts.Networks = map[string]types.PerNetworkOptions{}
+		// make sure to teardown the already connected networks on error
+		defer func() {
+			if retErr != nil {
+				if len(teardownOpts.Networks) > 0 {
+					err := n.teardown(namespacePath, types.TeardownOptions(teardownOpts))
+					if err != nil {
+						logrus.Warn(err)
+					}
 				}
 			}
+		}()
+
+		ports, err := convertSpecgenPortsToCNIPorts(options.PortMappings)
+		if err != nil {
+			return err
 		}
-	}()

-	ports, err := convertSpecgenPortsToCNIPorts(options.PortMappings)
-	if err != nil {
-		return nil, err
-	}
+		for name, netOpts := range options.Networks {
+			netOpts := netOpts
+			network := n.networks[name]
+			rt := getRuntimeConfig(namespacePath, options.ContainerName, options.ContainerID, name, ports, &netOpts)

-	results := make(map[string]types.StatusBlock, len(options.Networks))
-	for name, netOpts := range options.Networks {
-		netOpts := netOpts
-		network := n.networks[name]
-		rt := getRuntimeConfig(namespacePath, options.ContainerName, options.ContainerID, name, ports, &netOpts)
-
-		// If we have more than one static ip we need parse the ips via runtime config,
-		// make sure to add the ips capability to the first plugin otherwise it doesn't get the ips
-		if len(netOpts.StaticIPs) > 0 && !network.cniNet.Plugins[0].Network.Capabilities["ips"] {
-			caps := make(map[string]interface{})
-			caps["capabilities"] = map[string]bool{"ips": true}
-			network.cniNet.Plugins[0], retErr = libcni.InjectConf(network.cniNet.Plugins[0], caps)
-			if retErr != nil {
-				return nil, retErr
+			// If we have more than one static ip we need parse the ips via runtime config,
+			// make sure to add the ips capability to the first plugin otherwise it doesn't get the ips
+			if len(netOpts.StaticIPs) > 0 && !network.cniNet.Plugins[0].Network.Capabilities["ips"] {
+				caps := make(map[string]interface{})
+				caps["capabilities"] = map[string]bool{"ips": true}
+				network.cniNet.Plugins[0], retErr = libcni.InjectConf(network.cniNet.Plugins[0], caps)
+				if retErr != nil {
+					return retErr
+				}
 			}
-		}

-		var res cnitypes.Result
-		res, retErr = n.cniConf.AddNetworkList(context.Background(), network.cniNet, rt)
-		// Add this network to teardown opts since it is now connected.
-		// Also add this if an errors was returned since we want to call teardown on this regardless.
-		teardownOpts.Networks[name] = netOpts
-		if retErr != nil {
-			return nil, retErr
-		}
+			var res cnitypes.Result
+			res, retErr = n.cniConf.AddNetworkList(context.Background(), network.cniNet, rt)
+			// Add this network to teardown opts since it is now connected.
+			// Also add this if an error was returned since we want to call teardown on this regardless.
+			teardownOpts.Networks[name] = netOpts
+			if retErr != nil {
+				return retErr
+			}

-		logrus.Debugf("cni result for container %s network %s: %v", options.ContainerID, name, res)
-		var status types.StatusBlock
-		status, retErr = CNIResultToStatus(res)
-		if retErr != nil {
-			return nil, retErr
+			logrus.Debugf("cni result for container %s network %s: %v", options.ContainerID, name, res)
+			var status types.StatusBlock
+			status, retErr = CNIResultToStatus(res)
+			if retErr != nil {
+				return retErr
+			}
+			results[name] = status
 		}
-		results[name] = status
+		return nil
 	}
-	return results, nil
+
+	if n.rootlessNetns != nil {
+		err = n.rootlessNetns.Setup(len(options.Networks), setup)
+	} else {
+		err = setup()
+	}
+	return results, err
 }

 // CNIResultToStatus convert the cni result to status block
@ -225,28 +235,39 @@ func (n *cniNetwork) teardown(namespacePath string, options types.TeardownOption
 	}

 	var multiErr *multierror.Error
-	for name, netOpts := range options.Networks {
-		netOpts := netOpts
-		rt := getRuntimeConfig(namespacePath, options.ContainerName, options.ContainerID, name, ports, &netOpts)
+	teardown := func() error {
+		for name, netOpts := range options.Networks {
+			netOpts := netOpts
+			rt := getRuntimeConfig(namespacePath, options.ContainerName, options.ContainerID, name, ports, &netOpts)

-		cniConfList, newRt, err := getCachedNetworkConfig(n.cniConf, name, rt)
-		if err == nil {
-			rt = newRt
-		} else {
-			logrus.Warnf("Failed to load cached network config: %v, falling back to loading network %s from disk", err, name)
-			network := n.networks[name]
-			if network == nil {
-				multiErr = multierror.Append(multiErr, fmt.Errorf("network %s: %w", name, types.ErrNoSuchNetwork))
-				continue
+			cniConfList, newRt, err := getCachedNetworkConfig(n.cniConf, name, rt)
+			if err == nil {
+				rt = newRt
+			} else {
+				logrus.Warnf("Failed to load cached network config: %v, falling back to loading network %s from disk", err, name)
+				network := n.networks[name]
+				if network == nil {
+					multiErr = multierror.Append(multiErr, fmt.Errorf("network %s: %w", name, types.ErrNoSuchNetwork))
+					continue
+				}
+				cniConfList = network.cniNet
 			}
-			cniConfList = network.cniNet
-		}

-		err = n.cniConf.DelNetworkList(context.Background(), cniConfList, rt)
-		if err != nil {
-			multiErr = multierror.Append(multiErr, err)
+			err = n.cniConf.DelNetworkList(context.Background(), cniConfList, rt)
+			if err != nil {
+				multiErr = multierror.Append(multiErr, err)
+			}
 		}
+		return nil
 	}
+
+	if n.rootlessNetns != nil {
+		err = n.rootlessNetns.Teardown(len(options.Networks), teardown)
+	} else {
+		err = teardown()
+	}
+	multiErr = multierror.Append(multiErr, err)
+
 	return multiErr.ErrorOrNil()
 }

@ -267,3 +288,10 @@ func getCachedNetworkConfig(cniConf *libcni.CNIConfig, name string, rt *libcni.R
 	}
 	return cniConfList, rt, nil
 }
+
+// RunInRootlessNetns runs the given function inside the rootless netns.
+// It returns types.ErrNotRootlessNetns when this network interface has no
+// rootless netns configured (n.rootlessNetns is nil). Otherwise the call is
+// delegated to rootlessNetns.Run() together with the network lock.
+func (n *cniNetwork) RunInRootlessNetns(toRun func() error) error {
+	if n.rootlessNetns == nil {
+		return types.ErrNotRootlessNetns
+	}
+	return n.rootlessNetns.Run(n.lock, toRun)
+}
--- a/vendor/github.com/containers/common/libnetwork/internal/rootlessnetns/netns.go
+++ b/vendor/github.com/containers/common/libnetwork/internal/rootlessnetns/netns.go
@ -0,0 +1,8 @@
+package rootlessnetns
+
+// NetworkBackend identifies the network backend which uses the rootless netns.
+type NetworkBackend int
+
+const (
+	// Netavark is the netavark backend, it is the zero value of NetworkBackend.
+	Netavark NetworkBackend = iota
+	// CNI is the CNI backend.
+	CNI
+)
--- a/vendor/github.com/containers/common/libnetwork/internal/rootlessnetns/netns_freebsd.go
+++ b/vendor/github.com/containers/common/libnetwork/internal/rootlessnetns/netns_freebsd.go
@ -0,0 +1,28 @@
+package rootlessnetns
+
+import (
+	"errors"
+
+	"github.com/containers/common/pkg/config"
+	"github.com/containers/storage/pkg/lockfile"
+)
+
+// ErrNotSupported is returned by every function in this file because this
+// build provides no rootless netns implementation.
+var ErrNotSupported = errors.New("rootless netns only supported on linux")
+
+// Netns is an empty placeholder type so that callers compile on this platform.
+type Netns struct{}
+
+// New always returns a nil Netns and ErrNotSupported, all arguments are ignored.
+func New(dir string, backend NetworkBackend, conf *config.Config) (*Netns, error) {
+	return nil, ErrNotSupported
+}
+
+// Setup always returns ErrNotSupported, toRun is never called.
+func (n *Netns) Setup(nets int, toRun func() error) error {
+	return ErrNotSupported
+}
+
+// Teardown always returns ErrNotSupported, toRun is never called.
+func (n *Netns) Teardown(nets int, toRun func() error) error {
+	return ErrNotSupported
+}
+
+// Run always returns ErrNotSupported, the lock is not touched and toRun is never called.
+func (n *Netns) Run(lock *lockfile.LockFile, toRun func() error) error {
+	return ErrNotSupported
+}
--- a/vendor/github.com/containers/common/libnetwork/internal/rootlessnetns/netns_linux.go
+++ b/vendor/github.com/containers/common/libnetwork/internal/rootlessnetns/netns_linux.go
@ -0,0 +1,545 @@
+package rootlessnetns
+
+import (
+	"errors"
+	"fmt"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"syscall"
+
+	"github.com/containernetworking/plugins/pkg/ns"
+	"github.com/containers/common/libnetwork/resolvconf"
+	"github.com/containers/common/libnetwork/slirp4netns"
+	"github.com/containers/common/pkg/config"
+	"github.com/containers/common/pkg/netns"
+	"github.com/containers/common/pkg/systemd"
+	"github.com/containers/storage/pkg/homedir"
+	"github.com/containers/storage/pkg/lockfile"
+	"github.com/hashicorp/go-multierror"
+	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/opencontainers/selinux/go-selinux/label"
+	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
+)
+
+const (
+	// rootlessNetnsDir is the directory name
+	// New() creates it below the given run dir, getOrCreateNetns() and cleanup()
+	// additionally use the same name for the netns file inside that directory.
+	rootlessNetnsDir = "rootless-netns"
+	// refCountFile file name for the ref count file
+	refCountFile = "ref-count"
+
+	// rootlessNetNsSilrp4netnsPidFile is the name of the rootless netns slirp4netns pid file
+	rootlessNetNsSilrp4netnsPidFile = "rootless-netns-slirp4netns.pid"
+
+	// persistentCNIDir is the directory where the CNI files are stored
+	persistentCNIDir = "/var/lib/cni"
+
+	// tmpfs and none are the filesystem type arguments passed to mount(2),
+	// none is used for the bind mounts.
+	// resolvConfName is the name of the generated resolv.conf file inside the netns dir.
+	tmpfs          = "tmpfs"
+	none           = "none"
+	resolvConfName = "resolv.conf"
+)
+
+// Netns holds the state needed to create, use and clean up the rootless netns.
+// Use New() to create it.
+type Netns struct {
+	// dir used for the rootless netns
+	dir string
+	// backend used for the network setup/teardown
+	backend NetworkBackend
+
+	// config contains containers.conf options.
+	config *config.Config
+}
+
+// rootlessNetnsError is the error type created by wrapError(), its message is
+// always prefixed with "rootless netns:" and the wrapped error is available
+// via Unwrap().
+type rootlessNetnsError struct {
+	// msg is extra context for the error, it can be empty
+	msg string
+	// err is the wrapped error
+	err error
+}
+
+// Error implements the error interface. The message always starts with
+// "rootless netns: " followed by the optional context message and the
+// wrapped error.
+func (e *rootlessNetnsError) Error() string {
+	// wrapError() allows an empty msg, skip the separator in this case
+	// otherwise the message would read "rootless netns: : <err>".
+	if e.msg == "" {
+		return fmt.Sprintf("rootless netns: %v", e.err)
+	}
+	return fmt.Sprintf("rootless netns: %s: %v", e.msg, e.err)
+}
+
+// Unwrap returns the wrapped error so that errors.Is() and errors.As() can inspect it.
+func (e *rootlessNetnsError) Unwrap() error {
+	return e.err
+}
+
+// wrapError wraps the error with extra context
+// It will always include "rootless netns:" so the msg should not mention it again,
+// msg can be empty to just include the rootless netns part.
+// err must be non nil.
+// msg is stored as is, it is not treated as a format string.
+func wrapError(msg string, err error) *rootlessNetnsError {
+	return &rootlessNetnsError{
+		msg: msg,
+		err: err,
+	}
+}
+
+// New creates the rootless netns directory (rootlessNetnsDir) below dir with
+// mode 0700 and returns a Netns which uses it. The netns itself is not created
+// here, see getOrCreateNetns().
+// backend is the network backend which will use the netns and conf contains
+// the containers.conf options, it is stored and passed to slirp4netns.Setup().
+func New(dir string, backend NetworkBackend, conf *config.Config) (*Netns, error) {
+	netnsDir := filepath.Join(dir, rootlessNetnsDir)
+	if err := os.MkdirAll(netnsDir, 0o700); err != nil {
+		return nil, wrapError("", err)
+	}
+	return &Netns{
+		dir:     netnsDir,
+		backend: backend,
+		config:  conf,
+	}, nil
+}
+
+// getPath is a small wrapper around filepath.Join() to have a bit less code
+// The given path is always joined below n.dir, callers also pass absolute
+// paths such as "/run/systemd" to get the matching location inside n.dir.
+func (n *Netns) getPath(path string) string {
+	return filepath.Join(n.dir, path)
+}
+
+// getOrCreateNetns returns the rootless netns, if it created a new one the
+// returned bool is set to true.
+// For a new netns slirp4netns is started as well. If that fails the new netns
+// is unmounted and closed again, otherwise the next call would find the netns
+// file and reuse a netns which has no working slirp4netns setup.
+func (n *Netns) getOrCreateNetns() (ns.NetNS, bool, error) {
+	nsPath := n.getPath(rootlessNetnsDir)
+	nsRef, err := ns.GetNS(nsPath)
+	if err == nil {
+		// TODO check if slirp4netns is alive
+		return nsRef, false, nil
+	}
+	logrus.Debugf("Creating rootless network namespace at %q", nsPath)
+	// We have to create the netns dir again here because it is possible
+	// that cleanup() removed it.
+	if err := os.MkdirAll(n.dir, 0o700); err != nil {
+		return nil, false, wrapError("", err)
+	}
+	// Do not call this variable netns, it would shadow the netns package
+	// which is needed for UnmountNS() below.
+	newNs, err := netns.NewNSAtPath(nsPath)
+	if err != nil {
+		return nil, false, wrapError("create netns", err)
+	}
+	if err := n.setupSlirp4netns(nsPath); err != nil {
+		// The caller returns early on errors and never sees the netns,
+		// so we have to get rid of it here to not leak it.
+		if unmountErr := netns.UnmountNS(nsPath); unmountErr != nil {
+			logrus.Errorf("Failed to unmount rootless netns after failed slirp4netns setup: %v", unmountErr)
+		}
+		if closeErr := newNs.Close(); closeErr != nil {
+			logrus.Errorf("Failed to close rootless netns after failed slirp4netns setup: %v", closeErr)
+		}
+		return nil, false, err
+	}
+	return newNs, true, nil
+}
+
+// cleanup removes the rootless netns: it unmounts the netns file, kills the
+// slirp4netns process and removes the whole netns directory.
+// It is a no-op when the directory does not exist. All steps are always
+// attempted, the errors are collected and returned together.
+func (n *Netns) cleanup() error {
+	if _, err := os.Stat(n.dir); err != nil {
+		if errors.Is(err, fs.ErrNotExist) {
+			// dir does not exists no need for cleanup
+			return nil
+		}
+		return err
+	}
+
+	logrus.Debug("Cleaning up rootless network namespace")
+
+	nsPath := n.getPath(rootlessNetnsDir)
+	var multiErr *multierror.Error
+	if err := netns.UnmountNS(nsPath); err != nil {
+		multiErr = multierror.Append(multiErr, err)
+	}
+	if err := n.cleanupSlirp4netns(); err != nil {
+		multiErr = multierror.Append(multiErr, wrapError("kill slirp4netns", err))
+	}
+	// this also removes the pid file, the ref count file and resolv.conf
+	if err := os.RemoveAll(n.dir); err != nil {
+		multiErr = multierror.Append(multiErr, wrapError("remove rootless netns dir", err))
+	}
+
+	return multiErr.ErrorOrNil()
+}
+
+// setupSlirp4netns starts slirp4netns for the netns at nsPath, writes its pid
+// into the pid file inside the netns dir and creates a resolv.conf file there
+// which contains the slirp4netns dns server address.
+func (n *Netns) setupSlirp4netns(nsPath string) error {
+	res, err := slirp4netns.Setup(&slirp4netns.SetupOptions{
+		Config:      n.config,
+		ContainerID: "rootless-netns",
+		Netns:       nsPath,
+	})
+	if err != nil {
+		return wrapError("start slirp4netns", err)
+	}
+	// create pid file for the slirp4netns process
+	// this is needed to kill the process in the cleanup
+	pid := strconv.Itoa(res.Pid)
+	err = os.WriteFile(n.getPath(rootlessNetNsSilrp4netnsPidFile), []byte(pid), 0o600)
+	if err != nil {
+		return wrapError("write slirp4netns pid file", err)
+	}
+
+	if systemd.RunsOnSystemd() {
+		// move to systemd scope to prevent systemd from killing it
+		err = systemd.MoveRootlessNetnsSlirpProcessToUserSlice(res.Pid)
+		if err != nil {
+			// only log this, it is not fatal but can lead to issues when running podman inside systemd units
+			logrus.Errorf("failed to move the rootless netns slirp4netns process to the systemd user.slice: %v", err)
+		}
+	}
+
+	// build a new resolv.conf file which uses the slirp4netns dns server address
+	resolveIP, err := slirp4netns.GetDNS(res.Subnet)
+	if err != nil {
+		return wrapError("determine default slirp4netns DNS address", err)
+	}
+
+	if err := resolvconf.New(&resolvconf.Params{
+		Path: n.getPath(resolvConfName),
+		// fake the netns since we want to filter localhost
+		Namespaces: []specs.LinuxNamespace{
+			{Type: specs.NetworkNamespace},
+		},
+		IPv6Enabled:     res.IPv6,
+		KeepHostServers: true,
+		Nameservers:     []string{resolveIP.String()},
+	}); err != nil {
+		return wrapError("create resolv.conf", err)
+	}
+	return nil
+}
+
+// cleanupSlirp4netns reads the slirp4netns pid from the pid file and sends
+// SIGTERM to this process. The first error from reading the file, parsing
+// the pid or sending the signal is returned.
+func (n *Netns) cleanupSlirp4netns() error {
+	pidFile := n.getPath(rootlessNetNsSilrp4netnsPidFile)
+	b, err := os.ReadFile(pidFile)
+	if err == nil {
+		var i int
+		i, err = strconv.Atoi(string(b))
+		if err == nil {
+			// kill the slirp process so we do not leak it
+			err = syscall.Kill(i, syscall.SIGTERM)
+		}
+	}
+	return err
+}
+
+// mountAndMkdirDest convenience wrapper for mount and mkdir
+// It creates target (and missing parents) with mode 0700 and then mounts
+// source on it with the given filesystem type and flags, no mount data is passed.
+func mountAndMkdirDest(source string, target string, fstype string, flags uintptr) error {
+	if err := os.MkdirAll(target, 0o700); err != nil {
+		return wrapError("create mount point", err)
+	}
+	if err := unix.Mount(source, target, fstype, flags, ""); err != nil {
+		return wrapError(fmt.Sprintf("mount %q to %q", source, target), err)
+	}
+	return nil
+}
+
+// setupMounts unshares a new mount namespace and sets up all mounts in it
+// which are needed to run the network backend in the rootless netns, see the
+// numbered list below. It is called from inside the netns thread by runInner().
+func (n *Netns) setupMounts() error {
+	// Before we can run the given function,
+	// we have to set up all mounts correctly.
+
+	// The order of the mounts is IMPORTANT.
+	// The idea of the extra mount ns is to make /run and /var/lib/cni writeable
+	// for the cni plugins but not affecting the podman user namespace.
+	// Because the plugins also need access to XDG_RUNTIME_DIR/netns some special setup is needed.
+
+	// The following bind mounts are needed
+	// 1. XDG_RUNTIME_DIR -> XDG_RUNTIME_DIR/rootless-netns/XDG_RUNTIME_DIR
+	// 2. /run/systemd -> XDG_RUNTIME_DIR/rootless-netns/run/systemd (only if it exists)
+	// 3. XDG_RUNTIME_DIR/rootless-netns/resolv.conf -> /etc/resolv.conf or XDG_RUNTIME_DIR/rootless-netns/run/symlink/target
+	// 4. XDG_RUNTIME_DIR/rootless-netns/var/lib/cni -> /var/lib/cni (if /var/lib/cni does not exist, use the parent dir)
+	// 5. XDG_RUNTIME_DIR/rootless-netns/run -> /run
+
+	// Create a new mount namespace,
+	// this must happen inside the netns thread.
+	err := unix.Unshare(unix.CLONE_NEWNS)
+	if err != nil {
+		return wrapError("create new mount namespace", err)
+	}
+
+	xdgRuntimeDir, err := homedir.GetRuntimeDir()
+	if err != nil {
+		return fmt.Errorf("could not get runtime directory: %w", err)
+	}
+	newXDGRuntimeDir := n.getPath(xdgRuntimeDir)
+	// 1. Mount the netns into the new run to keep them accessible.
+	// Otherwise cni setup will fail because it cannot access the netns files.
+	err = mountAndMkdirDest(xdgRuntimeDir, newXDGRuntimeDir, none, unix.MS_BIND|unix.MS_SHARED|unix.MS_REC)
+	if err != nil {
+		return err
+	}
+
+	// 2. Also keep /run/systemd if it exists.
+	// Many files are symlinked into this dir, for example /dev/log.
+	runSystemd := "/run/systemd"
+	_, err = os.Stat(runSystemd)
+	if err == nil {
+		newRunSystemd := n.getPath(runSystemd)
+		err = mountAndMkdirDest(runSystemd, newRunSystemd, none, unix.MS_BIND|unix.MS_REC)
+		if err != nil {
+			return err
+		}
+	}
+
+	// 3. On some distros /etc/resolv.conf is symlinked to somewhere under /run.
+	// Because the kernel will follow the symlink before mounting, it is not
+	// possible to mount a file at /etc/resolv.conf. We have to ensure that
+	// the link target will be available in the mount ns.
+	// see: https://github.com/containers/podman/issues/10855
+	resolvePath := resolvconf.DefaultResolvConf
+	linkCount := 0
+	for i := 1; i < len(resolvePath); i++ {
+		// Do not use filepath.EvalSymlinks, we only want the first symlink under /run.
+		// If /etc/resolv.conf has more than one symlink under /run, e.g.
+		// -> /run/systemd/resolve/stub-resolv.conf -> /run/systemd/resolve/resolv.conf
+		// we would put the netns resolv.conf file to the last path. However this will
+		// break dns because the second link does not exist in the mount ns.
+		// see https://github.com/containers/podman/issues/11222
+		//
+		// We also need to resolve all path components not just the last file.
+		// see https://github.com/containers/podman/issues/12461
+
+		if resolvePath[i] != '/' {
+			// if we are at the last char we need to inc i by one because there is no final slash
+			if i == len(resolvePath)-1 {
+				i++
+			} else {
+				// not the end of path, keep going
+				continue
+			}
+		}
+		path := resolvePath[:i]
+
+		fi, err := os.Lstat(path)
+		if err != nil {
+			return fmt.Errorf("failed to stat resolv.conf path: %w", err)
+		}
+
+		// no link, just continue
+		if fi.Mode()&os.ModeSymlink == 0 {
+			continue
+		}
+
+		link, err := os.Readlink(path)
+		if err != nil {
+			return fmt.Errorf("failed to read resolv.conf symlink: %w", err)
+		}
+		linkCount++
+		if filepath.IsAbs(link) {
+			// link is as an absolute path
+			resolvePath = filepath.Join(link, resolvePath[i:])
+		} else {
+			// link is as a relative, join it with the previous path
+			base := filepath.Dir(path)
+			resolvePath = filepath.Join(base, link, resolvePath[i:])
+		}
+		// set i back to zero since we now have a new base path
+		i = 0
+
+		// we have to stop at the first path under /run because we will have an empty /run and will create the path anyway
+		// if we would continue we would need to recreate all links under /run
+		if strings.HasPrefix(resolvePath, "/run/") {
+			break
+		}
+		// make sure we do not loop forever
+		if linkCount == 255 {
+			return errors.New("too many symlinks while resolving /etc/resolv.conf")
+		}
+	}
+	logrus.Debugf("The path of /etc/resolv.conf in the mount ns is %q", resolvePath)
+	// When /etc/resolv.conf on the host is a symlink to /run/systemd/resolve/stub-resolv.conf,
+	// we have to mount an empty filesystem on /run/systemd/resolve in the child namespace,
+	// so as to isolate the directory from the host mount namespace.
+	//
+	// Otherwise our bind-mount for /run/systemd/resolve/stub-resolv.conf is unmounted
+	// when systemd-resolved unlinks and recreates /run/systemd/resolve/stub-resolv.conf on the host.
+	// see: https://github.com/containers/podman/issues/10929
+	if strings.HasPrefix(resolvePath, "/run/systemd/resolve/") {
+		rsr := n.getPath("/run/systemd/resolve")
+		err = mountAndMkdirDest("", rsr, tmpfs, unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV)
+		if err != nil {
+			return err
+		}
+	}
+	if strings.HasPrefix(resolvePath, "/run/") {
+		resolvePath = n.getPath(resolvePath)
+		err = os.MkdirAll(filepath.Dir(resolvePath), 0o700)
+		if err != nil {
+			return wrapError("create resolv.conf directory", err)
+		}
+		// we want to bind mount on this file so we have to create the file first
+		f, err := os.OpenFile(resolvePath, os.O_CREATE|os.O_RDONLY, 0o600)
+		if err != nil {
+			// wrapError() does not format msg, so no format verb here
+			return wrapError("create resolv.conf file", err)
+		}
+		// We only need the file to exist, close it right away to not leak the fd.
+		// Nothing was written so the close error is of no interest.
+		_ = f.Close()
+	}
+	// mount resolv.conf to make use of the host dns
+	err = unix.Mount(n.getPath(resolvConfName), resolvePath, none, unix.MS_BIND, "")
+	if err != nil {
+		return wrapError(fmt.Sprintf("mount resolv.conf to %q", resolvePath), err)
+	}
+
+	// 4. CNI plugins need access to /var/lib/cni
+	if n.backend == CNI {
+		if err := n.mountCNIVarDir(); err != nil {
+			return err
+		}
+	}
+
+	// 5. Mount the new prepared run dir to /run, it has to be recursive to keep the other bind mounts.
+	runDir := n.getPath("run")
+	// relabel the new run directory to the iptables /run label
+	// this is important, otherwise the iptables command will fail
+	err = label.Relabel(runDir, "system_u:object_r:iptables_var_run_t:s0", false)
+	if err != nil {
+		if !errors.Is(err, unix.ENOTSUP) {
+			return wrapError("relabel iptables_var_run_t", err)
+		}
+		logrus.Debugf("Labeling not supported on %q", runDir)
+	}
+	err = mountAndMkdirDest(runDir, "/run", none, unix.MS_BIND|unix.MS_REC)
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+// mountCNIVarDir bind mounts a directory from the netns dir over
+// persistentCNIDir. When persistentCNIDir does not exist, its closest
+// existing parent directory is used as mount target instead, "/" is never used.
+func (n *Netns) mountCNIVarDir() error {
+	varDir := ""
+	varTarget := persistentCNIDir
+	// we can only mount to a target dir which exists, check /var/lib/cni recursively
+	// while we could always use /var there are cases where a user might store the cni
+	// configs under /var/custom and this would break
+	for {
+		if _, err := os.Stat(varTarget); err == nil {
+			varDir = n.getPath(varTarget)
+			break
+		}
+		varTarget = filepath.Dir(varTarget)
+		if varTarget == "/" {
+			break
+		}
+	}
+	// varDir is only set when one of the checked directories exists
+	if varDir == "" {
+		return errors.New("failed to stat /var directory")
+	}
+	if err := os.MkdirAll(varDir, 0o700); err != nil {
+		return wrapError("create var dir", err)
+	}
+	// make sure to mount var first
+	err := unix.Mount(varDir, varTarget, none, unix.MS_BIND, "")
+	if err != nil {
+		return wrapError(fmt.Sprintf("mount %q to %q", varDir, varTarget), err)
+	}
+	return nil
+}
+
+// runInner gets or creates the rootless netns, sets up the mounts inside of
+// it and then calls toRun there. It does not touch the ref counter.
+// The named return value is needed so the deferred function below can see
+// the final error.
+func (n *Netns) runInner(toRun func() error) (err error) {
+	nsRef, newNs, err := n.getOrCreateNetns()
+	if err != nil {
+		return err
+	}
+	defer nsRef.Close()
+	// If a new netns was created make sure to clean it up again on an error to not leak it.
+	if newNs {
+		defer func() {
+			if err != nil {
+				// this err shadows the returned one, a cleanup error is only logged
+				if err := n.cleanup(); err != nil {
+					logrus.Errorf("Rootless netns cleanup error after failed setup: %v", err)
+				}
+			}
+		}()
+	}
+
+	return nsRef.Do(func(_ ns.NetNS) error {
+		if err := n.setupMounts(); err != nil {
+			return err
+		}
+		return toRun()
+	})
+}
+
+// Setup runs toRun inside the rootless netns, the netns is created if needed.
+// When this worked the ref counter is increased by nets, on errors the
+// counter is not changed.
+func (n *Netns) Setup(nets int, toRun func() error) error {
+	err := n.runInner(toRun)
+	if err != nil {
+		return err
+	}
+	_, err = refCount(n.dir, nets)
+	return err
+}
+
+// Teardown decreases the ref counter by nets and runs toRun inside the
+// rootless netns. toRun is called even when updating the counter failed.
+// When the counter reaches zero the rootless netns is cleaned up afterwards.
+// All errors are collected and returned together.
+func (n *Netns) Teardown(nets int, toRun func() error) error {
+	var multiErr *multierror.Error
+	count, countErr := refCount(n.dir, -nets)
+	if countErr != nil {
+		multiErr = multierror.Append(multiErr, countErr)
+	}
+	err := n.runInner(toRun)
+	if err != nil {
+		multiErr = multierror.Append(multiErr, err)
+	}
+
+	// only cleanup if the ref count did not throw an error
+	if count == 0 && countErr == nil {
+		err = n.cleanup()
+		if err != nil {
+			multiErr = multierror.Append(multiErr, wrapError("cleanup", err))
+		}
+	}
+
+	return multiErr.ErrorOrNil()
+}
+
+// Run any long running function in the rootless netns.
+// We need to ensure that during setup/cleanup we are locked to avoid races.
+// However because the given function could be running a long time we must
+// unlock in between, i.e. this is used by podman unshare --rootless-netns
+// and we do not want to keep it locked for the lifetime of the given command.
+//
+// The ref counter is increased by one before and decreased by one after the
+// function was run, if it is zero afterwards the rootless netns is cleaned up.
+// An error from toRun or the netns setup takes precedence over ref count and
+// cleanup errors, these are only logged in this case.
+func (n *Netns) Run(lock *lockfile.LockFile, toRun func() error) error {
+	lock.Lock()
+	defer lock.Unlock()
+	_, err := refCount(n.dir, 1)
+	if err != nil {
+		return err
+	}
+	inner := func() error {
+		// setup is done, release the lock while the given function runs
+		lock.Unlock()
+		err = toRun()
+		// lock again for the ref count update and cleanup below
+		lock.Lock()
+		return err
+	}
+
+	inErr := n.runInner(inner)
+	// make sure to always reset the ref counter afterwards
+	count, err := refCount(n.dir, -1)
+	if err != nil {
+		if inErr == nil {
+			return err
+		}
+		logrus.Errorf("Failed to decrement ref count: %v", err)
+		return inErr
+	}
+	if count == 0 {
+		err = n.cleanup()
+		if err != nil {
+			err = wrapError("cleanup", err)
+			if inErr == nil {
+				return err
+			}
+			logrus.Errorf("Failed to cleanup rootless netns: %v", err)
+			return inErr
+		}
+	}
+
+	return inErr
+}
+
+// refCount adds inc to the counter stored in the ref count file inside dir
+// and returns the new value, inc can be negative to decrement it.
+// A missing or empty file counts as zero. The counter never goes below zero,
+// in this case an error is logged and the counter is reset to zero.
+// On errors -1 is returned together with the error.
+// The read and write are separate steps here, nothing in this function locks the file.
+func refCount(dir string, inc int) (int, error) {
+	file := filepath.Join(dir, refCountFile)
+	content, err := os.ReadFile(file)
+	if err != nil && !errors.Is(err, fs.ErrNotExist) {
+		return -1, wrapError("read ref counter", err)
+	}
+
+	currentCount := 0
+	if len(content) > 0 {
+		currentCount, err = strconv.Atoi(string(content))
+		if err != nil {
+			return -1, wrapError("parse ref counter", err)
+		}
+	}
+
+	currentCount += inc
+	if currentCount < 0 {
+		logrus.Errorf("rootless netns ref counter out of sync, counter is at %d, resetting it back to 0", currentCount)
+		currentCount = 0
+	}
+
+	newNum := strconv.Itoa(currentCount)
+	if err = os.WriteFile(file, []byte(newNum), 0o600); err != nil {
+		return -1, wrapError("write ref counter", err)
+	}
+
+	return currentCount, nil
+}
--- a/vendor/github.com/containers/common/libnetwork/netavark/exec.go
+++ b/vendor/github.com/containers/common/libnetwork/netavark/exec.go
@ -10,6 +10,7 @@ import (
 	"os"
 	"os/exec"
 	"strconv"
+	"strings"

 	"github.com/sirupsen/logrus"
 )
@ -79,6 +80,15 @@ func getRustLogEnv() string {
 func (n *netavarkNetwork) execNetavark(args []string, needPlugin bool, stdin, result interface{}) error {
 	// set the netavark log level to the same as the podman
 	env := append(os.Environ(), getRustLogEnv())
+	// Netavark needs access to iptables in $PATH. As it turns out debian doesn't put
+	// /usr/sbin in $PATH for rootless users. This will break rootless networking completely.
+	// We might break existing users and we cannot expect everyone to change their $PATH so
+	// let's add /usr/sbin to $PATH ourselves.
+	path := os.Getenv("PATH")
+	if !strings.Contains(path, "/usr/sbin") {
+		path += ":/usr/sbin"
+		env = append(env, "PATH="+path)
+	}
 	// if we run with debug log level lets also set RUST_BACKTRACE=1 so we can get the full stack trace in case of panics
 	if logrus.IsLevelEnabled(logrus.DebugLevel) {
 		env = append(env, "RUST_BACKTRACE=1")
--- a/vendor/github.com/containers/common/libnetwork/netavark/network.go
+++ b/vendor/github.com/containers/common/libnetwork/netavark/network.go
@ -12,6 +12,7 @@ import (
 	"strings"
 	"time"

+	"github.com/containers/common/libnetwork/internal/rootlessnetns"
 	"github.com/containers/common/libnetwork/internal/util"
 	"github.com/containers/common/libnetwork/types"
 	"github.com/containers/common/pkg/config"
@ -68,6 +69,9 @@ type netavarkNetwork struct {

 	// networks is a map with loaded networks, the key is the network name
 	networks map[string]*types.Network
+
+	// rootlessNetns is used for the rootless network setup/teardown
+	rootlessNetns *rootlessnetns.Netns
 }

 type InitConfig struct {
@ -82,26 +86,12 @@ type InitConfig struct {
 	// NetworkRunDir is where temporary files are stored, i.e.the ipam db, aardvark config
 	NetworkRunDir string

-	// FirewallDriver sets the firewall driver to use
-	FirewallDriver string
-
-	// DefaultNetwork is the name for the default network.
-	DefaultNetwork string
-	// DefaultSubnet is the default subnet for the default network.
-	DefaultSubnet string
-
-	// DefaultsubnetPools contains the subnets which must be used to allocate a free subnet by network create
-	DefaultsubnetPools []config.SubnetPool
-
-	// DNSBindPort is set the port to pass to netavark for aardvark
-	DNSBindPort uint16
-
-	// PluginDirs list of directories were netavark plugins are located
-	PluginDirs []string
-
 	// Syslog describes whenever the netavark debug output should be log to the syslog as well.
 	// This will use logrus to do so, make sure logrus is set up to log to the syslog.
 	Syslog bool
+
+	// Config containers.conf options
+	Config *config.Config
 }

 // NewNetworkInterface creates the ContainerNetwork interface for the netavark backend.
@ -118,12 +108,12 @@ func NewNetworkInterface(conf *InitConfig) (types.ContainerNetwork, error) {
 		return nil, err
 	}

-	defaultNetworkName := conf.DefaultNetwork
+	defaultNetworkName := conf.Config.Network.DefaultNetwork
 	if defaultNetworkName == "" {
 		defaultNetworkName = types.DefaultNetworkName
 	}

-	defaultSubnet := conf.DefaultSubnet
+	defaultSubnet := conf.Config.Network.DefaultSubnet
 	if defaultSubnet == "" {
 		defaultSubnet = types.DefaultSubnet
 	}
@ -140,11 +130,19 @@ func NewNetworkInterface(conf *InitConfig) (types.ContainerNetwork, error) {
 		return nil, err
 	}

-	defaultSubnetPools := conf.DefaultsubnetPools
+	defaultSubnetPools := conf.Config.Network.DefaultSubnetPools
 	if defaultSubnetPools == nil {
 		defaultSubnetPools = config.DefaultSubnetPools
 	}

+	var netns *rootlessnetns.Netns
+	if unshare.IsRootless() {
+		netns, err = rootlessnetns.New(conf.NetworkRunDir, rootlessnetns.Netavark, conf.Config)
+		if err != nil {
+			return nil, err
+		}
+	}
+
 	n := &netavarkNetwork{
 		networkConfigDir:   conf.NetworkConfigDir,
 		networkRunDir:      conf.NetworkRunDir,
@ -152,14 +150,15 @@ func NewNetworkInterface(conf *InitConfig) (types.ContainerNetwork, error) {
 		aardvarkBinary:     conf.AardvarkBinary,
 		networkRootless:    unshare.IsRootless(),
 		ipamDBPath:         filepath.Join(conf.NetworkRunDir, "ipam.db"),
-		firewallDriver:     conf.FirewallDriver,
+		firewallDriver:     conf.Config.Network.FirewallDriver,
 		defaultNetwork:     defaultNetworkName,
 		defaultSubnet:      defaultNet,
 		defaultsubnetPools: defaultSubnetPools,
-		dnsBindPort:        conf.DNSBindPort,
-		pluginDirs:         conf.PluginDirs,
+		dnsBindPort:        conf.Config.Network.DNSBindPort,
+		pluginDirs:         conf.Config.Network.NetavarkPluginDirs.Get(),
 		lock:               lock,
 		syslog:             conf.Syslog,
+		rootlessNetns:      netns,
 	}

 	return n, nil
--- a/vendor/github.com/containers/common/libnetwork/netavark/run.go
+++ b/vendor/github.com/containers/common/libnetwork/netavark/run.go
@ -72,12 +72,24 @@ func (n *netavarkNetwork) Setup(namespacePath string, options types.SetupOptions
 	}

 	result := map[string]types.StatusBlock{}
-	err = n.execNetavark([]string{"setup", namespacePath}, needPlugin, netavarkOpts, &result)
-	if err != nil {
-		// lets dealloc ips to prevent leaking
-		if err := n.deallocIPs(&options.NetworkOptions); err != nil {
-			logrus.Error(err)
+	setup := func() error {
+		err := n.execNetavark([]string{"setup", namespacePath}, needPlugin, netavarkOpts, &result)
+		if err != nil {
+			// lets dealloc ips to prevent leaking
+			if err := n.deallocIPs(&options.NetworkOptions); err != nil {
+				logrus.Error(err)
+			}
+			return err
 		}
+		return nil
+	}
+
+	if n.rootlessNetns != nil {
+		err = n.rootlessNetns.Setup(len(options.Networks), setup)
+	} else {
+		err = setup()
+	}
+	if err != nil {
 		return nil, err
 	}

@ -112,7 +124,16 @@ func (n *netavarkNetwork) Teardown(namespacePath string, options types.TeardownO
 		return fmt.Errorf("failed to convert net opts: %w", err)
 	}

-	retErr := n.execNetavark([]string{"teardown", namespacePath}, needPlugin, netavarkOpts, nil)
+	var retErr error
+	teardown := func() error {
+		return n.execNetavark([]string{"teardown", namespacePath}, needPlugin, netavarkOpts, nil)
+	}
+
+	if n.rootlessNetns != nil {
+		retErr = n.rootlessNetns.Teardown(len(options.Networks), teardown)
+	} else {
+		retErr = teardown()
+	}

 	// when netavark returned an error we still free the used ips
 	// otherwise we could end up in a state where block the ips forever
@ -160,3 +181,10 @@ func (n *netavarkNetwork) convertNetOpts(opts types.NetworkOptions) (*netavarkOp
 	}
 	return &netavarkOptions, needsPlugin, nil
 }
+
+// RunInRootlessNetns executes toRun via the rootless netns of this backend.
+// It returns types.ErrNotRootlessNetns when no rootless netns was set up,
+// which is the case when the backend was not created as rootless.
+func (n *netavarkNetwork) RunInRootlessNetns(toRun func() error) error {
+	if netns := n.rootlessNetns; netns != nil {
+		return netns.Run(n.lock, toRun)
+	}
+	return types.ErrNotRootlessNetns
+}
--- a/vendor/github.com/containers/common/libnetwork/network/interface.go
+++ b/vendor/github.com/containers/common/libnetwork/network/interface.go
@ -77,17 +77,12 @@ func NetworkBackend(store storage.Store, conf *config.Config, syslog bool) (type
 		}

 		netInt, err := netavark.NewNetworkInterface(&netavark.InitConfig{
-			NetworkConfigDir:   confDir,
-			NetworkRunDir:      runDir,
-			NetavarkBinary:     netavarkBin,
-			AardvarkBinary:     aardvarkBin,
-			PluginDirs:         conf.Network.NetavarkPluginDirs.Get(),
-			FirewallDriver:     conf.Network.FirewallDriver,
-			DefaultNetwork:     conf.Network.DefaultNetwork,
-			DefaultSubnet:      conf.Network.DefaultSubnet,
-			DefaultsubnetPools: conf.Network.DefaultSubnetPools,
-			DNSBindPort:        conf.Network.DNSBindPort,
-			Syslog:             syslog,
+			Config:           conf,
+			NetworkConfigDir: confDir,
+			NetworkRunDir:    runDir,
+			NetavarkBinary:   netavarkBin,
+			AardvarkBinary:   aardvarkBin,
+			Syslog:           syslog,
 		})
 		return types.Netavark, netInt, err
 	case types.CNI:
@ -181,13 +176,10 @@ func getCniInterface(conf *config.Config) (types.ContainerNetwork, error) {
 		}
 	}
 	return cni.NewCNINetworkInterface(&cni.InitConfig{
-		CNIConfigDir:       confDir,
-		CNIPluginDirs:      conf.Network.CNIPluginDirs.Get(),
-		RunDir:             conf.Engine.TmpDir,
-		DefaultNetwork:     conf.Network.DefaultNetwork,
-		DefaultSubnet:      conf.Network.DefaultSubnet,
-		DefaultsubnetPools: conf.Network.DefaultSubnetPools,
-		IsMachine:          machine.IsGvProxyBased(),
+		Config:       conf,
+		CNIConfigDir: confDir,
+		RunDir:       conf.Engine.TmpDir,
+		IsMachine:    machine.IsGvProxyBased(),
 	})
 }

--- a/vendor/github.com/containers/common/libnetwork/types/define.go
+++ b/vendor/github.com/containers/common/libnetwork/types/define.go
@ -18,6 +18,9 @@ var (
 	// exists.
 	ErrNetworkExists = errors.New("network already exists")

+	// ErrNotRootlessNetns indicates the rootless netns cannot be used as root
+	ErrNotRootlessNetns = errors.New("rootless netns cannot be used as root")
+
 	// NameRegex is a regular expression to validate names.
 	// This must NOT be changed.
 	NameRegex = regexp.Delayed("^[a-zA-Z0-9][a-zA-Z0-9_.-]*$")
--- a/vendor/github.com/containers/common/libnetwork/types/network.go
+++ b/vendor/github.com/containers/common/libnetwork/types/network.go
@ -27,6 +27,10 @@ type ContainerNetwork interface {
 	// Teardown will teardown the container network namespace.
 	Teardown(namespacePath string, options TeardownOptions) error

+	// RunInRootlessNetns is used to run the given function in the rootless netns.
+	// Only used as rootless and should return an error as root.
+	RunInRootlessNetns(toRun func() error) error
+
 	// Drivers will return the list of supported network drivers
 	// for this interface.
 	Drivers() []string
--- a/vendor/github.com/containers/common/pkg/cgroups/utils_linux.go
+++ b/vendor/github.com/containers/common/pkg/cgroups/utils_linux.go
@ -4,6 +4,7 @@
 package cgroups

 import (
+	"bufio"
 	"bytes"
 	"errors"
 	"fmt"
@ -11,6 +12,7 @@ import (
 	"path"
 	"path/filepath"
 	"strings"
+	"sync"

 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
@ -143,3 +145,171 @@ func SetBlkioThrottle(res *configs.Resources, cgroupPath string) error {
 	}
 	return nil
 }
+
+// Code below was moved from podman/utils/utils_supported.go and should probably be better
+// integrated here as some parts may be redundant.
+
+// getCgroupProcess parses the /proc/<pid>/cgroup formatted file procFile and
+// returns the cgroup path recorded in it.
+//
+// When a cgroup v2 line ("0::<path>") is found its path is returned, otherwise
+// the longest path of all listed hierarchies is used. An error is returned when
+// the file cannot be opened or read, when a line does not have three ":"
+// separated fields, when no cgroup was found, or when the result is the root
+// cgroup "/" and allowRoot is false.
+func getCgroupProcess(procFile string, allowRoot bool) (string, error) {
+	f, err := os.Open(procFile)
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+
+	scanner := bufio.NewScanner(f)
+	cgroup := ""
+	for scanner.Scan() {
+		line := scanner.Text()
+		parts := strings.SplitN(line, ":", 3)
+		if len(parts) != 3 {
+			return "", fmt.Errorf("cannot parse cgroup line %q", line)
+		}
+		if strings.HasPrefix(line, "0::") {
+			cgroup = line[3:]
+			break
+		}
+		if len(parts[2]) > len(cgroup) {
+			cgroup = parts[2]
+		}
+	}
+	// Scan also stops on read errors; do not return a result that was built
+	// from an incompletely read file.
+	if err := scanner.Err(); err != nil {
+		return "", fmt.Errorf("reading %q: %w", procFile, err)
+	}
+	if len(cgroup) == 0 || (!allowRoot && cgroup == "/") {
+		return "", fmt.Errorf("could not find cgroup mount in %q", procFile)
+	}
+	return cgroup, nil
+}
+
+// GetOwnCgroup returns the cgroup for the current process as read from
+// /proc/self/cgroup. The root cgroup "/" is accepted as a valid result.
+func GetOwnCgroup() (string, error) {
+	return getCgroupProcess("/proc/self/cgroup", true)
+}
+
+// GetOwnCgroupDisallowRoot returns the cgroup for the current process as read
+// from /proc/self/cgroup, but returns an error when that is the root cgroup "/".
+func GetOwnCgroupDisallowRoot() (string, error) {
+	return getCgroupProcess("/proc/self/cgroup", false)
+}
+
+// GetCgroupProcess returns the cgroup for the process with the given pid as
+// read from /proc/<pid>/cgroup. The root cgroup "/" is accepted as a valid result.
+func GetCgroupProcess(pid int) (string, error) {
+	return getCgroupProcess(fmt.Sprintf("/proc/%d/cgroup", pid), true)
+}
+
+// MoveUnderCgroupSubtree moves the processes of the calling process's current
+// cgroup into the sub-cgroup subtree below that cgroup.
+func MoveUnderCgroupSubtree(subtree string) error {
+	return MoveUnderCgroup("", subtree, nil)
+}
+
+// MoveUnderCgroup moves a group of processes to a new cgroup.
+// If cgroup is the empty string, then the current calling process cgroup is used.
+// If processes is empty, then the processes from the current cgroup are moved.
+//
+// The move is done for every hierarchy listed in /proc/self/cgroup, the target
+// directory <root>/<cgroup>/<subtree> is created when missing. A failure to
+// move a single process is only logged at debug level, all other errors are
+// returned.
+func MoveUnderCgroup(cgroup, subtree string, processes []uint32) error {
+	procFile := "/proc/self/cgroup"
+	f, err := os.Open(procFile)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	unifiedMode, err := IsCgroup2UnifiedMode()
+	if err != nil {
+		return err
+	}
+
+	scanner := bufio.NewScanner(f)
+	for scanner.Scan() {
+		line := scanner.Text()
+		parts := strings.SplitN(line, ":", 3)
+		if len(parts) != 3 {
+			return fmt.Errorf("cannot parse cgroup line %q", line)
+		}
+
+		// root cgroup, skip it
+		if parts[2] == "/" && !(unifiedMode && parts[1] == "") {
+			continue
+		}
+
+		cgroupRoot := "/sys/fs/cgroup"
+		// Special case the unified mount on hybrid cgroup and named hierarchies.
+		// This works on Fedora 31, but we should really parse the mounts to see
+		// where the cgroup hierarchy is mounted.
+		if parts[1] == "" && !unifiedMode {
+			// If it is not using unified mode, the cgroup v2 hierarchy is
+			// usually mounted under /sys/fs/cgroup/unified
+			cgroupRoot = filepath.Join(cgroupRoot, "unified")
+
+			// Ignore the unified mount if it doesn't exist
+			if _, err := os.Stat(cgroupRoot); err != nil && os.IsNotExist(err) {
+				continue
+			}
+		} else if parts[1] != "" {
+			// Assume the controller is mounted at /sys/fs/cgroup/$CONTROLLER.
+			controller := strings.TrimPrefix(parts[1], "name=")
+			cgroupRoot = filepath.Join(cgroupRoot, controller)
+		}
+
+		parentCgroup := cgroup
+		if parentCgroup == "" {
+			parentCgroup = parts[2]
+		}
+		newCgroup := filepath.Join(cgroupRoot, parentCgroup, subtree)
+		if err := os.MkdirAll(newCgroup, 0o755); err != nil && !os.IsExist(err) {
+			return err
+		}
+
+		currentProcs := filepath.Join(cgroupRoot, parts[2], "cgroup.procs")
+		if err := moveProcessesToCgroup(newCgroup, currentProcs, processes); err != nil {
+			return err
+		}
+	}
+	// Scan also stops on read errors, do not report a partial move as success.
+	if err := scanner.Err(); err != nil {
+		return fmt.Errorf("reading %q: %w", procFile, err)
+	}
+	return nil
+}
+
+// moveProcessesToCgroup writes pids to the cgroup.procs file of newCgroup. The
+// pids are taken from processes or, when that is empty, from the cgroup.procs
+// file currentProcs. The target file is closed before returning so that the
+// caller does not accumulate one open file per hierarchy.
+func moveProcessesToCgroup(newCgroup, currentProcs string, processes []uint32) error {
+	target, err := os.OpenFile(filepath.Join(newCgroup, "cgroup.procs"), os.O_RDWR, 0o755)
+	if err != nil {
+		return err
+	}
+	defer target.Close()
+
+	if len(processes) > 0 {
+		for _, pid := range processes {
+			if _, err := target.WriteString(fmt.Sprintf("%d\n", pid)); err != nil {
+				logrus.Debugf("Cannot move process %d to cgroup %q: %v", pid, newCgroup, err)
+			}
+		}
+		return nil
+	}
+
+	processesData, err := os.ReadFile(currentProcs)
+	if err != nil {
+		return err
+	}
+	for _, pid := range bytes.Split(processesData, []byte("\n")) {
+		if len(pid) == 0 {
+			continue
+		}
+		if _, err := target.Write(pid); err != nil {
+			logrus.Debugf("Cannot move process %s to cgroup %q: %v", string(pid), newCgroup, err)
+		}
+	}
+	return nil
+}
+
+var (
+	maybeMoveToSubCgroupSync    sync.Once
+	maybeMoveToSubCgroupSyncErr error
+)
+
+// MaybeMoveToSubCgroup moves the current process in a sub cgroup when
+// it is running in the root cgroup on a system that uses cgroupv2.
+// The work is done only on the first call, every later call returns the
+// result recorded by that first call.
+func MaybeMoveToSubCgroup() error {
+	maybeMoveToSubCgroupSync.Do(func() {
+		maybeMoveToSubCgroupSyncErr = func() error {
+			unifiedMode, err := IsCgroup2UnifiedMode()
+			switch {
+			case err != nil:
+				return err
+			case !unifiedMode:
+				// nothing to do on cgroup v1
+				return nil
+			}
+
+			cgroup, err := GetOwnCgroup()
+			if err != nil {
+				return err
+			}
+			if cgroup != "/" {
+				// already in a non-root cgroup
+				return nil
+			}
+			return MoveUnderCgroupSubtree("init")
+		}()
+	})
+	return maybeMoveToSubCgroupSyncErr
+}
--- a/vendor/github.com/containers/common/pkg/netns/netns_linux.go
+++ b/vendor/github.com/containers/common/pkg/netns/netns_linux.go
@ -32,10 +32,12 @@ import (
 	"github.com/containernetworking/plugins/pkg/ns"
 	"github.com/containers/storage/pkg/homedir"
 	"github.com/containers/storage/pkg/unshare"
-	"github.com/sirupsen/logrus"
 	"golang.org/x/sys/unix"
 )

+// threadNsPath is the /proc path to the current netns handle for the current thread
+const threadNsPath = "/proc/thread-self/ns/net"
+
 // GetNSRunDir returns the dir of where to create the netNS. When running
 // rootless, it needs to be at a location writable by user.
 func GetNSRunDir() (string, error) {
@ -49,6 +51,10 @@ func GetNSRunDir() (string, error) {
 	return "/run/netns", nil
 }

+// NewNSAtPath works like NewNSWithName but takes the full path nsPath of the
+// netns mount point file instead of building it from a name below the netns
+// run dir. It delegates to newNSPath.
+func NewNSAtPath(nsPath string) (ns.NetNS, error) {
+	return newNSPath(nsPath)
+}
+
 // NewNS creates a new persistent (bind-mounted) network namespace and returns
 // an object representing that namespace, without switching to it.
 func NewNS() (ns.NetNS, error) {
@ -111,8 +117,12 @@ func NewNSWithName(name string) (ns.NetNS, error) {
 		}
 	}

-	// create an empty file at the mount point
 	nsPath := path.Join(nsRunDir, name)
+	return newNSPath(nsPath)
+}
+
+func newNSPath(nsPath string) (ns.NetNS, error) {
+	// create an empty file at the mount point
 	mountPointFd, err := os.OpenFile(nsPath, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o600)
 	if err != nil {
 		return nil, err
@ -140,24 +150,10 @@ func NewNSWithName(name string) (ns.NetNS, error) {
 		// Don't unlock. By not unlocking, golang will kill the OS thread when the
 		// goroutine is done (for go1.10+)

-		threadNsPath := getCurrentThreadNetNSPath()
-
-		var origNS ns.NetNS
-		origNS, err = ns.GetNS(threadNsPath)
-		if err != nil {
-			logrus.Warnf("Cannot open current network namespace %s: %q", threadNsPath, err)
-			return
-		}
-		defer func() {
-			if err := origNS.Close(); err != nil {
-				logrus.Errorf("Unable to close namespace: %q", err)
-			}
-		}()
-
 		// create a new netns on the current thread
 		err = unix.Unshare(unix.CLONE_NEWNET)
 		if err != nil {
-			logrus.Warnf("Cannot create a new network namespace: %q", err)
+			err = fmt.Errorf("unshare network namespace: %w", err)
 			return
 		}

@ -181,13 +177,8 @@ func NewNSWithName(name string) (ns.NetNS, error) {

 // UnmountNS unmounts the given netns path
 func UnmountNS(nsPath string) error {
-	nsRunDir, err := GetNSRunDir()
-	if err != nil {
-		return err
-	}
-
 	// Only unmount if it's been bind-mounted (don't touch namespaces in /proc...)
-	if strings.HasPrefix(nsPath, nsRunDir) {
+	if !strings.HasPrefix(nsPath, "/proc/") {
 		if err := unix.Unmount(nsPath, unix.MNT_DETACH); err != nil {
 			return fmt.Errorf("failed to unmount NS: at %s: %v", nsPath, err)
 		}
@ -199,11 +190,3 @@ func UnmountNS(nsPath string) error {

 	return nil
 }
-
-// getCurrentThreadNetNSPath copied from pkg/ns
-func getCurrentThreadNetNSPath() string {
-	// /proc/self/ns/net returns the namespace of the main thread, not
-	// of whatever thread this goroutine is running on.  Make sure we
-	// use the thread's net namespace since the thread is switching around
-	return fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())
-}
--- a/vendor/github.com/containers/common/pkg/systemd/systemd_linux.go
+++ b/vendor/github.com/containers/common/pkg/systemd/systemd_linux.go
@ -0,0 +1,151 @@
+package systemd
+
+import (
+	"context"
+	"crypto/rand"
+	"fmt"
+	"os"
+	"strconv"
+	"sync"
+
+	"github.com/containers/common/pkg/cgroups"
+	"github.com/containers/storage/pkg/unshare"
+	systemdDbus "github.com/coreos/go-systemd/v22/dbus"
+	"github.com/godbus/dbus/v5"
+	"github.com/sirupsen/logrus"
+)
+
+var (
+	runsOnSystemdOnce sync.Once
+	runsOnSystemd     bool
+)
+
+// RunsOnSystemd returns whether the system is using systemd. The check is
+// done once, later calls return the cached result.
+func RunsOnSystemd() bool {
+	runsOnSystemdOnce.Do(func() {
+		// sd_booted(3) names the existence of this directory as the check
+		info, statErr := os.Stat("/run/systemd/system")
+		if statErr != nil {
+			runsOnSystemd = false
+			return
+		}
+		runsOnSystemd = info.IsDir()
+	})
+	return runsOnSystemd
+}
+
+// moveProcessPIDFileToScope reads a pid from the file pidPath and moves that
+// process into the systemd scope named scope below slice. A missing pid file
+// is not treated as an error.
+func moveProcessPIDFileToScope(pidPath, slice, scope string) error {
+	content, readErr := os.ReadFile(pidPath)
+	switch {
+	case readErr == nil:
+		// pid file was read, parse it below
+	case os.IsNotExist(readErr):
+		// do not raise an error if the file doesn't exist
+		return nil
+	default:
+		return fmt.Errorf("cannot read pid file: %w", readErr)
+	}
+
+	parsed, parseErr := strconv.ParseUint(string(content), 10, 0)
+	if parseErr != nil {
+		return fmt.Errorf("cannot parse pid file %s: %w", pidPath, parseErr)
+	}
+	return moveProcessToScope(int(parsed), slice, scope)
+}
+
+// moveProcessToScope moves the process pid into the systemd scope named scope
+// below slice via RunUnderSystemdScope. A D-Bus error named
+// org.freedesktop.DBus.Error.UnixProcessIdUnknown is not reported as an error.
+func moveProcessToScope(pid int, slice, scope string) error {
+	scopeErr := RunUnderSystemdScope(pid, slice, scope)
+	dbusErr, isDbusErr := scopeErr.(dbus.Error)
+	if isDbusErr && dbusErr.Name == "org.freedesktop.DBus.Error.UnixProcessIdUnknown" {
+		// If the PID is not valid anymore, do not return an error.
+		return nil
+	}
+	return scopeErr
+}
+
+// MoveRootlessNetnsSlirpProcessToUserSlice moves the slirp4netns process for the rootless netns
+// into a different scope so that systemd does not kill it with a container.
+// The scope is created below user.slice with a random suffix in its name.
+func MoveRootlessNetnsSlirpProcessToUserSlice(pid int) error {
+	suffix := make([]byte, 4)
+	if _, err := rand.Read(suffix); err != nil {
+		return err
+	}
+	scopeName := fmt.Sprintf("rootless-netns-%x.scope", suffix)
+	return moveProcessToScope(pid, "user.slice", scopeName)
+}
+
+// MovePauseProcessToScope moves the pause process used for rootless mode to keep the namespaces alive to
+// a separate scope.
+//
+// The pid is read from the file pausePidPath. The move is attempted up to 10
+// times, each time with a new random scope name below user.slice. A failure
+// is never returned to the caller, it is only logged: as a warning when
+// running on systemd with cgroup v2, at debug level otherwise.
+func MovePauseProcessToScope(pausePidPath string) {
+	var err error
+
+	for i := 0; i < 10; i++ {
+		randBytes := make([]byte, 4)
+		_, err = rand.Read(randBytes)
+		if err != nil {
+			logrus.Errorf("failed to read random bytes: %v", err)
+			continue
+		}
+		err = moveProcessPIDFileToScope(pausePidPath, "user.slice", fmt.Sprintf("podman-pause-%x.scope", randBytes))
+		if err == nil {
+			return
+		}
+	}
+
+	if err != nil {
+		unified, err2 := cgroups.IsCgroup2UnifiedMode()
+		if err2 != nil {
+			// log the detection error here, err still holds the move error
+			logrus.Warnf("Failed to detect if running with cgroup unified: %v", err2)
+		}
+		if RunsOnSystemd() && unified {
+			logrus.Warnf("Failed to add pause process to systemd sandbox cgroup: %v", err)
+		} else {
+			logrus.Debugf("Failed to add pause process to systemd sandbox cgroup: %v", err)
+		}
+	}
+}
+
+// RunUnderSystemdScope adds the specified pid to a systemd scope
+//
+// A transient unit named unitName is started in slice with the pid as its only
+// member and the call blocks until systemd reports the result of the start job.
+// When starting the unit fails and a scope unit with that name already has a
+// control group, the pid is moved into that cgroup instead; if that also fails
+// the error from starting the unit is returned.
+func RunUnderSystemdScope(pid int, slice string, unitName string) error {
+	var properties []systemdDbus.Property
+	var conn *systemdDbus.Conn
+	var err error
+
+	// With a non-zero rootless UID use the connection for that user,
+	// otherwise use the default systemd connection.
+	if unshare.GetRootlessUID() != 0 {
+		conn, err = cgroups.UserConnection(unshare.GetRootlessUID())
+		if err != nil {
+			return err
+		}
+	} else {
+		conn, err = systemdDbus.NewWithContext(context.Background())
+		if err != nil {
+			return err
+		}
+	}
+	defer conn.Close()
+	properties = append(properties, systemdDbus.PropSlice(slice))
+	properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
+	properties = append(properties, newProp("Delegate", true))
+	properties = append(properties, newProp("DefaultDependencies", false))
+	// ch receives the job result, it is read below once the job was queued
+	ch := make(chan string)
+	_, err = conn.StartTransientUnitContext(context.Background(), unitName, "replace", properties, ch)
+	if err != nil {
+		// On errors check if the cgroup already exists, if it does move the process there
+		if props, err := conn.GetUnitTypePropertiesContext(context.Background(), unitName, "Scope"); err == nil {
+			if cgroup, ok := props["ControlGroup"].(string); ok && cgroup != "" {
+				if err := cgroups.MoveUnderCgroup(cgroup, "", []uint32{uint32(pid)}); err == nil {
+					return nil
+				}
+				// On errors return the original error message we got from StartTransientUnit.
+			}
+		}
+		// the inner err variables are scoped to the ifs above, this is the start error
+		return err
+	}
+
+	// Block until job is started
+	<-ch
+
+	return nil
+}
+
+// newProp builds a systemd D-Bus property called name whose value is units
+// wrapped in a D-Bus variant.
+func newProp(name string, units interface{}) systemdDbus.Property {
+	prop := systemdDbus.Property{Name: name}
+	prop.Value = dbus.MakeVariant(units)
+	return prop
+}
--- a/vendor/github.com/containers/common/pkg/systemd/systemd_unsupported.go
+++ b/vendor/github.com/containers/common/pkg/systemd/systemd_unsupported.go
@ -0,0 +1,15 @@
+//go:build !linux
+
+package systemd
+
+import "errors"
+
+// RunsOnSystemd always returns false in this non-Linux build.
+func RunsOnSystemd() bool {
+	return false
+}
+
+// MovePauseProcessToScope does nothing in this non-Linux build.
+func MovePauseProcessToScope(pausePidPath string) {}
+
+// RunUnderSystemdScope is not supported in this non-Linux build and always
+// returns an error.
+func RunUnderSystemdScope(pid int, slice string, unitName string) error {
+	return errors.New("RunUnderSystemdScope not supported on this OS")
+}