Fix race conditions in rootless cni setup

There was an race condition when calling `GetRootlessCNINetNs()`. It
created the rootless cni directory before it got locked. Therefore
another process could have called cleanup and removed this directory
before it was used resulting in errors. The lockfile got moved into the
XDG_RUNTIME_DIR directory to prevent a panic when the parent dir was
removed by cleanup.

Fixes #10930
Fixes #10922

To make this even more robust `GetRootlessCNINetNs()` will now return
locked. This guarantees that we can run `Do()` after `GetRootlessCNINetNs()`
before another process could have called `Cleanup()` in between.

[NO TESTS NEEDED] CI is flaking, hopefully this will fix it.

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
This commit is contained in:
Paul Holzinger
2021-07-15 11:22:01 +02:00
parent 1568247ec8
commit 0007c98ddb
2 changed files with 208 additions and 196 deletions

View File

@ -108,7 +108,7 @@ func (r *Runtime) getPodNetwork(id, name, nsPath string, networks []string, port
type RootlessCNI struct { type RootlessCNI struct {
ns ns.NetNS ns ns.NetNS
dir string dir string
lock lockfile.Locker Lock lockfile.Locker
} }
// getPath will join the given path to the rootless cni dir // getPath will join the given path to the rootless cni dir
@ -234,16 +234,17 @@ func (r *RootlessCNI) Do(toRun func() error) error {
return err return err
} }
// Cleanup the rootless cni namespace if needed // Cleanup the rootless cni namespace if needed.
// check if we have running containers with the bridge network mode // It checks if we have running containers with the bridge network mode.
// Cleanup() will try to lock RootlessCNI, therefore you have to call it with an unlocked
func (r *RootlessCNI) Cleanup(runtime *Runtime) error { func (r *RootlessCNI) Cleanup(runtime *Runtime) error {
_, err := os.Stat(r.dir) _, err := os.Stat(r.dir)
if os.IsNotExist(err) { if os.IsNotExist(err) {
// the directory does not exists no need for cleanup // the directory does not exists no need for cleanup
return nil return nil
} }
r.lock.Lock() r.Lock.Lock()
defer r.lock.Unlock() defer r.Lock.Unlock()
running := func(c *Container) bool { running := func(c *Container) bool {
// we cannot use c.state() because it will try to lock the container // we cannot use c.state() because it will try to lock the container
// using c.state.State directly should be good enough for this use case // using c.state.State directly should be good enough for this use case
@ -301,26 +302,37 @@ func (r *RootlessCNI) Cleanup(runtime *Runtime) error {
// GetRootlessCNINetNs returns the rootless cni object. If create is set to true // GetRootlessCNINetNs returns the rootless cni object. If create is set to true
// the rootless cni namespace will be created if it does not exists already. // the rootless cni namespace will be created if it does not exists already.
// If called as root it returns always nil.
// On success the returned RootlessCNI lock is locked and must be unlocked by the caller.
func (r *Runtime) GetRootlessCNINetNs(new bool) (*RootlessCNI, error) { func (r *Runtime) GetRootlessCNINetNs(new bool) (*RootlessCNI, error) {
if !rootless.IsRootless() {
return nil, nil
}
var rootlessCNINS *RootlessCNI var rootlessCNINS *RootlessCNI
if rootless.IsRootless() {
runDir, err := util.GetRuntimeDir() runDir, err := util.GetRuntimeDir()
if err != nil { if err != nil {
return nil, err return nil, err
} }
cniDir := filepath.Join(runDir, "rootless-cni")
err = os.MkdirAll(cniDir, 0700)
if err != nil {
return nil, errors.Wrap(err, "could not create rootless-cni directory")
}
lfile := filepath.Join(cniDir, "rootless-cni.lck") lfile := filepath.Join(runDir, "rootless-cni.lock")
lock, err := lockfile.GetLockfile(lfile) lock, err := lockfile.GetLockfile(lfile)
if err != nil { if err != nil {
return nil, errors.Wrap(err, "failed to get rootless-cni lockfile") return nil, errors.Wrap(err, "failed to get rootless-cni lockfile")
} }
lock.Lock() lock.Lock()
defer lock.Unlock() defer func() {
// In case of an error (early exit) rootlessCNINS will be nil.
// Make sure to unlock otherwise we could deadlock.
if rootlessCNINS == nil {
lock.Unlock()
}
}()
cniDir := filepath.Join(runDir, "rootless-cni")
err = os.MkdirAll(cniDir, 0700)
if err != nil {
return nil, errors.Wrap(err, "could not create rootless-cni directory")
}
nsDir, err := netns.GetNSRunDir() nsDir, err := netns.GetNSRunDir()
if err != nil { if err != nil {
@ -329,7 +341,10 @@ func (r *Runtime) GetRootlessCNINetNs(new bool) (*RootlessCNI, error) {
path := filepath.Join(nsDir, rootlessCNINSName) path := filepath.Join(nsDir, rootlessCNINSName)
ns, err := ns.GetNS(path) ns, err := ns.GetNS(path)
if err != nil { if err != nil {
if new { if !new {
// return a error if we could not get the namespace and should no create one
return nil, errors.Wrap(err, "error getting rootless cni network namespace")
}
// create a new namespace // create a new namespace
logrus.Debug("creating rootless cni network namespace") logrus.Debug("creating rootless cni network namespace")
ns, err = netns.NewNSWithName(rootlessCNINSName) ns, err = netns.NewNSWithName(rootlessCNINSName)
@ -479,10 +494,6 @@ func (r *Runtime) GetRootlessCNINetNs(new bool) (*RootlessCNI, error) {
if err != nil { if err != nil {
return nil, errors.Wrap(err, "could not create rootless-cni netns directory") return nil, errors.Wrap(err, "could not create rootless-cni netns directory")
} }
} else {
// return a error if we could not get the namespace and should no create one
return nil, errors.Wrap(err, "error getting rootless cni network namespace")
}
} }
// The CNI plugins need access to iptables in $PATH. As it turns out debian doesn't put // The CNI plugins need access to iptables in $PATH. As it turns out debian doesn't put
@ -495,11 +506,12 @@ func (r *Runtime) GetRootlessCNINetNs(new bool) (*RootlessCNI, error) {
os.Setenv("PATH", path) os.Setenv("PATH", path)
} }
// Important set rootlessCNINS as last step.
// Do not return any errors after this.
rootlessCNINS = &RootlessCNI{ rootlessCNINS = &RootlessCNI{
ns: ns, ns: ns,
dir: cniDir, dir: cniDir,
lock: lock, Lock: lock,
}
} }
return rootlessCNINS, nil return rootlessCNINS, nil
} }
@ -548,9 +560,8 @@ func (r *Runtime) setUpOCICNIPod(podNetwork ocicni.PodNetwork) ([]ocicni.NetResu
// rootlessCNINS is nil if we are root // rootlessCNINS is nil if we are root
if rootlessCNINS != nil { if rootlessCNINS != nil {
// execute the cni setup in the rootless net ns // execute the cni setup in the rootless net ns
rootlessCNINS.lock.Lock()
err = rootlessCNINS.Do(setUpPod) err = rootlessCNINS.Do(setUpPod)
rootlessCNINS.lock.Unlock() rootlessCNINS.Lock.Unlock()
} else { } else {
err = setUpPod() err = setUpPod()
} }
@ -762,9 +773,8 @@ func (r *Runtime) teardownOCICNIPod(podNetwork ocicni.PodNetwork) error {
// rootlessCNINS is nil if we are root // rootlessCNINS is nil if we are root
if rootlessCNINS != nil { if rootlessCNINS != nil {
// execute the cni setup in the rootless net ns // execute the cni setup in the rootless net ns
rootlessCNINS.lock.Lock()
err = rootlessCNINS.Do(tearDownPod) err = rootlessCNINS.Do(tearDownPod)
rootlessCNINS.lock.Unlock() rootlessCNINS.Lock.Unlock()
if err == nil { if err == nil {
err = rootlessCNINS.Cleanup(r) err = rootlessCNINS.Cleanup(r)
} }

View File

@ -403,6 +403,8 @@ func (ic *ContainerEngine) Unshare(ctx context.Context, args []string, options e
if err != nil { if err != nil {
return err return err
} }
// make sure to unlock, unshare can run for a long time
rootlesscni.Lock.Unlock()
defer rootlesscni.Cleanup(ic.Libpod) defer rootlesscni.Cleanup(ic.Libpod)
return rootlesscni.Do(unshare) return rootlesscni.Do(unshare)
} }