Fix race conditions in rootless cni setup

There was an race condition when calling `GetRootlessCNINetNs()`. It
created the rootless cni directory before it got locked. Therefore
another process could have called cleanup and removed this directory
before it was used resulting in errors. The lockfile got moved into the
XDG_RUNTIME_DIR directory to prevent a panic when the parent dir was
removed by cleanup.

Fixes #10930
Fixes #10922

To make this even more robust `GetRootlessCNINetNs()` will now return
locked. This guarantees that we can run `Do()` after `GetRootlessCNINetNs()`
before another process could have called `Cleanup()` in between.

[NO TESTS NEEDED] CI is flaking, hopefully this will fix it.

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
This commit is contained in:
Paul Holzinger
2021-07-15 11:22:01 +02:00
parent 1568247ec8
commit 0007c98ddb
2 changed files with 208 additions and 196 deletions

View File

@ -108,7 +108,7 @@ func (r *Runtime) getPodNetwork(id, name, nsPath string, networks []string, port
type RootlessCNI struct {
ns ns.NetNS
dir string
lock lockfile.Locker
Lock lockfile.Locker
}
// getPath will join the given path to the rootless cni dir
@ -234,16 +234,17 @@ func (r *RootlessCNI) Do(toRun func() error) error {
return err
}
// Cleanup the rootless cni namespace if needed
// check if we have running containers with the bridge network mode
// Cleanup the rootless cni namespace if needed.
// It checks if we have running containers with the bridge network mode.
// Cleanup() will try to lock RootlessCNI, therefore you have to call it with an unlocked
func (r *RootlessCNI) Cleanup(runtime *Runtime) error {
_, err := os.Stat(r.dir)
if os.IsNotExist(err) {
// the directory does not exists no need for cleanup
return nil
}
r.lock.Lock()
defer r.lock.Unlock()
r.Lock.Lock()
defer r.Lock.Unlock()
running := func(c *Container) bool {
// we cannot use c.state() because it will try to lock the container
// using c.state.State directly should be good enough for this use case
@ -301,26 +302,37 @@ func (r *RootlessCNI) Cleanup(runtime *Runtime) error {
// GetRootlessCNINetNs returns the rootless cni object. If create is set to true
// the rootless cni namespace will be created if it does not exists already.
// If called as root it returns always nil.
// On success the returned RootlessCNI lock is locked and must be unlocked by the caller.
func (r *Runtime) GetRootlessCNINetNs(new bool) (*RootlessCNI, error) {
if !rootless.IsRootless() {
return nil, nil
}
var rootlessCNINS *RootlessCNI
if rootless.IsRootless() {
runDir, err := util.GetRuntimeDir()
if err != nil {
return nil, err
}
cniDir := filepath.Join(runDir, "rootless-cni")
err = os.MkdirAll(cniDir, 0700)
if err != nil {
return nil, errors.Wrap(err, "could not create rootless-cni directory")
}
lfile := filepath.Join(cniDir, "rootless-cni.lck")
lfile := filepath.Join(runDir, "rootless-cni.lock")
lock, err := lockfile.GetLockfile(lfile)
if err != nil {
return nil, errors.Wrap(err, "failed to get rootless-cni lockfile")
}
lock.Lock()
defer lock.Unlock()
defer func() {
// In case of an error (early exit) rootlessCNINS will be nil.
// Make sure to unlock otherwise we could deadlock.
if rootlessCNINS == nil {
lock.Unlock()
}
}()
cniDir := filepath.Join(runDir, "rootless-cni")
err = os.MkdirAll(cniDir, 0700)
if err != nil {
return nil, errors.Wrap(err, "could not create rootless-cni directory")
}
nsDir, err := netns.GetNSRunDir()
if err != nil {
@ -329,7 +341,10 @@ func (r *Runtime) GetRootlessCNINetNs(new bool) (*RootlessCNI, error) {
path := filepath.Join(nsDir, rootlessCNINSName)
ns, err := ns.GetNS(path)
if err != nil {
if new {
if !new {
// return a error if we could not get the namespace and should no create one
return nil, errors.Wrap(err, "error getting rootless cni network namespace")
}
// create a new namespace
logrus.Debug("creating rootless cni network namespace")
ns, err = netns.NewNSWithName(rootlessCNINSName)
@ -479,10 +494,6 @@ func (r *Runtime) GetRootlessCNINetNs(new bool) (*RootlessCNI, error) {
if err != nil {
return nil, errors.Wrap(err, "could not create rootless-cni netns directory")
}
} else {
// return a error if we could not get the namespace and should no create one
return nil, errors.Wrap(err, "error getting rootless cni network namespace")
}
}
// The CNI plugins need access to iptables in $PATH. As it turns out debian doesn't put
@ -495,11 +506,12 @@ func (r *Runtime) GetRootlessCNINetNs(new bool) (*RootlessCNI, error) {
os.Setenv("PATH", path)
}
// Important set rootlessCNINS as last step.
// Do not return any errors after this.
rootlessCNINS = &RootlessCNI{
ns: ns,
dir: cniDir,
lock: lock,
}
Lock: lock,
}
return rootlessCNINS, nil
}
@ -548,9 +560,8 @@ func (r *Runtime) setUpOCICNIPod(podNetwork ocicni.PodNetwork) ([]ocicni.NetResu
// rootlessCNINS is nil if we are root
if rootlessCNINS != nil {
// execute the cni setup in the rootless net ns
rootlessCNINS.lock.Lock()
err = rootlessCNINS.Do(setUpPod)
rootlessCNINS.lock.Unlock()
rootlessCNINS.Lock.Unlock()
} else {
err = setUpPod()
}
@ -762,9 +773,8 @@ func (r *Runtime) teardownOCICNIPod(podNetwork ocicni.PodNetwork) error {
// rootlessCNINS is nil if we are root
if rootlessCNINS != nil {
// execute the cni setup in the rootless net ns
rootlessCNINS.lock.Lock()
err = rootlessCNINS.Do(tearDownPod)
rootlessCNINS.lock.Unlock()
rootlessCNINS.Lock.Unlock()
if err == nil {
err = rootlessCNINS.Cleanup(r)
}

View File

@ -403,6 +403,8 @@ func (ic *ContainerEngine) Unshare(ctx context.Context, args []string, options e
if err != nil {
return err
}
// make sure to unlock, unshare can run for a long time
rootlesscni.Lock.Unlock()
defer rootlesscni.Cleanup(ic.Libpod)
return rootlesscni.Do(unshare)
}