rootless netns: recover from invalid netns

I made a change in c/common[1] to prevent duplicates in netns names.
This now causes a problem in podman[2] where the rootless netns will no
longer work after the netns became invalid but the underlying path still
exists. AFAICT this happens when the podman pause process got killed and
we are now in a different user namespace.

While I do not know what causes this, this commit should make it at
least possible to recover from this situation automatically as it used
to be before[1].

The problem with this recovery is that containers started before it will
not be able to talk to containers started after it. A restart of the
previous container will fix it, but this was also the case before.

[NO NEW TESTS NEEDED]

[1] https://github.com/containers/common/pull/1381
[2] https://github.com/containers/podman/issues/17903#issuecomment-1494169843

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
This commit is contained in:
Paul Holzinger
2023-04-03 15:56:39 +02:00
parent 80a199a83c
commit 2051e54e01

View File

@ -365,15 +365,26 @@ func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) {
netnsName := fmt.Sprintf("%s-%x", rootlessNetNsName, hash[:10])
path := filepath.Join(nsDir, netnsName)
ns, err := ns.GetNS(path)
nsReference, err := ns.GetNS(path)
if err != nil {
if !new {
// return an error if we could not get the namespace and should not create one
return nil, fmt.Errorf("getting rootless network namespace: %w", err)
}
// When the netns is not valid but the file exists we have to remove it first,
// https://github.com/containers/common/pull/1381 changed the behavior of
// NewNSWithName() so it will now error when the file already exists.
// https://github.com/containers/podman/issues/17903#issuecomment-1494329622
if errors.As(err, &ns.NSPathNotNSErr{}) {
logrus.Infof("rootless netns is no longer valid: %v", err)
// ignore errors, if something is wrong NewNSWithName() will fail below anyway
_ = os.Remove(path)
}
// create a new namespace
logrus.Debugf("creating rootless network namespace with name %q", netnsName)
ns, err = netns.NewNSWithName(netnsName)
nsReference, err = netns.NewNSWithName(netnsName)
if err != nil {
return nil, fmt.Errorf("creating rootless network namespace: %w", err)
}
@ -408,7 +419,7 @@ func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) {
}
// Note we do not use --exit-fd, we kill this process by pid
cmdArgs = append(cmdArgs, "-c", "-r", "3")
cmdArgs = append(cmdArgs, "--netns-type=path", ns.Path(), "tap0")
cmdArgs = append(cmdArgs, "--netns-type=path", nsReference.Path(), "tap0")
cmd := exec.Command(path, cmdArgs...)
logrus.Debugf("slirp4netns command: %s", strings.Join(cmd.Args, " "))
@ -540,7 +551,7 @@ func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) {
// Important set rootlessNetNS as last step.
// Do not return any errors after this.
rootlessNetNS = &RootlessNetNS{
ns: ns,
ns: nsReference,
dir: rootlessNetNsDir,
Lock: lock,
}