mirror of
https://github.com/containers/podman.git
synced 2025-05-17 23:26:08 +08:00
rootless netns: recover from invalid netns
I made a change in c/common[1] to prevent duplicates in netns names. This now causes a problem in podman[2] where the rootless netns will no longer work after the netns became invalid but the underlying path still exists. AFAICT this happens when the podman pause process got killed and we are now in a different user namespace. While I do not know what causes this, this commit should make it at least possible to recover from this situation automatically, as it used to be before[1]. The problem with that is that containers started before the recovery will not be able to talk to containers started after it. A restart of the previous container will fix it, but this was also the case before. [NO NEW TESTS NEEDED] [1] https://github.com/containers/common/pull/1381 [2] https://github.com/containers/podman/issues/17903#issuecomment-1494169843 Signed-off-by: Paul Holzinger <pholzing@redhat.com>
This commit is contained in:
@ -365,15 +365,26 @@ func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) {
|
||||
netnsName := fmt.Sprintf("%s-%x", rootlessNetNsName, hash[:10])
|
||||
|
||||
path := filepath.Join(nsDir, netnsName)
|
||||
ns, err := ns.GetNS(path)
|
||||
nsReference, err := ns.GetNS(path)
|
||||
if err != nil {
|
||||
if !new {
|
||||
// return an error if we could not get the namespace and should not create one
|
||||
return nil, fmt.Errorf("getting rootless network namespace: %w", err)
|
||||
}
|
||||
|
||||
// When the netns is not valid but the file exists we have to remove it first,
|
||||
// https://github.com/containers/common/pull/1381 changed the behavior of
|
||||
// NewNSWithName() so it will now error when the file already exists.
|
||||
// https://github.com/containers/podman/issues/17903#issuecomment-1494329622
|
||||
if errors.As(err, &ns.NSPathNotNSErr{}) {
|
||||
logrus.Infof("rootless netns is no longer valid: %v", err)
|
||||
// ignore errors, if something is wrong NewNSWithName() will fail below anyway
|
||||
_ = os.Remove(path)
|
||||
}
|
||||
|
||||
// create a new namespace
|
||||
logrus.Debugf("creating rootless network namespace with name %q", netnsName)
|
||||
ns, err = netns.NewNSWithName(netnsName)
|
||||
nsReference, err = netns.NewNSWithName(netnsName)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating rootless network namespace: %w", err)
|
||||
}
|
||||
@ -408,7 +419,7 @@ func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) {
|
||||
}
|
||||
// Note we do not use --exit-fd, we kill this process by pid
|
||||
cmdArgs = append(cmdArgs, "-c", "-r", "3")
|
||||
cmdArgs = append(cmdArgs, "--netns-type=path", ns.Path(), "tap0")
|
||||
cmdArgs = append(cmdArgs, "--netns-type=path", nsReference.Path(), "tap0")
|
||||
|
||||
cmd := exec.Command(path, cmdArgs...)
|
||||
logrus.Debugf("slirp4netns command: %s", strings.Join(cmd.Args, " "))
|
||||
@ -540,7 +551,7 @@ func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) {
|
||||
// Important set rootlessNetNS as last step.
|
||||
// Do not return any errors after this.
|
||||
rootlessNetNS = &RootlessNetNS{
|
||||
ns: ns,
|
||||
ns: nsReference,
|
||||
dir: rootlessNetNsDir,
|
||||
Lock: lock,
|
||||
}
|
||||
|
Reference in New Issue
Block a user