Files
podman/libpod/networking_linux.go
Giuseppe Scrivano 170fd7b038 rootless: fix a regression when using -d
when using -d and port mapping, make sure the correct fd is injected
into conmon.

Move the pipe creation earlier as the fd must be known at the time we
create the container through conmon.

Closes: https://github.com/containers/libpod/issues/5167

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
2020-02-18 15:33:38 +01:00

600 lines
18 KiB
Go

// +build linux
package libpod
import (
"bytes"
"crypto/rand"
"fmt"
"io"
"io/ioutil"
"net"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
cnitypes "github.com/containernetworking/cni/pkg/types/current"
"github.com/containernetworking/plugins/pkg/ns"
"github.com/containers/libpod/pkg/errorhandling"
"github.com/containers/libpod/pkg/netns"
"github.com/containers/libpod/pkg/rootless"
"github.com/containers/libpod/pkg/rootlessport"
"github.com/cri-o/ocicni/pkg/ocicni"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
)
// Get an OCICNI network config
func (r *Runtime) getPodNetwork(id, name, nsPath string, networks []string, ports []ocicni.PortMapping, staticIP net.IP, staticMAC net.HardwareAddr) ocicni.PodNetwork {
var networkKey string
if len(networks) > 0 {
// This is inconsistent for >1 network, but it's probably the
// best we can do.
networkKey = networks[0]
} else {
networkKey = r.netPlugin.GetDefaultNetworkName()
}
network := ocicni.PodNetwork{
Name: name,
Namespace: name, // TODO is there something else we should put here? We don't know about Kube namespaces
ID: id,
NetNS: nsPath,
RuntimeConfig: map[string]ocicni.RuntimeConfig{
networkKey: {PortMappings: ports},
},
}
// If we have extra networks, add them
if len(networks) > 0 {
network.Networks = make([]ocicni.NetAttachment, len(networks))
for i, netName := range networks {
network.Networks[i].Name = netName
}
}
if staticIP != nil || staticMAC != nil {
// For static IP or MAC, we need to populate networks even if
// it's just the default.
if len(networks) == 0 {
// If len(networks) == 0 this is guaranteed to be the
// default network.
network.Networks = []ocicni.NetAttachment{{Name: networkKey}}
}
var rt ocicni.RuntimeConfig = ocicni.RuntimeConfig{PortMappings: ports}
if staticIP != nil {
rt.IP = staticIP.String()
}
if staticMAC != nil {
rt.MAC = staticMAC.String()
}
network.RuntimeConfig = map[string]ocicni.RuntimeConfig{
networkKey: rt,
}
}
return network
}
// Create and configure a new network namespace for a container
func (r *Runtime) configureNetNS(ctr *Container, ctrNS ns.NetNS) ([]*cnitypes.Result, error) {
var requestedIP net.IP
if ctr.requestedIP != nil {
requestedIP = ctr.requestedIP
// cancel request for a specific IP in case the container is reused later
ctr.requestedIP = nil
} else {
requestedIP = ctr.config.StaticIP
}
var requestedMAC net.HardwareAddr
if ctr.requestedMAC != nil {
requestedMAC = ctr.requestedMAC
// cancel request for a specific MAC in case the container is reused later
ctr.requestedMAC = nil
} else {
requestedMAC = ctr.config.StaticMAC
}
podNetwork := r.getPodNetwork(ctr.ID(), ctr.Name(), ctrNS.Path(), ctr.config.Networks, ctr.config.PortMappings, requestedIP, requestedMAC)
results, err := r.netPlugin.SetUpPod(podNetwork)
if err != nil {
return nil, errors.Wrapf(err, "error configuring network namespace for container %s", ctr.ID())
}
defer func() {
if err != nil {
if err2 := r.netPlugin.TearDownPod(podNetwork); err2 != nil {
logrus.Errorf("Error tearing down partially created network namespace for container %s: %v", ctr.ID(), err2)
}
}
}()
networkStatus := make([]*cnitypes.Result, 0)
for idx, r := range results {
logrus.Debugf("[%d] CNI result: %v", idx, r.Result.String())
resultCurrent, err := cnitypes.GetResult(r.Result)
if err != nil {
return nil, errors.Wrapf(err, "error parsing CNI plugin result %q: %v", r.Result.String(), err)
}
networkStatus = append(networkStatus, resultCurrent)
}
return networkStatus, nil
}
// Create and configure a new network namespace for a container
func (r *Runtime) createNetNS(ctr *Container) (n ns.NetNS, q []*cnitypes.Result, err error) {
ctrNS, err := netns.NewNS()
if err != nil {
return nil, nil, errors.Wrapf(err, "error creating network namespace for container %s", ctr.ID())
}
defer func() {
if err != nil {
if err2 := netns.UnmountNS(ctrNS); err2 != nil {
logrus.Errorf("Error unmounting partially created network namespace for container %s: %v", ctr.ID(), err2)
}
if err2 := ctrNS.Close(); err2 != nil {
logrus.Errorf("Error closing partially created network namespace for container %s: %v", ctr.ID(), err2)
}
}
}()
logrus.Debugf("Made network namespace at %s for container %s", ctrNS.Path(), ctr.ID())
networkStatus := []*cnitypes.Result{}
if !rootless.IsRootless() && ctr.config.NetMode != "slirp4netns" {
networkStatus, err = r.configureNetNS(ctr, ctrNS)
}
return ctrNS, networkStatus, err
}
func checkSlirpFlags(path string) (bool, bool, bool, error) {
cmd := exec.Command(path, "--help")
out, err := cmd.CombinedOutput()
if err != nil {
return false, false, false, errors.Wrapf(err, "slirp4netns %q", out)
}
return strings.Contains(string(out), "--disable-host-loopback"), strings.Contains(string(out), "--mtu"), strings.Contains(string(out), "--enable-sandbox"), nil
}
// Configure the network namespace for a rootless container
func (r *Runtime) setupRootlessNetNS(ctr *Container) (err error) {
path := r.config.NetworkCmdPath
if path == "" {
var err error
path, err = exec.LookPath("slirp4netns")
if err != nil {
logrus.Errorf("could not find slirp4netns, the network namespace won't be configured: %v", err)
return nil
}
}
syncR, syncW, err := os.Pipe()
if err != nil {
return errors.Wrapf(err, "failed to open pipe")
}
defer errorhandling.CloseQuiet(syncR)
defer errorhandling.CloseQuiet(syncW)
havePortMapping := len(ctr.Config().PortMappings) > 0
logPath := filepath.Join(ctr.runtime.config.TmpDir, fmt.Sprintf("slirp4netns-%s.log", ctr.config.ID))
cmdArgs := []string{}
dhp, mtu, sandbox, err := checkSlirpFlags(path)
if err != nil {
return errors.Wrapf(err, "error checking slirp4netns binary %s: %q", path, err)
}
if dhp {
cmdArgs = append(cmdArgs, "--disable-host-loopback")
}
if mtu {
cmdArgs = append(cmdArgs, "--mtu", "65520")
}
if sandbox {
cmdArgs = append(cmdArgs, "--enable-sandbox")
}
// the slirp4netns arguments being passed are describes as follows:
// from the slirp4netns documentation: https://github.com/rootless-containers/slirp4netns
// -c, --configure Brings up the tap interface
// -e, --exit-fd=FD specify the FD for terminating slirp4netns
// -r, --ready-fd=FD specify the FD to write to when the initialization steps are finished
cmdArgs = append(cmdArgs, "-c", "-e", "3", "-r", "4")
netnsPath := ""
if !ctr.config.PostConfigureNetNS {
ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe()
if err != nil {
return errors.Wrapf(err, "failed to create rootless network sync pipe")
}
netnsPath = ctr.state.NetNS.Path()
cmdArgs = append(cmdArgs, "--netns-type=path", netnsPath, "tap0")
} else {
defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncR)
defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncW)
netnsPath = fmt.Sprintf("/proc/%d/ns/net", ctr.state.PID)
// we don't use --netns-path here (unavailable for slirp4netns < v0.4)
cmdArgs = append(cmdArgs, fmt.Sprintf("%d", ctr.state.PID), "tap0")
}
cmd := exec.Command(path, cmdArgs...)
logrus.Debugf("slirp4netns command: %s", strings.Join(cmd.Args, " "))
cmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}
// workaround for https://github.com/rootless-containers/slirp4netns/pull/153
if sandbox {
cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWNS
cmd.SysProcAttr.Unshareflags = syscall.CLONE_NEWNS
}
// Leak one end of the pipe in slirp4netns, the other will be sent to conmon
cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncR, syncW)
logFile, err := os.Create(logPath)
if err != nil {
return errors.Wrapf(err, "failed to open slirp4netns log file %s", logPath)
}
defer logFile.Close()
// Unlink immediately the file so we won't need to worry about cleaning it up later.
// It is still accessible through the open fd logFile.
if err := os.Remove(logPath); err != nil {
return errors.Wrapf(err, "delete file %s", logPath)
}
cmd.Stdout = logFile
cmd.Stderr = logFile
if err := cmd.Start(); err != nil {
return errors.Wrapf(err, "failed to start slirp4netns process")
}
defer func() {
if err := cmd.Process.Release(); err != nil {
logrus.Errorf("unable to release command process: %q", err)
}
}()
if err := waitForSync(syncR, cmd, logFile, 1*time.Second); err != nil {
return err
}
if havePortMapping {
return r.setupRootlessPortMapping(ctr, netnsPath)
}
return nil
}
func waitForSync(syncR *os.File, cmd *exec.Cmd, logFile io.ReadSeeker, timeout time.Duration) error {
prog := filepath.Base(cmd.Path)
if len(cmd.Args) > 0 {
prog = cmd.Args[0]
}
b := make([]byte, 16)
for {
if err := syncR.SetDeadline(time.Now().Add(timeout)); err != nil {
return errors.Wrapf(err, "error setting %s pipe timeout", prog)
}
// FIXME: return err as soon as proc exits, without waiting for timeout
if _, err := syncR.Read(b); err == nil {
break
} else {
if os.IsTimeout(err) {
// Check if the process is still running.
var status syscall.WaitStatus
pid, err := syscall.Wait4(cmd.Process.Pid, &status, syscall.WNOHANG, nil)
if err != nil {
return errors.Wrapf(err, "failed to read %s process status", prog)
}
if pid != cmd.Process.Pid {
continue
}
if status.Exited() {
// Seek at the beginning of the file and read all its content
if _, err := logFile.Seek(0, 0); err != nil {
logrus.Errorf("could not seek log file: %q", err)
}
logContent, err := ioutil.ReadAll(logFile)
if err != nil {
return errors.Wrapf(err, "%s failed", prog)
}
return errors.Errorf("%s failed: %q", prog, logContent)
}
if status.Signaled() {
return errors.Errorf("%s killed by signal", prog)
}
continue
}
return errors.Wrapf(err, "failed to read from %s sync pipe", prog)
}
}
return nil
}
func (r *Runtime) setupRootlessPortMapping(ctr *Container, netnsPath string) (err error) {
syncR, syncW, err := os.Pipe()
if err != nil {
return errors.Wrapf(err, "failed to open pipe")
}
defer errorhandling.CloseQuiet(syncR)
defer errorhandling.CloseQuiet(syncW)
logPath := filepath.Join(ctr.runtime.config.TmpDir, fmt.Sprintf("rootlessport-%s.log", ctr.config.ID))
logFile, err := os.Create(logPath)
if err != nil {
return errors.Wrapf(err, "failed to open rootlessport log file %s", logPath)
}
defer logFile.Close()
// Unlink immediately the file so we won't need to worry about cleaning it up later.
// It is still accessible through the open fd logFile.
if err := os.Remove(logPath); err != nil {
return errors.Wrapf(err, "delete file %s", logPath)
}
if !ctr.config.PostConfigureNetNS {
ctr.rootlessPortSyncR, ctr.rootlessPortSyncW, err = os.Pipe()
if err != nil {
return errors.Wrapf(err, "failed to create rootless port sync pipe")
}
}
cfg := rootlessport.Config{
Mappings: ctr.config.PortMappings,
NetNSPath: netnsPath,
ExitFD: 3,
ReadyFD: 4,
TmpDir: ctr.runtime.config.TmpDir,
}
cfgJSON, err := json.Marshal(cfg)
if err != nil {
return err
}
cfgR := bytes.NewReader(cfgJSON)
var stdout bytes.Buffer
cmd := exec.Command(fmt.Sprintf("/proc/%d/exe", os.Getpid()))
cmd.Args = []string{rootlessport.ReexecKey}
// Leak one end of the pipe in rootlessport process, the other will be sent to conmon
if ctr.rootlessPortSyncR != nil {
defer errorhandling.CloseQuiet(ctr.rootlessPortSyncR)
}
cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessPortSyncR, syncW)
cmd.Stdin = cfgR
// stdout is for human-readable error, stderr is for debug log
cmd.Stdout = &stdout
cmd.Stderr = io.MultiWriter(logFile, &logrusDebugWriter{"rootlessport: "})
cmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}
if err := cmd.Start(); err != nil {
return errors.Wrapf(err, "failed to start rootlessport process")
}
defer func() {
if err := cmd.Process.Release(); err != nil {
logrus.Errorf("unable to release rootlessport process: %q", err)
}
}()
if err := waitForSync(syncR, cmd, logFile, 3*time.Second); err != nil {
stdoutStr := stdout.String()
if stdoutStr != "" {
// err contains full debug log and too verbose, so return stdoutStr
logrus.Debug(err)
return errors.Errorf("failed to expose ports via rootlessport: %q", stdoutStr)
}
return err
}
logrus.Debug("rootlessport is ready")
return nil
}
// Configure the network namespace using the container process
func (r *Runtime) setupNetNS(ctr *Container) (err error) {
nsProcess := fmt.Sprintf("/proc/%d/ns/net", ctr.state.PID)
b := make([]byte, 16)
if _, err := rand.Reader.Read(b); err != nil {
return errors.Wrapf(err, "failed to generate random netns name")
}
nsPath := fmt.Sprintf("/var/run/netns/cni-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])
if err := os.MkdirAll(filepath.Dir(nsPath), 0711); err != nil {
return errors.Wrapf(err, "cannot create %s", filepath.Dir(nsPath))
}
mountPointFd, err := os.Create(nsPath)
if err != nil {
return errors.Wrapf(err, "cannot open %s", nsPath)
}
if err := mountPointFd.Close(); err != nil {
return err
}
if err := unix.Mount(nsProcess, nsPath, "none", unix.MS_BIND, ""); err != nil {
return errors.Wrapf(err, "cannot mount %s", nsPath)
}
netNS, err := ns.GetNS(nsPath)
if err != nil {
return err
}
networkStatus, err := r.configureNetNS(ctr, netNS)
// Assign NetNS attributes to container
ctr.state.NetNS = netNS
ctr.state.NetworkStatus = networkStatus
return err
}
// Join an existing network namespace
func joinNetNS(path string) (ns.NetNS, error) {
netNS, err := ns.GetNS(path)
if err != nil {
return nil, errors.Wrapf(err, "error retrieving network namespace at %s", path)
}
return netNS, nil
}
// Close a network namespace.
// Differs from teardownNetNS() in that it will not attempt to undo the setup of
// the namespace, but will instead only close the open file descriptor
func (r *Runtime) closeNetNS(ctr *Container) error {
if ctr.state.NetNS == nil {
// The container has no network namespace, we're set
return nil
}
if err := ctr.state.NetNS.Close(); err != nil {
return errors.Wrapf(err, "error closing network namespace for container %s", ctr.ID())
}
ctr.state.NetNS = nil
return nil
}
// Tear down a network namespace, undoing all state associated with it.
func (r *Runtime) teardownNetNS(ctr *Container) error {
if ctr.state.NetNS == nil {
// The container has no network namespace, we're set
return nil
}
logrus.Debugf("Tearing down network namespace at %s for container %s", ctr.state.NetNS.Path(), ctr.ID())
// rootless containers do not use the CNI plugin
if !rootless.IsRootless() && ctr.config.NetMode != "slirp4netns" {
var requestedIP net.IP
if ctr.requestedIP != nil {
requestedIP = ctr.requestedIP
// cancel request for a specific IP in case the container is reused later
ctr.requestedIP = nil
} else {
requestedIP = ctr.config.StaticIP
}
var requestedMAC net.HardwareAddr
if ctr.requestedMAC != nil {
requestedMAC = ctr.requestedMAC
// cancel request for a specific MAC in case the container is reused later
ctr.requestedMAC = nil
} else {
requestedMAC = ctr.config.StaticMAC
}
podNetwork := r.getPodNetwork(ctr.ID(), ctr.Name(), ctr.state.NetNS.Path(), ctr.config.Networks, ctr.config.PortMappings, requestedIP, requestedMAC)
if err := r.netPlugin.TearDownPod(podNetwork); err != nil {
return errors.Wrapf(err, "error tearing down CNI namespace configuration for container %s", ctr.ID())
}
}
// First unmount the namespace
if err := netns.UnmountNS(ctr.state.NetNS); err != nil {
return errors.Wrapf(err, "error unmounting network namespace for container %s", ctr.ID())
}
// Now close the open file descriptor
if err := ctr.state.NetNS.Close(); err != nil {
return errors.Wrapf(err, "error closing network namespace for container %s", ctr.ID())
}
ctr.state.NetNS = nil
return nil
}
func getContainerNetNS(ctr *Container) (string, error) {
if ctr.state.NetNS != nil {
return ctr.state.NetNS.Path(), nil
}
if ctr.config.NetNsCtr != "" {
c, err := ctr.runtime.GetContainer(ctr.config.NetNsCtr)
if err != nil {
return "", err
}
if err = c.syncContainer(); err != nil {
return "", err
}
return c.state.NetNS.Path(), nil
}
return "", nil
}
func getContainerNetIO(ctr *Container) (*netlink.LinkStatistics, error) {
var netStats *netlink.LinkStatistics
// rootless v2 cannot seem to resolve its network connection to
// collect statistics. For now, we allow stats to at least run
// by returning nil
if rootless.IsRootless() {
return netStats, nil
}
netNSPath, netPathErr := getContainerNetNS(ctr)
if netPathErr != nil {
return nil, netPathErr
}
if netNSPath == "" {
// If netNSPath is empty, it was set as none, and no netNS was set up
// this is a valid state and thus return no error, nor any statistics
return nil, nil
}
err := ns.WithNetNSPath(netNSPath, func(_ ns.NetNS) error {
link, err := netlink.LinkByName(ocicni.DefaultInterfaceName)
if err != nil {
return err
}
netStats = link.Attrs().Statistics
return nil
})
return netStats, err
}
func (c *Container) getContainerNetworkInfo(data *InspectContainerData) *InspectContainerData {
if c.state.NetNS != nil && len(c.state.NetworkStatus) > 0 {
// Report network settings from the first pod network
result := c.state.NetworkStatus[0]
// Go through our IP addresses
for _, ctrIP := range result.IPs {
ipWithMask := ctrIP.Address.String()
splitIP := strings.Split(ipWithMask, "/")
mask, _ := strconv.Atoi(splitIP[1])
if ctrIP.Version == "4" {
data.NetworkSettings.IPAddress = splitIP[0]
data.NetworkSettings.IPPrefixLen = mask
data.NetworkSettings.Gateway = ctrIP.Gateway.String()
} else {
data.NetworkSettings.GlobalIPv6Address = splitIP[0]
data.NetworkSettings.GlobalIPv6PrefixLen = mask
data.NetworkSettings.IPv6Gateway = ctrIP.Gateway.String()
}
}
// Set network namespace path
data.NetworkSettings.SandboxKey = c.state.NetNS.Path()
// Set MAC address of interface linked with network namespace path
for _, i := range result.Interfaces {
if i.Sandbox == data.NetworkSettings.SandboxKey {
data.NetworkSettings.MacAddress = i.Mac
}
}
}
return data
}
type logrusDebugWriter struct {
prefix string
}
func (w *logrusDebugWriter) Write(p []byte) (int, error) {
logrus.Debugf("%s%s", w.prefix, string(p))
return len(p), nil
}