Files
podman/libpod/networking_linux.go
Akihiro Suda 556cc1fadf rootlessport: honor ctr.runtime.config.TmpDir
Previously, rootlessport was using /var/tmp as the tmp dir.

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
2020-01-09 14:06:33 +09:00

592 lines
18 KiB
Go

// +build linux
package libpod
import (
"bytes"
"crypto/rand"
"fmt"
"io"
"io/ioutil"
"net"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
cnitypes "github.com/containernetworking/cni/pkg/types/current"
"github.com/containernetworking/plugins/pkg/ns"
"github.com/containers/libpod/pkg/errorhandling"
"github.com/containers/libpod/pkg/netns"
"github.com/containers/libpod/pkg/rootless"
"github.com/containers/libpod/pkg/rootlessport"
"github.com/cri-o/ocicni/pkg/ocicni"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
)
// Get an OCICNI network config
func (r *Runtime) getPodNetwork(id, name, nsPath string, networks []string, ports []ocicni.PortMapping, staticIP net.IP, staticMAC net.HardwareAddr) ocicni.PodNetwork {
var networkKey string
if len(networks) > 0 {
// This is inconsistent for >1 network, but it's probably the
// best we can do.
networkKey = networks[0]
} else {
networkKey = r.netPlugin.GetDefaultNetworkName()
}
network := ocicni.PodNetwork{
Name: name,
Namespace: name, // TODO is there something else we should put here? We don't know about Kube namespaces
ID: id,
NetNS: nsPath,
RuntimeConfig: map[string]ocicni.RuntimeConfig{
networkKey: {PortMappings: ports},
},
}
// If we have extra networks, add them
if len(networks) > 0 {
network.Networks = make([]ocicni.NetAttachment, len(networks))
for i, netName := range networks {
network.Networks[i].Name = netName
}
}
if staticIP != nil || staticMAC != nil {
// For static IP or MAC, we need to populate networks even if
// it's just the default.
if len(networks) == 0 {
// If len(networks) == 0 this is guaranteed to be the
// default network.
network.Networks = []ocicni.NetAttachment{{Name: networkKey}}
}
var rt ocicni.RuntimeConfig = ocicni.RuntimeConfig{PortMappings: ports}
if staticIP != nil {
rt.IP = staticIP.String()
}
if staticMAC != nil {
rt.MAC = staticMAC.String()
}
network.RuntimeConfig = map[string]ocicni.RuntimeConfig{
networkKey: rt,
}
}
return network
}
// Create and configure a new network namespace for a container
func (r *Runtime) configureNetNS(ctr *Container, ctrNS ns.NetNS) ([]*cnitypes.Result, error) {
var requestedIP net.IP
if ctr.requestedIP != nil {
requestedIP = ctr.requestedIP
// cancel request for a specific IP in case the container is reused later
ctr.requestedIP = nil
} else {
requestedIP = ctr.config.StaticIP
}
var requestedMAC net.HardwareAddr
if ctr.requestedMAC != nil {
requestedMAC = ctr.requestedMAC
// cancel request for a specific MAC in case the container is reused later
ctr.requestedMAC = nil
} else {
requestedMAC = ctr.config.StaticMAC
}
podNetwork := r.getPodNetwork(ctr.ID(), ctr.Name(), ctrNS.Path(), ctr.config.Networks, ctr.config.PortMappings, requestedIP, requestedMAC)
results, err := r.netPlugin.SetUpPod(podNetwork)
if err != nil {
return nil, errors.Wrapf(err, "error configuring network namespace for container %s", ctr.ID())
}
defer func() {
if err != nil {
if err2 := r.netPlugin.TearDownPod(podNetwork); err2 != nil {
logrus.Errorf("Error tearing down partially created network namespace for container %s: %v", ctr.ID(), err2)
}
}
}()
networkStatus := make([]*cnitypes.Result, 0)
for idx, r := range results {
logrus.Debugf("[%d] CNI result: %v", idx, r.Result.String())
resultCurrent, err := cnitypes.GetResult(r.Result)
if err != nil {
return nil, errors.Wrapf(err, "error parsing CNI plugin result %q: %v", r.Result.String(), err)
}
networkStatus = append(networkStatus, resultCurrent)
}
return networkStatus, nil
}
// Create and configure a new network namespace for a container
func (r *Runtime) createNetNS(ctr *Container) (n ns.NetNS, q []*cnitypes.Result, err error) {
ctrNS, err := netns.NewNS()
if err != nil {
return nil, nil, errors.Wrapf(err, "error creating network namespace for container %s", ctr.ID())
}
defer func() {
if err != nil {
if err2 := netns.UnmountNS(ctrNS); err2 != nil {
logrus.Errorf("Error unmounting partially created network namespace for container %s: %v", ctr.ID(), err2)
}
if err2 := ctrNS.Close(); err2 != nil {
logrus.Errorf("Error closing partially created network namespace for container %s: %v", ctr.ID(), err2)
}
}
}()
logrus.Debugf("Made network namespace at %s for container %s", ctrNS.Path(), ctr.ID())
networkStatus := []*cnitypes.Result{}
if !rootless.IsRootless() {
networkStatus, err = r.configureNetNS(ctr, ctrNS)
}
return ctrNS, networkStatus, err
}
func checkSlirpFlags(path string) (bool, bool, bool, error) {
cmd := exec.Command(path, "--help")
out, err := cmd.CombinedOutput()
if err != nil {
return false, false, false, errors.Wrapf(err, "slirp4netns %q", out)
}
return strings.Contains(string(out), "--disable-host-loopback"), strings.Contains(string(out), "--mtu"), strings.Contains(string(out), "--enable-sandbox"), nil
}
// Configure the network namespace for a rootless container
func (r *Runtime) setupRootlessNetNS(ctr *Container) (err error) {
path := r.config.NetworkCmdPath
if path == "" {
var err error
path, err = exec.LookPath("slirp4netns")
if err != nil {
logrus.Errorf("could not find slirp4netns, the network namespace won't be configured: %v", err)
return nil
}
}
syncR, syncW, err := os.Pipe()
if err != nil {
return errors.Wrapf(err, "failed to open pipe")
}
defer errorhandling.CloseQuiet(syncR)
defer errorhandling.CloseQuiet(syncW)
havePortMapping := len(ctr.Config().PortMappings) > 0
logPath := filepath.Join(ctr.runtime.config.TmpDir, fmt.Sprintf("slirp4netns-%s.log", ctr.config.ID))
cmdArgs := []string{}
dhp, mtu, sandbox, err := checkSlirpFlags(path)
if err != nil {
return errors.Wrapf(err, "error checking slirp4netns binary %s: %q", path, err)
}
if dhp {
cmdArgs = append(cmdArgs, "--disable-host-loopback")
}
if mtu {
cmdArgs = append(cmdArgs, "--mtu", "65520")
}
if sandbox {
cmdArgs = append(cmdArgs, "--enable-sandbox")
}
// the slirp4netns arguments being passed are describes as follows:
// from the slirp4netns documentation: https://github.com/rootless-containers/slirp4netns
// -c, --configure Brings up the tap interface
// -e, --exit-fd=FD specify the FD for terminating slirp4netns
// -r, --ready-fd=FD specify the FD to write to when the initialization steps are finished
cmdArgs = append(cmdArgs, "-c", "-e", "3", "-r", "4")
netnsPath := ""
if !ctr.config.PostConfigureNetNS {
ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe()
if err != nil {
return errors.Wrapf(err, "failed to create rootless network sync pipe")
}
netnsPath = ctr.state.NetNS.Path()
cmdArgs = append(cmdArgs, "--netns-type=path", netnsPath, "tap0")
} else {
defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncR)
defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncW)
netnsPath = fmt.Sprintf("/proc/%d/ns/net", ctr.state.PID)
// we don't use --netns-path here (unavailable for slirp4netns < v0.4)
cmdArgs = append(cmdArgs, fmt.Sprintf("%d", ctr.state.PID), "tap0")
}
cmd := exec.Command(path, cmdArgs...)
logrus.Debugf("slirp4netns command: %s", strings.Join(cmd.Args, " "))
cmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}
// workaround for https://github.com/rootless-containers/slirp4netns/pull/153
if sandbox {
cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWNS
cmd.SysProcAttr.Unshareflags = syscall.CLONE_NEWNS
}
// Leak one end of the pipe in slirp4netns, the other will be sent to conmon
cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncR, syncW)
logFile, err := os.Create(logPath)
if err != nil {
return errors.Wrapf(err, "failed to open slirp4netns log file %s", logPath)
}
defer logFile.Close()
// Unlink immediately the file so we won't need to worry about cleaning it up later.
// It is still accessible through the open fd logFile.
if err := os.Remove(logPath); err != nil {
return errors.Wrapf(err, "delete file %s", logPath)
}
cmd.Stdout = logFile
cmd.Stderr = logFile
if err := cmd.Start(); err != nil {
return errors.Wrapf(err, "failed to start slirp4netns process")
}
defer func() {
if err := cmd.Process.Release(); err != nil {
logrus.Errorf("unable to release comman process: %q", err)
}
}()
if err := waitForSync(syncR, cmd, logFile, 1*time.Second); err != nil {
return err
}
if havePortMapping {
return r.setupRootlessPortMapping(ctr, netnsPath)
}
return nil
}
func waitForSync(syncR *os.File, cmd *exec.Cmd, logFile io.ReadSeeker, timeout time.Duration) error {
prog := filepath.Base(cmd.Path)
if len(cmd.Args) > 0 {
prog = cmd.Args[0]
}
b := make([]byte, 16)
for {
if err := syncR.SetDeadline(time.Now().Add(timeout)); err != nil {
return errors.Wrapf(err, "error setting %s pipe timeout", prog)
}
// FIXME: return err as soon as proc exits, without waiting for timeout
if _, err := syncR.Read(b); err == nil {
break
} else {
if os.IsTimeout(err) {
// Check if the process is still running.
var status syscall.WaitStatus
pid, err := syscall.Wait4(cmd.Process.Pid, &status, syscall.WNOHANG, nil)
if err != nil {
return errors.Wrapf(err, "failed to read %s process status", prog)
}
if pid != cmd.Process.Pid {
continue
}
if status.Exited() {
// Seek at the beginning of the file and read all its content
if _, err := logFile.Seek(0, 0); err != nil {
logrus.Errorf("could not seek log file: %q", err)
}
logContent, err := ioutil.ReadAll(logFile)
if err != nil {
return errors.Wrapf(err, "%s failed", prog)
}
return errors.Errorf("%s failed: %q", prog, logContent)
}
if status.Signaled() {
return errors.Errorf("%s killed by signal", prog)
}
continue
}
return errors.Wrapf(err, "failed to read from %s sync pipe", prog)
}
}
return nil
}
func (r *Runtime) setupRootlessPortMapping(ctr *Container, netnsPath string) (err error) {
syncR, syncW, err := os.Pipe()
if err != nil {
return errors.Wrapf(err, "failed to open pipe")
}
defer errorhandling.CloseQuiet(syncR)
defer errorhandling.CloseQuiet(syncW)
logPath := filepath.Join(ctr.runtime.config.TmpDir, fmt.Sprintf("rootlessport-%s.log", ctr.config.ID))
logFile, err := os.Create(logPath)
if err != nil {
return errors.Wrapf(err, "failed to open rootlessport log file %s", logPath)
}
defer logFile.Close()
// Unlink immediately the file so we won't need to worry about cleaning it up later.
// It is still accessible through the open fd logFile.
if err := os.Remove(logPath); err != nil {
return errors.Wrapf(err, "delete file %s", logPath)
}
ctr.rootlessPortSyncR, ctr.rootlessPortSyncW, err = os.Pipe()
if err != nil {
return errors.Wrapf(err, "failed to create rootless port sync pipe")
}
cfg := rootlessport.Config{
Mappings: ctr.config.PortMappings,
NetNSPath: netnsPath,
ExitFD: 3,
ReadyFD: 4,
TmpDir: ctr.runtime.config.TmpDir,
}
cfgJSON, err := json.Marshal(cfg)
if err != nil {
return err
}
cfgR := bytes.NewReader(cfgJSON)
var stdout bytes.Buffer
cmd := exec.Command(fmt.Sprintf("/proc/%d/exe", os.Getpid()))
cmd.Args = []string{rootlessport.ReexecKey}
// Leak one end of the pipe in rootlessport process, the other will be sent to conmon
cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessPortSyncR, syncW)
cmd.Stdin = cfgR
// stdout is for human-readable error, stderr is for debug log
cmd.Stdout = &stdout
cmd.Stderr = io.MultiWriter(logFile, &logrusDebugWriter{"rootlessport: "})
cmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}
if err := cmd.Start(); err != nil {
return errors.Wrapf(err, "failed to start rootlessport process")
}
defer func() {
if err := cmd.Process.Release(); err != nil {
logrus.Errorf("unable to release rootlessport process: %q", err)
}
}()
if err := waitForSync(syncR, cmd, logFile, 3*time.Second); err != nil {
stdoutStr := stdout.String()
if stdoutStr != "" {
// err contains full debug log and too verbose, so return stdoutStr
logrus.Debug(err)
return errors.Errorf("failed to expose ports via rootlessport: %q", stdoutStr)
}
return err
}
logrus.Debug("rootlessport is ready")
return nil
}
// Configure the network namespace using the container process
func (r *Runtime) setupNetNS(ctr *Container) (err error) {
nsProcess := fmt.Sprintf("/proc/%d/ns/net", ctr.state.PID)
b := make([]byte, 16)
if _, err := rand.Reader.Read(b); err != nil {
return errors.Wrapf(err, "failed to generate random netns name")
}
nsPath := fmt.Sprintf("/var/run/netns/cni-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])
if err := os.MkdirAll(filepath.Dir(nsPath), 0711); err != nil {
return errors.Wrapf(err, "cannot create %s", filepath.Dir(nsPath))
}
mountPointFd, err := os.Create(nsPath)
if err != nil {
return errors.Wrapf(err, "cannot open %s", nsPath)
}
if err := mountPointFd.Close(); err != nil {
return err
}
if err := unix.Mount(nsProcess, nsPath, "none", unix.MS_BIND, ""); err != nil {
return errors.Wrapf(err, "cannot mount %s", nsPath)
}
netNS, err := ns.GetNS(nsPath)
if err != nil {
return err
}
networkStatus, err := r.configureNetNS(ctr, netNS)
// Assign NetNS attributes to container
ctr.state.NetNS = netNS
ctr.state.NetworkStatus = networkStatus
return err
}
// Join an existing network namespace
func joinNetNS(path string) (ns.NetNS, error) {
netNS, err := ns.GetNS(path)
if err != nil {
return nil, errors.Wrapf(err, "error retrieving network namespace at %s", path)
}
return netNS, nil
}
// Close a network namespace.
// Differs from teardownNetNS() in that it will not attempt to undo the setup of
// the namespace, but will instead only close the open file descriptor
func (r *Runtime) closeNetNS(ctr *Container) error {
if ctr.state.NetNS == nil {
// The container has no network namespace, we're set
return nil
}
if err := ctr.state.NetNS.Close(); err != nil {
return errors.Wrapf(err, "error closing network namespace for container %s", ctr.ID())
}
ctr.state.NetNS = nil
return nil
}
// Tear down a network namespace, undoing all state associated with it.
func (r *Runtime) teardownNetNS(ctr *Container) error {
if ctr.state.NetNS == nil {
// The container has no network namespace, we're set
return nil
}
logrus.Debugf("Tearing down network namespace at %s for container %s", ctr.state.NetNS.Path(), ctr.ID())
// rootless containers do not use the CNI plugin
if !rootless.IsRootless() {
var requestedIP net.IP
if ctr.requestedIP != nil {
requestedIP = ctr.requestedIP
// cancel request for a specific IP in case the container is reused later
ctr.requestedIP = nil
} else {
requestedIP = ctr.config.StaticIP
}
var requestedMAC net.HardwareAddr
if ctr.requestedMAC != nil {
requestedMAC = ctr.requestedMAC
// cancel request for a specific MAC in case the container is reused later
ctr.requestedMAC = nil
} else {
requestedMAC = ctr.config.StaticMAC
}
podNetwork := r.getPodNetwork(ctr.ID(), ctr.Name(), ctr.state.NetNS.Path(), ctr.config.Networks, ctr.config.PortMappings, requestedIP, requestedMAC)
if err := r.netPlugin.TearDownPod(podNetwork); err != nil {
return errors.Wrapf(err, "error tearing down CNI namespace configuration for container %s", ctr.ID())
}
}
// First unmount the namespace
if err := netns.UnmountNS(ctr.state.NetNS); err != nil {
return errors.Wrapf(err, "error unmounting network namespace for container %s", ctr.ID())
}
// Now close the open file descriptor
if err := ctr.state.NetNS.Close(); err != nil {
return errors.Wrapf(err, "error closing network namespace for container %s", ctr.ID())
}
ctr.state.NetNS = nil
return nil
}
func getContainerNetNS(ctr *Container) (string, error) {
if ctr.state.NetNS != nil {
return ctr.state.NetNS.Path(), nil
}
if ctr.config.NetNsCtr != "" {
c, err := ctr.runtime.GetContainer(ctr.config.NetNsCtr)
if err != nil {
return "", err
}
if err = c.syncContainer(); err != nil {
return "", err
}
return c.state.NetNS.Path(), nil
}
return "", nil
}
func getContainerNetIO(ctr *Container) (*netlink.LinkStatistics, error) {
var netStats *netlink.LinkStatistics
// rootless v2 cannot seem to resolve its network connection to
// collect statistics. For now, we allow stats to at least run
// by returning nil
if rootless.IsRootless() {
return netStats, nil
}
netNSPath, netPathErr := getContainerNetNS(ctr)
if netPathErr != nil {
return nil, netPathErr
}
if netNSPath == "" {
// If netNSPath is empty, it was set as none, and no netNS was set up
// this is a valid state and thus return no error, nor any statistics
return nil, nil
}
err := ns.WithNetNSPath(netNSPath, func(_ ns.NetNS) error {
link, err := netlink.LinkByName(ocicni.DefaultInterfaceName)
if err != nil {
return err
}
netStats = link.Attrs().Statistics
return nil
})
return netStats, err
}
func (c *Container) getContainerNetworkInfo(data *InspectContainerData) *InspectContainerData {
if c.state.NetNS != nil && len(c.state.NetworkStatus) > 0 {
// Report network settings from the first pod network
result := c.state.NetworkStatus[0]
// Go through our IP addresses
for _, ctrIP := range result.IPs {
ipWithMask := ctrIP.Address.String()
splitIP := strings.Split(ipWithMask, "/")
mask, _ := strconv.Atoi(splitIP[1])
if ctrIP.Version == "4" {
data.NetworkSettings.IPAddress = splitIP[0]
data.NetworkSettings.IPPrefixLen = mask
data.NetworkSettings.Gateway = ctrIP.Gateway.String()
} else {
data.NetworkSettings.GlobalIPv6Address = splitIP[0]
data.NetworkSettings.GlobalIPv6PrefixLen = mask
data.NetworkSettings.IPv6Gateway = ctrIP.Gateway.String()
}
}
// Set network namespace path
data.NetworkSettings.SandboxKey = c.state.NetNS.Path()
// Set MAC address of interface linked with network namespace path
for _, i := range result.Interfaces {
if i.Sandbox == data.NetworkSettings.SandboxKey {
data.NetworkSettings.MacAddress = i.Mac
}
}
}
return data
}
type logrusDebugWriter struct {
prefix string
}
func (w *logrusDebugWriter) Write(p []byte) (int, error) {
logrus.Debugf("%s%s", w.prefix, string(p))
return len(p), nil
}