podman/libpod/networking_common.go
Valentin Rothberg e966c86d98 container.conf: support attributed string slices
All `[]string`s in containers.conf have now been migrated to attributed
string slices which require some adjustments in Buildah and Podman.

[NO NEW TESTS NEEDED]

Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
2023-10-27 12:44:33 +02:00


//go:build !remote && (linux || freebsd)
// +build !remote
// +build linux freebsd

package libpod

import (
"errors"
"fmt"
"regexp"
"sort"
"github.com/containers/common/libnetwork/etchosts"
"github.com/containers/common/libnetwork/types"
"github.com/containers/common/pkg/config"
"github.com/containers/common/pkg/machine"
"github.com/containers/common/pkg/util"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/libpod/events"
"github.com/containers/podman/v4/pkg/namespaces"
"github.com/containers/podman/v4/pkg/rootless"
"github.com/containers/storage/pkg/lockfile"
"github.com/sirupsen/logrus"
)
// convertPortMappings will remove the HostIP part from the ports when running inside podman machine.
// This is needed because a HostIP of 127.0.0.1 would not allow the gvproxy forwarder to reach the open ports.
// For machine the HostIP must only be used by gvproxy and never in the VM.
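// For example, a mapping of 127.0.0.1:8080->80/tcp becomes 8080->80/tcp inside the VM;
// binding the 127.0.0.1 host part is left to gvproxy on the host.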
func (c *Container) convertPortMappings() []types.PortMapping {
if !machine.IsGvProxyBased() || len(c.config.PortMappings) == 0 {
return c.config.PortMappings
}
// if we run in a machine VM we have to ignore the host IP part
newPorts := make([]types.PortMapping, 0, len(c.config.PortMappings))
for _, port := range c.config.PortMappings {
port.HostIP = ""
newPorts = append(newPorts, port)
}
return newPorts
}
func (c *Container) getNetworkOptions(networkOpts map[string]types.PerNetworkOptions) types.NetworkOptions {
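// Build the nameserver list: servers from containers.conf come first, followed by the
// container's own DNS servers (typically set via --dns).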
nameservers := make([]string, 0, len(c.runtime.config.Containers.DNSServers.Get())+len(c.config.DNSServer))
nameservers = append(nameservers, c.runtime.config.Containers.DNSServers.Get()...)
for _, ip := range c.config.DNSServer {
nameservers = append(nameservers, ip.String())
}
opts := types.NetworkOptions{
ContainerID: c.config.ID,
ContainerName: getNetworkPodName(c),
DNSServers: nameservers,
}
opts.PortMappings = c.convertPortMappings()
// If the container requested special network options use this instead of the config.
// This is the case for container restore or network reload.
if c.perNetworkOpts != nil {
opts.Networks = c.perNetworkOpts
} else {
opts.Networks = networkOpts
}
return opts
}
// setUpNetwork will set up the networks; on error it will also tear down the CNI
// networks. If rootless it will join/create the rootless network namespace.
func (r *Runtime) setUpNetwork(ns string, opts types.NetworkOptions) (map[string]types.StatusBlock, error) {
rootlessNetNS, err := r.GetRootlessNetNs(true)
if err != nil {
return nil, err
}
var results map[string]types.StatusBlock
setUpPod := func() error {
results, err = r.network.Setup(ns, types.SetupOptions{NetworkOptions: opts})
return err
}
// rootlessNetNS is nil if we are root
if rootlessNetNS != nil {
// execute the setup in the rootless net ns
err = rootlessNetNS.Do(setUpPod)
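// GetRootlessNetNs(true) is assumed to return with the netns lock held; release it now that setup is done.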
rootlessNetNS.Lock.Unlock()
} else {
err = setUpPod()
}
return results, err
}
// getNetworkPodName returns the pod name (hostname) used by the DNS backend.
// If we are in the pod network namespace use the pod name, otherwise the container name.
func getNetworkPodName(c *Container) string {
if c.config.NetMode.IsPod() || c.IsInfra() {
pod, err := c.runtime.state.Pod(c.PodID())
if err == nil {
return pod.Name()
}
}
return c.Name()
}
// Tear down a container's network configuration and join the
// rootless net ns as a rootless user.
func (r *Runtime) teardownNetworkBackend(ns string, opts types.NetworkOptions) error {
rootlessNetNS, err := r.GetRootlessNetNs(false)
if err != nil {
return err
}
tearDownPod := func() error {
if err := r.network.Teardown(ns, types.TeardownOptions{NetworkOptions: opts}); err != nil {
return fmt.Errorf("tearing down network namespace configuration for container %s: %w", opts.ContainerID, err)
}
return nil
}
// rootlessNetNS is nil if we are root
if rootlessNetNS != nil {
// execute the network setup in the rootless net ns
err = rootlessNetNS.Do(tearDownPod)
if cerr := rootlessNetNS.Cleanup(r); cerr != nil {
logrus.WithError(cerr).Error("failed to clean up rootless netns")
}
rootlessNetNS.Lock.Unlock()
} else {
err = tearDownPod()
}
return err
}
// Tear down a container's network backend configuration, but do not tear down the
// namespace itself.
func (r *Runtime) teardownNetwork(ctr *Container) error {
if ctr.state.NetNS == "" {
// The container has no network namespace, we're set
return nil
}
logrus.Debugf("Tearing down network namespace at %s for container %s", ctr.state.NetNS, ctr.ID())
networks, err := ctr.networks()
if err != nil {
return err
}
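// Only bridge-type networks are torn down via the network backend here; slirp4netns and pasta
// are assumed to be cleaned up by their own teardown paths.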
if !ctr.config.NetMode.IsSlirp4netns() &&
!ctr.config.NetMode.IsPasta() && len(networks) > 0 {
netOpts := ctr.getNetworkOptions(networks)
return r.teardownNetworkBackend(ctr.state.NetNS, netOpts)
}
return nil
}
// isBridgeNetMode checks if the given network mode is bridge.
// It returns nil when it is set to bridge and an error otherwise.
func isBridgeNetMode(n namespaces.NetworkMode) error {
if !n.IsBridge() {
return fmt.Errorf("%q is not supported: %w", n, define.ErrNetworkModeInvalid)
}
return nil
}
// Reload only works with containers with a configured network.
// It will tear down, and then reconfigure, the network of the container.
// This is mainly used when a reload of firewall rules wipes out existing
// firewall configuration.
// Efforts will be made to preserve MAC and IP addresses.
// Only works on containers with bridge networking at present, though in the future we could
// extend this to stop + restart slirp4netns
func (r *Runtime) reloadContainerNetwork(ctr *Container) (map[string]types.StatusBlock, error) {
if ctr.state.NetNS == "" {
return nil, fmt.Errorf("container %s network is not configured, refusing to reload: %w", ctr.ID(), define.ErrCtrStateInvalid)
}
if err := isBridgeNetMode(ctr.config.NetMode); err != nil {
return nil, err
}
logrus.Infof("Going to reload container %s network", ctr.ID())
err := r.teardownNetwork(ctr)
if err != nil {
// teardownNetwork will error if the iptables rules do not exist, which is the case after
// a firewall reload. The purpose of network reload is to recreate the rules if they do
// not exist, so we should not log this specific error at the error level; that would only confuse users.
// iptables-legacy and iptables-nft produce different errors. Make sure to match both.
b, rerr := regexp.MatchString("Couldn't load target `CNI-[a-f0-9]{24}':No such file or directory|Chain 'CNI-[a-f0-9]{24}' does not exist", err.Error())
if rerr == nil && !b {
logrus.Error(err)
} else {
logrus.Info(err)
}
}
networkOpts, err := ctr.networks()
if err != nil {
return nil, err
}
// Set the same network settings as before.
netStatus := ctr.getNetworkStatus()
for network, perNetOpts := range networkOpts {
for name, netInt := range netStatus[network].Interfaces {
perNetOpts.InterfaceName = name
perNetOpts.StaticMAC = netInt.MacAddress
for _, netAddress := range netInt.Subnets {
perNetOpts.StaticIPs = append(perNetOpts.StaticIPs, netAddress.IPNet.IP)
}
// Normally interfaces have a length of 1; only for some special CNI configs could we get more.
// For now just use the first interface to get the IPs; this should be good enough for most cases.
break
}
networkOpts[network] = perNetOpts
}
ctr.perNetworkOpts = networkOpts
return r.configureNetNS(ctr, ctr.state.NetNS)
}
// Produce an InspectNetworkSettings containing information on the container
// network.
func (c *Container) getContainerNetworkInfo() (*define.InspectNetworkSettings, error) {
if c.config.NetNsCtr != "" {
netNsCtr, err := c.runtime.GetContainer(c.config.NetNsCtr)
if err != nil {
return nil, err
}
// see https://github.com/containers/podman/issues/10090
// the container has to be locked for syncContainer()
netNsCtr.lock.Lock()
defer netNsCtr.lock.Unlock()
// Have to sync to ensure that state is populated
if err := netNsCtr.syncContainer(); err != nil {
return nil, err
}
logrus.Debugf("Container %s shares network namespace, retrieving network info of container %s", c.ID(), c.config.NetNsCtr)
return netNsCtr.getContainerNetworkInfo()
}
settings := new(define.InspectNetworkSettings)
settings.Ports = makeInspectPorts(c.config.PortMappings, c.config.ExposedPorts)
networks, err := c.networks()
if err != nil {
return nil, err
}
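// setDefaultNetworks adds a single placeholder entry keyed by the network mode (for example
// "host" or "slirp4netns") for containers that are not connected to any named network.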
setDefaultNetworks := func() {
settings.Networks = make(map[string]*define.InspectAdditionalNetwork, 1)
name := c.NetworkMode()
addedNet := new(define.InspectAdditionalNetwork)
addedNet.NetworkID = name
settings.Networks[name] = addedNet
}
if c.state.NetNS == "" {
if networkNSPath, set := c.joinedNetworkNSPath(); networkNSPath != "" {
if result, err := c.inspectJoinedNetworkNS(networkNSPath); err == nil {
// populate the basic network config from the joined network namespace
settings.InspectBasicNetworkConfig = resultToBasicNetworkConfig(result)
} else {
// do not propagate error inspecting a joined network ns
logrus.Errorf("Inspecting network namespace: %s of container %s: %v", networkNSPath, c.ID(), err)
}
return settings, nil
} else if set {
// network none case, if running allow user to join netns via sandbox key
// https://github.com/containers/podman/issues/16716
if c.state.PID > 0 {
settings.SandboxKey = fmt.Sprintf("/proc/%d/ns/net", c.state.PID)
}
}
// We can't do more if the network is down.
// We still want to make dummy configurations for each network
// the container joined.
if len(networks) > 0 {
settings.Networks = make(map[string]*define.InspectAdditionalNetwork, len(networks))
for net, opts := range networks {
cniNet := new(define.InspectAdditionalNetwork)
cniNet.NetworkID = net
cniNet.Aliases = opts.Aliases
settings.Networks[net] = cniNet
}
} else {
setDefaultNetworks()
}
return settings, nil
}
// Set network namespace path
settings.SandboxKey = c.state.NetNS
netStatus := c.getNetworkStatus()
// If this is empty, we're probably slirp4netns
if len(netStatus) == 0 {
return settings, nil
}
// If we have networks - handle that here
if len(networks) > 0 {
if len(networks) != len(netStatus) {
return nil, fmt.Errorf("network inspection mismatch: asked to join %d network(s) %v, but have information on %d network(s): %w", len(networks), networks, len(netStatus), define.ErrInternal)
}
settings.Networks = make(map[string]*define.InspectAdditionalNetwork, len(networks))
for name, opts := range networks {
result := netStatus[name]
addedNet := new(define.InspectAdditionalNetwork)
addedNet.NetworkID = name
addedNet.Aliases = opts.Aliases
addedNet.InspectBasicNetworkConfig = resultToBasicNetworkConfig(result)
settings.Networks[name] = addedNet
}
// Unless the only connected network is the default network we can return here;
// otherwise we also have to populate the top-level InspectBasicNetworkConfig settings.
_, isDefaultNet := networks[c.runtime.config.Network.DefaultNetwork]
if !(len(networks) == 1 && isDefaultNet) {
return settings, nil
}
} else {
setDefaultNetworks()
}
// If not joining networks, we should have at most 1 result
if len(netStatus) > 1 {
return nil, fmt.Errorf("should have at most 1 network status result if not joining networks, instead got %d: %w", len(netStatus), define.ErrInternal)
}
if len(netStatus) == 1 {
for _, status := range netStatus {
settings.InspectBasicNetworkConfig = resultToBasicNetworkConfig(status)
}
}
return settings, nil
}
// resultToBasicNetworkConfig produces an InspectBasicNetworkConfig from a CNI
// result
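// The first IPv4 and IPv6 addresses found become the primary addresses and the first MAC the
// primary MAC address; any further ones are reported as secondary/additional entries.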
func resultToBasicNetworkConfig(result types.StatusBlock) define.InspectBasicNetworkConfig {
config := define.InspectBasicNetworkConfig{}
interfaceNames := make([]string, 0, len(result.Interfaces))
for interfaceName := range result.Interfaces {
interfaceNames = append(interfaceNames, interfaceName)
}
// ensure consistent inspect results by sorting
sort.Strings(interfaceNames)
for _, interfaceName := range interfaceNames {
netInt := result.Interfaces[interfaceName]
for _, netAddress := range netInt.Subnets {
size, _ := netAddress.IPNet.Mask.Size()
if netAddress.IPNet.IP.To4() != nil {
// ipv4
if config.IPAddress == "" {
config.IPAddress = netAddress.IPNet.IP.String()
config.IPPrefixLen = size
config.Gateway = netAddress.Gateway.String()
} else {
config.SecondaryIPAddresses = append(config.SecondaryIPAddresses, define.Address{Addr: netAddress.IPNet.IP.String(), PrefixLength: size})
}
} else {
// ipv6
if config.GlobalIPv6Address == "" {
config.GlobalIPv6Address = netAddress.IPNet.IP.String()
config.GlobalIPv6PrefixLen = size
config.IPv6Gateway = netAddress.Gateway.String()
} else {
config.SecondaryIPv6Addresses = append(config.SecondaryIPv6Addresses, define.Address{Addr: netAddress.IPNet.IP.String(), PrefixLength: size})
}
}
}
if config.MacAddress == "" {
config.MacAddress = netInt.MacAddress.String()
} else {
config.AdditionalMacAddresses = append(config.AdditionalMacAddresses, netInt.MacAddress.String())
}
}
return config
}
// NetworkDisconnect removes a container from the network
func (c *Container) NetworkDisconnect(nameOrID, netName string, force bool) error {
// only the bridge mode supports cni networks
if err := isBridgeNetMode(c.config.NetMode); err != nil {
return err
}
c.lock.Lock()
defer c.lock.Unlock()
networks, err := c.networks()
if err != nil {
return err
}
// Check if the network exists and, if the input is an ID, resolve it to the name.
// CNI, netavark, and the libpod db only use names, so it is important that we only use the name.
netName, err = c.runtime.normalizeNetworkName(netName)
if err != nil {
return err
}
_, nameExists := networks[netName]
if !nameExists && len(networks) > 0 {
return fmt.Errorf("container %s is not connected to network %s", nameOrID, netName)
}
if err := c.syncContainer(); err != nil {
return err
}
// get network status before we disconnect
networkStatus := c.getNetworkStatus()
if err := c.runtime.state.NetworkDisconnect(c, netName); err != nil {
return err
}
c.newNetworkEvent(events.NetworkDisconnect, netName)
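// If the container is neither running nor created there is no network namespace to tear down;
// updating the database above is all that is needed.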
if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
return nil
}
if c.state.NetNS == "" {
return fmt.Errorf("unable to disconnect %s from %s: %w", nameOrID, netName, define.ErrNoNetwork)
}
opts := types.NetworkOptions{
ContainerID: c.config.ID,
ContainerName: getNetworkPodName(c),
}
opts.PortMappings = c.convertPortMappings()
opts.Networks = map[string]types.PerNetworkOptions{
netName: networks[netName],
}
if err := c.runtime.teardownNetworkBackend(c.state.NetNS, opts); err != nil {
return err
}
// update network status if container is running
oldStatus, statusExist := networkStatus[netName]
delete(networkStatus, netName)
c.state.NetworkStatus = networkStatus
err = c.save()
if err != nil {
return err
}
// Reload ports when there are still connected networks; we may have removed the network interface with the child IP.
// Reloading without connected networks does not make sense, so we can skip this step.
if rootless.IsRootless() && len(networkStatus) > 0 {
if err := c.reloadRootlessRLKPortMapping(); err != nil {
return err
}
}
// Update resolv.conf if required
if statusExist {
stringIPs := make([]string, 0, len(oldStatus.DNSServerIPs))
for _, ip := range oldStatus.DNSServerIPs {
stringIPs = append(stringIPs, ip.String())
}
if len(stringIPs) > 0 {
logrus.Debugf("Removing DNS Servers %v from resolv.conf", stringIPs)
if err := c.removeNameserver(stringIPs); err != nil {
return err
}
}
// update /etc/hosts file
if file, ok := c.state.BindMounts[config.DefaultHostsFile]; ok {
// sync the names with c.getHostsEntries()
names := []string{c.Hostname(), c.config.Name}
rm := etchosts.GetNetworkHostEntries(map[string]types.StatusBlock{netName: oldStatus}, names...)
if len(rm) > 0 {
// make sure to lock this file to prevent concurrent writes when
// this is used as a net dependency container
lock, err := lockfile.GetLockFile(file)
if err != nil {
return fmt.Errorf("failed to lock hosts file: %w", err)
}
logrus.Debugf("Remove /etc/hosts entries %v", rm)
lock.Lock()
err = etchosts.Remove(file, rm)
lock.Unlock()
if err != nil {
return err
}
}
}
}
return nil
}
// ConnectNetwork connects a container to a given network
func (c *Container) NetworkConnect(nameOrID, netName string, netOpts types.PerNetworkOptions) error {
// only the bridge mode supports networks
if err := isBridgeNetMode(c.config.NetMode); err != nil {
return err
}
c.lock.Lock()
defer c.lock.Unlock()
networks, err := c.networks()
if err != nil {
return err
}
// Check if the network exists and, if the input is an ID, resolve it to the name.
// CNI, netavark, and the libpod db only use names, so it is important that we only use the name.
netName, err = c.runtime.normalizeNetworkName(netName)
if err != nil {
return err
}
if err := c.syncContainer(); err != nil {
return err
}
// get network status before we connect
networkStatus := c.getNetworkStatus()
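// Always add the implicit aliases (short container ID and, if set, the configured hostname) for docker compat.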
netOpts.Aliases = append(netOpts.Aliases, getExtraNetworkAliases(c)...)
if netOpts.InterfaceName == "" {
netOpts.InterfaceName = getFreeInterfaceName(networks)
if netOpts.InterfaceName == "" {
return errors.New("could not find free network interface name")
}
}
if err := c.runtime.state.NetworkConnect(c, netName, netOpts); err != nil {
// Docker compat: treat requests to attach already attached networks as a no-op, ignoring opts
if errors.Is(err, define.ErrNetworkConnected) && !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
return nil
}
return err
}
c.newNetworkEvent(events.NetworkConnect, netName)
if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
return nil
}
if c.state.NetNS == "" {
return fmt.Errorf("unable to connect %s to %s: %w", nameOrID, netName, define.ErrNoNetwork)
}
opts := types.NetworkOptions{
ContainerID: c.config.ID,
ContainerName: getNetworkPodName(c),
}
opts.PortMappings = c.convertPortMappings()
opts.Networks = map[string]types.PerNetworkOptions{
netName: netOpts,
}
results, err := c.runtime.setUpNetwork(c.state.NetNS, opts)
if err != nil {
return err
}
if len(results) != 1 {
return errors.New("when adding aliases, results must be of length 1")
}
// we need to get the old host entries before we add the new ones to the status
// if we do not do it here we will get the wrong existing entries which will throw off the logic
// we could also copy the map but this does not seem worth it
// sync the hostNames with c.getHostsEntries()
hostNames := []string{c.Hostname(), c.config.Name}
oldHostEntries := etchosts.GetNetworkHostEntries(networkStatus, hostNames...)
// update network status
if networkStatus == nil {
networkStatus = make(map[string]types.StatusBlock, 1)
}
networkStatus[netName] = results[netName]
c.state.NetworkStatus = networkStatus
err = c.save()
if err != nil {
return err
}
// The first network needs a port reload to set the correct child ip for the rootlessport process.
// Adding a second network does not require a port reload because the child ip is still valid.
if rootless.IsRootless() && len(networks) == 0 {
if err := c.reloadRootlessRLKPortMapping(); err != nil {
return err
}
}
ipv6 := c.checkForIPv6(networkStatus)
// Update resolv.conf if required
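// Skip IPv6 nameservers when the container has no IPv6 connectivity; they would likely be unreachable.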
stringIPs := make([]string, 0, len(results[netName].DNSServerIPs))
for _, ip := range results[netName].DNSServerIPs {
if (ip.To4() == nil) && !ipv6 {
continue
}
stringIPs = append(stringIPs, ip.String())
}
if len(stringIPs) > 0 {
logrus.Debugf("Adding DNS Servers %v to resolv.conf", stringIPs)
if err := c.addNameserver(stringIPs); err != nil {
return err
}
}
// update /etc/hosts file
if file, ok := c.state.BindMounts[config.DefaultHostsFile]; ok {
// make sure to lock this file to prevent concurrent writes when
// this is used as a net dependency container
lock, err := lockfile.GetLockFile(file)
if err != nil {
return fmt.Errorf("failed to lock hosts file: %w", err)
}
newHostEntries := etchosts.GetNetworkHostEntries(results, hostNames...)
logrus.Debugf("Add /etc/hosts entries %v", newHostEntries)
// use special AddIfExists API to make sure we only add new entries if an old one exists
// see the AddIfExists() comment for more information
lock.Lock()
err = etchosts.AddIfExists(file, oldHostEntries, newHostEntries)
lock.Unlock()
if err != nil {
return err
}
}
return nil
}
// get a free interface name for a new network
// return an empty string if no free name was found
func getFreeInterfaceName(networks map[string]types.PerNetworkOptions) string {
ifNames := make([]string, 0, len(networks))
for _, opts := range networks {
ifNames = append(ifNames, opts.InterfaceName)
}
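// Probe eth0, eth1, ... and return the first name that is not already in use; for example a
// container already attached to two networks (eth0, eth1) would get eth2 here.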
for i := 0; i < 100000; i++ {
ifName := fmt.Sprintf("eth%d", i)
if !util.StringInSlice(ifName, ifNames) {
return ifName
}
}
return ""
}
func getExtraNetworkAliases(c *Container) []string {
// always add the short id as alias for docker compat
alias := []string{c.config.ID[:12]}
// if an explicit hostname was set add it as well
if c.config.Spec.Hostname != "" {
alias = append(alias, c.config.Spec.Hostname)
}
return alias
}
// DisconnectContainerFromNetwork removes a container from its network
func (r *Runtime) DisconnectContainerFromNetwork(nameOrID, netName string, force bool) error {
ctr, err := r.LookupContainer(nameOrID)
if err != nil {
return err
}
return ctr.NetworkDisconnect(nameOrID, netName, force)
}
// ConnectContainerToNetwork connects a container to a network
func (r *Runtime) ConnectContainerToNetwork(nameOrID, netName string, netOpts types.PerNetworkOptions) error {
ctr, err := r.LookupContainer(nameOrID)
if err != nil {
return err
}
return ctr.NetworkConnect(nameOrID, netName, netOpts)
}
// normalizeNetworkName takes a network name, a partial or a full network ID and returns the network name.
// If the network is not found an error is returned.
func (r *Runtime) normalizeNetworkName(nameOrID string) (string, error) {
net, err := r.network.NetworkInspect(nameOrID)
if err != nil {
return "", err
}
return net.Name, nil
}
// ocicniPortsToNetTypesPorts converts the old port format to the new one
// while deduplicating ports into ranges.
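// For example, three consecutive mappings 8080->80, 8081->81 and 8082->82 with the same
// protocol and host IP are collapsed into a single PortMapping with HostPort 8080,
// ContainerPort 80 and Range 3.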
func ocicniPortsToNetTypesPorts(ports []types.OCICNIPortMapping) []types.PortMapping {
if len(ports) == 0 {
return nil
}
newPorts := make([]types.PortMapping, 0, len(ports))
// first sort the ports
sort.Slice(ports, func(i, j int) bool {
return compareOCICNIPorts(ports[i], ports[j])
})
// we already checked that the slice is not empty so we can use the first element
currentPort := types.PortMapping{
HostIP: ports[0].HostIP,
HostPort: uint16(ports[0].HostPort),
ContainerPort: uint16(ports[0].ContainerPort),
Protocol: ports[0].Protocol,
Range: 1,
}
for i := 1; i < len(ports); i++ {
if ports[i].HostIP == currentPort.HostIP &&
ports[i].Protocol == currentPort.Protocol &&
ports[i].HostPort-int32(currentPort.Range) == int32(currentPort.HostPort) &&
ports[i].ContainerPort-int32(currentPort.Range) == int32(currentPort.ContainerPort) {
currentPort.Range++
} else {
newPorts = append(newPorts, currentPort)
currentPort = types.PortMapping{
HostIP: ports[i].HostIP,
HostPort: uint16(ports[i].HostPort),
ContainerPort: uint16(ports[i].ContainerPort),
Protocol: ports[i].Protocol,
Range: 1,
}
}
}
newPorts = append(newPorts, currentPort)
return newPorts
}
// compareOCICNIPorts will sort the ocicni ports by
// 1) host ip
// 2) protocol
// 3) hostPort
// 4) container port
func compareOCICNIPorts(i, j types.OCICNIPortMapping) bool {
if i.HostIP != j.HostIP {
return i.HostIP < j.HostIP
}
if i.Protocol != j.Protocol {
return i.Protocol < j.Protocol
}
if i.HostPort != j.HostPort {
return i.HostPort < j.HostPort
}
return i.ContainerPort < j.ContainerPort
}