mirror of
https://github.com/containers/podman.git
synced 2025-06-01 17:17:47 +08:00

Allow kpod create/run to create contianers in different network namespaces, uts namespaces and IPC Namespaces. This patch just handles the simple join the host, or another containers namespaces. Lots more work needed to full integrate --net Signed-off-by: Daniel J Walsh <dwalsh@redhat.com> Closes: #64 Approved by: mheon
544 lines
15 KiB
Go
544 lines
15 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"strings"
|
|
|
|
"github.com/docker/docker/daemon/caps"
|
|
"github.com/docker/docker/pkg/mount"
|
|
"github.com/docker/go-units"
|
|
spec "github.com/opencontainers/runtime-spec/specs-go"
|
|
"github.com/opencontainers/runtime-tools/generate"
|
|
"github.com/opencontainers/selinux/go-selinux/label"
|
|
"github.com/pkg/errors"
|
|
"github.com/projectatomic/libpod/libpod"
|
|
ann "github.com/projectatomic/libpod/pkg/annotations"
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
func blockAccessToKernelFilesystems(config *createConfig, g *generate.Generator) {
|
|
if !config.privileged {
|
|
for _, mp := range []string{
|
|
"/proc/kcore",
|
|
"/proc/latency_stats",
|
|
"/proc/timer_list",
|
|
"/proc/timer_stats",
|
|
"/proc/sched_debug",
|
|
"/proc/scsi",
|
|
"/sys/firmware",
|
|
} {
|
|
g.AddLinuxMaskedPaths(mp)
|
|
}
|
|
|
|
for _, rp := range []string{
|
|
"/proc/asound",
|
|
"/proc/bus",
|
|
"/proc/fs",
|
|
"/proc/irq",
|
|
"/proc/sys",
|
|
"/proc/sysrq-trigger",
|
|
} {
|
|
g.AddLinuxReadonlyPaths(rp)
|
|
}
|
|
}
|
|
}
|
|
|
|
func addPidNS(config *createConfig, g *generate.Generator) error {
|
|
pidMode := config.pidMode
|
|
if pidMode.IsHost() {
|
|
return g.RemoveLinuxNamespace(libpod.PIDNamespace)
|
|
}
|
|
if pidMode.IsContainer() {
|
|
ctr, err := config.runtime.LookupContainer(pidMode.Container())
|
|
if err != nil {
|
|
return errors.Wrapf(err, "container %q not found", pidMode.Container())
|
|
}
|
|
pid, err := ctr.PID()
|
|
if err != nil {
|
|
return errors.Wrapf(err, "Failed to get pid of container %q", pidMode.Container())
|
|
}
|
|
pidNsPath := fmt.Sprintf("/proc/%d/ns/pid", pid)
|
|
if err := g.AddOrReplaceLinuxNamespace(libpod.PIDNamespace, pidNsPath); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func addNetNS(config *createConfig, g *generate.Generator) error {
|
|
netMode := config.netMode
|
|
if netMode.IsHost() {
|
|
return g.RemoveLinuxNamespace(libpod.NetNamespace)
|
|
}
|
|
if netMode.IsNone() {
|
|
return libpod.ErrNotImplemented
|
|
}
|
|
if netMode.IsBridge() {
|
|
return libpod.ErrNotImplemented
|
|
}
|
|
if netMode.IsContainer() {
|
|
ctr, err := config.runtime.LookupContainer(netMode.ConnectedContainer())
|
|
if err != nil {
|
|
return errors.Wrapf(err, "container %q not found", netMode.ConnectedContainer())
|
|
}
|
|
pid, err := ctr.PID()
|
|
if err != nil {
|
|
return errors.Wrapf(err, "Failed to get pid of container %q", netMode.ConnectedContainer())
|
|
}
|
|
nsPath := fmt.Sprintf("/proc/%d/ns/net", pid)
|
|
if err := g.AddOrReplaceLinuxNamespace(libpod.NetNamespace, nsPath); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func addUTSNS(config *createConfig, g *generate.Generator) error {
|
|
utsMode := config.utsMode
|
|
if utsMode.IsHost() {
|
|
return g.RemoveLinuxNamespace(libpod.UTSNamespace)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func addIpcNS(config *createConfig, g *generate.Generator) error {
|
|
ipcMode := config.ipcMode
|
|
if ipcMode.IsHost() {
|
|
return g.RemoveLinuxNamespace(libpod.IPCNamespace)
|
|
}
|
|
if ipcMode.IsContainer() {
|
|
ctr, err := config.runtime.LookupContainer(ipcMode.Container())
|
|
if err != nil {
|
|
return errors.Wrapf(err, "container %q not found", ipcMode.Container())
|
|
}
|
|
pid, err := ctr.PID()
|
|
if err != nil {
|
|
return errors.Wrapf(err, "Failed to get pid of container %q", ipcMode.Container())
|
|
}
|
|
nsPath := fmt.Sprintf("/proc/%d/ns/ipc", pid)
|
|
if err := g.AddOrReplaceLinuxNamespace(libpod.IPCNamespace, nsPath); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func addRlimits(config *createConfig, g *generate.Generator) error {
|
|
var (
|
|
ul *units.Ulimit
|
|
err error
|
|
)
|
|
|
|
for _, u := range config.resources.ulimit {
|
|
if ul, err = units.ParseUlimit(u); err != nil {
|
|
return errors.Wrapf(err, "ulimit option %q requires name=SOFT:HARD, failed to be parsed", u)
|
|
}
|
|
|
|
g.AddProcessRlimits("RLIMIT_"+strings.ToUpper(ul.Name), uint64(ul.Soft), uint64(ul.Hard))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func setupCapabilities(config *createConfig, configSpec *spec.Spec) error {
|
|
var err error
|
|
var caplist []string
|
|
if config.privileged {
|
|
caplist = caps.GetAllCapabilities()
|
|
} else {
|
|
caplist, err = caps.TweakCapabilities(configSpec.Process.Capabilities.Bounding, config.capAdd, config.capDrop)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
configSpec.Process.Capabilities.Bounding = caplist
|
|
configSpec.Process.Capabilities.Permitted = caplist
|
|
configSpec.Process.Capabilities.Inheritable = caplist
|
|
configSpec.Process.Capabilities.Effective = caplist
|
|
return nil
|
|
}
|
|
|
|
// Parses information needed to create a container into an OCI runtime spec
|
|
func createConfigToOCISpec(config *createConfig) (*spec.Spec, error) {
|
|
g := generate.New()
|
|
g.AddCgroupsMount("ro")
|
|
g.SetProcessCwd(config.workDir)
|
|
g.SetProcessArgs(config.command)
|
|
g.SetProcessTerminal(config.tty)
|
|
// User and Group must go together
|
|
g.SetProcessUID(config.user)
|
|
g.SetProcessGID(config.group)
|
|
for _, gid := range config.groupAdd {
|
|
g.AddProcessAdditionalGid(gid)
|
|
}
|
|
for key, val := range config.GetAnnotations() {
|
|
g.AddAnnotation(key, val)
|
|
}
|
|
g.SetRootReadonly(config.readOnlyRootfs)
|
|
g.SetHostname(config.hostname)
|
|
if config.hostname != "" {
|
|
g.AddProcessEnv("HOSTNAME", config.hostname)
|
|
}
|
|
|
|
for _, sysctl := range config.sysctl {
|
|
s := strings.SplitN(sysctl, "=", 2)
|
|
g.AddLinuxSysctl(s[0], s[1])
|
|
}
|
|
|
|
// RESOURCES - MEMORY
|
|
if config.resources.memory != 0 {
|
|
g.SetLinuxResourcesMemoryLimit(config.resources.memory)
|
|
}
|
|
if config.resources.memoryReservation != 0 {
|
|
g.SetLinuxResourcesMemoryReservation(config.resources.memoryReservation)
|
|
}
|
|
if config.resources.memorySwap != 0 {
|
|
g.SetLinuxResourcesMemorySwap(config.resources.memorySwap)
|
|
}
|
|
if config.resources.kernelMemory != 0 {
|
|
g.SetLinuxResourcesMemoryKernel(config.resources.kernelMemory)
|
|
}
|
|
if config.resources.memorySwappiness != -1 {
|
|
g.SetLinuxResourcesMemorySwappiness(uint64(config.resources.memorySwappiness))
|
|
}
|
|
g.SetLinuxResourcesMemoryDisableOOMKiller(config.resources.disableOomKiller)
|
|
g.SetProcessOOMScoreAdj(config.resources.oomScoreAdj)
|
|
|
|
// RESOURCES - CPU
|
|
|
|
if config.resources.cpuShares != 0 {
|
|
g.SetLinuxResourcesCPUShares(config.resources.cpuShares)
|
|
}
|
|
if config.resources.cpuQuota != 0 {
|
|
g.SetLinuxResourcesCPUQuota(config.resources.cpuQuota)
|
|
}
|
|
if config.resources.cpuPeriod != 0 {
|
|
g.SetLinuxResourcesCPUPeriod(config.resources.cpuPeriod)
|
|
}
|
|
if config.resources.cpuRtRuntime != 0 {
|
|
g.SetLinuxResourcesCPURealtimeRuntime(config.resources.cpuRtRuntime)
|
|
}
|
|
if config.resources.cpuRtPeriod != 0 {
|
|
g.SetLinuxResourcesCPURealtimePeriod(config.resources.cpuRtPeriod)
|
|
}
|
|
if config.resources.cpus != "" {
|
|
g.SetLinuxResourcesCPUCpus(config.resources.cpus)
|
|
}
|
|
if config.resources.cpusetMems != "" {
|
|
g.SetLinuxResourcesCPUMems(config.resources.cpusetMems)
|
|
}
|
|
|
|
// SECURITY OPTS
|
|
g.SetProcessNoNewPrivileges(config.noNewPrivileges)
|
|
g.SetProcessApparmorProfile(config.apparmorProfile)
|
|
g.SetProcessSelinuxLabel(config.processLabel)
|
|
g.SetLinuxMountLabel(config.mountLabel)
|
|
blockAccessToKernelFilesystems(config, &g)
|
|
|
|
// RESOURCES - PIDS
|
|
if config.resources.pidsLimit != 0 {
|
|
g.SetLinuxResourcesPidsLimit(config.resources.pidsLimit)
|
|
}
|
|
|
|
for _, i := range config.tmpfs {
|
|
options := []string{"rw", "noexec", "nosuid", "nodev", "size=65536k"}
|
|
spliti := strings.SplitN(i, ":", 2)
|
|
if len(spliti) > 1 {
|
|
if _, _, err := mount.ParseTmpfsOptions(spliti[1]); err != nil {
|
|
return nil, err
|
|
}
|
|
options = strings.Split(spliti[1], ",")
|
|
}
|
|
// Default options if nothing passed
|
|
g.AddTmpfsMount(spliti[0], options)
|
|
}
|
|
|
|
for name, val := range config.env {
|
|
g.AddProcessEnv(name, val)
|
|
}
|
|
|
|
if err := addRlimits(config, &g); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := addPidNS(config, &g); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := addNetNS(config, &g); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := addUTSNS(config, &g); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := addIpcNS(config, &g); err != nil {
|
|
return nil, err
|
|
}
|
|
configSpec := g.Spec()
|
|
|
|
if config.seccompProfilePath != "" && config.seccompProfilePath != "unconfined" {
|
|
seccompProfile, err := ioutil.ReadFile(config.seccompProfilePath)
|
|
if err != nil {
|
|
return nil, errors.Wrapf(err, "opening seccomp profile (%s) failed", config.seccompProfilePath)
|
|
}
|
|
var seccompConfig spec.LinuxSeccomp
|
|
if err := json.Unmarshal(seccompProfile, &seccompConfig); err != nil {
|
|
return nil, errors.Wrapf(err, "decoding seccomp profile (%s) failed", config.seccompProfilePath)
|
|
}
|
|
configSpec.Linux.Seccomp = &seccompConfig
|
|
}
|
|
|
|
// BIND MOUNTS
|
|
mounts, err := config.GetVolumeMounts()
|
|
if err != nil {
|
|
return nil, errors.Wrapf(err, "error getting volume mounts")
|
|
}
|
|
configSpec.Mounts = append(configSpec.Mounts, mounts...)
|
|
|
|
// HANDLE CAPABILITIES
|
|
if err := setupCapabilities(config, configSpec); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
/*
|
|
Hooks: &configSpec.Hooks{},
|
|
//Annotations
|
|
Resources: &configSpec.LinuxResources{
|
|
Devices: config.GetDefaultDevices(),
|
|
BlockIO: &blkio,
|
|
//HugepageLimits:
|
|
Network: &configSpec.LinuxNetwork{
|
|
// ClassID *uint32
|
|
// Priorites []LinuxInterfacePriority
|
|
},
|
|
},
|
|
//CgroupsPath:
|
|
//Namespaces: []LinuxNamespace
|
|
//Devices
|
|
// DefaultAction:
|
|
// Architectures
|
|
// Syscalls:
|
|
},
|
|
// RootfsPropagation
|
|
// MaskedPaths
|
|
// ReadonlyPaths:
|
|
// IntelRdt
|
|
},
|
|
}
|
|
*/
|
|
return configSpec, nil
|
|
}
|
|
|
|
func (c *createConfig) CreateBlockIO() (spec.LinuxBlockIO, error) {
|
|
bio := spec.LinuxBlockIO{}
|
|
bio.Weight = &c.resources.blkioWeight
|
|
if len(c.resources.blkioWeightDevice) > 0 {
|
|
var lwds []spec.LinuxWeightDevice
|
|
for _, i := range c.resources.blkioWeightDevice {
|
|
wd, err := validateweightDevice(i)
|
|
if err != nil {
|
|
return bio, errors.Wrapf(err, "invalid values for blkio-weight-device")
|
|
}
|
|
wdStat := getStatFromPath(wd.path)
|
|
lwd := spec.LinuxWeightDevice{
|
|
Weight: &wd.weight,
|
|
}
|
|
lwd.Major = int64(unix.Major(wdStat.Rdev))
|
|
lwd.Minor = int64(unix.Minor(wdStat.Rdev))
|
|
lwds = append(lwds, lwd)
|
|
}
|
|
}
|
|
if len(c.resources.deviceReadBps) > 0 {
|
|
readBps, err := makeThrottleArray(c.resources.deviceReadBps)
|
|
if err != nil {
|
|
return bio, err
|
|
}
|
|
bio.ThrottleReadBpsDevice = readBps
|
|
}
|
|
if len(c.resources.deviceWriteBps) > 0 {
|
|
writeBpds, err := makeThrottleArray(c.resources.deviceWriteBps)
|
|
if err != nil {
|
|
return bio, err
|
|
}
|
|
bio.ThrottleWriteBpsDevice = writeBpds
|
|
}
|
|
if len(c.resources.deviceReadIOps) > 0 {
|
|
readIOps, err := makeThrottleArray(c.resources.deviceReadIOps)
|
|
if err != nil {
|
|
return bio, err
|
|
}
|
|
bio.ThrottleReadIOPSDevice = readIOps
|
|
}
|
|
if len(c.resources.deviceWriteIOps) > 0 {
|
|
writeIOps, err := makeThrottleArray(c.resources.deviceWriteIOps)
|
|
if err != nil {
|
|
return bio, err
|
|
}
|
|
bio.ThrottleWriteIOPSDevice = writeIOps
|
|
}
|
|
|
|
return bio, nil
|
|
}
|
|
|
|
// GetAnnotations returns the all the annotations for the container
|
|
func (c *createConfig) GetAnnotations() map[string]string {
|
|
a := getDefaultAnnotations()
|
|
// TODO - Which annotations do we want added by default
|
|
// TODO - This should be added to the DB long term
|
|
if c.tty {
|
|
a["io.kubernetes.cri-o.TTY"] = "true"
|
|
}
|
|
return a
|
|
}
|
|
|
|
func getDefaultAnnotations() map[string]string {
|
|
var annotations map[string]string
|
|
annotations = make(map[string]string)
|
|
annotations[ann.Annotations] = ""
|
|
annotations[ann.ContainerID] = ""
|
|
annotations[ann.ContainerName] = ""
|
|
annotations[ann.ContainerType] = ""
|
|
annotations[ann.Created] = ""
|
|
annotations[ann.HostName] = ""
|
|
annotations[ann.IP] = ""
|
|
annotations[ann.Image] = ""
|
|
annotations[ann.ImageName] = ""
|
|
annotations[ann.ImageRef] = ""
|
|
annotations[ann.KubeName] = ""
|
|
annotations[ann.Labels] = ""
|
|
annotations[ann.LogPath] = ""
|
|
annotations[ann.Metadata] = ""
|
|
annotations[ann.Name] = ""
|
|
annotations[ann.PrivilegedRuntime] = ""
|
|
annotations[ann.ResolvPath] = ""
|
|
annotations[ann.HostnamePath] = ""
|
|
annotations[ann.SandboxID] = ""
|
|
annotations[ann.SandboxName] = ""
|
|
annotations[ann.ShmPath] = ""
|
|
annotations[ann.MountPoint] = ""
|
|
annotations[ann.TrustedSandbox] = ""
|
|
annotations[ann.TTY] = "false"
|
|
annotations[ann.Stdin] = ""
|
|
annotations[ann.StdinOnce] = ""
|
|
annotations[ann.Volumes] = ""
|
|
|
|
return annotations
|
|
}
|
|
|
|
//GetVolumeMounts takes user provided input for bind mounts and creates Mount structs
|
|
func (c *createConfig) GetVolumeMounts() ([]spec.Mount, error) {
|
|
var m []spec.Mount
|
|
var options []string
|
|
for _, i := range c.volumes {
|
|
// We need to handle SELinux options better here, specifically :Z
|
|
spliti := strings.Split(i, ":")
|
|
if len(spliti) > 2 {
|
|
options = strings.Split(spliti[2], ",")
|
|
}
|
|
options = append(options, "rbind")
|
|
// var foundrw, foundro,
|
|
var foundz, foundZ bool
|
|
for _, opt := range options {
|
|
switch opt {
|
|
// case "rw":
|
|
// foundrw = true
|
|
// case "ro":
|
|
// foundro = true
|
|
case "z":
|
|
foundz = true
|
|
case "Z":
|
|
foundZ = true
|
|
}
|
|
}
|
|
// if !foundro && !foundrw {
|
|
// // rw option is default
|
|
// options = append(options, "rw")
|
|
// }
|
|
if foundz {
|
|
if err := label.Relabel(spliti[0], c.mountLabel, true); err != nil {
|
|
return nil, errors.Wrapf(err, "relabel failed %q", spliti[0])
|
|
}
|
|
}
|
|
if foundZ {
|
|
if err := label.Relabel(spliti[0], c.mountLabel, false); err != nil {
|
|
return nil, errors.Wrapf(err, "relabel failed %q", spliti[0])
|
|
}
|
|
}
|
|
|
|
m = append(m, spec.Mount{
|
|
Destination: spliti[1],
|
|
Type: string(TypeBind),
|
|
Source: spliti[0],
|
|
Options: options,
|
|
})
|
|
}
|
|
return m, nil
|
|
}
|
|
|
|
//GetTmpfsMounts takes user provided input for tmpfs mounts and creates Mount structs
|
|
func (c *createConfig) GetTmpfsMounts() []spec.Mount {
|
|
var m []spec.Mount
|
|
for _, i := range c.tmpfs {
|
|
// Default options if nothing passed
|
|
options := []string{"rw", "noexec", "nosuid", "nodev", "size=65536k"}
|
|
spliti := strings.Split(i, ":")
|
|
destPath := spliti[0]
|
|
if len(spliti) > 1 {
|
|
options = strings.Split(spliti[1], ",")
|
|
}
|
|
m = append(m, spec.Mount{
|
|
Destination: destPath,
|
|
Type: string(TypeTmpfs),
|
|
Options: options,
|
|
Source: string(TypeTmpfs),
|
|
})
|
|
}
|
|
return m
|
|
}
|
|
|
|
func (c *createConfig) GetContainerCreateOptions() ([]libpod.CtrCreateOption, error) {
|
|
var options []libpod.CtrCreateOption
|
|
|
|
// Uncomment after talking to mheon about unimplemented funcs
|
|
// options = append(options, libpod.WithLabels(c.labels))
|
|
|
|
if c.interactive {
|
|
options = append(options, libpod.WithStdin())
|
|
}
|
|
if c.name != "" {
|
|
logrus.Debug("appending name %s", c.name)
|
|
options = append(options, libpod.WithName(c.name))
|
|
}
|
|
|
|
return options, nil
|
|
}
|
|
|
|
func getStatFromPath(path string) unix.Stat_t {
|
|
s := unix.Stat_t{}
|
|
_ = unix.Stat(path, &s)
|
|
return s
|
|
}
|
|
|
|
func makeThrottleArray(throttleInput []string) ([]spec.LinuxThrottleDevice, error) {
|
|
var ltds []spec.LinuxThrottleDevice
|
|
for _, i := range throttleInput {
|
|
t, err := validateBpsDevice(i)
|
|
if err != nil {
|
|
return []spec.LinuxThrottleDevice{}, err
|
|
}
|
|
ltd := spec.LinuxThrottleDevice{}
|
|
ltd.Rate = t.rate
|
|
ltdStat := getStatFromPath(t.path)
|
|
ltd.Major = int64(unix.Major(ltdStat.Rdev))
|
|
ltd.Minor = int64(unix.Major(ltdStat.Rdev))
|
|
ltds = append(ltds, ltd)
|
|
}
|
|
return ltds, nil
|
|
}
|