Merge pull request #4220 from mheon/null_runtime

Move OCI runtime implementation behind an interface
This commit is contained in:
OpenShift Merge Robot
2019-10-11 20:55:37 +02:00
committed by GitHub
23 changed files with 1975 additions and 1664 deletions

View File

@ -129,7 +129,7 @@ type Container struct {
valid bool
lock lock.Locker
runtime *Runtime
ociRuntime *OCIRuntime
ociRuntime OCIRuntime
rootlessSlirpSyncR *os.File
rootlessSlirpSyncW *os.File

View File

@ -187,7 +187,7 @@ func (c *Container) StopWithTimeout(timeout uint) error {
return define.ErrCtrStopped
}
return c.stop(timeout)
return c.stop(timeout, false)
}
// Kill sends a signal to a container
@ -205,13 +205,15 @@ func (c *Container) Kill(signal uint) error {
return errors.Wrapf(define.ErrCtrStateInvalid, "can only kill running containers. %s is in state %s", c.ID(), c.state.State.String())
}
defer c.newContainerEvent(events.Kill)
if err := c.ociRuntime.killContainer(c, signal); err != nil {
// Hardcode all = false, we only use all when removing.
if err := c.ociRuntime.KillContainer(c, signal, false); err != nil {
return err
}
c.state.StoppedByUser = true
c.newContainerEvent(events.Kill)
return c.save()
}
@ -221,7 +223,7 @@ func (c *Container) Kill(signal uint) error {
// Sometimes, the $RUNTIME exec call errors, and if that is the case, the exit code is the exit code of the call.
// Otherwise, the exit code will be the exit code of the executed call inside of the container.
// TODO investigate allowing exec without attaching
func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir string, streams *AttachStreams, preserveFDs int, resize chan remotecommand.TerminalSize, detachKeys string) (int, error) {
func (c *Container) Exec(tty, privileged bool, env map[string]string, cmd []string, user, workDir string, streams *AttachStreams, preserveFDs uint, resize chan remotecommand.TerminalSize, detachKeys string) (int, error) {
var capList []string
if !c.batched {
c.lock.Lock()
@ -278,7 +280,19 @@ func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir
user = c.config.User
}
pid, attachChan, err := c.ociRuntime.execContainer(c, cmd, capList, env, tty, workDir, user, sessionID, streams, preserveFDs, resize, detachKeys)
opts := new(ExecOptions)
opts.Cmd = cmd
opts.CapAdd = capList
opts.Env = env
opts.Terminal = tty
opts.Cwd = workDir
opts.User = user
opts.Streams = streams
opts.PreserveFDs = preserveFDs
opts.Resize = resize
opts.DetachKeys = detachKeys
pid, attachChan, err := c.ociRuntime.ExecContainer(c, sessionID, opts)
if err != nil {
ec := define.ExecErrorCodeGeneric
// Conmon will pass a non-zero exit code from the runtime as a pid here.
@ -524,7 +538,10 @@ func (c *Container) WaitWithInterval(waitTimeout time.Duration) (int32, error) {
return -1, define.ErrCtrRemoved
}
exitFile := c.exitFilePath()
exitFile, err := c.exitFilePath()
if err != nil {
return -1, err
}
chWait := make(chan error, 1)
defer close(chWait)
@ -639,7 +656,7 @@ func (c *Container) Sync() error {
(c.state.State != define.ContainerStateConfigured) &&
(c.state.State != define.ContainerStateExited) {
oldState := c.state.State
if err := c.ociRuntime.updateContainerStatus(c, true); err != nil {
if err := c.ociRuntime.UpdateContainerStatus(c, true); err != nil {
return err
}
// Only save back to DB if state changed
@ -687,7 +704,7 @@ func (c *Container) Refresh(ctx context.Context) error {
// Next, if the container is running, stop it
if c.state.State == define.ContainerStateRunning {
if err := c.stop(c.config.StopTimeout); err != nil {
if err := c.stop(c.config.StopTimeout, false); err != nil {
return err
}
}
@ -696,8 +713,10 @@ func (c *Container) Refresh(ctx context.Context) error {
if len(c.state.ExecSessions) > 0 {
logrus.Infof("Killing %d exec sessions in container %s. They will not be restored after refresh.",
len(c.state.ExecSessions), c.ID())
if err := c.ociRuntime.execStopContainer(c, c.config.StopTimeout); err != nil {
return err
}
for _, session := range c.state.ExecSessions {
if err := c.ociRuntime.ExecStopContainer(c, session.ID, c.StopTimeout()); err != nil {
return errors.Wrapf(err, "error stopping exec session %s of container %s", session.ID, c.ID())
}
}

View File

@ -50,11 +50,11 @@ func (c *Container) Commit(ctx context.Context, destImage string, options Contai
}
if c.state.State == define.ContainerStateRunning && options.Pause {
if err := c.ociRuntime.pauseContainer(c); err != nil {
if err := c.pause(); err != nil {
return nil, errors.Wrapf(err, "error pausing container %q", c.ID())
}
defer func() {
if err := c.ociRuntime.unpauseContainer(c); err != nil {
if err := c.unpause(); err != nil {
logrus.Errorf("error unpausing container %q: %v", c.ID(), err)
}
}()

View File

@ -131,13 +131,13 @@ func (c *Container) CheckpointPath() string {
}
// AttachSocketPath retrieves the path of the container's attach socket
func (c *Container) AttachSocketPath() string {
return filepath.Join(c.ociRuntime.socketsDir, c.ID(), "attach")
func (c *Container) AttachSocketPath() (string, error) {
return c.ociRuntime.AttachSocketPath(c)
}
// exitFilePath gets the path to the container's exit file
func (c *Container) exitFilePath() string {
return filepath.Join(c.ociRuntime.exitsDir, c.ID())
func (c *Container) exitFilePath() (string, error) {
return c.ociRuntime.ExitFilePath(c)
}
// create a bundle path and associated files for an exec session
@ -167,12 +167,8 @@ func (c *Container) cleanupExecBundle(sessionID string) error {
if err := os.RemoveAll(c.execBundlePath(sessionID)); err != nil && !os.IsNotExist(err) {
return err
}
// Clean up the sockets dir. Issue #3962
// Also ignore if it doesn't exist for some reason; hence the conditional return below
if err := os.RemoveAll(filepath.Join(c.ociRuntime.socketsDir, sessionID)); err != nil && !os.IsNotExist(err) {
return err
}
return nil
return c.ociRuntime.ExecContainerCleanup(c, sessionID)
}
// the path to a containers exec session bundle
@ -191,8 +187,8 @@ func (c *Container) execLogPath(sessionID string) string {
}
// the socket conmon creates for an exec session
func (c *Container) execAttachSocketPath(sessionID string) string {
return filepath.Join(c.ociRuntime.socketsDir, sessionID, "attach")
func (c *Container) execAttachSocketPath(sessionID string) (string, error) {
return c.ociRuntime.ExecAttachSocketPath(c, sessionID)
}
// execExitFileDir gets the path to the container's exit file
@ -202,7 +198,7 @@ func (c *Container) execExitFileDir(sessionID string) string {
// execOCILog returns the file path for the exec sessions oci log
func (c *Container) execOCILog(sessionID string) string {
if !c.ociRuntime.supportsJSON {
if !c.ociRuntime.SupportsJSONErrors() {
return ""
}
return filepath.Join(c.execBundlePath(sessionID), "oci-log")
@ -233,12 +229,15 @@ func (c *Container) readExecExitCode(sessionID string) (int, error) {
// Wait for the container's exit file to appear.
// When it does, update our state based on it.
func (c *Container) waitForExitFileAndSync() error {
exitFile := c.exitFilePath()
exitFile, err := c.exitFilePath()
if err != nil {
return err
}
chWait := make(chan error)
defer close(chWait)
_, err := WaitForFile(exitFile, chWait, time.Second*5)
_, err = WaitForFile(exitFile, chWait, time.Second*5)
if err != nil {
// Exit file did not appear
// Reset our state
@ -253,7 +252,7 @@ func (c *Container) waitForExitFileAndSync() error {
return err
}
if err := c.ociRuntime.updateContainerStatus(c, false); err != nil {
if err := c.ociRuntime.UpdateContainerStatus(c, false); err != nil {
return err
}
@ -388,7 +387,7 @@ func (c *Container) syncContainer() error {
(c.state.State != define.ContainerStateExited) {
oldState := c.state.State
// TODO: optionally replace this with a stat for the exit file
if err := c.ociRuntime.updateContainerStatus(c, false); err != nil {
if err := c.ociRuntime.UpdateContainerStatus(c, false); err != nil {
return err
}
// Only save back to DB if state changed
@ -649,7 +648,10 @@ func (c *Container) removeConmonFiles() error {
}
// Remove the exit file so we don't leak memory in tmpfs
exitFile := filepath.Join(c.ociRuntime.exitsDir, c.ID())
exitFile, err := c.exitFilePath()
if err != nil {
return err
}
if err := os.Remove(exitFile); err != nil && !os.IsNotExist(err) {
return errors.Wrapf(err, "error removing container %s exit file", c.ID())
}
@ -938,9 +940,13 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error {
}
// With the spec complete, do an OCI create
if err := c.ociRuntime.createContainer(c, nil); err != nil {
if err := c.ociRuntime.CreateContainer(c, nil); err != nil {
// Fedora 31 is carrying a patch to display improved error
// messages to better handle the V2 transition. This is NOT
// upstream in any OCI runtime.
// TODO: Remove once runc supports cgroupsv2
if strings.Contains(err.Error(), "this version of runc doesn't work on cgroups v2") {
logrus.Errorf("oci runtime %q does not support CGroups V2: use system migrate to mitigate", c.ociRuntime.name)
logrus.Errorf("oci runtime %q does not support CGroups V2: use system migrate to mitigate", c.ociRuntime.Name())
}
return err
}
@ -1088,7 +1094,7 @@ func (c *Container) start() error {
logrus.Debugf("Starting container %s with command %v", c.ID(), c.config.Spec.Process.Args)
}
if err := c.ociRuntime.startContainer(c); err != nil {
if err := c.ociRuntime.StartContainer(c); err != nil {
return err
}
logrus.Debugf("Started container %s", c.ID())
@ -1110,10 +1116,28 @@ func (c *Container) start() error {
}
// Internal, non-locking function to stop container
func (c *Container) stop(timeout uint) error {
func (c *Container) stop(timeout uint, all bool) error {
logrus.Debugf("Stopping ctr %s (timeout %d)", c.ID(), timeout)
if err := c.ociRuntime.stopContainer(c, timeout); err != nil {
// We can't use --all if CGroups aren't present.
// Rootless containers with CGroups v1 and NoCgroups are both cases
// where this can happen.
if all {
if c.config.NoCgroups {
all = false
} else if rootless.IsRootless() {
// Only do this check if we need to
unified, err := cgroups.IsCgroup2UnifiedMode()
if err != nil {
return err
}
if !unified {
all = false
}
}
}
if err := c.ociRuntime.StopContainer(c, timeout, all); err != nil {
return err
}
@ -1150,7 +1174,7 @@ func (c *Container) pause() error {
}
}
if err := c.ociRuntime.pauseContainer(c); err != nil {
if err := c.ociRuntime.PauseContainer(c); err != nil {
return err
}
@ -1167,7 +1191,7 @@ func (c *Container) unpause() error {
return errors.Wrapf(define.ErrNoCgroups, "cannot unpause without using CGroups")
}
if err := c.ociRuntime.unpauseContainer(c); err != nil {
if err := c.ociRuntime.UnpauseContainer(c); err != nil {
return err
}
@ -1188,7 +1212,7 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (err e
if c.state.State == define.ContainerStateRunning {
conmonPID := c.state.ConmonPID
if err := c.stop(timeout); err != nil {
if err := c.stop(timeout, false); err != nil {
return err
}
// Old versions of conmon have a bug where they create the exit file before
@ -1475,7 +1499,7 @@ func (c *Container) delete(ctx context.Context) (err error) {
span.SetTag("struct", "container")
defer span.Finish()
if err := c.ociRuntime.deleteContainer(c); err != nil {
if err := c.ociRuntime.DeleteContainer(c); err != nil {
return errors.Wrapf(err, "error removing container %s from runtime", c.ID())
}

View File

@ -419,27 +419,11 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
g.AddProcessEnv("container", "libpod")
}
unified, err := cgroups.IsCgroup2UnifiedMode()
cgroupPath, err := c.getOCICgroupPath()
if err != nil {
return nil, err
}
if (rootless.IsRootless() && !unified) || c.config.NoCgroups {
g.SetLinuxCgroupsPath("")
} else if c.runtime.config.CgroupManager == SystemdCgroupsManager {
// When runc is set to use Systemd as a cgroup manager, it
// expects cgroups to be passed as follows:
// slice:prefix:name
systemdCgroups := fmt.Sprintf("%s:libpod:%s", path.Base(c.config.CgroupParent), c.ID())
logrus.Debugf("Setting CGroups for container %s to %s", c.ID(), systemdCgroups)
g.SetLinuxCgroupsPath(systemdCgroups)
} else {
cgroupPath, err := c.CGroupPath()
if err != nil {
return nil, err
}
logrus.Debugf("Setting CGroup path for container %s to %s", c.ID(), cgroupPath)
g.SetLinuxCgroupsPath(cgroupPath)
}
g.SetLinuxCgroupsPath(cgroupPath)
// Mounts need to be sorted so paths will not cover other paths
mounts := sortMounts(g.Mounts())
@ -659,7 +643,7 @@ func (c *Container) checkpointRestoreSupported() (err error) {
if !criu.CheckForCriu() {
return errors.Errorf("Checkpoint/Restore requires at least CRIU %d", criu.MinCriuVersion)
}
if !c.ociRuntime.featureCheckCheckpointing() {
if !c.ociRuntime.SupportsCheckpoint() {
return errors.Errorf("Configured runtime does not support checkpoint/restore")
}
return nil
@ -695,7 +679,7 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO
return err
}
if err := c.ociRuntime.checkpointContainer(c, options); err != nil {
if err := c.ociRuntime.CheckpointContainer(c, options); err != nil {
return err
}
@ -923,7 +907,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
}
}
if err := c.ociRuntime.createContainer(c, &options); err != nil {
if err := c.ociRuntime.CreateContainer(c, &options); err != nil {
return err
}
@ -1332,3 +1316,30 @@ func (c *Container) refreshCNI() error {
podNetwork := c.runtime.getPodNetwork(c.ID(), c.config.Name, "", c.config.Networks, c.config.PortMappings, c.config.StaticIP)
return c.runtime.netPlugin.TearDownPod(podNetwork)
}
// Get cgroup path in a format suitable for the OCI spec
func (c *Container) getOCICgroupPath() (string, error) {
unified, err := cgroups.IsCgroup2UnifiedMode()
if err != nil {
return "", err
}
if (rootless.IsRootless() && !unified) || c.config.NoCgroups {
return "", nil
} else if c.runtime.config.CgroupManager == SystemdCgroupsManager {
// When runc is set to use Systemd as a cgroup manager, it
// expects cgroups to be passed as follows:
// slice:prefix:name
systemdCgroups := fmt.Sprintf("%s:libpod:%s", path.Base(c.config.CgroupParent), c.ID())
logrus.Debugf("Setting CGroups for container %s to %s", c.ID(), systemdCgroups)
return systemdCgroups, nil
} else if c.runtime.config.CgroupManager == CgroupfsCgroupsManager {
cgroupPath, err := c.CGroupPath()
if err != nil {
return "", err
}
logrus.Debugf("Setting CGroup path for container %s to %s", c.ID(), cgroupPath)
return cgroupPath, nil
} else {
return "", errors.Wrapf(define.ErrInvalidArg, "invalid cgroup manager %s requested", c.runtime.config.CgroupManager)
}
}

View File

@ -44,3 +44,7 @@ func (c *Container) copyOwnerAndPerms(source, dest string) error {
func (c *Container) refreshCNI() error {
return define.ErrNotImplemented
}
func (c *Container) getOCICgroupPath() (string, error) {
return "", define.ErrNotImplemented
}

View File

@ -141,7 +141,7 @@ func (c *Container) runHealthCheck() (HealthCheckStatus, error) {
logrus.Debugf("executing health check command %s for %s", strings.Join(newCommand, " "), c.ID())
timeStart := time.Now()
hcResult := HealthCheckSuccess
_, hcErr := c.Exec(false, false, []string{}, newCommand, "", "", streams, 0, nil, "")
_, hcErr := c.Exec(false, false, map[string]string{}, newCommand, "", "", streams, 0, nil, "")
if hcErr != nil {
errCause := errors.Cause(hcErr)
hcResult = HealthCheckFailure

View File

@ -15,7 +15,6 @@ import (
"github.com/containers/buildah"
"github.com/containers/libpod/pkg/cgroups"
"github.com/containers/libpod/pkg/rootless"
"github.com/containers/libpod/utils"
"github.com/containers/storage"
"github.com/containers/storage/pkg/system"
"github.com/pkg/errors"
@ -48,14 +47,7 @@ func (r *Runtime) hostInfo() (map[string]interface{}, error) {
info["MemFree"] = mi.MemFree
info["SwapTotal"] = mi.SwapTotal
info["SwapFree"] = mi.SwapFree
conmonVersion, _ := r.GetConmonVersion()
ociruntimeVersion, _ := r.GetOCIRuntimeVersion()
hostDistributionInfo := r.GetHostDistributionInfo()
info["Conmon"] = map[string]interface{}{
"path": r.conmonPath,
"package": r.defaultOCIRuntime.conmonPackage(),
"version": conmonVersion,
}
if rootless.IsRootless() {
if path, err := exec.LookPath("slirp4netns"); err == nil {
logrus.Warnf("Failed to retrieve program version for %s: %v", path, err)
@ -82,11 +74,6 @@ func (r *Runtime) hostInfo() (map[string]interface{}, error) {
idmappings["gidmap"] = gidmappings
info["IDMappings"] = idmappings
}
info["OCIRuntime"] = map[string]interface{}{
"path": r.defaultOCIRuntime.path,
"package": r.defaultOCIRuntime.pathPackage(),
"version": ociruntimeVersion,
}
info["Distribution"] = map[string]interface{}{
"distribution": hostDistributionInfo["Distribution"],
"version": hostDistributionInfo["Version"],
@ -98,6 +85,15 @@ func (r *Runtime) hostInfo() (map[string]interface{}, error) {
}
info["kernel"] = kv
runtimeInfo, err := r.defaultOCIRuntime.RuntimeInfo()
if err != nil {
logrus.Errorf("Error getting info on OCI runtime %s: %v", r.defaultOCIRuntime.Name(), err)
} else {
for k, v := range runtimeInfo {
info[k] = v
}
}
up, err := readUptime()
if err != nil {
return nil, errors.Wrapf(err, "error reading up time")
@ -228,29 +224,6 @@ func readUptime() (string, error) {
return string(f[0]), nil
}
// GetConmonVersion returns a string representation of the conmon version
func (r *Runtime) GetConmonVersion() (string, error) {
output, err := utils.ExecCmd(r.conmonPath, "--version")
if err != nil {
return "", err
}
return strings.TrimSuffix(strings.Replace(output, "\n", ", ", 1), "\n"), nil
}
// GetOCIRuntimePath returns the path to the OCI Runtime Path the runtime is using
func (r *Runtime) GetOCIRuntimePath() string {
return r.defaultOCIRuntime.path
}
// GetOCIRuntimeVersion returns a string representation of the oci runtimes version
func (r *Runtime) GetOCIRuntimeVersion() (string, error) {
output, err := utils.ExecCmd(r.GetOCIRuntimePath(), "--version")
if err != nil {
return "", err
}
return strings.TrimSuffix(output, "\n"), nil
}
// GetHostDistributionInfo returns a map containing the host's distribution and version
func (r *Runtime) GetHostDistributionInfo() map[string]string {
dist := make(map[string]string)

View File

@ -157,7 +157,7 @@ func (r *Runtime) setupRootlessNetNS(ctr *Container) (err error) {
defer errorhandling.CloseQuiet(syncW)
havePortMapping := len(ctr.Config().PortMappings) > 0
apiSocket := filepath.Join(ctr.ociRuntime.tmpDir, fmt.Sprintf("%s.net", ctr.config.ID))
apiSocket := filepath.Join(ctr.runtime.config.TmpDir, fmt.Sprintf("%s.net", ctr.config.ID))
cmdArgs := []string{}
if havePortMapping {

View File

@ -1,441 +1,132 @@
package libpod
import (
"bytes"
"fmt"
"io/ioutil"
"net"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/containers/libpod/libpod/define"
"github.com/containers/libpod/pkg/util"
"github.com/cri-o/ocicni/pkg/ocicni"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
// TODO import these functions into libpod and remove the import
// Trying to keep libpod from depending on CRI-O code
"github.com/containers/libpod/utils"
"k8s.io/client-go/tools/remotecommand"
)
// OCI code is undergoing heavy rewrite
// OCIRuntime is an implementation of an OCI runtime.
// The OCI runtime implementation is expected to be a fairly thin wrapper around
// the actual runtime, and is not expected to include things like state
// management logic - e.g., we do not expect it to determine on its own that
// calling 'UnpauseContainer()' on a container that is not paused is an error.
// The code calling the OCIRuntime will manage this.
// TODO: May want to move the Attach() code under this umbrella. It's highly OCI
// runtime dependent.
// TODO: May want to move the conmon cleanup code here too - it depends on
// Conmon being in use.
type OCIRuntime interface {
// Name returns the name of the runtime.
Name() string
// Path returns the path to the runtime executable.
Path() string
const (
// CgroupfsCgroupsManager represents cgroupfs native cgroup manager
CgroupfsCgroupsManager = "cgroupfs"
// SystemdCgroupsManager represents systemd native cgroup manager
SystemdCgroupsManager = "systemd"
// CreateContainer creates the container in the OCI runtime.
CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) error
// UpdateContainerStatus updates the status of the given container.
// It includes a switch for whether to perform a hard query of the
// runtime. If unset, the exit file (if supported by the implementation)
// will be used.
UpdateContainerStatus(ctr *Container, useRuntime bool) error
// StartContainer starts the given container.
StartContainer(ctr *Container) error
// KillContainer sends the given signal to the given container.
// If all is set, all processes in the container will be signalled;
// otherwise, only init will be signalled.
KillContainer(ctr *Container, signal uint, all bool) error
// StopContainer stops the given container.
// The container's stop signal (or SIGTERM if unspecified) will be sent
// first.
// After the given timeout, SIGKILL will be sent.
// If the given timeout is 0, SIGKILL will be sent immediately, and the
// stop signal will be omitted.
// If all is set, we will attempt to use the --all flag will `kill` in
// the OCI runtime to kill all processes in the container, including
// exec sessions. This is only supported if the container has cgroups.
StopContainer(ctr *Container, timeout uint, all bool) error
// DeleteContainer deletes the given container from the OCI runtime.
DeleteContainer(ctr *Container) error
// PauseContainer pauses the given container.
PauseContainer(ctr *Container) error
// UnpauseContainer unpauses the given container.
UnpauseContainer(ctr *Container) error
// ContainerCreateTimeout represents the value of container creating timeout
ContainerCreateTimeout = 240 * time.Second
// ExecContainer executes a command in a running container.
// Returns an int (exit code), error channel (errors from attach), and
// error (errors that occurred attempting to start the exec session).
ExecContainer(ctr *Container, sessionID string, options *ExecOptions) (int, chan error, error)
// ExecStopContainer stops a given exec session in a running container.
// SIGTERM with be sent initially, then SIGKILL after the given timeout.
// If timeout is 0, SIGKILL will be sent immediately, and SIGTERM will
// be omitted.
ExecStopContainer(ctr *Container, sessionID string, timeout uint) error
// ExecContainerCleanup cleans up after an exec session exits.
// It removes any files left by the exec session that are no longer
// needed, including the attach socket.
ExecContainerCleanup(ctr *Container, sessionID string) error
// Timeout before declaring that runtime has failed to kill a given
// container
killContainerTimeout = 5 * time.Second
// DefaultShmSize is the default shm size
DefaultShmSize = 64 * 1024 * 1024
// NsRunDir is the default directory in which running network namespaces
// are stored
NsRunDir = "/var/run/netns"
)
// CheckpointContainer checkpoints the given container.
// Some OCI runtimes may not support this - if SupportsCheckpoint()
// returns false, this is not implemented, and will always return an
// error.
CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error
// OCIRuntime represents an OCI-compatible runtime that libpod can call into
// to perform container operations
type OCIRuntime struct {
name string
path string
conmonPath string
conmonEnv []string
cgroupManager string
tmpDir string
exitsDir string
socketsDir string
logSizeMax int64
noPivot bool
reservePorts bool
supportsJSON bool
supportsNoCgroups bool
sdNotify bool
// SupportsCheckpoint returns whether this OCI runtime
// implementation supports the CheckpointContainer() operation.
SupportsCheckpoint() bool
// SupportsJSONErrors is whether the runtime can return JSON-formatted
// error messages.
SupportsJSONErrors() bool
// SupportsNoCgroups is whether the runtime supports running containers
// without cgroups.
SupportsNoCgroups() bool
// AttachSocketPath is the path to the socket to attach to a given
// container.
// TODO: If we move Attach code in here, this should be made internal.
// We don't want to force all runtimes to share the same attach
// implementation.
AttachSocketPath(ctr *Container) (string, error)
// ExecAttachSocketPath is the path to the socket to attach to a given
// exec session in the given container.
// TODO: Probably should be made internal.
ExecAttachSocketPath(ctr *Container, sessionID string) (string, error)
// ExitFilePath is the path to a container's exit file.
// All runtime implementations must create an exit file when containers
// exit, containing the exit code of the container (as a string).
// This is the path to that file for a given container.
ExitFilePath(ctr *Container) (string, error)
// RuntimeInfo returns verbose information about the runtime.
RuntimeInfo() (map[string]interface{}, error)
}
// ociError is used to parse the OCI runtime JSON log. It is not part of the
// OCI runtime specifications, it follows what runc does
type ociError struct {
Level string `json:"level,omitempty"`
Time string `json:"time,omitempty"`
Msg string `json:"msg,omitempty"`
}
// Make a new OCI runtime with provided options.
// The first path that points to a valid executable will be used.
func newOCIRuntime(name string, paths []string, conmonPath string, runtimeCfg *RuntimeConfig, supportsJSON, supportsNoCgroups bool) (*OCIRuntime, error) {
if name == "" {
return nil, errors.Wrapf(define.ErrInvalidArg, "the OCI runtime must be provided a non-empty name")
}
runtime := new(OCIRuntime)
runtime.name = name
runtime.conmonPath = conmonPath
runtime.conmonEnv = runtimeCfg.ConmonEnvVars
runtime.cgroupManager = runtimeCfg.CgroupManager
runtime.tmpDir = runtimeCfg.TmpDir
runtime.logSizeMax = runtimeCfg.MaxLogSize
runtime.noPivot = runtimeCfg.NoPivotRoot
runtime.reservePorts = runtimeCfg.EnablePortReservation
runtime.sdNotify = runtimeCfg.SDNotify
// TODO: probe OCI runtime for feature and enable automatically if
// available.
runtime.supportsJSON = supportsJSON
runtime.supportsNoCgroups = supportsNoCgroups
foundPath := false
for _, path := range paths {
stat, err := os.Stat(path)
if err != nil {
if os.IsNotExist(err) {
continue
}
return nil, errors.Wrapf(err, "cannot stat %s", path)
}
if !stat.Mode().IsRegular() {
continue
}
foundPath = true
runtime.path = path
logrus.Debugf("using runtime %q", path)
break
}
// Search the $PATH as last fallback
if !foundPath {
if foundRuntime, err := exec.LookPath(name); err == nil {
foundPath = true
runtime.path = foundRuntime
logrus.Debugf("using runtime %q from $PATH: %q", name, foundRuntime)
}
}
if !foundPath {
return nil, errors.Wrapf(define.ErrInvalidArg, "no valid executable found for OCI runtime %s", name)
}
runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits")
runtime.socketsDir = filepath.Join(runtime.tmpDir, "socket")
if runtime.cgroupManager != CgroupfsCgroupsManager && runtime.cgroupManager != SystemdCgroupsManager {
return nil, errors.Wrapf(define.ErrInvalidArg, "invalid cgroup manager specified: %s", runtime.cgroupManager)
}
// Create the exit files and attach sockets directories
if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil {
// The directory is allowed to exist
if !os.IsExist(err) {
return nil, errors.Wrapf(err, "error creating OCI runtime exit files directory %s",
runtime.exitsDir)
}
}
if err := os.MkdirAll(runtime.socketsDir, 0750); err != nil {
// The directory is allowed to exist
if !os.IsExist(err) {
return nil, errors.Wrapf(err, "error creating OCI runtime attach sockets directory %s",
runtime.socketsDir)
}
}
return runtime, nil
}
// Create systemd unit name for cgroup scopes
func createUnitName(prefix string, name string) string {
return fmt.Sprintf("%s-%s.scope", prefix, name)
}
func bindPorts(ports []ocicni.PortMapping) ([]*os.File, error) {
var files []*os.File
notifySCTP := false
for _, i := range ports {
switch i.Protocol {
case "udp":
addr, err := net.ResolveUDPAddr("udp", fmt.Sprintf("%s:%d", i.HostIP, i.HostPort))
if err != nil {
return nil, errors.Wrapf(err, "cannot resolve the UDP address")
}
server, err := net.ListenUDP("udp", addr)
if err != nil {
return nil, errors.Wrapf(err, "cannot listen on the UDP port")
}
f, err := server.File()
if err != nil {
return nil, errors.Wrapf(err, "cannot get file for UDP socket")
}
files = append(files, f)
case "tcp":
addr, err := net.ResolveTCPAddr("tcp4", fmt.Sprintf("%s:%d", i.HostIP, i.HostPort))
if err != nil {
return nil, errors.Wrapf(err, "cannot resolve the TCP address")
}
server, err := net.ListenTCP("tcp4", addr)
if err != nil {
return nil, errors.Wrapf(err, "cannot listen on the TCP port")
}
f, err := server.File()
if err != nil {
return nil, errors.Wrapf(err, "cannot get file for TCP socket")
}
files = append(files, f)
case "sctp":
if !notifySCTP {
notifySCTP = true
logrus.Warnf("port reservation for SCTP is not supported")
}
default:
return nil, fmt.Errorf("unknown protocol %s", i.Protocol)
}
}
return files, nil
}
// updateContainerStatus retrieves the current status of the container from the
// runtime. It updates the container's state but does not save it.
// If useRunc is false, we will not directly hit runc to see the container's
// status, but will instead only check for the existence of the conmon exit file
// and update state to stopped if it exists.
func (r *OCIRuntime) updateContainerStatus(ctr *Container, useRuntime bool) error {
exitFile := ctr.exitFilePath()
runtimeDir, err := util.GetRuntimeDir()
if err != nil {
return err
}
// If not using the OCI runtime, we don't need to do most of this.
if !useRuntime {
// If the container's not running, nothing to do.
if ctr.state.State != define.ContainerStateRunning && ctr.state.State != define.ContainerStatePaused {
return nil
}
// Check for the exit file conmon makes
info, err := os.Stat(exitFile)
if err != nil {
if os.IsNotExist(err) {
// Container is still running, no error
return nil
}
return errors.Wrapf(err, "error running stat on container %s exit file", ctr.ID())
}
// Alright, it exists. Transition to Stopped state.
ctr.state.State = define.ContainerStateStopped
ctr.state.PID = 0
ctr.state.ConmonPID = 0
// Read the exit file to get our stopped time and exit code.
return ctr.handleExitFile(exitFile, info)
}
// Store old state so we know if we were already stopped
oldState := ctr.state.State
state := new(spec.State)
cmd := exec.Command(r.path, "state", ctr.ID())
cmd.Env = append(cmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir))
outPipe, err := cmd.StdoutPipe()
if err != nil {
return errors.Wrapf(err, "getting stdout pipe")
}
errPipe, err := cmd.StderrPipe()
if err != nil {
return errors.Wrapf(err, "getting stderr pipe")
}
if err := cmd.Start(); err != nil {
out, err2 := ioutil.ReadAll(errPipe)
if err2 != nil {
return errors.Wrapf(err, "error getting container %s state", ctr.ID())
}
if strings.Contains(string(out), "does not exist") {
if err := ctr.removeConmonFiles(); err != nil {
logrus.Debugf("unable to remove conmon files for container %s", ctr.ID())
}
ctr.state.ExitCode = -1
ctr.state.FinishedTime = time.Now()
ctr.state.State = define.ContainerStateExited
return nil
}
return errors.Wrapf(err, "error getting container %s state. stderr/out: %s", ctr.ID(), out)
}
defer func() {
_ = cmd.Wait()
}()
if err := errPipe.Close(); err != nil {
return err
}
out, err := ioutil.ReadAll(outPipe)
if err != nil {
return errors.Wrapf(err, "error reading stdout: %s", ctr.ID())
}
if err := json.NewDecoder(bytes.NewBuffer(out)).Decode(state); err != nil {
return errors.Wrapf(err, "error decoding container status for container %s", ctr.ID())
}
ctr.state.PID = state.Pid
switch state.Status {
case "created":
ctr.state.State = define.ContainerStateCreated
case "paused":
ctr.state.State = define.ContainerStatePaused
case "running":
ctr.state.State = define.ContainerStateRunning
case "stopped":
ctr.state.State = define.ContainerStateStopped
default:
return errors.Wrapf(define.ErrInternal, "unrecognized status returned by runtime for container %s: %s",
ctr.ID(), state.Status)
}
// Only grab exit status if we were not already stopped
// If we were, it should already be in the database
if ctr.state.State == define.ContainerStateStopped && oldState != define.ContainerStateStopped {
var fi os.FileInfo
chWait := make(chan error)
defer close(chWait)
_, err := WaitForFile(exitFile, chWait, time.Second*5)
if err == nil {
fi, err = os.Stat(exitFile)
}
if err != nil {
ctr.state.ExitCode = -1
ctr.state.FinishedTime = time.Now()
logrus.Errorf("No exit file for container %s found: %v", ctr.ID(), err)
return nil
}
return ctr.handleExitFile(exitFile, fi)
}
return nil
}
// startContainer starts the given container
// Sets time the container was started, but does not save it.
func (r *OCIRuntime) startContainer(ctr *Container) error {
// TODO: streams should probably *not* be our STDIN/OUT/ERR - redirect to buffers?
runtimeDir, err := util.GetRuntimeDir()
if err != nil {
return err
}
env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok {
env = append(env, fmt.Sprintf("NOTIFY_SOCKET=%s", notify))
}
if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, "start", ctr.ID()); err != nil {
return err
}
ctr.state.StartedTime = time.Now()
return nil
}
// killContainer sends the given signal to the given container
func (r *OCIRuntime) killContainer(ctr *Container, signal uint) error {
logrus.Debugf("Sending signal %d to container %s", signal, ctr.ID())
runtimeDir, err := util.GetRuntimeDir()
if err != nil {
return err
}
env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, "kill", ctr.ID(), fmt.Sprintf("%d", signal)); err != nil {
return errors.Wrapf(err, "error sending signal to container %s", ctr.ID())
}
return nil
}
// deleteContainer deletes a container from the OCI runtime
func (r *OCIRuntime) deleteContainer(ctr *Container) error {
runtimeDir, err := util.GetRuntimeDir()
if err != nil {
return err
}
env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, "delete", "--force", ctr.ID())
}
// pauseContainer pauses the given container
func (r *OCIRuntime) pauseContainer(ctr *Container) error {
runtimeDir, err := util.GetRuntimeDir()
if err != nil {
return err
}
env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, "pause", ctr.ID())
}
// unpauseContainer unpauses the given container
func (r *OCIRuntime) unpauseContainer(ctr *Container) error {
runtimeDir, err := util.GetRuntimeDir()
if err != nil {
return err
}
env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, "resume", ctr.ID())
}
// checkpointContainer checkpoints the given container
func (r *OCIRuntime) checkpointContainer(ctr *Container, options ContainerCheckpointOptions) error {
if err := label.SetSocketLabel(ctr.ProcessLabel()); err != nil {
return err
}
// imagePath is used by CRIU to store the actual checkpoint files
imagePath := ctr.CheckpointPath()
// workPath will be used to store dump.log and stats-dump
workPath := ctr.bundlePath()
logrus.Debugf("Writing checkpoint to %s", imagePath)
logrus.Debugf("Writing checkpoint logs to %s", workPath)
args := []string{}
args = append(args, "checkpoint")
args = append(args, "--image-path")
args = append(args, imagePath)
args = append(args, "--work-path")
args = append(args, workPath)
if options.KeepRunning {
args = append(args, "--leave-running")
}
if options.TCPEstablished {
args = append(args, "--tcp-established")
}
args = append(args, ctr.ID())
return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...)
}
func (r *OCIRuntime) featureCheckCheckpointing() bool {
// Check if the runtime implements checkpointing. Currently only
// runc's checkpoint/restore implementation is supported.
cmd := exec.Command(r.path, "checkpoint", "-h")
if err := cmd.Start(); err != nil {
return false
}
if err := cmd.Wait(); err == nil {
return true
}
return false
// ExecOptions are options passed into ExecContainer. They control the command
// that will be executed and how the exec will proceed.
type ExecOptions struct {
// Cmd is the command to execute.
Cmd []string
// CapAdd is a set of capabilities to add to the executed command.
CapAdd []string
// Env is a set of environment variables to add to the container.
Env map[string]string
// Terminal is whether to create a new TTY for the exec session.
Terminal bool
// Cwd is the working directory for the executed command. If unset, the
// working directory of the container will be used.
Cwd string
// User is the user the command will be executed as. If unset, the user
// the container was run as will be used.
User string
// Streams are the streams that will be attached to the container.
Streams *AttachStreams
// PreserveFDs is a number of additional file descriptors (in addition
// to 0, 1, 2) that will be passed to the executed process. The total FDs
// passed will be 3 + PreserveFDs.
PreserveFDs uint
// Resize is a channel where terminal resize events are sent to be
// handled.
Resize chan remotecommand.TerminalSize
// DetachKeys is a set of keys that, when pressed in sequence, will
// detach from the container.
DetachKeys string
}

View File

@ -47,7 +47,11 @@ func (c *Container) attach(streams *AttachStreams, keys string, resize <-chan re
registerResizeFunc(resize, c.bundlePath())
socketPath := buildSocketPath(c.AttachSocketPath())
attachSock, err := c.AttachSocketPath()
if err != nil {
return err
}
socketPath := buildSocketPath(attachSock)
conn, err := net.DialUnix("unixpacket", nil, &net.UnixAddr{Name: socketPath, Net: "unixpacket"})
if err != nil {
@ -108,7 +112,11 @@ func (c *Container) attachToExec(streams *AttachStreams, keys string, resize <-c
logrus.Debugf("Attaching to container %s exec session %s", c.ID(), sessionID)
// set up the socket path, such that it is the correct length and location for exec
socketPath := buildSocketPath(c.execAttachSocketPath(sessionID))
sockPath, err := c.execAttachSocketPath(sessionID)
if err != nil {
return err
}
socketPath := buildSocketPath(sockPath)
// 2: read from attachFd that the parent process has set up the console socket
if _, err := readConmonPipeData(attachFd, ""); err != nil {

1421
libpod/oci_conmon_linux.go Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,130 @@
// +build !linux
package libpod
import (
"github.com/containers/libpod/libpod/define"
)
const (
osNotSupported = "Not supported on this OS"
)
// ConmonOCIRuntime is not supported on this OS.
type ConmonOCIRuntime struct {
}
// newConmonOCIRuntime is not supported on this OS.
func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtimeCfg *RuntimeConfig, supportsJSON, supportsNoCgroups bool) (OCIRuntime, error) {
return nil, define.ErrNotImplemented
}
// Name is not supported on this OS.
func (r *ConmonOCIRuntime) Name() string {
return osNotSupported
}
// Path is not supported on this OS.
func (r *ConmonOCIRuntime) Path() string {
return osNotSupported
}
// CreateContainer is not supported on this OS.
func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) error {
return define.ErrNotImplemented
}
// UpdateContainerStatus is not supported on this OS.
func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container, useRuntime bool) error {
return define.ErrNotImplemented
}
// StartContainer is not supported on this OS.
func (r *ConmonOCIRuntime) StartContainer(ctr *Container) error {
return define.ErrNotImplemented
}
// KillContainer is not supported on this OS.
func (r *ConmonOCIRuntime) KillContainer(ctr *Container, signal uint, all bool) error {
return define.ErrNotImplemented
}
// StopContainer is not supported on this OS.
func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool) error {
return define.ErrNotImplemented
}
// DeleteContainer is not supported on this OS.
func (r *ConmonOCIRuntime) DeleteContainer(ctr *Container) error {
return define.ErrNotImplemented
}
// PauseContainer is not supported on this OS.
func (r *ConmonOCIRuntime) PauseContainer(ctr *Container) error {
return define.ErrNotImplemented
}
// UnpauseContainer is not supported on this OS.
func (r *ConmonOCIRuntime) UnpauseContainer(ctr *Container) error {
return define.ErrNotImplemented
}
// ExecContainer is not supported on this OS.
func (r *ConmonOCIRuntime) ExecContainer(ctr *Container, sessionID string, options *ExecOptions) (int, chan error, error) {
return -1, nil, define.ErrNotImplemented
}
// ExecStopContainer is not supported on this OS.
func (r *ConmonOCIRuntime) ExecStopContainer(ctr *Container, sessionID string, timeout uint) error {
return define.ErrNotImplemented
}
// CheckpointContainer is not supported on this OS.
func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error {
return define.ErrNotImplemented
}
// SupportsCheckpoint is not supported on this OS.
func (r *ConmonOCIRuntime) SupportsCheckpoint() bool {
return false
}
// SupportsJSONErrors is not supported on this OS.
func (r *ConmonOCIRuntime) SupportsJSONErrors() bool {
return false
}
// SupportsNoCgroups is not supported on this OS.
func (r *ConmonOCIRuntime) SupportsNoCgroups() bool {
return false
}
// AttachSocketPath is not supported on this OS.
func (r *ConmonOCIRuntime) AttachSocketPath(ctr *Container) (string, error) {
return "", define.ErrNotImplemented
}
// ExecAttachSocketPath is not supported on this OS.
func (r *ConmonOCIRuntime) ExecAttachSocketPath(ctr *Container, sessionID string) (string, error) {
return "", define.ErrNotImplemented
}
// ExitFilePath is not supported on this OS.
func (r *ConmonOCIRuntime) ExitFilePath(ctr *Container) (string, error) {
return "", define.ErrNotImplemented
}
// RuntimeInfo is not supported on this OS.
func (r *ConmonOCIRuntime) RuntimeInfo() (map[string]interface{}, error) {
return nil, define.ErrNotImplemented
}
// Package is not supported on this OS.
func (r *ConmonOCIRuntime) Package() string {
return osNotSupported
}
// ConmonPackage is not supported on this OS.
func (r *ConmonOCIRuntime) ConmonPackage() string {
return osNotSupported
}

View File

@ -1,556 +0,0 @@
// +build linux
package libpod
import (
"bufio"
"bytes"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime"
"strconv"
"strings"
"syscall"
"time"
"github.com/containers/libpod/libpod/define"
"github.com/containers/libpod/pkg/cgroups"
"github.com/containers/libpod/pkg/errorhandling"
"github.com/containers/libpod/pkg/lookup"
"github.com/containers/libpod/pkg/rootless"
"github.com/containers/libpod/pkg/util"
"github.com/containers/libpod/utils"
"github.com/coreos/go-systemd/activation"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// createOCIContainer generates this container's main conmon instance and prepares it for starting
func (r *OCIRuntime) createOCIContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (err error) {
var stderrBuf bytes.Buffer
runtimeDir, err := util.GetRuntimeDir()
if err != nil {
return err
}
parentSyncPipe, childSyncPipe, err := newPipe()
if err != nil {
return errors.Wrapf(err, "error creating socket pair")
}
defer errorhandling.CloseQuiet(parentSyncPipe)
childStartPipe, parentStartPipe, err := newPipe()
if err != nil {
return errors.Wrapf(err, "error creating socket pair for start pipe")
}
defer errorhandling.CloseQuiet(parentStartPipe)
var ociLog string
if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON {
ociLog = filepath.Join(ctr.state.RunDir, "oci-log")
}
args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), filepath.Join(ctr.state.RunDir, "pidfile"), ctr.LogPath(), r.exitsDir, ociLog)
if ctr.config.Spec.Process.Terminal {
args = append(args, "-t")
} else if ctr.config.Stdin {
args = append(args, "-i")
}
if ctr.config.ConmonPidFile != "" {
args = append(args, "--conmon-pidfile", ctr.config.ConmonPidFile)
}
if r.noPivot {
args = append(args, "--no-pivot")
}
if len(ctr.config.ExitCommand) > 0 {
args = append(args, "--exit-command", ctr.config.ExitCommand[0])
for _, arg := range ctr.config.ExitCommand[1:] {
args = append(args, []string{"--exit-command-arg", arg}...)
}
}
if restoreOptions != nil {
args = append(args, "--restore", ctr.CheckpointPath())
if restoreOptions.TCPEstablished {
args = append(args, "--runtime-opt", "--tcp-established")
}
}
logrus.WithFields(logrus.Fields{
"args": args,
}).Debugf("running conmon: %s", r.conmonPath)
cmd := exec.Command(r.conmonPath, args...)
cmd.Dir = ctr.bundlePath()
cmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}
// TODO this is probably a really bad idea for some uses
// Make this configurable
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if ctr.config.Spec.Process.Terminal {
cmd.Stderr = &stderrBuf
}
// 0, 1 and 2 are stdin, stdout and stderr
conmonEnv, envFiles, err := r.configureConmonEnv(runtimeDir)
if err != nil {
return err
}
cmd.Env = append(r.conmonEnv, fmt.Sprintf("_OCI_SYNCPIPE=%d", 3), fmt.Sprintf("_OCI_STARTPIPE=%d", 4))
cmd.Env = append(cmd.Env, conmonEnv...)
cmd.ExtraFiles = append(cmd.ExtraFiles, childSyncPipe, childStartPipe)
cmd.ExtraFiles = append(cmd.ExtraFiles, envFiles...)
if r.reservePorts && !ctr.config.NetMode.IsSlirp4netns() {
ports, err := bindPorts(ctr.config.PortMappings)
if err != nil {
return err
}
// Leak the port we bound in the conmon process. These fd's won't be used
// by the container and conmon will keep the ports busy so that another
// process cannot use them.
cmd.ExtraFiles = append(cmd.ExtraFiles, ports...)
}
if ctr.config.NetMode.IsSlirp4netns() {
if ctr.config.PostConfigureNetNS {
ctr.rootlessSlirpSyncR, ctr.rootlessSlirpSyncW, err = os.Pipe()
if err != nil {
return errors.Wrapf(err, "failed to create rootless network sync pipe")
}
} else {
if ctr.rootlessSlirpSyncR != nil {
defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncR)
}
if ctr.rootlessSlirpSyncW != nil {
defer errorhandling.CloseQuiet(ctr.rootlessSlirpSyncW)
}
}
// Leak one end in conmon, the other one will be leaked into slirp4netns
cmd.ExtraFiles = append(cmd.ExtraFiles, ctr.rootlessSlirpSyncW)
}
err = startCommandGivenSelinux(cmd)
// regardless of whether we errored or not, we no longer need the children pipes
childSyncPipe.Close()
childStartPipe.Close()
if err != nil {
return err
}
if err := r.moveConmonToCgroupAndSignal(ctr, cmd, parentStartPipe, ctr.ID()); err != nil {
return err
}
/* Wait for initial setup and fork, and reap child */
err = cmd.Wait()
if err != nil {
return err
}
pid, err := readConmonPipeData(parentSyncPipe, ociLog)
if err != nil {
if err2 := r.deleteContainer(ctr); err2 != nil {
logrus.Errorf("Error removing container %s from runtime after creation failed", ctr.ID())
}
return err
}
ctr.state.PID = pid
conmonPID, err := readConmonPidFile(ctr.config.ConmonPidFile)
if err != nil {
logrus.Warnf("error reading conmon pid file for container %s: %s", ctr.ID(), err.Error())
} else if conmonPID > 0 {
// conmon not having a pid file is a valid state, so don't set it if we don't have it
logrus.Infof("Got Conmon PID as %d", conmonPID)
ctr.state.ConmonPID = conmonPID
}
return nil
}
// prepareProcessExec returns the path of the process.json used in runc exec -p
// caller is responsible to close the returned *os.File if needed.
func prepareProcessExec(c *Container, cmd, env []string, tty bool, cwd, user, sessionID string) (*os.File, error) {
f, err := ioutil.TempFile(c.execBundlePath(sessionID), "exec-process-")
if err != nil {
return nil, err
}
pspec := c.config.Spec.Process
pspec.Args = cmd
// We need to default this to false else it will inherit terminal as true
// from the container.
pspec.Terminal = false
if tty {
pspec.Terminal = true
}
if len(env) > 0 {
pspec.Env = append(pspec.Env, env...)
}
if cwd != "" {
pspec.Cwd = cwd
}
overrides := c.getUserOverrides()
execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, user, overrides)
if err != nil {
return nil, err
}
// If user was set, look it up in the container to get a UID to use on
// the host
if user != "" {
sgids := make([]uint32, 0, len(execUser.Sgids))
for _, sgid := range execUser.Sgids {
sgids = append(sgids, uint32(sgid))
}
processUser := spec.User{
UID: uint32(execUser.Uid),
GID: uint32(execUser.Gid),
AdditionalGids: sgids,
}
pspec.User = processUser
}
hasHomeSet := false
for _, s := range pspec.Env {
if strings.HasPrefix(s, "HOME=") {
hasHomeSet = true
break
}
}
if !hasHomeSet {
pspec.Env = append(pspec.Env, fmt.Sprintf("HOME=%s", execUser.Home))
}
processJSON, err := json.Marshal(pspec)
if err != nil {
return nil, err
}
if err := ioutil.WriteFile(f.Name(), processJSON, 0644); err != nil {
return nil, err
}
return f, nil
}
// configureConmonEnv gets the environment values to add to conmon's exec struct
// TODO this may want to be less hardcoded/more configurable in the future
func (r *OCIRuntime) configureConmonEnv(runtimeDir string) ([]string, []*os.File, error) {
env := make([]string, 0, 6)
env = append(env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir))
env = append(env, fmt.Sprintf("_CONTAINERS_USERNS_CONFIGURED=%s", os.Getenv("_CONTAINERS_USERNS_CONFIGURED")))
env = append(env, fmt.Sprintf("_CONTAINERS_ROOTLESS_UID=%s", os.Getenv("_CONTAINERS_ROOTLESS_UID")))
home, err := homeDir()
if err != nil {
return nil, nil, err
}
env = append(env, fmt.Sprintf("HOME=%s", home))
extraFiles := make([]*os.File, 0)
if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok {
env = append(env, fmt.Sprintf("NOTIFY_SOCKET=%s", notify))
}
if !r.sdNotify {
if listenfds, ok := os.LookupEnv("LISTEN_FDS"); ok {
env = append(env, fmt.Sprintf("LISTEN_FDS=%s", listenfds), "LISTEN_PID=1")
fds := activation.Files(false)
extraFiles = append(extraFiles, fds...)
}
} else {
logrus.Debug("disabling SD notify")
}
return env, extraFiles, nil
}
// sharedConmonArgs takes common arguments for exec and create/restore and formats them for the conmon CLI
func (r *OCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath string) []string {
// set the conmon API version to be able to use the correct sync struct keys
args := []string{"--api-version", "1"}
if r.cgroupManager == SystemdCgroupsManager && !ctr.config.NoCgroups {
args = append(args, "-s")
}
args = append(args, "-c", ctr.ID())
args = append(args, "-u", cuuid)
args = append(args, "-r", r.path)
args = append(args, "-b", bundlePath)
args = append(args, "-p", pidPath)
var logDriver string
switch ctr.LogDriver() {
case JournaldLogging:
logDriver = JournaldLogging
case JSONLogging:
fallthrough
default: //nolint-stylecheck
// No case here should happen except JSONLogging, but keep this here in case the options are extended
logrus.Errorf("%s logging specified but not supported. Choosing k8s-file logging instead", ctr.LogDriver())
fallthrough
case "":
// to get here, either a user would specify `--log-driver ""`, or this came from another place in libpod
// since the former case is obscure, and the latter case isn't an error, let's silently fallthrough
fallthrough
case KubernetesLogging:
logDriver = fmt.Sprintf("%s:%s", KubernetesLogging, logPath)
}
args = append(args, "-l", logDriver)
args = append(args, "--exit-dir", exitDir)
args = append(args, "--socket-dir-path", r.socketsDir)
if r.logSizeMax >= 0 {
args = append(args, "--log-size-max", fmt.Sprintf("%v", r.logSizeMax))
}
logLevel := logrus.GetLevel()
args = append(args, "--log-level", logLevel.String())
if logLevel == logrus.DebugLevel {
logrus.Debugf("%s messages will be logged to syslog", r.conmonPath)
args = append(args, "--syslog")
}
if ociLogPath != "" {
args = append(args, "--runtime-arg", "--log-format=json", "--runtime-arg", "--log", fmt.Sprintf("--runtime-arg=%s", ociLogPath))
}
if ctr.config.NoCgroups {
logrus.Debugf("Running with no CGroups")
args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled")
}
return args
}
// startCommandGivenSelinux starts a container ensuring to set the labels of
// the process to make sure SELinux doesn't block conmon communication, if SELinux is enabled
func startCommandGivenSelinux(cmd *exec.Cmd) error {
if !selinux.GetEnabled() {
return cmd.Start()
}
// Set the label of the conmon process to be level :s0
// This will allow the container processes to talk to fifo-files
// passed into the container by conmon
var (
plabel string
con selinux.Context
err error
)
plabel, err = selinux.CurrentLabel()
if err != nil {
return errors.Wrapf(err, "Failed to get current SELinux label")
}
con, err = selinux.NewContext(plabel)
if err != nil {
return errors.Wrapf(err, "Failed to get new context from SELinux label")
}
runtime.LockOSThread()
if con["level"] != "s0" && con["level"] != "" {
con["level"] = "s0"
if err = label.SetProcessLabel(con.Get()); err != nil {
runtime.UnlockOSThread()
return err
}
}
err = cmd.Start()
// Ignore error returned from SetProcessLabel("") call,
// can't recover.
if labelErr := label.SetProcessLabel(""); labelErr != nil {
logrus.Errorf("unable to set process label: %q", err)
}
runtime.UnlockOSThread()
return err
}
// moveConmonToCgroupAndSignal gets a container's cgroupParent and moves the conmon process to that cgroup
// it then signals for conmon to start by sending nonse data down the start fd
func (r *OCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec.Cmd, startFd *os.File, uuid string) error {
mustCreateCgroup := true
// If cgroup creation is disabled - just signal.
if ctr.config.NoCgroups {
mustCreateCgroup = false
}
if rootless.IsRootless() {
ownsCgroup, err := cgroups.UserOwnsCurrentSystemdCgroup()
if err != nil {
return err
}
mustCreateCgroup = !ownsCgroup
}
if mustCreateCgroup {
cgroupParent := ctr.CgroupParent()
if r.cgroupManager == SystemdCgroupsManager {
unitName := createUnitName("libpod-conmon", ctr.ID())
realCgroupParent := cgroupParent
splitParent := strings.Split(cgroupParent, "/")
if strings.HasSuffix(cgroupParent, ".slice") && len(splitParent) > 1 {
realCgroupParent = splitParent[len(splitParent)-1]
}
logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName)
if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil {
logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err)
}
} else {
cgroupPath := filepath.Join(ctr.config.CgroupParent, "conmon")
control, err := cgroups.New(cgroupPath, &spec.LinuxResources{})
if err != nil {
logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
} else {
// we need to remove this defer and delete the cgroup once conmon exits
// maybe need a conmon monitor?
if err := control.AddPid(cmd.Process.Pid); err != nil {
logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
}
}
}
}
/* We set the cgroup, now the child can start creating children */
if err := writeConmonPipeData(startFd); err != nil {
return err
}
return nil
}
// newPipe creates a unix socket pair for communication
func newPipe() (parent *os.File, child *os.File, err error) {
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0)
if err != nil {
return nil, nil, err
}
return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
}
// readConmonPidFile attempts to read conmon's pid from its pid file
func readConmonPidFile(pidFile string) (int, error) {
// Let's try reading the Conmon pid at the same time.
if pidFile != "" {
contents, err := ioutil.ReadFile(pidFile)
if err != nil {
return -1, err
}
// Convert it to an int
conmonPID, err := strconv.Atoi(string(contents))
if err != nil {
return -1, err
}
return conmonPID, nil
}
return 0, nil
}
// readConmonPipeData attempts to read a syncInfo struct from the pipe
func readConmonPipeData(pipe *os.File, ociLog string) (int, error) {
// syncInfo is used to return data from monitor process to daemon
type syncInfo struct {
Data int `json:"data"`
Message string `json:"message,omitempty"`
}
// Wait to get container pid from conmon
type syncStruct struct {
si *syncInfo
err error
}
ch := make(chan syncStruct)
go func() {
var si *syncInfo
rdr := bufio.NewReader(pipe)
b, err := rdr.ReadBytes('\n')
if err != nil {
ch <- syncStruct{err: err}
}
if err := json.Unmarshal(b, &si); err != nil {
ch <- syncStruct{err: err}
return
}
ch <- syncStruct{si: si}
}()
data := -1
select {
case ss := <-ch:
if ss.err != nil {
if ociLog != "" {
ociLogData, err := ioutil.ReadFile(ociLog)
if err == nil {
var ociErr ociError
if err := json.Unmarshal(ociLogData, &ociErr); err == nil {
return -1, getOCIRuntimeError(ociErr.Msg)
}
}
}
return -1, errors.Wrapf(ss.err, "error reading container (probably exited) json message")
}
logrus.Debugf("Received: %d", ss.si.Data)
if ss.si.Data < 0 {
if ociLog != "" {
ociLogData, err := ioutil.ReadFile(ociLog)
if err == nil {
var ociErr ociError
if err := json.Unmarshal(ociLogData, &ociErr); err == nil {
return ss.si.Data, getOCIRuntimeError(ociErr.Msg)
}
}
}
// If we failed to parse the JSON errors, then print the output as it is
if ss.si.Message != "" {
return ss.si.Data, getOCIRuntimeError(ss.si.Message)
}
return ss.si.Data, errors.Wrapf(define.ErrInternal, "container create failed")
}
data = ss.si.Data
case <-time.After(ContainerCreateTimeout):
return -1, errors.Wrapf(define.ErrInternal, "container creation timeout")
}
return data, nil
}
func getOCIRuntimeError(runtimeMsg string) error {
r := strings.ToLower(runtimeMsg)
if match, _ := regexp.MatchString(".*permission denied.*|.*operation not permitted.*", r); match {
return errors.Wrapf(define.ErrOCIRuntimePermissionDenied, "%s", strings.Trim(runtimeMsg, "\n"))
}
if match, _ := regexp.MatchString(".*executable file not found in.*|.*no such file or directory.*", r); match {
return errors.Wrapf(define.ErrOCIRuntimeNotFound, "%s", strings.Trim(runtimeMsg, "\n"))
}
return errors.Wrapf(define.ErrOCIRuntime, "%s", strings.Trim(runtimeMsg, "\n"))
}
// writeConmonPipeData writes nonse data to a pipe
func writeConmonPipeData(pipe *os.File) error {
someData := []byte{0}
_, err := pipe.Write(someData)
return err
}
// formatRuntimeOpts prepends opts passed to it with --runtime-opt for passing to conmon
func formatRuntimeOpts(opts ...string) []string {
args := make([]string, 0, len(opts)*2)
for _, o := range opts {
args = append(args, "--runtime-opt", o)
}
return args
}

View File

@ -1,503 +0,0 @@
// +build linux
package libpod
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"syscall"
"time"
"github.com/containers/libpod/libpod/define"
"github.com/containers/libpod/pkg/errorhandling"
"github.com/containers/libpod/pkg/rootless"
"github.com/containers/libpod/pkg/util"
"github.com/containers/libpod/utils"
pmount "github.com/containers/storage/pkg/mount"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"k8s.io/client-go/tools/remotecommand"
)
// makeAccessible changes the path permission and each parent directory to have --x--x--x
func makeAccessible(path string, uid, gid int) error {
for ; path != "/"; path = filepath.Dir(path) {
st, err := os.Stat(path)
if err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid {
continue
}
if st.Mode()&0111 != 0111 {
if err := os.Chmod(path, st.Mode()|0111); err != nil {
return err
}
}
}
return nil
}
// CreateContainer creates a container in the OCI runtime
// TODO terminal support for container
// Presently just ignoring conmon opts related to it
func (r *OCIRuntime) createContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (err error) {
if len(ctr.config.IDMappings.UIDMap) != 0 || len(ctr.config.IDMappings.GIDMap) != 0 {
for _, i := range []string{ctr.state.RunDir, ctr.runtime.config.TmpDir, ctr.config.StaticDir, ctr.state.Mountpoint, ctr.runtime.config.VolumePath} {
if err := makeAccessible(i, ctr.RootUID(), ctr.RootGID()); err != nil {
return err
}
}
// if we are running a non privileged container, be sure to umount some kernel paths so they are not
// bind mounted inside the container at all.
if !ctr.config.Privileged && !rootless.IsRootless() {
ch := make(chan error)
go func() {
runtime.LockOSThread()
err := func() error {
fd, err := os.Open(fmt.Sprintf("/proc/%d/task/%d/ns/mnt", os.Getpid(), unix.Gettid()))
if err != nil {
return err
}
defer errorhandling.CloseQuiet(fd)
// create a new mountns on the current thread
if err = unix.Unshare(unix.CLONE_NEWNS); err != nil {
return err
}
defer func() {
if err := unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS); err != nil {
logrus.Errorf("unable to clone new namespace: %q", err)
}
}()
// don't spread our mounts around. We are setting only /sys to be slave
// so that the cleanup process is still able to umount the storage and the
// changes are propagated to the host.
err = unix.Mount("/sys", "/sys", "none", unix.MS_REC|unix.MS_SLAVE, "")
if err != nil {
return errors.Wrapf(err, "cannot make /sys slave")
}
mounts, err := pmount.GetMounts()
if err != nil {
return err
}
for _, m := range mounts {
if !strings.HasPrefix(m.Mountpoint, "/sys/kernel") {
continue
}
err = unix.Unmount(m.Mountpoint, 0)
if err != nil && !os.IsNotExist(err) {
return errors.Wrapf(err, "cannot unmount %s", m.Mountpoint)
}
}
return r.createOCIContainer(ctr, restoreOptions)
}()
ch <- err
}()
err := <-ch
return err
}
}
return r.createOCIContainer(ctr, restoreOptions)
}
func (r *OCIRuntime) pathPackage() string {
return packageVersion(r.path)
}
func (r *OCIRuntime) conmonPackage() string {
return packageVersion(r.conmonPath)
}
// execContainer executes a command in a running container
// TODO: Add --detach support
// TODO: Convert to use conmon
// TODO: add --pid-file and use that to generate exec session tracking
func (r *OCIRuntime) execContainer(c *Container, cmd, capAdd, env []string, tty bool, cwd, user, sessionID string, streams *AttachStreams, preserveFDs int, resize chan remotecommand.TerminalSize, detachKeys string) (int, chan error, error) {
if len(cmd) == 0 {
return -1, nil, errors.Wrapf(define.ErrInvalidArg, "must provide a command to execute")
}
if sessionID == "" {
return -1, nil, errors.Wrapf(define.ErrEmptyID, "must provide a session ID for exec")
}
// create sync pipe to receive the pid
parentSyncPipe, childSyncPipe, err := newPipe()
if err != nil {
return -1, nil, errors.Wrapf(err, "error creating socket pair")
}
defer errorhandling.CloseQuiet(parentSyncPipe)
// create start pipe to set the cgroup before running
// attachToExec is responsible for closing parentStartPipe
childStartPipe, parentStartPipe, err := newPipe()
if err != nil {
return -1, nil, errors.Wrapf(err, "error creating socket pair")
}
// We want to make sure we close the parent{Start,Attach}Pipes if we fail
// but also don't want to close them after attach to exec is called
attachToExecCalled := false
defer func() {
if !attachToExecCalled {
errorhandling.CloseQuiet(parentStartPipe)
}
}()
// create the attach pipe to allow attach socket to be created before
// $RUNTIME exec starts running. This is to make sure we can capture all output
// from the process through that socket, rather than half reading the log, half attaching to the socket
// attachToExec is responsible for closing parentAttachPipe
parentAttachPipe, childAttachPipe, err := newPipe()
if err != nil {
return -1, nil, errors.Wrapf(err, "error creating socket pair")
}
defer func() {
if !attachToExecCalled {
errorhandling.CloseQuiet(parentAttachPipe)
}
}()
childrenClosed := false
defer func() {
if !childrenClosed {
errorhandling.CloseQuiet(childSyncPipe)
errorhandling.CloseQuiet(childAttachPipe)
errorhandling.CloseQuiet(childStartPipe)
}
}()
runtimeDir, err := util.GetRuntimeDir()
if err != nil {
return -1, nil, err
}
processFile, err := prepareProcessExec(c, cmd, env, tty, cwd, user, sessionID)
if err != nil {
return -1, nil, err
}
var ociLog string
if logrus.GetLevel() != logrus.DebugLevel && r.supportsJSON {
ociLog = c.execOCILog(sessionID)
}
args := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), ociLog)
if preserveFDs > 0 {
args = append(args, formatRuntimeOpts("--preserve-fds", strconv.Itoa(preserveFDs))...)
}
for _, capability := range capAdd {
args = append(args, formatRuntimeOpts("--cap", capability)...)
}
if tty {
args = append(args, "-t")
}
// Append container ID and command
args = append(args, "-e")
// TODO make this optional when we can detach
args = append(args, "--exec-attach")
args = append(args, "--exec-process-spec", processFile.Name())
logrus.WithFields(logrus.Fields{
"args": args,
}).Debugf("running conmon: %s", r.conmonPath)
execCmd := exec.Command(r.conmonPath, args...)
if streams.AttachInput {
execCmd.Stdin = streams.InputStream
}
if streams.AttachOutput {
execCmd.Stdout = streams.OutputStream
}
if streams.AttachError {
execCmd.Stderr = streams.ErrorStream
}
conmonEnv, extraFiles, err := r.configureConmonEnv(runtimeDir)
if err != nil {
return -1, nil, err
}
if preserveFDs > 0 {
for fd := 3; fd < 3+preserveFDs; fd++ {
execCmd.ExtraFiles = append(execCmd.ExtraFiles, os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd)))
}
}
// we don't want to step on users fds they asked to preserve
// Since 0-2 are used for stdio, start the fds we pass in at preserveFDs+3
execCmd.Env = append(r.conmonEnv, fmt.Sprintf("_OCI_SYNCPIPE=%d", preserveFDs+3), fmt.Sprintf("_OCI_STARTPIPE=%d", preserveFDs+4), fmt.Sprintf("_OCI_ATTACHPIPE=%d", preserveFDs+5))
execCmd.Env = append(execCmd.Env, conmonEnv...)
execCmd.ExtraFiles = append(execCmd.ExtraFiles, childSyncPipe, childStartPipe, childAttachPipe)
execCmd.ExtraFiles = append(execCmd.ExtraFiles, extraFiles...)
execCmd.Dir = c.execBundlePath(sessionID)
execCmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}
err = startCommandGivenSelinux(execCmd)
// We don't need children pipes on the parent side
errorhandling.CloseQuiet(childSyncPipe)
errorhandling.CloseQuiet(childAttachPipe)
errorhandling.CloseQuiet(childStartPipe)
childrenClosed = true
if err != nil {
return -1, nil, errors.Wrapf(err, "cannot start container %s", c.ID())
}
if err := r.moveConmonToCgroupAndSignal(c, execCmd, parentStartPipe, sessionID); err != nil {
return -1, nil, err
}
if preserveFDs > 0 {
for fd := 3; fd < 3+preserveFDs; fd++ {
// These fds were passed down to the runtime. Close them
// and not interfere
if err := os.NewFile(uintptr(fd), fmt.Sprintf("fd-%d", fd)).Close(); err != nil {
logrus.Debugf("unable to close file fd-%d", fd)
}
}
}
// TODO Only create if !detach
// Attach to the container before starting it
attachChan := make(chan error)
go func() {
// attachToExec is responsible for closing pipes
attachChan <- c.attachToExec(streams, detachKeys, resize, sessionID, parentStartPipe, parentAttachPipe)
close(attachChan)
}()
attachToExecCalled = true
pid, err := readConmonPipeData(parentSyncPipe, ociLog)
return pid, attachChan, err
}
// Wait for a container which has been sent a signal to stop
func waitContainerStop(ctr *Container, timeout time.Duration) error {
done := make(chan struct{})
chControl := make(chan struct{})
go func() {
for {
select {
case <-chControl:
return
default:
// Check if the process is still around
err := unix.Kill(ctr.state.PID, 0)
if err == unix.ESRCH {
close(done)
return
}
time.Sleep(100 * time.Millisecond)
}
}
}()
select {
case <-done:
return nil
case <-time.After(timeout):
close(chControl)
logrus.Debugf("container %s did not die within timeout %d", ctr.ID(), timeout)
return errors.Errorf("container %s did not die within timeout", ctr.ID())
}
}
// Wait for a set of given PIDs to stop
func waitPidsStop(pids []int, timeout time.Duration) error {
done := make(chan struct{})
chControl := make(chan struct{})
go func() {
for {
select {
case <-chControl:
return
default:
allClosed := true
for _, pid := range pids {
if err := unix.Kill(pid, 0); err != unix.ESRCH {
allClosed = false
break
}
}
if allClosed {
close(done)
return
}
time.Sleep(100 * time.Millisecond)
}
}
}()
select {
case <-done:
return nil
case <-time.After(timeout):
close(chControl)
return errors.Errorf("given PIDs did not die within timeout")
}
}
// stopContainer stops a container, first using its given stop signal (or
// SIGTERM if no signal was specified), then using SIGKILL
// Timeout is given in seconds. If timeout is 0, the container will be
// immediately kill with SIGKILL
// Does not set finished time for container, assumes you will run updateStatus
// after to pull the exit code
func (r *OCIRuntime) stopContainer(ctr *Container, timeout uint) error {
logrus.Debugf("Stopping container %s (PID %d)", ctr.ID(), ctr.state.PID)
// Ping the container to see if it's alive
// If it's not, it's already stopped, return
err := unix.Kill(ctr.state.PID, 0)
if err == unix.ESRCH {
return nil
}
stopSignal := ctr.config.StopSignal
if stopSignal == 0 {
stopSignal = uint(syscall.SIGTERM)
}
if timeout > 0 {
if err := r.killContainer(ctr, stopSignal); err != nil {
// Is the container gone?
// If so, it probably died between the first check and
// our sending the signal
// The container is stopped, so exit cleanly
err := unix.Kill(ctr.state.PID, 0)
if err == unix.ESRCH {
return nil
}
return err
}
if err := waitContainerStop(ctr, time.Duration(timeout)*time.Second); err != nil {
logrus.Warnf("Timed out stopping container %s, resorting to SIGKILL", ctr.ID())
} else {
// No error, the container is dead
return nil
}
}
var args []string
if rootless.IsRootless() || ctr.config.NoCgroups {
// we don't use --all for rootless containers as the OCI runtime might use
// the cgroups to determine the PIDs, but for rootless containers there is
// not any.
// Same logic for NoCgroups - we can't use cgroups as the user
// explicitly requested none be created.
args = []string{"kill", ctr.ID(), "KILL"}
} else {
args = []string{"kill", "--all", ctr.ID(), "KILL"}
}
runtimeDir, err := util.GetRuntimeDir()
if err != nil {
return err
}
env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, args...); err != nil {
// Again, check if the container is gone. If it is, exit cleanly.
err := unix.Kill(ctr.state.PID, 0)
if err == unix.ESRCH {
return nil
}
return errors.Wrapf(err, "error sending SIGKILL to container %s", ctr.ID())
}
// Give runtime a few seconds to make it happen
if err := waitContainerStop(ctr, killContainerTimeout); err != nil {
return err
}
return nil
}
// execStopContainer stops all active exec sessions in a container
// It will also stop all other processes in the container. It is only intended
// to be used to assist in cleanup when removing a container.
// SIGTERM is used by default to stop processes. If SIGTERM fails, SIGKILL will be used.
func (r *OCIRuntime) execStopContainer(ctr *Container, timeout uint) error {
// Do we have active exec sessions?
if len(ctr.state.ExecSessions) == 0 {
return nil
}
// Get a list of active exec sessions
execSessions := []int{}
for _, session := range ctr.state.ExecSessions {
pid := session.PID
// Ping the PID with signal 0 to see if it still exists
if err := unix.Kill(pid, 0); err == unix.ESRCH {
continue
}
execSessions = append(execSessions, pid)
}
// All the sessions may be dead
// If they are, just return
if len(execSessions) == 0 {
return nil
}
runtimeDir, err := util.GetRuntimeDir()
if err != nil {
return err
}
env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
// If timeout is 0, just use SIGKILL
if timeout > 0 {
// Stop using SIGTERM by default
// Use SIGSTOP after a timeout
logrus.Debugf("Killing all processes in container %s with SIGTERM", ctr.ID())
if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, "kill", "--all", ctr.ID(), "TERM"); err != nil {
return errors.Wrapf(err, "error sending SIGTERM to container %s processes", ctr.ID())
}
// Wait for all processes to stop
if err := waitPidsStop(execSessions, time.Duration(timeout)*time.Second); err != nil {
logrus.Warnf("Timed out stopping container %s exec sessions", ctr.ID())
} else {
// No error, all exec sessions are dead
return nil
}
}
// Send SIGKILL
logrus.Debugf("Killing all processes in container %s with SIGKILL", ctr.ID())
if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, env, r.path, "kill", "--all", ctr.ID(), "KILL"); err != nil {
return errors.Wrapf(err, "error sending SIGKILL to container %s processes", ctr.ID())
}
// Give the processes a few seconds to go down
if err := waitPidsStop(execSessions, killContainerTimeout); err != nil {
return errors.Wrapf(err, "failed to kill container %s exec sessions", ctr.ID())
}
return nil
}

View File

@ -1,47 +0,0 @@
// +build !linux
package libpod
import (
"os"
"os/exec"
"github.com/containers/libpod/libpod/define"
"k8s.io/client-go/tools/remotecommand"
)
func (r *OCIRuntime) moveConmonToCgroup(ctr *Container, cgroupParent string, cmd *exec.Cmd) error {
return define.ErrOSNotSupported
}
func newPipe() (parent *os.File, child *os.File, err error) {
return nil, nil, define.ErrNotImplemented
}
func (r *OCIRuntime) createContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (err error) {
return define.ErrNotImplemented
}
func (r *OCIRuntime) pathPackage() string {
return ""
}
func (r *OCIRuntime) conmonPackage() string {
return ""
}
func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string, restoreOptions *ContainerCheckpointOptions) (err error) {
return define.ErrOSNotSupported
}
func (r *OCIRuntime) execStopContainer(ctr *Container, timeout uint) error {
return define.ErrOSNotSupported
}
func (r *OCIRuntime) stopContainer(ctr *Container, timeout uint) error {
return define.ErrOSNotSupported
}
func (r *OCIRuntime) execContainer(c *Container, cmd, capAdd, env []string, tty bool, cwd, user, sessionID string, streams *AttachStreams, preserveFDs int, resize chan remotecommand.TerminalSize, detachKeys string) (int, chan error, error) {
return -1, nil, define.ErrOSNotSupported
}

113
libpod/oci_util.go Normal file
View File

@ -0,0 +1,113 @@
package libpod
import (
"fmt"
"net"
"os"
"regexp"
"strings"
"time"
"github.com/containers/libpod/libpod/define"
"github.com/cri-o/ocicni/pkg/ocicni"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
const (
// CgroupfsCgroupsManager represents cgroupfs native cgroup manager
CgroupfsCgroupsManager = "cgroupfs"
// SystemdCgroupsManager represents systemd native cgroup manager
SystemdCgroupsManager = "systemd"
// ContainerCreateTimeout is the timeout before we decide we've failed
// to create a container.
// TODO: Make this generic - all OCI runtime operations should use the
// same timeout, this one.
// TODO: Consider dropping from 240 to 60 seconds. I don't think waiting
// 4 minutes versus 1 minute makes a real difference.
ContainerCreateTimeout = 240 * time.Second
// Timeout before declaring that runtime has failed to kill a given
// container
killContainerTimeout = 5 * time.Second
// DefaultShmSize is the default shm size
DefaultShmSize = 64 * 1024 * 1024
// NsRunDir is the default directory in which running network namespaces
// are stored
NsRunDir = "/var/run/netns"
)
// ociError is used to parse the OCI runtime JSON log. It is not part of the
// OCI runtime specifications, it follows what runc does
type ociError struct {
Level string `json:"level,omitempty"`
Time string `json:"time,omitempty"`
Msg string `json:"msg,omitempty"`
}
// Create systemd unit name for cgroup scopes
func createUnitName(prefix string, name string) string {
return fmt.Sprintf("%s-%s.scope", prefix, name)
}
// Bind ports to keep them closed on the host
func bindPorts(ports []ocicni.PortMapping) ([]*os.File, error) {
var files []*os.File
notifySCTP := false
for _, i := range ports {
switch i.Protocol {
case "udp":
addr, err := net.ResolveUDPAddr("udp", fmt.Sprintf("%s:%d", i.HostIP, i.HostPort))
if err != nil {
return nil, errors.Wrapf(err, "cannot resolve the UDP address")
}
server, err := net.ListenUDP("udp", addr)
if err != nil {
return nil, errors.Wrapf(err, "cannot listen on the UDP port")
}
f, err := server.File()
if err != nil {
return nil, errors.Wrapf(err, "cannot get file for UDP socket")
}
files = append(files, f)
case "tcp":
addr, err := net.ResolveTCPAddr("tcp4", fmt.Sprintf("%s:%d", i.HostIP, i.HostPort))
if err != nil {
return nil, errors.Wrapf(err, "cannot resolve the TCP address")
}
server, err := net.ListenTCP("tcp4", addr)
if err != nil {
return nil, errors.Wrapf(err, "cannot listen on the TCP port")
}
f, err := server.File()
if err != nil {
return nil, errors.Wrapf(err, "cannot get file for TCP socket")
}
files = append(files, f)
case "sctp":
if !notifySCTP {
notifySCTP = true
logrus.Warnf("port reservation for SCTP is not supported")
}
default:
return nil, fmt.Errorf("unknown protocol %s", i.Protocol)
}
}
return files, nil
}
func getOCIRuntimeError(runtimeMsg string) error {
r := strings.ToLower(runtimeMsg)
if match, _ := regexp.MatchString(".*permission denied.*|.*operation not permitted.*", r); match {
return errors.Wrapf(define.ErrOCIRuntimePermissionDenied, "%s", strings.Trim(runtimeMsg, "\n"))
}
if match, _ := regexp.MatchString(".*executable file not found in.*|.*no such file or directory.*", r); match {
return errors.Wrapf(define.ErrOCIRuntimeNotFound, "%s", strings.Trim(runtimeMsg, "\n"))
}
return errors.Wrapf(define.ErrOCIRuntime, "%s", strings.Trim(runtimeMsg, "\n"))
}

View File

@ -123,7 +123,7 @@ func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
if timeout > -1 {
stopTimeout = uint(timeout)
}
if err := ctr.stop(stopTimeout); err != nil {
if err := ctr.stop(stopTimeout, false); err != nil {
ctr.lock.Unlock()
ctrErrors[ctr.ID()] = err
continue
@ -370,7 +370,7 @@ func (p *Pod) Kill(signal uint) (map[string]error, error) {
continue
}
if err := ctr.ociRuntime.killContainer(ctr, signal); err != nil {
if err := ctr.ociRuntime.KillContainer(ctr, signal, false); err != nil {
ctr.lock.Unlock()
ctrErrors[ctr.ID()] = err
continue

View File

@ -99,8 +99,8 @@ type Runtime struct {
store storage.Store
storageService *storageService
imageContext *types.SystemContext
defaultOCIRuntime *OCIRuntime
ociRuntimes map[string]*OCIRuntime
defaultOCIRuntime OCIRuntime
ociRuntimes map[string]OCIRuntime
netPlugin ocicni.CNIPlugin
conmonPath string
imageRuntime *image.Runtime
@ -1057,7 +1057,7 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) {
}
// Get us at least one working OCI runtime.
runtime.ociRuntimes = make(map[string]*OCIRuntime)
runtime.ociRuntimes = make(map[string]OCIRuntime)
// Is the old runtime_path defined?
if runtime.config.RuntimePath != nil {
@ -1076,7 +1076,7 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) {
json := supportsJSON[name]
nocgroups := supportsNoCgroups[name]
ociRuntime, err := newOCIRuntime(name, runtime.config.RuntimePath, runtime.conmonPath, runtime.config, json, nocgroups)
ociRuntime, err := newConmonOCIRuntime(name, runtime.config.RuntimePath, runtime.conmonPath, runtime.config, json, nocgroups)
if err != nil {
return err
}
@ -1090,7 +1090,7 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) {
json := supportsJSON[name]
nocgroups := supportsNoCgroups[name]
ociRuntime, err := newOCIRuntime(name, paths, runtime.conmonPath, runtime.config, json, nocgroups)
ociRuntime, err := newConmonOCIRuntime(name, paths, runtime.conmonPath, runtime.config, json, nocgroups)
if err != nil {
// Don't fatally error.
// This will allow us to ship configs including optional
@ -1113,7 +1113,7 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (err error) {
json := supportsJSON[name]
nocgroups := supportsNoCgroups[name]
ociRuntime, err := newOCIRuntime(name, []string{runtime.config.OCIRuntime}, runtime.conmonPath, runtime.config, json, nocgroups)
ociRuntime, err := newConmonOCIRuntime(name, []string{runtime.config.OCIRuntime}, runtime.conmonPath, runtime.config, json, nocgroups)
if err != nil {
return err
}
@ -1478,6 +1478,11 @@ func (r *Runtime) SystemContext() *types.SystemContext {
return r.imageContext
}
// GetOCIRuntimePath retrieves the path of the default OCI runtime.
func (r *Runtime) GetOCIRuntimePath() string {
return r.defaultOCIRuntime.Path()
}
// Since runc does not currently support cgroupV2
// Change to default crun on first running of libpod.conf
// TODO Once runc has support for cgroups, this function should be removed.

View File

@ -102,7 +102,7 @@ func (r *Runtime) initContainerVariables(rSpec *spec.Spec, config *ContainerConf
ctr.config.StopTimeout = define.CtrRemoveTimeout
ctr.config.OCIRuntime = r.defaultOCIRuntime.name
ctr.config.OCIRuntime = r.defaultOCIRuntime.Name()
// Set namespace based on current runtime namespace
// Do so before options run so they can override it
@ -167,8 +167,8 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (c *Contai
// Check NoCgroups support
if ctr.config.NoCgroups {
if !ctr.ociRuntime.supportsNoCgroups {
return nil, errors.Wrapf(define.ErrInvalidArg, "requested OCI runtime %s is not compatible with NoCgroups", ctr.ociRuntime.name)
if !ctr.ociRuntime.SupportsNoCgroups() {
return nil, errors.Wrapf(define.ErrInvalidArg, "requested OCI runtime %s is not compatible with NoCgroups", ctr.ociRuntime.Name())
}
}
@ -264,6 +264,14 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (c *Contai
g.RemoveMount("/etc/hosts")
g.RemoveMount("/run/.containerenv")
g.RemoveMount("/run/secrets")
// Regenerate CGroup paths so they don't point to the old
// container ID.
cgroupPath, err := ctr.getOCICgroupPath()
if err != nil {
return nil, err
}
g.SetLinuxCgroupsPath(cgroupPath)
}
// Set up storage for the container
@ -430,7 +438,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
}
if c.state.State == define.ContainerStatePaused {
if err := c.ociRuntime.killContainer(c, 9); err != nil {
if err := c.ociRuntime.KillContainer(c, 9, false); err != nil {
return err
}
if err := c.unpause(); err != nil {
@ -444,15 +452,15 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
// Check that the container's in a good state to be removed
if c.state.State == define.ContainerStateRunning {
if err := c.stop(c.StopTimeout()); err != nil {
if err := c.stop(c.StopTimeout(), true); err != nil {
return errors.Wrapf(err, "cannot remove container %s as it could not be stopped", c.ID())
}
}
// Check that all of our exec sessions have finished
if len(c.state.ExecSessions) != 0 {
if err := c.ociRuntime.execStopContainer(c, c.StopTimeout()); err != nil {
return err
for _, session := range c.state.ExecSessions {
if err := c.ociRuntime.ExecStopContainer(c, session.ID, c.StopTimeout()); err != nil {
return errors.Wrapf(err, "error stopping exec session %s of container %s", session.ID, c.ID())
}
}

View File

@ -899,7 +899,7 @@ func (r *LocalRuntime) execPS(c *libpod.Container, args []string) ([]string, err
}()
cmd := append([]string{"ps"}, args...)
ec, err := c.Exec(false, false, []string{}, cmd, "", "", streams, 0, nil, "")
ec, err := c.Exec(false, false, map[string]string{}, cmd, "", "", streams, 0, nil, "")
if err != nil {
return nil, err
} else if ec != 0 {
@ -959,12 +959,6 @@ func (r *LocalRuntime) ExecContainer(ctx context.Context, cli *cliconfig.ExecVal
return ec, errors.Wrapf(err, "unable to process environment variables")
}
// Build env slice of key=value strings for Exec
envs := []string{}
for k, v := range env {
envs = append(envs, fmt.Sprintf("%s=%s", k, v))
}
streams := new(libpod.AttachStreams)
streams.OutputStream = os.Stdout
streams.ErrorStream = os.Stderr
@ -975,7 +969,7 @@ func (r *LocalRuntime) ExecContainer(ctx context.Context, cli *cliconfig.ExecVal
streams.AttachOutput = true
streams.AttachError = true
ec, err = ExecAttachCtr(ctx, ctr.Container, cli.Tty, cli.Privileged, envs, cmd, cli.User, cli.Workdir, streams, cli.PreserveFDs, cli.DetachKeys)
ec, err = ExecAttachCtr(ctx, ctr.Container, cli.Tty, cli.Privileged, env, cmd, cli.User, cli.Workdir, streams, uint(cli.PreserveFDs), cli.DetachKeys)
return define.TranslateExecErrorToExitCode(ec, err), err
}

View File

@ -13,7 +13,7 @@ import (
)
// ExecAttachCtr execs and attaches to a container
func ExecAttachCtr(ctx context.Context, ctr *libpod.Container, tty, privileged bool, env, cmd []string, user, workDir string, streams *libpod.AttachStreams, preserveFDs int, detachKeys string) (int, error) {
func ExecAttachCtr(ctx context.Context, ctr *libpod.Container, tty, privileged bool, env map[string]string, cmd []string, user, workDir string, streams *libpod.AttachStreams, preserveFDs uint, detachKeys string) (int, error) {
resize := make(chan remotecommand.TerminalSize)
haveTerminal := terminal.IsTerminal(int(os.Stdin.Fd()))

View File

@ -9,6 +9,7 @@ import (
"io"
"io/ioutil"
"os"
"strings"
"sync"
"syscall"
"time"
@ -563,9 +564,14 @@ func (i *LibpodAPI) GetAttachSockets(call iopodman.VarlinkCall, name string) err
}
}
sockPath, err := ctr.AttachSocketPath()
if err != nil {
return call.ReplyErrorOccurred(err.Error())
}
s := iopodman.Sockets{
Container_id: ctr.ID(),
Io_socket: ctr.AttachSocketPath(),
Io_socket: sockPath,
Control_socket: ctr.ControlSocketPath(),
}
return call.ReplyGetAttachSockets(s)
@ -811,9 +817,19 @@ func (i *LibpodAPI) ExecContainer(call iopodman.VarlinkCall, opts iopodman.ExecO
// ACK the client upgrade request
call.ReplyExecContainer()
envs := []string{}
envs := make(map[string]string)
if opts.Env != nil {
envs = *opts.Env
// HACK: The Varlink API uses the old []string format for env,
// storage as "k=v". Split on the = and turn into the new map
// format.
for _, env := range *opts.Env {
splitEnv := strings.SplitN(env, "=", 2)
if len(splitEnv) == 1 {
logrus.Errorf("Got badly-formatted environment variable %q in exec", env)
continue
}
envs[splitEnv[0]] = splitEnv[1]
}
}
var user string