Merge pull request #1689 from mheon/add_runc_timeout

Do not call out to runc for sync
This commit is contained in:
OpenShift Merge Robot
2018-11-07 09:36:03 -08:00
committed by GitHub
4 changed files with 117 additions and 33 deletions

View File

@ -685,7 +685,7 @@ func (c *Container) Sync() error {
(c.state.State != ContainerStateConfigured) {
oldState := c.state.State
// TODO: optionally replace this with a stat for the exit file
if err := c.runtime.ociRuntime.updateContainerStatus(c); err != nil {
if err := c.runtime.ociRuntime.updateContainerStatus(c, true); err != nil {
return err
}
// Only save back to DB if state changed

View File

@ -13,8 +13,10 @@ import (
"strconv"
"strings"
"syscall"
"time"
"github.com/containers/buildah/imagebuildah"
"github.com/containers/libpod/pkg/ctime"
"github.com/containers/libpod/pkg/hooks"
"github.com/containers/libpod/pkg/hooks/exec"
"github.com/containers/libpod/pkg/lookup"
@ -31,6 +33,7 @@ import (
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/text/language"
kwait "k8s.io/apimachinery/pkg/util/wait"
)
const (
@ -147,6 +150,77 @@ func (c *Container) execPidPath(sessionID string) string {
return filepath.Join(c.state.RunDir, "exec_pid_"+sessionID)
}
// exitFilePath gets the path to the container's exit file
func (c *Container) exitFilePath() string {
return filepath.Join(c.runtime.ociRuntime.exitsDir, c.ID())
}
// Wait for the container's exit file to appear.
// When it does, update our state based on it.
func (c *Container) waitForExitFileAndSync() error {
exitFile := c.exitFilePath()
err := kwait.ExponentialBackoff(
kwait.Backoff{
Duration: 500 * time.Millisecond,
Factor: 1.2,
Steps: 6,
},
func() (bool, error) {
_, err := os.Stat(exitFile)
if err != nil {
// wait longer
return false, nil
}
return true, nil
})
if err != nil {
// Exit file did not appear
// Reset our state
c.state.ExitCode = -1
c.state.FinishedTime = time.Now()
c.state.State = ContainerStateStopped
if err2 := c.save(); err2 != nil {
logrus.Errorf("Error saving container %s state: %v", c.ID(), err2)
}
return err
}
if err := c.runtime.ociRuntime.updateContainerStatus(c, false); err != nil {
return err
}
return c.save()
}
// Handle the container exit file.
// The exit file is used to supply container exit time and exit code.
// This assumes the exit file already exists.
func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
c.state.FinishedTime = ctime.Created(fi)
statusCodeStr, err := ioutil.ReadFile(exitFile)
if err != nil {
return errors.Wrapf(err, "failed to read exit file for container %s", c.ID())
}
statusCode, err := strconv.Atoi(string(statusCodeStr))
if err != nil {
return errors.Wrapf(err, "error converting exit status code (%q) for container %s to int",
c.ID(), statusCodeStr)
}
c.state.ExitCode = int32(statusCode)
oomFilePath := filepath.Join(c.bundlePath(), "oom")
if _, err = os.Stat(oomFilePath); err == nil {
c.state.OOMKilled = true
}
c.state.Exited = true
return nil
}
// Sync this container with on-disk state and runtime status
// Should only be called with container lock held
// This function should suffice to ensure a container's state is accurate and
@ -162,7 +236,7 @@ func (c *Container) syncContainer() error {
(c.state.State != ContainerStateExited) {
oldState := c.state.State
// TODO: optionally replace this with a stat for the exit file
if err := c.runtime.ociRuntime.updateContainerStatus(c); err != nil {
if err := c.runtime.ociRuntime.updateContainerStatus(c, false); err != nil {
return err
}
// Only save back to DB if state changed
@ -673,13 +747,8 @@ func (c *Container) stop(timeout uint) error {
return err
}
// Sync the container's state to pick up return code
if err := c.runtime.ociRuntime.updateContainerStatus(c); err != nil {
return err
}
// Container should clean itself up
return nil
// Wait until we have an exit file, and sync once we do
return c.waitForExitFileAndSync()
}
// Internal, non-locking function to pause a container

View File

@ -11,12 +11,10 @@ import (
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"syscall"
"time"
"github.com/containers/libpod/pkg/ctime"
"github.com/containers/libpod/pkg/rootless"
"github.com/containers/libpod/pkg/util"
"github.com/coreos/go-systemd/activation"
@ -443,6 +441,7 @@ func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string, res
}
return errors.Wrapf(ErrInternal, "container create failed")
}
ctr.state.PID = ss.si.Pid
case <-time.After(ContainerCreateTimeout):
return errors.Wrapf(ErrInternal, "container creation timeout")
}
@ -451,17 +450,47 @@ func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string, res
// updateContainerStatus retrieves the current status of the container from the
// runtime. It updates the container's state but does not save it.
func (r *OCIRuntime) updateContainerStatus(ctr *Container) error {
state := new(spec.State)
// If useRunc is false, we will not directly hit runc to see the container's
// status, but will instead only check for the existence of the conmon exit file
// and update state to stopped if it exists.
func (r *OCIRuntime) updateContainerStatus(ctr *Container, useRunc bool) error {
exitFile := ctr.exitFilePath()
runtimeDir, err := util.GetRootlessRuntimeDir()
if err != nil {
return err
}
// If not using runc, we don't need to do most of this.
if !useRunc {
// If the container's not running, nothing to do.
if ctr.state.State != ContainerStateRunning {
return nil
}
// Check for the exit file conmon makes
info, err := os.Stat(exitFile)
if err != nil {
if os.IsNotExist(err) {
// Container is still running, no error
return nil
}
return errors.Wrapf(err, "error running stat on container %s exit file", ctr.ID())
}
// Alright, it exists. Transition to Stopped state.
ctr.state.State = ContainerStateStopped
// Read the exit file to get our stopped time and exit code.
return ctr.handleExitFile(exitFile, info)
}
// Store old state so we know if we were already stopped
oldState := ctr.state.State
state := new(spec.State)
cmd := exec.Command(r.path, "state", ctr.ID())
cmd.Env = append(cmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir))
outPipe, err := cmd.StdoutPipe()
@ -480,6 +509,8 @@ func (r *OCIRuntime) updateContainerStatus(ctr *Container) error {
}
if strings.Contains(string(out), "does not exist") {
ctr.removeConmonFiles()
ctr.state.ExitCode = -1
ctr.state.FinishedTime = time.Now()
ctr.state.State = ContainerStateExited
return nil
}
@ -514,7 +545,6 @@ func (r *OCIRuntime) updateContainerStatus(ctr *Container) error {
// Only grab exit status if we were not already stopped
// If we were, it should already be in the database
if ctr.state.State == ContainerStateStopped && oldState != ContainerStateStopped {
exitFile := filepath.Join(r.exitsDir, ctr.ID())
var fi os.FileInfo
err = kwait.ExponentialBackoff(
kwait.Backoff{
@ -538,24 +568,7 @@ func (r *OCIRuntime) updateContainerStatus(ctr *Container) error {
return nil
}
ctr.state.FinishedTime = ctime.Created(fi)
statusCodeStr, err := ioutil.ReadFile(exitFile)
if err != nil {
return errors.Wrapf(err, "failed to read exit file for container %s", ctr.ID())
}
statusCode, err := strconv.Atoi(string(statusCodeStr))
if err != nil {
return errors.Wrapf(err, "error converting exit status code for container %s to int",
ctr.ID())
}
ctr.state.ExitCode = int32(statusCode)
oomFilePath := filepath.Join(ctr.bundlePath(), "oom")
if _, err = os.Stat(oomFilePath); err == nil {
ctr.state.OOMKilled = true
}
ctr.state.Exited = true
return ctr.handleExitFile(exitFile, fi)
}
return nil
@ -601,6 +614,8 @@ func (r *OCIRuntime) killContainer(ctr *Container, signal uint) error {
// Does not set finished time for container, assumes you will run updateStatus
// after to pull the exit code
func (r *OCIRuntime) stopContainer(ctr *Container, timeout uint) error {
logrus.Debugf("Stopping container %s (PID %d)", ctr.ID(), ctr.state.PID)
// Ping the container to see if it's alive
// If it's not, it's already stopped, return
err := unix.Kill(ctr.state.PID, 0)

View File

@ -256,7 +256,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool)
}
// Need to update container state to make sure we know it's stopped
if err := c.syncContainer(); err != nil {
if err := c.waitForExitFileAndSync(); err != nil {
return err
}
} else if !(c.state.State == ContainerStateConfigured ||