Modify pod API to move Init() into Start()

Separating Init() and Start() does not make sense on the pod side,
where we may have to start some containers in order to initialize
others because of dependency ordering.

Also adjusts the internal container API to allow more code sharing.

Signed-off-by: Matthew Heon <matthew.heon@gmail.com>

Closes: #478
Approved by: rhatdan
Matthew Heon
2018-03-12 15:32:10 -04:00
committed by Atomic Bot
parent 58c35daea2
commit 40d302be8f
3 changed files with 233 additions and 56 deletions
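Before the diff, a short sketch of how callers change: with this commit a pod consumer no longer calls Init() before Start(), because Pod.Start() initializes any containers that still need it. The startPod helper below is hypothetical (not part of this commit) and assumes it lives in the libpod package so *Pod and logrus are in scope; only the Start() signature, map[string]error plus error, comes from the diff.

// startPod is a hypothetical illustration, not part of this commit.
// Pod.Start() now initializes containers as needed and starts them in
// dependency order, so the old Pod.Init() stub disappears.
func startPod(p *Pod) error {
	ctrErrors, err := p.Start()
	if err != nil {
		// A fatal error prevented the pod from being started at all.
		return err
	}
	// Individual containers can still fail; report each one.
	for id, startErr := range ctrErrors {
		logrus.Errorf("Error starting container %s: %v", id, startErr)
	}
	return nil
}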


@@ -59,35 +59,11 @@ func (c *Container) Init() (err error) {
}
}()
if err := c.makeBindMounts(); err != nil {
return err
}
// Generate the OCI spec
spec, err := c.generateSpec()
if err != nil {
return err
}
// Save the OCI spec to disk
if err := c.saveSpec(spec); err != nil {
return err
}
// With the spec complete, do an OCI create
if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent); err != nil {
return err
}
logrus.Debugf("Created container %s in OCI runtime", c.ID())
c.state.State = ContainerStateCreated
return c.save()
return c.init()
}
// Start starts a container
func (c *Container) Start() error {
func (c *Container) Start() (err error) {
if !c.locked {
c.lock.Lock()
defer c.lock.Unlock()
@@ -107,20 +83,33 @@ func (c *Container) Start() error {
return errors.Wrapf(ErrNotImplemented, "restarting a stopped container is not yet supported")
}
// Mount storage for the container
// Mount storage for the container if necessary
if err := c.mountStorage(); err != nil {
return err
}
defer func() {
if err != nil {
if err2 := c.cleanupStorage(); err2 != nil {
logrus.Errorf("Error cleaning up storage for container %s: %v", c.ID(), err2)
}
}
}()
if err := c.runtime.ociRuntime.startContainer(c); err != nil {
return err
// Create a network namespace if necessary
if c.config.CreateNetNS && c.state.NetNS == nil {
if err := c.runtime.createNetNS(c); err != nil {
return err
}
}
defer func() {
if err != nil {
if err2 := c.runtime.teardownNetNS(c); err2 != nil {
logrus.Errorf("Error tearing down network namespace for container %s: %v", c.ID(), err2)
}
}
}()
logrus.Debugf("Started container %s", c.ID())
c.state.State = ContainerStateRunning
return c.save()
return c.start()
}
// Stop uses the container's stop signal (or SIGTERM if no signal was specified)
@@ -138,6 +127,12 @@ func (c *Container) Stop() error {
}
}
if c.state.State == ContainerStateConfigured ||
c.state.State == ContainerStateUnknown ||
c.state.State == ContainerStatePaused {
return errors.Wrapf(ErrCtrStateInvalid, "can only stop created, running, or stopped containers")
}
return c.stop(c.config.StopTimeout)
}
@@ -154,6 +149,12 @@ func (c *Container) StopWithTimeout(timeout uint) error {
}
}
if c.state.State == ContainerStateConfigured ||
c.state.State == ContainerStateUnknown ||
c.state.State == ContainerStatePaused {
return errors.Wrapf(ErrCtrStateInvalid, "can only stop created, running, or stopped containers")
}
return c.stop(timeout)
}
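The switch from func (c *Container) Start() error to a named (err error) return above is what makes the new deferred cleanup blocks work: the defers observe the final error value and roll back the storage mount and network namespace when a later step fails. A self-contained sketch of that pattern (plain Go with hypothetical resource names, not libpod code):

package main

import (
	"errors"
	"fmt"
)

// acquireBoth shows the named-return, deferred-rollback idiom: each deferred
// block checks err, so a failure in any later step undoes earlier setup.
func acquireBoth(failSecond bool) (err error) {
	fmt.Println("setup: mount storage")
	defer func() {
		if err != nil {
			fmt.Println("rollback: unmount storage")
		}
	}()

	if failSecond {
		// Any later failure is visible to the deferred block via err.
		return errors.New("creating network namespace failed")
	}
	fmt.Println("setup: create network namespace")
	return nil
}

func main() {
	if err := acquireBoth(true); err != nil {
		fmt.Println("start failed:", err)
	}
}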


@@ -313,16 +313,111 @@ func (c *Container) save() error {
return nil
}
// Initialize a container, creating it in the runtime
func (c *Container) init() error {
if err := c.makeBindMounts(); err != nil {
return err
}
// Generate the OCI spec
spec, err := c.generateSpec()
if err != nil {
return err
}
// Save the OCI spec to disk
if err := c.saveSpec(spec); err != nil {
return err
}
// With the spec complete, do an OCI create
if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent); err != nil {
return err
}
logrus.Debugf("Created container %s in OCI runtime", c.ID())
c.state.State = ContainerStateCreated
return c.save()
}
// Initialize (if necessary) and start a container
// Performs all necessary steps to start a container that is not running
// Does not lock or check validity
func (c *Container) initAndStart() (err error) {
// If we are ContainerStateUnknown, throw an error
if c.state.State == ContainerStateUnknown {
return errors.Wrapf(ErrCtrStateInvalid, "container %s is in an unknown state", c.ID())
}
// If we are running, do nothing
if c.state.State == ContainerStateRunning {
return nil
}
// If we are paused, throw an error
if c.state.State == ContainerStatePaused {
return errors.Wrapf(ErrCtrStateInvalid, "cannot start paused container %s", c.ID())
}
// TODO remove this once we can restart containers
if c.state.State == ContainerStateStopped {
return errors.Wrapf(ErrNotImplemented, "restarting containers is not yet implemented")
}
// Mount if necessary
if err := c.mountStorage(); err != nil {
return err
}
defer func() {
if err != nil {
if err2 := c.cleanupStorage(); err2 != nil {
logrus.Errorf("Error cleaning up storage for container %s: %v", c.ID(), err2)
}
}
}()
// Create a network namespace if necessary
if c.config.CreateNetNS && c.state.NetNS == nil {
if err := c.runtime.createNetNS(c); err != nil {
return err
}
}
defer func() {
if err != nil {
if err2 := c.runtime.teardownNetNS(c); err2 != nil {
logrus.Errorf("Error tearing down network namespace for container %s: %v", c.ID(), err2)
}
}
}()
// If we are ContainerStateConfigured we need to init()
if c.state.State == ContainerStateConfigured {
if err := c.init(); err != nil {
return err
}
}
// Now start the container
return c.start()
}
// Internal, non-locking function to start a container
func (c *Container) start() error {
if err := c.runtime.ociRuntime.startContainer(c); err != nil {
return err
}
logrus.Debugf("Started container %s", c.ID())
c.state.State = ContainerStateRunning
return c.save()
}
// Internal, non-locking function to stop container
func (c *Container) stop(timeout uint) error {
logrus.Debugf("Stopping ctr %s with timeout %d", c.ID(), timeout)
if c.state.State == ContainerStateConfigured ||
c.state.State == ContainerStateUnknown ||
c.state.State == ContainerStatePaused {
return errors.Wrapf(ErrCtrStateInvalid, "can only stop created, running, or stopped containers")
}
if err := c.runtime.ociRuntime.stopContainer(c, timeout); err != nil {
return err
}

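The container changes above split public, locking entry points (Init(), Start(), Stop()) from internal, non-locking helpers (init(), start(), initAndStart(), stop()), so pod code that already holds each container's lock can drive containers directly. A simplified, hypothetical sketch of that layering (not the real libpod types):

package main

import (
	"errors"
	"fmt"
	"sync"
)

type ctr struct {
	mu      sync.Mutex
	running bool
}

// Start is the public entry point: it takes the lock, then delegates to the
// internal helper, mirroring the Container.Start() / start() split above.
func (c *ctr) Start() error {
	c.mu.Lock()
	defer c.mu.Unlock()
	return c.start()
}

// start assumes the caller already holds the lock, so code that locks a
// whole group of containers (a pod) can call it without double-locking.
func (c *ctr) start() error {
	if c.running {
		return errors.New("container is already running")
	}
	c.running = true
	return nil
}

func main() {
	c := &ctr{}
	fmt.Println(c.Start()) // <nil>
	fmt.Println(c.Start()) // container is already running
}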

@@ -68,12 +68,13 @@ func newPod(lockDir string, runtime *Runtime) (*Pod, error) {
return pod, nil
}
// Init initializes all containers within a pod that have not been initialized
func (p *Pod) Init() error {
return ErrNotImplemented
}
// TODO: need function to produce a directed graph of containers
// This would allow us to properly determine stop/start order
// Start starts all containers within a pod
// It combines the effects of Init() and Start() on a container
// If a container has already been initialized it will be started,
// otherwise it will be initialized then started.
// Containers that are already running or have been paused are ignored
// All containers are started independently, in order dictated by their
// dependencies.
@@ -97,6 +98,11 @@ func (p *Pod) Start() (map[string]error, error) {
return nil, err
}
// Maintain a map of containers still to start
ctrsToStart := make(map[string]*Container)
// Maintain a map of all containers so we can easily look up dependencies
allCtrsMap := make(map[string]*Container)
// We need to lock all the containers
for _, ctr := range allCtrs {
ctr.lock.Lock()
@@ -105,10 +111,80 @@ func (p *Pod) Start() (map[string]error, error) {
if err := ctr.syncContainer(); err != nil {
return nil, err
}
if ctr.state.State == ContainerStateConfigured ||
ctr.state.State == ContainerStateCreated ||
ctr.state.State == ContainerStateStopped {
ctrsToStart[ctr.ID()] = ctr
}
allCtrsMap[ctr.ID()] = ctr
}
ctrErrors := make(map[string]error)
// Loop at most 10 times, to prevent potential infinite loops in
// dependencies
loopCounter := 10
// Loop while we still have containers to start
for len(ctrsToStart) > 0 {
// Loop through all containers, attempting to start them
for id, ctr := range ctrsToStart {
// TODO remove this when we support restarting containers
if ctr.state.State == ContainerStateStopped {
ctrErrors[id] = errors.Wrapf(ErrNotImplemented, "starting stopped containers is not yet supported")
delete(ctrsToStart, id)
continue
}
// TODO should we only do a dependencies check if we are not ContainerStateCreated?
depsOK := true
var depErr error
// Check container dependencies
for _, depID := range ctr.Dependencies() {
depCtr := allCtrsMap[depID]
if depCtr.state.State != ContainerStateRunning &&
depCtr.state.State != ContainerStatePaused {
// We are definitely not OK to init, a dependency is not up
depsOK = false
// Check to see if the dependency errored
// If it did, error here too
if _, ok := ctrErrors[depID]; ok {
depErr = errors.Wrapf(ErrCtrStateInvalid, "dependency %s of container %s failed to start", depID, id)
}
break
}
}
if !depsOK {
// Only if one of the containers dependencies failed should we stop trying
// Otherwise, assume it's just yet to start, retry starting this container later
if depErr != nil {
ctrErrors[id] = depErr
delete(ctrsToStart, id)
}
continue
}
// Initialize and start the container
if err := ctr.initAndStart(); err != nil {
ctrErrors[id] = err
}
delete(ctrsToStart, id)
}
loopCounter = loopCounter - 1
if loopCounter == 0 {
// Loop through all remaining containers and add an error
for id := range ctrsToStart {
ctrErrors[id] = errors.Wrapf(ErrInternal, "exceeded maximum attempts trying to start container %s", id)
}
break
}
}
// Start all containers
for _, ctr := range allCtrs {
// Ignore containers that are not created or stopped
@@ -148,6 +224,8 @@ func (p *Pod) Start() (map[string]error, error) {
// Each container will use its own stop timeout
// Only running containers will be stopped. Paused, stopped, or created
// containers will be ignored.
// If cleanup is true, mounts and network namespaces will be cleaned up after
// the container is stopped.
// All containers are stopped independently. An error stopping one container
// will not prevent other containers being stopped.
// An error and a map[string]error are returned
@@ -157,7 +235,7 @@ func (p *Pod) Start() (map[string]error, error) {
// containers. The container ID is mapped to the error encountered. The error is
// set to ErrCtrExists
// If both error and the map are nil, all containers were stopped without error
func (p *Pod) Stop() (map[string]error, error) {
func (p *Pod) Stop(cleanup bool) (map[string]error, error) {
p.lock.Lock()
defer p.lock.Unlock()
@@ -182,6 +260,9 @@ func (p *Pod) Stop() (map[string]error, error) {
ctrErrors := make(map[string]error)
// TODO: There may be cases where it makes sense to order stops based on
// dependencies. Should we bother with this?
// Stop to all containers
for _, ctr := range allCtrs {
// Ignore containers that are not running
@@ -189,22 +270,22 @@ func (p *Pod) Stop() (map[string]error, error) {
continue
}
if err := ctr.runtime.ociRuntime.stopContainer(ctr, ctr.config.StopTimeout); err != nil {
if err := ctr.stop(ctr.config.StopTimeout); err != nil {
ctrErrors[ctr.ID()] = err
continue
}
logrus.Debugf("Stopped container %s", ctr.ID())
if cleanup {
// Clean up storage to ensure we don't leave dangling mounts
if err := ctr.cleanupStorage(); err != nil {
ctrErrors[ctr.ID()] = err
continue
}
// Sync container state to pick up return code
if err := ctr.runtime.ociRuntime.updateContainerStatus(ctr); err != nil {
ctrErrors[ctr.ID()] = err
continue
}
// Clean up storage to ensure we don't leave dangling mounts
if err := ctr.cleanupStorage(); err != nil {
ctrErrors[ctr.ID()] = err
// Clean up network namespace
if err := ctr.cleanupNetwork(); err != nil {
ctrErrors[ctr.ID()] = err
}
}
}
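Until the TODO above about producing a directed graph of containers is resolved, Pod.Start() orders startup with a bounded retry loop: on each pass it tries every not-yet-started container, skips any whose dependencies are not running, and gives up after ten passes. A self-contained sketch of that approach with made-up types (the real code calls initAndStart() and records per-container errors the same way):

package main

import "fmt"

type fakeCtr struct {
	deps    []string
	running bool
}

// startAll mimics the bounded dependency-retry loop from Pod.Start().
func startAll(ctrs map[string]*fakeCtr) map[string]error {
	ctrErrors := make(map[string]error)
	toStart := make(map[string]*fakeCtr, len(ctrs))
	for id, c := range ctrs {
		toStart[id] = c
	}

	// Cap the number of passes so a dependency cycle cannot loop forever.
	for pass := 0; pass < 10 && len(toStart) > 0; pass++ {
		for id, c := range toStart {
			ready := true
			for _, dep := range c.deps {
				depCtr, ok := ctrs[dep]
				if !ok || !depCtr.running {
					ready = false // dependency missing or not up yet, retry next pass
					break
				}
			}
			if !ready {
				continue
			}
			c.running = true // stand-in for initAndStart()
			delete(toStart, id)
		}
	}

	// Whatever is left never became startable.
	for id := range toStart {
		ctrErrors[id] = fmt.Errorf("exceeded maximum attempts trying to start container %s", id)
	}
	return ctrErrors
}

func main() {
	ctrs := map[string]*fakeCtr{
		"db":  {},
		"app": {deps: []string{"db"}},
	}
	fmt.Println(startAll(ctrs)) // prints map[] when both containers start
}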