mirror of
				https://github.com/containers/podman.git
				synced 2025-10-25 18:25:59 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			274 lines
		
	
	
		
			8.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			274 lines
		
	
	
		
			8.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package libpod
 | |
| 
 | |
| import (
 | |
| 	"bytes"
 | |
| 	"encoding/json"
 | |
| 	"fmt"
 | |
| 	"os"
 | |
| 	"os/exec"
 | |
| 	"path/filepath"
 | |
| 	"syscall"
 | |
| 	"time"
 | |
| 
 | |
| 	"github.com/containerd/cgroups"
 | |
| 	spec "github.com/opencontainers/runtime-spec/specs-go"
 | |
| 	"github.com/pkg/errors"
 | |
| 	"github.com/sirupsen/logrus"
 | |
| 	"golang.org/x/sys/unix"
 | |
| 
 | |
| 	// TODO import these functions into libpod and remove the import
 | |
| 	// Trying to keep libpod from depending on CRI-O code
 | |
| 	"github.com/projectatomic/libpod/utils"
 | |
| )
 | |
| 
 | |
| // OCI code is undergoing heavy rewrite
 | |
| 
 | |
// Names of the cgroup managers accepted by newOCIRuntime.
const (
	// CgroupfsCgroupsManager selects the native cgroupfs cgroup manager.
	CgroupfsCgroupsManager = "cgroupfs"
	// SystemdCgroupsManager selects the native systemd cgroup manager.
	SystemdCgroupsManager = "systemd"
)

// ContainerCreateTimeout is how long container creation waits for conmon to
// report the container PID before giving up.
const ContainerCreateTimeout = 4 * time.Minute
 | |
| 
 | |
// OCIRuntime describes an OCI-compatible runtime, plus the conmon monitor used
// to launch it, that libpod calls into to perform container operations.
type OCIRuntime struct {
	// name is the runtime name supplied at construction.
	name string
	// path is the runtime executable; it is passed to conmon via "-r" and
	// invoked directly to start containers.
	path string

	// conmonPath is the conmon executable launched for each container.
	conmonPath string
	// conmonEnv is the base environment handed to conmon.
	conmonEnv []string

	// cgroupManager is CgroupfsCgroupsManager or SystemdCgroupsManager.
	cgroupManager string

	// tmpDir is the parent directory of exitsDir and socketsDir.
	tmpDir string
	// exitsDir is passed to conmon as "--exit-dir".
	exitsDir string
	// socketsDir is passed to conmon as "--socket-dir-path".
	socketsDir string

	// logSizeMax is forwarded as "--log-size-max" when it is >= 0.
	logSizeMax int64
	// noPivot adds "--no-pivot" to the conmon command line when set.
	noPivot bool
}
 | |
| 
 | |
// syncInfo is the JSON message the monitor process sends back over the sync
// pipe once container creation has been attempted.
type syncInfo struct {
	// Pid is the reported container PID; createContainer treats -1 as failure.
	Pid int `json:"pid"`
	// Message optionally carries an error description alongside the PID.
	Message string `json:"message,omitempty"`
}
 | |
| 
 | |
| // Make a new OCI runtime with provided options
 | |
| func newOCIRuntime(name string, path string, conmonPath string, conmonEnv []string, cgroupManager string, tmpDir string, logSizeMax int64, noPivotRoot bool) (*OCIRuntime, error) {
 | |
| 	runtime := new(OCIRuntime)
 | |
| 	runtime.name = name
 | |
| 	runtime.path = path
 | |
| 	runtime.conmonPath = conmonPath
 | |
| 	runtime.conmonEnv = conmonEnv
 | |
| 	runtime.cgroupManager = cgroupManager
 | |
| 	runtime.tmpDir = tmpDir
 | |
| 	runtime.logSizeMax = logSizeMax
 | |
| 	runtime.noPivot = noPivotRoot
 | |
| 
 | |
| 	runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits")
 | |
| 	runtime.socketsDir = filepath.Join(runtime.tmpDir, "socket")
 | |
| 
 | |
| 	if cgroupManager != CgroupfsCgroupsManager && cgroupManager != SystemdCgroupsManager {
 | |
| 		return nil, errors.Wrapf(ErrInvalidArg, "invalid cgroup manager specified: %s", cgroupManager)
 | |
| 	}
 | |
| 
 | |
| 	// Create the exit files and attach sockets directories
 | |
| 	if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil {
 | |
| 		// The directory is allowed to exist
 | |
| 		if !os.IsExist(err) {
 | |
| 			return nil, errors.Wrapf(err, "error creating OCI runtime exit files directory %s",
 | |
| 				runtime.exitsDir)
 | |
| 		}
 | |
| 	}
 | |
| 	if err := os.MkdirAll(runtime.socketsDir, 0750); err != nil {
 | |
| 		// The directory is allowed to exist
 | |
| 		if !os.IsExist(err) {
 | |
| 			return nil, errors.Wrapf(err, "error creating OCI runtime attach sockets directory %s",
 | |
| 				runtime.socketsDir)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return runtime, nil
 | |
| }
 | |
| 
 | |
| // newPipe creates a unix socket pair for communication
 | |
| func newPipe() (parent *os.File, child *os.File, err error) {
 | |
| 	fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
 | |
| 	if err != nil {
 | |
| 		return nil, nil, err
 | |
| 	}
 | |
| 	return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
 | |
| }
 | |
| 
 | |
// createUnitName returns the systemd scope unit name used for cgroup scopes,
// in the form "<prefix>-<name>.scope".
func createUnitName(prefix string, name string) string {
	const scopeSuffix = ".scope"
	// Every operand is a string, so fmt.Sprint inserts no separating spaces.
	unit := fmt.Sprint(prefix, "-", name, scopeSuffix)
	return unit
}
 | |
| 
 | |
| // CreateContainer creates a container in the OCI runtime
 | |
| // TODO terminal support for container
 | |
| // Presently just ignoring conmon opts related to it
 | |
| func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) error {
 | |
| 	var stderrBuf bytes.Buffer
 | |
| 
 | |
| 	parentPipe, childPipe, err := newPipe()
 | |
| 	if err != nil {
 | |
| 		return errors.Wrapf(err, "error creating socket pair")
 | |
| 	}
 | |
| 
 | |
| 	childStartPipe, parentStartPipe, err := newPipe()
 | |
| 	if err != nil {
 | |
| 		return errors.Wrapf(err, "error creating socket pair for start pipe")
 | |
| 	}
 | |
| 
 | |
| 	defer parentPipe.Close()
 | |
| 	defer parentStartPipe.Close()
 | |
| 
 | |
| 	args := []string{}
 | |
| 	if r.cgroupManager == SystemdCgroupsManager {
 | |
| 		args = append(args, "-s")
 | |
| 	}
 | |
| 	args = append(args, "-c", ctr.ID())
 | |
| 	args = append(args, "-u", ctr.ID())
 | |
| 	args = append(args, "-r", r.path)
 | |
| 	args = append(args, "-b", ctr.bundlePath())
 | |
| 	args = append(args, "-p", filepath.Join(ctr.state.RunDir, "pidfile"))
 | |
| 	// TODO container log location should be configurable
 | |
| 	// The default also likely shouldn't be this
 | |
| 	args = append(args, "-l", filepath.Join(ctr.config.StaticDir, "ctr.log"))
 | |
| 	args = append(args, "--exit-dir", r.exitsDir)
 | |
| 	args = append(args, "--socket-dir-path", r.socketsDir)
 | |
| 	if ctr.config.Spec.Process.Terminal {
 | |
| 		args = append(args, "-t")
 | |
| 	} else if ctr.config.Stdin {
 | |
| 		args = append(args, "-i")
 | |
| 	}
 | |
| 	if r.logSizeMax >= 0 {
 | |
| 		args = append(args, "--log-size-max", fmt.Sprintf("%v", r.logSizeMax))
 | |
| 	}
 | |
| 	if r.noPivot {
 | |
| 		args = append(args, "--no-pivot")
 | |
| 	}
 | |
| 	logrus.WithFields(logrus.Fields{
 | |
| 		"args": args,
 | |
| 	}).Debugf("running conmon: %s", r.conmonPath)
 | |
| 
 | |
| 	cmd := exec.Command(r.conmonPath, args...)
 | |
| 	cmd.Dir = ctr.state.RunDir
 | |
| 	cmd.SysProcAttr = &syscall.SysProcAttr{
 | |
| 		Setpgid: true,
 | |
| 	}
 | |
| 	// TODO this is probably a really bad idea for some uses
 | |
| 	// Make this configurable
 | |
| 	cmd.Stdin = os.Stdin
 | |
| 	cmd.Stdout = os.Stdout
 | |
| 	cmd.Stderr = os.Stderr
 | |
| 	if ctr.config.Spec.Process.Terminal {
 | |
| 		cmd.Stderr = &stderrBuf
 | |
| 	}
 | |
| 
 | |
| 	cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe, childStartPipe)
 | |
| 	// 0, 1 and 2 are stdin, stdout and stderr
 | |
| 	cmd.Env = append(r.conmonEnv, fmt.Sprintf("_OCI_SYNCPIPE=%d", 3))
 | |
| 	cmd.Env = append(cmd.Env, fmt.Sprintf("_OCI_STARTPIPE=%d", 4))
 | |
| 
 | |
| 	err = cmd.Start()
 | |
| 	if err != nil {
 | |
| 		childPipe.Close()
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	// We don't need childPipe on the parent side
 | |
| 	childPipe.Close()
 | |
| 	childStartPipe.Close()
 | |
| 
 | |
| 	// Move conmon to specified cgroup
 | |
| 	if r.cgroupManager == SystemdCgroupsManager {
 | |
| 		logrus.Infof("Running conmon under slice %s and unitName %s", cgroupParent, createUnitName("libpod-conmon", ctr.ID()))
 | |
| 		if err = utils.RunUnderSystemdScope(cmd.Process.Pid, cgroupParent, createUnitName("libpod-conmon", ctr.ID())); err != nil {
 | |
| 			logrus.Warnf("Failed to add conmon to systemd sandbox cgroup: %v", err)
 | |
| 		}
 | |
| 	} else {
 | |
| 		control, err := cgroups.New(cgroups.V1, cgroups.StaticPath(filepath.Join(cgroupParent, "/libpod-conmon-"+ctr.ID())), &spec.LinuxResources{})
 | |
| 		if err != nil {
 | |
| 			logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
 | |
| 		} else {
 | |
| 			// XXX: this defer does nothing as the cgroup can't be deleted cause
 | |
| 			// it contains the conmon pid in tasks
 | |
| 			// we need to remove this defer and delete the cgroup once conmon exits
 | |
| 			// maybe need a conmon monitor?
 | |
| 			defer control.Delete()
 | |
| 			if err := control.Add(cgroups.Process{Pid: cmd.Process.Pid}); err != nil {
 | |
| 				logrus.Warnf("Failed to add conmon to cgroupfs sandbox cgroup: %v", err)
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/* We set the cgroup, now the child can start creating children */
 | |
| 	someData := []byte{0}
 | |
| 	_, err = parentStartPipe.Write(someData)
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	/* Wait for initial setup and fork, and reap child */
 | |
| 	err = cmd.Wait()
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	// TODO should do a defer r.deleteContainer(ctr) here if err != nil
 | |
| 	// Need deleteContainer to be working first, though...
 | |
| 
 | |
| 	// Wait to get container pid from conmon
 | |
| 	type syncStruct struct {
 | |
| 		si  *syncInfo
 | |
| 		err error
 | |
| 	}
 | |
| 	ch := make(chan syncStruct)
 | |
| 	go func() {
 | |
| 		var si *syncInfo
 | |
| 		if err = json.NewDecoder(parentPipe).Decode(&si); err != nil {
 | |
| 			ch <- syncStruct{err: err}
 | |
| 			return
 | |
| 		}
 | |
| 		ch <- syncStruct{si: si}
 | |
| 	}()
 | |
| 
 | |
| 	select {
 | |
| 	case ss := <-ch:
 | |
| 		if ss.err != nil {
 | |
| 			return errors.Wrapf(ss.err, "error reading container (probably exited) json message")
 | |
| 		}
 | |
| 		logrus.Debugf("Received container pid: %d", ss.si.Pid)
 | |
| 		if ss.si.Pid == -1 {
 | |
| 			if ss.si.Message != "" {
 | |
| 				return errors.Wrapf(ErrInternal, "container create failed: %s", ss.si.Message)
 | |
| 			}
 | |
| 			return errors.Wrapf(ErrInternal, "container create failed")
 | |
| 		}
 | |
| 	case <-time.After(ContainerCreateTimeout):
 | |
| 		return errors.Wrapf(ErrInternal, "container creation timeout")
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // updateContainerStatus retrieves the current status of the container from the
 | |
| // runtime
 | |
| func (r *OCIRuntime) updateContainerStatus(ctr *Container) error {
 | |
| 	return ErrNotImplemented
 | |
| }
 | |
| 
 | |
| // startContainer starts the given container
 | |
| func (r *OCIRuntime) startContainer(ctr *Container) error {
 | |
| 	// TODO: streams should probably *not* be our STDIN/OUT/ERR - redirect to buffers?
 | |
| 	if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.path, "start", ctr.ID()); err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	// TODO record start time in container struct
 | |
| 
 | |
| 	return nil
 | |
| }
 | 
