mirror of
https://github.com/containers/podman.git
synced 2025-09-10 15:46:07 +08:00
Implement SD-NOTIFY proxy in conmon
This leverages conmon's ability to proxy the SD-NOTIFY socket. This prevents locking caused by OCI runtime blocking, waiting for SD-NOTIFY messages, and instead passes the messages directly up to the host. NOTE: Also re-enable the auto-update tests which has been disabled due to flakiness. With this change, Podman properly integrates into systemd. Fixes: #7316 Signed-off-by: Joseph Gooch <mrwizard@dok.org> Signed-off-by: Daniel J Walsh <dwalsh@redhat.com> Signed-off-by: Valentin Rothberg <rothberg@redhat.com>
This commit is contained in:

committed by
Valentin Rothberg

parent
30b036c5d3
commit
c22f3e8b4e
@ -126,6 +126,10 @@ type Container struct {
|
||||
// This is true if a container is restored from a checkpoint.
|
||||
restoreFromCheckpoint bool
|
||||
|
||||
// Used to query the NOTIFY_SOCKET once along with setting up
|
||||
// mounts etc.
|
||||
notifySocket string
|
||||
|
||||
slirp4netnsSubnet *net.IPNet
|
||||
}
|
||||
|
||||
|
@ -352,6 +352,10 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := c.mountNotifySocket(g); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Get host UID and GID based on the container process UID and GID.
|
||||
hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid))
|
||||
if err != nil {
|
||||
@ -777,6 +781,41 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
|
||||
return g.Config, nil
|
||||
}
|
||||
|
||||
// mountNotifySocket mounts the NOTIFY_SOCKET into the container if it's set
|
||||
// and if the sdnotify mode is set to container. It also sets c.notifySocket
|
||||
// to avoid redundantly looking up the env variable.
|
||||
func (c *Container) mountNotifySocket(g generate.Generator) error {
|
||||
notify, ok := os.LookupEnv("NOTIFY_SOCKET")
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
c.notifySocket = notify
|
||||
|
||||
if c.config.SdNotifyMode != define.SdNotifyModeContainer {
|
||||
return nil
|
||||
}
|
||||
|
||||
notifyDir := filepath.Join(c.bundlePath(), "notify")
|
||||
logrus.Debugf("checking notify %q dir", notifyDir)
|
||||
if err := os.MkdirAll(notifyDir, 0755); err != nil {
|
||||
if !os.IsExist(err) {
|
||||
return errors.Wrapf(err, "unable to create notify %q dir", notifyDir)
|
||||
}
|
||||
}
|
||||
if err := label.Relabel(notifyDir, c.MountLabel(), true); err != nil {
|
||||
return errors.Wrapf(err, "relabel failed %q", notifyDir)
|
||||
}
|
||||
logrus.Debugf("add bindmount notify %q dir", notifyDir)
|
||||
if _, ok := c.state.BindMounts["/run/notify"]; !ok {
|
||||
c.state.BindMounts["/run/notify"] = notifyDir
|
||||
}
|
||||
|
||||
// Set the container's notify socket to the proxy socket created by conmon
|
||||
g.AddProcessEnv("NOTIFY_SOCKET", "/run/notify/notify.sock")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// systemd expects to have /run, /run/lock and /tmp on tmpfs
|
||||
// It also expects to be able to write to /sys/fs/cgroup/systemd and /var/log/journal
|
||||
func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) error {
|
||||
@ -1730,6 +1769,7 @@ rootless=%d
|
||||
c.state.BindMounts[dest] = src
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -14,6 +14,7 @@ var initInodes = map[string]bool{
|
||||
"/etc/resolv.conf": true,
|
||||
"/proc": true,
|
||||
"/run": true,
|
||||
"/run/notify": true,
|
||||
"/run/.containerenv": true,
|
||||
"/run/secrets": true,
|
||||
"/sys": true,
|
||||
|
@ -462,7 +462,7 @@ func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *Ex
|
||||
Setpgid: true,
|
||||
}
|
||||
|
||||
err = startCommandGivenSelinux(execCmd)
|
||||
err = startCommandGivenSelinux(execCmd, c)
|
||||
|
||||
// We don't need children pipes on the parent side
|
||||
errorhandling.CloseQuiet(childSyncPipe)
|
||||
|
@ -364,11 +364,6 @@ func (r *ConmonOCIRuntime) StartContainer(ctr *Container) error {
|
||||
return err
|
||||
}
|
||||
env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)}
|
||||
if ctr.config.SdNotifyMode == define.SdNotifyModeContainer {
|
||||
if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok {
|
||||
env = append(env, fmt.Sprintf("NOTIFY_SOCKET=%s", notify))
|
||||
}
|
||||
}
|
||||
if path, ok := os.LookupEnv("PATH"); ok {
|
||||
env = append(env, fmt.Sprintf("PATH=%s", path))
|
||||
}
|
||||
@ -1014,12 +1009,6 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
|
||||
}
|
||||
}
|
||||
|
||||
if ctr.config.SdNotifyMode == define.SdNotifyModeIgnore {
|
||||
if err := os.Unsetenv("NOTIFY_SOCKET"); err != nil {
|
||||
logrus.Warnf("Error unsetting NOTIFY_SOCKET %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
pidfile := ctr.config.PidFile
|
||||
if pidfile == "" {
|
||||
pidfile = filepath.Join(ctr.state.RunDir, "pidfile")
|
||||
@ -1027,6 +1016,10 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
|
||||
|
||||
args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag)
|
||||
|
||||
if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.notifySocket != "" {
|
||||
args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.notifySocket))
|
||||
}
|
||||
|
||||
if ctr.config.Spec.Process.Terminal {
|
||||
args = append(args, "-t")
|
||||
} else if ctr.config.Stdin {
|
||||
@ -1171,7 +1164,8 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
|
||||
}
|
||||
}
|
||||
|
||||
err = startCommandGivenSelinux(cmd)
|
||||
err = startCommandGivenSelinux(cmd, ctr)
|
||||
|
||||
// regardless of whether we errored or not, we no longer need the children pipes
|
||||
childSyncPipe.Close()
|
||||
childStartPipe.Close()
|
||||
@ -1203,7 +1197,13 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
|
||||
// conmon not having a pid file is a valid state, so don't set it if we don't have it
|
||||
logrus.Infof("Got Conmon PID as %d", conmonPID)
|
||||
ctr.state.ConmonPID = conmonPID
|
||||
if ctr.config.SdNotifyMode != define.SdNotifyModeIgnore {
|
||||
|
||||
// Send the MAINPID via sdnotify if needed.
|
||||
switch ctr.config.SdNotifyMode {
|
||||
case define.SdNotifyModeContainer, define.SdNotifyModeIgnore:
|
||||
// Nothing to do or conmon takes care of it already.
|
||||
|
||||
default:
|
||||
if sent, err := daemon.SdNotify(false, fmt.Sprintf("MAINPID=%d", conmonPID)); err != nil {
|
||||
logrus.Errorf("Error notifying systemd of Conmon PID: %v", err)
|
||||
} else if sent {
|
||||
@ -1239,11 +1239,6 @@ func (r *ConmonOCIRuntime) configureConmonEnv(ctr *Container, runtimeDir string)
|
||||
}
|
||||
|
||||
extraFiles := make([]*os.File, 0)
|
||||
if ctr.config.SdNotifyMode == define.SdNotifyModeContainer {
|
||||
if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok {
|
||||
env = append(env, fmt.Sprintf("NOTIFY_SOCKET=%s", notify))
|
||||
}
|
||||
}
|
||||
if !r.sdNotify {
|
||||
if listenfds, ok := os.LookupEnv("LISTEN_FDS"); ok {
|
||||
env = append(env, fmt.Sprintf("LISTEN_FDS=%s", listenfds), "LISTEN_PID=1")
|
||||
@ -1335,7 +1330,23 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
|
||||
|
||||
// startCommandGivenSelinux starts a container ensuring to set the labels of
|
||||
// the process to make sure SELinux doesn't block conmon communication, if SELinux is enabled
|
||||
func startCommandGivenSelinux(cmd *exec.Cmd) error {
|
||||
func startCommandGivenSelinux(cmd *exec.Cmd, ctr *Container) error {
|
||||
// Make sure to unset the NOTIFY_SOCKET and reset if afterwards if needed.
|
||||
switch ctr.config.SdNotifyMode {
|
||||
case define.SdNotifyModeContainer, define.SdNotifyModeIgnore:
|
||||
if ctr.notifySocket != "" {
|
||||
if err := os.Unsetenv("NOTIFY_SOCKET"); err != nil {
|
||||
logrus.Warnf("Error unsetting NOTIFY_SOCKET %v", err)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if err := os.Setenv("NOTIFY_SOCKET", ctr.notifySocket); err != nil {
|
||||
logrus.Errorf("Error resetting NOTIFY_SOCKET=%s", ctr.notifySocket)
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
if !selinux.GetEnabled() {
|
||||
return cmd.Start()
|
||||
}
|
||||
|
@ -221,7 +221,6 @@ function _confirm_update() {
|
||||
}
|
||||
|
||||
@test "podman auto-update - label io.containers.autoupdate=local with rollback" {
|
||||
skip "This test flakes way too often, see #11175"
|
||||
# sdnotify fails with runc 1.0.0-3-dev2 on Ubuntu. Let's just
|
||||
# assume that we work only with crun, nothing else.
|
||||
# [copied from 260-sdnotify.bats]
|
||||
|
@ -130,6 +130,8 @@ function _assert_mainpid_is_conmon() {
|
||||
_stop_socat
|
||||
}
|
||||
|
||||
# These tests can fail in dev. environment because of SELinux.
|
||||
# quick fix: chcon -t container_runtime_exec_t ./bin/podman
|
||||
@test "sdnotify : container" {
|
||||
# Sigh... we need to pull a humongous image because it has systemd-notify.
|
||||
# (IMPORTANT: fedora:32 and above silently removed systemd-notify; this
|
||||
@ -150,7 +152,7 @@ function _assert_mainpid_is_conmon() {
|
||||
wait_for_ready $cid
|
||||
|
||||
run_podman logs $cid
|
||||
is "${lines[0]}" "/.*/container\.sock/notify" "NOTIFY_SOCKET is passed to container"
|
||||
is "${lines[0]}" "/run/notify/notify.sock" "NOTIFY_SOCKET is passed to container"
|
||||
|
||||
# With container, READY=1 isn't necessarily the last message received;
|
||||
# just look for it anywhere in received messages
|
||||
|
Reference in New Issue
Block a user