kube play: sd-notify integration

Integrate sd-notify policies into `kube play`.  The policies can be
configured for all containers via the `io.containers.sdnotify`
annotation or for individual containers via the
`io.containers.sdnotify/$name` annotation.

The `kube play` process will wait for all containers to be ready by
waiting for the individual `READY=1` messages which are received via
the `pkg/systemd/notifyproxy` proxy mechanism.

Also update the simple "container" sd-notify test as it did not fully
test the expected behavior which became obvious when adding the new
tests.

Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
This commit is contained in:
Valentin Rothberg
2022-08-08 11:44:52 +02:00
parent 3fc126e152
commit 79e21b5b16
13 changed files with 324 additions and 37 deletions

View File

@ -182,6 +182,10 @@ func playFlags(cmd *cobra.Command) {
}
func Play(cmd *cobra.Command, args []string) error {
if playOptions.ServiceContainer && !playOptions.StartCLI { // Sanity check to be future proof
return fmt.Errorf("--service-container does not work with --start=stop")
}
// TLS verification in c/image is controlled via a `types.OptionalBool`
// which allows for distinguishing among set-true, set-false, unspecified
// which is important to implement a sane way of dealing with defaults of

View File

@ -133,5 +133,9 @@ func (c *Container) validate() error {
if len(c.config.InitContainerType) > 0 && len(c.config.Pod) < 1 {
return fmt.Errorf("init containers must be created in a pod: %w", define.ErrInvalidArg)
}
if c.config.SdNotifyMode == define.SdNotifyModeIgnore && len(c.config.SdNotifySocket) > 0 {
return fmt.Errorf("cannot set sd-notify socket %q with sd-notify mode %q", c.config.SdNotifySocket, c.config.SdNotifyMode)
}
return nil
}

View File

@ -81,13 +81,6 @@ const NoLogging = "none"
// PassthroughLogging is the string conmon expects when specifying to use the passthrough driver
const PassthroughLogging = "passthrough"
// Strings used for --sdnotify option to podman
const (
SdNotifyModeContainer = "container"
SdNotifyModeConmon = "conmon"
SdNotifyModeIgnore = "ignore"
)
// RLimitDefaultValue is the value set by default for nofile and nproc
const RLimitDefaultValue = uint64(1048576)

20
libpod/define/sdnotify.go Normal file
View File

@ -0,0 +1,20 @@
package define
import "fmt"
// Strings used for --sdnotify option to podman.
// ValidateSdNotifyMode accepts exactly these values plus the empty string.
const (
SdNotifyModeContainer = "container"
SdNotifyModeConmon = "conmon"
SdNotifyModeIgnore = "ignore"
)
// ValidateSdNotifyMode checks whether mode is an acceptable sd-notify mode.
// The empty string and the three SdNotifyMode* constants are accepted; any
// other value yields an error wrapping ErrInvalidArg that lists the allowed
// modes.
func ValidateSdNotifyMode(mode string) error {
	known := mode == "" ||
		mode == SdNotifyModeContainer ||
		mode == SdNotifyModeConmon ||
		mode == SdNotifyModeIgnore
	if known {
		return nil
	}
	return fmt.Errorf("%w: invalid sdnotify value %q: must be %s, %s or %s", ErrInvalidArg, mode, SdNotifyModeContainer, SdNotifyModeConmon, SdNotifyModeIgnore)
}

View File

@ -6,14 +6,12 @@ import (
"net"
"os"
"path/filepath"
"strings"
"syscall"
"github.com/containers/buildah/pkg/parse"
nettypes "github.com/containers/common/libnetwork/types"
"github.com/containers/common/pkg/config"
"github.com/containers/common/pkg/secrets"
cutil "github.com/containers/common/pkg/util"
"github.com/containers/image/v5/manifest"
"github.com/containers/image/v5/types"
"github.com/containers/podman/v4/libpod/define"
@ -29,12 +27,6 @@ import (
"github.com/sirupsen/logrus"
)
// Runtime Creation Options
var (
// SdNotifyModeValues describes the only values that SdNotifyMode can be
SdNotifyModeValues = []string{define.SdNotifyModeContainer, define.SdNotifyModeConmon, define.SdNotifyModeIgnore}
)
// WithStorageConfig uses the given configuration to set up container storage.
// If this is not specified, the system default configuration will be used
// instead.
@ -631,9 +623,8 @@ func WithSdNotifyMode(mode string) CtrCreateOption {
return define.ErrCtrFinalized
}
// verify values
if len(mode) > 0 && !cutil.StringInSlice(strings.ToLower(mode), SdNotifyModeValues) {
return fmt.Errorf("--sdnotify values must be one of %q: %w", strings.Join(SdNotifyModeValues, ", "), define.ErrInvalidArg)
if err := define.ValidateSdNotifyMode(mode); err != nil {
return err
}
ctr.config.SdNotifyMode = mode

View File

@ -27,13 +27,19 @@ import (
"github.com/containers/podman/v4/pkg/specgen/generate"
"github.com/containers/podman/v4/pkg/specgen/generate/kube"
"github.com/containers/podman/v4/pkg/specgenutil"
"github.com/containers/podman/v4/pkg/systemd/notifyproxy"
"github.com/containers/podman/v4/pkg/util"
"github.com/coreos/go-systemd/v22/daemon"
"github.com/ghodss/yaml"
"github.com/opencontainers/go-digest"
"github.com/sirupsen/logrus"
yamlv3 "gopkg.in/yaml.v3"
)
// sdNotifyAnnotation allows for configuring service-global and
// container-specific sd-notify modes. The bare key applies to all
// containers; the key followed by "/$name" applies only to the container
// named $name.
const sdNotifyAnnotation = "io.containers.sdnotify"
// createServiceContainer creates a container that can later on
// be associated with the pods of a K8s yaml. It will be started along with
// the first pod.
@ -73,7 +79,12 @@ func (ic *ContainerEngine) createServiceContainer(ctx context.Context, name stri
return nil, fmt.Errorf("creating runtime spec for service container: %w", err)
}
opts = append(opts, libpod.WithIsService())
opts = append(opts, libpod.WithSdNotifyMode(define.SdNotifyModeConmon))
// Set the sd-notify mode to "ignore". Podman is responsible for
// sending the notify messages when all containers are ready.
// The mode for individual containers or entire pods can be configured
// via the `sdNotifyAnnotation` annotation in the K8s YAML.
opts = append(opts, libpod.WithSdNotifyMode(define.SdNotifyModeIgnore))
// Create a new libpod container based on the spec.
ctr, err := ic.Libpod.NewContainer(ctx, runtimeSpec, spec, false, opts...)
@ -96,6 +107,10 @@ func k8sName(content []byte, suffix string) string {
}
func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options entities.PlayKubeOptions) (_ *entities.PlayKubeReport, finalErr error) {
if options.ServiceContainer && options.Start == types.OptionalBoolFalse { // Sanity check to be future proof
return nil, fmt.Errorf("running a service container requires starting the pod(s)")
}
report := &entities.PlayKubeReport{}
validKinds := 0
@ -121,6 +136,8 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options
var configMaps []v1.ConfigMap
ranContainers := false
var serviceContainer *libpod.Container
// create pod on each document if it is a pod or deployment
// any other kube kind will be skipped
for _, document := range documentList {
@ -130,8 +147,7 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options
}
// TODO: create constants for the various "kinds" of yaml files.
var serviceContainer *libpod.Container
if options.ServiceContainer && (kind == "Pod" || kind == "Deployment") {
if options.ServiceContainer && serviceContainer == nil && (kind == "Pod" || kind == "Deployment") {
ctr, err := ic.createServiceContainer(ctx, k8sName(content, "service"), options)
if err != nil {
return nil, err
@ -178,6 +194,7 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options
report.Pods = append(report.Pods, r.Pods...)
validKinds++
ranContainers = true
case "Deployment":
var deploymentYAML v1apps.Deployment
@ -192,6 +209,7 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options
report.Pods = append(report.Pods, r.Pods...)
validKinds++
ranContainers = true
case "PersistentVolumeClaim":
var pvcYAML v1.PersistentVolumeClaim
@ -239,6 +257,20 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options
return nil, fmt.Errorf("YAML document does not contain any supported kube kind")
}
if options.ServiceContainer && ranContainers {
// We can consider the service to be up and running now.
// Send the sd-notify messages pointing systemd to the
// service container.
data, err := serviceContainer.Inspect(false)
if err != nil {
return nil, err
}
message := fmt.Sprintf("MAINPID=%d\n%s", data.State.ConmonPid, daemon.SdNotifyReady)
if err := notifyproxy.SendMessage("", message); err != nil {
return nil, err
}
}
return report, nil
}
@ -280,6 +312,11 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
report entities.PlayKubeReport
)
mainSdNotifyMode, err := getSdNotifyMode(annotations, "")
if err != nil {
return nil, err
}
// Create the secret manager before hand
secretsManager, err := ic.Libpod.SecretsManager()
if err != nil {
@ -562,6 +599,9 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
initContainers = append(initContainers, ctr)
}
var sdNotifyProxies []*notifyproxy.NotifyProxy // containers' sd-notify proxies
for _, container := range podYAML.Spec.Containers {
// Error out if the same name is used for more than one container
if _, ok := ctrNames[container.Name]; ok {
@ -606,7 +646,31 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
if err != nil {
return nil, err
}
opts = append(opts, libpod.WithSdNotifyMode(define.SdNotifyModeIgnore))
sdNotifyMode := mainSdNotifyMode
ctrNotifyMode, err := getSdNotifyMode(annotations, container.Name)
if err != nil {
return nil, err
}
if ctrNotifyMode != "" {
sdNotifyMode = ctrNotifyMode
}
if sdNotifyMode == "" { // Default to "ignore"
sdNotifyMode = define.SdNotifyModeIgnore
}
opts = append(opts, libpod.WithSdNotifyMode(sdNotifyMode))
// Create a notify proxy for the container.
if sdNotifyMode != "" && sdNotifyMode != define.SdNotifyModeIgnore {
proxy, err := notifyproxy.New("")
if err != nil {
return nil, err
}
sdNotifyProxies = append(sdNotifyProxies, proxy)
opts = append(opts, libpod.WithSdNotifySocket(proxy.SocketPath()))
}
ctr, err := generate.ExecuteCreate(ctx, ic.Libpod, rtSpec, spec, false, opts...)
if err != nil {
return nil, err
@ -624,6 +688,13 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
playKubePod.ContainerErrors = append(playKubePod.ContainerErrors, fmt.Errorf("error starting container %s: %w", id, err).Error())
fmt.Println(playKubePod.ContainerErrors)
}
// Wait for each proxy to receive a READY message.
for _, proxy := range sdNotifyProxies {
if err := proxy.WaitAndClose(); err != nil {
return nil, err
}
}
}
playKubePod.ID = pod.ID()

View File

@ -0,0 +1,16 @@
package abi
import "github.com/containers/podman/v4/libpod/define"
// getSdNotifyMode looks up the sd-notify mode stored in the annotations.
// With an empty name the plain `sdNotifyAnnotation` key is consulted,
// otherwise the container-specific `sdNotifyAnnotation/$name` key. The value
// found (empty if the key is absent) is returned together with the result of
// validating it.
func getSdNotifyMode(annotations map[string]string, name string) (string, error) {
	key := sdNotifyAnnotation
	if name != "" {
		key += "/" + name
	}
	value := annotations[key]
	return value, define.ValidateSdNotifyMode(value)
}

View File

@ -0,0 +1,38 @@
package abi
import (
"testing"
"github.com/containers/podman/v4/libpod/define"
"github.com/stretchr/testify/require"
)
// TestGetSdNotifyMode verifies that getSdNotifyMode resolves the global and
// the container-specific annotation keys, yields an empty mode for keys that
// do not match, and reports an error for invalid modes.
func TestGetSdNotifyMode(t *testing.T) {
	type testCase struct {
		key, value, name, result string
		mustError                bool
	}
	cases := []testCase{
		{key: sdNotifyAnnotation, value: define.SdNotifyModeConmon, name: "", result: define.SdNotifyModeConmon},
		{key: sdNotifyAnnotation + "/container-a", value: define.SdNotifyModeContainer, name: "container-a", result: define.SdNotifyModeContainer},
		{key: sdNotifyAnnotation + "/container-b", value: define.SdNotifyModeIgnore, name: "container-b", result: define.SdNotifyModeIgnore},
		{key: sdNotifyAnnotation + "/container-c", value: "", name: "container-c", result: ""},
		{key: sdNotifyAnnotation + "-/wrong-key", value: "xxx", name: "wrong-key", result: ""},
		{key: sdNotifyAnnotation + "/container-error", value: "invalid", name: "container-error", result: "", mustError: true},
	}

	// All cases share one annotation map, so fill it before any lookup.
	annotations := make(map[string]string, len(cases))
	for _, tc := range cases {
		annotations[tc.key] = tc.value
	}

	for _, tc := range cases {
		got, err := getSdNotifyMode(annotations, tc.name)
		if !tc.mustError {
			require.NoError(t, err, "%v", tc)
			require.Equal(t, tc.result, got, "%v", tc)
			continue
		}
		require.Error(t, err, "%v", tc)
	}
}

View File

@ -67,9 +67,9 @@ func (s *SpecGenerator) Validate() error {
if len(s.ContainerBasicConfig.Systemd) > 0 && !util.StringInSlice(strings.ToLower(s.ContainerBasicConfig.Systemd), SystemDValues) {
return fmt.Errorf("--systemd values must be one of %q: %w", strings.Join(SystemDValues, ", "), ErrInvalidSpecConfig)
}
// sdnotify values must be container, conmon, or ignore
if len(s.ContainerBasicConfig.SdNotifyMode) > 0 && !util.StringInSlice(strings.ToLower(s.ContainerBasicConfig.SdNotifyMode), SdNotifyModeValues) {
return fmt.Errorf("--sdnotify values must be one of %q: %w", strings.Join(SdNotifyModeValues, ", "), ErrInvalidSpecConfig)
if err := define.ValidateSdNotifyMode(s.ContainerBasicConfig.SdNotifyMode); err != nil {
return err
}
//

View File

@ -353,9 +353,11 @@ func createContainerOptions(rt *libpod.Runtime, s *specgen.SpecGenerator, pod *l
}
if len(s.SdNotifyMode) > 0 {
options = append(options, libpod.WithSdNotifyMode(s.SdNotifyMode))
}
if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok {
options = append(options, libpod.WithSdNotifySocket(notify))
if s.SdNotifyMode != define.SdNotifyModeIgnore {
if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok {
options = append(options, libpod.WithSdNotifySocket(notify))
}
}
}
if pod != nil {

View File

@ -12,7 +12,15 @@ import (
)
// SendMessage sends the specified message to the specified socket.
// No message is sent if no socketPath is provided and the NOTIFY_SOCKET
// variable is not set either.
func SendMessage(socketPath string, message string) error {
if socketPath == "" {
socketPath, _ = os.LookupEnv("NOTIFY_SOCKET")
if socketPath == "" {
return nil
}
}
socketAddr := &net.UnixAddr{
Name: socketPath,
Net: "unixgram",

View File

@ -92,7 +92,7 @@ function _assert_mainpid_is_conmon() {
cid="$output"
run_podman container inspect $cid --format "{{.Config.SdNotifyMode}} {{.Config.SdNotifySocket}}"
is "$output" "ignore $NOTIFY_SOCKET"
is "$output" "ignore " "NOTIFY_SOCKET is not set with 'ignore' mode"
run_podman 1 start --attach $cid
is "$output" "" "\$NOTIFY_SOCKET in container"
@ -122,6 +122,7 @@ function _assert_mainpid_is_conmon() {
is "$output" "READY" "\$NOTIFY_SOCKET in container"
# The 'echo's help us debug failed runs
wait_for_file $_SOCAT_LOG
run cat $_SOCAT_LOG
echo "socat log:"
echo "$output"
@ -141,7 +142,7 @@ READY=1" "sdnotify sent MAINPID and READY"
# These tests can fail in dev. environment because of SELinux.
# quick fix: chcon -t container_runtime_exec_t ./bin/podman
@test "sdnotify : container" {
skip_if_aarch64 "FIXME: #15074 - fails on aarch64 non-remote"
skip_if_aarch64 "FIXME: #15277 sdnotify doesn't work on aarch64"
# Sigh... we need to pull a humongous image because it has systemd-notify.
# (IMPORTANT: fedora:32 and above silently removed systemd-notify; this
# caused CI to hang. That's why we explicitly require fedora:31)
@ -156,7 +157,7 @@ READY=1" "sdnotify sent MAINPID and READY"
_start_socat
run_podman run -d --sdnotify=container $_FEDORA \
sh -c 'printenv NOTIFY_SOCKET;echo READY;systemd-notify --ready;while ! test -f /stop;do sleep 0.1;done'
sh -c 'printenv NOTIFY_SOCKET; echo READY; while ! test -f /stop;do sleep 0.1;done;systemd-notify --ready'
cid="$output"
wait_for_ready $cid
@ -166,6 +167,8 @@ READY=1" "sdnotify sent MAINPID and READY"
run_podman logs $cid
is "${lines[0]}" "/run/notify/notify.sock" "NOTIFY_SOCKET is passed to container"
run_podman container inspect $cid --format "{{.State.ConmonPid}}"
mainPID="$output"
# With container, READY=1 isn't necessarily the last message received;
# just look for it anywhere in received messages
run cat $_SOCAT_LOG
@ -173,19 +176,25 @@ READY=1" "sdnotify sent MAINPID and READY"
echo "socat log:"
echo "$output"
is "$output" ".*READY=1" "received READY=1 through notify socket"
_assert_mainpid_is_conmon "$output"
is "$output" "MAINPID=$mainPID" "Container is not ready yet, so we only know the main PID"
# Done. Stop container, clean up.
run_podman exec $cid touch /stop
run_podman wait $cid
wait_for_file $_SOCAT_LOG
run cat $_SOCAT_LOG
echo "socat log:"
echo "$output"
is "$output" "MAINPID=$mainPID
READY=1"
run_podman rm $cid
run_podman rmi $_FEDORA
_stop_socat
}
@test "sdnotify : play kube" {
@test "sdnotify : play kube - no policies" {
# Create the YAMl file
yaml_source="$PODMAN_TMPDIR/test.yaml"
cat >$yaml_source <<EOF
@ -214,8 +223,15 @@ EOF
_start_socat
run_podman play kube --service-container=true $yaml_source
# Make sure the containers have the correct policy.
run_podman container inspect test_pod-test $service_container --format "{{.Config.SdNotifyMode}}"
is "$output" "ignore
ignore"
run_podman container inspect $service_container --format "{{.State.ConmonPid}}"
mainPID="$output"
wait_for_file $_SOCAT_LOG
# The 'echo's help us debug failed runs
run cat $_SOCAT_LOG
echo "socat log:"
@ -228,9 +244,116 @@ READY=1" "sdnotify sent MAINPID and READY"
# Clean up pod and pause image
run_podman play kube --down $PODMAN_TMPDIR/test.yaml
run_podman version --format "{{.Server.Version}}-{{.Server.Built}}"
podman rmi -f localhost/podman-pause:$output
run_podman rmi $(pause_image)
}
@test "sdnotify : play kube - with policies" {
# Exercises the sdnotify annotations of `play kube`: container "a" gets the
# pod-wide "container" policy, container "b" overrides it with "conmon", and
# the service container is expected to report "ignore".
skip_if_aarch64 "FIXME: #15277 sdnotify doesn't work on aarch64"
# Sigh... we need to pull a humongous image because it has systemd-notify.
# (IMPORTANT: fedora:32 and above silently removed systemd-notify; this
# caused CI to hang. That's why we explicitly require fedora:31)
# FIXME: is there a smaller image we could use?
local _FEDORA="$PODMAN_TEST_IMAGE_REGISTRY/$PODMAN_TEST_IMAGE_USER/fedora:31"
# Pull that image. Retry in case of flakes.
run_podman pull $_FEDORA || \
run_podman pull $_FEDORA || \
run_podman pull $_FEDORA
# Create the YAML file
yaml_source="$PODMAN_TMPDIR/test.yaml"
cat >$yaml_source <<EOF
apiVersion: v1
kind: Pod
metadata:
labels:
app: test
name: test_pod
annotations:
io.containers.sdnotify: "container"
io.containers.sdnotify/b: "conmon"
spec:
containers:
- command:
- /bin/sh
- -c
- 'printenv NOTIFY_SOCKET; echo READY; while ! test -f /stop;do sleep 0.1;done;systemd-notify --ready'
image: $_FEDORA
name: a
- command:
- /bin/sh
- -c
- 'echo READY; top'
image: $IMAGE
name: b
EOF
container_a="test_pod-a"
container_b="test_pod-b"
# The name of the service container is predictable: the first 12 characters
# of the hash of the YAML file followed by the "-service" suffix
yaml_sha=$(sha256sum $yaml_source)
service_container="${yaml_sha:0:12}-service"
export NOTIFY_SOCKET=$PODMAN_TMPDIR/conmon.sock
_start_socat
# Run `play kube` in the background as it will wait for all containers to
# send the READY=1 message.
timeout --foreground -v --kill=10 60 \
$PODMAN play kube --service-container=true $yaml_source &>/dev/null &
# Wait for both containers to be running
for i in $(seq 1 20); do
run_podman "?" container wait $container_a $container_b --condition="running"
if [[ $status == 0 ]]; then
break
fi
sleep 0.5
# Just for debugging
run_podman ps -a
done
if [[ $status != 0 ]]; then
die "container $container_a and/or $container_b did not start"
fi
# Make sure the containers have the correct policy
run_podman container inspect $container_a $container_b $service_container --format "{{.Config.SdNotifyMode}}"
is "$output" "container
conmon
ignore"
# Container a only runs `systemd-notify --ready` once /stop exists, so the
# notify socket must still be silent at this point.
is "$(< $_SOCAT_LOG)" "" "nothing received on socket"
# Make sure the container received a "proxy" socket and is not using the
# one of `kube play`
run_podman container inspect $container_a --format "{{.Config.SdNotifySocket}}"
assert "$output" != $NOTIFY_SOCKET
run_podman logs $container_a
is "${lines[0]}" "/run/notify/notify.sock" "NOTIFY_SOCKET is passed to container"
# Instruct the container to send the READY
run_podman exec $container_a /bin/touch /stop
# The MAINPID reported on the socket is compared against the service
# container's conmon PID below.
run_podman container inspect $service_container --format "{{.State.ConmonPid}}"
main_pid="$output"
run_podman container wait $container_a
wait_for_file $_SOCAT_LOG
# The 'echo's help us debug failed runs
run cat $_SOCAT_LOG
echo "socat log:"
echo "$output"
is "$output" "MAINPID=$main_pid
READY=1" "sdnotify sent MAINPID and READY"
_stop_socat
# Clean up pod and pause image
run_podman play kube --down $yaml_source
run_podman rmi $_FEDORA $(pause_image)
}
# vim: filetype=sh

View File

@ -342,6 +342,23 @@ function wait_for_port() {
die "Timed out waiting for $host:$port"
}
###################
#  wait_for_file  #  Returns once file is available on host
###################
#
# Polls once per second until the given path exists. Calls die if the path
# has not shown up after the timeout.
#
#   $1  path to wait for
#   $2  timeout in seconds (optional; default 5)
function wait_for_file() {
    local file=$1                       # The path to the file
    local _timeout=${2:-5}              # Optional; default 5 seconds

    # Wait. The path is quoted: unquoted, an empty path turns the check into
    # `test -e`, which is true, and a path with whitespace is a syntax error.
    while [ "$_timeout" -gt 0 ]; do
        test -e "$file" && return
        sleep 1
        _timeout=$(( _timeout - 1 ))
    done

    # The file may have appeared during the last sleep.
    test -e "$file" && return

    die "Timed out waiting for $file"
}
# END podman helpers
###############################################################################
# BEGIN miscellaneous tools