mirror of
https://github.com/containers/podman.git
synced 2025-05-21 09:05:56 +08:00
kube play: support auto updates and rollbacks
Add auto-update support to `podman kube play`. Auto-update policies can be configured for: * the entire pod via the `io.containers.autoupdate` annotation * a specific container via the `io.containers.autoupdate/$name` annotation To make use of rollbacks, the `io.containers.sdnotify` policy should be set to `container` such that the workload running _inside_ the container can send the READY message via the NOTIFY_SOCKET once ready. For further details on auto updates and rollbacks, please refer to the specific article [1]. Since auto updates and rollbacks are based on Podman's systemd integration, the k8s YAML must be executed in the `podman-kube@` systemd template. For further details on how to run k8s YAML in systemd via Podman, please refer to the specific article [2]. An exemplary k8s YAML may look as follows: ```YAML apiVersion: v1 kind: Pod metadata: annotations: io.containers.autoupdate: "local" io.containers.autoupdate/b: "registry" labels: app: test name: test_pod spec: containers: - command: - top image: alpine name: a - command: - top image: alpine name: b ``` [1] https://www.redhat.com/sysadmin/podman-auto-updates-rollbacks [2] https://www.redhat.com/sysadmin/kubernetes-workloads-podman-systemd Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
This commit is contained in:
@ -29,6 +29,18 @@ This data is then being used in the auto-update sequence to instruct systemd (vi
|
||||
Note that **podman auto-update** relies on systemd. The systemd units are expected to be generated with **[podman-generate-systemd --new](podman-generate-systemd.1.md#--new)**, or similar units that create new containers in order to run the updated images.
|
||||
Systemd units that start and stop a container cannot run a new image.
|
||||
|
||||
### Auto Updates and Kubernetes YAML
|
||||
|
||||
Podman supports auto updates for Kubernetes workloads. As mentioned above, `podman auto-update` requires the containers to be running inside a systemd unit. Podman ships with a systemd template that can be instantiated with a Kubernetes YAML file, see podman-generate-systemd(1).
|
||||
|
||||
To enable auto updates for containers running in a Kubernetes workload, set the following Podman-specific annotations in the YAML:
|
||||
* `io.containers.autoupdate: "registry|local"` to apply the auto-update policy to all containers
|
||||
* `io.containers.autoupdate/$container: "registry|local"` to apply the auto-update policy to `$container` only
|
||||
* `io.containers.sdnotify: "conmon|container"` to apply the sdnotify policy to all containers
|
||||
* `io.containers.sdnotify/$container: "conmon|container"` to apply the sdnotify policy to `$container` only
|
||||
|
||||
By default, the autoupdate policy is set to "disabled" and the sdnotify policy is set to "conmon".
|
||||
|
||||
### Systemd Unit and Timer
|
||||
|
||||
Podman ships with a `podman-auto-update.service` systemd unit. This unit is triggered daily at midnight by the `podman-auto-update.timer` systemd timer. The timer can be altered for custom time-based updates if desired. The unit can further be invoked by other systemd units (e.g., via the dependency tree) or manually via **systemctl start podman-auto-update.service**.
|
||||
|
@ -188,13 +188,8 @@ func AutoUpdate(ctx context.Context, runtime *libpod.Runtime, options entities.A
|
||||
// updateUnit auto updates the tasks in the specified systemd unit.
|
||||
func (u *updater) updateUnit(ctx context.Context, unit string, tasks []*task) []error {
|
||||
var errors []error
|
||||
// Sanity check: we'll support that in the future.
|
||||
if len(tasks) != 1 {
|
||||
errors = append(errors, fmt.Errorf("only 1 task per unit supported but unit %s has %d", unit, len(tasks)))
|
||||
return errors
|
||||
}
|
||||
|
||||
tasksUpdated := false
|
||||
|
||||
for _, task := range tasks {
|
||||
err := func() error { // Use an anonymous function to avoid spaghetti continue's
|
||||
updateAvailable, err := task.updateAvailable(ctx)
|
||||
@ -255,6 +250,9 @@ func (u *updater) updateUnit(ctx context.Context, unit string, tasks []*task) []
|
||||
}
|
||||
|
||||
if err := u.restartSystemdUnit(ctx, unit); err != nil {
|
||||
for _, task := range tasks {
|
||||
task.status = statusFailed
|
||||
}
|
||||
err = fmt.Errorf("restarting unit %s during rollback: %w", unit, err)
|
||||
errors = append(errors, err)
|
||||
return errors
|
||||
@ -283,7 +281,16 @@ func (t *task) report() *entities.AutoUpdateReport {
|
||||
func (t *task) updateAvailable(ctx context.Context) (bool, error) {
|
||||
switch t.policy {
|
||||
case PolicyRegistryImage:
|
||||
return t.registryUpdateAvailable(ctx)
|
||||
// Errors checking for updates only should not be fatal.
|
||||
// Especially on Edge systems, connection may be limited or
|
||||
// there may just be a temporary downtime of the registry.
|
||||
// But make sure to leave some breadcrumbs in the debug logs
|
||||
// such that potential issues _can_ be analyzed if needed.
|
||||
available, err := t.registryUpdateAvailable(ctx)
|
||||
if err != nil {
|
||||
logrus.Debugf("Error checking updates for image %s: %v (ignoring error)", t.rawImageName, err)
|
||||
}
|
||||
return available, nil
|
||||
case PolicyLocalImage:
|
||||
return t.localUpdateAvailable()
|
||||
default:
|
||||
|
@ -661,9 +661,10 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
|
||||
|
||||
opts = append(opts, libpod.WithSdNotifyMode(sdNotifyMode))
|
||||
|
||||
var proxy *notifyproxy.NotifyProxy
|
||||
// Create a notify proxy for the container.
|
||||
if sdNotifyMode != "" && sdNotifyMode != define.SdNotifyModeIgnore {
|
||||
proxy, err := notifyproxy.New("")
|
||||
proxy, err = notifyproxy.New("")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -675,6 +676,9 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if proxy != nil {
|
||||
proxy.AddContainer(ctr)
|
||||
}
|
||||
containers = append(containers, ctr)
|
||||
}
|
||||
|
||||
@ -774,21 +778,26 @@ func (ic *ContainerEngine) getImageAndLabelInfo(ctx context.Context, cwd string,
|
||||
}
|
||||
|
||||
// Handle kube annotations
|
||||
for k, v := range annotations {
|
||||
switch k {
|
||||
// Auto update annotation without container name will apply to
|
||||
// all containers within the pod
|
||||
case autoupdate.Label, autoupdate.AuthfileLabel:
|
||||
labels[k] = v
|
||||
// Auto update annotation with container name will apply only
|
||||
// to the specified container
|
||||
case fmt.Sprintf("%s/%s", autoupdate.Label, container.Name),
|
||||
fmt.Sprintf("%s/%s", autoupdate.AuthfileLabel, container.Name):
|
||||
prefixAndCtr := strings.Split(k, "/")
|
||||
labels[prefixAndCtr[0]] = v
|
||||
setLabel := func(label string) {
|
||||
var result string
|
||||
ctrSpecific := fmt.Sprintf("%s/%s", label, container.Name)
|
||||
for k, v := range annotations {
|
||||
switch k {
|
||||
case label:
|
||||
result = v
|
||||
case ctrSpecific:
|
||||
labels[label] = v
|
||||
return
|
||||
}
|
||||
}
|
||||
if result != "" {
|
||||
labels[label] = result
|
||||
}
|
||||
}
|
||||
|
||||
setLabel(autoupdate.Label)
|
||||
setLabel(autoupdate.AuthfileLabel)
|
||||
|
||||
return pulledImage, labels, nil
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,7 @@ import (
|
||||
"fmt"
|
||||
"math"
|
||||
"net"
|
||||
"os"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strconv"
|
||||
@ -26,6 +27,7 @@ import (
|
||||
"github.com/containers/podman/v4/pkg/k8s.io/apimachinery/pkg/api/resource"
|
||||
"github.com/containers/podman/v4/pkg/specgen"
|
||||
"github.com/containers/podman/v4/pkg/specgen/generate"
|
||||
systemdDefine "github.com/containers/podman/v4/pkg/systemd/define"
|
||||
"github.com/containers/podman/v4/pkg/util"
|
||||
"github.com/docker/docker/pkg/system"
|
||||
"github.com/docker/go-units"
|
||||
@ -445,6 +447,12 @@ func ToSpecGen(ctx context.Context, opts *CtrSpecGenOptions) (*specgen.SpecGener
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure the container runs in a systemd unit which is
|
||||
// stored as a label at container creation.
|
||||
if unit := os.Getenv(systemdDefine.EnvVariable); unit != "" {
|
||||
s.Labels[systemdDefine.EnvVariable] = unit
|
||||
}
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
|
@ -1,12 +1,17 @@
|
||||
package notifyproxy
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/containers/podman/v4/libpod/define"
|
||||
"github.com/coreos/go-systemd/v22/daemon"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
@ -39,6 +44,7 @@ func SendMessage(socketPath string, message string) error {
|
||||
// NotifyProxy receives sd-notify messages (e.g., READY) on a dedicated
// socket created by New, on behalf of a container.
type NotifyProxy struct {
	connection *net.UnixConn // connection to the proxy's notify socket
	socketPath string        // filesystem path of the notify socket
	container  Container     // optional; set via AddContainer so WaitAndClose can detect a stopped container
}
|
||||
|
||||
// New creates a NotifyProxy. The specified temp directory can be left empty.
|
||||
@ -77,9 +83,26 @@ func (p *NotifyProxy) close() error {
|
||||
return p.connection.Close()
|
||||
}
|
||||
|
||||
// AddContainer associates a container with the proxy.  Once set,
// WaitAndClose stops waiting for the READY message (returning
// ErrNoReadyMessage) if this container is no longer running.
func (p *NotifyProxy) AddContainer(container Container) {
	p.container = container
}
|
||||
|
||||
// ErrNoReadyMessage is returned when we are waiting for the READY message of a
// container that is not in the running state anymore.
var ErrNoReadyMessage = errors.New("container stopped running before READY message was received")

// Container avoids a circular dependency among this package and libpod.
// It declares only the minimal container surface the proxy needs.
type Container interface {
	// State returns the container's current status.
	State() (define.ContainerStatus, error)
	// ID returns the container's ID.
	ID() string
}
|
||||
|
||||
// WaitAndClose waits until receiving the `READY` notify message and closes the
|
||||
// listener. Note that this function must only be executed inside a systemd
|
||||
// service which will kill the process after a given timeout.
|
||||
// If the (optional) container stopped running before the `READY` is received,
|
||||
// the waiting gets canceled and ErrNoReadyMessage is returned.
|
||||
func (p *NotifyProxy) WaitAndClose() error {
|
||||
defer func() {
|
||||
if err := p.close(); err != nil {
|
||||
@ -87,16 +110,48 @@ func (p *NotifyProxy) WaitAndClose() error {
|
||||
}
|
||||
}()
|
||||
|
||||
const bufferSize = 1024
|
||||
sBuilder := strings.Builder{}
|
||||
for {
|
||||
buf := make([]byte, 1024)
|
||||
num, err := p.connection.Read(buf)
|
||||
if err != nil {
|
||||
// Set a read deadline of one second such that we achieve a
|
||||
// non-blocking read and can check if the container has already
|
||||
// stopped running; in that case no READY message will be sent
|
||||
// and we're done.
|
||||
if err := p.connection.SetReadDeadline(time.Now().Add(time.Second)); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, s := range strings.Split(string(buf[:num]), "\n") {
|
||||
if s == daemon.SdNotifyReady {
|
||||
|
||||
for {
|
||||
buffer := make([]byte, bufferSize)
|
||||
num, err := p.connection.Read(buffer)
|
||||
if err != nil {
|
||||
if !errors.Is(err, os.ErrDeadlineExceeded) && !errors.Is(err, io.EOF) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
sBuilder.Write(buffer[:num])
|
||||
if num != bufferSize || buffer[num-1] == '\n' {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
for _, line := range strings.Split(sBuilder.String(), "\n") {
|
||||
if line == daemon.SdNotifyReady {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
sBuilder.Reset()
|
||||
|
||||
if p.container == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
state, err := p.container.State()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if state != define.ContainerStateRunning {
|
||||
return fmt.Errorf("%w: %s", ErrNoReadyMessage, p.container.ID())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -41,7 +41,7 @@ func TestWaitAndClose(t *testing.T) {
|
||||
default:
|
||||
}
|
||||
|
||||
sendMessage(t, proxy, daemon.SdNotifyReady+"\nsomething else")
|
||||
sendMessage(t, proxy, daemon.SdNotifyReady+"\nsomething else\n")
|
||||
done := func() bool {
|
||||
for i := 0; i < 10; i++ {
|
||||
select {
|
||||
|
@ -301,24 +301,16 @@ LISTEN_FDNAMES=listen_fdnames" | sort)
|
||||
}
|
||||
|
||||
@test "podman-kube@.service template" {
|
||||
# If running from a podman source directory, build and use the source
|
||||
# version of the play-kube-@ unit file
|
||||
unit_name="podman-kube@.service"
|
||||
unit_file="contrib/systemd/system/${unit_name}"
|
||||
if [[ -e ${unit_file}.in ]]; then
|
||||
echo "# [Building & using $unit_name from source]" >&3
|
||||
# Force regenerating unit file (existing one may have /usr/bin path)
|
||||
rm -f $unit_file
|
||||
BINDIR=$(dirname $PODMAN) make $unit_file
|
||||
cp $unit_file $UNIT_DIR/$unit_name
|
||||
fi
|
||||
|
||||
install_kube_template
|
||||
# Create the YAML file
|
||||
yaml_source="$PODMAN_TMPDIR/test.yaml"
|
||||
cat >$yaml_source <<EOF
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
annotations:
|
||||
io.containers.autoupdate: "local"
|
||||
io.containers.autoupdate/b: "registry"
|
||||
labels:
|
||||
app: test
|
||||
name: test_pod
|
||||
@ -327,8 +319,11 @@ spec:
|
||||
- command:
|
||||
- top
|
||||
image: $IMAGE
|
||||
name: test
|
||||
resources: {}
|
||||
name: a
|
||||
- command:
|
||||
- top
|
||||
image: $IMAGE
|
||||
name: b
|
||||
EOF
|
||||
|
||||
# Dispatch the YAML file
|
||||
@ -349,6 +344,12 @@ EOF
|
||||
run_podman 125 container rm $service_container
|
||||
is "$output" "Error: container .* is the service container of pod(s) .* and cannot be removed without removing the pod(s)"
|
||||
|
||||
# Add a simple `auto-update --dry-run` test here to avoid too much redundancy
|
||||
# with 255-auto-update.bats
|
||||
run_podman auto-update --dry-run --format "{{.Unit}},{{.Container}},{{.Image}},{{.Updated}},{{.Policy}}"
|
||||
is "$output" ".*$service_name,.* (test_pod-a),$IMAGE,false,local.*" "global auto-update policy gets applied"
|
||||
is "$output" ".*$service_name,.* (test_pod-b),$IMAGE,false,registry.*" "container-specified auto-update policy gets applied"
|
||||
|
||||
# Kill the pod and make sure the service is not running.
|
||||
# The restart policy is set to "never" since there is no
|
||||
# design yet for propagating exit codes up to the service
|
||||
|
@ -266,8 +266,6 @@ EOF
|
||||
|
||||
# Generate a healthy image that will run correctly.
|
||||
run_podman build -t quay.io/libpod/$image -f $dockerfile1
|
||||
podman image inspect --format "{{.ID}}" $image
|
||||
oldID="$output"
|
||||
|
||||
generate_service $image local /runme --sdnotify=container noTag
|
||||
_wait_service_ready container-$cname.service
|
||||
@ -277,7 +275,7 @@ EOF
|
||||
|
||||
# Generate an unhealthy image that will fail.
|
||||
run_podman build -t quay.io/libpod/$image -f $dockerfile2
|
||||
podman image inspect --format "{{.ID}}" $image
|
||||
run_podman image inspect --format "{{.ID}}" $image
|
||||
newID="$output"
|
||||
|
||||
run_podman auto-update --dry-run --format "{{.Unit}},{{.Image}},{{.Updated}},{{.Policy}}"
|
||||
@ -409,4 +407,97 @@ EOF
|
||||
_confirm_update $cname $ori_image
|
||||
}
|
||||
|
||||
# Exercise auto updates and rollbacks for a Kubernetes workload running in the
# podman-kube@.service systemd template: when the update of container "b"
# fails, all containers of the pod must be rolled back.
@test "podman-kube@.service template with rollback" {
    # sdnotify fails with runc 1.0.0-3-dev2 on Ubuntu. Let's just
    # assume that we work only with crun, nothing else.
    # [copied from 260-sdnotify.bats]
    runtime=$(podman_runtime)
    if [[ "$runtime" != "crun" ]]; then
        skip "this test only works with crun, not $runtime"
    fi

    install_kube_template

    # Healthy image: sends READY via systemd-notify, then waits for SIGTERM.
    dockerfile1=$PODMAN_TMPDIR/Dockerfile.1
    cat >$dockerfile1 <<EOF
FROM quay.io/libpod/fedora:31
RUN echo -e "#!/bin/sh\n\
printenv NOTIFY_SOCKET; echo READY; systemd-notify --ready;\n\
trap 'echo Received SIGTERM, finishing; exit' SIGTERM; echo WAITING; while :; do sleep 0.1; done" \
>> /runme
RUN chmod +x /runme
EOF

    # Broken image: exits immediately without sending READY, so an update to
    # it must fail and trigger the rollback.
    dockerfile2=$PODMAN_TMPDIR/Dockerfile.2
    cat >$dockerfile2 <<EOF
FROM quay.io/libpod/fedora:31
RUN echo -e "#!/bin/sh\n\
exit 1" >> /runme
RUN chmod +x /runme
EOF
    local_image=localhost/image:$(random_string 10)

    # Generate a healthy image that will run correctly.
    run_podman build -t $local_image -f $dockerfile1
    run_podman image inspect --format "{{.ID}}" $local_image
    oldID="$output"

    # Create the YAML file.
    # Container "b" uses the sdnotify=container policy, which lets Podman
    # detect whether the updated workload became ready.
    yaml_source="$PODMAN_TMPDIR/test.yaml"
    cat >$yaml_source <<EOF
apiVersion: v1
kind: Pod
metadata:
  annotations:
      io.containers.autoupdate: "registry"
      io.containers.autoupdate/b: "local"
      io.containers.sdnotify/b: "container"
  labels:
    app: test
  name: test_pod
spec:
  containers:
  - command:
    - top
    image: $IMAGE
    name: a
  - command:
    - /runme
    image: $local_image
    name: b
EOF

    # Dispatch the YAML file
    service_name="podman-kube@$(systemd-escape $yaml_source).service"
    systemctl start $service_name
    systemctl is-active $service_name

    # Make sure the containers are properly configured
    run_podman auto-update --dry-run --format "{{.Unit}},{{.Container}},{{.Image}},{{.Updated}},{{.Policy}}"
    is "$output" ".*$service_name,.* (test_pod-a),$IMAGE,false,registry.*" "global auto-update policy gets applied"
    is "$output" ".*$service_name,.* (test_pod-b),$local_image,false,local.*" "container-specified auto-update policy gets applied"

    # Generate a broken image that will fail.
    run_podman build -t $local_image -f $dockerfile2
    run_podman image inspect --format "{{.ID}}" $local_image
    newID="$output"

    assert "$oldID" != "$newID" "broken image really is a new one"

    # Make sure container b sees the new image
    run_podman auto-update --dry-run --format "{{.Unit}},{{.Container}},{{.Image}},{{.Updated}},{{.Policy}}"
    is "$output" ".*$service_name,.* (test_pod-a),$IMAGE,false,registry.*" "global auto-update policy gets applied"
    is "$output" ".*$service_name,.* (test_pod-b),$local_image,pending,local.*" "container b sees the new image"

    # Now update and check for the rollback
    run_podman auto-update --format "{{.Unit}},{{.Container}},{{.Image}},{{.Updated}},{{.Policy}}"
    is "$output" ".*$service_name,.* (test_pod-a),$IMAGE,rolled back,registry.*" "container a was rolled back as the update of b failed"
    is "$output" ".*$service_name,.* (test_pod-b),$local_image,rolled back,local.*" "container b was rolled back as its update has failed"

    # Clean up
    systemctl stop $service_name
    run_podman rmi -f $(pause_image) $local_image $newID $oldID
    rm -f $UNIT_DIR/$unit_name
}
|
||||
|
||||
# vim: filetype=sh
|
||||
|
@ -32,3 +32,17 @@ journalctl() {
|
||||
systemd-run() {
|
||||
command systemd-run $_DASHUSER "$@";
|
||||
}
|
||||
|
||||
# install_kube_template: install the podman-kube@.service systemd template
# unit into $UNIT_DIR so tests can instantiate it with a Kubernetes YAML file.
install_kube_template() {
    # If running from a podman source directory, build and use the source
    # version of the play-kube-@ unit file
    unit_name="podman-kube@.service"
    unit_file="contrib/systemd/system/${unit_name}"
    if [[ -e ${unit_file}.in ]]; then
        echo "# [Building & using $unit_name from source]" >&3
        # Force regenerating unit file (existing one may have /usr/bin path)
        rm -f $unit_file
        # BINDIR points the generated unit at the podman binary under test
        BINDIR=$(dirname $PODMAN) make $unit_file
        cp $unit_file $UNIT_DIR/$unit_name
    fi
}
|
||||
|
Reference in New Issue
Block a user