kube play: support auto updates and rollbacks

Add auto-update support to `podman kube play`.  Auto-update policies can
be configured for:
 * the entire pod via the `io.containers.autoupdate` annotation
 * a specific container via the `io.containers.autoupdate/$name` annotation

To make use of rollbacks, the `io.containers.sdnotify` policy should be
set to `container` such that the workload running _inside_ the container
can send the READY message via the NOTIFY_SOCKET once ready.  For
further details on auto updates and rollbacks, please refer to the
dedicated article [1].
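
As a minimal sketch (assuming `systemd-notify` is available inside the
image, as the rollback test below does), such a workload's entrypoint
could look like:
```sh
#!/bin/sh
# Hypothetical entrypoint for a container running with the
# io.containers.sdnotify: "container" policy.  Podman passes the
# notify socket into the container via $NOTIFY_SOCKET.
systemd-notify --ready    # send READY=1 over $NOTIFY_SOCKET
exec sleep infinity       # stand-in for the actual workload
```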

Since auto updates and rollbacks build on Podman's systemd integration,
the k8s YAML must be executed via the `podman-kube@` systemd template.
For further details on how to run k8s YAML in systemd via Podman, please
refer to the dedicated article [2].

An example k8s YAML may look as follows:
```YAML
apiVersion: v1
kind: Pod
metadata:
  annotations:
    io.containers.autoupdate: "local"
    io.containers.autoupdate/b: "registry"
  labels:
    app: test
  name: test_pod
spec:
  containers:
  - command:
    - top
    image: alpine
    name: a
  - command:
    - top
    image: alpine
    name: b
```
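
Assuming the file above is stored at `/tmp/test.yaml` (a placeholder
path), it can be dispatched and verified roughly as follows, mirroring
the system tests further below:
```sh
# Instantiate the podman-kube@ template with the systemd-escaped YAML path.
systemctl start "podman-kube@$(systemd-escape /tmp/test.yaml).service"

# --dry-run only reports whether updates are pending, without applying them.
podman auto-update --dry-run \
    --format "{{.Unit}},{{.Container}},{{.Image}},{{.Updated}},{{.Policy}}"
```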

[1] https://www.redhat.com/sysadmin/podman-auto-updates-rollbacks
[2] https://www.redhat.com/sysadmin/kubernetes-workloads-podman-systemd

Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
Author: Valentin Rothberg
Date:   2022-08-30 11:17:25 +02:00
Parent: bdfc4df1f2
Commit: 274d34a25a

9 changed files with 240 additions and 43 deletions

View File

@@ -29,6 +29,18 @@ This data is then being used in the auto-update sequence to instruct systemd (vi
Note that **podman auto-update** relies on systemd. The systemd units are expected to be generated with **[podman-generate-systemd --new](podman-generate-systemd.1.md#--new)**, or similar units that create new containers in order to run the updated images.
Systemd units that start and stop a container cannot run a new image.
### Auto Updates and Kubernetes YAML
Podman supports auto updates for Kubernetes workloads. As mentioned above, `podman auto-update` requires the containers to be running inside systemd units. Podman ships with a systemd template that can be instantiated with a Kubernetes YAML file; see podman-generate-systemd(1).
To enable auto updates for containers running in a Kubernetes workload, set the following Podman-specific annotations in the YAML:
* `io.containers.autoupdate: "registry|local"` to apply the auto-update policy to all containers
* `io.containers.autoupdate/$container: "registry|local"` to apply the auto-update policy to `$container` only
* `io.containers.sdnotify: "conmon|container"` to apply the sdnotify policy to all containers
* `io.containers.sdnotify/$container: "conmon|container"` to apply the sdnotify policy to `$container` only
By default, the autoupdate policy is set to "disabled" and the sdnotify policy to "conmon".
### Systemd Unit and Timer
Podman ships with a `podman-auto-update.service` systemd unit. This unit is triggered daily at midnight by the `podman-auto-update.timer` systemd timer. The timer can be altered for custom time-based updates if desired. The unit can further be invoked by other systemd units (e.g., via the dependency tree) or manually via **systemctl start podman-auto-update.service**.
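
For reference, a typical invocation (standard systemctl usage, not specific to this change):
```sh
# Enable the shipped timer for daily update checks ...
systemctl enable --now podman-auto-update.timer
# ... or trigger an update run manually.
systemctl start podman-auto-update.service
```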

View File

@@ -188,13 +188,8 @@ func AutoUpdate(ctx context.Context, runtime *libpod.Runtime, options entities.A
// updateUnit auto updates the tasks in the specified systemd unit.
func (u *updater) updateUnit(ctx context.Context, unit string, tasks []*task) []error {
var errors []error
// Sanity check: we'll support that in the future.
if len(tasks) != 1 {
errors = append(errors, fmt.Errorf("only 1 task per unit supported but unit %s has %d", unit, len(tasks)))
return errors
}
tasksUpdated := false
for _, task := range tasks {
err := func() error { // Use an anonymous function to avoid spaghetti continue's
updateAvailable, err := task.updateAvailable(ctx)
@@ -255,6 +250,9 @@ func (u *updater) updateUnit(ctx context.Context, unit string, tasks []*task) []
}
if err := u.restartSystemdUnit(ctx, unit); err != nil {
for _, task := range tasks {
task.status = statusFailed
}
err = fmt.Errorf("restarting unit %s during rollback: %w", unit, err)
errors = append(errors, err)
return errors
@@ -283,7 +281,16 @@ func (t *task) report() *entities.AutoUpdateReport {
func (t *task) updateAvailable(ctx context.Context) (bool, error) {
switch t.policy {
case PolicyRegistryImage:
return t.registryUpdateAvailable(ctx)
// Errors checking for updates only should not be fatal.
// Especially on Edge systems, connection may be limited or
// there may just be a temporary downtime of the registry.
// But make sure to leave some breadcrumbs in the debug logs
// such that potential issues _can_ be analyzed if needed.
available, err := t.registryUpdateAvailable(ctx)
if err != nil {
logrus.Debugf("Error checking updates for image %s: %v (ignoring error)", t.rawImageName, err)
}
return available, nil
case PolicyLocalImage:
return t.localUpdateAvailable()
default:

View File

@@ -661,9 +661,10 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
opts = append(opts, libpod.WithSdNotifyMode(sdNotifyMode))
var proxy *notifyproxy.NotifyProxy
// Create a notify proxy for the container.
if sdNotifyMode != "" && sdNotifyMode != define.SdNotifyModeIgnore {
proxy, err := notifyproxy.New("")
proxy, err = notifyproxy.New("")
if err != nil {
return nil, err
}
@@ -675,6 +676,9 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
if err != nil {
return nil, err
}
if proxy != nil {
proxy.AddContainer(ctr)
}
containers = append(containers, ctr)
}
@@ -774,20 +778,25 @@ func (ic *ContainerEngine) getImageAndLabelInfo(ctx context.Context, cwd string,
}
// Handle kube annotations
setLabel := func(label string) {
var result string
ctrSpecific := fmt.Sprintf("%s/%s", label, container.Name)
for k, v := range annotations {
switch k {
// Auto update annotation without container name will apply to
// all containers within the pod
case autoupdate.Label, autoupdate.AuthfileLabel:
labels[k] = v
// Auto update annotation with container name will apply only
// to the specified container
case fmt.Sprintf("%s/%s", autoupdate.Label, container.Name),
fmt.Sprintf("%s/%s", autoupdate.AuthfileLabel, container.Name):
prefixAndCtr := strings.Split(k, "/")
labels[prefixAndCtr[0]] = v
case label:
result = v
case ctrSpecific:
labels[label] = v
return
}
}
if result != "" {
labels[label] = result
}
}
setLabel(autoupdate.Label)
setLabel(autoupdate.AuthfileLabel)
return pulledImage, labels, nil
}

View File

@@ -7,6 +7,7 @@ import (
"fmt"
"math"
"net"
"os"
"regexp"
"runtime"
"strconv"
@@ -26,6 +27,7 @@ import (
"github.com/containers/podman/v4/pkg/k8s.io/apimachinery/pkg/api/resource"
"github.com/containers/podman/v4/pkg/specgen"
"github.com/containers/podman/v4/pkg/specgen/generate"
systemdDefine "github.com/containers/podman/v4/pkg/systemd/define"
"github.com/containers/podman/v4/pkg/util"
"github.com/docker/docker/pkg/system"
"github.com/docker/go-units"
@@ -445,6 +447,12 @@ func ToSpecGen(ctx context.Context, opts *CtrSpecGenOptions) (*specgen.SpecGener
}
}
// Make sure the container runs in a systemd unit which is
// stored as a label at container creation.
if unit := os.Getenv(systemdDefine.EnvVariable); unit != "" {
s.Labels[systemdDefine.EnvVariable] = unit
}
return s, nil
}

View File

@@ -1,12 +1,17 @@
package notifyproxy
import (
"errors"
"fmt"
"io"
"io/ioutil"
"net"
"os"
"strings"
"syscall"
"time"
"github.com/containers/podman/v4/libpod/define"
"github.com/coreos/go-systemd/v22/daemon"
"github.com/sirupsen/logrus"
)
@@ -39,6 +44,7 @@ func SendMessage(socketPath string, message string) error {
type NotifyProxy struct {
connection *net.UnixConn
socketPath string
container Container // optional
}
// New creates a NotifyProxy. The specified temp directory can be left empty.
@@ -77,9 +83,26 @@ func (p *NotifyProxy) close() error {
return p.connection.Close()
}
// AddContainer associates a container with the proxy.
func (p *NotifyProxy) AddContainer(container Container) {
p.container = container
}
// ErrNoReadyMessage is returned when we are waiting for the READY message of a
// container that is not in the running state anymore.
var ErrNoReadyMessage = errors.New("container stopped running before READY message was received")
// Container avoids a circular dependency among this package and libpod.
type Container interface {
State() (define.ContainerStatus, error)
ID() string
}
// WaitAndClose waits until receiving the `READY` notify message and closes
// the listener. Note that this function must only be executed inside a
// systemd service which will kill the process after a given timeout.
// If the (optional) container stopped running before the `READY` message is
// received, the waiting gets canceled and ErrNoReadyMessage is returned.
func (p *NotifyProxy) WaitAndClose() error {
defer func() {
if err := p.close(); err != nil {
@@ -87,16 +110,48 @@ func (p *NotifyProxy) WaitAndClose() error {
}
}()
const bufferSize = 1024
sBuilder := strings.Builder{}
for {
buf := make([]byte, 1024)
num, err := p.connection.Read(buf)
if err != nil {
// Set a read deadline of one second such that we achieve a
// non-blocking read and can check if the container has already
// stopped running; in that case no READY message will be sent
// and we're done.
if err := p.connection.SetReadDeadline(time.Now().Add(time.Second)); err != nil {
return err
}
for _, s := range strings.Split(string(buf[:num]), "\n") {
if s == daemon.SdNotifyReady {
for {
buffer := make([]byte, bufferSize)
num, err := p.connection.Read(buffer)
if err != nil {
if !errors.Is(err, os.ErrDeadlineExceeded) && !errors.Is(err, io.EOF) {
return err
}
}
sBuilder.Write(buffer[:num])
if num != bufferSize || buffer[num-1] == '\n' {
break
}
}
for _, line := range strings.Split(sBuilder.String(), "\n") {
if line == daemon.SdNotifyReady {
return nil
}
}
sBuilder.Reset()
if p.container == nil {
continue
}
state, err := p.container.State()
if err != nil {
return err
}
if state != define.ContainerStateRunning {
return fmt.Errorf("%w: %s", ErrNoReadyMessage, p.container.ID())
}
}
}

View File

@@ -41,7 +41,7 @@ func TestWaitAndClose(t *testing.T) {
default:
}
sendMessage(t, proxy, daemon.SdNotifyReady+"\nsomething else")
sendMessage(t, proxy, daemon.SdNotifyReady+"\nsomething else\n")
done := func() bool {
for i := 0; i < 10; i++ {
select {

View File

@@ -301,24 +301,16 @@ LISTEN_FDNAMES=listen_fdnames" | sort)
}
@test "podman-kube@.service template" {
# If running from a podman source directory, build and use the source
# version of the play-kube-@ unit file
unit_name="podman-kube@.service"
unit_file="contrib/systemd/system/${unit_name}"
if [[ -e ${unit_file}.in ]]; then
echo "# [Building & using $unit_name from source]" >&3
# Force regenerating unit file (existing one may have /usr/bin path)
rm -f $unit_file
BINDIR=$(dirname $PODMAN) make $unit_file
cp $unit_file $UNIT_DIR/$unit_name
fi
install_kube_template
# Create the YAML file
yaml_source="$PODMAN_TMPDIR/test.yaml"
cat >$yaml_source <<EOF
apiVersion: v1
kind: Pod
metadata:
annotations:
io.containers.autoupdate: "local"
io.containers.autoupdate/b: "registry"
labels:
app: test
name: test_pod
@@ -327,8 +319,11 @@ spec:
- command:
- top
image: $IMAGE
name: test
resources: {}
name: a
- command:
- top
image: $IMAGE
name: b
EOF
# Dispatch the YAML file
@@ -349,6 +344,12 @@ EOF
run_podman 125 container rm $service_container
is "$output" "Error: container .* is the service container of pod(s) .* and cannot be removed without removing the pod(s)"
# Add a simple `auto-update --dry-run` test here to avoid too much redundancy
# with 255-auto-update.bats
run_podman auto-update --dry-run --format "{{.Unit}},{{.Container}},{{.Image}},{{.Updated}},{{.Policy}}"
is "$output" ".*$service_name,.* (test_pod-a),$IMAGE,false,local.*" "global auto-update policy gets applied"
is "$output" ".*$service_name,.* (test_pod-b),$IMAGE,false,registry.*" "container-specified auto-update policy gets applied"
# Kill the pod and make sure the service is not running.
# The restart policy is set to "never" since there is no
# design yet for propagating exit codes up to the service

View File

@@ -266,8 +266,6 @@ EOF
# Generate a healthy image that will run correctly.
run_podman build -t quay.io/libpod/$image -f $dockerfile1
podman image inspect --format "{{.ID}}" $image
oldID="$output"
generate_service $image local /runme --sdnotify=container noTag
_wait_service_ready container-$cname.service
@@ -277,7 +275,7 @@ EOF
# Generate an unhealthy image that will fail.
run_podman build -t quay.io/libpod/$image -f $dockerfile2
podman image inspect --format "{{.ID}}" $image
run_podman image inspect --format "{{.ID}}" $image
newID="$output"
run_podman auto-update --dry-run --format "{{.Unit}},{{.Image}},{{.Updated}},{{.Policy}}"
@@ -409,4 +407,97 @@ EOF
_confirm_update $cname $ori_image
}
@test "podman-kube@.service template with rollback" {
# sdnotify fails with runc 1.0.0-3-dev2 on Ubuntu. Let's just
# assume that we work only with crun, nothing else.
# [copied from 260-sdnotify.bats]
runtime=$(podman_runtime)
if [[ "$runtime" != "crun" ]]; then
skip "this test only works with crun, not $runtime"
fi
install_kube_template
dockerfile1=$PODMAN_TMPDIR/Dockerfile.1
cat >$dockerfile1 <<EOF
FROM quay.io/libpod/fedora:31
RUN echo -e "#!/bin/sh\n\
printenv NOTIFY_SOCKET; echo READY; systemd-notify --ready;\n\
trap 'echo Received SIGTERM, finishing; exit' SIGTERM; echo WAITING; while :; do sleep 0.1; done" \
>> /runme
RUN chmod +x /runme
EOF
dockerfile2=$PODMAN_TMPDIR/Dockerfile.2
cat >$dockerfile2 <<EOF
FROM quay.io/libpod/fedora:31
RUN echo -e "#!/bin/sh\n\
exit 1" >> /runme
RUN chmod +x /runme
EOF
local_image=localhost/image:$(random_string 10)
# Generate a healthy image that will run correctly.
run_podman build -t $local_image -f $dockerfile1
run_podman image inspect --format "{{.ID}}" $local_image
oldID="$output"
# Create the YAML file
yaml_source="$PODMAN_TMPDIR/test.yaml"
cat >$yaml_source <<EOF
apiVersion: v1
kind: Pod
metadata:
annotations:
io.containers.autoupdate: "registry"
io.containers.autoupdate/b: "local"
io.containers.sdnotify/b: "container"
labels:
app: test
name: test_pod
spec:
containers:
- command:
- top
image: $IMAGE
name: a
- command:
- /runme
image: $local_image
name: b
EOF
# Dispatch the YAML file
service_name="podman-kube@$(systemd-escape $yaml_source).service"
systemctl start $service_name
systemctl is-active $service_name
# Make sure the containers are properly configured
run_podman auto-update --dry-run --format "{{.Unit}},{{.Container}},{{.Image}},{{.Updated}},{{.Policy}}"
is "$output" ".*$service_name,.* (test_pod-a),$IMAGE,false,registry.*" "global auto-update policy gets applied"
is "$output" ".*$service_name,.* (test_pod-b),$local_image,false,local.*" "container-specified auto-update policy gets applied"
# Generate a broken image that will fail.
run_podman build -t $local_image -f $dockerfile2
run_podman image inspect --format "{{.ID}}" $local_image
newID="$output"
assert "$oldID" != "$newID" "broken image really is a new one"
# Make sure container b sees the new image
run_podman auto-update --dry-run --format "{{.Unit}},{{.Container}},{{.Image}},{{.Updated}},{{.Policy}}"
is "$output" ".*$service_name,.* (test_pod-a),$IMAGE,false,registry.*" "global auto-update policy gets applied"
is "$output" ".*$service_name,.* (test_pod-b),$local_image,pending,local.*" "container b sees the new image"
# Now update and check for the rollback
run_podman auto-update --format "{{.Unit}},{{.Container}},{{.Image}},{{.Updated}},{{.Policy}}"
is "$output" ".*$service_name,.* (test_pod-a),$IMAGE,rolled back,registry.*" "container a was rolled back as the update of b failed"
is "$output" ".*$service_name,.* (test_pod-b),$local_image,rolled back,local.*" "container b was rolled back as its update has failed"
# Clean up
systemctl stop $service_name
run_podman rmi -f $(pause_image) $local_image $newID $oldID
rm -f $UNIT_DIR/$unit_name
}
# vim: filetype=sh

View File

@@ -32,3 +32,17 @@ journalctl() {
systemd-run() {
command systemd-run $_DASHUSER "$@";
}
install_kube_template() {
# If running from a podman source directory, build and use the source
# version of the play-kube-@ unit file
unit_name="podman-kube@.service"
unit_file="contrib/systemd/system/${unit_name}"
if [[ -e ${unit_file}.in ]]; then
echo "# [Building & using $unit_name from source]" >&3
# Force regenerating unit file (existing one may have /usr/bin path)
rm -f $unit_file
BINDIR=$(dirname $PODMAN) make $unit_file
cp $unit_file $UNIT_DIR/$unit_name
fi
}