From 5327df1921c9476af15ef38ebcddea69a32d74c7 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Wed, 5 Mar 2025 14:24:40 +0100 Subject: [PATCH] quadlet kube: correctly mark unit as failed When no containers could be started we need to make sure the unit status reflects this. This means we should not send the READ=1 message and not keep the service container running when we were unable to start any container. There is the question what should happen when only a subset was started. For systemd we can only be either running or failed. And as podman kube play also just keeps the partial started pods running I opted to let systemd keep considering this as success. Fixes #20667 Fixes https://issues.redhat.com/browse/RHEL-80471 Signed-off-by: Paul Holzinger --- pkg/domain/infra/abi/play.go | 33 ++++++++++++++++++---- test/system/252-quadlet.bats | 53 ++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 5 deletions(-) diff --git a/pkg/domain/infra/abi/play.go b/pkg/domain/infra/abi/play.go index 7aa636792b..6ffbf4cf54 100644 --- a/pkg/domain/infra/abi/play.go +++ b/pkg/domain/infra/abi/play.go @@ -283,6 +283,19 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options var configMaps []v1.ConfigMap ranContainers := false + // set the ranContainers bool to true if at least one container was successfully started. + setRanContainers := func(r *entities.PlayKubeReport) { + if !ranContainers { + for _, p := range r.Pods { + // If the list of container errors is less then the total number of pod containers then we know it didn't start. + if len(p.ContainerErrors) < len(p.Containers)+len(p.InitContainers) { + ranContainers = true + break + } + } + } + } + // FIXME: both, the service container and the proxies, should ideally // be _state_ of an object. The Kube code below is quite Spaghetti-code // which we should refactor at some point to make it easier to extend @@ -364,7 +377,7 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options report.Pods = append(report.Pods, r.Pods...) validKinds++ - ranContainers = true + setRanContainers(r) case "DaemonSet": var daemonSetYAML v1apps.DaemonSet @@ -380,7 +393,7 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options report.Pods = append(report.Pods, r.Pods...) validKinds++ - ranContainers = true + setRanContainers(r) case "Deployment": var deploymentYAML v1apps.Deployment @@ -396,7 +409,7 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options report.Pods = append(report.Pods, r.Pods...) validKinds++ - ranContainers = true + setRanContainers(r) case "Job": var jobYAML v1.Job @@ -412,7 +425,7 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options report.Pods = append(report.Pods, r.Pods...) validKinds++ - ranContainers = true + setRanContainers(r) case "PersistentVolumeClaim": var pvcYAML v1.PersistentVolumeClaim @@ -473,10 +486,14 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options return nil, fmt.Errorf("YAML document does not contain any supported kube kind") } + if !options.ServiceContainer { + return report, nil + } + // If we started containers along with a service container, we are // running inside a systemd unit and need to set the main PID. - if options.ServiceContainer && ranContainers { + if ranContainers { switch len(notifyProxies) { case 0: // Optimization for containers/podman/issues/17345 // No container needs sdnotify, so we can mark the @@ -509,6 +526,12 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options } report.ServiceContainerID = serviceContainer.ID() + } else if serviceContainer != nil { + // No containers started, make sure to stop the service container. + // Note because the pods still do exists and are not removed by default we cannot remove it. + if err := serviceContainer.StopWithTimeout(0); err != nil { + logrus.Errorf("Failed to stop service container: %v", err) + } } return report, nil diff --git a/test/system/252-quadlet.bats b/test/system/252-quadlet.bats index 264518b269..df184333b7 100644 --- a/test/system/252-quadlet.bats +++ b/test/system/252-quadlet.bats @@ -1107,6 +1107,59 @@ EOF service_cleanup $QUADLET_SERVICE_NAME inactive } +# https://github.com/containers/podman/issues/20667 +@test "quadlet kube - start error" { + local port=$(random_free_port) + # Create the YAMl file + pod_name="p-$(safename)" + container_name="c-$(safename)" + yaml_source="$PODMAN_TMPDIR/start_err$(safename).yaml" + cat >$yaml_source < $quadlet_file <