quadlet kube: correctly mark unit as failed

When no containers could be started, we need to make sure the unit status
reflects this. This means we should not send the READY=1 message and should
not keep the service container running when we were unable to start any
container.

There is the question of what should happen when only a subset of the
containers was started. For systemd we can only be either running or failed,
and since podman kube play also just keeps the partially started pods
running, I opted to let systemd keep considering this a success.

Fixes #20667
Fixes https://issues.redhat.com/browse/RHEL-80471

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
Author: Paul Holzinger
Date: 2025-03-05 14:24:40 +01:00
Committed by: Matt Heon
Parent: bc37e935ba
Commit: 5327df1921
2 changed files with 81 additions and 5 deletions
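For background, quadlet-generated kube units are notify-type services: systemd only considers the unit started once READY=1 is sent on behalf of the service container. Below is a minimal sketch of the behaviour this commit aims for, written against the go-systemd daemon package rather than Podman's internal notify proxies; the package, function name, and error text are illustrative only and not Podman's actual code.

package playsketch

import (
	"errors"
	"fmt"

	"github.com/coreos/go-systemd/v22/daemon"
)

// notifyIfStarted is an illustrative helper, not Podman code: it sends
// READY=1 only when at least one container was started, and otherwise
// returns an error so the process exits non-zero and systemd marks the
// unit as failed.
func notifyIfStarted(ranContainers bool) error {
	if !ranContainers {
		// No READY=1 is sent; the caller is expected to also stop the
		// service container so the unit does not linger as "running".
		return errors.New("no containers were started")
	}
	// SdNotify is a no-op when NOTIFY_SOCKET is unset, i.e. when we are
	// not running under systemd.
	if _, err := daemon.SdNotify(false, daemon.SdNotifyReady); err != nil {
		return fmt.Errorf("sending READY=1: %w", err)
	}
	return nil
}

In the actual diff below, the same idea shows up as the ranContainers gate around the sdnotify/main-PID handling and the StopWithTimeout(0) call on the service container when nothing started.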


@@ -283,6 +283,19 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options
	var configMaps []v1.ConfigMap
	ranContainers := false
	// set the ranContainers bool to true if at least one container was successfully started.
	setRanContainers := func(r *entities.PlayKubeReport) {
		if !ranContainers {
			for _, p := range r.Pods {
				// If there are fewer container errors than total pod containers (including init containers), at least one container started.
				if len(p.ContainerErrors) < len(p.Containers)+len(p.InitContainers) {
					ranContainers = true
					break
				}
			}
		}
	}
	// FIXME: both, the service container and the proxies, should ideally
	// be _state_ of an object. The Kube code below is quite Spaghetti-code
	// which we should refactor at some point to make it easier to extend
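As an aside, the partial-success rule the commit message describes can be seen in isolation in the following self-contained sketch. PodReport is a hypothetical stand-in for the per-pod data in entities.PlayKubeReport, and the field values are made up for illustration.

package main

import "fmt"

// PodReport is a simplified stand-in for the per-pod data in
// entities.PlayKubeReport; the field names and values are illustrative only.
type PodReport struct {
	Containers      []string
	InitContainers  []string
	ContainerErrors []string
}

// anyContainerStarted mirrors the heuristic from the diff: if a pod has
// fewer errors than total containers, at least one container started.
func anyContainerStarted(pods []PodReport) bool {
	for _, p := range pods {
		if len(p.ContainerErrors) < len(p.Containers)+len(p.InitContainers) {
			return true
		}
	}
	return false
}

func main() {
	// One init container and two app containers, one of which failed:
	// 1 error < 3 containers, so this counts as (partially) started.
	partial := PodReport{
		Containers:      []string{"web", "sidecar"},
		InitContainers:  []string{"init-db"},
		ContainerErrors: []string{"web: bind: address already in use"},
	}
	// Every container failed: 1 error == 1 container, so nothing started.
	failed := PodReport{
		Containers:      []string{"web"},
		ContainerErrors: []string{"web: bind: address already in use"},
	}

	fmt.Println(anyContainerStarted([]PodReport{partial})) // true
	fmt.Println(anyContainerStarted([]PodReport{failed}))  // false
}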
@@ -364,7 +377,7 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options
			report.Pods = append(report.Pods, r.Pods...)
			validKinds++
			ranContainers = true
			setRanContainers(r)
		case "DaemonSet":
			var daemonSetYAML v1apps.DaemonSet
@@ -380,7 +393,7 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options
			report.Pods = append(report.Pods, r.Pods...)
			validKinds++
			ranContainers = true
			setRanContainers(r)
		case "Deployment":
			var deploymentYAML v1apps.Deployment
@@ -396,7 +409,7 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options
			report.Pods = append(report.Pods, r.Pods...)
			validKinds++
			ranContainers = true
			setRanContainers(r)
		case "Job":
			var jobYAML v1.Job
@@ -412,7 +425,7 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options
			report.Pods = append(report.Pods, r.Pods...)
			validKinds++
			ranContainers = true
			setRanContainers(r)
		case "PersistentVolumeClaim":
			var pvcYAML v1.PersistentVolumeClaim
@@ -473,10 +486,14 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options
		return nil, fmt.Errorf("YAML document does not contain any supported kube kind")
	}
	if !options.ServiceContainer {
		return report, nil
	}
	// If we started containers along with a service container, we are
	// running inside a systemd unit and need to set the main PID.
	if options.ServiceContainer && ranContainers {
	if ranContainers {
		switch len(notifyProxies) {
		case 0: // Optimization for containers/podman/issues/17345
			// No container needs sdnotify, so we can mark the
@@ -509,6 +526,12 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options
		}
		report.ServiceContainerID = serviceContainer.ID()
	} else if serviceContainer != nil {
		// No containers started, make sure to stop the service container.
		// Note: because the pods still exist and are not removed by default, we cannot remove the service container here.
		if err := serviceContainer.StopWithTimeout(0); err != nil {
			logrus.Errorf("Failed to stop service container: %v", err)
		}
	}
	return report, nil


@@ -1107,6 +1107,59 @@ EOF
    service_cleanup $QUADLET_SERVICE_NAME inactive
}
# https://github.com/containers/podman/issues/20667
@test "quadlet kube - start error" {
    local port=$(random_free_port)

    # Create the YAML file
    pod_name="p-$(safename)"
    container_name="c-$(safename)"
    yaml_source="$PODMAN_TMPDIR/start_err$(safename).yaml"
    cat >$yaml_source <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: $pod_name
spec:
  containers:
  - command:
    - "top"
    image: $IMAGE
    name: $container_name
    ports:
    - containerPort: 80
      hostPort: $port
EOF

    # Bind the port to force an error when starting the pod
    timeout --foreground -v --kill=10 10 nc -l 127.0.0.1 $port &
    nc_pid=$!

    # Create the Quadlet file
    local quadlet_file=$PODMAN_TMPDIR/start_err_$(safename).kube
    cat > $quadlet_file <<EOF
[Kube]
Yaml=${yaml_source}
EOF

    run_quadlet "$quadlet_file"
    run -0 systemctl daemon-reload

    echo "$_LOG_PROMPT systemctl start $QUADLET_SERVICE_NAME"
    run systemctl start $QUADLET_SERVICE_NAME
    echo $output
    assert $status -eq 1 "systemctl start should report failure"

    run -0 systemctl show --property=ActiveState $QUADLET_SERVICE_NAME
    assert "$output" == "ActiveState=failed" "unit must be in failed state"

    echo "$_LOG_PROMPT journalctl -u $QUADLET_SERVICE_NAME"
    run -0 journalctl -eu $QUADLET_SERVICE_NAME
    assert "$output" =~ "$port: bind: address already in use" "journal contains the real podman start error"

    kill "$nc_pid"
}
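To spell out the test mechanism: the backgrounded nc -l holds the chosen host port, so when the unit starts, podman kube play cannot bind hostPort and no container comes up; with this fix the service container is then stopped and READY=1 is never sent, so systemctl start exits non-zero and the unit ends up in ActiveState=failed, which is exactly what the assertions above check.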
@test "quadlet - image files" {
    local quadlet_tmpdir=$PODMAN_TMPDIR/quadlets