Merge pull request #15820 from vrothberg/fix-15800

kube: notifyproxy: fix lost READY message
This commit is contained in:
OpenShift Merge Robot
2022-09-26 13:37:40 +02:00
committed by GitHub
2 changed files with 81 additions and 37 deletions

View File

@ -10,6 +10,7 @@ import (
"path/filepath" "path/filepath"
"strconv" "strconv"
"strings" "strings"
"sync"
buildahDefine "github.com/containers/buildah/define" buildahDefine "github.com/containers/buildah/define"
"github.com/containers/common/libimage" "github.com/containers/common/libimage"
@ -698,9 +699,24 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
fmt.Println(playKubePod.ContainerErrors) fmt.Println(playKubePod.ContainerErrors)
} }
// Wait for each proxy to receive a READY message. // Wait for each proxy to receive a READY message. Use a wait
for _, proxy := range sdNotifyProxies { // group to prevent the potential for ABBA kinds of deadlocks.
if err := proxy.WaitAndClose(); err != nil { var wg sync.WaitGroup
errors := make([]error, len(sdNotifyProxies))
for i := range sdNotifyProxies {
wg.Add(1)
go func(i int) {
err := sdNotifyProxies[i].WaitAndClose()
if err != nil {
err = fmt.Errorf("waiting for sd-notify proxy: %w", err)
}
errors[i] = err
wg.Done()
}(i)
}
wg.Wait()
for _, err := range errors {
if err != nil {
return nil, err return nil, err
} }
} }

View File

@ -1,6 +1,7 @@
package notifyproxy package notifyproxy
import ( import (
"context"
"errors" "errors"
"fmt" "fmt"
"io" "io"
@ -109,48 +110,75 @@ func (p *NotifyProxy) WaitAndClose() error {
} }
}() }()
const bufferSize = 1024 // Since reading from the connection is blocking, we need to spin up two
sBuilder := strings.Builder{} // goroutines. One waiting for the `READY` message, the other waiting
for { // for the container to stop running.
// Set a read deadline of one second such that we achieve a errorChan := make(chan error, 1)
// non-blocking read and can check if the container has already readyChan := make(chan bool, 1)
// stopped running; in that case no READY message will be sent
// and we're done.
if err := p.connection.SetReadDeadline(time.Now().Add(time.Second)); err != nil {
return err
}
go func() {
// Read until the `READY` message is received or the connection
// is closed.
const bufferSize = 1024
sBuilder := strings.Builder{}
for { for {
buffer := make([]byte, bufferSize) for {
num, err := p.connection.Read(buffer) buffer := make([]byte, bufferSize)
if err != nil { num, err := p.connection.Read(buffer)
if !errors.Is(err, os.ErrDeadlineExceeded) && !errors.Is(err, io.EOF) { if err != nil {
return err if !errors.Is(err, io.EOF) {
errorChan <- err
return
}
}
sBuilder.Write(buffer[:num])
if num != bufferSize || buffer[num-1] == '\n' {
// Break as we read an entire line that
// we can inspect for the `READY`
// message.
break
} }
} }
sBuilder.Write(buffer[:num])
if num != bufferSize || buffer[num-1] == '\n' { for _, line := range strings.Split(sBuilder.String(), "\n") {
break if line == daemon.SdNotifyReady {
readyChan <- true
return
}
} }
sBuilder.Reset()
} }
}()
for _, line := range strings.Split(sBuilder.String(), "\n") { if p.container != nil {
if line == daemon.SdNotifyReady { // Create a cancellable context to make sure the goroutine
return nil // below terminates.
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go func() {
select {
case <-ctx.Done():
return
default:
state, err := p.container.State()
if err != nil {
errorChan <- err
return
}
if state != define.ContainerStateRunning {
errorChan <- fmt.Errorf("%w: %s", ErrNoReadyMessage, p.container.ID())
return
}
time.Sleep(time.Second)
} }
} }()
sBuilder.Reset() }
if p.container == nil { // Wait for the ready/error channel.
continue select {
} case <-readyChan:
return nil
state, err := p.container.State() case err := <-errorChan:
if err != nil { return err
return err
}
if state != define.ContainerStateRunning {
return fmt.Errorf("%w: %s", ErrNoReadyMessage, p.container.ID())
}
} }
} }