mirror of
https://github.com/containers/podman.git
synced 2025-05-17 15:18:43 +08:00
Move pod jobs to parallel execution
Make Podman pod operations that do not involve starting containers (which needs to be done in a specific order) use the same parallel operation code we use to make `podman stop` on large numbers of containers fast. We were previously stopping containers in a pod serially, which could take up to the timeout (default 15 seconds) for each container - stopping 100 containers that do not respond to SIGTERM would take 25 minutes. To do this, refactor the parallel operation code a bit to remove its dependency on libpod (damn circular import restrictions...) and use parallel functions that just re-use the standard container API operations - maximizes code reuse (previously each pod handler had a separate implementation of the container function it performed). This is a bit of a palate cleanser after fighting CI for two days - nice to be able to return to a land of sanity. Signed-off-by: Matthew Heon <matthew.heon@pm.me>
This commit is contained in:

committed by
Matthew Heon

parent
a7500e54a4
commit
2bb2425704
@ -1,11 +1,10 @@
|
||||
package parallel
|
||||
package ctr
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
|
||||
"github.com/containers/podman/v2/libpod"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/containers/podman/v2/pkg/parallel"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
@ -14,44 +13,28 @@ import (
|
||||
// If no error is returned, each container specified in ctrs will have an entry
|
||||
// in the resulting map; containers with no error will be set to nil.
|
||||
func ContainerOp(ctx context.Context, ctrs []*libpod.Container, applyFunc func(*libpod.Container) error) (map[*libpod.Container]error, error) {
|
||||
jobControlLock.RLock()
|
||||
defer jobControlLock.RUnlock()
|
||||
|
||||
// We could use a sync.Map but given Go's lack of generics I'd rather
|
||||
// just use a lock on a normal map...
|
||||
// The expectation is that most of the time is spent in applyFunc
|
||||
// anyways.
|
||||
var (
|
||||
errMap = make(map[*libpod.Container]error)
|
||||
errLock sync.Mutex
|
||||
allDone sync.WaitGroup
|
||||
errMap = make(map[*libpod.Container]<-chan error)
|
||||
)
|
||||
|
||||
for _, ctr := range ctrs {
|
||||
// Block until a thread is available
|
||||
if err := jobControl.Acquire(ctx, 1); err != nil {
|
||||
return nil, errors.Wrapf(err, "error acquiring job control semaphore")
|
||||
}
|
||||
|
||||
allDone.Add(1)
|
||||
|
||||
c := ctr
|
||||
go func() {
|
||||
logrus.Debugf("Launching job on container %s", c.ID())
|
||||
|
||||
err := applyFunc(c)
|
||||
errLock.Lock()
|
||||
errMap[c] = err
|
||||
errLock.Unlock()
|
||||
|
||||
allDone.Done()
|
||||
jobControl.Release(1)
|
||||
}()
|
||||
logrus.Debugf("Starting parallel job on container %s", c.ID())
|
||||
errChan := parallel.Enqueue(ctx, func() error {
|
||||
return applyFunc(c)
|
||||
})
|
||||
errMap[c] = errChan
|
||||
}
|
||||
|
||||
allDone.Wait()
|
||||
finalErr := make(map[*libpod.Container]error)
|
||||
for ctr, errChan := range errMap {
|
||||
err := <-errChan
|
||||
finalErr[ctr] = err
|
||||
}
|
||||
|
||||
return errMap, nil
|
||||
return finalErr, nil
|
||||
}
|
||||
|
||||
// TODO: Add an Enqueue() function that returns a promise
|
@ -1,6 +1,7 @@
|
||||
package parallel
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
@ -42,3 +43,32 @@ func SetMaxThreads(threads uint) error {
|
||||
func GetMaxThreads() uint {
|
||||
return numThreads
|
||||
}
|
||||
|
||||
// Enqueue adds a single function to the parallel jobs queue. This function will
|
||||
// be run when an unused thread is available.
|
||||
// Returns a receive-only error channel that will return the error (if any) from
|
||||
// the provided function fn when fn has finished executing. The channel will be
|
||||
// closed after this.
|
||||
func Enqueue(ctx context.Context, fn func() error) <-chan error {
|
||||
retChan := make(chan error)
|
||||
|
||||
go func() {
|
||||
jobControlLock.RLock()
|
||||
defer jobControlLock.RUnlock()
|
||||
|
||||
defer close(retChan)
|
||||
|
||||
if err := jobControl.Acquire(ctx, 1); err != nil {
|
||||
retChan <- errors.Wrapf(err, "error acquiring job control semaphore")
|
||||
return
|
||||
}
|
||||
|
||||
err := fn()
|
||||
|
||||
jobControl.Release(1)
|
||||
|
||||
retChan <- err
|
||||
}()
|
||||
|
||||
return retChan
|
||||
}
|
||||
|
Reference in New Issue
Block a user