mirror of
https://github.com/containers/podman.git
synced 2025-08-14 19:12:48 +08:00

The OCICNI port format has one big problem: It does not support ranges. So if a user forwards a range of 1k ports with `podman run -p 1001-2000` we have to store each of the thousand ports individually as an array element. This bloats the db and makes the JSON encoding and decoding much slower. In many places we already use a better port struct type which supports ranges, e.g. `pkg/specgen` or the new network interface. Because of this we have to do many runtime conversions between the two port formats. If everything uses the new format we can skip the runtime conversions. This commit adds logic to replace all occurrences of the old format with the new one. The database will automatically migrate the ports to the new format when the container config is read for the first time after the update. The `ParsePortMapping` function in `pkg/specgen/generate` has been reworked to better work with the new format. The new logic is able to deduplicate the given ports. This is necessary to ensure we store them efficiently in the DB. The new code should also be more performant than the old one. To prove that the code is fast enough I added go benchmarks. Parsing 1 million ports took less than 0.5 seconds on my laptop. Benchmark normalize PortMappings in specgen: Please note that the 1 million ports are actually 20x 50k ranges because we cannot have bigger ranges than 65535 ports. ``` $ go test -bench=. 
-benchmem ./pkg/specgen/generate/ goos: linux goarch: amd64 pkg: github.com/containers/podman/v3/pkg/specgen/generate cpu: Intel(R) Core(TM) i7-10850H CPU @ 2.70GHz BenchmarkParsePortMappingNoPorts-12 480821532 2.230 ns/op 0 B/op 0 allocs/op BenchmarkParsePortMapping1-12 38972 30183 ns/op 131584 B/op 9 allocs/op BenchmarkParsePortMapping100-12 18752 60688 ns/op 141088 B/op 315 allocs/op BenchmarkParsePortMapping1k-12 3104 331719 ns/op 223840 B/op 3018 allocs/op BenchmarkParsePortMapping10k-12 376 3122930 ns/op 1223650 B/op 30027 allocs/op BenchmarkParsePortMapping1m-12 3 390869926 ns/op 124593840 B/op 4000624 allocs/op BenchmarkParsePortMappingReverse100-12 18940 63414 ns/op 141088 B/op 315 allocs/op BenchmarkParsePortMappingReverse1k-12 3015 362500 ns/op 223841 B/op 3018 allocs/op BenchmarkParsePortMappingReverse10k-12 343 3318135 ns/op 1223650 B/op 30027 allocs/op BenchmarkParsePortMappingReverse1m-12 3 403392469 ns/op 124593840 B/op 4000624 allocs/op BenchmarkParsePortMappingRange1-12 37635 28756 ns/op 131584 B/op 9 allocs/op BenchmarkParsePortMappingRange100-12 39604 28935 ns/op 131584 B/op 9 allocs/op BenchmarkParsePortMappingRange1k-12 38384 29921 ns/op 131584 B/op 9 allocs/op BenchmarkParsePortMappingRange10k-12 29479 40381 ns/op 131584 B/op 9 allocs/op BenchmarkParsePortMappingRange1m-12 927 1279369 ns/op 143022 B/op 164 allocs/op PASS ok github.com/containers/podman/v3/pkg/specgen/generate 25.492s ``` Benchmark convert old port format to new one: ``` go test -bench=. 
-benchmem ./libpod/ goos: linux goarch: amd64 pkg: github.com/containers/podman/v3/libpod cpu: Intel(R) Core(TM) i7-10850H CPU @ 2.70GHz Benchmark_ocicniPortsToNetTypesPortsNoPorts-12 663526126 1.663 ns/op 0 B/op 0 allocs/op Benchmark_ocicniPortsToNetTypesPorts1-12 7858082 141.9 ns/op 72 B/op 2 allocs/op Benchmark_ocicniPortsToNetTypesPorts10-12 2065347 571.0 ns/op 536 B/op 4 allocs/op Benchmark_ocicniPortsToNetTypesPorts100-12 138478 8641 ns/op 4216 B/op 4 allocs/op Benchmark_ocicniPortsToNetTypesPorts1k-12 9414 120964 ns/op 41080 B/op 4 allocs/op Benchmark_ocicniPortsToNetTypesPorts10k-12 781 1490526 ns/op 401528 B/op 4 allocs/op Benchmark_ocicniPortsToNetTypesPorts1m-12 4 250579010 ns/op 40001656 B/op 4 allocs/op PASS ok github.com/containers/podman/v3/libpod 11.727s ``` Signed-off-by: Paul Holzinger <pholzing@redhat.com>
360 lines
8.9 KiB
Go
360 lines
8.9 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"net"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/containernetworking/plugins/pkg/ns"
|
|
"github.com/containers/podman/v3/libpod/network/types"
|
|
"github.com/containers/podman/v3/pkg/rootlessport"
|
|
"github.com/pkg/errors"
|
|
rkport "github.com/rootless-containers/rootlesskit/pkg/port"
|
|
rkbuiltin "github.com/rootless-containers/rootlesskit/pkg/port/builtin"
|
|
rkportutil "github.com/rootless-containers/rootlesskit/pkg/port/portutil"
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
// ReexecChildKey is used internally for the second reexec: the parent stage
// passes it as argv[0] of the re-executed binary and main compares
// os.Args[0] against it to select the child stage.
const ReexecChildKey = "rootlessport-child"

// reexecChildEnvOpaque is the name of the environment variable through which
// the parent stage hands the JSON-encoded opaque driver data to the child.
const reexecChildEnvOpaque = "_CONTAINERS_ROOTLESSPORT_CHILD_OPAQUE"
|
|
|
|
func main() {
|
|
if len(os.Args) > 1 {
|
|
fmt.Fprintln(os.Stderr, `too many arguments, rootlessport expects a json config via STDIN`)
|
|
os.Exit(1)
|
|
}
|
|
var err error
|
|
if os.Args[0] == ReexecChildKey {
|
|
err = child()
|
|
} else {
|
|
err = parent()
|
|
}
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
func loadConfig(r io.Reader) (*rootlessport.Config, io.ReadCloser, io.WriteCloser, error) {
|
|
stdin, err := ioutil.ReadAll(r)
|
|
if err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
var cfg rootlessport.Config
|
|
if err := json.Unmarshal(stdin, &cfg); err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
if cfg.NetNSPath == "" {
|
|
return nil, nil, nil, errors.New("missing NetNSPath")
|
|
}
|
|
if cfg.ExitFD <= 0 {
|
|
return nil, nil, nil, errors.New("missing ExitFD")
|
|
}
|
|
exitFile := os.NewFile(uintptr(cfg.ExitFD), "exitfile")
|
|
if exitFile == nil {
|
|
return nil, nil, nil, errors.New("invalid ExitFD")
|
|
}
|
|
if cfg.ReadyFD <= 0 {
|
|
return nil, nil, nil, errors.New("missing ReadyFD")
|
|
}
|
|
readyFile := os.NewFile(uintptr(cfg.ReadyFD), "readyfile")
|
|
if readyFile == nil {
|
|
return nil, nil, nil, errors.New("invalid ReadyFD")
|
|
}
|
|
return &cfg, exitFile, readyFile, nil
|
|
}
|
|
|
|
func parent() error {
|
|
// load config from stdin
|
|
cfg, exitR, readyW, err := loadConfig(os.Stdin)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
socketDir := filepath.Join(cfg.TmpDir, "rp")
|
|
err = os.MkdirAll(socketDir, 0700)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// create the parent driver
|
|
stateDir, err := ioutil.TempDir(cfg.TmpDir, "rootlessport")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer os.RemoveAll(stateDir)
|
|
driver, err := rkbuiltin.NewParentDriver(&logrusWriter{prefix: "parent: "}, stateDir)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
initComplete := make(chan struct{})
|
|
quit := make(chan struct{})
|
|
errCh := make(chan error)
|
|
// start the parent driver. initComplete will be closed when the child connected to the parent.
|
|
logrus.Infof("Starting parent driver")
|
|
go func() {
|
|
driverErr := driver.RunParentDriver(initComplete, quit, nil)
|
|
if driverErr != nil {
|
|
logrus.WithError(driverErr).Warn("Parent driver exited")
|
|
}
|
|
errCh <- driverErr
|
|
close(errCh)
|
|
}()
|
|
opaque := driver.OpaqueForChild()
|
|
logrus.Infof("opaque=%+v", opaque)
|
|
opaqueJSON, err := json.Marshal(opaque)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
childQuitR, childQuitW, err := os.Pipe()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer func() {
|
|
// stop the child
|
|
logrus.Info("Stopping child driver")
|
|
if err := childQuitW.Close(); err != nil {
|
|
logrus.WithError(err).Warn("Unable to close childQuitW")
|
|
}
|
|
}()
|
|
|
|
// reexec the child process in the child netns
|
|
cmd := exec.Command("/proc/self/exe")
|
|
cmd.Args = []string{ReexecChildKey}
|
|
cmd.Stdin = childQuitR
|
|
cmd.Stdout = &logrusWriter{prefix: "child"}
|
|
cmd.Stderr = cmd.Stdout
|
|
cmd.Env = append(os.Environ(), reexecChildEnvOpaque+"="+string(opaqueJSON))
|
|
childNS, err := ns.GetNS(cfg.NetNSPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := childNS.Do(func(_ ns.NetNS) error {
|
|
logrus.Infof("Starting child driver in child netns (%q %v)", cmd.Path, cmd.Args)
|
|
return cmd.Start()
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
childErrCh := make(chan error)
|
|
go func() {
|
|
err := cmd.Wait()
|
|
childErrCh <- err
|
|
close(childErrCh)
|
|
}()
|
|
|
|
defer func() {
|
|
if err := unix.Kill(cmd.Process.Pid, unix.SIGTERM); err != nil {
|
|
logrus.WithError(err).Warn("Kill child process")
|
|
}
|
|
}()
|
|
|
|
logrus.Info("Waiting for initComplete")
|
|
// wait for the child to connect to the parent
|
|
outer:
|
|
for {
|
|
select {
|
|
case <-initComplete:
|
|
logrus.Infof("initComplete is closed; parent and child established the communication channel")
|
|
break outer
|
|
case err := <-childErrCh:
|
|
if err != nil {
|
|
return err
|
|
}
|
|
case err := <-errCh:
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
defer func() {
|
|
logrus.Info("Stopping parent driver")
|
|
quit <- struct{}{}
|
|
if err := <-errCh; err != nil {
|
|
logrus.WithError(err).Warn("Parent driver returned error on exit")
|
|
}
|
|
}()
|
|
|
|
// let parent expose ports
|
|
logrus.Infof("Exposing ports %v", cfg.Mappings)
|
|
if err := exposePorts(driver, cfg.Mappings, cfg.ChildIP); err != nil {
|
|
return err
|
|
}
|
|
|
|
// we only need to have a socket to reload ports when we run under rootless cni
|
|
if cfg.RootlessCNI {
|
|
socketfile := filepath.Join(socketDir, cfg.ContainerID)
|
|
// make sure to remove the file if it exists to prevent EADDRINUSE
|
|
_ = os.Remove(socketfile)
|
|
// workaround to bypass the 108 char socket path limit
|
|
// open the fd and use the path to the fd as bind argument
|
|
fd, err := unix.Open(socketDir, unix.O_PATH, 0)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
socket, err := net.ListenUnix("unixpacket", &net.UnixAddr{Name: fmt.Sprintf("/proc/self/fd/%d/%s", fd, cfg.ContainerID), Net: "unixpacket"})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
err = unix.Close(fd)
|
|
// remove the socket file on exit
|
|
defer os.Remove(socketfile)
|
|
if err != nil {
|
|
logrus.Warnf("Failed to close the socketDir fd: %v", err)
|
|
}
|
|
defer socket.Close()
|
|
go serve(socket, driver)
|
|
}
|
|
|
|
logrus.Info("Ready")
|
|
|
|
// https://github.com/containers/podman/issues/11248
|
|
// Copy /dev/null to stdout and stderr to prevent SIGPIPE errors
|
|
if f, err := os.OpenFile("/dev/null", os.O_WRONLY, 0755); err == nil {
|
|
unix.Dup2(int(f.Fd()), 1) // nolint:errcheck
|
|
unix.Dup2(int(f.Fd()), 2) // nolint:errcheck
|
|
f.Close()
|
|
}
|
|
// write and close ReadyFD (convention is same as slirp4netns --ready-fd)
|
|
if _, err := readyW.Write([]byte("1")); err != nil {
|
|
return err
|
|
}
|
|
if err := readyW.Close(); err != nil {
|
|
return err
|
|
}
|
|
|
|
// wait for ExitFD to be closed
|
|
logrus.Info("Waiting for exitfd to be closed")
|
|
if _, err := ioutil.ReadAll(exitR); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func serve(listener net.Listener, pm rkport.Manager) {
|
|
for {
|
|
conn, err := listener.Accept()
|
|
if err != nil {
|
|
// we cannot log this error, stderr is already closed
|
|
continue
|
|
}
|
|
ctx := context.TODO()
|
|
err = handler(ctx, conn, pm)
|
|
if err != nil {
|
|
conn.Write([]byte(err.Error()))
|
|
} else {
|
|
conn.Write([]byte("OK"))
|
|
}
|
|
conn.Close()
|
|
}
|
|
}
|
|
|
|
func handler(ctx context.Context, conn io.Reader, pm rkport.Manager) error {
|
|
var childIP string
|
|
dec := json.NewDecoder(conn)
|
|
err := dec.Decode(&childIP)
|
|
if err != nil {
|
|
return errors.Wrap(err, "rootless port failed to decode ports")
|
|
}
|
|
portStatus, err := pm.ListPorts(ctx)
|
|
if err != nil {
|
|
return errors.Wrap(err, "rootless port failed to list ports")
|
|
}
|
|
for _, status := range portStatus {
|
|
err = pm.RemovePort(ctx, status.ID)
|
|
if err != nil {
|
|
return errors.Wrap(err, "rootless port failed to remove port")
|
|
}
|
|
}
|
|
// add the ports with the new child IP
|
|
for _, status := range portStatus {
|
|
// set the new child IP
|
|
status.Spec.ChildIP = childIP
|
|
_, err = pm.AddPort(ctx, status.Spec)
|
|
if err != nil {
|
|
return errors.Wrap(err, "rootless port failed to add port")
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func exposePorts(pm rkport.Manager, portMappings []types.PortMapping, childIP string) error {
|
|
ctx := context.TODO()
|
|
for _, port := range portMappings {
|
|
protocols := strings.Split(port.Protocol, ",")
|
|
for _, protocol := range protocols {
|
|
hostIP := port.HostIP
|
|
if hostIP == "" {
|
|
hostIP = "0.0.0.0"
|
|
}
|
|
for i := uint16(0); i < port.Range; i++ {
|
|
spec := rkport.Spec{
|
|
Proto: protocol,
|
|
ParentIP: hostIP,
|
|
ParentPort: int(port.HostPort + i),
|
|
ChildPort: int(port.ContainerPort + i),
|
|
ChildIP: childIP,
|
|
}
|
|
if err := rkportutil.ValidatePortSpec(spec, nil); err != nil {
|
|
return err
|
|
}
|
|
if _, err := pm.AddPort(ctx, spec); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func child() error {
|
|
// load the config from the parent
|
|
var opaque map[string]string
|
|
if err := json.Unmarshal([]byte(os.Getenv(reexecChildEnvOpaque)), &opaque); err != nil {
|
|
return err
|
|
}
|
|
|
|
// start the child driver
|
|
quit := make(chan struct{})
|
|
errCh := make(chan error)
|
|
go func() {
|
|
d := rkbuiltin.NewChildDriver(os.Stderr)
|
|
dErr := d.RunChildDriver(opaque, quit)
|
|
errCh <- dErr
|
|
}()
|
|
defer func() {
|
|
logrus.Info("Stopping child driver")
|
|
quit <- struct{}{}
|
|
if err := <-errCh; err != nil {
|
|
logrus.WithError(err).Warn("Child driver returned error on exit")
|
|
}
|
|
}()
|
|
|
|
// wait for stdin to be closed
|
|
if _, err := ioutil.ReadAll(os.Stdin); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
type logrusWriter struct {
|
|
prefix string
|
|
}
|
|
|
|
func (w *logrusWriter) Write(p []byte) (int, error) {
|
|
logrus.Infof("%s%s", w.prefix, string(p))
|
|
return len(p), nil
|
|
}
|