Files
Paul Holzinger 0136a66a83 libpod: deduplicate ports in db
The OCICNI port format has one big problem: It does not support ranges.
So if a users forwards a range of 1k ports with podman run -p 1001-2000
we have to store each of the thousand ports individually as array element.
This bloats the db and makes the JSON encoding and decoding much slower.
In many places we already use a better port struct type which supports
ranges, e.g. `pkg/specgen` or the new network interface.

Because of this we have to do many runtime conversions between the two
port formats. If everything uses the new format we can skip the runtime
conversions.

This commit adds logic to replace all occurrences of the old format
with the new one. The database will automatically migrate the ports
to new format when the container config is read for the first time
after the update.

The `ParsePortMapping` function is `pkg/specgen/generate` has been
reworked to better work with the new format. The new logic is able
to deduplicate the given ports. This is necessary the ensure we
store them efficiently in the DB. The new code should also be more
performant than the old one.

To prove that the code is fast enough I added go benchmarks. Parsing
1 million ports took less than 0.5 seconds on my laptop.

Benchmark normalize PortMappings in specgen:
Please note that the 1 million ports are actually 20x 50k ranges
because we cannot have bigger ranges than 65535 ports.
```
$ go test -bench=. -benchmem  ./pkg/specgen/generate/
goos: linux
goarch: amd64
pkg: github.com/containers/podman/v3/pkg/specgen/generate
cpu: Intel(R) Core(TM) i7-10850H CPU @ 2.70GHz
BenchmarkParsePortMappingNoPorts-12             480821532                2.230 ns/op           0 B/op          0 allocs/op
BenchmarkParsePortMapping1-12                      38972             30183 ns/op          131584 B/op          9 allocs/op
BenchmarkParsePortMapping100-12                    18752             60688 ns/op          141088 B/op        315 allocs/op
BenchmarkParsePortMapping1k-12                      3104            331719 ns/op          223840 B/op       3018 allocs/op
BenchmarkParsePortMapping10k-12                      376           3122930 ns/op         1223650 B/op      30027 allocs/op
BenchmarkParsePortMapping1m-12                         3         390869926 ns/op        124593840 B/op   4000624 allocs/op
BenchmarkParsePortMappingReverse100-12             18940             63414 ns/op          141088 B/op        315 allocs/op
BenchmarkParsePortMappingReverse1k-12               3015            362500 ns/op          223841 B/op       3018 allocs/op
BenchmarkParsePortMappingReverse10k-12               343           3318135 ns/op         1223650 B/op      30027 allocs/op
BenchmarkParsePortMappingReverse1m-12                  3         403392469 ns/op        124593840 B/op   4000624 allocs/op
BenchmarkParsePortMappingRange1-12                 37635             28756 ns/op          131584 B/op          9 allocs/op
BenchmarkParsePortMappingRange100-12               39604             28935 ns/op          131584 B/op          9 allocs/op
BenchmarkParsePortMappingRange1k-12                38384             29921 ns/op          131584 B/op          9 allocs/op
BenchmarkParsePortMappingRange10k-12               29479             40381 ns/op          131584 B/op          9 allocs/op
BenchmarkParsePortMappingRange1m-12                  927           1279369 ns/op          143022 B/op        164 allocs/op
PASS
ok      github.com/containers/podman/v3/pkg/specgen/generate    25.492s
```

Benchmark convert old port format to new one:
```
go test -bench=. -benchmem  ./libpod/
goos: linux
goarch: amd64
pkg: github.com/containers/podman/v3/libpod
cpu: Intel(R) Core(TM) i7-10850H CPU @ 2.70GHz
Benchmark_ocicniPortsToNetTypesPortsNoPorts-12          663526126                1.663 ns/op           0 B/op          0 allocs/op
Benchmark_ocicniPortsToNetTypesPorts1-12                 7858082               141.9 ns/op            72 B/op          2 allocs/op
Benchmark_ocicniPortsToNetTypesPorts10-12                2065347               571.0 ns/op           536 B/op          4 allocs/op
Benchmark_ocicniPortsToNetTypesPorts100-12                138478              8641 ns/op            4216 B/op          4 allocs/op
Benchmark_ocicniPortsToNetTypesPorts1k-12                   9414            120964 ns/op           41080 B/op          4 allocs/op
Benchmark_ocicniPortsToNetTypesPorts10k-12                   781           1490526 ns/op          401528 B/op          4 allocs/op
Benchmark_ocicniPortsToNetTypesPorts1m-12                      4         250579010 ns/op        40001656 B/op          4 allocs/op
PASS
ok      github.com/containers/podman/v3/libpod  11.727s
```

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
2021-10-27 18:59:56 +02:00

360 lines
8.9 KiB
Go

package main
import (
"context"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net"
"os"
"os/exec"
"path/filepath"
"strings"
"github.com/containernetworking/plugins/pkg/ns"
"github.com/containers/podman/v3/libpod/network/types"
"github.com/containers/podman/v3/pkg/rootlessport"
"github.com/pkg/errors"
rkport "github.com/rootless-containers/rootlesskit/pkg/port"
rkbuiltin "github.com/rootless-containers/rootlesskit/pkg/port/builtin"
rkportutil "github.com/rootless-containers/rootlesskit/pkg/port/portutil"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
const (
// ReexecChildKey is used internally for the second reexec
ReexecChildKey = "rootlessport-child"
reexecChildEnvOpaque = "_CONTAINERS_ROOTLESSPORT_CHILD_OPAQUE"
)
func main() {
if len(os.Args) > 1 {
fmt.Fprintln(os.Stderr, `too many arguments, rootlessport expects a json config via STDIN`)
os.Exit(1)
}
var err error
if os.Args[0] == ReexecChildKey {
err = child()
} else {
err = parent()
}
if err != nil {
fmt.Println(err)
os.Exit(1)
}
}
func loadConfig(r io.Reader) (*rootlessport.Config, io.ReadCloser, io.WriteCloser, error) {
stdin, err := ioutil.ReadAll(r)
if err != nil {
return nil, nil, nil, err
}
var cfg rootlessport.Config
if err := json.Unmarshal(stdin, &cfg); err != nil {
return nil, nil, nil, err
}
if cfg.NetNSPath == "" {
return nil, nil, nil, errors.New("missing NetNSPath")
}
if cfg.ExitFD <= 0 {
return nil, nil, nil, errors.New("missing ExitFD")
}
exitFile := os.NewFile(uintptr(cfg.ExitFD), "exitfile")
if exitFile == nil {
return nil, nil, nil, errors.New("invalid ExitFD")
}
if cfg.ReadyFD <= 0 {
return nil, nil, nil, errors.New("missing ReadyFD")
}
readyFile := os.NewFile(uintptr(cfg.ReadyFD), "readyfile")
if readyFile == nil {
return nil, nil, nil, errors.New("invalid ReadyFD")
}
return &cfg, exitFile, readyFile, nil
}
func parent() error {
// load config from stdin
cfg, exitR, readyW, err := loadConfig(os.Stdin)
if err != nil {
return err
}
socketDir := filepath.Join(cfg.TmpDir, "rp")
err = os.MkdirAll(socketDir, 0700)
if err != nil {
return err
}
// create the parent driver
stateDir, err := ioutil.TempDir(cfg.TmpDir, "rootlessport")
if err != nil {
return err
}
defer os.RemoveAll(stateDir)
driver, err := rkbuiltin.NewParentDriver(&logrusWriter{prefix: "parent: "}, stateDir)
if err != nil {
return err
}
initComplete := make(chan struct{})
quit := make(chan struct{})
errCh := make(chan error)
// start the parent driver. initComplete will be closed when the child connected to the parent.
logrus.Infof("Starting parent driver")
go func() {
driverErr := driver.RunParentDriver(initComplete, quit, nil)
if driverErr != nil {
logrus.WithError(driverErr).Warn("Parent driver exited")
}
errCh <- driverErr
close(errCh)
}()
opaque := driver.OpaqueForChild()
logrus.Infof("opaque=%+v", opaque)
opaqueJSON, err := json.Marshal(opaque)
if err != nil {
return err
}
childQuitR, childQuitW, err := os.Pipe()
if err != nil {
return err
}
defer func() {
// stop the child
logrus.Info("Stopping child driver")
if err := childQuitW.Close(); err != nil {
logrus.WithError(err).Warn("Unable to close childQuitW")
}
}()
// reexec the child process in the child netns
cmd := exec.Command("/proc/self/exe")
cmd.Args = []string{ReexecChildKey}
cmd.Stdin = childQuitR
cmd.Stdout = &logrusWriter{prefix: "child"}
cmd.Stderr = cmd.Stdout
cmd.Env = append(os.Environ(), reexecChildEnvOpaque+"="+string(opaqueJSON))
childNS, err := ns.GetNS(cfg.NetNSPath)
if err != nil {
return err
}
if err := childNS.Do(func(_ ns.NetNS) error {
logrus.Infof("Starting child driver in child netns (%q %v)", cmd.Path, cmd.Args)
return cmd.Start()
}); err != nil {
return err
}
childErrCh := make(chan error)
go func() {
err := cmd.Wait()
childErrCh <- err
close(childErrCh)
}()
defer func() {
if err := unix.Kill(cmd.Process.Pid, unix.SIGTERM); err != nil {
logrus.WithError(err).Warn("Kill child process")
}
}()
logrus.Info("Waiting for initComplete")
// wait for the child to connect to the parent
outer:
for {
select {
case <-initComplete:
logrus.Infof("initComplete is closed; parent and child established the communication channel")
break outer
case err := <-childErrCh:
if err != nil {
return err
}
case err := <-errCh:
if err != nil {
return err
}
}
}
defer func() {
logrus.Info("Stopping parent driver")
quit <- struct{}{}
if err := <-errCh; err != nil {
logrus.WithError(err).Warn("Parent driver returned error on exit")
}
}()
// let parent expose ports
logrus.Infof("Exposing ports %v", cfg.Mappings)
if err := exposePorts(driver, cfg.Mappings, cfg.ChildIP); err != nil {
return err
}
// we only need to have a socket to reload ports when we run under rootless cni
if cfg.RootlessCNI {
socketfile := filepath.Join(socketDir, cfg.ContainerID)
// make sure to remove the file if it exists to prevent EADDRINUSE
_ = os.Remove(socketfile)
// workaround to bypass the 108 char socket path limit
// open the fd and use the path to the fd as bind argument
fd, err := unix.Open(socketDir, unix.O_PATH, 0)
if err != nil {
return err
}
socket, err := net.ListenUnix("unixpacket", &net.UnixAddr{Name: fmt.Sprintf("/proc/self/fd/%d/%s", fd, cfg.ContainerID), Net: "unixpacket"})
if err != nil {
return err
}
err = unix.Close(fd)
// remove the socket file on exit
defer os.Remove(socketfile)
if err != nil {
logrus.Warnf("Failed to close the socketDir fd: %v", err)
}
defer socket.Close()
go serve(socket, driver)
}
logrus.Info("Ready")
// https://github.com/containers/podman/issues/11248
// Copy /dev/null to stdout and stderr to prevent SIGPIPE errors
if f, err := os.OpenFile("/dev/null", os.O_WRONLY, 0755); err == nil {
unix.Dup2(int(f.Fd()), 1) // nolint:errcheck
unix.Dup2(int(f.Fd()), 2) // nolint:errcheck
f.Close()
}
// write and close ReadyFD (convention is same as slirp4netns --ready-fd)
if _, err := readyW.Write([]byte("1")); err != nil {
return err
}
if err := readyW.Close(); err != nil {
return err
}
// wait for ExitFD to be closed
logrus.Info("Waiting for exitfd to be closed")
if _, err := ioutil.ReadAll(exitR); err != nil {
return err
}
return nil
}
func serve(listener net.Listener, pm rkport.Manager) {
for {
conn, err := listener.Accept()
if err != nil {
// we cannot log this error, stderr is already closed
continue
}
ctx := context.TODO()
err = handler(ctx, conn, pm)
if err != nil {
conn.Write([]byte(err.Error()))
} else {
conn.Write([]byte("OK"))
}
conn.Close()
}
}
func handler(ctx context.Context, conn io.Reader, pm rkport.Manager) error {
var childIP string
dec := json.NewDecoder(conn)
err := dec.Decode(&childIP)
if err != nil {
return errors.Wrap(err, "rootless port failed to decode ports")
}
portStatus, err := pm.ListPorts(ctx)
if err != nil {
return errors.Wrap(err, "rootless port failed to list ports")
}
for _, status := range portStatus {
err = pm.RemovePort(ctx, status.ID)
if err != nil {
return errors.Wrap(err, "rootless port failed to remove port")
}
}
// add the ports with the new child IP
for _, status := range portStatus {
// set the new child IP
status.Spec.ChildIP = childIP
_, err = pm.AddPort(ctx, status.Spec)
if err != nil {
return errors.Wrap(err, "rootless port failed to add port")
}
}
return nil
}
func exposePorts(pm rkport.Manager, portMappings []types.PortMapping, childIP string) error {
ctx := context.TODO()
for _, port := range portMappings {
protocols := strings.Split(port.Protocol, ",")
for _, protocol := range protocols {
hostIP := port.HostIP
if hostIP == "" {
hostIP = "0.0.0.0"
}
for i := uint16(0); i < port.Range; i++ {
spec := rkport.Spec{
Proto: protocol,
ParentIP: hostIP,
ParentPort: int(port.HostPort + i),
ChildPort: int(port.ContainerPort + i),
ChildIP: childIP,
}
if err := rkportutil.ValidatePortSpec(spec, nil); err != nil {
return err
}
if _, err := pm.AddPort(ctx, spec); err != nil {
return err
}
}
}
}
return nil
}
func child() error {
// load the config from the parent
var opaque map[string]string
if err := json.Unmarshal([]byte(os.Getenv(reexecChildEnvOpaque)), &opaque); err != nil {
return err
}
// start the child driver
quit := make(chan struct{})
errCh := make(chan error)
go func() {
d := rkbuiltin.NewChildDriver(os.Stderr)
dErr := d.RunChildDriver(opaque, quit)
errCh <- dErr
}()
defer func() {
logrus.Info("Stopping child driver")
quit <- struct{}{}
if err := <-errCh; err != nil {
logrus.WithError(err).Warn("Child driver returned error on exit")
}
}()
// wait for stdin to be closed
if _, err := ioutil.ReadAll(os.Stdin); err != nil {
return err
}
return nil
}
type logrusWriter struct {
prefix string
}
func (w *logrusWriter) Write(p []byte) (int, error) {
logrus.Infof("%s%s", w.prefix, string(p))
return len(p), nil
}