From 0999991b20bf5336623a4cfc532f281b4636fbc2 Mon Sep 17 00:00:00 2001
From: danishprakash <danish.prakash@suse.com>
Date: Wed, 25 Jan 2023 11:36:13 +0530
Subject: [PATCH] add support for limiting tmpfs size for systemd-specific mnts

* add tests
* add documentation for --shm-size-systemd
* add support for both pod and standalone run

Signed-off-by: danishprakash <danish.prakash@suse.com>
---
 cmd/podman/common/create.go                   |  7 +++++++
 cmd/podman/containers/create.go               |  3 +++
 .../markdown/options/shm-size-systemd.md      | 10 ++++++++++
 docs/source/markdown/podman-create.1.md.in    |  2 ++
 docs/source/markdown/podman-pod-clone.1.md.in |  2 ++
 .../source/markdown/podman-pod-create.1.md.in |  2 ++
 docs/source/markdown/podman-run.1.md.in       |  2 ++
 libpod/container_config.go                    |  3 +++
 libpod/container_internal_linux.go            | 10 ++++++++--
 libpod/options.go                             | 17 ++++++++++++++++
 pkg/domain/entities/pods.go                   |  3 ++-
 pkg/specgen/generate/container.go             |  1 +
 pkg/specgen/generate/container_create.go      |  3 +++
 pkg/specgen/podspecgen.go                     |  4 ++++
 pkg/specgen/specgen.go                        |  4 ++++
 pkg/specgenutil/specgen.go                    | 10 ++++++++++
 test/e2e/pod_create_test.go                   | 20 +++++++++++++++++++
 test/e2e/run_test.go                          | 18 +++++++++++++++++
 18 files changed, 118 insertions(+), 3 deletions(-)
 create mode 100644 docs/source/markdown/options/shm-size-systemd.md

diff --git a/cmd/podman/common/create.go b/cmd/podman/common/create.go
index 5ed8ae8b61..68d41c4be9 100644
--- a/cmd/podman/common/create.go
+++ b/cmd/podman/common/create.go
@@ -638,6 +638,13 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions,
 		)
 		_ = cmd.RegisterFlagCompletionFunc(shmSizeFlagName, completion.AutocompleteNone)
 
+		shmSizeSystemdFlagName := "shm-size-systemd"
+		createFlags.String(
+			shmSizeSystemdFlagName, "",
+			"Size of systemd specific tmpfs mounts (/run, /run/lock) "+sizeWithUnitFormat,
+		)
+		_ = cmd.RegisterFlagCompletionFunc(shmSizeSystemdFlagName, completion.AutocompleteNone)
+
 		sysctlFlagName := "sysctl"
 		createFlags.StringSliceVar(
 			&cf.Sysctl,
diff --git a/cmd/podman/containers/create.go b/cmd/podman/containers/create.go
index 848f0fd6a3..a6550f37f2 100644
--- a/cmd/podman/containers/create.go
+++ b/cmd/podman/containers/create.go
@@ -295,6 +295,9 @@ func CreateInit(c *cobra.Command, vals entities.ContainerCreateOptions, isInfra
 	if c.Flag("shm-size").Changed {
 		vals.ShmSize = c.Flag("shm-size").Value.String()
 	}
+	if c.Flag("shm-size-systemd").Changed {
+		vals.ShmSizeSystemd = c.Flag("shm-size-systemd").Value.String()
+	}
 	if (c.Flag("dns").Changed || c.Flag("dns-option").Changed || c.Flag("dns-search").Changed) && vals.Net != nil && (vals.Net.Network.NSMode == specgen.NoNetwork || vals.Net.Network.IsContainer()) {
 		return vals, fmt.Errorf("conflicting options: dns and the network mode: " + string(vals.Net.Network.NSMode))
 	}
diff --git a/docs/source/markdown/options/shm-size-systemd.md b/docs/source/markdown/options/shm-size-systemd.md
new file mode 100644
index 0000000000..7fab8f5ab6
--- /dev/null
+++ b/docs/source/markdown/options/shm-size-systemd.md
@@ -0,0 +1,10 @@
+####> This option file is used in:
+####>   podman create, pod clone, pod create, run
+####> If file is edited, make sure the changes
+####> are applicable to all of those.
+#### **--shm-size-systemd**=*number[unit]*
+
+Size of systemd-specific tmpfs mounts such as /run, /run/lock, /var/log/journal and /tmp.
+A _unit_ can be **b** (bytes), **k** (kibibytes), **m** (mebibytes), or **g** (gibibytes).
+If the unit is omitted, the system uses bytes. If the option is not specified or _size_ is **0**,
+no explicit size is set on these mounts and the tmpfs default of 50% of the host's memory applies.
diff --git a/docs/source/markdown/podman-create.1.md.in b/docs/source/markdown/podman-create.1.md.in
index 7ba7bd27b4..b25f660280 100644
--- a/docs/source/markdown/podman-create.1.md.in
+++ b/docs/source/markdown/podman-create.1.md.in
@@ -328,6 +328,8 @@ Automatically remove the container when it exits. The default is *false*.
 
 @@option shm-size
 
+@@option shm-size-systemd
+
 @@option stop-signal
 
 @@option stop-timeout
diff --git a/docs/source/markdown/podman-pod-clone.1.md.in b/docs/source/markdown/podman-pod-clone.1.md.in
index 5ef4592121..a474436ee4 100644
--- a/docs/source/markdown/podman-pod-clone.1.md.in
+++ b/docs/source/markdown/podman-pod-clone.1.md.in
@@ -71,6 +71,8 @@ Set a custom name for the cloned pod. The default if not specified is of the syn
 
 @@option shm-size
 
+@@option shm-size-systemd
+
 #### **--start**
 
 When set to true, this flag starts the newly created pod after the
diff --git a/docs/source/markdown/podman-pod-create.1.md.in b/docs/source/markdown/podman-pod-create.1.md.in
index ad68a54186..f2a6205176 100644
--- a/docs/source/markdown/podman-pod-create.1.md.in
+++ b/docs/source/markdown/podman-pod-create.1.md.in
@@ -157,6 +157,8 @@ Note: This options conflict with **--share=cgroup** since that would set the pod
 
 @@option shm-size
 
+@@option shm-size-systemd
+
 @@option subgidname
 
 @@option subuidname
diff --git a/docs/source/markdown/podman-run.1.md.in b/docs/source/markdown/podman-run.1.md.in
index a144248a9a..01e5e4cad0 100644
--- a/docs/source/markdown/podman-run.1.md.in
+++ b/docs/source/markdown/podman-run.1.md.in
@@ -358,6 +358,8 @@ container is using it. The default is *false*.
 
 @@option shm-size
 
+@@option shm-size-systemd
+
 @@option sig-proxy
 
 The default is **true**.
diff --git a/libpod/container_config.go b/libpod/container_config.go
index 1fdbe70233..2e890ab459 100644
--- a/libpod/container_config.go
+++ b/libpod/container_config.go
@@ -130,6 +130,8 @@ type ContainerRootFSConfig struct {
 	// ShmSize is the size of the container's SHM. Only used if ShmDir was
 	// not set manually at time of creation.
 	ShmSize int64 `json:"shmSize"`
+	// ShmSizeSystemd is the size in bytes of systemd-specific tmpfs mounts; 0 sets no explicit size.
+	ShmSizeSystemd int64 `json:"shmSizeSystemd"`
 	// Static directory for container content that will persist across
 	// reboot.
 	// StaticDir is a persistent directory for Libpod files that will
@@ -443,6 +445,7 @@ type InfraInherit struct {
 	SelinuxOpts        []string                 `json:"selinux_opts,omitempty"`
 	Volumes            []*specgen.NamedVolume   `json:"volumes,omitempty"`
 	ShmSize            *int64                   `json:"shm_size"`
+	ShmSizeSystemd     *int64                   `json:"shm_size_systemd"`
 }
 
 // IsDefaultShmSize determines if the user actually set the shm in the parent ctr or if it has been set to the default size
diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go
index b309b32cd0..6ca63f9e20 100644
--- a/libpod/container_internal_linux.go
+++ b/libpod/container_internal_linux.go
@@ -205,6 +205,12 @@ func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) erro
 	if !containerUUIDSet {
 		g.AddProcessEnv("container_uuid", c.ID()[:32])
 	}
+	// Limit the size of the systemd-specific tmpfs mounts if a size was specified
+	// while creating the pod or ctr; if not, the tmpfs default of 50% applies.
+	var shmSizeSystemdMntOpt string
+	if c.config.ShmSizeSystemd != 0 {
+		shmSizeSystemdMntOpt = fmt.Sprintf("size=%d", c.config.ShmSizeSystemd)
+	}
 	options := []string{"rw", "rprivate", "nosuid", "nodev"}
 	for _, dest := range []string{"/run", "/run/lock"} {
 		if MountExists(mounts, dest) {
@@ -214,7 +220,7 @@ func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) erro
 			Destination: dest,
 			Type:        "tmpfs",
 			Source:      "tmpfs",
-			Options:     append(options, "tmpcopyup"),
+			Options:     append(options, "tmpcopyup", shmSizeSystemdMntOpt),
 		}
 		g.AddMount(tmpfsMnt)
 	}
@@ -226,7 +232,7 @@ func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) erro
 			Destination: dest,
 			Type:        "tmpfs",
 			Source:      "tmpfs",
-			Options:     append(options, "tmpcopyup"),
+			Options:     append(options, "tmpcopyup", shmSizeSystemdMntOpt),
 		}
 		g.AddMount(tmpfsMnt)
 	}
diff --git a/libpod/options.go b/libpod/options.go
index e8ffd9a7bc..ed7b00fa8e 100644
--- a/libpod/options.go
+++ b/libpod/options.go
@@ -671,6 +671,23 @@ func WithShmSize(size int64) CtrCreateOption {
 	}
 }
 
+// WithShmSizeSystemd sets the size, in bytes, of the systemd-specific tmpfs
+// mounts: /run, /run/lock, /var/log/journal and /tmp.
+// A size of 0 means no size= option is generated for those mounts.
+//
+// The returned option fails with define.ErrCtrFinalized when it is applied
+// to a container that is already marked valid.
+func WithShmSizeSystemd(size int64) CtrCreateOption {
+	return func(ctr *Container) error {
+		if ctr.valid {
+			return define.ErrCtrFinalized
+		}
+
+		ctr.config.ShmSizeSystemd = size
+		return nil
+	}
+}
+
 // WithPrivileged sets the privileged flag in the container runtime.
 func WithPrivileged(privileged bool) CtrCreateOption {
 	return func(ctr *Container) error {
diff --git a/pkg/domain/entities/pods.go b/pkg/domain/entities/pods.go
index 1ee975da39..9353e43f2b 100644
--- a/pkg/domain/entities/pods.go
+++ b/pkg/domain/entities/pods.go
@@ -260,6 +260,7 @@ type ContainerCreateOptions struct {
 	SecurityOpt        []string `json:"security_opt,omitempty"`
 	SdNotifyMode       string
 	ShmSize            string
+	ShmSizeSystemd     string
 	SignaturePolicy    string
 	StartupHCCmd       string
 	StartupHCInterval  string
@@ -269,8 +270,8 @@ type ContainerCreateOptions struct {
 	StopSignal         string
 	StopTimeout        uint
 	StorageOpts        []string
-	SubUIDName         string
 	SubGIDName         string
+	SubUIDName         string
 	Sysctl             []string `json:"sysctl,omitempty"`
 	Systemd            string
 	Timeout            uint
diff --git a/pkg/specgen/generate/container.go b/pkg/specgen/generate/container.go
index 1570fe5037..2c9dbb7a0f 100644
--- a/pkg/specgen/generate/container.go
+++ b/pkg/specgen/generate/container.go
@@ -478,6 +478,7 @@ func ConfigToSpec(rt *libpod.Runtime, specg *specgen.SpecGenerator, containerID
 	specg.HostDeviceList = conf.DeviceHostSrc
 	specg.Networks = conf.Networks
 	specg.ShmSize = &conf.ShmSize
+	specg.ShmSizeSystemd = &conf.ShmSizeSystemd
 
 	mapSecurityConfig(conf, specg)
 
diff --git a/pkg/specgen/generate/container_create.go b/pkg/specgen/generate/container_create.go
index 2e78267ef8..2a2af1cc12 100644
--- a/pkg/specgen/generate/container_create.go
+++ b/pkg/specgen/generate/container_create.go
@@ -512,6 +512,9 @@ func createContainerOptions(rt *libpod.Runtime, s *specgen.SpecGenerator, pod *l
 	if s.ShmSize != nil {
 		options = append(options, libpod.WithShmSize(*s.ShmSize))
 	}
+	if s.ShmSizeSystemd != nil {
+		options = append(options, libpod.WithShmSizeSystemd(*s.ShmSizeSystemd))
+	}
 	if s.Rootfs != "" {
 		options = append(options, libpod.WithRootFS(s.Rootfs, s.RootfsOverlay, s.RootfsMapping))
 	}
diff --git a/pkg/specgen/podspecgen.go b/pkg/specgen/podspecgen.go
index cc5fc8d762..2d855bfae1 100644
--- a/pkg/specgen/podspecgen.go
+++ b/pkg/specgen/podspecgen.go
@@ -192,6 +192,10 @@ type PodStorageConfig struct {
 	// Conflicts with ShmSize if IpcNS is not private.
 	// Optional.
 	ShmSize *int64 `json:"shm_size,omitempty"`
+	// ShmSizeSystemd is the size of the systemd-specific tmpfs mounts,
+	// specifically /run, /run/lock, /var/log/journal and /tmp.
+	// Optional.
+	ShmSizeSystemd *int64 `json:"shm_size_systemd,omitempty"`
 }
 
 // PodCgroupConfig contains configuration options about a pod's cgroups.
diff --git a/pkg/specgen/specgen.go b/pkg/specgen/specgen.go
index 8905000248..42d75a90b4 100644
--- a/pkg/specgen/specgen.go
+++ b/pkg/specgen/specgen.go
@@ -295,6 +295,10 @@ type ContainerStorageConfig struct {
 	// Conflicts with ShmSize if IpcNS is not private.
 	// Optional.
 	ShmSize *int64 `json:"shm_size,omitempty"`
+	// ShmSizeSystemd is the size of the systemd-specific tmpfs mounts,
+	// specifically /run, /run/lock, /var/log/journal and /tmp.
+	// Optional.
+	ShmSizeSystemd *int64 `json:"shm_size_systemd,omitempty"`
 	// WorkDir is the container's working directory.
 	// If unset, the default, /, will be used.
 	// Optional.
diff --git a/pkg/specgenutil/specgen.go b/pkg/specgenutil/specgen.go
index 8e2a490b28..16358b4af6 100644
--- a/pkg/specgenutil/specgen.go
+++ b/pkg/specgenutil/specgen.go
@@ -488,6 +488,16 @@ func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions
 		s.ShmSize = &val
 	}
 
+	// SHM Size Systemd
+	if c.ShmSizeSystemd != "" {
+		val, err := units.RAMInBytes(c.ShmSizeSystemd)
+		if err != nil {
+			return fmt.Errorf("unable to translate --shm-size-systemd: %w", err)
+		}
+
+		s.ShmSizeSystemd = &val
+	}
+
 	if c.Net != nil {
 		s.Networks = c.Net.Networks
 	}
diff --git a/test/e2e/pod_create_test.go b/test/e2e/pod_create_test.go
index e058fea7a3..f692566aae 100644
--- a/test/e2e/pod_create_test.go
+++ b/test/e2e/pod_create_test.go
@@ -1193,4 +1193,24 @@ ENTRYPOINT ["sleep","99999"]
 		podJSON := podInspect.InspectPodToJSON()
 		Expect(podJSON.InfraConfig).To(HaveField("UtsNS", ns))
 	})
+
+	It("podman pod create --shm-size-systemd", func() {
+		podName := "testShmSizeSystemd"
+		session := podmanTest.Podman([]string{"pod", "create", "--name", podName, "--shm-size-systemd", "10mb"})
+		session.WaitWithDefaultTimeout()
+		Expect(session).Should(Exit(0))
+
+		// start a systemd container in the pod so the pod-level size can be checked on its mounts
+		ctrRun := podmanTest.Podman([]string{"run", "-d", "--pod", podName, SYSTEMD_IMAGE, "/sbin/init"})
+		ctrRun.WaitWithDefaultTimeout()
+		Expect(ctrRun).Should(Exit(0))
+
+		run := podmanTest.Podman([]string{"exec", ctrRun.OutputToString(), "mount"})
+		run.WaitWithDefaultTimeout()
+		Expect(run).Should(Exit(0))
+		t, strings := run.GrepString("tmpfs on /run/lock")
+		Expect(t).To(BeTrue())
+		Expect(strings[0]).Should(ContainSubstring("size=10240k"))
+	})
+
 })
diff --git a/test/e2e/run_test.go b/test/e2e/run_test.go
index 6715589dff..a4939fdb5d 100644
--- a/test/e2e/run_test.go
+++ b/test/e2e/run_test.go
@@ -2053,4 +2053,22 @@ WORKDIR /madethis`, BB)
 		Expect(session).Should(Exit(0))
 		Expect(session.ErrorToString()).To(ContainSubstring("Trying to pull"))
 	})
+
+	It("podman run --shm-size-systemd", func() {
+		ctrName := "testShmSizeSystemd"
+		run := podmanTest.Podman([]string{"run", "--name", ctrName, "--shm-size-systemd", "10mb", "-d", SYSTEMD_IMAGE, "/sbin/init"})
+		run.WaitWithDefaultTimeout()
+		Expect(run).Should(Exit(0))
+
+		logs := podmanTest.Podman([]string{"logs", ctrName})
+		logs.WaitWithDefaultTimeout()
+		Expect(logs).Should(Exit(0))
+
+		mount := podmanTest.Podman([]string{"exec", ctrName, "mount"})
+		mount.WaitWithDefaultTimeout()
+		Expect(mount).Should(Exit(0))
+		t, strings := mount.GrepString("tmpfs on /run/lock")
+		Expect(t).To(BeTrue())
+		Expect(strings[0]).Should(ContainSubstring("size=10240k"))
+	})
 })