rootless: add management for the userNS

When running podman as a non-root user, always create a userNS and let
the OCI runtime use it.

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>

Closes: #936
Approved by: rhatdan
This commit is contained in:
Giuseppe Scrivano
2018-06-11 16:03:34 +02:00
committed by Atomic Bot
parent 8ee8f84734
commit 5ff90677c8
8 changed files with 303 additions and 20 deletions

View File

@ -7,6 +7,7 @@ import (
"github.com/containers/storage"
"github.com/projectatomic/libpod/libpod"
"github.com/projectatomic/libpod/pkg/rootless"
"github.com/urfave/cli"
)
@ -40,7 +41,7 @@ func GetRootlessStorageOpts() (storage.StoreOptions, error) {
func GetDefaultStoreOptions() (storage.StoreOptions, error) {
storageOpts := storage.DefaultStoreOptions
if os.Getuid() != 0 {
if rootless.IsRootless() {
var err error
storageOpts, err = GetRootlessStorageOpts()
if err != nil {

View File

@ -11,6 +11,7 @@ import (
"github.com/pkg/errors"
"github.com/projectatomic/libpod/pkg/hooks"
_ "github.com/projectatomic/libpod/pkg/hooks/0.1.0"
"github.com/projectatomic/libpod/pkg/rootless"
"github.com/projectatomic/libpod/version"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
@ -26,6 +27,15 @@ func main() {
debug := false
cpuProfile := false
became, err := rootless.BecomeRootInUserNS()
if err != nil {
logrus.Errorf(err.Error())
os.Exit(1)
}
if became {
os.Exit(0)
}
if reexec.Init() {
return
}

View File

@ -29,6 +29,7 @@ import (
"github.com/projectatomic/libpod/pkg/chrootuser"
"github.com/projectatomic/libpod/pkg/hooks"
"github.com/projectatomic/libpod/pkg/hooks/exec"
"github.com/projectatomic/libpod/pkg/rootless"
"github.com/projectatomic/libpod/pkg/secrets"
"github.com/projectatomic/libpod/pkg/util"
"github.com/sirupsen/logrus"
@ -235,7 +236,7 @@ func (c *Container) setupStorage(ctx context.Context) error {
return errors.Wrapf(err, "error creating container storage")
}
if os.Getuid() == 0 && (len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0) {
if !rootless.IsRootless() && (len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0) {
info, err := os.Stat(c.runtime.config.TmpDir)
if err != nil {
return errors.Wrapf(err, "cannot stat `%s`", c.runtime.config.TmpDir)
@ -531,7 +532,7 @@ func (c *Container) completeNetworkSetup() error {
if !c.config.PostConfigureNetNS {
return nil
}
if os.Getuid() != 0 {
if rootless.IsRootless() {
return nil
}
if err := c.syncContainer(); err != nil {
@ -734,7 +735,7 @@ func (c *Container) mountStorage() (err error) {
return nil
}
if os.Getuid() == 0 {
if !rootless.IsRootless() {
// TODO: generalize this mount code so it will mount every mount in ctr.config.Mounts
mounted, err := mount.Mounted(c.config.ShmDir)
if err != nil {
@ -1004,11 +1005,9 @@ func (c *Container) postDeleteHooks(ctx context.Context) (err error) {
// Make standard bind mounts to include in the container
func (c *Container) makeBindMounts() error {
if os.Getuid() == 0 {
if err := os.Chown(c.state.RunDir, c.RootUID(), c.RootGID()); err != nil {
return errors.Wrapf(err, "cannot chown run directory %s", c.state.RunDir)
}
}
if c.state.BindMounts == nil {
c.state.BindMounts = make(map[string]string)
@ -1084,11 +1083,9 @@ func (c *Container) writeStringToRundir(destFile, output string) (string, error)
return "", errors.Wrapf(err, "unable to create %s", destFileName)
}
defer f.Close()
if os.Getuid() == 0 {
if err := f.Chown(c.RootUID(), c.RootGID()); err != nil {
return "", err
}
}
if _, err := f.WriteString(output); err != nil {
return "", errors.Wrapf(err, "unable to write %s", destFileName)
@ -1249,7 +1246,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
}
var err error
if os.Getuid() == 0 {
if !rootless.IsRootless() {
if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, &g); err != nil {
return nil, errors.Wrapf(err, "error setting up OCI Hooks")
}
@ -1361,7 +1358,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
g.AddProcessEnv("container", "libpod")
}
if os.Getuid() != 0 {
if rootless.IsRootless() {
g.SetLinuxCgroupsPath("")
} else if c.runtime.config.CgroupManager == SystemdCgroupsManager {
// When runc is set to use Systemd as a cgroup manager, it

View File

@ -19,6 +19,7 @@ import (
"github.com/projectatomic/libpod/libpod/image"
"github.com/projectatomic/libpod/pkg/hooks"
sysreg "github.com/projectatomic/libpod/pkg/registries"
"github.com/projectatomic/libpod/pkg/rootless"
"github.com/sirupsen/logrus"
"github.com/ulule/deepcopier"
)
@ -197,7 +198,7 @@ func GetRootlessRuntimeDir() string {
}
func getDefaultTmpDir() string {
if os.Getuid() == 0 {
if !rootless.IsRootless() {
return "/var/run/libpod"
}
@ -216,7 +217,7 @@ func NewRuntime(options ...RuntimeOption) (runtime *Runtime, err error) {
configPath := ConfigPath
foundConfig := true
if os.Getuid() != 0 {
if rootless.IsRootless() {
foundConfig = false
} else if _, err := os.Stat(OverrideConfigPath); err == nil {
// Use the override configuration path

145
pkg/rootless/rootless.go Normal file
View File

@ -0,0 +1,145 @@
package rootless
import (
"fmt"
"io/ioutil"
"os"
"os/exec"
gosignal "os/signal"
"runtime"
"syscall"
"github.com/containers/storage/pkg/idtools"
"github.com/docker/docker/pkg/signal"
"github.com/pkg/errors"
)
/*
extern int reexec_in_user_namespace(int ready);
extern int reexec_in_user_namespace_wait(int pid);
*/
import "C"
// runInUser marks the user namespace as fully configured by switching the
// _LIBPOD_USERNS_CONFIGURED environment variable to "done".
//
// It returns the error reported by os.Setenv, if any, instead of silently
// discarding it.
func runInUser() error {
	return os.Setenv("_LIBPOD_USERNS_CONFIGURED", "done")
}
// IsRootless reports whether the process is running in rootless mode: either
// its UID is not 0, or the _LIBPOD_USERNS_CONFIGURED environment variable is
// set to a non-empty value.
func IsRootless() bool {
	if os.Getuid() != 0 {
		return true
	}
	return len(os.Getenv("_LIBPOD_USERNS_CONFIGURED")) > 0
}
// tryMappingTool looks up tool in PATH and runs it to install ID mappings for
// the process pid.
//
// The command line is: the resolved tool path, the pid, the triplet
// "0 hostID 1", and then one "ContainerID+1 HostID Size" triplet for every
// entry in mappings. It returns the lookup error when the tool cannot be
// found, otherwise the result of running the command.
func tryMappingTool(tool string, pid int, hostID int, mappings []idtools.IDMap) error {
	toolPath, lookErr := exec.LookPath(tool)
	if lookErr != nil {
		return lookErr
	}

	num := func(v int) string {
		return fmt.Sprintf("%d", v)
	}

	argv := []string{toolPath, num(pid)}
	// ID 0 inside the namespace maps to the single caller-supplied host ID.
	argv = append(argv, num(0), num(hostID), num(1))
	// Each additional range is shifted up by one to leave room for ID 0.
	for _, m := range mappings {
		argv = append(argv, num(m.ContainerID+1), num(m.HostID), num(m.Size))
	}

	mapper := exec.Cmd{
		Path: toolPath,
		Args: argv,
	}
	return mapper.Run()
}
// BecomeRootInUserNS re-execs podman in a new user namespace.
//
// When the process already has UID 0, or _LIBPOD_USERNS_CONFIGURED is set,
// nothing is re-executed and the first result is false; if the variable is
// "init" the result of runInUser is returned as the error.
//
// Otherwise a child is created through reexec_in_user_namespace, its
// setgroups/uid_map/gid_map are configured (preferring newuidmap/newgidmap
// and falling back to a single-ID mapping written directly under /proc), the
// child is released through the sync pipe, signals are forwarded to it, and
// the call blocks until the child terminates. In that case it returns
// (true, nil) and the caller is expected to exit.
//
// NOTE(review): the child's exit status returned by
// reexec_in_user_namespace_wait is only checked for being negative; it is not
// propagated to the caller.
func BecomeRootInUserNS() (bool, error) {
	if os.Getuid() == 0 || os.Getenv("_LIBPOD_USERNS_CONFIGURED") != "" {
		if os.Getenv("_LIBPOD_USERNS_CONFIGURED") == "init" {
			return false, runInUser()
		}
		return false, nil
	}

	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	// The child blocks reading from r until the ID mappings are in place.
	r, w, err := os.Pipe()
	if err != nil {
		return false, err
	}
	defer r.Close()
	defer w.Close()

	pidC := C.reexec_in_user_namespace(C.int(r.Fd()))
	pid := int(pidC)
	if pid < 0 {
		return false, errors.Errorf("cannot re-exec process")
	}

	setgroups := fmt.Sprintf("/proc/%d/setgroups", pid)
	err = ioutil.WriteFile(setgroups, []byte("deny\n"), 0666)
	if err != nil {
		return false, errors.Wrapf(err, "cannot write setgroups file")
	}

	// Additional ID ranges are optional: a lookup failure just means only
	// the single-ID fallback mapping is used below.
	var uids, gids []idtools.IDMap
	username := os.Getenv("USER")
	mappings, err := idtools.NewIDMappings(username, username)
	if err == nil {
		uids = mappings.UIDs()
		gids = mappings.GIDs()
	}

	uidsMapped := false
	if mappings != nil && uids != nil {
		uidsMapped = tryMappingTool("newuidmap", pid, os.Getuid(), uids) == nil
	}
	if !uidsMapped {
		uidMap := fmt.Sprintf("/proc/%d/uid_map", pid)
		err = ioutil.WriteFile(uidMap, []byte(fmt.Sprintf("%d %d 1\n", 0, os.Getuid())), 0666)
		if err != nil {
			return false, errors.Wrapf(err, "cannot write uid_map")
		}
	}

	gidsMapped := false
	if mappings != nil && gids != nil {
		gidsMapped = tryMappingTool("newgidmap", pid, os.Getgid(), gids) == nil
	}
	if !gidsMapped {
		gidMap := fmt.Sprintf("/proc/%d/gid_map", pid)
		err = ioutil.WriteFile(gidMap, []byte(fmt.Sprintf("%d %d 1\n", 0, os.Getgid())), 0666)
		if err != nil {
			return false, errors.Wrapf(err, "cannot write gid_map")
		}
	}

	// Release the child: it proceeds with the exec once it reads this byte.
	_, err = w.Write([]byte("1"))
	if err != nil {
		return false, errors.Wrapf(err, "write to sync pipe")
	}

	// Forward every signal we receive to the child, except SIGCHLD and
	// SIGPIPE.
	c := make(chan os.Signal, 1)

	gosignal.Notify(c)
	defer gosignal.Reset()
	go func() {
		for s := range c {
			if s == signal.SIGCHLD || s == signal.SIGPIPE {
				continue
			}

			syscall.Kill(int(pidC), s.(syscall.Signal))
		}
	}()

	if C.reexec_in_user_namespace_wait(pidC) < 0 {
		// err is nil at this point, and wrapping a nil error yields nil,
		// which would hide the failure; create a fresh error instead.
		return false, errors.New("error waiting for the re-exec process")
	}

	return true, nil
}

View File

@ -0,0 +1,128 @@
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/stat.h>
#include <limits.h>
#include <sys/types.h>
#include <signal.h>
#include <fcntl.h>
#include <sys/wait.h>
/* Invoke the raw clone system call with FLAGS and CHILD_STACK and return its
   result truncated to int.  */
static int
syscall_clone (unsigned long flags, void *child_stack)
{
  long rc = syscall (__NR_clone, flags, child_stack);

  return (int) rc;
}
/* Read /proc/PID/cmdline and split it into a NULL-terminated argv array.

   On success the returned array and the single buffer its entries point
   into are both heap-allocated.  Returns NULL when the file cannot be
   opened or read, when memory cannot be allocated, or when the command
   line is empty.  */
static char **
get_cmd_line_args (pid_t pid)
{
  int fd;
  char path[PATH_MAX];
  char *buffer;
  size_t allocated;
  size_t used = 0;
  ssize_t ret;
  size_t i, argc = 0;
  char **argv;

  snprintf (path, sizeof (path), "/proc/%d/cmdline", (int) pid);
  fd = open (path, O_RDONLY);
  if (fd < 0)
    return NULL;

  allocated = 512;
  buffer = malloc (allocated);
  if (buffer == NULL)
    {
      close (fd);
      return NULL;
    }

  for (;;)
    {
      do
        ret = read (fd, buffer + used, allocated - used);
      while (ret < 0 && errno == EINTR);
      if (ret < 0)
        {
          free (buffer);
          close (fd);
          return NULL;
        }

      if (ret == 0)
        break;

      used += ret;
      if (allocated == used)
        {
          /* Keep the old pointer until realloc succeeds so that it can
             still be released on failure.  */
          char *grown;

          allocated += 512;
          grown = realloc (buffer, allocated);
          if (grown == NULL)
            {
              free (buffer);
              close (fd);
              return NULL;
            }
          buffer = grown;
        }
    }
  close (fd);

  /* Nothing to split; this also keeps `used - 1` below from wrapping.  */
  if (used == 0)
    {
      free (buffer);
      return NULL;
    }

  /* The read loop always leaves at least one spare byte (the buffer is grown
     as soon as it fills up), so a missing final terminator can be added.  */
  if (buffer[used - 1] != '\0')
    buffer[used++] = '\0';

  for (i = 0; i < used; i++)
    if (buffer[i] == '\0')
      argc++;

  argv = malloc (sizeof (char *) * (argc + 1));
  if (argv == NULL)
    {
      free (buffer);
      return NULL;
    }

  argc = 0;

  argv[argc++] = buffer;
  /* Every NUL except the final one starts a new argument.  */
  for (i = 0; i < used - 1; i++)
    if (buffer[i] == '\0')
      argv[argc++] = buffer + i + 1;

  argv[argc] = NULL;

  return argv;
}
/* Create a child in a new user namespace that re-executes the current
   program.

   In the parent, returns the value produced by the clone call (the child's
   pid, or a negative value on failure).  The child never returns: it reads
   the parent's command line, sets _LIBPOD_USERNS_CONFIGURED to "init",
   blocks until one byte arrives on the READY file descriptor, and then
   executes the same command line.  It exits with status 1 if any of those
   steps fails.  */
int
reexec_in_user_namespace(int ready)
{
  ssize_t ret;
  pid_t pid;
  char b;
  pid_t ppid = getpid ();
  char **argv;

  pid = syscall_clone (CLONE_NEWUSER|SIGCHLD, NULL);
  if (pid)
    return pid;

  argv = get_cmd_line_args (ppid);
  if (argv == NULL)
    _exit (1);

  if (setenv ("_LIBPOD_USERNS_CONFIGURED", "init", 1) < 0)
    _exit (1);

  /* Store the byte count itself, not the result of comparing it with zero,
     so that the EINTR retry and the failure check can actually trigger.  */
  do
    ret = read (ready, &b, 1);
  while (ret < 0 && errno == EINTR);
  /* An end-of-file means the go-ahead byte never arrived.  */
  if (ret <= 0)
    _exit (1);

  close (ready);

  /* argv[0] is whatever the program was invoked as; execvp also resolves a
     bare command name through PATH.  */
  execvp (argv[0], argv);

  _exit (1);
}
/* Wait for process PID to terminate, retrying when interrupted by a signal.

   Returns the exit status when the process exited normally, 128 plus the
   signal number when it was killed by a signal, and -1 when waiting failed
   or the status is neither of those.  */
int
reexec_in_user_namespace_wait (int pid)
{
  int wstatus;
  pid_t reaped;

  for (;;)
    {
      reaped = waitpid (pid, &wstatus, 0);
      if (reaped >= 0)
        break;
      if (errno != EINTR)
        return -1;
    }

  if (WIFEXITED (wstatus))
    return WEXITSTATUS (wstatus);

  return WIFSIGNALED (wstatus) ? 128 + WTERMSIG (wstatus) : -1;
}

View File

@ -16,6 +16,7 @@ import (
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/projectatomic/libpod/libpod"
"github.com/projectatomic/libpod/pkg/rootless"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
@ -360,7 +361,7 @@ func (c *CreateConfig) GetContainerCreateOptions() ([]libpod.CtrCreateOption, er
// does not have one
options = append(options, libpod.WithEntrypoint(c.Entrypoint))
if os.Getuid() != 0 {
if rootless.IsRootless() {
if !c.NetMode.IsHost() && !c.NetMode.IsNone() {
options = append(options, libpod.WithNetNS(portBindings, true))
}

View File

@ -1,7 +1,6 @@
package createconfig
import (
"os"
"strings"
"github.com/docker/docker/daemon/caps"
@ -12,6 +11,7 @@ import (
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
"github.com/pkg/errors"
"github.com/projectatomic/libpod/pkg/rootless"
"github.com/sirupsen/logrus"
"io/ioutil"
)
@ -45,7 +45,7 @@ func CreateConfigToOCISpec(config *CreateConfig) (*spec.Spec, error) { //nolint
}
g.AddMount(sysMnt)
}
if os.Getuid() != 0 {
if rootless.IsRootless() {
g.RemoveMount("/dev/pts")
devPts := spec.Mount{
Destination: "/dev/pts",
@ -82,7 +82,7 @@ func CreateConfigToOCISpec(config *CreateConfig) (*spec.Spec, error) { //nolint
}
g.AddProcessEnv("container", "podman")
canAddResources := os.Getuid() == 0
canAddResources := !rootless.IsRootless()
if canAddResources {
// RESOURCES - MEMORY