mirror of
				https://github.com/containers/podman.git
				synced 2025-10-31 10:00:01 +08:00 
			
		
		
		
	Bump to runc main
By using main instead of the v1.1 branch, we drop an unnecessary dependency on cilium/ebpf, saving ~1mb of binary size. Signed-off-by: Matt Heon <mheon@redhat.com>
This commit is contained in:
		
							
								
								
									
										4
									
								
								vendor/github.com/opencontainers/runc/NOTICE
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								vendor/github.com/opencontainers/runc/NOTICE
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -8,9 +8,9 @@ The following is courtesy of our legal counsel: | ||||
|  | ||||
|  | ||||
| Use and transfer of Docker may be subject to certain restrictions by the | ||||
| United States and other governments.   | ||||
| United States and other governments. | ||||
| It is your responsibility to ensure that your use and/or transfer does not | ||||
| violate applicable laws.  | ||||
| violate applicable laws. | ||||
|  | ||||
| For more information, please see http://www.bis.doc.gov | ||||
|  | ||||
|  | ||||
							
								
								
									
										15
									
								
								vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										15
									
								
								vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -26,14 +26,19 @@ func isEnabled() bool { | ||||
| } | ||||
|  | ||||
| func setProcAttr(attr, value string) error { | ||||
| 	// Under AppArmor you can only change your own attr, so use /proc/self/ | ||||
| 	// instead of /proc/<tid>/ like libapparmor does | ||||
| 	attrPath := "/proc/self/attr/apparmor/" + attr | ||||
| 	if _, err := os.Stat(attrPath); errors.Is(err, os.ErrNotExist) { | ||||
| 	attr = utils.CleanPath(attr) | ||||
| 	attrSubPath := "attr/apparmor/" + attr | ||||
| 	if _, err := os.Stat("/proc/self/" + attrSubPath); errors.Is(err, os.ErrNotExist) { | ||||
| 		// fall back to the old convention | ||||
| 		attrPath = "/proc/self/attr/" + attr | ||||
| 		attrSubPath = "attr/" + attr | ||||
| 	} | ||||
|  | ||||
| 	// Under AppArmor you can only change your own attr, so there's no reason | ||||
| 	// to not use /proc/thread-self/ (instead of /proc/<tid>/, like libapparmor | ||||
| 	// does). | ||||
| 	attrPath, closer := utils.ProcThreadSelf(attrSubPath) | ||||
| 	defer closer() | ||||
|  | ||||
| 	f, err := os.OpenFile(attrPath, os.O_WRONLY, 0) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
|  | ||||
							
								
								
									
										15
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										15
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -1,9 +1,24 @@ | ||||
| package cgroups | ||||
|  | ||||
| import ( | ||||
| 	"errors" | ||||
|  | ||||
| 	"github.com/opencontainers/runc/libcontainer/configs" | ||||
| ) | ||||
|  | ||||
var (
	// ErrDevicesUnsupported is an error returned when a cgroup manager
	// is not configured to set device rules.
	ErrDevicesUnsupported = errors.New("cgroup manager is not configured to set device rules")

	// DevicesSetV1 and DevicesSetV2 are hooks used to set device rules for
	// cgroup v1 and v2, respectively. Both are declared without an
	// initializer, so they stay nil until something assigns them. Per the
	// original note, that happens when the libcontainer/cgroups/devices
	// package is imported; until then cgroup managers can't manage devices.
	DevicesSetV1 func(path string, r *configs.Resources) error
	DevicesSetV2 func(path string, r *configs.Resources) error
)
|  | ||||
| type Manager interface { | ||||
| 	// Apply creates a cgroup, if not yet created, and adds a process | ||||
| 	// with the specified pid into that cgroup.  A special value of -1 | ||||
|  | ||||
							
								
								
									
										386
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										386
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -1,386 +0,0 @@ | ||||
| // SPDX-License-Identifier: Apache-2.0 | ||||
| /* | ||||
|  * Copyright (C) 2020 Aleksa Sarai <cyphar@cyphar.com> | ||||
|  * Copyright (C) 2020 SUSE LLC | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| package devices | ||||
|  | ||||
| import ( | ||||
| 	"bufio" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"sort" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
|  | ||||
| 	"github.com/opencontainers/runc/libcontainer/devices" | ||||
| ) | ||||
|  | ||||
// deviceMeta is a Rule without the Allow or Permissions fields, and no
// wildcard-type support. It's effectively the "match" portion of a metadata
// rule, for the purposes of our emulation. It is the key type of deviceRules.
type deviceMeta struct {
	node  devices.Type // device type; parseLine only ever stores block or char here
	major int64        // major number, or devices.Wildcard
	minor int64        // minor number, or devices.Wildcard
}
|  | ||||
// deviceRule is effectively the tuple (deviceMeta, Permissions).
type deviceRule struct {
	meta  deviceMeta          // which device(s) the rule matches
	perms devices.Permissions // access modes attached to that match
}
|  | ||||
| // deviceRules is a mapping of device metadata rules to the associated | ||||
| // permissions in the ruleset. | ||||
| type deviceRules map[deviceMeta]devices.Permissions | ||||
|  | ||||
| func (r deviceRules) orderedEntries() []deviceRule { | ||||
| 	var rules []deviceRule | ||||
| 	for meta, perms := range r { | ||||
| 		rules = append(rules, deviceRule{meta: meta, perms: perms}) | ||||
| 	} | ||||
| 	sort.Slice(rules, func(i, j int) bool { | ||||
| 		// Sort by (major, minor, type). | ||||
| 		a, b := rules[i].meta, rules[j].meta | ||||
| 		return a.major < b.major || | ||||
| 			(a.major == b.major && a.minor < b.minor) || | ||||
| 			(a.major == b.major && a.minor == b.minor && a.node < b.node) | ||||
| 	}) | ||||
| 	return rules | ||||
| } | ||||
|  | ||||
| type Emulator struct { | ||||
| 	defaultAllow bool | ||||
| 	rules        deviceRules | ||||
| } | ||||
|  | ||||
| func (e *Emulator) IsBlacklist() bool { | ||||
| 	return e.defaultAllow | ||||
| } | ||||
|  | ||||
| func (e *Emulator) IsAllowAll() bool { | ||||
| 	return e.IsBlacklist() && len(e.rules) == 0 | ||||
| } | ||||
|  | ||||
| func parseLine(line string) (*deviceRule, error) { | ||||
| 	// Input: node major:minor perms. | ||||
| 	fields := strings.FieldsFunc(line, func(r rune) bool { | ||||
| 		return r == ' ' || r == ':' | ||||
| 	}) | ||||
| 	if len(fields) != 4 { | ||||
| 		return nil, fmt.Errorf("malformed devices.list rule %s", line) | ||||
| 	} | ||||
|  | ||||
| 	var ( | ||||
| 		rule  deviceRule | ||||
| 		node  = fields[0] | ||||
| 		major = fields[1] | ||||
| 		minor = fields[2] | ||||
| 		perms = fields[3] | ||||
| 	) | ||||
|  | ||||
| 	// Parse the node type. | ||||
| 	switch node { | ||||
| 	case "a": | ||||
| 		// Super-special case -- "a" always means every device with every | ||||
| 		// access mode. In fact, for devices.list this actually indicates that | ||||
| 		// the cgroup is in black-list mode. | ||||
| 		// TODO: Double-check that the entire file is "a *:* rwm". | ||||
| 		return nil, nil | ||||
| 	case "b": | ||||
| 		rule.meta.node = devices.BlockDevice | ||||
| 	case "c": | ||||
| 		rule.meta.node = devices.CharDevice | ||||
| 	default: | ||||
| 		return nil, fmt.Errorf("unknown device type %q", node) | ||||
| 	} | ||||
|  | ||||
| 	// Parse the major number. | ||||
| 	if major == "*" { | ||||
| 		rule.meta.major = devices.Wildcard | ||||
| 	} else { | ||||
| 		val, err := strconv.ParseUint(major, 10, 32) | ||||
| 		if err != nil { | ||||
| 			return nil, fmt.Errorf("invalid major number: %w", err) | ||||
| 		} | ||||
| 		rule.meta.major = int64(val) | ||||
| 	} | ||||
|  | ||||
| 	// Parse the minor number. | ||||
| 	if minor == "*" { | ||||
| 		rule.meta.minor = devices.Wildcard | ||||
| 	} else { | ||||
| 		val, err := strconv.ParseUint(minor, 10, 32) | ||||
| 		if err != nil { | ||||
| 			return nil, fmt.Errorf("invalid minor number: %w", err) | ||||
| 		} | ||||
| 		rule.meta.minor = int64(val) | ||||
| 	} | ||||
|  | ||||
| 	// Parse the access permissions. | ||||
| 	rule.perms = devices.Permissions(perms) | ||||
| 	if !rule.perms.IsValid() || rule.perms.IsEmpty() { | ||||
| 		return nil, fmt.Errorf("parse access mode: contained unknown modes or is empty: %q", perms) | ||||
| 	} | ||||
| 	return &rule, nil | ||||
| } | ||||
|  | ||||
| func (e *Emulator) addRule(rule deviceRule) error { //nolint:unparam | ||||
| 	if e.rules == nil { | ||||
| 		e.rules = make(map[deviceMeta]devices.Permissions) | ||||
| 	} | ||||
|  | ||||
| 	// Merge with any pre-existing permissions. | ||||
| 	oldPerms := e.rules[rule.meta] | ||||
| 	newPerms := rule.perms.Union(oldPerms) | ||||
| 	e.rules[rule.meta] = newPerms | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (e *Emulator) rmRule(rule deviceRule) error { | ||||
| 	// Give an error if any of the permissions requested to be removed are | ||||
| 	// present in a partially-matching wildcard rule, because such rules will | ||||
| 	// be ignored by cgroupv1. | ||||
| 	// | ||||
| 	// This is a diversion from cgroupv1, but is necessary to avoid leading | ||||
| 	// users into a false sense of security. cgroupv1 will silently(!) ignore | ||||
| 	// requests to remove partial exceptions, but we really shouldn't do that. | ||||
| 	// | ||||
| 	// It may seem like we could just "split" wildcard rules which hit this | ||||
| 	// issue, but unfortunately there are 2^32 possible major and minor | ||||
| 	// numbers, which would exhaust kernel memory quickly if we did this. Not | ||||
| 	// to mention it'd be really slow (the kernel side is implemented as a | ||||
| 	// linked-list of exceptions). | ||||
| 	for _, partialMeta := range []deviceMeta{ | ||||
| 		{node: rule.meta.node, major: devices.Wildcard, minor: rule.meta.minor}, | ||||
| 		{node: rule.meta.node, major: rule.meta.major, minor: devices.Wildcard}, | ||||
| 		{node: rule.meta.node, major: devices.Wildcard, minor: devices.Wildcard}, | ||||
| 	} { | ||||
| 		// This wildcard rule is equivalent to the requested rule, so skip it. | ||||
| 		if rule.meta == partialMeta { | ||||
| 			continue | ||||
| 		} | ||||
| 		// Only give an error if the set of permissions overlap. | ||||
| 		partialPerms := e.rules[partialMeta] | ||||
| 		if !partialPerms.Intersection(rule.perms).IsEmpty() { | ||||
| 			return fmt.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", rule.meta, rule.perms, partialMeta, partialPerms) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Subtract all of the permissions listed from the full match rule. If the | ||||
| 	// rule didn't exist, all of this is a no-op. | ||||
| 	newPerms := e.rules[rule.meta].Difference(rule.perms) | ||||
| 	if newPerms.IsEmpty() { | ||||
| 		delete(e.rules, rule.meta) | ||||
| 	} else { | ||||
| 		e.rules[rule.meta] = newPerms | ||||
| 	} | ||||
| 	// TODO: The actual cgroup code doesn't care if an exception didn't exist | ||||
| 	//       during removal, so not erroring out here is /accurate/ but quite | ||||
| 	//       worrying. Maybe we should do additional validation, but again we | ||||
| 	//       have to worry about backwards-compatibility. | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (e *Emulator) allow(rule *deviceRule) error { | ||||
| 	// This cgroup is configured as a black-list. Reset the entire emulator, | ||||
| 	// and put is into black-list mode. | ||||
| 	if rule == nil || rule.meta.node == devices.WildcardDevice { | ||||
| 		*e = Emulator{ | ||||
| 			defaultAllow: true, | ||||
| 			rules:        nil, | ||||
| 		} | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	var err error | ||||
| 	if e.defaultAllow { | ||||
| 		err = wrapErr(e.rmRule(*rule), "unable to remove 'deny' exception") | ||||
| 	} else { | ||||
| 		err = wrapErr(e.addRule(*rule), "unable to add 'allow' exception") | ||||
| 	} | ||||
| 	return err | ||||
| } | ||||
|  | ||||
| func (e *Emulator) deny(rule *deviceRule) error { | ||||
| 	// This cgroup is configured as a white-list. Reset the entire emulator, | ||||
| 	// and put is into white-list mode. | ||||
| 	if rule == nil || rule.meta.node == devices.WildcardDevice { | ||||
| 		*e = Emulator{ | ||||
| 			defaultAllow: false, | ||||
| 			rules:        nil, | ||||
| 		} | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	var err error | ||||
| 	if e.defaultAllow { | ||||
| 		err = wrapErr(e.addRule(*rule), "unable to add 'deny' exception") | ||||
| 	} else { | ||||
| 		err = wrapErr(e.rmRule(*rule), "unable to remove 'allow' exception") | ||||
| 	} | ||||
| 	return err | ||||
| } | ||||
|  | ||||
| func (e *Emulator) Apply(rule devices.Rule) error { | ||||
| 	if !rule.Type.CanCgroup() { | ||||
| 		return fmt.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type) | ||||
| 	} | ||||
|  | ||||
| 	innerRule := &deviceRule{ | ||||
| 		meta: deviceMeta{ | ||||
| 			node:  rule.Type, | ||||
| 			major: rule.Major, | ||||
| 			minor: rule.Minor, | ||||
| 		}, | ||||
| 		perms: rule.Permissions, | ||||
| 	} | ||||
| 	if innerRule.meta.node == devices.WildcardDevice { | ||||
| 		innerRule = nil | ||||
| 	} | ||||
|  | ||||
| 	if rule.Allow { | ||||
| 		return e.allow(innerRule) | ||||
| 	} | ||||
|  | ||||
| 	return e.deny(innerRule) | ||||
| } | ||||
|  | ||||
| // EmulatorFromList takes a reader to a "devices.list"-like source, and returns | ||||
| // a new Emulator that represents the state of the devices cgroup. Note that | ||||
| // black-list devices cgroups cannot be fully reconstructed, due to limitations | ||||
| // in the devices cgroup API. Instead, such cgroups are always treated as | ||||
| // "allow all" cgroups. | ||||
| func EmulatorFromList(list io.Reader) (*Emulator, error) { | ||||
| 	// Normally cgroups are in black-list mode by default, but the way we | ||||
| 	// figure out the current mode is whether or not devices.list has an | ||||
| 	// allow-all rule. So we default to a white-list, and the existence of an | ||||
| 	// "a *:* rwm" entry will tell us otherwise. | ||||
| 	e := &Emulator{ | ||||
| 		defaultAllow: false, | ||||
| 	} | ||||
|  | ||||
| 	// Parse the "devices.list". | ||||
| 	s := bufio.NewScanner(list) | ||||
| 	for s.Scan() { | ||||
| 		line := s.Text() | ||||
| 		deviceRule, err := parseLine(line) | ||||
| 		if err != nil { | ||||
| 			return nil, fmt.Errorf("error parsing line %q: %w", line, err) | ||||
| 		} | ||||
| 		// "devices.list" is an allow list. Note that this means that in | ||||
| 		// black-list mode, we have no idea what rules are in play. As a | ||||
| 		// result, we need to be very careful in Transition(). | ||||
| 		if err := e.allow(deviceRule); err != nil { | ||||
| 			return nil, fmt.Errorf("error adding devices.list rule: %w", err) | ||||
| 		} | ||||
| 	} | ||||
| 	if err := s.Err(); err != nil { | ||||
| 		return nil, fmt.Errorf("error reading devices.list lines: %w", err) | ||||
| 	} | ||||
| 	return e, nil | ||||
| } | ||||
|  | ||||
| // Transition calculates what is the minimally-disruptive set of rules need to | ||||
| // be applied to a devices cgroup in order to transition to the given target. | ||||
| // This means that any already-existing rules will not be applied, and | ||||
| // disruptive rules (like denying all device access) will only be applied if | ||||
| // necessary. | ||||
| // | ||||
| // This function is the sole reason for all of Emulator -- to allow us | ||||
| // to figure out how to update a containers' cgroups without causing spurious | ||||
| // device errors (if possible). | ||||
| func (source *Emulator) Transition(target *Emulator) ([]*devices.Rule, error) { | ||||
| 	var transitionRules []*devices.Rule | ||||
| 	oldRules := source.rules | ||||
|  | ||||
| 	// If the default policy doesn't match, we need to include a "disruptive" | ||||
| 	// rule (either allow-all or deny-all) in order to switch the cgroup to the | ||||
| 	// correct default policy. | ||||
| 	// | ||||
| 	// However, due to a limitation in "devices.list" we cannot be sure what | ||||
| 	// deny rules are in place in a black-list cgroup. Thus if the source is a | ||||
| 	// black-list we also have to include a disruptive rule. | ||||
| 	if source.IsBlacklist() || source.defaultAllow != target.defaultAllow { | ||||
| 		transitionRules = append(transitionRules, &devices.Rule{ | ||||
| 			Type:        'a', | ||||
| 			Major:       -1, | ||||
| 			Minor:       -1, | ||||
| 			Permissions: devices.Permissions("rwm"), | ||||
| 			Allow:       target.defaultAllow, | ||||
| 		}) | ||||
| 		// The old rules are only relevant if we aren't starting out with a | ||||
| 		// disruptive rule. | ||||
| 		oldRules = nil | ||||
| 	} | ||||
|  | ||||
| 	// NOTE: We traverse through the rules in a sorted order so we always write | ||||
| 	//       the same set of rules (this is to aid testing). | ||||
|  | ||||
| 	// First, we create inverse rules for any old rules not in the new set. | ||||
| 	// This includes partial-inverse rules for specific permissions. This is a | ||||
| 	// no-op if we added a disruptive rule, since oldRules will be empty. | ||||
| 	for _, rule := range oldRules.orderedEntries() { | ||||
| 		meta, oldPerms := rule.meta, rule.perms | ||||
| 		newPerms := target.rules[meta] | ||||
| 		droppedPerms := oldPerms.Difference(newPerms) | ||||
| 		if !droppedPerms.IsEmpty() { | ||||
| 			transitionRules = append(transitionRules, &devices.Rule{ | ||||
| 				Type:        meta.node, | ||||
| 				Major:       meta.major, | ||||
| 				Minor:       meta.minor, | ||||
| 				Permissions: droppedPerms, | ||||
| 				Allow:       target.defaultAllow, | ||||
| 			}) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Add any additional rules which weren't in the old set. We happen to | ||||
| 	// filter out rules which are present in both sets, though this isn't | ||||
| 	// strictly necessary. | ||||
| 	for _, rule := range target.rules.orderedEntries() { | ||||
| 		meta, newPerms := rule.meta, rule.perms | ||||
| 		oldPerms := oldRules[meta] | ||||
| 		gainedPerms := newPerms.Difference(oldPerms) | ||||
| 		if !gainedPerms.IsEmpty() { | ||||
| 			transitionRules = append(transitionRules, &devices.Rule{ | ||||
| 				Type:        meta.node, | ||||
| 				Major:       meta.major, | ||||
| 				Minor:       meta.minor, | ||||
| 				Permissions: gainedPerms, | ||||
| 				Allow:       !target.defaultAllow, | ||||
| 			}) | ||||
| 		} | ||||
| 	} | ||||
| 	return transitionRules, nil | ||||
| } | ||||
|  | ||||
| // Rules returns the minimum set of rules necessary to convert a *deny-all* | ||||
| // cgroup to the emulated filter state (note that this is not the same as a | ||||
| // default cgroupv1 cgroup -- which is allow-all). This is effectively just a | ||||
| // wrapper around Transition() with the source emulator being an empty cgroup. | ||||
| func (e *Emulator) Rules() ([]*devices.Rule, error) { | ||||
| 	defaultCgroup := &Emulator{defaultAllow: false} | ||||
| 	return defaultCgroup.Transition(e) | ||||
| } | ||||
|  | ||||
// wrapErr prefixes err with text, producing "<text>: <err>" while keeping err
// reachable through errors.Is / errors.As / errors.Unwrap. A nil err is
// passed through as nil so call sites can wrap unconditionally.
//
// text is passed as an argument rather than spliced into the format string,
// so a literal '%' in text cannot corrupt the message or the %w wrapping.
func wrapErr(err error, text string) error {
	if err == nil {
		return nil
	}
	return fmt.Errorf("%s: %w", text, err)
}
							
								
								
									
										208
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										208
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -1,208 +0,0 @@ | ||||
| // Package devicefilter contains eBPF device filter program | ||||
| // | ||||
| // The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c | ||||
| // | ||||
| // Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano) | ||||
| // agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397 | ||||
| package devicefilter | ||||
|  | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"math" | ||||
| 	"strconv" | ||||
|  | ||||
| 	"github.com/cilium/ebpf/asm" | ||||
| 	devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices" | ||||
| 	"github.com/opencontainers/runc/libcontainer/devices" | ||||
| 	"golang.org/x/sys/unix" | ||||
| ) | ||||
|  | ||||
const (
	// license is the license string returned by DeviceFilter alongside the
	// generated program.
	// license string format is same as kernel MODULE_LICENSE macro
	license = "Apache"
)
|  | ||||
| // DeviceFilter returns eBPF device filter program and its license string | ||||
| func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) { | ||||
| 	// Generate the minimum ruleset for the device rules we are given. While we | ||||
| 	// don't care about minimum transitions in cgroupv2, using the emulator | ||||
| 	// gives us a guarantee that the behaviour of devices filtering is the same | ||||
| 	// as cgroupv1, including security hardenings to avoid misconfiguration | ||||
| 	// (such as punching holes in wildcard rules). | ||||
| 	emu := new(devicesemulator.Emulator) | ||||
| 	for _, rule := range rules { | ||||
| 		if err := emu.Apply(*rule); err != nil { | ||||
| 			return nil, "", err | ||||
| 		} | ||||
| 	} | ||||
| 	cleanRules, err := emu.Rules() | ||||
| 	if err != nil { | ||||
| 		return nil, "", err | ||||
| 	} | ||||
|  | ||||
| 	p := &program{ | ||||
| 		defaultAllow: emu.IsBlacklist(), | ||||
| 	} | ||||
| 	p.init() | ||||
|  | ||||
| 	for idx, rule := range cleanRules { | ||||
| 		if rule.Type == devices.WildcardDevice { | ||||
| 			// We can safely skip over wildcard entries because there should | ||||
| 			// only be one (at most) at the very start to instruct cgroupv1 to | ||||
| 			// go into allow-list mode. However we do double-check this here. | ||||
| 			if idx != 0 || rule.Allow != emu.IsBlacklist() { | ||||
| 				return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString()) | ||||
| 			} | ||||
| 			continue | ||||
| 		} | ||||
| 		if rule.Allow == p.defaultAllow { | ||||
| 			// There should be no rules which have an action equal to the | ||||
| 			// default action, the emulator removes those. | ||||
| 			return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString()) | ||||
| 		} | ||||
| 		if err := p.appendRule(rule); err != nil { | ||||
| 			return nil, "", err | ||||
| 		} | ||||
| 	} | ||||
| 	return p.finalize(), license, nil | ||||
| } | ||||
|  | ||||
// program accumulates the eBPF instructions of a device filter while it is
// being built by init, appendRule and finalize.
type program struct {
	insts        asm.Instructions // instructions emitted so far
	defaultAllow bool             // selects the value finalize loads into R0 (1 if true, 0 otherwise)
	blockID      int              // index of the next rule block; finalize sets it to -1
}
|  | ||||
| func (p *program) init() { | ||||
| 	// struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423 | ||||
| 	/* | ||||
| 		u32 access_type | ||||
| 		u32 major | ||||
| 		u32 minor | ||||
| 	*/ | ||||
| 	// R2 <- type (lower 16 bit of u32 access_type at R1[0]) | ||||
| 	p.insts = append(p.insts, | ||||
| 		asm.LoadMem(asm.R2, asm.R1, 0, asm.Word), | ||||
| 		asm.And.Imm32(asm.R2, 0xFFFF)) | ||||
|  | ||||
| 	// R3 <- access (upper 16 bit of u32 access_type at R1[0]) | ||||
| 	p.insts = append(p.insts, | ||||
| 		asm.LoadMem(asm.R3, asm.R1, 0, asm.Word), | ||||
| 		// RSh: bitwise shift right | ||||
| 		asm.RSh.Imm32(asm.R3, 16)) | ||||
|  | ||||
| 	// R4 <- major (u32 major at R1[4]) | ||||
| 	p.insts = append(p.insts, | ||||
| 		asm.LoadMem(asm.R4, asm.R1, 4, asm.Word)) | ||||
|  | ||||
| 	// R5 <- minor (u32 minor at R1[8]) | ||||
| 	p.insts = append(p.insts, | ||||
| 		asm.LoadMem(asm.R5, asm.R1, 8, asm.Word)) | ||||
| } | ||||
|  | ||||
| // appendRule rule converts an OCI rule to the relevant eBPF block and adds it | ||||
| // to the in-progress filter program. In order to operate properly, it must be | ||||
| // called with a "clean" rule list (generated by devices.Emulator.Rules() -- | ||||
| // with any "a" rules removed). | ||||
| func (p *program) appendRule(rule *devices.Rule) error { | ||||
| 	if p.blockID < 0 { | ||||
| 		return errors.New("the program is finalized") | ||||
| 	} | ||||
|  | ||||
| 	var bpfType int32 | ||||
| 	switch rule.Type { | ||||
| 	case devices.CharDevice: | ||||
| 		bpfType = int32(unix.BPF_DEVCG_DEV_CHAR) | ||||
| 	case devices.BlockDevice: | ||||
| 		bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK) | ||||
| 	default: | ||||
| 		// We do not permit 'a', nor any other types we don't know about. | ||||
| 		return fmt.Errorf("invalid type %q", string(rule.Type)) | ||||
| 	} | ||||
| 	if rule.Major > math.MaxUint32 { | ||||
| 		return fmt.Errorf("invalid major %d", rule.Major) | ||||
| 	} | ||||
| 	if rule.Minor > math.MaxUint32 { | ||||
| 		return fmt.Errorf("invalid minor %d", rule.Major) | ||||
| 	} | ||||
| 	hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1 | ||||
| 	hasMinor := rule.Minor >= 0 | ||||
| 	bpfAccess := int32(0) | ||||
| 	for _, r := range rule.Permissions { | ||||
| 		switch r { | ||||
| 		case 'r': | ||||
| 			bpfAccess |= unix.BPF_DEVCG_ACC_READ | ||||
| 		case 'w': | ||||
| 			bpfAccess |= unix.BPF_DEVCG_ACC_WRITE | ||||
| 		case 'm': | ||||
| 			bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD | ||||
| 		default: | ||||
| 			return fmt.Errorf("unknown device access %v", r) | ||||
| 		} | ||||
| 	} | ||||
| 	// If the access is rwm, skip the check. | ||||
| 	hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD) | ||||
|  | ||||
| 	var ( | ||||
| 		blockSym         = "block-" + strconv.Itoa(p.blockID) | ||||
| 		nextBlockSym     = "block-" + strconv.Itoa(p.blockID+1) | ||||
| 		prevBlockLastIdx = len(p.insts) - 1 | ||||
| 	) | ||||
| 	p.insts = append(p.insts, | ||||
| 		// if (R2 != bpfType) goto next | ||||
| 		asm.JNE.Imm(asm.R2, bpfType, nextBlockSym), | ||||
| 	) | ||||
| 	if hasAccess { | ||||
| 		p.insts = append(p.insts, | ||||
| 			// if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next | ||||
| 			asm.Mov.Reg32(asm.R1, asm.R3), | ||||
| 			asm.And.Imm32(asm.R1, bpfAccess), | ||||
| 			asm.JNE.Reg(asm.R1, asm.R3, nextBlockSym), | ||||
| 		) | ||||
| 	} | ||||
| 	if hasMajor { | ||||
| 		p.insts = append(p.insts, | ||||
| 			// if (R4 != major) goto next | ||||
| 			asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym), | ||||
| 		) | ||||
| 	} | ||||
| 	if hasMinor { | ||||
| 		p.insts = append(p.insts, | ||||
| 			// if (R5 != minor) goto next | ||||
| 			asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym), | ||||
| 		) | ||||
| 	} | ||||
| 	p.insts = append(p.insts, acceptBlock(rule.Allow)...) | ||||
| 	// set blockSym to the first instruction we added in this iteration | ||||
| 	p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym) | ||||
| 	p.blockID++ | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (p *program) finalize() asm.Instructions { | ||||
| 	var v int32 | ||||
| 	if p.defaultAllow { | ||||
| 		v = 1 | ||||
| 	} | ||||
| 	blockSym := "block-" + strconv.Itoa(p.blockID) | ||||
| 	p.insts = append(p.insts, | ||||
| 		// R0 <- v | ||||
| 		asm.Mov.Imm32(asm.R0, v).Sym(blockSym), | ||||
| 		asm.Return(), | ||||
| 	) | ||||
| 	p.blockID = -1 | ||||
| 	return p.insts | ||||
| } | ||||
|  | ||||
| func acceptBlock(accept bool) asm.Instructions { | ||||
| 	var v int32 | ||||
| 	if accept { | ||||
| 		v = 1 | ||||
| 	} | ||||
| 	return []asm.Instruction{ | ||||
| 		// R0 <- v | ||||
| 		asm.Mov.Imm32(asm.R0, v), | ||||
| 		asm.Return(), | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										253
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										253
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -1,253 +0,0 @@ | ||||
| package ebpf | ||||
|  | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"runtime" | ||||
| 	"sync" | ||||
| 	"unsafe" | ||||
|  | ||||
| 	"github.com/cilium/ebpf" | ||||
| 	"github.com/cilium/ebpf/asm" | ||||
| 	"github.com/cilium/ebpf/link" | ||||
| 	"github.com/sirupsen/logrus" | ||||
| 	"golang.org/x/sys/unix" | ||||
| ) | ||||
|  | ||||
// nilCloser is a no-op closer: it does nothing and always returns nil.
func nilCloser() error {
	return nil
}
| /* findAttachedCgroupDeviceFilters queries, via bpf(BPF_PROG_QUERY), the BPF_CGROUP_DEVICE programs attached to the cgroup directory fd dirFd and returns a handle for each program it can open. Programs that cannot be opened because of a permission error are skipped; any other failure aborts with an error. */ | ||||
| func findAttachedCgroupDeviceFilters(dirFd int) ([]*ebpf.Program, error) { | ||||
| 	type bpfAttrQuery struct { | ||||
| 		TargetFd    uint32 | ||||
| 		AttachType  uint32 | ||||
| 		QueryType   uint32 | ||||
| 		AttachFlags uint32 | ||||
| 		ProgIds     uint64 // __aligned_u64; filled below with the address of the id buffer | ||||
| 		ProgCnt     uint32 | ||||
| 	} | ||||
|  | ||||
| 	// Currently you can only have 64 eBPF programs attached to a cgroup, so start with room for 64 ids; the query is attempted at most 10 times, resizing the buffer from ProgCnt each time. | ||||
| 	size := 64 | ||||
| 	retries := 0 | ||||
| 	for retries < 10 { | ||||
| 		progIds := make([]uint32, size) | ||||
| 		query := bpfAttrQuery{ | ||||
| 			TargetFd:   uint32(dirFd), | ||||
| 			AttachType: uint32(unix.BPF_CGROUP_DEVICE), | ||||
| 			ProgIds:    uint64(uintptr(unsafe.Pointer(&progIds[0]))), | ||||
| 			ProgCnt:    uint32(len(progIds)), | ||||
| 		} | ||||
|  | ||||
| 		// Fetch the list of program ids. | ||||
| 		_, _, errno := unix.Syscall(unix.SYS_BPF, | ||||
| 			uintptr(unix.BPF_PROG_QUERY), | ||||
| 			uintptr(unsafe.Pointer(&query)), | ||||
| 			unsafe.Sizeof(query)) | ||||
| 		size = int(query.ProgCnt) | ||||
| 		runtime.KeepAlive(query) | ||||
| 		if errno != 0 { | ||||
| 			// On ENOSPC we get the correct number of programs (already copied into size above), so retry with a buffer of that size. | ||||
| 			if errno == unix.ENOSPC { | ||||
| 				retries++ | ||||
| 				continue | ||||
| 			} | ||||
| 			return nil, fmt.Errorf("bpf_prog_query(BPF_CGROUP_DEVICE) failed: %w", errno) | ||||
| 		} | ||||
|  | ||||
| 		// Trim the buffer to the count returned in ProgCnt and convert the ids to program handles. | ||||
| 		progIds = progIds[:size] | ||||
| 		programs := make([]*ebpf.Program, 0, len(progIds)) | ||||
| 		for _, progId := range progIds { | ||||
| 			program, err := ebpf.NewProgramFromID(ebpf.ProgramID(progId)) | ||||
| 			if err != nil { | ||||
| 				// We skip over programs that give us -EACCES or -EPERM. This | ||||
| 				// is necessary because there may be BPF programs that have | ||||
| 				// been attached (such as with --systemd-cgroup) which have an | ||||
| 				// LSM label that blocks us from interacting with the program. | ||||
| 				// | ||||
| 				// Because additional BPF_CGROUP_DEVICE programs only can add | ||||
| 				// restrictions, there's no real issue with just ignoring these | ||||
| 				// programs (and stops runc from breaking on distributions with | ||||
| 				// very strict SELinux policies). | ||||
| 				if errors.Is(err, os.ErrPermission) { | ||||
| 					logrus.Debugf("ignoring existing CGROUP_DEVICE program (prog_id=%v) which cannot be accessed by runc -- likely due to LSM policy: %v", progId, err) | ||||
| 					continue | ||||
| 				} | ||||
| 				return nil, fmt.Errorf("cannot fetch program from id: %w", err) | ||||
| 			} | ||||
| 			programs = append(programs, program) | ||||
| 		} | ||||
| 		runtime.KeepAlive(progIds) | ||||
| 		return programs, nil | ||||
| 	} | ||||
|  | ||||
| 	return nil, errors.New("could not get complete list of CGROUP_DEVICE programs") | ||||
| } | ||||
|  | ||||
| var ( | ||||
| 	haveBpfProgReplaceBool bool /* cached result of the BPF_F_REPLACE probe; only set inside haveBpfProgReplaceOnce */ | ||||
| 	haveBpfProgReplaceOnce sync.Once /* makes the probe in haveBpfProgReplace run at most once */ | ||||
| ) | ||||
|  | ||||
| // haveBpfProgReplace reports whether attaching with BPF_F_REPLACE appears to be supported; the probe runs once and its result is cached in haveBpfProgReplaceBool. | ||||
| // Loosely based on the BPF_F_REPLACE support check in https://github.com/cilium/ebpf/blob/v0.6.0/link/syscalls.go. | ||||
| // | ||||
| // TODO: move this logic to cilium/ebpf | ||||
| func haveBpfProgReplace() bool { | ||||
| 	haveBpfProgReplaceOnce.Do(func() { | ||||
| 		prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ | ||||
| 			Type:    ebpf.CGroupDevice, | ||||
| 			License: "MIT", | ||||
| 			Instructions: asm.Instructions{ | ||||
| 				asm.Mov.Imm(asm.R0, 0), | ||||
| 				asm.Return(), | ||||
| 			}, | ||||
| 		}) | ||||
| 		if err != nil { | ||||
| 			logrus.Debugf("checking for BPF_F_REPLACE support: ebpf.NewProgram failed: %v", err) | ||||
| 			return | ||||
| 		} | ||||
| 		defer prog.Close() | ||||
|  | ||||
| 		devnull, err := os.Open("/dev/null") | ||||
| 		if err != nil { | ||||
| 			logrus.Debugf("checking for BPF_F_REPLACE support: open dummy target fd: %v", err) | ||||
| 			return | ||||
| 		} | ||||
| 		defer devnull.Close() | ||||
|  | ||||
| 		// We know that we have BPF_PROG_ATTACH since we can load | ||||
| 		// BPF_CGROUP_DEVICE programs. If passing BPF_F_REPLACE gives us EINVAL | ||||
| 		// we know that the feature isn't present. | ||||
| 		err = link.RawAttachProgram(link.RawAttachProgramOptions{ | ||||
| 			// We rely on this fd being checked after attachFlags. | ||||
| 			Target: int(devnull.Fd()), | ||||
| 			// Attempt to "replace" bad fds with this program. | ||||
| 			Program: prog, | ||||
| 			Attach:  ebpf.AttachCGroupDevice, | ||||
| 			Flags:   unix.BPF_F_ALLOW_MULTI | unix.BPF_F_REPLACE, | ||||
| 		}) | ||||
| 		if errors.Is(err, unix.EINVAL) { | ||||
| 			// BPF_F_REPLACE is not supported; leave the cached result false. | ||||
| 			return | ||||
| 		} | ||||
| 		// attach_flags test succeeded (no EINVAL). EBADF is the expected failure for the dummy /dev/null target; any other outcome is only logged at debug level. | ||||
| 		if !errors.Is(err, unix.EBADF) { | ||||
| 			logrus.Debugf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", err) | ||||
| 		} | ||||
| 		haveBpfProgReplaceBool = true | ||||
| 	}) | ||||
| 	return haveBpfProgReplaceBool | ||||
| } | ||||
|  | ||||
| // LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory: it loads insts (with the given license) as a CGroupDevice program and attaches it to the cgroup directory fd dirFd, replacing or detaching the filters found attached beforehand. | ||||
| // It returns a closer that detaches the new program. On failures up to and including the attach, the closer is a no-op (nilCloser); if detaching an old filter fails, the real closer is returned together with the error. | ||||
| // Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15. | ||||
| // | ||||
| // https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92 | ||||
| func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd int) (func() error, error) { | ||||
| 	// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167). | ||||
| 	// This limit is not inherited into the container. | ||||
| 	memlockLimit := &unix.Rlimit{ | ||||
| 		Cur: unix.RLIM_INFINITY, | ||||
| 		Max: unix.RLIM_INFINITY, | ||||
| 	} | ||||
| 	_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit) /* best-effort: a failure to raise the limit is ignored */ | ||||
|  | ||||
| 	// Get the list of existing programs. | ||||
| 	oldProgs, err := findAttachedCgroupDeviceFilters(dirFd) | ||||
| 	if err != nil { | ||||
| 		return nilCloser, err | ||||
| 	} | ||||
| 	useReplaceProg := haveBpfProgReplace() && len(oldProgs) == 1 | ||||
|  | ||||
| 	// Generate new program. | ||||
| 	spec := &ebpf.ProgramSpec{ | ||||
| 		Type:         ebpf.CGroupDevice, | ||||
| 		Instructions: insts, | ||||
| 		License:      license, | ||||
| 	} | ||||
| 	prog, err := ebpf.NewProgram(spec) | ||||
| 	if err != nil { | ||||
| 		return nilCloser, err | ||||
| 	} | ||||
|  | ||||
| 	// If there is only one old program (and BPF_F_REPLACE is supported), we can just replace it directly; otherwise the new program is attached alongside and the old ones are detached further below. | ||||
| 	var ( | ||||
| 		replaceProg *ebpf.Program | ||||
| 		attachFlags uint32 = unix.BPF_F_ALLOW_MULTI | ||||
| 	) | ||||
| 	if useReplaceProg { | ||||
| 		replaceProg = oldProgs[0] | ||||
| 		attachFlags |= unix.BPF_F_REPLACE | ||||
| 	} | ||||
| 	err = link.RawAttachProgram(link.RawAttachProgramOptions{ | ||||
| 		Target:  dirFd, | ||||
| 		Program: prog, | ||||
| 		Replace: replaceProg, | ||||
| 		Attach:  ebpf.AttachCGroupDevice, | ||||
| 		Flags:   attachFlags, | ||||
| 	}) | ||||
| 	if err != nil { | ||||
| 		return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err) | ||||
| 	} | ||||
| 	closer := func() error { | ||||
| 		err = link.RawDetachProgram(link.RawDetachProgramOptions{ | ||||
| 			Target:  dirFd, | ||||
| 			Program: prog, | ||||
| 			Attach:  ebpf.AttachCGroupDevice, | ||||
| 		}) | ||||
| 		if err != nil { | ||||
| 			return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", err) | ||||
| 		} | ||||
| 		// TODO: Should we attach the old filters back in this case? Otherwise | ||||
| 		//       we fail-open on a security feature, which is a bit scary. | ||||
| 		return nil | ||||
| 	} | ||||
| 	if !useReplaceProg { | ||||
| 		logLevel := logrus.DebugLevel | ||||
| 		// If there was more than one old program, give a warning (since this | ||||
| 		// really shouldn't happen with runc-managed cgroups) and then detach | ||||
| 		// all the old programs. | ||||
| 		if len(oldProgs) > 1 { | ||||
| 			// NOTE: Ideally this should be a warning but it turns out that | ||||
| 			//       systemd-managed cgroups trigger this warning (apparently | ||||
| 			//       systemd doesn't delete old non-systemd programs when | ||||
| 			//       setting properties). | ||||
| 			logrus.Infof("found more than one filter (%d) attached to a cgroup -- removing extra filters!", len(oldProgs)) | ||||
| 			logLevel = logrus.InfoLevel | ||||
| 		} | ||||
| 		for idx, oldProg := range oldProgs { | ||||
| 			// Output some extra debug info; if the program info cannot be read, the log entry is simply omitted. | ||||
| 			if info, err := oldProg.Info(); err == nil { | ||||
| 				fields := logrus.Fields{ | ||||
| 					"type": info.Type.String(), | ||||
| 					"tag":  info.Tag, | ||||
| 					"name": info.Name, | ||||
| 				} | ||||
| 				if id, ok := info.ID(); ok { | ||||
| 					fields["id"] = id | ||||
| 				} | ||||
| 				if runCount, ok := info.RunCount(); ok { | ||||
| 					fields["run_count"] = runCount | ||||
| 				} | ||||
| 				if runtime, ok := info.Runtime(); ok { | ||||
| 					fields["runtime"] = runtime.String() | ||||
| 				} | ||||
| 				logrus.WithFields(fields).Logf(logLevel, "removing old filter %d from cgroup", idx) | ||||
| 			} | ||||
| 			err = link.RawDetachProgram(link.RawDetachProgramOptions{ | ||||
| 				Target:  dirFd, | ||||
| 				Program: oldProg, | ||||
| 				Attach:  ebpf.AttachCGroupDevice, | ||||
| 			}) | ||||
| 			if err != nil { | ||||
| 				return closer, fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE) on old filter program: %w", err) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return closer, nil | ||||
| } | ||||
							
								
								
									
										20
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										20
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -50,24 +50,13 @@ func WriteFile(dir, file, data string) error { | ||||
| 		return err | ||||
| 	} | ||||
| 	defer fd.Close() | ||||
| 	if err := retryingWriteFile(fd, data); err != nil { | ||||
| 	if _, err := fd.WriteString(data); err != nil { | ||||
| 		// Having data in the error message helps in debugging. | ||||
| 		return fmt.Errorf("failed to write %q: %w", data, err) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| /* retryingWriteFile writes data to fd, repeating the write (and logging at info level) for as long as it fails with EINTR, and returns the error of the first attempt that was not interrupted. */ | ||||
| func retryingWriteFile(fd *os.File, data string) error { | ||||
| 	for { | ||||
| 		_, err := fd.Write([]byte(data)) | ||||
| 		if errors.Is(err, unix.EINTR) { | ||||
| 			logrus.Infof("interrupted while writing %s to %s", data, fd.Name()) | ||||
| 			continue | ||||
| 		} | ||||
| 		return err | ||||
| 	} | ||||
| } | ||||
|  | ||||
| const ( | ||||
| 	cgroupfsDir    = "/sys/fs/cgroup" | ||||
| 	cgroupfsPrefix = cgroupfsDir + "/" | ||||
| @ -90,7 +79,7 @@ func prepareOpenat2() error { | ||||
| 		}) | ||||
| 		if err != nil { | ||||
| 			prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err} | ||||
| 			if err != unix.ENOSYS { //nolint:errorlint // unix errors are bare | ||||
| 			if err != unix.ENOSYS { | ||||
| 				logrus.Warnf("falling back to securejoin: %s", prepErr) | ||||
| 			} else { | ||||
| 				logrus.Debug("openat2 not available, falling back to securejoin") | ||||
| @ -148,8 +137,9 @@ func openFile(dir, file string, flags int) (*os.File, error) { | ||||
| 		// | ||||
| 		// TODO: if such usage will ever be common, amend this | ||||
| 		// to reopen cgroupRootHandle and retry openat2. | ||||
| 		fdStr := strconv.Itoa(int(cgroupRootHandle.Fd())) | ||||
| 		fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr) | ||||
| 		fdPath, closer := utils.ProcThreadSelf("fd/" + strconv.Itoa(int(cgroupRootHandle.Fd()))) | ||||
| 		defer closer() | ||||
| 		fdDest, _ := os.Readlink(fdPath) | ||||
| 		if fdDest != cgroupfsDir { | ||||
| 			// Wrap the error so it is clear that cgroupRootHandle | ||||
| 			// is opened to an unexpected/wrong directory. | ||||
|  | ||||
							
								
								
									
										37
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										37
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -84,6 +84,28 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error { | ||||
| 			period = "" | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	var burst string | ||||
| 	if r.CpuBurst != nil { | ||||
| 		burst = strconv.FormatUint(*r.CpuBurst, 10) | ||||
| 		if err := cgroups.WriteFile(path, "cpu.cfs_burst_us", burst); err != nil { | ||||
| 			// This is a special case for the burst feature: current systemd and older kernels do not support it, | ||||
| 			// so a `no such file or directory` error would be raised, and we can ignore it. | ||||
| 			if !errors.Is(err, unix.ENOENT) { | ||||
| 				// Sometimes when the burst to be set is larger | ||||
| 				// than the current one, it is rejected by the kernel | ||||
| 				// (EINVAL) as old_quota/new_burst exceeds the parent | ||||
| 				// cgroup quota limit. If this happens and the quota is | ||||
| 				// going to be set, ignore the error for now and retry | ||||
| 				// after setting the quota. | ||||
| 				if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 { | ||||
| 					return err | ||||
| 				} | ||||
| 			} | ||||
| 		} else { | ||||
| 			burst = "" | ||||
| 		} | ||||
| 	} | ||||
| 	if r.CpuQuota != 0 { | ||||
| 		if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil { | ||||
| 			return err | ||||
| @ -93,7 +115,22 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error { | ||||
| 				return err | ||||
| 			} | ||||
| 		} | ||||
| 		if burst != "" { | ||||
| 			if err := cgroups.WriteFile(path, "cpu.cfs_burst_us", burst); err != nil { | ||||
| 				if !errors.Is(err, unix.ENOENT) { | ||||
| 					return err | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if r.CPUIdle != nil { | ||||
| 		idle := strconv.FormatInt(*r.CPUIdle, 10) | ||||
| 		if err := cgroups.WriteFile(path, "cpu.idle", idle); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return s.SetRtSched(path, r) | ||||
| } | ||||
|  | ||||
|  | ||||
							
								
								
									
										2
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -195,7 +195,7 @@ func cpusetEnsureParent(current string) error { | ||||
| 	} | ||||
| 	// Treat non-existing directory as cgroupfs as it will be created, | ||||
| 	// and the root cpuset directory obviously exists. | ||||
| 	if err != nil && err != unix.ENOENT { //nolint:errorlint // unix errors are bare | ||||
| 	if err != nil && err != unix.ENOENT { | ||||
| 		return &os.PathError{Op: "statfs", Path: parent, Err: err} | ||||
| 	} | ||||
|  | ||||
|  | ||||
							
								
								
									
										82
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										82
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -1,20 +1,11 @@ | ||||
| package fs | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"errors" | ||||
| 	"reflect" | ||||
|  | ||||
| 	"github.com/opencontainers/runc/libcontainer/cgroups" | ||||
| 	cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices" | ||||
| 	"github.com/opencontainers/runc/libcontainer/configs" | ||||
| 	"github.com/opencontainers/runc/libcontainer/devices" | ||||
| 	"github.com/opencontainers/runc/libcontainer/userns" | ||||
| ) | ||||
|  | ||||
| type DevicesGroup struct { | ||||
| 	TestingSkipFinalCheck bool | ||||
| } | ||||
| type DevicesGroup struct{} | ||||
|  | ||||
| func (s *DevicesGroup) Name() string { | ||||
| 	return "devices" | ||||
| @ -33,75 +24,14 @@ func (s *DevicesGroup) Apply(path string, r *configs.Resources, pid int) error { | ||||
| 	return apply(path, pid) | ||||
| } | ||||
| /* loadEmulator reads devices.list from the cgroup directory path and builds an Emulator from its contents. */ | ||||
| func loadEmulator(path string) (*cgroupdevices.Emulator, error) { | ||||
| 	list, err := cgroups.ReadFile(path, "devices.list") | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	return cgroupdevices.EmulatorFromList(bytes.NewBufferString(list)) | ||||
| } | ||||
| /* buildEmulator returns a fresh Emulator with every rule in rules applied in order; the first rule that fails to apply aborts with its error. */ | ||||
| func buildEmulator(rules []*devices.Rule) (*cgroupdevices.Emulator, error) { | ||||
| 	// This defaults to a white-list -- which is what we want! | ||||
| 	emu := &cgroupdevices.Emulator{} | ||||
| 	for _, rule := range rules { | ||||
| 		if err := emu.Apply(*rule); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 	} | ||||
| 	return emu, nil | ||||
| } | ||||
|  | ||||
| func (s *DevicesGroup) Set(path string, r *configs.Resources) error { | ||||
| 	if userns.RunningInUserNS() || r.SkipDevices { | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	// Generate two emulators, one for the current state of the cgroup and one | ||||
| 	// for the requested state by the user. | ||||
| 	current, err := loadEmulator(path) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	target, err := buildEmulator(r.Devices) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	// Compute the minimal set of transition rules needed to achieve the | ||||
| 	// requested state. | ||||
| 	transitionRules, err := current.Transition(target) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	for _, rule := range transitionRules { | ||||
| 		file := "devices.deny" | ||||
| 		if rule.Allow { | ||||
| 			file = "devices.allow" | ||||
| 		} | ||||
| 		if err := cgroups.WriteFile(path, file, rule.CgroupString()); err != nil { | ||||
| 			return err | ||||
| 	if cgroups.DevicesSetV1 == nil { | ||||
| 		if len(r.Devices) == 0 { | ||||
| 			return nil | ||||
| 		} | ||||
| 		return cgroups.ErrDevicesUnsupported | ||||
| 	} | ||||
|  | ||||
| 	// Final safety check -- ensure that the resulting state is what was | ||||
| 	// requested. This is only really correct for white-lists, but for | ||||
| 	// black-lists we can at least check that the cgroup is in the right mode. | ||||
| 	// | ||||
| 	// This safety-check is skipped for the unit tests because we cannot | ||||
| 	// currently mock devices.list correctly. | ||||
| 	if !s.TestingSkipFinalCheck { | ||||
| 		currentAfter, err := loadEmulator(path) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		if !target.IsBlacklist() && !reflect.DeepEqual(currentAfter, target) { | ||||
| 			return errors.New("resulting devices cgroup doesn't precisely match target") | ||||
| 		} else if target.IsBlacklist() != currentAfter.IsBlacklist() { | ||||
| 			return errors.New("resulting devices cgroup doesn't match target mode") | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| 	return cgroups.DevicesSetV1(path, r) | ||||
| } | ||||
|  | ||||
| func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error { | ||||
|  | ||||
							
								
								
									
										34
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										34
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -54,13 +54,13 @@ type subsystem interface { | ||||
| 	Set(path string, r *configs.Resources) error | ||||
| } | ||||
|  | ||||
| type manager struct { | ||||
| type Manager struct { | ||||
| 	mu      sync.Mutex | ||||
| 	cgroups *configs.Cgroup | ||||
| 	paths   map[string]string | ||||
| } | ||||
|  | ||||
| func NewManager(cg *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) { | ||||
| func NewManager(cg *configs.Cgroup, paths map[string]string) (*Manager, error) { | ||||
| 	// Some v1 controllers (cpu, cpuset, and devices) expect | ||||
| 	// cgroups.Resources to not be nil in Apply. | ||||
| 	if cg.Resources == nil { | ||||
| @ -78,7 +78,7 @@ func NewManager(cg *configs.Cgroup, paths map[string]string) (cgroups.Manager, e | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return &manager{ | ||||
| 	return &Manager{ | ||||
| 		cgroups: cg, | ||||
| 		paths:   paths, | ||||
| 	}, nil | ||||
| @ -105,7 +105,7 @@ func isIgnorableError(rootless bool, err error) bool { | ||||
| 	return false | ||||
| } | ||||
|  | ||||
| func (m *manager) Apply(pid int) (err error) { | ||||
| func (m *Manager) Apply(pid int) (err error) { | ||||
| 	m.mu.Lock() | ||||
| 	defer m.mu.Unlock() | ||||
|  | ||||
| @ -139,19 +139,19 @@ func (m *manager) Apply(pid int) (err error) { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (m *manager) Destroy() error { | ||||
| func (m *Manager) Destroy() error { | ||||
| 	m.mu.Lock() | ||||
| 	defer m.mu.Unlock() | ||||
| 	return cgroups.RemovePaths(m.paths) | ||||
| } | ||||
|  | ||||
| func (m *manager) Path(subsys string) string { | ||||
| func (m *Manager) Path(subsys string) string { | ||||
| 	m.mu.Lock() | ||||
| 	defer m.mu.Unlock() | ||||
| 	return m.paths[subsys] | ||||
| } | ||||
|  | ||||
| func (m *manager) GetStats() (*cgroups.Stats, error) { | ||||
| func (m *Manager) GetStats() (*cgroups.Stats, error) { | ||||
| 	m.mu.Lock() | ||||
| 	defer m.mu.Unlock() | ||||
| 	stats := cgroups.NewStats() | ||||
| @ -167,7 +167,7 @@ func (m *manager) GetStats() (*cgroups.Stats, error) { | ||||
| 	return stats, nil | ||||
| } | ||||
|  | ||||
| func (m *manager) Set(r *configs.Resources) error { | ||||
| func (m *Manager) Set(r *configs.Resources) error { | ||||
| 	if r == nil { | ||||
| 		return nil | ||||
| 	} | ||||
| @ -183,7 +183,7 @@ func (m *manager) Set(r *configs.Resources) error { | ||||
| 		if err := sys.Set(path, r); err != nil { | ||||
| 			// When rootless is true, errors from the device subsystem | ||||
| 			// are ignored, as it is really not expected to work. | ||||
| 			if m.cgroups.Rootless && sys.Name() == "devices" { | ||||
| 			if m.cgroups.Rootless && sys.Name() == "devices" && !errors.Is(err, cgroups.ErrDevicesUnsupported) { | ||||
| 				continue | ||||
| 			} | ||||
| 			// However, errors from other subsystems are not ignored. | ||||
| @ -202,7 +202,7 @@ func (m *manager) Set(r *configs.Resources) error { | ||||
|  | ||||
| // Freeze toggles the container's freezer cgroup depending on the state | ||||
| // provided | ||||
| func (m *manager) Freeze(state configs.FreezerState) error { | ||||
| func (m *Manager) Freeze(state configs.FreezerState) error { | ||||
| 	path := m.Path("freezer") | ||||
| 	if path == "" { | ||||
| 		return errors.New("cannot toggle freezer: cgroups not configured for container") | ||||
| @ -218,25 +218,25 @@ func (m *manager) Freeze(state configs.FreezerState) error { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (m *manager) GetPids() ([]int, error) { | ||||
| func (m *Manager) GetPids() ([]int, error) { | ||||
| 	return cgroups.GetPids(m.Path("devices")) | ||||
| } | ||||
|  | ||||
| func (m *manager) GetAllPids() ([]int, error) { | ||||
| func (m *Manager) GetAllPids() ([]int, error) { | ||||
| 	return cgroups.GetAllPids(m.Path("devices")) | ||||
| } | ||||
|  | ||||
| func (m *manager) GetPaths() map[string]string { | ||||
| func (m *Manager) GetPaths() map[string]string { | ||||
| 	m.mu.Lock() | ||||
| 	defer m.mu.Unlock() | ||||
| 	return m.paths | ||||
| } | ||||
|  | ||||
| func (m *manager) GetCgroups() (*configs.Cgroup, error) { | ||||
| func (m *Manager) GetCgroups() (*configs.Cgroup, error) { | ||||
| 	return m.cgroups, nil | ||||
| } | ||||
|  | ||||
| func (m *manager) GetFreezerState() (configs.FreezerState, error) { | ||||
| func (m *Manager) GetFreezerState() (configs.FreezerState, error) { | ||||
| 	dir := m.Path("freezer") | ||||
| 	// If the container doesn't have the freezer cgroup, say it's undefined. | ||||
| 	if dir == "" { | ||||
| @ -246,7 +246,7 @@ func (m *manager) GetFreezerState() (configs.FreezerState, error) { | ||||
| 	return freezer.GetState(dir) | ||||
| } | ||||
|  | ||||
| func (m *manager) Exists() bool { | ||||
| func (m *Manager) Exists() bool { | ||||
| 	return cgroups.PathExists(m.Path("devices")) | ||||
| } | ||||
|  | ||||
| @ -254,7 +254,7 @@ func OOMKillCount(path string) (uint64, error) { | ||||
| 	return fscommon.GetValueByKey(path, "memory.oom_control", "oom_kill") | ||||
| } | ||||
|  | ||||
| func (m *manager) OOMKillCount() (uint64, error) { | ||||
| func (m *Manager) OOMKillCount() (uint64, error) { | ||||
| 	c, err := OOMKillCount(m.Path("memory")) | ||||
| 	// Ignore ENOENT when rootless as it couldn't create cgroup. | ||||
| 	if err != nil && m.cgroups.Rootless && os.IsNotExist(err) { | ||||
|  | ||||
							
								
								
									
										5
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										5
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -165,9 +165,8 @@ func subsysPath(root, inner, subsystem string) (string, error) { | ||||
| 		return filepath.Join(root, filepath.Base(mnt), inner), nil | ||||
| 	} | ||||
|  | ||||
| 	// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating | ||||
| 	// process could be in a container that shares the pid namespace with the host, | ||||
| 	// and /proc/1/cgroup could point to a whole other world of cgroups. | ||||
| 	// Use GetOwnCgroupPath for dind-like cases, when cgroupns is not | ||||
| 	// available. This is ugly. | ||||
| 	parentPath, err := cgroups.GetOwnCgroupPath(subsystem) | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
|  | ||||
							
								
								
									
										33
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										33
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -2,16 +2,19 @@ package fs2 | ||||
|  | ||||
| import ( | ||||
| 	"bufio" | ||||
| 	"errors" | ||||
| 	"os" | ||||
| 	"strconv" | ||||
|  | ||||
| 	"golang.org/x/sys/unix" | ||||
|  | ||||
| 	"github.com/opencontainers/runc/libcontainer/cgroups" | ||||
| 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon" | ||||
| 	"github.com/opencontainers/runc/libcontainer/configs" | ||||
| ) | ||||
|  | ||||
| func isCpuSet(r *configs.Resources) bool { | ||||
| 	return r.CpuWeight != 0 || r.CpuQuota != 0 || r.CpuPeriod != 0 | ||||
| 	return r.CpuWeight != 0 || r.CpuQuota != 0 || r.CpuPeriod != 0 || r.CPUIdle != nil || r.CpuBurst != nil | ||||
| } | ||||
|  | ||||
| func setCpu(dirPath string, r *configs.Resources) error { | ||||
| @ -19,6 +22,12 @@ func setCpu(dirPath string, r *configs.Resources) error { | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	if r.CPUIdle != nil { | ||||
| 		if err := cgroups.WriteFile(dirPath, "cpu.idle", strconv.FormatInt(*r.CPUIdle, 10)); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// NOTE: .CpuShares is not used here. Conversion is the caller's responsibility. | ||||
| 	if r.CpuWeight != 0 { | ||||
| 		if err := cgroups.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(r.CpuWeight, 10)); err != nil { | ||||
| @ -26,6 +35,23 @@ func setCpu(dirPath string, r *configs.Resources) error { | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	var burst string | ||||
| 	if r.CpuBurst != nil { | ||||
| 		burst = strconv.FormatUint(*r.CpuBurst, 10) | ||||
| 		if err := cgroups.WriteFile(dirPath, "cpu.max.burst", burst); err != nil { | ||||
| 			// Sometimes when the burst to be set is larger | ||||
| 			// than the current one, it is rejected by the kernel | ||||
| 			// (EINVAL) as old_quota/new_burst exceeds the parent | ||||
| 			// cgroup quota limit. If this happens and the quota is | ||||
| 			// going to be set, ignore the error for now and retry | ||||
| 			// after setting the quota. | ||||
| 			if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 { | ||||
| 				return err | ||||
| 			} | ||||
| 		} else { | ||||
| 			burst = "" | ||||
| 		} | ||||
| 	} | ||||
| 	if r.CpuQuota != 0 || r.CpuPeriod != 0 { | ||||
| 		str := "max" | ||||
| 		if r.CpuQuota > 0 { | ||||
| @ -41,6 +67,11 @@ func setCpu(dirPath string, r *configs.Resources) error { | ||||
| 		if err := cgroups.WriteFile(dirPath, "cpu.max", str); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		if burst != "" { | ||||
| 			if err := cgroups.WriteFile(dirPath, "cpu.max.burst", burst); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return nil | ||||
|  | ||||
							
								
								
									
										3
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -55,6 +55,9 @@ func _defaultDirPath(root, cgPath, cgParent, cgName string) (string, error) { | ||||
| 		return filepath.Join(root, innerPath), nil | ||||
| 	} | ||||
|  | ||||
| 	// we don't need to use /proc/thread-self here because runc always runs | ||||
| 	// with every thread in the same cgroup. This lets us avoid having to do | ||||
| 	// runtime.LockOSThread. | ||||
| 	ownCgroup, err := parseCgroupFile("/proc/self/cgroup") | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
|  | ||||
							
								
								
									
										75
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										75
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -1,75 +0,0 @@ | ||||
| package fs2 | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
|  | ||||
| 	"golang.org/x/sys/unix" | ||||
|  | ||||
| 	"github.com/opencontainers/runc/libcontainer/cgroups/ebpf" | ||||
| 	"github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter" | ||||
| 	"github.com/opencontainers/runc/libcontainer/configs" | ||||
| 	"github.com/opencontainers/runc/libcontainer/devices" | ||||
| 	"github.com/opencontainers/runc/libcontainer/userns" | ||||
| ) | ||||
| /* isRWM reports whether perms contains all three of the characters 'r', 'w' and 'm'. */ | ||||
| func isRWM(perms devices.Permissions) bool { | ||||
| 	var r, w, m bool | ||||
| 	for _, perm := range perms { | ||||
| 		switch perm { | ||||
| 		case 'r': | ||||
| 			r = true | ||||
| 		case 'w': | ||||
| 			w = true | ||||
| 		case 'm': | ||||
| 			m = true | ||||
| 		} | ||||
| 	} | ||||
| 	return r && w && m | ||||
| } | ||||
|  | ||||
| // canSkipEBPFError reports whether an eBPF error may be ignored for r: true when running in a user namespace, or when every rule in r.Devices is an allow rule granting full rwm access. This is similar to the logic applied in crun for handling errors from bpf(2) | ||||
| // <https://github.com/containers/crun/blob/0.17/src/libcrun/cgroup.c#L2438-L2470>. | ||||
| func canSkipEBPFError(r *configs.Resources) bool { | ||||
| 	// If we're running in a user namespace we can ignore eBPF rules because we | ||||
| 	// usually cannot use bpf(2), as well as rootless containers usually don't | ||||
| 	// have the necessary privileges to mknod(2) device inodes or access | ||||
| 	// host-level instances (though ideally we would be blocking device access | ||||
| 	// for rootless containers anyway). | ||||
| 	if userns.RunningInUserNS() { | ||||
| 		return true | ||||
| 	} | ||||
|  | ||||
| 	// We cannot ignore an eBPF load error if any rule is a block rule or it | ||||
| 	// doesn't permit all access modes. | ||||
| 	// | ||||
| 	// NOTE: This will sometimes trigger in cases where access modes are split | ||||
| 	//       between different rules but to handle this correctly would require | ||||
| 	//       using ".../libcontainer/cgroup/devices".Emulator. | ||||
| 	for _, dev := range r.Devices { | ||||
| 		if !dev.Allow || !isRWM(dev.Permissions) { | ||||
| 			return false | ||||
| 		} | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| // setDevices builds a device filter from r.Devices and loads/attaches it to | ||||
| // the cgroup directory dirPath. | ||||
| // | ||||
| // It is a no-op when r.SkipDevices is set. A failure to load or attach the | ||||
| // filter is ignored when canSkipEBPFError(r) allows it; every other error is | ||||
| // returned to the caller. | ||||
| func setDevices(dirPath string, r *configs.Resources) error { | ||||
| 	if r.SkipDevices { | ||||
| 		return nil | ||||
| 	} | ||||
| 	insts, license, err := devicefilter.DeviceFilter(r.Devices) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0o600) | ||||
| 	if err != nil { | ||||
| 		// Wrap the underlying error so the reason for the failure is not | ||||
| 		// lost and callers can still inspect it with errors.Is/As. | ||||
| 		return fmt.Errorf("cannot get dir FD for %s: %w", dirPath, err) | ||||
| 	} | ||||
| 	defer unix.Close(dirFD) | ||||
| 	if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil { | ||||
| 		if !canSkipEBPFError(r) { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
							
								
								
									
										99
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										99
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -13,7 +13,7 @@ import ( | ||||
|  | ||||
| type parseError = fscommon.ParseError | ||||
|  | ||||
| type manager struct { | ||||
| type Manager struct { | ||||
| 	config *configs.Cgroup | ||||
| 	// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope" | ||||
| 	dirPath string | ||||
| @ -25,7 +25,7 @@ type manager struct { | ||||
| // NewManager creates a manager for cgroup v2 unified hierarchy. | ||||
| // dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope". | ||||
| // If dirPath is empty, it is automatically set using config. | ||||
| func NewManager(config *configs.Cgroup, dirPath string) (cgroups.Manager, error) { | ||||
| func NewManager(config *configs.Cgroup, dirPath string) (*Manager, error) { | ||||
| 	if dirPath == "" { | ||||
| 		var err error | ||||
| 		dirPath, err = defaultDirPath(config) | ||||
| @ -34,14 +34,14 @@ func NewManager(config *configs.Cgroup, dirPath string) (cgroups.Manager, error) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	m := &manager{ | ||||
| 	m := &Manager{ | ||||
| 		config:  config, | ||||
| 		dirPath: dirPath, | ||||
| 	} | ||||
| 	return m, nil | ||||
| } | ||||
|  | ||||
| func (m *manager) getControllers() error { | ||||
| func (m *Manager) getControllers() error { | ||||
| 	if m.controllers != nil { | ||||
| 		return nil | ||||
| 	} | ||||
| @ -62,7 +62,7 @@ func (m *manager) getControllers() error { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (m *manager) Apply(pid int) error { | ||||
| func (m *Manager) Apply(pid int) error { | ||||
| 	if err := CreateCgroupPath(m.dirPath, m.config); err != nil { | ||||
| 		// Related tests: | ||||
| 		// - "runc create (no limits + no cgrouppath + no permission) succeeds" | ||||
| @ -84,15 +84,15 @@ func (m *manager) Apply(pid int) error { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (m *manager) GetPids() ([]int, error) { | ||||
| func (m *Manager) GetPids() ([]int, error) { | ||||
| 	return cgroups.GetPids(m.dirPath) | ||||
| } | ||||
|  | ||||
| func (m *manager) GetAllPids() ([]int, error) { | ||||
| func (m *Manager) GetAllPids() ([]int, error) { | ||||
| 	return cgroups.GetAllPids(m.dirPath) | ||||
| } | ||||
|  | ||||
| func (m *manager) GetStats() (*cgroups.Stats, error) { | ||||
| func (m *Manager) GetStats() (*cgroups.Stats, error) { | ||||
| 	var errs []error | ||||
|  | ||||
| 	st := cgroups.NewStats() | ||||
| @ -114,6 +114,17 @@ func (m *manager) GetStats() (*cgroups.Stats, error) { | ||||
| 	if err := statCpu(m.dirPath, st); err != nil && !os.IsNotExist(err) { | ||||
| 		errs = append(errs, err) | ||||
| 	} | ||||
| 	// PSI (since kernel 4.20). | ||||
| 	var err error | ||||
| 	if st.CpuStats.PSI, err = statPSI(m.dirPath, "cpu.pressure"); err != nil { | ||||
| 		errs = append(errs, err) | ||||
| 	} | ||||
| 	if st.MemoryStats.PSI, err = statPSI(m.dirPath, "memory.pressure"); err != nil { | ||||
| 		errs = append(errs, err) | ||||
| 	} | ||||
| 	if st.BlkioStats.PSI, err = statPSI(m.dirPath, "io.pressure"); err != nil { | ||||
| 		errs = append(errs, err) | ||||
| 	} | ||||
| 	// hugetlb (since kernel 5.6) | ||||
| 	if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) { | ||||
| 		errs = append(errs, err) | ||||
| @ -122,13 +133,17 @@ func (m *manager) GetStats() (*cgroups.Stats, error) { | ||||
| 	if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) { | ||||
| 		errs = append(errs, err) | ||||
| 	} | ||||
| 	// misc (since kernel 5.13) | ||||
| 	if err := statMisc(m.dirPath, st); err != nil && !os.IsNotExist(err) { | ||||
| 		errs = append(errs, err) | ||||
| 	} | ||||
| 	if len(errs) > 0 && !m.config.Rootless { | ||||
| 		return st, fmt.Errorf("error while statting cgroup v2: %+v", errs) | ||||
| 	} | ||||
| 	return st, nil | ||||
| } | ||||
|  | ||||
| func (m *manager) Freeze(state configs.FreezerState) error { | ||||
| func (m *Manager) Freeze(state configs.FreezerState) error { | ||||
| 	if m.config.Resources == nil { | ||||
| 		return errors.New("cannot toggle freezer: cgroups not configured for container") | ||||
| 	} | ||||
| @ -139,15 +154,15 @@ func (m *manager) Freeze(state configs.FreezerState) error { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (m *manager) Destroy() error { | ||||
| func (m *Manager) Destroy() error { | ||||
| 	return cgroups.RemovePath(m.dirPath) | ||||
| } | ||||
|  | ||||
| func (m *manager) Path(_ string) string { | ||||
| func (m *Manager) Path(_ string) string { | ||||
| 	return m.dirPath | ||||
| } | ||||
|  | ||||
| func (m *manager) Set(r *configs.Resources) error { | ||||
| func (m *Manager) Set(r *configs.Resources) error { | ||||
| 	if r == nil { | ||||
| 		return nil | ||||
| 	} | ||||
| @ -175,8 +190,10 @@ func (m *manager) Set(r *configs.Resources) error { | ||||
| 	// When rootless is true, errors from the device subsystem are ignored because it is really not expected to work. | ||||
| 	// However, errors from other subsystems are not ignored. | ||||
| 	// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" | ||||
| 	if err := setDevices(m.dirPath, r); err != nil && !m.config.Rootless { | ||||
| 		return err | ||||
| 	if err := setDevices(m.dirPath, r); err != nil { | ||||
| 		if !m.config.Rootless || errors.Is(err, cgroups.ErrDevicesUnsupported) { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	// cpuset (since kernel 5.0) | ||||
| 	if err := setCpuset(m.dirPath, r); err != nil { | ||||
| @ -201,7 +218,17 @@ func (m *manager) Set(r *configs.Resources) error { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (m *manager) setUnified(res map[string]string) error { | ||||
| // setDevices applies the device rules in r to the cgroup at dirPath through | ||||
| // the cgroups.DevicesSetV2 hook. When no hook is set, it returns | ||||
| // cgroups.ErrDevicesUnsupported if r carries any device rules, and nil | ||||
| // otherwise. | ||||
| func setDevices(dirPath string, r *configs.Resources) error { | ||||
| 	if cgroups.DevicesSetV2 != nil { | ||||
| 		return cgroups.DevicesSetV2(dirPath, r) | ||||
| 	} | ||||
| 	if len(r.Devices) == 0 { | ||||
| 		return nil | ||||
| 	} | ||||
| 	return cgroups.ErrDevicesUnsupported | ||||
| } | ||||
|  | ||||
| func (m *Manager) setUnified(res map[string]string) error { | ||||
| 	for k, v := range res { | ||||
| 		if strings.Contains(k, "/") { | ||||
| 			return fmt.Errorf("unified resource %q must be a file name (no slashes)", k) | ||||
| @ -227,21 +254,21 @@ func (m *manager) setUnified(res map[string]string) error { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (m *manager) GetPaths() map[string]string { | ||||
| func (m *Manager) GetPaths() map[string]string { | ||||
| 	paths := make(map[string]string, 1) | ||||
| 	paths[""] = m.dirPath | ||||
| 	return paths | ||||
| } | ||||
|  | ||||
| func (m *manager) GetCgroups() (*configs.Cgroup, error) { | ||||
| func (m *Manager) GetCgroups() (*configs.Cgroup, error) { | ||||
| 	return m.config, nil | ||||
| } | ||||
|  | ||||
| func (m *manager) GetFreezerState() (configs.FreezerState, error) { | ||||
| func (m *Manager) GetFreezerState() (configs.FreezerState, error) { | ||||
| 	return getFreezer(m.dirPath) | ||||
| } | ||||
|  | ||||
| func (m *manager) Exists() bool { | ||||
| func (m *Manager) Exists() bool { | ||||
| 	return cgroups.PathExists(m.dirPath) | ||||
| } | ||||
|  | ||||
| @ -249,7 +276,7 @@ func OOMKillCount(path string) (uint64, error) { | ||||
| 	return fscommon.GetValueByKey(path, "memory.events", "oom_kill") | ||||
| } | ||||
|  | ||||
| func (m *manager) OOMKillCount() (uint64, error) { | ||||
| func (m *Manager) OOMKillCount() (uint64, error) { | ||||
| 	c, err := OOMKillCount(m.dirPath) | ||||
| 	if err != nil && m.config.Rootless && os.IsNotExist(err) { | ||||
| 		err = nil | ||||
| @ -257,3 +284,35 @@ func (m *manager) OOMKillCount() (uint64, error) { | ||||
|  | ||||
| 	return c, err | ||||
| } | ||||
|  | ||||
| // CheckMemoryUsage compares the positive memory limits in r (Memory and | ||||
| // MemorySwap) against the value of "memory.current" in dirPath and returns | ||||
| // an error for a limit that is less than or equal to that usage. | ||||
| // | ||||
| // The check only runs when r.MemoryCheckBeforeUpdate is set and at least one | ||||
| // of the two limits is positive. It is best-effort: if the current usage | ||||
| // cannot be read, nil is returned. | ||||
| func CheckMemoryUsage(dirPath string, r *configs.Resources) error { | ||||
| 	if !r.MemoryCheckBeforeUpdate || (r.Memory <= 0 && r.MemorySwap <= 0) { | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	current, err := fscommon.GetCgroupParamUint(dirPath, "memory.current") | ||||
| 	if err != nil { | ||||
| 		// Best effort only: the cgroup may not exist yet, or the read may | ||||
| 		// fail for some other reason. Neither should fail the update. | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	if r.MemorySwap > 0 && uint64(r.MemorySwap) <= current { | ||||
| 		return fmt.Errorf("rejecting memory+swap limit %d <= usage %d", r.MemorySwap, current) | ||||
| 	} | ||||
| 	if r.Memory > 0 && uint64(r.Memory) <= current { | ||||
| 		return fmt.Errorf("rejecting memory limit %d <= usage %d", r.Memory, current) | ||||
| 	} | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
							
								
								
									
										5
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										5
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -40,6 +40,11 @@ func setMemory(dirPath string, r *configs.Resources) error { | ||||
| 	if !isMemorySet(r) { | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	if err := CheckMemoryUsage(dirPath, r); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
|  | ||||
							
								
								
									
										52
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/misc.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/misc.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,52 @@ | ||||
| package fs2 | ||||
|  | ||||
| import ( | ||||
| 	"bufio" | ||||
| 	"os" | ||||
| 	"strings" | ||||
|  | ||||
| 	"github.com/opencontainers/runc/libcontainer/cgroups" | ||||
| 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon" | ||||
| ) | ||||
|  | ||||
| func statMisc(dirPath string, stats *cgroups.Stats) error { | ||||
| 	for _, file := range []string{"current", "events"} { | ||||
| 		fd, err := cgroups.OpenFile(dirPath, "misc."+file, os.O_RDONLY) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		s := bufio.NewScanner(fd) | ||||
| 		for s.Scan() { | ||||
| 			key, value, err := fscommon.ParseKeyValue(s.Text()) | ||||
| 			if err != nil { | ||||
| 				fd.Close() | ||||
| 				return err | ||||
| 			} | ||||
|  | ||||
| 			key = strings.TrimSuffix(key, ".max") | ||||
|  | ||||
| 			if _, ok := stats.MiscStats[key]; !ok { | ||||
| 				stats.MiscStats[key] = cgroups.MiscStats{} | ||||
| 			} | ||||
|  | ||||
| 			tmp := stats.MiscStats[key] | ||||
|  | ||||
| 			switch file { | ||||
| 			case "current": | ||||
| 				tmp.Usage = value | ||||
| 			case "events": | ||||
| 				tmp.Events = value | ||||
| 			} | ||||
|  | ||||
| 			stats.MiscStats[key] = tmp | ||||
| 		} | ||||
| 		fd.Close() | ||||
|  | ||||
| 		if err := s.Err(); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
							
								
								
									
										89
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/psi.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										89
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/psi.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,89 @@ | ||||
| package fs2 | ||||
|  | ||||
| import ( | ||||
| 	"bufio" | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
|  | ||||
| 	"golang.org/x/sys/unix" | ||||
|  | ||||
| 	"github.com/opencontainers/runc/libcontainer/cgroups" | ||||
| ) | ||||
|  | ||||
| // statPSI reads the pressure file named file (e.g. "cpu.pressure") from the | ||||
| // cgroup directory dirPath and returns its "some" and "full" records. | ||||
| // | ||||
| // It returns (nil, nil) when the file does not exist or when reading it | ||||
| // fails with ENOTSUP. Malformed records and other read errors are reported | ||||
| // as a parseError carrying dirPath and file. | ||||
| func statPSI(dirPath string, file string) (*cgroups.PSIStats, error) { | ||||
| 	f, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY) | ||||
| 	if err != nil { | ||||
| 		if errors.Is(err, os.ErrNotExist) { | ||||
| 			// Kernel < 4.20, or CONFIG_PSI is not set, | ||||
| 			// or PSI stats are turned off for the cgroup | ||||
| 			// ("echo 0 > cgroup.pressure", kernel >= 6.1). | ||||
| 			return nil, nil | ||||
| 		} | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	defer f.Close() | ||||
|  | ||||
| 	var psistats cgroups.PSIStats | ||||
| 	sc := bufio.NewScanner(f) | ||||
| 	for sc.Scan() { | ||||
| 		parts := strings.Fields(sc.Text()) | ||||
| 		if len(parts) == 0 { | ||||
| 			// Blank or whitespace-only line: there is nothing to parse, | ||||
| 			// and indexing parts[0] below would panic. | ||||
| 			continue | ||||
| 		} | ||||
| 		var pv *cgroups.PSIData | ||||
| 		switch parts[0] { | ||||
| 		case "some": | ||||
| 			pv = &psistats.Some | ||||
| 		case "full": | ||||
| 			pv = &psistats.Full | ||||
| 		} | ||||
| 		// Lines with any other leading word are ignored. | ||||
| 		if pv != nil { | ||||
| 			*pv, err = parsePSIData(parts[1:]) | ||||
| 			if err != nil { | ||||
| 				return nil, &parseError{Path: dirPath, File: file, Err: err} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	if err := sc.Err(); err != nil { | ||||
| 		if errors.Is(err, unix.ENOTSUP) { | ||||
| 			// Some kernels (e.g. CS9) may return ENOTSUP on read | ||||
| 			// if psi=1 kernel cmdline parameter is required. | ||||
| 			return nil, nil | ||||
| 		} | ||||
| 		return nil, &parseError{Path: dirPath, File: file, Err: err} | ||||
| 	} | ||||
| 	return &psistats, nil | ||||
| } | ||||
|  | ||||
| // parsePSIData converts the "key=value" fields of one PSI record into a | ||||
| // cgroups.PSIData. The avg10, avg60 and avg300 keys are parsed as floats and | ||||
| // total as an unsigned integer; any other key is ignored. | ||||
| // | ||||
| // An error is returned for a field without "=" or for a value that does not | ||||
| // parse. | ||||
| func parsePSIData(psi []string) (cgroups.PSIData, error) { | ||||
| 	var out cgroups.PSIData | ||||
| 	for _, field := range psi { | ||||
| 		name, raw, found := strings.Cut(field, "=") | ||||
| 		if !found { | ||||
| 			return out, fmt.Errorf("invalid psi data: %q", field) | ||||
| 		} | ||||
| 		switch name { | ||||
| 		case "avg10", "avg60", "avg300": | ||||
| 			avg, err := strconv.ParseFloat(raw, 64) | ||||
| 			if err != nil { | ||||
| 				return out, fmt.Errorf("invalid %s PSI value: %w", name, err) | ||||
| 			} | ||||
| 			switch name { | ||||
| 			case "avg10": | ||||
| 				out.Avg10 = avg | ||||
| 			case "avg60": | ||||
| 				out.Avg60 = avg | ||||
| 			default: | ||||
| 				out.Avg300 = avg | ||||
| 			} | ||||
| 		case "total": | ||||
| 			total, err := strconv.ParseUint(raw, 10, 64) | ||||
| 			if err != nil { | ||||
| 				return out, fmt.Errorf("invalid %s PSI value: %w", name, err) | ||||
| 			} | ||||
| 			out.Total = total | ||||
| 		} | ||||
| 	} | ||||
| 	return out, nil | ||||
| } | ||||
							
								
								
									
										27
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										27
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -32,9 +32,22 @@ type CpuUsage struct { | ||||
| 	UsageInUsermode uint64 `json:"usage_in_usermode"` | ||||
| } | ||||
|  | ||||
| type PSIData struct { | ||||
| 	Avg10  float64 `json:"avg10"` | ||||
| 	Avg60  float64 `json:"avg60"` | ||||
| 	Avg300 float64 `json:"avg300"` | ||||
| 	Total  uint64  `json:"total"` | ||||
| } | ||||
|  | ||||
| type PSIStats struct { | ||||
| 	Some PSIData `json:"some,omitempty"` | ||||
| 	Full PSIData `json:"full,omitempty"` | ||||
| } | ||||
|  | ||||
| type CpuStats struct { | ||||
| 	CpuUsage       CpuUsage       `json:"cpu_usage,omitempty"` | ||||
| 	ThrottlingData ThrottlingData `json:"throttling_data,omitempty"` | ||||
| 	PSI            *PSIStats      `json:"psi,omitempty"` | ||||
| } | ||||
|  | ||||
| type CPUSetStats struct { | ||||
| @ -91,6 +104,7 @@ type MemoryStats struct { | ||||
| 	UseHierarchy bool `json:"use_hierarchy"` | ||||
|  | ||||
| 	Stats map[string]uint64 `json:"stats,omitempty"` | ||||
| 	PSI   *PSIStats         `json:"psi,omitempty"` | ||||
| } | ||||
|  | ||||
| type PageUsageByNUMA struct { | ||||
| @ -135,6 +149,7 @@ type BlkioStats struct { | ||||
| 	IoMergedRecursive       []BlkioStatEntry `json:"io_merged_recursive,omitempty"` | ||||
| 	IoTimeRecursive         []BlkioStatEntry `json:"io_time_recursive,omitempty"` | ||||
| 	SectorsRecursive        []BlkioStatEntry `json:"sectors_recursive,omitempty"` | ||||
| 	PSI                     *PSIStats        `json:"psi,omitempty"` | ||||
| } | ||||
|  | ||||
| type HugetlbStats struct { | ||||
| @ -157,6 +172,13 @@ type RdmaStats struct { | ||||
| 	RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"` | ||||
| } | ||||
|  | ||||
| type MiscStats struct { | ||||
| 	// current resource usage for a key in misc | ||||
| 	Usage uint64 `json:"usage,omitempty"` | ||||
| 	// number of times the resource usage was about to go over the max boundary | ||||
| 	Events uint64 `json:"events,omitempty"` | ||||
| } | ||||
|  | ||||
| type Stats struct { | ||||
| 	CpuStats    CpuStats    `json:"cpu_stats,omitempty"` | ||||
| 	CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"` | ||||
| @ -166,10 +188,13 @@ type Stats struct { | ||||
| 	// the map is in the format "size of hugepage: stats of the hugepage" | ||||
| 	HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` | ||||
| 	RdmaStats    RdmaStats               `json:"rdma_stats,omitempty"` | ||||
| 	// the map is in the format "misc resource name: stats of the key" | ||||
| 	MiscStats map[string]MiscStats `json:"misc_stats,omitempty"` | ||||
| } | ||||
|  | ||||
| func NewStats() *Stats { | ||||
| 	memoryStats := MemoryStats{Stats: make(map[string]uint64)} | ||||
| 	hugetlbStats := make(map[string]HugetlbStats) | ||||
| 	return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats} | ||||
| 	miscStats := make(map[string]MiscStats) | ||||
| 	return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats, MiscStats: miscStats} | ||||
| } | ||||
|  | ||||
							
								
								
									
										93
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										93
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -36,13 +36,13 @@ func IsCgroup2UnifiedMode() bool { | ||||
| 		var st unix.Statfs_t | ||||
| 		err := unix.Statfs(unifiedMountpoint, &st) | ||||
| 		if err != nil { | ||||
| 			level := logrus.WarnLevel | ||||
| 			if os.IsNotExist(err) && userns.RunningInUserNS() { | ||||
| 				// ignore the "not found" error if running in userns | ||||
| 				logrus.WithError(err).Debugf("%s missing, assuming cgroup v1", unifiedMountpoint) | ||||
| 				isUnified = false | ||||
| 				return | ||||
| 				// For rootless containers, sweep it under the rug. | ||||
| 				level = logrus.DebugLevel | ||||
| 			} | ||||
| 			panic(fmt.Sprintf("cannot statfs cgroup root: %s", err)) | ||||
| 			logrus.StandardLogger().Logf(level, | ||||
| 				"statfs %s: %v; assuming cgroup v1", unifiedMountpoint, err) | ||||
| 		} | ||||
| 		isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC | ||||
| 	}) | ||||
| @ -217,21 +217,26 @@ func PathExists(path string) bool { | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| func EnterPid(cgroupPaths map[string]string, pid int) error { | ||||
| 	for _, path := range cgroupPaths { | ||||
| 		if PathExists(path) { | ||||
| 			if err := WriteCgroupProc(path, pid); err != nil { | ||||
| 				return err | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| // rmdir tries to remove a directory, optionally retrying on EBUSY. | ||||
| func rmdir(path string, retry bool) error { | ||||
| 	delay := time.Millisecond | ||||
| 	tries := 10 | ||||
|  | ||||
| func rmdir(path string) error { | ||||
| again: | ||||
| 	err := unix.Rmdir(path) | ||||
| 	if err == nil || err == unix.ENOENT { //nolint:errorlint // unix errors are bare | ||||
| 	switch err { // nolint:errorlint // unix errors are bare | ||||
| 	case nil, unix.ENOENT: | ||||
| 		return nil | ||||
| 	case unix.EINTR: | ||||
| 		goto again | ||||
| 	case unix.EBUSY: | ||||
| 		if retry && tries > 0 { | ||||
| 			time.Sleep(delay) | ||||
| 			delay *= 2 | ||||
| 			tries-- | ||||
| 			goto again | ||||
|  | ||||
| 		} | ||||
| 	} | ||||
| 	return &os.PathError{Op: "rmdir", Path: path, Err: err} | ||||
| } | ||||
| @ -239,68 +244,42 @@ func rmdir(path string) error { | ||||
| // RemovePath aims to remove cgroup path. It does so recursively, | ||||
| // by removing any subdirectories (sub-cgroups) first. | ||||
| func RemovePath(path string) error { | ||||
| 	// try the fast path first | ||||
| 	if err := rmdir(path); err == nil { | ||||
| 	// Try the fast path first. | ||||
| 	if err := rmdir(path, false); err == nil { | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	infos, err := os.ReadDir(path) | ||||
| 	if err != nil { | ||||
| 		if os.IsNotExist(err) { | ||||
| 			err = nil | ||||
| 		} | ||||
| 	if err != nil && !os.IsNotExist(err) { | ||||
| 		return err | ||||
| 	} | ||||
| 	for _, info := range infos { | ||||
| 		if info.IsDir() { | ||||
| 			// We should remove subcgroups dir first | ||||
| 			// We should remove subcgroup first. | ||||
| 			if err = RemovePath(filepath.Join(path, info.Name())); err != nil { | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	if err == nil { | ||||
| 		err = rmdir(path) | ||||
| 		err = rmdir(path, true) | ||||
| 	} | ||||
| 	return err | ||||
| } | ||||
|  | ||||
| // RemovePaths iterates over the provided paths removing them. | ||||
| // We trying to remove all paths five times with increasing delay between tries. | ||||
| // If after all there are not removed cgroups - appropriate error will be | ||||
| // returned. | ||||
| func RemovePaths(paths map[string]string) (err error) { | ||||
| 	const retries = 5 | ||||
| 	delay := 10 * time.Millisecond | ||||
| 	for i := 0; i < retries; i++ { | ||||
| 		if i != 0 { | ||||
| 			time.Sleep(delay) | ||||
| 			delay *= 2 | ||||
| 		} | ||||
| 		for s, p := range paths { | ||||
| 			if err := RemovePath(p); err != nil { | ||||
| 				// do not log intermediate iterations | ||||
| 				switch i { | ||||
| 				case 0: | ||||
| 					logrus.WithError(err).Warnf("Failed to remove cgroup (will retry)") | ||||
| 				case retries - 1: | ||||
| 					logrus.WithError(err).Error("Failed to remove cgroup") | ||||
| 				} | ||||
| 			} | ||||
| 			_, err := os.Stat(p) | ||||
| 			// We need this strange way of checking cgroups existence because | ||||
| 			// RemoveAll almost always returns error, even on already removed | ||||
| 			// cgroups | ||||
| 			if os.IsNotExist(err) { | ||||
| 				delete(paths, s) | ||||
| 			} | ||||
| 		} | ||||
| 		if len(paths) == 0 { | ||||
| 			//nolint:ineffassign,staticcheck // done to help garbage collecting: opencontainers/runc#2506 | ||||
| 			paths = make(map[string]string) | ||||
| 			return nil | ||||
| 	for s, p := range paths { | ||||
| 		if err := RemovePath(p); err == nil { | ||||
| 			delete(paths, s) | ||||
| 		} | ||||
| 	} | ||||
| 	if len(paths) == 0 { | ||||
| 		//nolint:ineffassign,staticcheck // done to help garbage collecting: opencontainers/runc#2506 | ||||
| 		// TODO: switch to clear once Go < 1.21 is not supported. | ||||
| 		paths = make(map[string]string) | ||||
| 		return nil | ||||
| 	} | ||||
| 	return fmt.Errorf("Failed to remove paths: %v", paths) | ||||
| } | ||||
|  | ||||
|  | ||||
							
								
								
									
										31
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										31
									
								
								vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -99,11 +99,12 @@ func tryDefaultPath(cgroupPath, subsystem string) string { | ||||
| // expensive), so it is assumed that cgroup mounts are not being changed. | ||||
| func readCgroupMountinfo() ([]*mountinfo.Info, error) { | ||||
| 	readMountinfoOnce.Do(func() { | ||||
| 		// mountinfo.GetMounts uses /proc/thread-self, so we can use it without | ||||
| 		// issues. | ||||
| 		cgroupMountinfo, readMountinfoErr = mountinfo.GetMounts( | ||||
| 			mountinfo.FSTypeFilter("cgroup"), | ||||
| 		) | ||||
| 	}) | ||||
|  | ||||
| 	return cgroupMountinfo, readMountinfoErr | ||||
| } | ||||
|  | ||||
| @ -196,6 +197,9 @@ func getCgroupMountsV1(all bool) ([]Mount, error) { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	// We don't need to use /proc/thread-self here because runc always runs | ||||
| 	// with every thread in the same cgroup. This lets us avoid having to do | ||||
| 	// runtime.LockOSThread. | ||||
| 	allSubsystems, err := ParseCgroupFile("/proc/self/cgroup") | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| @ -214,6 +218,10 @@ func GetOwnCgroup(subsystem string) (string, error) { | ||||
| 	if IsCgroup2UnifiedMode() { | ||||
| 		return "", errUnified | ||||
| 	} | ||||
|  | ||||
| 	// We don't need to use /proc/thread-self here because runc always runs | ||||
| 	// with every thread in the same cgroup. This lets us avoid having to do | ||||
| 	// runtime.LockOSThread. | ||||
| 	cgroups, err := ParseCgroupFile("/proc/self/cgroup") | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| @ -236,27 +244,6 @@ func GetOwnCgroupPath(subsystem string) (string, error) { | ||||
| 	return getCgroupPathHelper(subsystem, cgroup) | ||||
| } | ||||
|  | ||||
| func GetInitCgroup(subsystem string) (string, error) { | ||||
| 	if IsCgroup2UnifiedMode() { | ||||
| 		return "", errUnified | ||||
| 	} | ||||
| 	cgroups, err := ParseCgroupFile("/proc/1/cgroup") | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
|  | ||||
| 	return getControllerPath(subsystem, cgroups) | ||||
| } | ||||
|  | ||||
| func GetInitCgroupPath(subsystem string) (string, error) { | ||||
| 	cgroup, err := GetInitCgroup(subsystem) | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
|  | ||||
| 	return getCgroupPathHelper(subsystem, cgroup) | ||||
| } | ||||
|  | ||||
| func getCgroupPathHelper(subsystem, cgroup string) (string, error) { | ||||
| 	mnt, root, err := FindCgroupMountpointAndRoot("", subsystem) | ||||
| 	if err != nil { | ||||
|  | ||||
							
								
								
									
										8
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -2,8 +2,8 @@ package configs | ||||
|  | ||||
| import "fmt" | ||||
|  | ||||
| // blockIODevice holds major:minor format supported in blkio cgroup | ||||
| type blockIODevice struct { | ||||
| // BlockIODevice holds major:minor format supported in blkio cgroup. | ||||
| type BlockIODevice struct { | ||||
| 	// Major is the device's major number | ||||
| 	Major int64 `json:"major"` | ||||
| 	// Minor is the device's minor number | ||||
| @ -12,7 +12,7 @@ type blockIODevice struct { | ||||
|  | ||||
| // WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair | ||||
| type WeightDevice struct { | ||||
| 	blockIODevice | ||||
| 	BlockIODevice | ||||
| 	// Weight is the bandwidth rate for the device, range is from 10 to 1000 | ||||
| 	Weight uint16 `json:"weight"` | ||||
| 	// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only | ||||
| @ -41,7 +41,7 @@ func (wd *WeightDevice) LeafWeightString() string { | ||||
|  | ||||
| // ThrottleDevice struct holds a `major:minor rate_per_second` pair | ||||
| type ThrottleDevice struct { | ||||
| 	blockIODevice | ||||
| 	BlockIODevice | ||||
| 	// Rate is the IO rate limit per cgroup per device | ||||
| 	Rate uint64 `json:"rate"` | ||||
| } | ||||
|  | ||||
							
								
								
									
										11
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										11
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -69,6 +69,9 @@ type Resources struct { | ||||
| 	// CPU hardcap limit (in usecs). Allowed cpu time in a given period. | ||||
| 	CpuQuota int64 `json:"cpu_quota"` | ||||
|  | ||||
| 	// CPU hardcap burst limit (in usecs). Allowed accumulated cpu time additionally for burst in a given period. | ||||
| 	CpuBurst *uint64 `json:"cpu_burst"` //nolint:revive | ||||
|  | ||||
| 	// CPU period to be used for hardcapping (in usecs). 0 to use system default. | ||||
| 	CpuPeriod uint64 `json:"cpu_period"` | ||||
|  | ||||
| @ -84,6 +87,9 @@ type Resources struct { | ||||
| 	// MEM to use | ||||
| 	CpusetMems string `json:"cpuset_mems"` | ||||
|  | ||||
| 	// cgroup SCHED_IDLE | ||||
| 	CPUIdle *int64 `json:"cpu_idle,omitempty"` | ||||
|  | ||||
| 	// Process limit; set <= `0' to disable limit. | ||||
| 	PidsLimit int64 `json:"pids_limit"` | ||||
|  | ||||
| @ -155,4 +161,9 @@ type Resources struct { | ||||
| 	// during Set() to figure out whether the freeze is required. Those | ||||
| 	// methods may be relatively slow, thus this flag. | ||||
| 	SkipFreezeOnSet bool `json:"-"` | ||||
|  | ||||
| 	// MemoryCheckBeforeUpdate is a flag for cgroup v2 managers to check | ||||
| 	// if the new memory limits (Memory and MemorySwap) being set are lower | ||||
| 	// than the current memory usage, and reject if so. | ||||
| 	MemoryCheckBeforeUpdate bool `json:"memory_check_before_update"` | ||||
| } | ||||
|  | ||||
							
								
								
									
										111
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										111
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -8,6 +8,7 @@ import ( | ||||
| 	"time" | ||||
|  | ||||
| 	"github.com/sirupsen/logrus" | ||||
| 	"golang.org/x/sys/unix" | ||||
|  | ||||
| 	"github.com/opencontainers/runc/libcontainer/devices" | ||||
| 	"github.com/opencontainers/runtime-spec/specs-go" | ||||
| @ -31,12 +32,13 @@ type IDMap struct { | ||||
| // for syscalls. Additional architectures can be added by specifying them in | ||||
| // Architectures. | ||||
| type Seccomp struct { | ||||
| 	DefaultAction    Action     `json:"default_action"` | ||||
| 	Architectures    []string   `json:"architectures"` | ||||
| 	Syscalls         []*Syscall `json:"syscalls"` | ||||
| 	DefaultErrnoRet  *uint      `json:"default_errno_ret"` | ||||
| 	ListenerPath     string     `json:"listener_path,omitempty"` | ||||
| 	ListenerMetadata string     `json:"listener_metadata,omitempty"` | ||||
| 	DefaultAction    Action                   `json:"default_action"` | ||||
| 	Architectures    []string                 `json:"architectures"` | ||||
| 	Flags            []specs.LinuxSeccompFlag `json:"flags"` | ||||
| 	Syscalls         []*Syscall               `json:"syscalls"` | ||||
| 	DefaultErrnoRet  *uint                    `json:"default_errno_ret"` | ||||
| 	ListenerPath     string                   `json:"listener_path,omitempty"` | ||||
| 	ListenerMetadata string                   `json:"listener_metadata,omitempty"` | ||||
| } | ||||
|  | ||||
| // Action is taken upon rule match in Seccomp | ||||
| @ -83,9 +85,6 @@ type Syscall struct { | ||||
| 	Args     []*Arg `json:"args"` | ||||
| } | ||||
|  | ||||
| // TODO Windows. Many of these fields should be factored out into those parts | ||||
| // which are common across platforms, and those which are platform specific. | ||||
|  | ||||
| // Config defines configuration options for executing a process inside a contained environment. | ||||
| type Config struct { | ||||
| 	// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs | ||||
| @ -121,6 +120,9 @@ type Config struct { | ||||
| 	// Hostname optionally sets the container's hostname if provided | ||||
| 	Hostname string `json:"hostname"` | ||||
|  | ||||
| 	// Domainname optionally sets the container's domainname if provided | ||||
| 	Domainname string `json:"domainname"` | ||||
|  | ||||
| 	// Namespaces specifies the container's namespaces that it should setup when cloning the init process | ||||
| 	// If a namespace is not provided that namespace is shared from the container's parent process | ||||
| 	Namespaces Namespaces `json:"namespaces"` | ||||
| @ -158,11 +160,11 @@ type Config struct { | ||||
| 	// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/ | ||||
| 	OomScoreAdj *int `json:"oom_score_adj,omitempty"` | ||||
|  | ||||
| 	// UidMappings is an array of User ID mappings for User Namespaces | ||||
| 	UidMappings []IDMap `json:"uid_mappings"` | ||||
| 	// UIDMappings is an array of User ID mappings for User Namespaces | ||||
| 	UIDMappings []IDMap `json:"uid_mappings"` | ||||
|  | ||||
| 	// GidMappings is an array of Group ID mappings for User Namespaces | ||||
| 	GidMappings []IDMap `json:"gid_mappings"` | ||||
| 	// GIDMappings is an array of Group ID mappings for User Namespaces | ||||
| 	GIDMappings []IDMap `json:"gid_mappings"` | ||||
|  | ||||
| 	// MaskPaths specifies paths within the container's rootfs to mask over with a bind | ||||
| 	// mount pointing to /dev/null as to prevent reads of the file. | ||||
| @ -211,6 +213,74 @@ type Config struct { | ||||
| 	// RootlessCgroups is set when unlikely to have the full access to cgroups. | ||||
| 	// When RootlessCgroups is set, cgroups errors are ignored. | ||||
| 	RootlessCgroups bool `json:"rootless_cgroups,omitempty"` | ||||
|  | ||||
| 	// TimeOffsets specifies the offset for supporting time namespaces. | ||||
| 	TimeOffsets map[string]specs.LinuxTimeOffset `json:"time_offsets,omitempty"` | ||||
|  | ||||
| 	// Scheduler represents the scheduling attributes for a process. | ||||
| 	Scheduler *Scheduler `json:"scheduler,omitempty"` | ||||
|  | ||||
| 	// Personality contains configuration for the Linux personality syscall. | ||||
| 	Personality *LinuxPersonality `json:"personality,omitempty"` | ||||
| } | ||||
|  | ||||
| // Scheduler is based on the Linux sched_setattr(2) syscall. | ||||
| type Scheduler = specs.Scheduler | ||||
|  | ||||
| // ToSchedAttr converts *configs.Scheduler to *unix.SchedAttr. | ||||
| func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) { | ||||
| 	var policy uint32 | ||||
| 	switch scheduler.Policy { | ||||
| 	case specs.SchedOther: | ||||
| 		policy = 0 | ||||
| 	case specs.SchedFIFO: | ||||
| 		policy = 1 | ||||
| 	case specs.SchedRR: | ||||
| 		policy = 2 | ||||
| 	case specs.SchedBatch: | ||||
| 		policy = 3 | ||||
| 	case specs.SchedISO: | ||||
| 		policy = 4 | ||||
| 	case specs.SchedIdle: | ||||
| 		policy = 5 | ||||
| 	case specs.SchedDeadline: | ||||
| 		policy = 6 | ||||
| 	default: | ||||
| 		return nil, fmt.Errorf("invalid scheduler policy: %s", scheduler.Policy) | ||||
| 	} | ||||
|  | ||||
| 	var flags uint64 | ||||
| 	for _, flag := range scheduler.Flags { | ||||
| 		switch flag { | ||||
| 		case specs.SchedFlagResetOnFork: | ||||
| 			flags |= 0x01 | ||||
| 		case specs.SchedFlagReclaim: | ||||
| 			flags |= 0x02 | ||||
| 		case specs.SchedFlagDLOverrun: | ||||
| 			flags |= 0x04 | ||||
| 		case specs.SchedFlagKeepPolicy: | ||||
| 			flags |= 0x08 | ||||
| 		case specs.SchedFlagKeepParams: | ||||
| 			flags |= 0x10 | ||||
| 		case specs.SchedFlagUtilClampMin: | ||||
| 			flags |= 0x20 | ||||
| 		case specs.SchedFlagUtilClampMax: | ||||
| 			flags |= 0x40 | ||||
| 		default: | ||||
| 			return nil, fmt.Errorf("invalid scheduler flag: %s", flag) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return &unix.SchedAttr{ | ||||
| 		Size:     unix.SizeofSchedAttr, | ||||
| 		Policy:   policy, | ||||
| 		Flags:    flags, | ||||
| 		Nice:     scheduler.Nice, | ||||
| 		Priority: uint32(scheduler.Priority), | ||||
| 		Runtime:  scheduler.Runtime, | ||||
| 		Deadline: scheduler.Deadline, | ||||
| 		Period:   scheduler.Period, | ||||
| 	}, nil | ||||
| } | ||||
|  | ||||
| type ( | ||||
| @ -277,6 +347,7 @@ type Capabilities struct { | ||||
| 	Ambient []string | ||||
| } | ||||
|  | ||||
| // Deprecated: use (Hooks).Run instead. | ||||
| func (hooks HookList) RunHooks(state *specs.State) error { | ||||
| 	for i, h := range hooks { | ||||
| 		if err := h.Run(state); err != nil { | ||||
| @ -333,6 +404,18 @@ func (hooks *Hooks) MarshalJSON() ([]byte, error) { | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| // Run executes all hooks for the given hook name. | ||||
| func (hooks Hooks) Run(name HookName, state *specs.State) error { | ||||
| 	list := hooks[name] | ||||
| 	for i, h := range list { | ||||
| 		if err := h.Run(state); err != nil { | ||||
| 			return fmt.Errorf("error running %s hook #%d: %w", name, i, err) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| type Hook interface { | ||||
| 	// Run executes the hook with the provided state. | ||||
| 	Run(*specs.State) error | ||||
| @ -393,7 +476,7 @@ func (c Command) Run(s *specs.State) error { | ||||
| 	go func() { | ||||
| 		err := cmd.Wait() | ||||
| 		if err != nil { | ||||
| 			err = fmt.Errorf("error running hook: %w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String()) | ||||
| 			err = fmt.Errorf("%w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String()) | ||||
| 		} | ||||
| 		errC <- err | ||||
| 	}() | ||||
|  | ||||
							
								
								
									
										31
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										31
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -7,22 +7,33 @@ import ( | ||||
| ) | ||||
|  | ||||
| var ( | ||||
| 	errNoUIDMap   = errors.New("User namespaces enabled, but no uid mappings found.") | ||||
| 	errNoUserMap  = errors.New("User namespaces enabled, but no user mapping found.") | ||||
| 	errNoGIDMap   = errors.New("User namespaces enabled, but no gid mappings found.") | ||||
| 	errNoGroupMap = errors.New("User namespaces enabled, but no group mapping found.") | ||||
| 	errNoUIDMap = errors.New("user namespaces enabled, but no uid mappings found") | ||||
| 	errNoGIDMap = errors.New("user namespaces enabled, but no gid mappings found") | ||||
| ) | ||||
|  | ||||
| // Please check https://man7.org/linux/man-pages/man2/personality.2.html for const details. | ||||
| // https://raw.githubusercontent.com/torvalds/linux/master/include/uapi/linux/personality.h | ||||
| const ( | ||||
| 	PerLinux   = 0x0000 | ||||
| 	PerLinux32 = 0x0008 | ||||
| ) | ||||
|  | ||||
| type LinuxPersonality struct { | ||||
| 	// Domain for the personality | ||||
| 	// can only contain values "LINUX" and "LINUX32" | ||||
| 	Domain int `json:"domain"` | ||||
| } | ||||
|  | ||||
| // HostUID gets the translated uid for the process on host which could be | ||||
| // different when user namespaces are enabled. | ||||
| func (c Config) HostUID(containerId int) (int, error) { | ||||
| 	if c.Namespaces.Contains(NEWUSER) { | ||||
| 		if c.UidMappings == nil { | ||||
| 		if len(c.UIDMappings) == 0 { | ||||
| 			return -1, errNoUIDMap | ||||
| 		} | ||||
| 		id, found := c.hostIDFromMapping(int64(containerId), c.UidMappings) | ||||
| 		id, found := c.hostIDFromMapping(int64(containerId), c.UIDMappings) | ||||
| 		if !found { | ||||
| 			return -1, errNoUserMap | ||||
| 			return -1, fmt.Errorf("user namespaces enabled, but no mapping found for uid %d", containerId) | ||||
| 		} | ||||
| 		// If we are a 32-bit binary running on a 64-bit system, it's possible | ||||
| 		// the mapped user is too large to store in an int, which means we | ||||
| @ -47,12 +58,12 @@ func (c Config) HostRootUID() (int, error) { | ||||
| // different when user namespaces are enabled. | ||||
| func (c Config) HostGID(containerId int) (int, error) { | ||||
| 	if c.Namespaces.Contains(NEWUSER) { | ||||
| 		if c.GidMappings == nil { | ||||
| 		if len(c.GIDMappings) == 0 { | ||||
| 			return -1, errNoGIDMap | ||||
| 		} | ||||
| 		id, found := c.hostIDFromMapping(int64(containerId), c.GidMappings) | ||||
| 		id, found := c.hostIDFromMapping(int64(containerId), c.GIDMappings) | ||||
| 		if !found { | ||||
| 			return -1, errNoGroupMap | ||||
| 			return -1, fmt.Errorf("user namespaces enabled, but no mapping found for gid %d", containerId) | ||||
| 		} | ||||
| 		// If we are a 32-bit binary running on a 64-bit system, it's possible | ||||
| 		// the mapped user is too large to store in an int, which means we | ||||
|  | ||||
							
								
								
									
										43
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										43
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -1,48 +1,7 @@ | ||||
| package configs | ||||
|  | ||||
| import "golang.org/x/sys/unix" | ||||
|  | ||||
| const ( | ||||
| 	// EXT_COPYUP is a directive to copy up the contents of a directory when | ||||
| 	// a tmpfs is mounted over it. | ||||
| 	EXT_COPYUP = 1 << iota //nolint:golint // ignore "don't use ALL_CAPS" warning | ||||
| 	EXT_COPYUP = 1 << iota //nolint:golint,revive // ignore "don't use ALL_CAPS" warning | ||||
| ) | ||||
|  | ||||
| type Mount struct { | ||||
| 	// Source path for the mount. | ||||
| 	Source string `json:"source"` | ||||
|  | ||||
| 	// Destination path for the mount inside the container. | ||||
| 	Destination string `json:"destination"` | ||||
|  | ||||
| 	// Device the mount is for. | ||||
| 	Device string `json:"device"` | ||||
|  | ||||
| 	// Mount flags. | ||||
| 	Flags int `json:"flags"` | ||||
|  | ||||
| 	// Propagation Flags | ||||
| 	PropagationFlags []int `json:"propagation_flags"` | ||||
|  | ||||
| 	// Mount data applied to the mount. | ||||
| 	Data string `json:"data"` | ||||
|  | ||||
| 	// Relabel source if set, "z" indicates shared, "Z" indicates unshared. | ||||
| 	Relabel string `json:"relabel"` | ||||
|  | ||||
| 	// RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2). | ||||
| 	RecAttr *unix.MountAttr `json:"rec_attr"` | ||||
|  | ||||
| 	// Extensions are additional flags that are specific to runc. | ||||
| 	Extensions int `json:"extensions"` | ||||
|  | ||||
| 	// Optional Command to be run before Source is mounted. | ||||
| 	PremountCmds []Command `json:"premount_cmds"` | ||||
|  | ||||
| 	// Optional Command to be run after Source is mounted. | ||||
| 	PostmountCmds []Command `json:"postmount_cmds"` | ||||
| } | ||||
|  | ||||
| func (m *Mount) IsBind() bool { | ||||
| 	return m.Flags&unix.MS_BIND != 0 | ||||
| } | ||||
|  | ||||
							
								
								
									
										66
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/mount_linux.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/mount_linux.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,66 @@ | ||||
| package configs | ||||
|  | ||||
| import "golang.org/x/sys/unix" | ||||
|  | ||||
| type MountIDMapping struct { | ||||
| 	// Recursive indicates if the mapping needs to be recursive. | ||||
| 	Recursive bool `json:"recursive"` | ||||
|  | ||||
| 	// UserNSPath is a path to a user namespace that indicates the necessary | ||||
| 	// id-mappings for MOUNT_ATTR_IDMAP. If set to non-"", UIDMappings and | ||||
| 	// GIDMappings must be set to nil. | ||||
| 	UserNSPath string `json:"userns_path,omitempty"` | ||||
|  | ||||
| 	// UIDMappings is the uid mapping set for this mount, to be used with | ||||
| 	// MOUNT_ATTR_IDMAP. | ||||
| 	UIDMappings []IDMap `json:"uid_mappings,omitempty"` | ||||
|  | ||||
| 	// GIDMappings is the gid mapping set for this mount, to be used with | ||||
| 	// MOUNT_ATTR_IDMAP. | ||||
| 	GIDMappings []IDMap `json:"gid_mappings,omitempty"` | ||||
| } | ||||
|  | ||||
| type Mount struct { | ||||
| 	// Source path for the mount. | ||||
| 	Source string `json:"source"` | ||||
|  | ||||
| 	// Destination path for the mount inside the container. | ||||
| 	Destination string `json:"destination"` | ||||
|  | ||||
| 	// Device the mount is for. | ||||
| 	Device string `json:"device"` | ||||
|  | ||||
| 	// Mount flags. | ||||
| 	Flags int `json:"flags"` | ||||
|  | ||||
| 	// Mount flags that were explicitly cleared in the configuration (meaning | ||||
| 	// the user explicitly requested that these flags *not* be set). | ||||
| 	ClearedFlags int `json:"cleared_flags"` | ||||
|  | ||||
| 	// Propagation Flags | ||||
| 	PropagationFlags []int `json:"propagation_flags"` | ||||
|  | ||||
| 	// Mount data applied to the mount. | ||||
| 	Data string `json:"data"` | ||||
|  | ||||
| 	// Relabel source if set, "z" indicates shared, "Z" indicates unshared. | ||||
| 	Relabel string `json:"relabel"` | ||||
|  | ||||
| 	// RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2). | ||||
| 	RecAttr *unix.MountAttr `json:"rec_attr"` | ||||
|  | ||||
| 	// Extensions are additional flags that are specific to runc. | ||||
| 	Extensions int `json:"extensions"` | ||||
|  | ||||
| 	// IDMapping is the MOUNT_ATTR_IDMAP configuration for the mount. If non-nil, | ||||
| 	// the mount is configured to use MOUNT_ATTR_IDMAP-style id mappings. | ||||
| 	IDMapping *MountIDMapping `json:"id_mapping,omitempty"` | ||||
| } | ||||
|  | ||||
| func (m *Mount) IsBind() bool { | ||||
| 	return m.Flags&unix.MS_BIND != 0 | ||||
| } | ||||
|  | ||||
| func (m *Mount) IsIDMapped() bool { | ||||
| 	return m.IDMapping != nil | ||||
| } | ||||
							
								
								
									
										10
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/mount_unsupported.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/mount_unsupported.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,10 @@ | ||||
| //go:build !linux | ||||
| // +build !linux | ||||
|  | ||||
| package configs | ||||
|  | ||||
| type Mount struct{} | ||||
|  | ||||
| func (m *Mount) IsBind() bool { | ||||
| 	return false | ||||
| } | ||||
							
								
								
									
										7
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										7
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -14,6 +14,7 @@ const ( | ||||
| 	NEWIPC    NamespaceType = "NEWIPC" | ||||
| 	NEWUSER   NamespaceType = "NEWUSER" | ||||
| 	NEWCGROUP NamespaceType = "NEWCGROUP" | ||||
| 	NEWTIME   NamespaceType = "NEWTIME" | ||||
| ) | ||||
|  | ||||
| var ( | ||||
| @ -38,6 +39,8 @@ func NsName(ns NamespaceType) string { | ||||
| 		return "uts" | ||||
| 	case NEWCGROUP: | ||||
| 		return "cgroup" | ||||
| 	case NEWTIME: | ||||
| 		return "time" | ||||
| 	} | ||||
| 	return "" | ||||
| } | ||||
| @ -56,6 +59,9 @@ func IsNamespaceSupported(ns NamespaceType) bool { | ||||
| 	if nsFile == "" { | ||||
| 		return false | ||||
| 	} | ||||
| 	// We don't need to use /proc/thread-self here because the list of | ||||
| 	// namespace types is unrelated to the thread. This lets us avoid having to | ||||
| 	// do runtime.LockOSThread. | ||||
| 	_, err := os.Stat("/proc/self/ns/" + nsFile) | ||||
| 	// a namespace is supported if it exists and we have permissions to read it | ||||
| 	supported = err == nil | ||||
| @ -72,6 +78,7 @@ func NamespaceTypes() []NamespaceType { | ||||
| 		NEWPID, | ||||
| 		NEWNS, | ||||
| 		NEWCGROUP, | ||||
| 		NEWTIME, | ||||
| 	} | ||||
| } | ||||
|  | ||||
|  | ||||
							
								
								
									
										13
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										13
									
								
								vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -17,6 +17,7 @@ var namespaceInfo = map[NamespaceType]int{ | ||||
| 	NEWUTS:    unix.CLONE_NEWUTS, | ||||
| 	NEWPID:    unix.CLONE_NEWPID, | ||||
| 	NEWCGROUP: unix.CLONE_NEWCGROUP, | ||||
| 	NEWTIME:   unix.CLONE_NEWTIME, | ||||
| } | ||||
|  | ||||
| // CloneFlags parses the container's Namespaces options to set the correct | ||||
| @ -31,3 +32,15 @@ func (n *Namespaces) CloneFlags() uintptr { | ||||
| 	} | ||||
| 	return uintptr(flag) | ||||
| } | ||||
|  | ||||
| // IsPrivate tells whether the namespace of type t is configured as private | ||||
| // (i.e. it exists and is not shared). | ||||
| func (n Namespaces) IsPrivate(t NamespaceType) bool { | ||||
| 	for _, v := range n { | ||||
| 		if v.Type == t { | ||||
| 			return v.Path == "" | ||||
| 		} | ||||
| 	} | ||||
| 	// Not found, so implicitly sharing a parent namespace. | ||||
| 	return false | ||||
| } | ||||
|  | ||||
							
								
								
									
										81
									
								
								vendor/github.com/opencontainers/runc/libcontainer/user/lookup_deprecated.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										81
									
								
								vendor/github.com/opencontainers/runc/libcontainer/user/lookup_deprecated.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,81 @@ | ||||
| package user | ||||
|  | ||||
| import ( | ||||
| 	"io" | ||||
|  | ||||
| 	"github.com/moby/sys/user" | ||||
| ) | ||||
|  | ||||
| // LookupUser looks up a user by their username in /etc/passwd. If the user | ||||
| // cannot be found (or there is no /etc/passwd file on the filesystem), then | ||||
| // LookupUser returns an error. | ||||
| func LookupUser(username string) (user.User, error) { | ||||
| 	return user.LookupUser(username) | ||||
| } | ||||
|  | ||||
| // LookupUid looks up a user by their user id in /etc/passwd. If the user cannot | ||||
| // be found (or there is no /etc/passwd file on the filesystem), then LookupUid | ||||
| // returns an error. | ||||
| func LookupUid(uid int) (user.User, error) { //nolint:revive // ignore var-naming: func LookupUid should be LookupUID | ||||
| 	return user.LookupUid(uid) | ||||
| } | ||||
|  | ||||
| // LookupGroup looks up a group by its name in /etc/group. If the group cannot | ||||
| // be found (or there is no /etc/group file on the filesystem), then LookupGroup | ||||
| // returns an error. | ||||
| func LookupGroup(groupname string) (user.Group, error) { | ||||
| 	return user.LookupGroup(groupname) | ||||
| } | ||||
|  | ||||
| // LookupGid looks up a group by its group id in /etc/group. If the group cannot | ||||
| // be found (or there is no /etc/group file on the filesystem), then LookupGid | ||||
| // returns an error. | ||||
| func LookupGid(gid int) (user.Group, error) { | ||||
| 	return user.LookupGid(gid) | ||||
| } | ||||
|  | ||||
| func GetPasswdPath() (string, error) { | ||||
| 	return user.GetPasswdPath() | ||||
| } | ||||
|  | ||||
| func GetPasswd() (io.ReadCloser, error) { | ||||
| 	return user.GetPasswd() | ||||
| } | ||||
|  | ||||
| func GetGroupPath() (string, error) { | ||||
| 	return user.GetGroupPath() | ||||
| } | ||||
|  | ||||
| func GetGroup() (io.ReadCloser, error) { | ||||
| 	return user.GetGroup() | ||||
| } | ||||
|  | ||||
| // CurrentUser looks up the current user by their user id in /etc/passwd. If the | ||||
| // user cannot be found (or there is no /etc/passwd file on the filesystem), | ||||
| // then CurrentUser returns an error. | ||||
| func CurrentUser() (user.User, error) { | ||||
| 	return user.CurrentUser() | ||||
| } | ||||
|  | ||||
| // CurrentGroup looks up the current user's group by their primary group id's | ||||
| // entry in /etc/group. If the group cannot be found (or there is no | ||||
| // /etc/group file on the filesystem), then CurrentGroup returns an error. | ||||
| func CurrentGroup() (user.Group, error) { | ||||
| 	return user.CurrentGroup() | ||||
| } | ||||
|  | ||||
| func CurrentUserSubUIDs() ([]user.SubID, error) { | ||||
| 	return user.CurrentUserSubUIDs() | ||||
| } | ||||
|  | ||||
| func CurrentUserSubGIDs() ([]user.SubID, error) { | ||||
| 	return user.CurrentUserSubGIDs() | ||||
| } | ||||
|  | ||||
| func CurrentProcessUIDMap() ([]user.IDMap, error) { | ||||
| 	return user.CurrentProcessUIDMap() | ||||
| } | ||||
|  | ||||
| func CurrentProcessGIDMap() ([]user.IDMap, error) { | ||||
| 	return user.CurrentProcessGIDMap() | ||||
| } | ||||
							
								
								
									
										157
									
								
								vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										157
									
								
								vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -1,157 +0,0 @@ | ||||
| //go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris | ||||
| // +build darwin dragonfly freebsd linux netbsd openbsd solaris | ||||
|  | ||||
| package user | ||||
|  | ||||
| import ( | ||||
| 	"io" | ||||
| 	"os" | ||||
| 	"strconv" | ||||
|  | ||||
| 	"golang.org/x/sys/unix" | ||||
| ) | ||||
|  | ||||
| // Unix-specific path to the passwd and group formatted files. | ||||
| const ( | ||||
| 	unixPasswdPath = "/etc/passwd" | ||||
| 	unixGroupPath  = "/etc/group" | ||||
| ) | ||||
|  | ||||
| // LookupUser looks up a user by their username in /etc/passwd. If the user | ||||
| // cannot be found (or there is no /etc/passwd file on the filesystem), then | ||||
| // LookupUser returns an error. | ||||
| func LookupUser(username string) (User, error) { | ||||
| 	return lookupUserFunc(func(u User) bool { | ||||
| 		return u.Name == username | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| // LookupUid looks up a user by their user id in /etc/passwd. If the user cannot | ||||
| // be found (or there is no /etc/passwd file on the filesystem), then LookupUid | ||||
| // returns an error. | ||||
| func LookupUid(uid int) (User, error) { | ||||
| 	return lookupUserFunc(func(u User) bool { | ||||
| 		return u.Uid == uid | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| func lookupUserFunc(filter func(u User) bool) (User, error) { | ||||
| 	// Get operating system-specific passwd reader-closer. | ||||
| 	passwd, err := GetPasswd() | ||||
| 	if err != nil { | ||||
| 		return User{}, err | ||||
| 	} | ||||
| 	defer passwd.Close() | ||||
|  | ||||
| 	// Get the users. | ||||
| 	users, err := ParsePasswdFilter(passwd, filter) | ||||
| 	if err != nil { | ||||
| 		return User{}, err | ||||
| 	} | ||||
|  | ||||
| 	// No user entries found. | ||||
| 	if len(users) == 0 { | ||||
| 		return User{}, ErrNoPasswdEntries | ||||
| 	} | ||||
|  | ||||
| 	// Assume the first entry is the "correct" one. | ||||
| 	return users[0], nil | ||||
| } | ||||
|  | ||||
| // LookupGroup looks up a group by its name in /etc/group. If the group cannot | ||||
| // be found (or there is no /etc/group file on the filesystem), then LookupGroup | ||||
| // returns an error. | ||||
| func LookupGroup(groupname string) (Group, error) { | ||||
| 	return lookupGroupFunc(func(g Group) bool { | ||||
| 		return g.Name == groupname | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| // LookupGid looks up a group by its group id in /etc/group. If the group cannot | ||||
| // be found (or there is no /etc/group file on the filesystem), then LookupGid | ||||
| // returns an error. | ||||
| func LookupGid(gid int) (Group, error) { | ||||
| 	return lookupGroupFunc(func(g Group) bool { | ||||
| 		return g.Gid == gid | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| func lookupGroupFunc(filter func(g Group) bool) (Group, error) { | ||||
| 	// Get operating system-specific group reader-closer. | ||||
| 	group, err := GetGroup() | ||||
| 	if err != nil { | ||||
| 		return Group{}, err | ||||
| 	} | ||||
| 	defer group.Close() | ||||
|  | ||||
| 	// Get the groups. | ||||
| 	groups, err := ParseGroupFilter(group, filter) | ||||
| 	if err != nil { | ||||
| 		return Group{}, err | ||||
| 	} | ||||
|  | ||||
| 	// No group entries found. | ||||
| 	if len(groups) == 0 { | ||||
| 		return Group{}, ErrNoGroupEntries | ||||
| 	} | ||||
|  | ||||
| 	// Assume the first entry is the "correct" one. | ||||
| 	return groups[0], nil | ||||
| } | ||||
|  | ||||
| func GetPasswdPath() (string, error) { | ||||
| 	return unixPasswdPath, nil | ||||
| } | ||||
|  | ||||
| func GetPasswd() (io.ReadCloser, error) { | ||||
| 	return os.Open(unixPasswdPath) | ||||
| } | ||||
|  | ||||
| func GetGroupPath() (string, error) { | ||||
| 	return unixGroupPath, nil | ||||
| } | ||||
|  | ||||
| func GetGroup() (io.ReadCloser, error) { | ||||
| 	return os.Open(unixGroupPath) | ||||
| } | ||||
|  | ||||
| // CurrentUser looks up the current user by their user id in /etc/passwd. If the | ||||
| // user cannot be found (or there is no /etc/passwd file on the filesystem), | ||||
| // then CurrentUser returns an error. | ||||
| func CurrentUser() (User, error) { | ||||
| 	return LookupUid(unix.Getuid()) | ||||
| } | ||||
|  | ||||
| // CurrentGroup looks up the current user's group by their primary group id's | ||||
| // entry in /etc/group. If the group cannot be found (or there is no | ||||
| // /etc/group file on the filesystem), then CurrentGroup returns an error. | ||||
| func CurrentGroup() (Group, error) { | ||||
| 	return LookupGid(unix.Getgid()) | ||||
| } | ||||
|  | ||||
| // currentUserSubIDs returns the entries of the subid-formatted file fileName | ||||
| // that belong to the current user, as reported by CurrentUser. An entry | ||||
| // matches when its Name equals either the user's name or the decimal string | ||||
| // form of the user's uid. Errors from CurrentUser or from parsing the file | ||||
| // are returned unchanged. | ||||
| func currentUserSubIDs(fileName string) ([]SubID, error) { | ||||
| 	u, err := CurrentUser() | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	filter := func(entry SubID) bool { | ||||
| 		return entry.Name == u.Name || entry.Name == strconv.Itoa(u.Uid) | ||||
| 	} | ||||
| 	return ParseSubIDFileFilter(fileName, filter) | ||||
| } | ||||
|  | ||||
| func CurrentUserSubUIDs() ([]SubID, error) { | ||||
| 	return currentUserSubIDs("/etc/subuid") | ||||
| } | ||||
|  | ||||
| func CurrentUserSubGIDs() ([]SubID, error) { | ||||
| 	return currentUserSubIDs("/etc/subgid") | ||||
| } | ||||
|  | ||||
| func CurrentProcessUIDMap() ([]IDMap, error) { | ||||
| 	return ParseIDMapFile("/proc/self/uid_map") | ||||
| } | ||||
|  | ||||
| func CurrentProcessGIDMap() ([]IDMap, error) { | ||||
| 	return ParseIDMapFile("/proc/self/gid_map") | ||||
| } | ||||
							
								
								
									
										605
									
								
								vendor/github.com/opencontainers/runc/libcontainer/user/user.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										605
									
								
								vendor/github.com/opencontainers/runc/libcontainer/user/user.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -1,605 +0,0 @@ | ||||
| package user | ||||
|  | ||||
| import ( | ||||
| 	"bufio" | ||||
| 	"bytes" | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"os" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| const ( | ||||
| 	minID = 0 | ||||
| 	maxID = 1<<31 - 1 // for 32-bit systems compatibility | ||||
| ) | ||||
|  | ||||
| var ( | ||||
| 	// ErrNoPasswdEntries is returned if no matching entries were found in /etc/passwd. | ||||
| 	ErrNoPasswdEntries = errors.New("no matching entries in passwd file") | ||||
| 	// ErrNoGroupEntries is returned if no matching entries were found in /etc/group. | ||||
| 	ErrNoGroupEntries = errors.New("no matching entries in group file") | ||||
| 	// ErrRange is returned if a UID or GID is outside of the valid range. | ||||
| 	ErrRange = fmt.Errorf("uids and gids must be in range %d-%d", minID, maxID) | ||||
| ) | ||||
|  | ||||
| type User struct { | ||||
| 	Name  string | ||||
| 	Pass  string | ||||
| 	Uid   int | ||||
| 	Gid   int | ||||
| 	Gecos string | ||||
| 	Home  string | ||||
| 	Shell string | ||||
| } | ||||
|  | ||||
| type Group struct { | ||||
| 	Name string | ||||
| 	Pass string | ||||
| 	Gid  int | ||||
| 	List []string | ||||
| } | ||||
|  | ||||
| // SubID represents an entry in /etc/sub{u,g}id | ||||
| type SubID struct { | ||||
| 	Name  string | ||||
| 	SubID int64 | ||||
| 	Count int64 | ||||
| } | ||||
|  | ||||
| // IDMap represents an entry in /proc/PID/{u,g}id_map | ||||
| type IDMap struct { | ||||
| 	ID       int64 | ||||
| 	ParentID int64 | ||||
| 	Count    int64 | ||||
| } | ||||
|  | ||||
| // parseLine splits line on ':' and hands the resulting fields to parseParts, | ||||
| // which stores them, by position, into the pointers given in v. | ||||
| func parseLine(line []byte, v ...interface{}) { | ||||
| 	parseParts(bytes.Split(line, []byte(":")), v...) | ||||
| } | ||||
|  | ||||
| // parseParts stores parts[i] into the pointer v[i], choosing the conversion | ||||
| // from the pointer's type: | ||||
| // | ||||
| //   - *string:   the field is copied as-is | ||||
| //   - *int:      strconv.Atoi, conversion errors ignored | ||||
| //   - *int64:    strconv.ParseInt (base 10), conversion errors ignored | ||||
| //   - *[]string: the field is split on ','; an empty field yields an empty, | ||||
| //     non-nil slice | ||||
| // | ||||
| // Fields beyond len(v) are dropped, and pointers beyond len(parts) are left | ||||
| // untouched. It panics if an element of v that gets visited has any other type. | ||||
| func parseParts(parts [][]byte, v ...interface{}) { | ||||
| 	if len(parts) == 0 { | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	for i, p := range parts { | ||||
| 		// Ignore cases where we don't have enough fields to populate the arguments. | ||||
| 		// Some configuration files like to misbehave. | ||||
| 		if len(v) <= i { | ||||
| 			break | ||||
| 		} | ||||
|  | ||||
| 		// Use the type of the argument to figure out how to parse it, scanf() style. | ||||
| 		// This is legit. | ||||
| 		switch e := v[i].(type) { | ||||
| 		case *string: | ||||
| 			*e = string(p) | ||||
| 		case *int: | ||||
| 			// "numbers", with conversion errors ignored because of some misbehaving configuration files. | ||||
| 			*e, _ = strconv.Atoi(string(p)) | ||||
| 		case *int64: | ||||
| 			*e, _ = strconv.ParseInt(string(p), 10, 64) | ||||
| 		case *[]string: | ||||
| 			// Comma-separated lists. | ||||
| 			if len(p) != 0 { | ||||
| 				*e = strings.Split(string(p), ",") | ||||
| 			} else { | ||||
| 				*e = []string{} | ||||
| 			} | ||||
| 		default: | ||||
| 			// Someone goof'd when writing code using this function. Scream so they can hear us. | ||||
| 			panic(fmt.Sprintf("parseLine only accepts {*string, *int, *int64, *[]string} as arguments! %#v is not a pointer!", e)) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func ParsePasswdFile(path string) ([]User, error) { | ||||
| 	passwd, err := os.Open(path) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	defer passwd.Close() | ||||
| 	return ParsePasswd(passwd) | ||||
| } | ||||
|  | ||||
| func ParsePasswd(passwd io.Reader) ([]User, error) { | ||||
| 	return ParsePasswdFilter(passwd, nil) | ||||
| } | ||||
|  | ||||
| func ParsePasswdFileFilter(path string, filter func(User) bool) ([]User, error) { | ||||
| 	passwd, err := os.Open(path) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	defer passwd.Close() | ||||
| 	return ParsePasswdFilter(passwd, filter) | ||||
| } | ||||
|  | ||||
| // ParsePasswdFilter reads passwd-formatted data from r one line at a time and | ||||
| // returns the entries accepted by filter; a nil filter accepts every entry. | ||||
| // Lines that are empty after trimming whitespace are skipped. Unlike | ||||
| // ParseGroupFilter, lines beginning with '#' are not skipped here. | ||||
| // | ||||
| // It returns an error if r is nil or if the underlying scanner reports one. | ||||
| func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) { | ||||
| 	if r == nil { | ||||
| 		return nil, errors.New("nil source for passwd-formatted data") | ||||
| 	} | ||||
|  | ||||
| 	var ( | ||||
| 		s   = bufio.NewScanner(r) | ||||
| 		out = []User{} | ||||
| 	) | ||||
|  | ||||
| 	for s.Scan() { | ||||
| 		line := bytes.TrimSpace(s.Bytes()) | ||||
| 		if len(line) == 0 { | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		// see: man 5 passwd | ||||
| 		//  name:password:UID:GID:GECOS:directory:shell | ||||
| 		// Name:Pass:Uid:Gid:Gecos:Home:Shell | ||||
| 		//  root:x:0:0:root:/root:/bin/bash | ||||
| 		//  adm:x:3:4:adm:/var/adm:/bin/false | ||||
| 		p := User{} | ||||
| 		parseLine(line, &p.Name, &p.Pass, &p.Uid, &p.Gid, &p.Gecos, &p.Home, &p.Shell) | ||||
|  | ||||
| 		if filter == nil || filter(p) { | ||||
| 			out = append(out, p) | ||||
| 		} | ||||
| 	} | ||||
| 	if err := s.Err(); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	return out, nil | ||||
| } | ||||
|  | ||||
| func ParseGroupFile(path string) ([]Group, error) { | ||||
| 	group, err := os.Open(path) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	defer group.Close() | ||||
| 	return ParseGroup(group) | ||||
| } | ||||
|  | ||||
| func ParseGroup(group io.Reader) ([]Group, error) { | ||||
| 	return ParseGroupFilter(group, nil) | ||||
| } | ||||
|  | ||||
| func ParseGroupFileFilter(path string, filter func(Group) bool) ([]Group, error) { | ||||
| 	group, err := os.Open(path) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	defer group.Close() | ||||
| 	return ParseGroupFilter(group, filter) | ||||
| } | ||||
|  | ||||
| func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) { | ||||
| 	if r == nil { | ||||
| 		return nil, errors.New("nil source for group-formatted data") | ||||
| 	} | ||||
| 	rd := bufio.NewReader(r) | ||||
| 	out := []Group{} | ||||
|  | ||||
| 	// Read the file line-by-line. | ||||
| 	for { | ||||
| 		var ( | ||||
| 			isPrefix  bool | ||||
| 			wholeLine []byte | ||||
| 			err       error | ||||
| 		) | ||||
|  | ||||
| 		// Read the next line. We do so in chunks (as much as reader's | ||||
| 		// buffer is able to keep), check if we read enough columns | ||||
| 		// already on each step and store final result in wholeLine. | ||||
| 		for { | ||||
| 			var line []byte | ||||
| 			line, isPrefix, err = rd.ReadLine() | ||||
|  | ||||
| 			if err != nil { | ||||
| 				// We should return no error if EOF is reached | ||||
| 				// without a match. | ||||
| 				if err == io.EOF { | ||||
| 					err = nil | ||||
| 				} | ||||
| 				return out, err | ||||
| 			} | ||||
|  | ||||
| 			// Simple common case: line is short enough to fit in a | ||||
| 			// single reader's buffer. | ||||
| 			if !isPrefix && len(wholeLine) == 0 { | ||||
| 				wholeLine = line | ||||
| 				break | ||||
| 			} | ||||
|  | ||||
| 			wholeLine = append(wholeLine, line...) | ||||
|  | ||||
| 			// Check if we read the whole line already. | ||||
| 			if !isPrefix { | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		// There's no spec for /etc/passwd or /etc/group, but we try to follow | ||||
| 		// the same rules as the glibc parser, which allows comments and blank | ||||
| 		// space at the beginning of a line. | ||||
| 		wholeLine = bytes.TrimSpace(wholeLine) | ||||
| 		if len(wholeLine) == 0 || wholeLine[0] == '#' { | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		// see: man 5 group | ||||
| 		//  group_name:password:GID:user_list | ||||
| 		// Name:Pass:Gid:List | ||||
| 		//  root:x:0:root | ||||
| 		//  adm:x:4:root,adm,daemon | ||||
| 		p := Group{} | ||||
| 		parseLine(wholeLine, &p.Name, &p.Pass, &p.Gid, &p.List) | ||||
|  | ||||
| 		if filter == nil || filter(p) { | ||||
| 			out = append(out, p) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| type ExecUser struct { | ||||
| 	Uid   int | ||||
| 	Gid   int | ||||
| 	Sgids []int | ||||
| 	Home  string | ||||
| } | ||||
|  | ||||
| // GetExecUserPath is a wrapper for GetExecUser. It reads data from each of the | ||||
| // given file paths and uses that data as the arguments to GetExecUser. If the | ||||
| // files cannot be opened for any reason, the error is ignored and a nil | ||||
| // io.Reader is passed instead. | ||||
| func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) { | ||||
| 	var passwd, group io.Reader | ||||
|  | ||||
| 	if passwdFile, err := os.Open(passwdPath); err == nil { | ||||
| 		passwd = passwdFile | ||||
| 		defer passwdFile.Close() | ||||
| 	} | ||||
|  | ||||
| 	if groupFile, err := os.Open(groupPath); err == nil { | ||||
| 		group = groupFile | ||||
| 		defer groupFile.Close() | ||||
| 	} | ||||
|  | ||||
| 	return GetExecUser(userSpec, defaults, passwd, group) | ||||
| } | ||||
|  | ||||
| // GetExecUser parses a user specification string (using the passwd and group | ||||
| // readers as sources for /etc/passwd and /etc/group data, respectively). In | ||||
| // the case of blank fields or missing data from the sources, the values in | ||||
| // defaults are used. | ||||
| // | ||||
| // GetExecUser will return an error if a user or group literal could not be | ||||
| // found in any entry in passwd and group respectively. | ||||
| // | ||||
| // Examples of valid user specifications are: | ||||
| //   - "" | ||||
| //   - "user" | ||||
| //   - "uid" | ||||
| //   - "user:group" | ||||
| //   - "uid:gid" | ||||
| //   - "user:gid" | ||||
| //   - "uid:group" | ||||
| // | ||||
| // It should be noted that if you specify a numeric user or group id, they will | ||||
| // not be evaluated as usernames (only the metadata will be filled). So attempting | ||||
| // to parse a user with user.Name = "1337" will produce the user with a UID of | ||||
| // 1337. | ||||
| func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (*ExecUser, error) { | ||||
| 	if defaults == nil { | ||||
| 		defaults = new(ExecUser) | ||||
| 	} | ||||
|  | ||||
| 	// Copy over defaults. | ||||
| 	user := &ExecUser{ | ||||
| 		Uid:   defaults.Uid, | ||||
| 		Gid:   defaults.Gid, | ||||
| 		Sgids: defaults.Sgids, | ||||
| 		Home:  defaults.Home, | ||||
| 	} | ||||
|  | ||||
| 	// Sgids slice *cannot* be nil. | ||||
| 	if user.Sgids == nil { | ||||
| 		user.Sgids = []int{} | ||||
| 	} | ||||
|  | ||||
| 	// Allow for userArg to have either "user" syntax, or optionally "user:group" syntax | ||||
| 	var userArg, groupArg string | ||||
| 	parseLine([]byte(userSpec), &userArg, &groupArg) | ||||
|  | ||||
| 	// Convert userArg and groupArg to be numeric, so we don't have to execute | ||||
| 	// Atoi *twice* for each iteration over lines. | ||||
| 	uidArg, uidErr := strconv.Atoi(userArg) | ||||
| 	gidArg, gidErr := strconv.Atoi(groupArg) | ||||
|  | ||||
| 	// Find the matching user. | ||||
| 	users, err := ParsePasswdFilter(passwd, func(u User) bool { | ||||
| 		if userArg == "" { | ||||
| 			// Default to current state of the user. | ||||
| 			return u.Uid == user.Uid | ||||
| 		} | ||||
|  | ||||
| 		if uidErr == nil { | ||||
| 			// If the userArg is numeric, always treat it as a UID. | ||||
| 			return uidArg == u.Uid | ||||
| 		} | ||||
|  | ||||
| 		return u.Name == userArg | ||||
| 	}) | ||||
|  | ||||
| 	// If we can't find the user, we have to bail. | ||||
| 	if err != nil && passwd != nil { | ||||
| 		if userArg == "" { | ||||
| 			userArg = strconv.Itoa(user.Uid) | ||||
| 		} | ||||
| 		return nil, fmt.Errorf("unable to find user %s: %w", userArg, err) | ||||
| 	} | ||||
|  | ||||
| 	var matchedUserName string | ||||
| 	if len(users) > 0 { | ||||
| 		// First match wins, even if there's more than one matching entry. | ||||
| 		matchedUserName = users[0].Name | ||||
| 		user.Uid = users[0].Uid | ||||
| 		user.Gid = users[0].Gid | ||||
| 		user.Home = users[0].Home | ||||
| 	} else if userArg != "" { | ||||
| 		// If we can't find a user with the given username, the only other valid | ||||
| 		// option is if it's a numeric username with no associated entry in passwd. | ||||
|  | ||||
| 		if uidErr != nil { | ||||
| 			// Not numeric. | ||||
| 			return nil, fmt.Errorf("unable to find user %s: %w", userArg, ErrNoPasswdEntries) | ||||
| 		} | ||||
| 		user.Uid = uidArg | ||||
|  | ||||
| 		// Must be inside valid uid range. | ||||
| 		if user.Uid < minID || user.Uid > maxID { | ||||
| 			return nil, ErrRange | ||||
| 		} | ||||
|  | ||||
| 		// Okay, so it's numeric. We can just roll with this. | ||||
| 	} | ||||
|  | ||||
| 	// On to the groups. If we matched a username, we need to do this because of | ||||
| 	// the supplementary group IDs. | ||||
| 	if groupArg != "" || matchedUserName != "" { | ||||
| 		groups, err := ParseGroupFilter(group, func(g Group) bool { | ||||
| 			// If the group argument isn't explicit, we'll just search for it. | ||||
| 			if groupArg == "" { | ||||
| 				// Check if user is a member of this group. | ||||
| 				for _, u := range g.List { | ||||
| 					if u == matchedUserName { | ||||
| 						return true | ||||
| 					} | ||||
| 				} | ||||
| 				return false | ||||
| 			} | ||||
|  | ||||
| 			if gidErr == nil { | ||||
| 				// If the groupArg is numeric, always treat it as a GID. | ||||
| 				return gidArg == g.Gid | ||||
| 			} | ||||
|  | ||||
| 			return g.Name == groupArg | ||||
| 		}) | ||||
| 		if err != nil && group != nil { | ||||
| 			return nil, fmt.Errorf("unable to find groups for spec %v: %w", matchedUserName, err) | ||||
| 		} | ||||
|  | ||||
| 		// Only start modifying user.Gid if it is in explicit form. | ||||
| 		if groupArg != "" { | ||||
| 			if len(groups) > 0 { | ||||
| 				// First match wins, even if there's more than one matching entry. | ||||
| 				user.Gid = groups[0].Gid | ||||
| 			} else { | ||||
| 				// If we can't find a group with the given name, the only other valid | ||||
| 				// option is if it's a numeric group name with no associated entry in group. | ||||
|  | ||||
| 				if gidErr != nil { | ||||
| 					// Not numeric. | ||||
| 					return nil, fmt.Errorf("unable to find group %s: %w", groupArg, ErrNoGroupEntries) | ||||
| 				} | ||||
| 				user.Gid = gidArg | ||||
|  | ||||
| 				// Must be inside valid gid range. | ||||
| 				if user.Gid < minID || user.Gid > maxID { | ||||
| 					return nil, ErrRange | ||||
| 				} | ||||
|  | ||||
| 				// Okay, so it's numeric. We can just roll with this. | ||||
| 			} | ||||
| 		} else if len(groups) > 0 { | ||||
| 			// Supplementary group ids only make sense if in the implicit form. | ||||
| 			user.Sgids = make([]int, len(groups)) | ||||
| 			for i, group := range groups { | ||||
| 				user.Sgids[i] = group.Gid | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return user, nil | ||||
| } | ||||
|  | ||||
| // GetAdditionalGroups looks up a list of groups by name or group id | ||||
| // against the given /etc/group formatted data. If a group name cannot | ||||
| // be found, an error will be returned. If a group id cannot be found, | ||||
| // or the given group data is nil, the id will be returned as-is | ||||
| // provided it is in the legal range. | ||||
| // | ||||
| // The returned gids are de-duplicated. They are collected from a map, so | ||||
| // their order is unspecified. | ||||
| func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, error) { | ||||
| 	groups := []Group{} | ||||
| 	if group != nil { | ||||
| 		var err error | ||||
| 		// Keep only the group entries that match one of the requested | ||||
| 		// names or (stringified) gids. | ||||
| 		groups, err = ParseGroupFilter(group, func(g Group) bool { | ||||
| 			for _, ag := range additionalGroups { | ||||
| 				if g.Name == ag || strconv.Itoa(g.Gid) == ag { | ||||
| 					return true | ||||
| 				} | ||||
| 			} | ||||
| 			return false | ||||
| 		}) | ||||
| 		if err != nil { | ||||
| 			return nil, fmt.Errorf("Unable to find additional groups %v: %w", additionalGroups, err) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// gidMap is used as a set of the gids resolved so far. | ||||
| 	gidMap := make(map[int]struct{}) | ||||
| 	for _, ag := range additionalGroups { | ||||
| 		var found bool | ||||
| 		for _, g := range groups { | ||||
| 			// if we found a matched group either by name or gid, take the | ||||
| 			// first matched as correct | ||||
| 			// | ||||
| 			// NOTE(review): found is set only when the gid is newly recorded, | ||||
| 			// so a group name whose gid was already recorded by an earlier | ||||
| 			// entry falls through to the numeric parse below and is reported | ||||
| 			// as ErrNoGroupEntries. Confirm whether that is intended. | ||||
| 			if g.Name == ag || strconv.Itoa(g.Gid) == ag { | ||||
| 				if _, ok := gidMap[g.Gid]; !ok { | ||||
| 					gidMap[g.Gid] = struct{}{} | ||||
| 					found = true | ||||
| 					break | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		// we asked for a group but didn't find it. let's check to see | ||||
| 		// if we wanted a numeric group | ||||
| 		if !found { | ||||
| 			gid, err := strconv.ParseInt(ag, 10, 64) | ||||
| 			if err != nil { | ||||
| 				// Not a numeric ID either. | ||||
| 				return nil, fmt.Errorf("Unable to find group %s: %w", ag, ErrNoGroupEntries) | ||||
| 			} | ||||
| 			// Ensure gid is inside gid range. | ||||
| 			if gid < minID || gid > maxID { | ||||
| 				return nil, ErrRange | ||||
| 			} | ||||
| 			gidMap[int(gid)] = struct{}{} | ||||
| 		} | ||||
| 	} | ||||
| 	// Flatten the set into a slice. | ||||
| 	gids := []int{} | ||||
| 	for gid := range gidMap { | ||||
| 		gids = append(gids, gid) | ||||
| 	} | ||||
| 	return gids, nil | ||||
| } | ||||
|  | ||||
| // GetAdditionalGroupsPath is a wrapper around GetAdditionalGroups | ||||
| // that opens the groupPath given and gives it as an argument to | ||||
| // GetAdditionalGroups. | ||||
| func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) { | ||||
| 	var group io.Reader | ||||
|  | ||||
| 	if groupFile, err := os.Open(groupPath); err == nil { | ||||
| 		group = groupFile | ||||
| 		defer groupFile.Close() | ||||
| 	} | ||||
| 	return GetAdditionalGroups(additionalGroups, group) | ||||
| } | ||||
|  | ||||
| func ParseSubIDFile(path string) ([]SubID, error) { | ||||
| 	subid, err := os.Open(path) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	defer subid.Close() | ||||
| 	return ParseSubID(subid) | ||||
| } | ||||
|  | ||||
| func ParseSubID(subid io.Reader) ([]SubID, error) { | ||||
| 	return ParseSubIDFilter(subid, nil) | ||||
| } | ||||
|  | ||||
| func ParseSubIDFileFilter(path string, filter func(SubID) bool) ([]SubID, error) { | ||||
| 	subid, err := os.Open(path) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	defer subid.Close() | ||||
| 	return ParseSubIDFilter(subid, filter) | ||||
| } | ||||
|  | ||||
| func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) { | ||||
| 	if r == nil { | ||||
| 		return nil, errors.New("nil source for subid-formatted data") | ||||
| 	} | ||||
|  | ||||
| 	var ( | ||||
| 		s   = bufio.NewScanner(r) | ||||
| 		out = []SubID{} | ||||
| 	) | ||||
|  | ||||
| 	for s.Scan() { | ||||
| 		line := bytes.TrimSpace(s.Bytes()) | ||||
| 		if len(line) == 0 { | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		// see: man 5 subuid | ||||
| 		p := SubID{} | ||||
| 		parseLine(line, &p.Name, &p.SubID, &p.Count) | ||||
|  | ||||
| 		if filter == nil || filter(p) { | ||||
| 			out = append(out, p) | ||||
| 		} | ||||
| 	} | ||||
| 	if err := s.Err(); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	return out, nil | ||||
| } | ||||
|  | ||||
| func ParseIDMapFile(path string) ([]IDMap, error) { | ||||
| 	r, err := os.Open(path) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	defer r.Close() | ||||
| 	return ParseIDMap(r) | ||||
| } | ||||
|  | ||||
| func ParseIDMap(r io.Reader) ([]IDMap, error) { | ||||
| 	return ParseIDMapFilter(r, nil) | ||||
| } | ||||
|  | ||||
| func ParseIDMapFileFilter(path string, filter func(IDMap) bool) ([]IDMap, error) { | ||||
| 	r, err := os.Open(path) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	defer r.Close() | ||||
| 	return ParseIDMapFilter(r, filter) | ||||
| } | ||||
|  | ||||
| func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) { | ||||
| 	if r == nil { | ||||
| 		return nil, errors.New("nil source for idmap-formatted data") | ||||
| 	} | ||||
|  | ||||
| 	var ( | ||||
| 		s   = bufio.NewScanner(r) | ||||
| 		out = []IDMap{} | ||||
| 	) | ||||
|  | ||||
| 	for s.Scan() { | ||||
| 		line := bytes.TrimSpace(s.Bytes()) | ||||
| 		if len(line) == 0 { | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		// see: man 7 user_namespaces | ||||
| 		p := IDMap{} | ||||
| 		parseParts(bytes.Fields(line), &p.ID, &p.ParentID, &p.Count) | ||||
|  | ||||
| 		if filter == nil || filter(p) { | ||||
| 			out = append(out, p) | ||||
| 		} | ||||
| 	} | ||||
| 	if err := s.Err(); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	return out, nil | ||||
| } | ||||
							
								
								
									
										146
									
								
								vendor/github.com/opencontainers/runc/libcontainer/user/user_deprecated.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										146
									
								
								vendor/github.com/opencontainers/runc/libcontainer/user/user_deprecated.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,146 @@ | ||||
| // Package user is an alias for [github.com/moby/sys/user]. | ||||
| // | ||||
| // Deprecated: use [github.com/moby/sys/user]. | ||||
| package user | ||||
|  | ||||
| import ( | ||||
| 	"io" | ||||
|  | ||||
| 	"github.com/moby/sys/user" | ||||
| ) | ||||
|  | ||||
| var ( | ||||
| 	// ErrNoPasswdEntries is returned if no matching entries were found in /etc/passwd. | ||||
| 	ErrNoPasswdEntries = user.ErrNoPasswdEntries | ||||
| 	// ErrNoGroupEntries is returned if no matching entries were found in /etc/group. | ||||
| 	ErrNoGroupEntries = user.ErrNoGroupEntries | ||||
| 	// ErrRange is returned if a UID or GID is outside of the valid range. | ||||
| 	ErrRange = user.ErrRange | ||||
| ) | ||||
|  | ||||
| type ( | ||||
| 	User = user.User | ||||
|  | ||||
| 	Group = user.Group | ||||
|  | ||||
| 	// SubID represents an entry in /etc/sub{u,g}id. | ||||
| 	SubID = user.SubID | ||||
|  | ||||
| 	// IDMap represents an entry in /proc/PID/{u,g}id_map. | ||||
| 	IDMap = user.IDMap | ||||
|  | ||||
| 	ExecUser = user.ExecUser | ||||
| ) | ||||
|  | ||||
| func ParsePasswdFile(path string) ([]user.User, error) { | ||||
| 	return user.ParsePasswdFile(path) | ||||
| } | ||||
|  | ||||
| func ParsePasswd(passwd io.Reader) ([]user.User, error) { | ||||
| 	return user.ParsePasswd(passwd) | ||||
| } | ||||
|  | ||||
| func ParsePasswdFileFilter(path string, filter func(user.User) bool) ([]user.User, error) { | ||||
| 	return user.ParsePasswdFileFilter(path, filter) | ||||
| } | ||||
|  | ||||
| func ParsePasswdFilter(r io.Reader, filter func(user.User) bool) ([]user.User, error) { | ||||
| 	return user.ParsePasswdFilter(r, filter) | ||||
| } | ||||
|  | ||||
| func ParseGroupFile(path string) ([]user.Group, error) { | ||||
| 	return user.ParseGroupFile(path) | ||||
| } | ||||
|  | ||||
| func ParseGroup(group io.Reader) ([]user.Group, error) { | ||||
| 	return user.ParseGroup(group) | ||||
| } | ||||
|  | ||||
| func ParseGroupFileFilter(path string, filter func(user.Group) bool) ([]user.Group, error) { | ||||
| 	return user.ParseGroupFileFilter(path, filter) | ||||
| } | ||||
|  | ||||
| func ParseGroupFilter(r io.Reader, filter func(user.Group) bool) ([]user.Group, error) { | ||||
| 	return user.ParseGroupFilter(r, filter) | ||||
| } | ||||
|  | ||||
| // GetExecUserPath is a wrapper for GetExecUser. It reads data from each of the | ||||
| // given file paths and uses that data as the arguments to GetExecUser. If the | ||||
| // files cannot be opened for any reason, the error is ignored and a nil | ||||
| // io.Reader is passed instead. | ||||
| func GetExecUserPath(userSpec string, defaults *user.ExecUser, passwdPath, groupPath string) (*user.ExecUser, error) { | ||||
| 	return user.GetExecUserPath(userSpec, defaults, passwdPath, groupPath) | ||||
| } | ||||
|  | ||||
| // GetExecUser parses a user specification string (using the passwd and group | ||||
| // readers as sources for /etc/passwd and /etc/group data, respectively). In | ||||
| // the case of blank fields or missing data from the sources, the values in | ||||
| // defaults are used. | ||||
| // | ||||
| // GetExecUser will return an error if a user or group literal could not be | ||||
| // found in any entry in passwd and group respectively. | ||||
| // | ||||
| // Examples of valid user specifications are: | ||||
| //   - "" | ||||
| //   - "user" | ||||
| //   - "uid" | ||||
| //   - "user:group" | ||||
| //   - "uid:gid" | ||||
| //   - "user:gid" | ||||
| //   - "uid:group" | ||||
| // | ||||
| // It should be noted that if you specify a numeric user or group id, they will | ||||
| // not be evaluated as usernames (only the metadata will be filled). So attempting | ||||
| // to parse a user with user.Name = "1337" will produce the user with a UID of | ||||
| // 1337. | ||||
| func GetExecUser(userSpec string, defaults *user.ExecUser, passwd, group io.Reader) (*user.ExecUser, error) { | ||||
| 	return user.GetExecUser(userSpec, defaults, passwd, group) | ||||
| } | ||||
|  | ||||
| // GetAdditionalGroups looks up a list of groups by name or group id | ||||
| // against the given /etc/group formatted data. If a group name cannot | ||||
| // be found, an error will be returned. If a group id cannot be found, | ||||
| // or the given group data is nil, the id will be returned as-is | ||||
| // provided it is in the legal range. | ||||
| func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, error) { | ||||
| 	return user.GetAdditionalGroups(additionalGroups, group) | ||||
| } | ||||
|  | ||||
| // GetAdditionalGroupsPath is a wrapper around GetAdditionalGroups | ||||
| // that opens the groupPath given and gives it as an argument to | ||||
| // GetAdditionalGroups. | ||||
| func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) { | ||||
| 	return user.GetAdditionalGroupsPath(additionalGroups, groupPath) | ||||
| } | ||||
|  | ||||
| func ParseSubIDFile(path string) ([]user.SubID, error) { | ||||
| 	return user.ParseSubIDFile(path) | ||||
| } | ||||
|  | ||||
| func ParseSubID(subid io.Reader) ([]user.SubID, error) { | ||||
| 	return user.ParseSubID(subid) | ||||
| } | ||||
|  | ||||
| func ParseSubIDFileFilter(path string, filter func(user.SubID) bool) ([]user.SubID, error) { | ||||
| 	return user.ParseSubIDFileFilter(path, filter) | ||||
| } | ||||
|  | ||||
| func ParseSubIDFilter(r io.Reader, filter func(user.SubID) bool) ([]user.SubID, error) { | ||||
| 	return user.ParseSubIDFilter(r, filter) | ||||
| } | ||||
|  | ||||
| func ParseIDMapFile(path string) ([]user.IDMap, error) { | ||||
| 	return user.ParseIDMapFile(path) | ||||
| } | ||||
|  | ||||
| func ParseIDMap(r io.Reader) ([]user.IDMap, error) { | ||||
| 	return user.ParseIDMap(r) | ||||
| } | ||||
|  | ||||
| func ParseIDMapFileFilter(path string, filter func(user.IDMap) bool) ([]user.IDMap, error) { | ||||
| 	return user.ParseIDMapFileFilter(path, filter) | ||||
| } | ||||
|  | ||||
| func ParseIDMapFilter(r io.Reader, filter func(user.IDMap) bool) ([]user.IDMap, error) { | ||||
| 	return user.ParseIDMapFilter(r, filter) | ||||
| } | ||||
							
								
								
									
										43
									
								
								vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										43
									
								
								vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -1,43 +0,0 @@ | ||||
| //go:build gofuzz | ||||
| // +build gofuzz | ||||
|  | ||||
| package user | ||||
|  | ||||
| import ( | ||||
| 	"io" | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| // IsDivisbleBy reports whether n is evenly divisible by divisibleby, i.e. | ||||
| // whether n % divisibleby is zero. | ||||
| func IsDivisbleBy(n int, divisibleby int) bool { | ||||
| 	return (n % divisibleby) == 0 | ||||
| } | ||||
|  | ||||
| // FuzzUser is the fuzzing entry point for this package (built only with the | ||||
| // gofuzz build tag). It returns -1 for empty input or input whose length is | ||||
| // not a multiple of 5, and 1 otherwise. | ||||
| // | ||||
| // The input is cut into five equally sized chunks, used in order as: | ||||
| // passwd data for ParsePasswdFilter, group data, a single additional-group | ||||
| // name for GetAdditionalGroups, passwd data for GetExecUser, and the user | ||||
| // spec for GetExecUser. All results and errors are discarded. | ||||
| func FuzzUser(data []byte) int { | ||||
| 	if len(data) == 0 { | ||||
| 		return -1 | ||||
| 	} | ||||
| 	if !IsDivisbleBy(len(data), 5) { | ||||
| 		return -1 | ||||
| 	} | ||||
|  | ||||
| 	var divided [][]byte | ||||
|  | ||||
| 	// len(data) is a non-zero multiple of 5 here, so chunkSize is at least 1 | ||||
| 	// and the loop below yields exactly five chunks. | ||||
| 	chunkSize := len(data) / 5 | ||||
|  | ||||
| 	for i := 0; i < len(data); i += chunkSize { | ||||
| 		end := i + chunkSize | ||||
|  | ||||
| 		divided = append(divided, data[i:end]) | ||||
| 	} | ||||
|  | ||||
| 	_, _ = ParsePasswdFilter(strings.NewReader(string(divided[0])), nil) | ||||
|  | ||||
| 	var passwd, group io.Reader | ||||
|  | ||||
| 	group = strings.NewReader(string(divided[1])) | ||||
| 	_, _ = GetAdditionalGroups([]string{string(divided[2])}, group) | ||||
|  | ||||
| 	// NOTE(review): group is the same reader that was just passed to | ||||
| 	// GetAdditionalGroups, so it has presumably been consumed already and | ||||
| 	// GetExecUser would see no group data. Confirm whether a fresh reader | ||||
| 	// was intended. | ||||
| 	passwd = strings.NewReader(string(divided[3])) | ||||
| 	_, _ = GetExecUser(string(divided[4]), nil, passwd, group) | ||||
| 	return 1 | ||||
| } | ||||
							
								
								
									
										1
									
								
								vendor/github.com/opencontainers/runc/libcontainer/userns/userns.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								vendor/github.com/opencontainers/runc/libcontainer/userns/userns.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -1,5 +1,4 @@ | ||||
| package userns | ||||
|  | ||||
| // RunningInUserNS detects whether we are currently running in a user namespace. | ||||
| // Originally copied from github.com/lxc/lxd/shared/util.go | ||||
| var RunningInUserNS = runningInUserNS | ||||
|  | ||||
							
								
								
									
										11
									
								
								vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										11
									
								
								vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -3,14 +3,7 @@ | ||||
|  | ||||
| package userns | ||||
|  | ||||
| import ( | ||||
| 	"strings" | ||||
|  | ||||
| 	"github.com/opencontainers/runc/libcontainer/user" | ||||
| ) | ||||
|  | ||||
| func FuzzUIDMap(data []byte) int { | ||||
| 	uidmap, _ := user.ParseIDMap(strings.NewReader(string(data))) | ||||
| 	_ = uidMapInUserNS(uidmap) | ||||
| func FuzzUIDMap(uidmap []byte) int { | ||||
| 	_ = uidMapInUserNS(string(uidmap)) | ||||
| 	return 1 | ||||
| } | ||||
|  | ||||
							
								
								
									
										44
									
								
								vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										44
									
								
								vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -1,9 +1,10 @@ | ||||
| package userns | ||||
|  | ||||
| import ( | ||||
| 	"bufio" | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"sync" | ||||
|  | ||||
| 	"github.com/opencontainers/runc/libcontainer/user" | ||||
| ) | ||||
|  | ||||
| var ( | ||||
| @ -12,26 +13,43 @@ var ( | ||||
| ) | ||||
|  | ||||
| // runningInUserNS detects whether we are currently running in a user namespace. | ||||
| // Originally copied from github.com/lxc/lxd/shared/util.go | ||||
| // | ||||
| // Originally copied from https://github.com/lxc/incus/blob/e45085dd42f826b3c8c3228e9733c0b6f998eafe/shared/util.go#L678-L700. | ||||
| func runningInUserNS() bool { | ||||
| 	nsOnce.Do(func() { | ||||
| 		uidmap, err := user.CurrentProcessUIDMap() | ||||
| 		file, err := os.Open("/proc/self/uid_map") | ||||
| 		if err != nil { | ||||
| 			// This kernel-provided file only exists if user namespaces are supported | ||||
| 			// This kernel-provided file only exists if user namespaces are supported. | ||||
| 			return | ||||
| 		} | ||||
| 		inUserNS = uidMapInUserNS(uidmap) | ||||
| 		defer file.Close() | ||||
|  | ||||
| 		buf := bufio.NewReader(file) | ||||
| 		l, _, err := buf.ReadLine() | ||||
| 		if err != nil { | ||||
| 			return | ||||
| 		} | ||||
|  | ||||
| 		inUserNS = uidMapInUserNS(string(l)) | ||||
| 	}) | ||||
| 	return inUserNS | ||||
| } | ||||
|  | ||||
| func uidMapInUserNS(uidmap []user.IDMap) bool { | ||||
| 	/* | ||||
| 	 * We assume we are in the initial user namespace if we have a full | ||||
| 	 * range - 4294967295 uids starting at uid 0. | ||||
| 	 */ | ||||
| 	if len(uidmap) == 1 && uidmap[0].ID == 0 && uidmap[0].ParentID == 0 && uidmap[0].Count == 4294967295 { | ||||
| func uidMapInUserNS(uidMap string) bool { | ||||
| 	if uidMap == "" { | ||||
| 		// The file exists but is empty (the initial state when a userns is | ||||
| 		// created, see user_namespaces(7)). | ||||
| 		return true | ||||
| 	} | ||||
|  | ||||
| 	var a, b, c int64 | ||||
| 	if _, err := fmt.Sscanf(uidMap, "%d %d %d", &a, &b, &c); err != nil { | ||||
| 		// Assume we are in a regular, non user namespace. | ||||
| 		return false | ||||
| 	} | ||||
| 	return true | ||||
|  | ||||
| 	// As per user_namespaces(7), /proc/self/uid_map of | ||||
| 	// the initial user namespace shows 0 0 4294967295. | ||||
| 	initNS := a == 0 && b == 0 && c == 4294967295 | ||||
| 	return !initNS | ||||
| } | ||||
|  | ||||
							
								
								
									
										4
									
								
								vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -3,8 +3,6 @@ | ||||
|  | ||||
| package userns | ||||
|  | ||||
| import "github.com/opencontainers/runc/libcontainer/user" | ||||
|  | ||||
| // runningInUserNS is a stub for non-Linux systems | ||||
| // Always returns false | ||||
| func runningInUserNS() bool { | ||||
| @ -13,6 +11,6 @@ func runningInUserNS() bool { | ||||
|  | ||||
| // uidMapInUserNS is a stub for non-Linux systems | ||||
| // Always returns false | ||||
| func uidMapInUserNS(uidmap []user.IDMap) bool { | ||||
| func uidMapInUserNS(uidMap string) bool { | ||||
| 	return false | ||||
| } | ||||
|  | ||||
							
								
								
									
										156
									
								
								vendor/github.com/opencontainers/runc/libcontainer/userns/usernsfd_linux.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										156
									
								
								vendor/github.com/opencontainers/runc/libcontainer/userns/usernsfd_linux.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,156 @@ | ||||
| package userns | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"sort" | ||||
| 	"strings" | ||||
| 	"sync" | ||||
| 	"syscall" | ||||
|  | ||||
| 	"github.com/sirupsen/logrus" | ||||
| 	"golang.org/x/sys/unix" | ||||
|  | ||||
| 	"github.com/opencontainers/runc/libcontainer/configs" | ||||
| ) | ||||
|  | ||||
| type Mapping struct { | ||||
| 	UIDMappings []configs.IDMap | ||||
| 	GIDMappings []configs.IDMap | ||||
| } | ||||
|  | ||||
| func (m Mapping) toSys() (uids, gids []syscall.SysProcIDMap) { | ||||
| 	for _, uid := range m.UIDMappings { | ||||
| 		uids = append(uids, syscall.SysProcIDMap{ | ||||
| 			ContainerID: int(uid.ContainerID), | ||||
| 			HostID:      int(uid.HostID), | ||||
| 			Size:        int(uid.Size), | ||||
| 		}) | ||||
| 	} | ||||
| 	for _, gid := range m.GIDMappings { | ||||
| 		gids = append(gids, syscall.SysProcIDMap{ | ||||
| 			ContainerID: int(gid.ContainerID), | ||||
| 			HostID:      int(gid.HostID), | ||||
| 			Size:        int(gid.Size), | ||||
| 		}) | ||||
| 	} | ||||
| 	return | ||||
| } | ||||
|  | ||||
| // id returns a unique identifier for this mapping, agnostic of the order of | ||||
| // the uid and gid mappings (because the order doesn't matter to the kernel). | ||||
| // The set of userns handles is indexed using this ID. | ||||
| func (m Mapping) id() string { | ||||
| 	var uids, gids []string | ||||
| 	for _, idmap := range m.UIDMappings { | ||||
| 		uids = append(uids, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size)) | ||||
| 	} | ||||
| 	for _, idmap := range m.GIDMappings { | ||||
| 		gids = append(gids, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size)) | ||||
| 	} | ||||
| 	// We don't care about the sort order -- just sort them. | ||||
| 	sort.Strings(uids) | ||||
| 	sort.Strings(gids) | ||||
| 	return "uid=" + strings.Join(uids, ",") + ";gid=" + strings.Join(gids, ",") | ||||
| } | ||||
|  | ||||
| type Handles struct { | ||||
| 	m    sync.Mutex | ||||
| 	maps map[string]*os.File | ||||
| } | ||||
|  | ||||
| // Release frees all resources associated with these Handles. All existing | ||||
| // files returned from Get() will continue to work even after calling | ||||
| // Release(). The same Handles can be re-used after calling Release(). | ||||
| func (hs *Handles) Release() { | ||||
| 	hs.m.Lock() | ||||
| 	defer hs.m.Unlock() | ||||
|  | ||||
| 	// Close the files for good measure, though GC will do that for us anyway. | ||||
| 	for _, file := range hs.maps { | ||||
| 		_ = file.Close() | ||||
| 	} | ||||
| 	hs.maps = nil | ||||
| } | ||||
|  | ||||
| func spawnProc(req Mapping) (*os.Process, error) { | ||||
| 	// We need to spawn a subprocess with the requested mappings, which is | ||||
| 	// unfortunately quite expensive. The "safe" way of doing this is natively | ||||
| 	// with Go (and then spawning something like "sleep infinity"), but | ||||
| 	// execve() is a waste of cycles because we just need some process to have | ||||
| 	// the right mapping, we don't care what it's executing. The "unsafe" | ||||
| 	// option of doing a clone() behind the back of Go is probably okay in | ||||
| 	// theory as long as we just do kill(getpid(), SIGSTOP). However, if we | ||||
| 	// tell Go to put the new process into PTRACE_TRACEME mode, we can avoid | ||||
| 	// the exec and not have to faff around with the mappings. | ||||
| 	// | ||||
| 	// Note that Go's stdlib does not support newuidmap, but in the case of | ||||
| 	// id-mapped mounts, it seems incredibly unlikely that the user will be | ||||
| 	// requesting us to do a remapping as an unprivileged user with mappings | ||||
| 	// they have privileges over. | ||||
| 	logrus.Debugf("spawning dummy process for id-mapping %s", req.id()) | ||||
| 	uidMappings, gidMappings := req.toSys() | ||||
| 	// We don't need to use /proc/thread-self here because the exe mm of a | ||||
| 	// thread-group is guaranteed to be the same for all threads by definition. | ||||
| 	// This lets us avoid having to do runtime.LockOSThread. | ||||
| 	return os.StartProcess("/proc/self/exe", []string{"runc", "--help"}, &os.ProcAttr{ | ||||
| 		Sys: &syscall.SysProcAttr{ | ||||
| 			Cloneflags:                 unix.CLONE_NEWUSER, | ||||
| 			UidMappings:                uidMappings, | ||||
| 			GidMappings:                gidMappings, | ||||
| 			GidMappingsEnableSetgroups: false, | ||||
| 			// Put the process into PTRACE_TRACEME mode to allow us to get the | ||||
| 			// userns without having a proper execve() target. | ||||
| 			Ptrace: true, | ||||
| 		}, | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| func dupFile(f *os.File) (*os.File, error) { | ||||
| 	newFd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0) | ||||
| 	if err != nil { | ||||
| 		return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err) | ||||
| 	} | ||||
| 	return os.NewFile(uintptr(newFd), f.Name()), nil | ||||
| } | ||||
|  | ||||
| // Get returns a handle to a /proc/$pid/ns/user nsfs file with the requested | ||||
| // mapping. The processes spawned to produce userns nsfds are cached, so if | ||||
| // equivalent user namespace mappings are requested, the same user namespace | ||||
| // will be returned. The caller is responsible for closing the returned file | ||||
| // descriptor. | ||||
| func (hs *Handles) Get(req Mapping) (file *os.File, err error) { | ||||
| 	hs.m.Lock() | ||||
| 	defer hs.m.Unlock() | ||||
|  | ||||
| 	if hs.maps == nil { | ||||
| 		hs.maps = make(map[string]*os.File) | ||||
| 	} | ||||
|  | ||||
| 	file, ok := hs.maps[req.id()] | ||||
| 	if !ok { | ||||
| 		proc, err := spawnProc(req) | ||||
| 		if err != nil { | ||||
| 			return nil, fmt.Errorf("failed to spawn dummy process for map %s: %w", req.id(), err) | ||||
| 		} | ||||
| 		// Make sure we kill the helper process. We ignore errors because | ||||
| 		// there's not much we can do about them anyway. | ||||
| 		defer func() { | ||||
| 			_ = proc.Kill() | ||||
| 			_, _ = proc.Wait() | ||||
| 		}() | ||||
|  | ||||
| 		// Stash away a handle to the userns file. This is neater than keeping | ||||
| 		// the process alive, because Go's GC can handle files much better than | ||||
| 		// leaked processes, and having long-living useless processes seems | ||||
| 		// less than ideal. | ||||
| 		file, err = os.Open(fmt.Sprintf("/proc/%d/ns/user", proc.Pid)) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		hs.maps[req.id()] = file | ||||
| 	} | ||||
| 	// Duplicate the file, to make sure the lifecycle of each *os.File we | ||||
| 	// return is independent. | ||||
| 	return dupFile(file) | ||||
| } | ||||
							
								
								
									
										85
									
								
								vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										85
									
								
								vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -19,13 +19,14 @@ package utils | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"runtime" | ||||
|  | ||||
| 	"golang.org/x/sys/unix" | ||||
| ) | ||||
|  | ||||
| // MaxSendfdLen is the maximum length of the name of a file descriptor being | ||||
| // sent using SendFd. The name of the file handle returned by RecvFd will never | ||||
| // be larger than this value. | ||||
| // MaxNameLen is the maximum length of the name of a file descriptor being sent | ||||
| // using SendFile. The name of the file handle returned by RecvFile will never be | ||||
| // larger than this value. | ||||
| const MaxNameLen = 4096 | ||||
|  | ||||
| // oobSpace is the size of the oob slice required to store a single FD. Note | ||||
| @ -33,26 +34,21 @@ const MaxNameLen = 4096 | ||||
| // so sizeof(fd) = 4. | ||||
| var oobSpace = unix.CmsgSpace(4) | ||||
|  | ||||
| // RecvFd waits for a file descriptor to be sent over the given AF_UNIX | ||||
| // RecvFile waits for a file descriptor to be sent over the given AF_UNIX | ||||
| // socket. The file name of the remote file descriptor will be recreated | ||||
| // locally (it is sent as non-auxiliary data in the same payload). | ||||
| func RecvFd(socket *os.File) (*os.File, error) { | ||||
| 	// For some reason, unix.Recvmsg uses the length rather than the capacity | ||||
| 	// when passing the msg_controllen and other attributes to recvmsg.  So we | ||||
| 	// have to actually set the length. | ||||
| func RecvFile(socket *os.File) (_ *os.File, Err error) { | ||||
| 	name := make([]byte, MaxNameLen) | ||||
| 	oob := make([]byte, oobSpace) | ||||
|  | ||||
| 	sockfd := socket.Fd() | ||||
| 	n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0) | ||||
| 	n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, unix.MSG_CMSG_CLOEXEC) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	if n >= MaxNameLen || oobn != oobSpace { | ||||
| 		return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn) | ||||
| 		return nil, fmt.Errorf("recvfile: incorrect number of bytes read (n=%d oobn=%d)", n, oobn) | ||||
| 	} | ||||
|  | ||||
| 	// Truncate. | ||||
| 	name = name[:n] | ||||
| 	oob = oob[:oobn] | ||||
| @ -61,36 +57,63 @@ func RecvFd(socket *os.File) (*os.File, error) { | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	// We cannot control how many SCM_RIGHTS we receive, and upon receiving | ||||
| 	// them all of the descriptors are installed in our fd table, so we need to | ||||
| 	// parse all of the SCM_RIGHTS we received in order to close all of the | ||||
| 	// descriptors on error. | ||||
| 	var fds []int | ||||
| 	defer func() { | ||||
| 		for i, fd := range fds { | ||||
| 			if i == 0 && Err == nil { | ||||
| 				// Only close the first one on error. | ||||
| 				continue | ||||
| 			} | ||||
| 			// Always close extra ones. | ||||
| 			_ = unix.Close(fd) | ||||
| 		} | ||||
| 	}() | ||||
| 	var lastErr error | ||||
| 	for _, scm := range scms { | ||||
| 		if scm.Header.Type == unix.SCM_RIGHTS { | ||||
| 			scmFds, err := unix.ParseUnixRights(&scm) | ||||
| 			if err != nil { | ||||
| 				lastErr = err | ||||
| 			} else { | ||||
| 				fds = append(fds, scmFds...) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	if lastErr != nil { | ||||
| 		return nil, lastErr | ||||
| 	} | ||||
|  | ||||
| 	// We do this after collecting the fds to make sure we close them all when | ||||
| 	// returning an error here. | ||||
| 	if len(scms) != 1 { | ||||
| 		return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms)) | ||||
| 	} | ||||
| 	scm := scms[0] | ||||
|  | ||||
| 	fds, err := unix.ParseUnixRights(&scm) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	if len(fds) != 1 { | ||||
| 		return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds)) | ||||
| 	} | ||||
| 	fd := uintptr(fds[0]) | ||||
|  | ||||
| 	return os.NewFile(fd, string(name)), nil | ||||
| 	return os.NewFile(uintptr(fds[0]), string(name)), nil | ||||
| } | ||||
|  | ||||
| // SendFd sends a file descriptor over the given AF_UNIX socket. In | ||||
| // addition, the file.Name() of the given file will also be sent as | ||||
| // non-auxiliary data in the same payload (allowing to send contextual | ||||
| // information for a file descriptor). | ||||
| func SendFd(socket *os.File, name string, fd uintptr) error { | ||||
| // SendFile sends a file over the given AF_UNIX socket. file.Name() is also | ||||
| // included so that if the other end uses RecvFile, the file will have the same | ||||
| // name information. | ||||
| func SendFile(socket *os.File, file *os.File) error { | ||||
| 	name := file.Name() | ||||
| 	if len(name) >= MaxNameLen { | ||||
| 		return fmt.Errorf("sendfd: filename too long: %s", name) | ||||
| 	} | ||||
| 	return SendFds(socket, []byte(name), int(fd)) | ||||
| 	err := SendRawFd(socket, name, file.Fd()) | ||||
| 	runtime.KeepAlive(file) | ||||
| 	return err | ||||
| } | ||||
|  | ||||
| // SendFds sends a list of files descriptor and msg over the given AF_UNIX socket. | ||||
| func SendFds(socket *os.File, msg []byte, fds ...int) error { | ||||
| 	oob := unix.UnixRights(fds...) | ||||
| 	return unix.Sendmsg(int(socket.Fd()), msg, oob, nil, 0) | ||||
| // SendRawFd sends a specific file descriptor over the given AF_UNIX socket. | ||||
| func SendRawFd(socket *os.File, msg string, fd uintptr) error { | ||||
| 	oob := unix.UnixRights(int(fd)) | ||||
| 	return unix.Sendmsg(int(socket.Fd()), []byte(msg), oob, nil, 0) | ||||
| } | ||||
|  | ||||
							
								
								
									
										58
									
								
								vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										58
									
								
								vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -3,15 +3,12 @@ package utils | ||||
| import ( | ||||
| 	"encoding/binary" | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 	"unsafe" | ||||
|  | ||||
| 	securejoin "github.com/cyphar/filepath-securejoin" | ||||
| 	"golang.org/x/sys/unix" | ||||
| ) | ||||
|  | ||||
| @ -43,6 +40,9 @@ func ExitStatus(status unix.WaitStatus) int { | ||||
| } | ||||
|  | ||||
| // WriteJSON writes the provided struct v to w using standard json marshaling | ||||
| // without a trailing newline. This is used instead of json.Encoder because | ||||
| // there might be a problem in json decoder in some cases, see: | ||||
| // https://github.com/docker/docker/issues/14203#issuecomment-174177790 | ||||
| func WriteJSON(w io.Writer, v interface{}) error { | ||||
| 	data, err := json.Marshal(v) | ||||
| 	if err != nil { | ||||
| @ -99,52 +99,16 @@ func stripRoot(root, path string) string { | ||||
| 	return CleanPath("/" + path) | ||||
| } | ||||
|  | ||||
| // WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...) | ||||
| // corresponding to the unsafePath resolved within the root. Before passing the | ||||
| // fd, this path is verified to have been inside the root -- so operating on it | ||||
| // through the passed fdpath should be safe. Do not access this path through | ||||
| // the original path strings, and do not attempt to use the pathname outside of | ||||
| // the passed closure (the file handle will be freed once the closure returns). | ||||
| func WithProcfd(root, unsafePath string, fn func(procfd string) error) error { | ||||
| 	// Remove the root then forcefully resolve inside the root. | ||||
| 	unsafePath = stripRoot(root, unsafePath) | ||||
| 	path, err := securejoin.SecureJoin(root, unsafePath) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("resolving path inside rootfs failed: %w", err) | ||||
| 	} | ||||
|  | ||||
| 	// Open the target path. | ||||
| 	fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("open o_path procfd: %w", err) | ||||
| 	} | ||||
| 	defer fh.Close() | ||||
|  | ||||
| 	// Double-check the path is the one we expected. | ||||
| 	procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd())) | ||||
| 	if realpath, err := os.Readlink(procfd); err != nil { | ||||
| 		return fmt.Errorf("procfd verification failed: %w", err) | ||||
| 	} else if realpath != path { | ||||
| 		return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath) | ||||
| 	} | ||||
|  | ||||
| 	// Run the closure. | ||||
| 	return fn(procfd) | ||||
| } | ||||
|  | ||||
| // SearchLabels searches a list of key-value pairs for the provided key and | ||||
| // returns the corresponding value. The pairs must be separated with '='. | ||||
| func SearchLabels(labels []string, query string) string { | ||||
| 	for _, l := range labels { | ||||
| 		parts := strings.SplitN(l, "=", 2) | ||||
| 		if len(parts) < 2 { | ||||
| 			continue | ||||
| 		} | ||||
| 		if parts[0] == query { | ||||
| 			return parts[1] | ||||
| // SearchLabels searches through a list of key=value pairs for a given key, | ||||
| // returning its value and a boolean flag telling whether the key exists. | ||||
| func SearchLabels(labels []string, key string) (string, bool) { | ||||
| 	key += "=" | ||||
| 	for _, s := range labels { | ||||
| 		if strings.HasPrefix(s, key) { | ||||
| 			return s[len(key):], true | ||||
| 		} | ||||
| 	} | ||||
| 	return "" | ||||
| 	return "", false | ||||
| } | ||||
|  | ||||
| // Annotations returns the bundle path and user defined annotations from the | ||||
|  | ||||
							
								
								
									
										154
									
								
								vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										154
									
								
								vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @ -5,10 +5,16 @@ package utils | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"math" | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"runtime" | ||||
| 	"strconv" | ||||
| 	"sync" | ||||
| 	_ "unsafe" // for go:linkname | ||||
|  | ||||
| 	securejoin "github.com/cyphar/filepath-securejoin" | ||||
| 	"github.com/sirupsen/logrus" | ||||
| 	"golang.org/x/sys/unix" | ||||
| ) | ||||
|  | ||||
| @ -24,12 +30,39 @@ func EnsureProcHandle(fh *os.File) error { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| var ( | ||||
| 	haveCloseRangeCloexecBool bool | ||||
| 	haveCloseRangeCloexecOnce sync.Once | ||||
| ) | ||||
|  | ||||
| func haveCloseRangeCloexec() bool { | ||||
| 	haveCloseRangeCloexecOnce.Do(func() { | ||||
| 		// Make sure we're not closing a random file descriptor. | ||||
| 		tmpFd, err := unix.FcntlInt(0, unix.F_DUPFD_CLOEXEC, 0) | ||||
| 		if err != nil { | ||||
| 			return | ||||
| 		} | ||||
| 		defer unix.Close(tmpFd) | ||||
|  | ||||
| 		err = unix.CloseRange(uint(tmpFd), uint(tmpFd), unix.CLOSE_RANGE_CLOEXEC) | ||||
| 		// Any error means we cannot use close_range(CLOSE_RANGE_CLOEXEC). | ||||
| 		// -ENOSYS and -EINVAL ultimately mean we don't have support, but any | ||||
| 		// other potential error would imply that even the most basic close | ||||
| 		// operation wouldn't work. | ||||
| 		haveCloseRangeCloexecBool = err == nil | ||||
| 	}) | ||||
| 	return haveCloseRangeCloexecBool | ||||
| } | ||||
|  | ||||
| type fdFunc func(fd int) | ||||
|  | ||||
| // fdRangeFrom calls the passed fdFunc for each file descriptor that is open in | ||||
| // the current process. | ||||
| func fdRangeFrom(minFd int, fn fdFunc) error { | ||||
| 	fdDir, err := os.Open("/proc/self/fd") | ||||
| 	procSelfFd, closer := ProcThreadSelf("fd") | ||||
| 	defer closer() | ||||
|  | ||||
| 	fdDir, err := os.Open(procSelfFd) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| @ -67,6 +100,12 @@ func fdRangeFrom(minFd int, fn fdFunc) error { | ||||
| // CloseExecFrom sets the close-on-exec (FD_CLOEXEC) flag on all file | ||||
| // descriptors greater than or equal to minFd in the current process. | ||||
| func CloseExecFrom(minFd int) error { | ||||
| 	// Use close_range(CLOSE_RANGE_CLOEXEC) if possible. | ||||
| 	if haveCloseRangeCloexec() { | ||||
| 		err := unix.CloseRange(uint(minFd), math.MaxUint, unix.CLOSE_RANGE_CLOEXEC) | ||||
| 		return os.NewSyscallError("close_range", err) | ||||
| 	} | ||||
| 	// Otherwise, fall back to the standard loop. | ||||
| 	return fdRangeFrom(minFd, unix.CloseOnExec) | ||||
| } | ||||
|  | ||||
| @ -89,7 +128,8 @@ func runtime_IsPollDescriptor(fd uintptr) bool //nolint:revive | ||||
| // *os.File operations would apply to the wrong file). This function is only | ||||
| // intended to be called from the last stage of runc init. | ||||
| func UnsafeCloseFrom(minFd int) error { | ||||
| 	// We must not close some file descriptors. | ||||
| 	// We cannot use close_range(2) even if it is available, because we must | ||||
| 	// not close some file descriptors. | ||||
| 	return fdRangeFrom(minFd, func(fd int) { | ||||
| 		if runtime_IsPollDescriptor(uintptr(fd)) { | ||||
| 			// These are the Go runtime's internal netpoll file descriptors. | ||||
| @ -107,11 +147,117 @@ func UnsafeCloseFrom(minFd int) error { | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| // NewSockPair returns a new unix socket pair | ||||
| func NewSockPair(name string) (parent *os.File, child *os.File, err error) { | ||||
| // NewSockPair returns a new SOCK_STREAM unix socket pair. | ||||
| func NewSockPair(name string) (parent, child *os.File, err error) { | ||||
| 	fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) | ||||
| 	if err != nil { | ||||
| 		return nil, nil, err | ||||
| 	} | ||||
| 	return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil | ||||
| } | ||||
|  | ||||
| // WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...) | ||||
| // corresponding to the unsafePath resolved within the root. Before passing the | ||||
| // fd, this path is verified to have been inside the root -- so operating on it | ||||
| // through the passed fdpath should be safe. Do not access this path through | ||||
| // the original path strings, and do not attempt to use the pathname outside of | ||||
| // the passed closure (the file handle will be freed once the closure returns). | ||||
| func WithProcfd(root, unsafePath string, fn func(procfd string) error) error { | ||||
| 	// Remove the root then forcefully resolve inside the root. | ||||
| 	unsafePath = stripRoot(root, unsafePath) | ||||
| 	path, err := securejoin.SecureJoin(root, unsafePath) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("resolving path inside rootfs failed: %w", err) | ||||
| 	} | ||||
|  | ||||
| 	procSelfFd, closer := ProcThreadSelf("fd/") | ||||
| 	defer closer() | ||||
|  | ||||
| 	// Open the target path. | ||||
| 	fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("open o_path procfd: %w", err) | ||||
| 	} | ||||
| 	defer fh.Close() | ||||
|  | ||||
| 	procfd := filepath.Join(procSelfFd, strconv.Itoa(int(fh.Fd()))) | ||||
| 	// Double-check the path is the one we expected. | ||||
| 	if realpath, err := os.Readlink(procfd); err != nil { | ||||
| 		return fmt.Errorf("procfd verification failed: %w", err) | ||||
| 	} else if realpath != path { | ||||
| 		return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath) | ||||
| 	} | ||||
|  | ||||
| 	return fn(procfd) | ||||
| } | ||||
|  | ||||
| type ProcThreadSelfCloser func() | ||||
|  | ||||
| var ( | ||||
| 	haveProcThreadSelf     bool | ||||
| 	haveProcThreadSelfOnce sync.Once | ||||
| ) | ||||
|  | ||||
| // ProcThreadSelf returns a string that is equivalent to | ||||
| // /proc/thread-self/<subpath>, with a graceful fallback on older kernels where | ||||
| // /proc/thread-self doesn't exist. This method DOES NOT use SecureJoin, | ||||
| // meaning that the passed string needs to be trusted. The caller _must_ call | ||||
| // the returned ProcThreadSelfCloser function (which is runtime.UnlockOSThread) | ||||
| // *only once* after it has finished using the returned path string. | ||||
| func ProcThreadSelf(subpath string) (string, ProcThreadSelfCloser) { | ||||
| 	haveProcThreadSelfOnce.Do(func() { | ||||
| 		if _, err := os.Stat("/proc/thread-self/"); err == nil { | ||||
| 			haveProcThreadSelf = true | ||||
| 		} else { | ||||
| 			logrus.Debugf("cannot stat /proc/thread-self (%v), falling back to /proc/self/task/<tid>", err) | ||||
| 		} | ||||
| 	}) | ||||
|  | ||||
| 	// We need to lock our thread until the caller is done with the path string | ||||
| 	// because any non-atomic operation on the path (such as opening a file, | ||||
| 	// then reading it) could be interrupted by the Go runtime where the | ||||
| 	// underlying thread is swapped out and the original thread is killed, | ||||
| 	// resulting in pull-your-hair-out-hard-to-debug issues in the caller. In | ||||
| 	// addition, the pre-3.17 fallback makes everything non-atomic because the | ||||
| 	// same thing could happen between unix.Gettid() and the path operations. | ||||
| 	// | ||||
| 	// In theory, we don't need to lock in the atomic user case when using | ||||
| 	// /proc/thread-self/, but it's better to be safe than sorry (and there are | ||||
| 	// only one or two truly atomic users of /proc/thread-self/). | ||||
| 	runtime.LockOSThread() | ||||
|  | ||||
| 	threadSelf := "/proc/thread-self/" | ||||
| 	if !haveProcThreadSelf { | ||||
| 		// Pre-3.17 kernels did not have /proc/thread-self, so do it manually. | ||||
| 		threadSelf = "/proc/self/task/" + strconv.Itoa(unix.Gettid()) + "/" | ||||
| 		if _, err := os.Stat(threadSelf); err != nil { | ||||
| 			// Unfortunately, this code is called from rootfs_linux.go where we | ||||
| 			// are running inside the pid namespace of the container but /proc | ||||
| 			// is the host's procfs. Unfortunately there is no real way to get | ||||
| 			// the correct tid to use here (the kernel age means we cannot do | ||||
| 			// things like set up a private fsopen("proc") -- even scanning | ||||
| 			// NSpid in all of the tasks in /proc/self/task/*/status requires | ||||
| 			// Linux 4.1). | ||||
| 			// | ||||
| 			// So, we just have to assume that /proc/self is acceptable in this | ||||
| 			// one specific case. | ||||
| 			if os.Getpid() == 1 { | ||||
| 				logrus.Debugf("/proc/thread-self (tid=%d) cannot be emulated inside the initial container setup -- using /proc/self instead: %v", unix.Gettid(), err) | ||||
| 			} else { | ||||
| 				// This should never happen, but the fallback should work in most cases... | ||||
| 				logrus.Warnf("/proc/thread-self could not be emulated for pid=%d (tid=%d) -- using more buggy /proc/self fallback instead: %v", os.Getpid(), unix.Gettid(), err) | ||||
| 			} | ||||
| 			threadSelf = "/proc/self/" | ||||
| 		} | ||||
| 	} | ||||
| 	return threadSelf + subpath, runtime.UnlockOSThread | ||||
| } | ||||
|  | ||||
| // ProcThreadSelfFd is a small wrapper around ProcThreadSelf to make it easier | ||||
| // to create a /proc/thread-self handle for a given file descriptor. | ||||
| // | ||||
| // It is basically equivalent to ProcThreadSelf(fmt.Sprintf("fd/%d", fd)), but | ||||
| // without using fmt.Sprintf to avoid unneeded overhead. | ||||
| func ProcThreadSelfFd(fd uintptr) (string, ProcThreadSelfCloser) { | ||||
| 	return ProcThreadSelf("fd/" + strconv.FormatUint(uint64(fd), 10)) | ||||
| } | ||||
|  | ||||
		Reference in New Issue
	
	Block a user
	 Matt Heon
					Matt Heon