//go:build !remote

package libpod

import (
	"context"
	"errors"
	"fmt"
	"math/rand"
	"os"
	"reflect"
	"sort"
	"strconv"
	"strings"
	"time"
	"unicode/utf8"

	"github.com/containers/common/libnetwork/types"
	"github.com/containers/common/pkg/config"
	"github.com/containers/podman/v5/libpod/define"
	"github.com/containers/podman/v5/pkg/annotations"
	"github.com/containers/podman/v5/pkg/domain/entities"
	"github.com/containers/podman/v5/pkg/env"
	v1 "github.com/containers/podman/v5/pkg/k8s.io/api/core/v1"
	"github.com/containers/podman/v5/pkg/k8s.io/apimachinery/pkg/api/resource"
	v12 "github.com/containers/podman/v5/pkg/k8s.io/apimachinery/pkg/apis/meta/v1"
	"github.com/containers/podman/v5/pkg/k8s.io/apimachinery/pkg/util/intstr"
	"github.com/containers/podman/v5/pkg/lookup"
	"github.com/containers/podman/v5/pkg/namespaces"
	"github.com/containers/podman/v5/pkg/specgen"
	"github.com/containers/podman/v5/pkg/util"
	"github.com/opencontainers/runtime-spec/specs-go"
	"github.com/sirupsen/logrus"
	"golang.org/x/exp/slices"
)

// GenerateForKube takes a slice of libpod containers and generates
// one v1.Pod description that includes just a single container.
func GenerateForKube(ctx context.Context, ctrs []*Container, getService, useLongAnnotations, podmanOnly bool) (*v1.Pod, error) {
	// Generate the v1.Pod yaml description
	return simplePodWithV1Containers(ctx, ctrs, getService, useLongAnnotations, podmanOnly)
}

// GenerateForKube takes a slice of libpod containers and generates
// one v1.Pod description
func (p *Pod) GenerateForKube(ctx context.Context, getService, useLongAnnotations, podmanOnly bool) (*v1.Pod, []v1.ServicePort, error) {
	// Generate the v1.Pod yaml description
	var (
		ports        []v1.ContainerPort
		servicePorts []v1.ServicePort
	)

	allContainers, err := p.allContainers()
	if err != nil {
		return nil, servicePorts, err
	}
	// If the pod has no containers, no sense to generate YAML
	if len(allContainers) == 0 {
		return nil, servicePorts, fmt.Errorf("pod %s has no containers", p.ID())
	}
	// If only an infra container is present, makes no sense to generate YAML
	if len(allContainers) == 1 && p.HasInfraContainer() {
		return nil, servicePorts, fmt.Errorf("pod %s only has an infra container", p.ID())
	}

	extraHost := make([]v1.HostAlias, 0)
	hostNetwork := false
	hostUsers := true
	infraName := ""
	if p.HasInfraContainer() {
		infraContainer, err := p.getInfraContainer()
		if err != nil {
			return nil, servicePorts, err
		}
		for _, host := range infraContainer.config.ContainerNetworkConfig.HostAdd {
			hostname, ip, hasIP := strings.Cut(host, ":")
			if !hasIP {
				return nil, servicePorts, errors.New("invalid hostAdd")
			}
			extraHost = append(extraHost, v1.HostAlias{
				IP:        ip,
				Hostnames: []string{hostname},
			})
		}
		ports, err = portMappingToContainerPort(infraContainer.config.PortMappings, getService)
		if err != nil {
			return nil, servicePorts, err
		}
		spState := newServicePortState()
		servicePorts, err = spState.containerPortsToServicePorts(ports)
		if err != nil {
			return nil, servicePorts, err
		}
		hostNetwork = infraContainer.NetworkMode() == string(namespaces.NetworkMode(specgen.Host))
		hostUsers = infraContainer.IDMappings().HostUIDMapping && infraContainer.IDMappings().HostGIDMapping
		infraName = infraContainer.config.Name
	}
	pod, err := p.podWithContainers(ctx, allContainers, ports, hostNetwork, hostUsers, getService, useLongAnnotations, podmanOnly, infraName)
	if err != nil {
		return nil, servicePorts, err
	}
	pod.Spec.HostAliases = extraHost

	// Set the pod's restart policy
	pod.Spec.RestartPolicy = getPodRestartPolicy(p.config.RestartPolicy)

	if p.SharesPID() {
		// Unfortunately, Go doesn't have a nice way to specify a pointer to a bool
		b := true
		pod.Spec.ShareProcessNamespace = &b
	}

	return pod, servicePorts, nil
}
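// The "b := true; &b" dance above exists because Go has no pointer literal
// for basic types. As a minimal sketch (hypothetical helper, not part of
// upstream libpod; requires Go 1.18+ generics), the same intent could be
// expressed once and reused:
func ptrTo[T any](v T) *T { return &v }

// Usage sketch: pod.Spec.ShareProcessNamespace = ptrTo(true)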
func (p *Pod) getInfraContainer() (*Container, error) {
	infraID, err := p.InfraContainerID()
	if err != nil {
		return nil, err
	}
	return p.runtime.GetContainer(infraID)
}

func GenerateForKubeDaemonSet(ctx context.Context, pod *YAMLPod, options entities.GenerateKubeOptions) (*YAMLDaemonSet, error) {
	// Restart policy for DaemonSets can only be set to Always
	if !(pod.Spec.RestartPolicy == "" || pod.Spec.RestartPolicy == v1.RestartPolicyAlways) {
		return nil, fmt.Errorf("k8s DaemonSets can only have restartPolicy set to Always")
	}

	// Error out if the user tries to set replica count
	if options.Replicas > 1 {
		return nil, fmt.Errorf("k8s DaemonSets don't allow setting replicas")
	}

	// Create label map that will be added to podSpec and DaemonSet metadata
	// The matching label lets the daemonset know which pod to manage
	appKey := "app"
	matchLabels := map[string]string{appKey: pod.Name}
	// Add the key:value (app:pod-name) to the podSpec labels
	if pod.Labels == nil {
		pod.Labels = matchLabels
	} else {
		pod.Labels[appKey] = pod.Name
	}

	depSpec := YAMLDaemonSetSpec{
		DaemonSetSpec: v1.DaemonSetSpec{
			Selector: &v12.LabelSelector{
				MatchLabels: matchLabels,
			},
		},
		Template: &YAMLPodTemplateSpec{
			PodTemplateSpec: v1.PodTemplateSpec{
				ObjectMeta: pod.ObjectMeta,
			},
			Spec: pod.Spec,
		},
	}

	// Create the DaemonSet object
	dep := YAMLDaemonSet{
		DaemonSet: v1.DaemonSet{
			ObjectMeta: v12.ObjectMeta{
				Name:              pod.Name + "-daemonset",
				CreationTimestamp: pod.CreationTimestamp,
				Labels:            pod.Labels,
			},
			TypeMeta: v12.TypeMeta{
				Kind:       "DaemonSet",
				APIVersion: "apps/v1",
			},
		},
		Spec: &depSpec,
	}

	return &dep, nil
}

// GenerateForKubeDeployment returns a YAMLDeployment from a YAMLPod that is then used to create a kubernetes Deployment
// kind YAML.
func GenerateForKubeDeployment(ctx context.Context, pod *YAMLPod, options entities.GenerateKubeOptions) (*YAMLDeployment, error) {
	// Restart policy for Deployments can only be set to Always
	if options.Type == define.K8sKindDeployment && !(pod.Spec.RestartPolicy == "" || pod.Spec.RestartPolicy == v1.RestartPolicyAlways) {
		return nil, fmt.Errorf("k8s Deployments can only have restartPolicy set to Always")
	}

	// Create label map that will be added to podSpec and Deployment metadata
	// The matching label lets the deployment know which pods to manage
	appKey := "app"
	matchLabels := map[string]string{appKey: pod.Name}
	// Add the key:value (app:pod-name) to the podSpec labels
	if pod.Labels == nil {
		pod.Labels = matchLabels
	} else {
		pod.Labels[appKey] = pod.Name
	}

	depSpec := YAMLDeploymentSpec{
		DeploymentSpec: v1.DeploymentSpec{
			Selector: &v12.LabelSelector{
				MatchLabels: matchLabels,
			},
		},
		Template: &YAMLPodTemplateSpec{
			PodTemplateSpec: v1.PodTemplateSpec{
				ObjectMeta: pod.ObjectMeta,
			},
			Spec: pod.Spec,
		},
	}

	// Add the replica count only if the user passed --replicas with a value greater than 1.
	// If replicas is set to 1, there is no need to add it to the generated yaml, as k8s
	// automatically defaults to that; Podman also sets replicas to 1 by default.
	if options.Replicas > 1 {
		depSpec.Replicas = &options.Replicas
	}

	// Create the Deployment object
	dep := YAMLDeployment{
		Deployment: v1.Deployment{
			ObjectMeta: v12.ObjectMeta{
				Name:              pod.Name + "-deployment",
				CreationTimestamp: pod.CreationTimestamp,
				Labels:            pod.Labels,
			},
			TypeMeta: v12.TypeMeta{
				Kind:       "Deployment",
				APIVersion: "apps/v1",
			},
		},
		Spec: &depSpec,
	}

	return &dep, nil
}
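// exampleDeploymentFromPod is a usage sketch (hypothetical, not part of
// upstream libpod): it wraps a generated v1.Pod in a Deployment kind with
// three replicas, relying on ConvertV1PodToYAMLPod defined later in this
// file.
func exampleDeploymentFromPod(ctx context.Context, pod *v1.Pod) (*YAMLDeployment, error) {
	opts := entities.GenerateKubeOptions{
		Type:     define.K8sKindDeployment,
		Replicas: 3, // serialized as spec.replicas because it is > 1
	}
	return GenerateForKubeDeployment(ctx, ConvertV1PodToYAMLPod(pod), opts)
}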
// GenerateForKube generates a v1.PersistentVolumeClaim from a libpod volume.
func (v *Volume) GenerateForKube() *v1.PersistentVolumeClaim {
	annotations := make(map[string]string)
	annotations[util.VolumeDriverAnnotation] = v.Driver()

	for k, v := range v.Options() {
		switch k {
		case "o":
			annotations[util.VolumeMountOptsAnnotation] = v
		case "device":
			annotations[util.VolumeDeviceAnnotation] = v
		case "type":
			annotations[util.VolumeTypeAnnotation] = v
		case "UID":
			annotations[util.VolumeUIDAnnotation] = v
		case "GID":
			annotations[util.VolumeGIDAnnotation] = v
		}
	}

	return &v1.PersistentVolumeClaim{
		TypeMeta: v12.TypeMeta{
			Kind:       "PersistentVolumeClaim",
			APIVersion: "v1",
		},
		ObjectMeta: v12.ObjectMeta{
			Name:              v.Name(),
			Labels:            v.Labels(),
			Annotations:       annotations,
			CreationTimestamp: v12.Now(),
		},
		Spec: v1.PersistentVolumeClaimSpec{
			Resources: v1.ResourceRequirements{
				Requests: map[v1.ResourceName]resource.Quantity{
					v1.ResourceStorage: resource.MustParse("1Gi"),
				},
			},
			AccessModes: []v1.PersistentVolumeAccessMode{
				v1.ReadWriteOnce,
			},
		},
	}
}
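// For illustration only (a sketch derived from the struct literal above,
// with the volume name made up and timestamp/annotation keys elided — the
// annotation keys are the constants from pkg/util): a named volume "dbdata"
// would serialize roughly as:
//
//	kind: PersistentVolumeClaim
//	apiVersion: v1
//	metadata:
//	  name: dbdata
//	spec:
//	  resources:
//	    requests:
//	      storage: 1Gi
//	  accessModes:
//	  - ReadWriteOnce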
// YAMLPodSpec represents the same k8s API core PodSpec struct with a small
// change: Containers is a slice of pointers to YAMLContainer.
// Because Go doesn't omit empty structs and we want to omit Status in the YAML
// if it's empty. Fixes: GH-11998
type YAMLPodSpec struct {
	v1.PodSpec
	Containers []*YAMLContainer `json:"containers"`
}

// YAMLPod represents the same k8s API core Pod struct with a small
// change: Spec is a pointer to YAMLPodSpec and
// Status is a pointer to k8s API core PodStatus.
// Because Go doesn't omit empty structs and we want to omit Status in the YAML
// if it's empty. Fixes: GH-11998
type YAMLPod struct {
	v1.Pod
	Spec   *YAMLPodSpec  `json:"spec,omitempty"`
	Status *v1.PodStatus `json:"status,omitempty"`
}

// YAMLPodTemplateSpec represents the same k8s API core PodTemplateSpec with a
// small change: Spec is a pointer to YAMLPodSpec.
// Because Go doesn't omit empty structs and we want to omit any empty structs in the
// Pod yaml. This is used when generating a Deployment kind.
type YAMLPodTemplateSpec struct {
	v1.PodTemplateSpec
	Spec *YAMLPodSpec `json:"spec,omitempty"`
}

// YAMLDeploymentSpec represents the same k8s API core DeploymentSpec with a small
// change: Template is a pointer to YAMLPodTemplateSpec and Strategy
// is a pointer to k8s API core DeploymentStrategy.
// Because Go doesn't omit empty structs and we want to omit Strategy and any empty fields in the Pod YAML.
type YAMLDeploymentSpec struct {
	v1.DeploymentSpec
	Template *YAMLPodTemplateSpec   `json:"template,omitempty"`
	Strategy *v1.DeploymentStrategy `json:"strategy,omitempty"`
}

// YAMLDaemonSetSpec represents the same k8s API core DaemonSetSpec with a small
// change: Template is a pointer to YAMLPodTemplateSpec and Strategy
// is a pointer to k8s API core DaemonSetUpdateStrategy.
// Because Go doesn't omit empty structs and we want to omit Strategy and any empty fields in the Pod YAML.
type YAMLDaemonSetSpec struct {
	v1.DaemonSetSpec
	Template *YAMLPodTemplateSpec        `json:"template,omitempty"`
	Strategy *v1.DaemonSetUpdateStrategy `json:"strategy,omitempty"`
}

// YAMLDaemonSet represents the same k8s API core DaemonSet with a small change:
// Spec is a pointer to YAMLDaemonSetSpec and Status is a pointer to
// k8s API core DaemonSetStatus.
// Because Go doesn't omit empty structs and we want to omit Status and any empty fields in the DaemonSetSpec.
type YAMLDaemonSet struct {
	v1.DaemonSet
	Spec   *YAMLDaemonSetSpec  `json:"spec,omitempty"`
	Status *v1.DaemonSetStatus `json:"status,omitempty"`
}

// YAMLDeployment represents the same k8s API core Deployment with a small change:
// Spec is a pointer to YAMLDeploymentSpec and Status is a pointer to
// k8s API core DeploymentStatus.
// Because Go doesn't omit empty structs and we want to omit Status and any empty fields in the DeploymentSpec.
type YAMLDeployment struct {
	v1.Deployment
	Spec   *YAMLDeploymentSpec  `json:"spec,omitempty"`
	Status *v1.DeploymentStatus `json:"status,omitempty"`
}

// YAMLService represents the same k8s API core Service struct with a small
// change: Status is a pointer to k8s API core ServiceStatus.
// Because Go doesn't omit empty structs and we want to omit Status in the YAML
// if it's empty. Fixes: GH-11998
type YAMLService struct {
	v1.Service
	Status *v1.ServiceStatus `json:"status,omitempty"`
}

// YAMLContainer represents the same k8s API core Container struct with a small
// change: Resources is a pointer to k8s API core ResourceRequirements.
// Because Go doesn't omit empty structs and we want to omit Resources in the YAML
// if it's empty. Fixes: GH-11998
type YAMLContainer struct {
	v1.Container
	Resources *v1.ResourceRequirements `json:"resources,omitempty"`
}

// ConvertV1PodToYAMLPod takes a k8s API core Pod and returns a pointer to YAMLPod.
func ConvertV1PodToYAMLPod(pod *v1.Pod) *YAMLPod {
	cs := []*YAMLContainer{}
	for _, cc := range pod.Spec.Containers {
		var res *v1.ResourceRequirements
		if len(cc.Resources.Limits) > 0 || len(cc.Resources.Requests) > 0 {
			res = &cc.Resources
		}
		cs = append(cs, &YAMLContainer{Container: cc, Resources: res})
	}
	mpo := &YAMLPod{Pod: *pod}
	mpo.Spec = &YAMLPodSpec{PodSpec: pod.Spec, Containers: cs}
	for _, ctr := range pod.Spec.Containers {
		if ctr.SecurityContext == nil || ctr.SecurityContext.SELinuxOptions == nil {
			continue
		}
		selinuxOpts := ctr.SecurityContext.SELinuxOptions
		if selinuxOpts.User == "" && selinuxOpts.Role == "" && selinuxOpts.Type == "" && selinuxOpts.Level == "" && selinuxOpts.FileType == "" {
			ctr.SecurityContext.SELinuxOptions = nil
		}
	}
	dnsCfg := pod.Spec.DNSConfig
	if dnsCfg != nil && (len(dnsCfg.Nameservers)+len(dnsCfg.Searches)+len(dnsCfg.Options) > 0) {
		mpo.Spec.DNSConfig = dnsCfg
	}
	status := pod.Status
	if status.Phase != "" || len(status.Conditions) > 0 ||
		status.Message != "" || status.Reason != "" ||
		status.NominatedNodeName != "" || status.HostIP != "" ||
		status.PodIP != "" || status.StartTime != nil ||
		len(status.InitContainerStatuses) > 0 || len(status.ContainerStatuses) > 0 ||
		status.QOSClass != "" || len(status.EphemeralContainerStatuses) > 0 {
		mpo.Status = &status
	}
	return mpo
}

// GenerateKubeServiceFromV1Pod creates a v1 service object from a v1 pod object.
func GenerateKubeServiceFromV1Pod(pod *v1.Pod, servicePorts []v1.ServicePort) (YAMLService, error) {
	service := YAMLService{}
	selector := make(map[string]string)
	selector["app"] = pod.Labels["app"]
	ports := servicePorts
	if len(ports) == 0 {
		p, err := containersToServicePorts(pod.Spec.Containers)
		if err != nil {
			return service, err
		}
		ports = p
	}
	serviceSpec := v1.ServiceSpec{
		Ports:    ports,
		Selector: selector,
		Type:     v1.ServiceTypeNodePort,
	}
	service.Spec = serviceSpec
	service.ObjectMeta = pod.ObjectMeta
	// Reset the annotations for the service, as the pod annotations are not needed for the service
	service.ObjectMeta.Annotations = nil
	tm := v12.TypeMeta{
		Kind:       "Service",
		APIVersion: pod.TypeMeta.APIVersion,
	}
	service.TypeMeta = tm
	return service, nil
}

// servicePortState allows calling containerPortsToServicePorts for a single service
type servicePortState struct {
	// A program using the shared math/rand state with the default seed will produce the same sequence of pseudo-random numbers
	// for each execution. Use a private RNG state so as not to interfere with other users.
	rng       *rand.Rand
	usedPorts map[int]struct{}
}

func newServicePortState() servicePortState {
	return servicePortState{
		rng:       rand.New(rand.NewSource(time.Now().UnixNano())),
		usedPorts: map[int]struct{}{},
	}
}

func truncateKubeAnnotation(str string, useLongAnnotations bool) string {
	str = strings.TrimSpace(str)
	if useLongAnnotations || utf8.RuneCountInString(str) < define.MaxKubeAnnotation {
		return str
	}
	trunc := string([]rune(str)[:define.MaxKubeAnnotation])
	logrus.Warnf("Truncating annotation %q to %q: Kubernetes only allows %d characters", str, trunc, define.MaxKubeAnnotation)
	return trunc
}

// containerPortsToServicePorts takes a slice of container ports and generates a
// slice of service ports
func (state *servicePortState) containerPortsToServicePorts(containerPorts []v1.ContainerPort) ([]v1.ServicePort, error) {
	sps := make([]v1.ServicePort, 0, len(containerPorts))
	for _, cp := range containerPorts {
		var nodePort int
		attempt := 0
		for {
			// The legal NodePort range is 30000-32767
			nodePort = 30000 + state.rng.Intn(32767-30000+1)
			if _, found := state.usedPorts[nodePort]; !found {
				state.usedPorts[nodePort] = struct{}{}
				break
			}
			attempt++
			if attempt >= 100 {
				return nil, fmt.Errorf("too many attempts trying to generate a unique NodePort number")
			}
		}
		servicePort := v1.ServicePort{
			Protocol:   cp.Protocol,
			Port:       cp.ContainerPort,
			NodePort:   int32(nodePort),
			Name:       strconv.Itoa(int(cp.ContainerPort)),
			TargetPort: intstr.Parse(strconv.Itoa(int(cp.ContainerPort))),
		}
		sps = append(sps, servicePort)
	}
	return sps, nil
}

// containersToServicePorts takes a slice of v1.Containers and generates an
// inclusive list of serviceports to expose
func containersToServicePorts(containers []v1.Container) ([]v1.ServicePort, error) {
	state := newServicePortState()
	sps := make([]v1.ServicePort, 0, len(containers))
	for _, ctr := range containers {
		ports, err := state.containerPortsToServicePorts(ctr.Ports)
		if err != nil {
			return nil, err
		}
		sps = append(sps, ports...)
	}
	return sps, nil
}
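// exampleServicePortAllocation is an illustrative sketch (not part of
// upstream libpod): the private RNG in servicePortState hands out NodePorts
// in the 30000-32767 range, and the usedPorts set guarantees they are
// unique within a single service.
func exampleServicePortAllocation() ([]v1.ServicePort, error) {
	state := newServicePortState()
	return state.containerPortsToServicePorts([]v1.ContainerPort{
		{ContainerPort: 8080, Protocol: v1.ProtocolUDP},
		{ContainerPort: 9090}, // TCP is left implicit, as in the generator above
	})
}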
func (p *Pod) podWithContainers(ctx context.Context, containers []*Container, ports []v1.ContainerPort, hostNetwork, hostUsers, getService, useLongAnnotations, podmanOnly bool, infraName string) (*v1.Pod, error) {
	deDupPodVolumes := make(map[string]*v1.Volume)
	first := true
	podContainers := make([]v1.Container, 0, len(containers))
	podInitCtrs := []v1.Container{}
	podAnnotations := make(map[string]string)
	dnsInfo := v1.PodDNSConfig{}
	var (
		hostname    string
		stopTimeout *uint
	)

	// Let's sort the containers in order of created time.
	// This will ensure that the init containers are defined in the correct order in the kube yaml.
	sort.Slice(containers, func(i, j int) bool { return containers[i].CreatedTime().Before(containers[j].CreatedTime()) })

	for _, ctr := range containers {
		if !ctr.IsInfra() {
			for k, v := range ctr.config.Spec.Annotations {
				if !podmanOnly && (define.IsReservedAnnotation(k) || annotations.IsReservedAnnotation(k)) {
					continue
				}
				podAnnotations[fmt.Sprintf("%s/%s", k, removeUnderscores(ctr.Name()))] = truncateKubeAnnotation(v, useLongAnnotations)
			}
			// Convert auto-update labels into kube annotations
			for k, v := range getAutoUpdateAnnotations(ctr.Name(), ctr.Labels(), useLongAnnotations) {
				podAnnotations[k] = truncateKubeAnnotation(v, useLongAnnotations)
			}

			isInit := ctr.IsInitCtr()
			// Since hostname is only set at pod level, set the hostname to the hostname of the first container we encounter
			if hostname == "" {
				// Only set the hostname if it is not set to the truncated container ID, which we do by default if no
				// hostname is specified for the container, and if it is not set to the pod name.
				if !strings.Contains(ctr.ID(), ctr.Hostname()) && ctr.Hostname() != p.Name() {
					hostname = ctr.Hostname()
				}
			}

			// Pick the first container that has a stop-timeout set and use that value.
			// Ignore podman's default.
			if ctr.config.StopTimeout != util.DefaultContainerConfig().Engine.StopTimeout && stopTimeout == nil {
				stopTimeout = &ctr.config.StopTimeout
			}

			ctr, volumes, _, annotations, err := containerToV1Container(ctx, ctr, getService)
			if err != nil {
				return nil, err
			}
			for k, v := range annotations {
				podAnnotations[define.BindMountPrefix] = truncateKubeAnnotation(k+":"+v, useLongAnnotations)
			}
			// Since port bindings for the pod are handled by the
			// infra container, wipe them here only if we are sharing the net namespace.
			// If the network namespace is not being shared in the pod, then containers
			// can have their own network configurations.
			if p.SharesNet() {
				ctr.Ports = nil

				// We add the original port declarations from the libpod infra container
				// to the first kubernetes container description because otherwise we lose
				// the original container/port bindings.
				// Add the port configuration to the first regular container, or to the first
				// init container if only init containers have been created in the pod.
				if first && len(ports) > 0 && (!isInit || len(containers) == 2) {
					ctr.Ports = ports
					first = false
				}
			}
			if isInit {
				podInitCtrs = append(podInitCtrs, ctr)
				continue
			}
			podContainers = append(podContainers, ctr)
			// Deduplicate volumes, so if containers in the pod share a volume, it's only
			// listed in the volumes section once.
			for _, vol := range volumes {
				vol := vol
				deDupPodVolumes[vol.Name] = &vol
			}
		} else {
			_, _, infraDNS, _, err := containerToV1Container(ctx, ctr, getService)
			if err != nil {
				return nil, err
			}
			if infraDNS != nil {
				if servers := infraDNS.Nameservers; len(servers) > 0 {
					dnsInfo.Nameservers = servers
				}
				if searches := infraDNS.Searches; len(searches) > 0 {
					dnsInfo.Searches = searches
				}
				if options := infraDNS.Options; len(options) > 0 {
					dnsInfo.Options = options
				}
			}
			// If infraName is not podID-infra, that means the user set another infra name using
			// --infra-name during pod creation.
			if infraName != "" && infraName != p.ID()[:12]+"-infra" {
				podAnnotations[define.InfraNameAnnotation] = truncateKubeAnnotation(infraName, useLongAnnotations)
			}
		}
	}
	podVolumes := []v1.Volume{}
	for _, vol := range deDupPodVolumes {
		podVolumes = append(podVolumes, *vol)
	}
	podName := removeUnderscores(p.Name())

	return newPodObject(
		podName,
		podAnnotations,
		podInitCtrs,
		podContainers,
		podVolumes,
		&dnsInfo,
		hostNetwork,
		hostUsers,
		hostname,
		stopTimeout), nil
}

func newPodObject(podName string, annotations map[string]string, initCtrs, containers []v1.Container, volumes []v1.Volume, dnsOptions *v1.PodDNSConfig, hostNetwork, hostUsers bool, hostname string, stopTimeout *uint) *v1.Pod {
	tm := v12.TypeMeta{
		Kind:       "Pod",
		APIVersion: "v1",
	}

	// Add a label called "app" with the container's name as its value
	labels := make(map[string]string)
	labels["app"] = removeUnderscores(podName)
	om := v12.ObjectMeta{
		// The name of the pod is container_name-libpod
		Name:   podName,
		Labels: labels,
		// CreationTimestamp seems to be required, so adding it; in doing so, the timestamp
		// will reflect the time this is run (not container create time) because the conversion
		// of the container create time to v1 Time is probably not warranted nor worthwhile.
		CreationTimestamp: v12.Now(),
		Annotations:       annotations,
	}
	ps := v1.PodSpec{
		Containers:     containers,
		Hostname:       hostname,
		HostNetwork:    hostNetwork,
		InitContainers: initCtrs,
		Volumes:        volumes,
	}
	if !hostUsers {
		ps.HostUsers = &hostUsers
	}
	if dnsOptions != nil && (len(dnsOptions.Nameservers)+len(dnsOptions.Searches)+len(dnsOptions.Options) > 0) {
		ps.DNSConfig = dnsOptions
	}
	if stopTimeout != nil {
		terminationGracePeriod := int64(*stopTimeout)
		ps.TerminationGracePeriodSeconds = &terminationGracePeriod
	}
	p := v1.Pod{
		TypeMeta:   tm,
		ObjectMeta: om,
		Spec:       ps,
	}
	return &p
}
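// exampleMinimalPodObject is an illustrative sketch (not part of upstream
// libpod; the container name and image are made up): one regular container,
// no init containers, volumes, or DNS overrides, and a 10-second stop
// timeout, which newPodObject maps to terminationGracePeriodSeconds.
func exampleMinimalPodObject() *v1.Pod {
	stop := uint(10)
	web := v1.Container{Name: "web", Image: "example.com/web:latest"}
	return newPodObject("web-pod", nil, nil, []v1.Container{web}, nil, nil, false, true, "", &stop)
}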
// simplePodWithV1Containers is a function used by inspect when kube yaml needs to be generated
// for a single container. We "insert" that container description in a pod.
func simplePodWithV1Containers(ctx context.Context, ctrs []*Container, getService, useLongAnnotations, podmanOnly bool) (*v1.Pod, error) {
	kubeCtrs := make([]v1.Container, 0, len(ctrs))
	kubeInitCtrs := []v1.Container{}
	kubeVolumes := make([]v1.Volume, 0)
	hostUsers := true
	hostNetwork := true
	podDNS := v1.PodDNSConfig{}
	kubeAnnotations := make(map[string]string)
	ctrNames := make([]string, 0, len(ctrs))
	var (
		hostname      string
		restartPolicy *string
		stopTimeout   *uint
	)
	for _, ctr := range ctrs {
		ctrNames = append(ctrNames, removeUnderscores(ctr.Name()))
		for k, v := range ctr.config.Spec.Annotations {
			if !podmanOnly && (define.IsReservedAnnotation(k) || annotations.IsReservedAnnotation(k)) {
				continue
			}
			kubeAnnotations[fmt.Sprintf("%s/%s", k, removeUnderscores(ctr.Name()))] = truncateKubeAnnotation(v, useLongAnnotations)
		}

		// Convert auto-update labels into kube annotations
		for k, v := range getAutoUpdateAnnotations(ctr.Name(), ctr.Labels(), useLongAnnotations) {
			kubeAnnotations[k] = truncateKubeAnnotation(v, useLongAnnotations)
		}

		isInit := ctr.IsInitCtr()
		// Since hostname is only set at pod level, set the hostname to the hostname of the first container we encounter
		if hostname == "" {
			// Only set the hostname if it is not set to the truncated container ID, which we do by default if no
			// hostname is specified for the container
			if !strings.Contains(ctr.ID(), ctr.Hostname()) {
				hostname = ctr.Hostname()
			}
		}

		// Pick the first container that has a stop-timeout set and use that value.
		// Ignore podman's default.
		if ctr.config.StopTimeout != util.DefaultContainerConfig().Engine.StopTimeout && stopTimeout == nil {
			stopTimeout = &ctr.config.StopTimeout
		}

		// Use the restart policy of the first non-init container
		if !isInit && restartPolicy == nil {
			restartPolicy = &ctr.config.RestartPolicy
		}

		if ctr.config.Spec.Process != nil {
			var ulimitArr []string
			defaultUlimits := util.DefaultContainerConfig().Ulimits()
			for _, ulimit := range ctr.config.Spec.Process.Rlimits {
				finalUlimit := strings.ToLower(strings.ReplaceAll(ulimit.Type, "RLIMIT_", "")) + "=" + strconv.Itoa(int(ulimit.Soft)) + ":" + strconv.Itoa(int(ulimit.Hard))
				// Compare the ulimit with the default list so we don't add it twice
				if slices.Contains(defaultUlimits, finalUlimit) {
					continue
				}
				ulimitArr = append(ulimitArr, finalUlimit)
			}

			if len(ulimitArr) > 0 {
				kubeAnnotations[define.UlimitAnnotation] = strings.Join(ulimitArr, ",")
			}
		}

		if !ctr.HostNetwork() {
			hostNetwork = false
		}
		if !(ctr.IDMappings().HostUIDMapping && ctr.IDMappings().HostGIDMapping) {
			hostUsers = false
		}
		kubeCtr, kubeVols, ctrDNS, annotations, err := containerToV1Container(ctx, ctr, getService)
		if err != nil {
			return nil, err
		}
		for k, v := range annotations {
			kubeAnnotations[define.BindMountPrefix] = truncateKubeAnnotation(k+":"+v, useLongAnnotations)
		}
		if isInit {
			kubeInitCtrs = append(kubeInitCtrs, kubeCtr)
		} else {
			kubeCtrs = append(kubeCtrs, kubeCtr)
		}
		kubeVolumes = append(kubeVolumes, kubeVols...)
		// Combine DNS information into the summed structure
		if ctrDNS != nil {
			// nameservers
			if servers := ctrDNS.Nameservers; servers != nil {
				if podDNS.Nameservers == nil {
					podDNS.Nameservers = make([]string, 0)
				}
				for _, s := range servers {
					if !slices.Contains(podDNS.Nameservers, s) { // only append if it does not exist
						podDNS.Nameservers = append(podDNS.Nameservers, s)
					}
				}
			}
			// search domains
			if domains := ctrDNS.Searches; domains != nil {
				if podDNS.Searches == nil {
					podDNS.Searches = make([]string, 0)
				}
				for _, d := range domains {
					if !slices.Contains(podDNS.Searches, d) { // only append if it does not exist
						podDNS.Searches = append(podDNS.Searches, d)
					}
				}
			}
			// dns options
			if options := ctrDNS.Options; options != nil {
				if podDNS.Options == nil {
					podDNS.Options = make([]v1.PodDNSConfigOption, 0)
				}
				podDNS.Options = append(podDNS.Options, options...)
			}
		} // end if ctrDNS
	}

	podName := removeUnderscores(ctrs[0].Name())
	// Check if the pod name and container name will end up conflicting;
	// append "-pod" if so.
	if slices.Contains(ctrNames, podName) {
		podName += "-pod"
	}

	pod := newPodObject(
		podName,
		kubeAnnotations,
		kubeInitCtrs,
		kubeCtrs,
		kubeVolumes,
		&podDNS,
		hostNetwork,
		hostUsers,
		hostname,
		stopTimeout)

	// Set the pod's restart policy
	policy := ""
	if restartPolicy != nil {
		policy = *restartPolicy
	}
	pod.Spec.RestartPolicy = getPodRestartPolicy(policy)

	return pod, nil
}

// getPodRestartPolicy returns the pod restart policy to be set in the generated kube yaml
func getPodRestartPolicy(policy string) v1.RestartPolicy {
	switch policy {
	case define.RestartPolicyNo:
		return v1.RestartPolicyNever
	case define.RestartPolicyAlways:
		return v1.RestartPolicyAlways
	case define.RestartPolicyOnFailure:
		return v1.RestartPolicyOnFailure
	default: // some pod/ctr create from cmdline, such as "" - set it to "" and let k8s handle the defaults
		return ""
	}
}
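// Illustrative mapping sketch (not part of upstream libpod): libpod's
// "on-failure" policy comes back as the k8s "OnFailure" policy, while an
// unset policy stays empty so Kubernetes can apply its own default.
func exampleRestartPolicyMapping() v1.RestartPolicy {
	return getPodRestartPolicy(define.RestartPolicyOnFailure) // v1.RestartPolicyOnFailure
}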
// containerToV1Container converts information we know about a libpod container
// to a V1.Container specification.
func containerToV1Container(ctx context.Context, c *Container, getService bool) (v1.Container, []v1.Volume, *v1.PodDNSConfig, map[string]string, error) {
	kubeContainer := v1.Container{}
	kubeVolumes := []v1.Volume{}
	annotations := make(map[string]string)
	kubeSec, hasSecData, err := generateKubeSecurityContext(c)
	if err != nil {
		return kubeContainer, kubeVolumes, nil, annotations, err
	}

	// NOTE: a privileged container mounts all of /dev/*.
	if !c.Privileged() && c.config.Spec.Linux != nil && len(c.config.Spec.Linux.Devices) > 0 {
		// TODO: enable when we can support devices and their names
		kubeContainer.VolumeDevices = generateKubeVolumeDeviceFromLinuxDevice(c.config.Spec.Linux.Devices)
		return kubeContainer, kubeVolumes, nil, annotations, fmt.Errorf("linux devices: %w", define.ErrNotImplemented)
	}

	if len(c.config.UserVolumes) > 0 {
		volumeMounts, volumes, localAnnotations, err := libpodMountsToKubeVolumeMounts(c)
		if err != nil {
			return kubeContainer, kubeVolumes, nil, nil, err
		}
		annotations = localAnnotations
		kubeContainer.VolumeMounts = volumeMounts
		kubeVolumes = append(kubeVolumes, volumes...)
	}

	portmappings, err := c.PortMappings()
	if err != nil {
		return kubeContainer, kubeVolumes, nil, annotations, err
	}
	ports, err := portMappingToContainerPort(portmappings, getService)
	if err != nil {
		return kubeContainer, kubeVolumes, nil, annotations, err
	}

	// Handle command and arguments.
	if ep := c.Entrypoint(); len(ep) > 0 {
		// If we have an entrypoint, set the container's command as
		// arguments.
		kubeContainer.Command = ep
		kubeContainer.Args = c.Command()
	} else {
		kubeContainer.Command = c.Command()
	}

	kubeContainer.Name = removeUnderscores(c.Name())
	_, image := c.Image()

	// The infra container may have been created with an overlay root FS
	// instead of an infra image. If so, set the image to the default K8s
	// pause one and make sure it's in the storage by pulling it down if
	// missing.
	if image == "" && c.IsInfra() {
		image = c.runtime.config.Engine.InfraImage
		if _, err := c.runtime.libimageRuntime.Pull(ctx, image, config.PullPolicyMissing, nil); err != nil {
			return kubeContainer, nil, nil, nil, err
		}
	}

	kubeContainer.Image = image
	kubeContainer.Stdin = c.Stdin()
	img, _, err := c.runtime.libimageRuntime.LookupImage(image, nil)
	if err != nil {
		return kubeContainer, kubeVolumes, nil, annotations, fmt.Errorf("looking up image %q of container %q: %w", image, c.ID(), err)
	}
	imgData, err := img.Inspect(ctx, nil)
	if err != nil {
		return kubeContainer, kubeVolumes, nil, annotations, err
	}
	// If the user doesn't set a command/entrypoint when creating the container with podman and
	// is using the image command or entrypoint from the image, don't add it to the generated kube yaml
	if reflect.DeepEqual(imgData.Config.Cmd, kubeContainer.Command) || reflect.DeepEqual(imgData.Config.Entrypoint, kubeContainer.Command) {
		kubeContainer.Command = nil
	}

	if c.WorkingDir() != "/" && imgData.Config.WorkingDir != c.WorkingDir() {
		kubeContainer.WorkingDir = c.WorkingDir()
	}

	if imgData.User == c.User() && hasSecData {
		kubeSec.RunAsGroup, kubeSec.RunAsUser = nil, nil
	}

	// If the image has user set as a positive integer value, then set runAsNonRoot to true
	// in the kube yaml
	imgUserID, err := strconv.Atoi(imgData.User)
	if err == nil && imgUserID > 0 {
		trueBool := true
		kubeSec.RunAsNonRoot = &trueBool
	}

	envVariables, err := libpodEnvVarsToKubeEnvVars(c.config.Spec.Process.Env, imgData.Config.Env)
	if err != nil {
		return kubeContainer, kubeVolumes, nil, annotations, err
	}
	kubeContainer.Env = envVariables

	kubeContainer.Ports = ports
	// This should not be applicable
	// container.EnvFromSource =
	if hasSecData {
		kubeContainer.SecurityContext = kubeSec
	}
	kubeContainer.StdinOnce = false
	kubeContainer.TTY = c.Terminal()

	resources := c.LinuxResources()
	if resources != nil {
		if resources.Memory != nil && resources.Memory.Limit != nil {
			if kubeContainer.Resources.Limits == nil {
				kubeContainer.Resources.Limits = v1.ResourceList{}
			}

			qty := kubeContainer.Resources.Limits.Memory()
			qty.Set(*c.config.Spec.Linux.Resources.Memory.Limit)
			kubeContainer.Resources.Limits[v1.ResourceMemory] = *qty
		}

		if resources.CPU != nil && resources.CPU.Quota != nil && resources.CPU.Period != nil {
			quota := *resources.CPU.Quota
			period := *resources.CPU.Period

			if quota > 0 && period > 0 {
				cpuLimitMilli := int64(1000 * util.PeriodAndQuotaToCores(period, quota))

				// Kubernetes: precision finer than 1m is not allowed
				if cpuLimitMilli >= 1 {
					if kubeContainer.Resources.Limits == nil {
						kubeContainer.Resources.Limits = v1.ResourceList{}
					}

					qty := kubeContainer.Resources.Limits.Cpu()
					qty.SetMilli(cpuLimitMilli)
					kubeContainer.Resources.Limits[v1.ResourceCPU] = *qty
				}
			}
		}
	}

	// Obtain the DNS entries from the container
	dns := v1.PodDNSConfig{}

	// DNS servers
	if servers := c.config.DNSServer; len(servers) > 0 {
		dnsServers := make([]string, 0)
		for _, server := range servers {
			dnsServers = append(dnsServers, server.String())
		}
		dns.Nameservers = dnsServers
	}

	// DNS search domains
	if searches := c.config.DNSSearch; len(searches) > 0 {
		dns.Searches = searches
	}

	// DNS options
	if options := c.config.DNSOption; len(options) > 0 {
		dnsOptions := make([]v1.PodDNSConfigOption, 0)
		for _, option := range options {
			// the option can be "k:v" or just "k"; no delimiter is required
			name, value, _ := strings.Cut(option, ":")
			dnsOpt := v1.PodDNSConfigOption{
				Name:  name,
				Value: &value,
			}
			dnsOptions = append(dnsOptions, dnsOpt)
		}
		dns.Options = dnsOptions
	}
	return kubeContainer, kubeVolumes, &dns, annotations, nil
}

// portMappingToContainerPort takes a portmapping and converts
// it to a v1.ContainerPort format for kube output
func portMappingToContainerPort(portMappings []types.PortMapping, getService bool) ([]v1.ContainerPort, error) {
	containerPorts := make([]v1.ContainerPort, 0, len(portMappings))
	for _, p := range portMappings {
		protocols := strings.Split(p.Protocol, ",")
		for _, proto := range protocols {
			var protocol v1.Protocol
			switch strings.ToUpper(proto) {
			case "TCP":
				// do nothing, as TCP is the default protocol in k8s; there is no need to explicitly
				// add it to the generated yaml
			case "UDP":
				protocol = v1.ProtocolUDP
			case "SCTP":
				protocol = v1.ProtocolSCTP
			default:
				return containerPorts, fmt.Errorf("unknown network protocol %s", p.Protocol)
			}
			for i := uint16(0); i < p.Range; i++ {
				cp := v1.ContainerPort{
					// Name will not be supported
					HostIP:        p.HostIP,
					ContainerPort: int32(p.ContainerPort + i),
					Protocol:      protocol,
				}
				if !getService {
					cp.HostPort = int32(p.HostPort + i)
				}
				containerPorts = append(containerPorts, cp)
			}
		}
	}
	return containerPorts, nil
}

// libpodEnvVarsToKubeEnvVars converts a key=value string slice to []v1.EnvVar
func libpodEnvVarsToKubeEnvVars(envs []string, imageEnvs []string) ([]v1.EnvVar, error) {
	defaultEnv := env.DefaultEnvVariables()
	envVars := make([]v1.EnvVar, 0, len(envs))
	imageMap := make(map[string]string, len(imageEnvs))
	for _, ie := range imageEnvs {
		key, val, _ := strings.Cut(ie, "=")
		imageMap[key] = val
	}
	for _, e := range envs {
		envName, envValue, hasValue := strings.Cut(e, "=")
		if !hasValue {
			return envVars, fmt.Errorf("environment variable %s is malformed; should be key=value", e)
		}
		if defaultEnv[envName] == envValue {
			continue
		}
		if imageMap[envName] == envValue {
			continue
		}
		ev := v1.EnvVar{
			Name:  envName,
			Value: envValue,
		}
		envVars = append(envVars, ev)
	}
	return envVars, nil
}

// libpodMountsToKubeVolumeMounts converts the container's mounts to a struct kube understands
func libpodMountsToKubeVolumeMounts(c *Container) ([]v1.VolumeMount, []v1.Volume, map[string]string, error) {
	namedVolumes, mounts := c.SortUserVolumes(c.config.Spec)
	vms := make([]v1.VolumeMount, 0, len(mounts))
	vos := make([]v1.Volume, 0, len(mounts))
	annotations := make(map[string]string)

	var suffix string
	for index, m := range mounts {
		for _, opt := range m.Options {
			if opt == "Z" || opt == "z" {
				annotations[m.Source] = opt
				break
			}
		}
		vm, vo, err := generateKubeVolumeMount(m)
		if err != nil {
			return vms, vos, annotations, err
		}
		// Name will be the same, so use the index as suffix
		suffix = fmt.Sprintf("-%d", index)
		vm.Name += suffix
		vo.Name += suffix
		vms = append(vms, vm)
		vos = append(vos, vo)
	}
	for _, v := range namedVolumes {
		vm, vo := generateKubePersistentVolumeClaim(v)
		vms = append(vms, vm)
		vos = append(vos, vo)
	}
	return vms, vos, annotations, nil
}
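// exampleEnvConversion is an illustrative sketch (not part of upstream
// libpod) for libpodEnvVarsToKubeEnvVars above: values that merely repeat
// the image's own environment (PATH here) are filtered out, so only FOO
// would survive into the kube yaml, assuming FOO=bar is not among podman's
// default variables.
func exampleEnvConversion() ([]v1.EnvVar, error) {
	ctrEnv := []string{"FOO=bar", "PATH=/usr/bin"}
	imgEnv := []string{"PATH=/usr/bin"}
	return libpodEnvVarsToKubeEnvVars(ctrEnv, imgEnv)
}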
// generateKubePersistentVolumeClaim converts a ContainerNamedVolume to a Kubernetes PersistentVolumeClaim
func generateKubePersistentVolumeClaim(v *ContainerNamedVolume) (v1.VolumeMount, v1.Volume) {
	ro := slices.Contains(v.Options, "ro")

	// To avoid naming conflicts with any host path mounts, add a unique suffix to the volume's name.
	name := v.Name + "-pvc"

	vm := v1.VolumeMount{}
	vm.Name = name
	vm.MountPath = v.Dest
	vm.ReadOnly = ro

	pvc := v1.PersistentVolumeClaimVolumeSource{ClaimName: v.Name, ReadOnly: ro}
	vs := v1.VolumeSource{}
	vs.PersistentVolumeClaim = &pvc
	vo := v1.Volume{Name: name, VolumeSource: vs}

	return vm, vo
}

// generateKubeVolumeMount takes a user-specified mount and returns
// a kubernetes VolumeMount (to be added to the container) and a kubernetes Volume
// (to be added to the pod)
func generateKubeVolumeMount(m specs.Mount) (v1.VolumeMount, v1.Volume, error) {
	vm := v1.VolumeMount{}
	vo := v1.Volume{}

	var (
		name string
		err  error
	)
	if m.Type == define.TypeTmpfs {
		name = "tmp"
		vo.EmptyDir = &v1.EmptyDirVolumeSource{
			Medium: v1.StorageMediumMemory,
		}
		vo.Name = name
	} else {
		name, err = convertVolumePathToName(m.Source)
		if err != nil {
			return vm, vo, err
		}
		// To avoid naming conflicts with any persistent volume mounts, add a unique suffix to the volume's name.
		name += "-host"
		vo.Name = name
		vo.HostPath = &v1.HostPathVolumeSource{}
		vo.HostPath.Path = m.Source
		isDir, err := isHostPathDirectory(m.Source)
		// neither a directory nor a file lives here; default to creating a directory
		// TODO: should this be an error instead?
		var hostPathType v1.HostPathType
		switch {
		case err != nil:
			hostPathType = v1.HostPathDirectoryOrCreate
		case isDir:
			hostPathType = v1.HostPathDirectory
		default:
			hostPathType = v1.HostPathFile
		}
		vo.HostPath.Type = &hostPathType
	}
	vm.Name = name
	vm.MountPath = m.Destination
	if slices.Contains(m.Options, "ro") {
		vm.ReadOnly = true
	}

	return vm, vo, nil
}

func isHostPathDirectory(hostPathSource string) (bool, error) {
	info, err := os.Stat(hostPathSource)
	if err != nil {
		return false, err
	}
	return info.Mode().IsDir(), nil
}

func convertVolumePathToName(hostSourcePath string) (string, error) {
	if len(hostSourcePath) == 0 {
		return "", errors.New("hostSourcePath must be specified to generate volume name")
	}
	if len(hostSourcePath) == 1 {
		if hostSourcePath != "/" {
			return "", fmt.Errorf("hostSourcePath malformatted: %s", hostSourcePath)
		}
		// add special case name
		return "root", nil
	}
	// First, trim trailing slashes, then replace slashes with dashes.
	// Thus, /mnt/data/ will become mnt-data
	return strings.ReplaceAll(strings.Trim(hostSourcePath, "/"), "/", "-"), nil
}
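// Illustrative sketch (not part of upstream libpod): trailing slashes are
// trimmed before the replacement, so "/mnt/data/" yields "mnt-data";
// generateKubeVolumeMount then appends "-host", producing "mnt-data-host".
func exampleVolumeNameFromPath() (string, error) {
	return convertVolumePathToName("/mnt/data/")
}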
func determineCapAddDropFromCapabilities(defaultCaps, containerCaps []string) *v1.Capabilities {
	var (
		drop = []v1.Capability{}
		add  = []v1.Capability{}
	)
	dedupDrop := make(map[string]bool)
	dedupAdd := make(map[string]bool)
	// Find caps in the defaultCaps but not in the container's;
	// those indicate a dropped cap.
	for _, capability := range defaultCaps {
		if !slices.Contains(containerCaps, capability) {
			if _, ok := dedupDrop[capability]; !ok {
				drop = append(drop, v1.Capability(capability))
				dedupDrop[capability] = true
			}
		}
	}
	// Find caps in the container but not in the defaults; those indicate
	// an added cap.
	for _, capability := range containerCaps {
		if !slices.Contains(defaultCaps, capability) {
			if _, ok := dedupAdd[capability]; !ok {
				add = append(add, v1.Capability(capability))
				dedupAdd[capability] = true
			}
		}
	}

	if len(add) > 0 || len(drop) > 0 {
		return &v1.Capabilities{
			Add:  add,
			Drop: drop,
		}
	}
	return nil
}

func (c *Container) capAddDrop(caps *specs.LinuxCapabilities) *v1.Capabilities {
	// FreeBSD containers don't have caps, so don't dereference if it's nil
	if caps == nil {
		return nil
	}

	// Combine all the container's capabilities into one slice
	containerCaps := make([]string, 0, len(caps.Ambient)+len(caps.Bounding)+len(caps.Effective)+len(caps.Inheritable)+len(caps.Permitted))
	containerCaps = append(containerCaps, caps.Ambient...)
	containerCaps = append(containerCaps, caps.Bounding...)
	containerCaps = append(containerCaps, caps.Effective...)
	containerCaps = append(containerCaps, caps.Inheritable...)
	containerCaps = append(containerCaps, caps.Permitted...)

	calculatedCaps := determineCapAddDropFromCapabilities(c.runtime.config.Containers.DefaultCapabilities.Get(), containerCaps)
	return calculatedCaps
}
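// exampleCapDiff is an illustrative sketch (not part of upstream libpod):
// with defaults {CAP_CHOWN, CAP_KILL} and container caps
// {CAP_CHOWN, CAP_SYS_ADMIN}, the diff above yields Drop=[CAP_KILL] and
// Add=[CAP_SYS_ADMIN].
func exampleCapDiff() *v1.Capabilities {
	return determineCapAddDropFromCapabilities(
		[]string{"CAP_CHOWN", "CAP_KILL"},
		[]string{"CAP_CHOWN", "CAP_SYS_ADMIN"},
	)
}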
// generateKubeSecurityContext generates a securityContext based on the existing container
func generateKubeSecurityContext(c *Container) (*v1.SecurityContext, bool, error) {
	privileged := c.Privileged()
	ro := c.IsReadOnly()
	allowPrivEscalation := !c.config.Spec.Process.NoNewPrivileges

	var capabilities *v1.Capabilities
	if !privileged {
		// Running privileged adds all caps.
		capabilities = c.capAddDrop(c.config.Spec.Process.Capabilities)
	}

	scHasData := false
	sc := v1.SecurityContext{
		// RunAsNonRoot is an optional parameter; our first implementations should be root only; however
		// I'm leaving this as a bread-crumb for later
		//RunAsNonRoot: &nonRoot,
	}
	if capabilities != nil {
		scHasData = true
		sc.Capabilities = capabilities
	}
	var selinuxOpts v1.SELinuxOptions
	selinuxHasData := false
	for _, label := range strings.Split(c.config.Spec.Annotations[define.InspectAnnotationLabel], ",label=") {
		opt, val, hasVal := strings.Cut(label, ":")
		if hasVal {
			switch opt {
			case "filetype":
				selinuxOpts.FileType = val
				selinuxHasData = true
			case "type":
				selinuxOpts.Type = val
				selinuxHasData = true
			case "level":
				selinuxOpts.Level = val
				selinuxHasData = true
			}
		} else if opt == "disable" {
			selinuxOpts.Type = "spc_t"
			selinuxHasData = true
		}
	}
	if selinuxHasData {
		sc.SELinuxOptions = &selinuxOpts
		scHasData = true
	}
	if !allowPrivEscalation {
		scHasData = true
		sc.AllowPrivilegeEscalation = &allowPrivEscalation
	}
	if privileged {
		scHasData = true
		sc.Privileged = &privileged
	}
	if ro {
		scHasData = true
		sc.ReadOnlyRootFilesystem = &ro
	}
	if c.config.Spec.Linux.MaskedPaths == nil {
		scHasData = true
		unmask := v1.UnmaskedProcMount
		sc.ProcMount = &unmask
	}

	if c.User() != "" {
		if !c.batched {
			c.lock.Lock()
			defer c.lock.Unlock()
		}
		if err := c.syncContainer(); err != nil {
			return nil, false, fmt.Errorf("unable to sync container during YAML generation: %w", err)
		}

		mountpoint := c.state.Mountpoint
		if mountpoint == "" {
			var err error
			mountpoint, err = c.mount()
			if err != nil {
				return nil, false, fmt.Errorf("failed to mount %s mountpoint: %w", c.ID(), err)
			}
			defer func() {
				if err := c.unmount(false); err != nil {
					logrus.Errorf("Failed to unmount container: %v", err)
				}
			}()
		}
		logrus.Debugf("Looking in container for user: %s", c.User())

		execUser, err := lookup.GetUserGroupInfo(mountpoint, c.User(), nil)
		if err != nil {
			return nil, false, err
		}
		uid := int64(execUser.Uid)
		gid := int64(execUser.Gid)
		scHasData = true
		sc.RunAsUser = &uid
		sc.RunAsGroup = &gid
	}

	return &sc, scHasData, nil
}

// generateKubeVolumeDeviceFromLinuxDevice takes a list of devices and makes a VolumeDevice struct for kube
func generateKubeVolumeDeviceFromLinuxDevice(devices []specs.LinuxDevice) []v1.VolumeDevice {
	volumeDevices := make([]v1.VolumeDevice, 0, len(devices))
	for _, d := range devices {
		vd := v1.VolumeDevice{
			// TBD: how are we going to sync up these names?
			//Name:
			DevicePath: d.Path,
		}
		volumeDevices = append(volumeDevices, vd)
	}
	return volumeDevices
}

func removeUnderscores(s string) string {
	return strings.ReplaceAll(s, "_", "")
}

// getAutoUpdateAnnotations searches for auto-update container labels
// and returns them as kube annotations
func getAutoUpdateAnnotations(ctrName string, ctrLabels map[string]string, useLongAnnotations bool) map[string]string {
	autoUpdateLabel := "io.containers.autoupdate"
	annotations := make(map[string]string)

	ctrName = removeUnderscores(ctrName)
	for k, v := range ctrLabels {
		if strings.Contains(k, autoUpdateLabel) {
			// Since labels can vary between containers within a pod, they are
			// identified by the container name when converted into kube annotations.
			kc := fmt.Sprintf("%s/%s", k, ctrName)
			annotations[kc] = truncateKubeAnnotation(v, useLongAnnotations)
		}
	}

	return annotations
}
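// exampleAutoUpdateAnnotation is an illustrative sketch (not part of
// upstream libpod): a container named "db_1" carrying the
// io.containers.autoupdate=registry label produces an annotation keyed by
// the underscore-free container name.
func exampleAutoUpdateAnnotation() map[string]string {
	labels := map[string]string{"io.containers.autoupdate": "registry"}
	return getAutoUpdateAnnotations("db_1", labels, false)
	// → map["io.containers.autoupdate/db1"] = "registry"
}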