2015-04-28 23:11:37 +00:00
|
|
|
/*
|
2015-05-01 16:19:44 +00:00
|
|
|
Copyright 2015 The Kubernetes Authors All rights reserved.
|
2015-04-28 23:11:37 +00:00
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package rkt
|
|
|
|
|
|
|
|
import (
|
2015-08-26 01:50:42 +00:00
|
|
|
"bytes"
|
2015-04-30 01:11:30 +00:00
|
|
|
"encoding/json"
|
2015-04-28 23:11:37 +00:00
|
|
|
"fmt"
|
2015-04-30 20:34:46 +00:00
|
|
|
"io"
|
|
|
|
"io/ioutil"
|
|
|
|
"os"
|
2015-04-28 23:11:37 +00:00
|
|
|
"os/exec"
|
2015-04-30 20:34:46 +00:00
|
|
|
"path"
|
2015-05-04 23:51:31 +00:00
|
|
|
"strconv"
|
2015-04-28 23:11:37 +00:00
|
|
|
"strings"
|
2016-03-04 22:52:45 +00:00
|
|
|
"sync"
|
2015-10-08 01:38:01 +00:00
|
|
|
"syscall"
|
2015-04-30 20:34:46 +00:00
|
|
|
"time"
|
2015-04-28 23:11:37 +00:00
|
|
|
|
2015-08-05 22:05:17 +00:00
|
|
|
appcschema "github.com/appc/spec/schema"
|
|
|
|
appctypes "github.com/appc/spec/schema/types"
|
|
|
|
"github.com/coreos/go-systemd/unit"
|
2015-11-19 02:35:31 +00:00
|
|
|
rktapi "github.com/coreos/rkt/api/v1alpha"
|
2015-08-05 22:05:17 +00:00
|
|
|
"github.com/golang/glog"
|
2015-11-20 23:57:56 +00:00
|
|
|
"golang.org/x/net/context"
|
2015-12-30 01:17:27 +00:00
|
|
|
"google.golang.org/grpc"
|
2015-08-05 22:03:47 +00:00
|
|
|
"k8s.io/kubernetes/pkg/api"
|
2015-09-03 21:40:58 +00:00
|
|
|
"k8s.io/kubernetes/pkg/client/record"
|
2015-08-05 22:03:47 +00:00
|
|
|
"k8s.io/kubernetes/pkg/credentialprovider"
|
|
|
|
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
2016-03-04 22:52:45 +00:00
|
|
|
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
2015-10-19 22:15:59 +00:00
|
|
|
proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
|
2016-03-04 22:52:45 +00:00
|
|
|
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
|
2015-11-20 17:54:37 +00:00
|
|
|
"k8s.io/kubernetes/pkg/kubelet/util/format"
|
2015-08-05 22:03:47 +00:00
|
|
|
"k8s.io/kubernetes/pkg/securitycontext"
|
|
|
|
"k8s.io/kubernetes/pkg/types"
|
2015-08-13 12:59:15 +00:00
|
|
|
"k8s.io/kubernetes/pkg/util"
|
2016-03-04 22:52:45 +00:00
|
|
|
"k8s.io/kubernetes/pkg/util/errors"
|
2015-10-08 01:38:01 +00:00
|
|
|
utilexec "k8s.io/kubernetes/pkg/util/exec"
|
2016-03-09 02:58:24 +00:00
|
|
|
"k8s.io/kubernetes/pkg/util/flowcontrol"
|
2015-10-07 21:04:41 +00:00
|
|
|
"k8s.io/kubernetes/pkg/util/sets"
|
2016-01-11 07:55:51 +00:00
|
|
|
utilstrings "k8s.io/kubernetes/pkg/util/strings"
|
2015-04-28 23:11:37 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// RktType is the name identifying the rkt runtime.
	RktType = "rkt"
	// DefaultRktAPIServiceEndpoint is the default address of the rkt API service.
	// NOTE(review): duplicates defaultRktAPIServiceAddr below; presumably one of
	// the two is kept for compatibility — confirm before removing either.
	DefaultRktAPIServiceEndpoint = "localhost:15441"

	// Version thresholds for the components this runtime depends on.
	// How and where they are enforced is not visible in this part of the file.
	minimumAppcVersion       = "0.7.4"
	minimumRktBinVersion     = "1.2.1"
	recommendedRktBinVersion = "1.2.1"
	minimumRktApiVersion     = "1.0.0-alpha"
	minimumSystemdVersion    = "219"

	// Well-known host directories for systemd unit files and rkt data/config.
	systemdServiceDir = "/run/systemd/system"
	rktDataDir        = "/var/lib/rkt"
	rktLocalConfigDir = "/etc/rkt"

	// kubernetesUnitPrefix is the prefix of every pod service file name
	// (see makePodServiceFileName). The remaining names are presumably the
	// section and option keys written into the unit file — TODO confirm.
	kubernetesUnitPrefix  = "k8s"
	unitKubernetesSection = "X-Kubernetes"
	unitPodName           = "POD"
	unitRktID             = "RktID"
	unitRestartCount      = "RestartCount"

	// Annotation keys set on the rkt pod manifest by makePodManifest.
	k8sRktKubeletAnno      = "rkt.kubernetes.io/managed-by-kubelet"
	k8sRktKubeletAnnoValue = "true"
	k8sRktUIDAnno          = "rkt.kubernetes.io/uid"
	k8sRktNameAnno         = "rkt.kubernetes.io/name"
	k8sRktNamespaceAnno    = "rkt.kubernetes.io/namespace"
	// TODO: remove the creation time annotation once this is closed: https://github.com/coreos/rkt/issues/1789
	k8sRktCreationTimeAnno = "rkt.kubernetes.io/created"
	// Annotation keys set per app (container) by newAppcRuntimeApp, except for
	// the restart count, which makePodManifest sets on the pod manifest.
	k8sRktContainerHashAnno          = "rkt.kubernetes.io/container-hash"
	k8sRktRestartCountAnno           = "rkt.kubernetes.io/restart-count"
	k8sRktTerminationMessagePathAnno = "rkt.kubernetes.io/termination-message-path"

	dockerPrefix = "docker://"

	// authDir and dockerAuthTemplate look like the directory name and JSON
	// template for rkt docker-registry credentials; the template takes, in
	// order, the registry, the user and the password (all %q-quoted).
	authDir            = "auth.d"
	dockerAuthTemplate = `{"rktKind":"dockerAuth","rktVersion":"v1","registries":[%q],"credentials":{"user":%q,"password":%q}}`

	defaultRktAPIServiceAddr = "localhost:15441"
	defaultNetworkName       = "rkt.kubernetes.io"

	// ndots specifies the minimum number of dots that a domain name must contain
	// for the resolver to consider it an FQDN (fully-qualified domain name).
	// We want SRV lookup names like _dns._udp.kube-dns.default.svc to be
	// considered relative, hence ndots is set to 5.
	// TODO(yifan): Move this and dockertools.ndotsDNSOption to a common package.
	defaultDNSOption = "ndots:5"

	// Annotations for the ENTRYPOINT and CMD for an ACI that's converted from Docker image.
	// TODO(yifan): Import them from docker2aci. See https://github.com/appc/docker2aci/issues/133.
	appcDockerEntrypoint = "appc.io/docker/entrypoint"
	appcDockerCmd        = "appc.io/docker/cmd"

	// TODO(yifan): Reuse this const with Docker runtime.
	minimumGracePeriodInSeconds = 2
)
|
|
|
|
|
2015-09-28 22:46:29 +00:00
|
|
|
// Runtime implements the container runtime for rkt. The implementation
// uses systemd, so in order to run this runtime, systemd must be installed
// on the machine.
type Runtime struct {
	// systemd is the systemd interface obtained from newSystemd in New.
	systemd systemdInterface
	// The grpc client for rkt api-service.
	apisvcConn *grpc.ClientConn
	// apisvc is the API client built on top of apisvcConn.
	apisvc rktapi.PublicAPIClient
	// config is the rkt configuration; config.Path is the rkt binary that
	// buildCommand executes.
	config *Config
	// TODO(yifan): Refactor this to be generic keyring.
	dockerKeyring credentialprovider.DockerKeyring

	// Collaborators injected through New.
	containerRefManager *kubecontainer.RefManager
	runtimeHelper       kubecontainer.RuntimeHelper
	recorder            record.EventRecorder
	livenessManager     proberesults.Manager
	// volumeGetter resolves the volumes of a pod when building its manifest.
	volumeGetter volumeGetter
	// imagePuller is either the serialized or the regular image puller,
	// chosen in New from the serializeImagePulls flag.
	imagePuller kubecontainer.ImagePuller
	// runner is created in New via lifecycle.NewHandlerRunner.
	runner kubecontainer.HandlerRunner

	// versions is populated by getVersions during New.
	versions versions
}

// Compile-time assertion that *Runtime satisfies kubecontainer.Runtime.
var _ kubecontainer.Runtime = &Runtime{}
|
2015-05-01 23:12:14 +00:00
|
|
|
|
2015-05-06 23:51:37 +00:00
|
|
|
// volumeGetter looks up the volumes that belong to a pod.
// GetVolumes returns the pod's volume map keyed by volume name and a boolean
// that is false when the volumes cannot be obtained (makePodManifest treats
// false as an error).
// TODO(yifan): Remove this when volumeManager is moved to separate package.
type volumeGetter interface {
	GetVolumes(podUID types.UID) (kubecontainer.VolumeMap, bool)
}
|
|
|
|
|
2015-04-28 23:11:37 +00:00
|
|
|
// New creates the rkt container runtime which implements the container runtime interface.
|
|
|
|
// It will test if the rkt binary is in the $PATH, and whether we can get the
|
|
|
|
// version of it. If so, creates the rkt container runtime, otherwise returns an error.
|
2016-03-19 00:22:11 +00:00
|
|
|
func New(
|
|
|
|
apiEndpoint string,
|
|
|
|
config *Config,
|
2016-01-29 00:01:01 +00:00
|
|
|
runtimeHelper kubecontainer.RuntimeHelper,
|
2015-05-06 18:02:08 +00:00
|
|
|
recorder record.EventRecorder,
|
|
|
|
containerRefManager *kubecontainer.RefManager,
|
2015-10-19 22:15:59 +00:00
|
|
|
livenessManager proberesults.Manager,
|
|
|
|
volumeGetter volumeGetter,
|
2016-03-04 22:52:45 +00:00
|
|
|
httpClient kubetypes.HttpGetter,
|
2016-03-09 02:58:24 +00:00
|
|
|
imageBackOff *flowcontrol.Backoff,
|
2015-10-20 21:49:44 +00:00
|
|
|
serializeImagePulls bool,
|
|
|
|
) (*Runtime, error) {
|
2015-11-19 02:35:31 +00:00
|
|
|
// Create dbus connection.
|
|
|
|
systemd, err := newSystemd()
|
2015-04-28 23:11:37 +00:00
|
|
|
if err != nil {
|
2015-11-19 02:35:31 +00:00
|
|
|
return nil, fmt.Errorf("rkt: cannot create systemd interface: %v", err)
|
2015-04-28 23:11:37 +00:00
|
|
|
}
|
|
|
|
|
2015-11-19 02:35:31 +00:00
|
|
|
// TODO(yifan): Use secure connection.
|
2016-03-19 00:22:11 +00:00
|
|
|
apisvcConn, err := grpc.Dial(apiEndpoint, grpc.WithInsecure())
|
2015-04-28 23:11:37 +00:00
|
|
|
if err != nil {
|
2015-11-19 02:35:31 +00:00
|
|
|
return nil, fmt.Errorf("rkt: cannot connect to rkt api service: %v", err)
|
2015-04-28 23:11:37 +00:00
|
|
|
}
|
|
|
|
|
2016-03-19 00:22:11 +00:00
|
|
|
// TODO(yifan): Get the rkt path from API service.
|
|
|
|
if config.Path == "" {
|
2015-08-17 17:03:45 +00:00
|
|
|
// No default rkt path was set, so try to find one in $PATH.
|
|
|
|
var err error
|
2016-03-19 00:22:11 +00:00
|
|
|
config.Path, err = exec.LookPath("rkt")
|
2015-08-17 17:03:45 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot find rkt binary: %v", err)
|
|
|
|
}
|
2015-04-28 23:11:37 +00:00
|
|
|
}
|
|
|
|
|
2015-09-28 22:46:29 +00:00
|
|
|
rkt := &Runtime{
|
2015-05-06 18:02:08 +00:00
|
|
|
systemd: systemd,
|
2015-11-19 02:35:31 +00:00
|
|
|
apisvcConn: apisvcConn,
|
|
|
|
apisvc: rktapi.NewPublicAPIClient(apisvcConn),
|
2015-05-06 18:02:08 +00:00
|
|
|
config: config,
|
|
|
|
dockerKeyring: credentialprovider.NewDockerKeyring(),
|
|
|
|
containerRefManager: containerRefManager,
|
2016-01-29 00:01:01 +00:00
|
|
|
runtimeHelper: runtimeHelper,
|
2015-05-06 18:02:08 +00:00
|
|
|
recorder: recorder,
|
2015-10-19 22:15:59 +00:00
|
|
|
livenessManager: livenessManager,
|
2015-05-14 00:57:54 +00:00
|
|
|
volumeGetter: volumeGetter,
|
2015-04-28 23:11:37 +00:00
|
|
|
}
|
2016-03-19 00:22:11 +00:00
|
|
|
|
|
|
|
rkt.config, err = rkt.getConfig(rkt.config)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("rkt: cannot get config from rkt api service: %v", err)
|
|
|
|
}
|
|
|
|
|
2016-03-04 22:52:45 +00:00
|
|
|
rkt.runner = lifecycle.NewHandlerRunner(httpClient, rkt, rkt)
|
|
|
|
|
2015-10-20 21:49:44 +00:00
|
|
|
if serializeImagePulls {
|
|
|
|
rkt.imagePuller = kubecontainer.NewSerializedImagePuller(recorder, rkt, imageBackOff)
|
|
|
|
} else {
|
|
|
|
rkt.imagePuller = kubecontainer.NewImagePuller(recorder, rkt, imageBackOff)
|
|
|
|
}
|
2015-04-28 23:11:37 +00:00
|
|
|
|
2016-03-12 01:29:25 +00:00
|
|
|
if err := rkt.getVersions(); err != nil {
|
|
|
|
return nil, fmt.Errorf("rkt: error getting version info: %v", err)
|
|
|
|
}
|
|
|
|
|
2015-04-28 23:11:37 +00:00
|
|
|
return rkt, nil
|
|
|
|
}
|
|
|
|
|
2015-09-28 22:46:29 +00:00
|
|
|
func (r *Runtime) buildCommand(args ...string) *exec.Cmd {
|
2016-03-19 00:22:11 +00:00
|
|
|
cmd := exec.Command(r.config.Path)
|
2015-04-28 23:11:37 +00:00
|
|
|
cmd.Args = append(cmd.Args, r.config.buildGlobalOptions()...)
|
|
|
|
cmd.Args = append(cmd.Args, args...)
|
|
|
|
return cmd
|
|
|
|
}
|
|
|
|
|
2016-01-01 01:01:34 +00:00
|
|
|
// convertToACName converts a string into ACName.
|
|
|
|
func convertToACName(name string) appctypes.ACName {
|
|
|
|
// Note that as the 'name' already matches 'DNS_LABEL'
|
|
|
|
// defined in pkg/api/types.go, there shouldn't be error or panic.
|
|
|
|
acname, _ := appctypes.SanitizeACName(name)
|
|
|
|
return *appctypes.MustACName(acname)
|
|
|
|
}
|
|
|
|
|
2015-04-28 23:11:37 +00:00
|
|
|
// runCommand invokes rkt binary with arguments and returns the result
|
2015-05-14 00:57:54 +00:00
|
|
|
// from stdout in a list of strings. Each string in the list is a line.
|
2015-09-28 22:46:29 +00:00
|
|
|
func (r *Runtime) runCommand(args ...string) ([]string, error) {
|
2015-04-28 23:11:37 +00:00
|
|
|
glog.V(4).Info("rkt: Run command:", args)
|
|
|
|
|
2015-08-26 01:50:42 +00:00
|
|
|
var stdout, stderr bytes.Buffer
|
|
|
|
cmd := r.buildCommand(args...)
|
|
|
|
cmd.Stdout, cmd.Stderr = &stdout, &stderr
|
|
|
|
if err := cmd.Run(); err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to run %v: %v\nstdout: %v\nstderr: %v", args, err, stdout.String(), stderr.String())
|
2015-04-28 23:11:37 +00:00
|
|
|
}
|
2015-08-26 01:50:42 +00:00
|
|
|
return strings.Split(strings.TrimSpace(stdout.String()), "\n"), nil
|
2015-04-28 23:11:37 +00:00
|
|
|
}
|
2015-04-30 03:04:29 +00:00
|
|
|
|
2016-04-05 01:03:40 +00:00
|
|
|
// makePodServiceFileName constructs the unit file name for a pod using its rkt pod uuid.
|
|
|
|
func makePodServiceFileName(uuid string) string {
|
2015-08-13 23:39:17 +00:00
|
|
|
// TODO(yifan): Add name for readability? We need to consider the
|
|
|
|
// limit of the length.
|
2016-04-05 01:03:40 +00:00
|
|
|
return fmt.Sprintf("%s_%s.service", kubernetesUnitPrefix, uuid)
|
2015-04-30 03:04:29 +00:00
|
|
|
}
|
2015-04-30 01:11:30 +00:00
|
|
|
|
2016-01-08 21:19:49 +00:00
|
|
|
// setIsolators sets the apps' isolators according to the security context and resource spec.
|
|
|
|
func setIsolators(app *appctypes.App, c *api.Container, ctx *api.SecurityContext) error {
|
|
|
|
var isolators []appctypes.Isolator
|
2015-04-30 01:11:30 +00:00
|
|
|
|
2016-01-08 21:19:49 +00:00
|
|
|
// Capabilities isolators.
|
|
|
|
if ctx != nil {
|
|
|
|
var addCaps, dropCaps []string
|
2015-05-09 21:17:36 +00:00
|
|
|
|
2016-01-08 21:19:49 +00:00
|
|
|
if ctx.Capabilities != nil {
|
|
|
|
addCaps, dropCaps = securitycontext.MakeCapabilities(ctx.Capabilities.Add, ctx.Capabilities.Drop)
|
2015-05-05 23:02:13 +00:00
|
|
|
}
|
2016-01-08 21:19:49 +00:00
|
|
|
if ctx.Privileged != nil && *ctx.Privileged {
|
|
|
|
addCaps, dropCaps = allCapabilities(), []string{}
|
|
|
|
}
|
|
|
|
if len(addCaps) > 0 {
|
|
|
|
set, err := appctypes.NewLinuxCapabilitiesRetainSet(addCaps...)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
isolators = append(isolators, set.AsIsolator())
|
|
|
|
}
|
|
|
|
if len(dropCaps) > 0 {
|
|
|
|
set, err := appctypes.NewLinuxCapabilitiesRevokeSet(dropCaps...)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
isolators = append(isolators, set.AsIsolator())
|
2015-04-30 01:11:30 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-08 21:19:49 +00:00
|
|
|
// Resources isolators.
|
|
|
|
type resource struct {
|
|
|
|
limit string
|
|
|
|
request string
|
2015-04-30 01:11:30 +00:00
|
|
|
}
|
|
|
|
|
2016-01-29 18:43:00 +00:00
|
|
|
// If limit is empty, populate it with request and vice versa.
|
|
|
|
resources := make(map[api.ResourceName]*resource)
|
2015-04-30 01:11:30 +00:00
|
|
|
for name, quantity := range c.Resources.Limits {
|
2016-01-29 18:43:00 +00:00
|
|
|
resources[name] = &resource{limit: quantity.String(), request: quantity.String()}
|
2015-04-30 01:11:30 +00:00
|
|
|
}
|
|
|
|
for name, quantity := range c.Resources.Requests {
|
|
|
|
r, ok := resources[name]
|
2016-01-29 18:43:00 +00:00
|
|
|
if ok {
|
|
|
|
r.request = quantity.String()
|
|
|
|
continue
|
2015-04-30 01:11:30 +00:00
|
|
|
}
|
2016-01-29 18:43:00 +00:00
|
|
|
resources[name] = &resource{limit: quantity.String(), request: quantity.String()}
|
2015-04-30 01:11:30 +00:00
|
|
|
}
|
2016-01-08 21:19:49 +00:00
|
|
|
|
2015-04-30 01:11:30 +00:00
|
|
|
for name, res := range resources {
|
|
|
|
switch name {
|
|
|
|
case api.ResourceCPU:
|
2016-01-08 21:19:49 +00:00
|
|
|
cpu, err := appctypes.NewResourceCPUIsolator(res.request, res.limit)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
isolators = append(isolators, cpu.AsIsolator())
|
2015-04-30 01:11:30 +00:00
|
|
|
case api.ResourceMemory:
|
2016-01-08 21:19:49 +00:00
|
|
|
memory, err := appctypes.NewResourceMemoryIsolator(res.request, res.limit)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
isolators = append(isolators, memory.AsIsolator())
|
2015-04-30 01:11:30 +00:00
|
|
|
default:
|
|
|
|
return fmt.Errorf("resource type not supported: %v", name)
|
|
|
|
}
|
|
|
|
}
|
2016-01-08 21:19:49 +00:00
|
|
|
|
|
|
|
mergeIsolators(app, isolators)
|
2015-04-30 01:11:30 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-01-08 21:19:49 +00:00
|
|
|
// mergeIsolators replaces the app.Isolators with isolators.
|
|
|
|
func mergeIsolators(app *appctypes.App, isolators []appctypes.Isolator) {
|
|
|
|
for _, is := range isolators {
|
|
|
|
found := false
|
|
|
|
for j, js := range app.Isolators {
|
|
|
|
if is.Name.Equals(js.Name) {
|
|
|
|
switch is.Name {
|
|
|
|
case appctypes.LinuxCapabilitiesRetainSetName:
|
|
|
|
// TODO(yifan): More fine grain merge for capability set instead of override.
|
|
|
|
fallthrough
|
|
|
|
case appctypes.LinuxCapabilitiesRevokeSetName:
|
|
|
|
fallthrough
|
|
|
|
case appctypes.ResourceCPUName:
|
|
|
|
fallthrough
|
|
|
|
case appctypes.ResourceMemoryName:
|
|
|
|
app.Isolators[j] = is
|
|
|
|
default:
|
|
|
|
panic(fmt.Sprintf("unexpected isolator name: %v", is.Name))
|
|
|
|
}
|
|
|
|
found = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if !found {
|
|
|
|
app.Isolators = append(app.Isolators, is)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-01 01:01:34 +00:00
|
|
|
// mergeEnv merges the optEnv with the image's environments.
|
|
|
|
// The environments defined in the image will be overridden by
|
|
|
|
// the ones with the same name in optEnv.
|
|
|
|
func mergeEnv(app *appctypes.App, optEnv []kubecontainer.EnvVar) {
|
|
|
|
envMap := make(map[string]string)
|
|
|
|
for _, e := range app.Environment {
|
|
|
|
envMap[e.Name] = e.Value
|
|
|
|
}
|
|
|
|
for _, e := range optEnv {
|
|
|
|
envMap[e.Name] = e.Value
|
|
|
|
}
|
|
|
|
app.Environment = nil
|
|
|
|
for name, value := range envMap {
|
|
|
|
app.Environment = append(app.Environment, appctypes.EnvironmentVariable{
|
|
|
|
Name: name,
|
|
|
|
Value: value,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// mergeMounts merges the optMounts with the image's mount points.
|
|
|
|
// The mount points defined in the image will be overridden by the ones
|
|
|
|
// with the same name in optMounts.
|
|
|
|
func mergeMounts(app *appctypes.App, optMounts []kubecontainer.Mount) {
|
|
|
|
mountMap := make(map[appctypes.ACName]appctypes.MountPoint)
|
|
|
|
for _, m := range app.MountPoints {
|
|
|
|
mountMap[m.Name] = m
|
|
|
|
}
|
|
|
|
for _, m := range optMounts {
|
|
|
|
mpName := convertToACName(m.Name)
|
|
|
|
mountMap[mpName] = appctypes.MountPoint{
|
|
|
|
Name: mpName,
|
|
|
|
Path: m.ContainerPath,
|
|
|
|
ReadOnly: m.ReadOnly,
|
2015-08-21 18:47:05 +00:00
|
|
|
}
|
|
|
|
}
|
2016-01-01 01:01:34 +00:00
|
|
|
app.MountPoints = nil
|
|
|
|
for _, mount := range mountMap {
|
|
|
|
app.MountPoints = append(app.MountPoints, mount)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// mergePortMappings merges the optPortMappings with the image's port mappings.
|
|
|
|
// The port mappings defined in the image will be overridden by the ones
|
|
|
|
// with the same name in optPortMappings.
|
|
|
|
func mergePortMappings(app *appctypes.App, optPortMappings []kubecontainer.PortMapping) {
|
|
|
|
portMap := make(map[appctypes.ACName]appctypes.Port)
|
|
|
|
for _, p := range app.Ports {
|
|
|
|
portMap[p.Name] = p
|
|
|
|
}
|
|
|
|
for _, p := range optPortMappings {
|
|
|
|
pName := convertToACName(p.Name)
|
|
|
|
portMap[pName] = appctypes.Port{
|
|
|
|
Name: pName,
|
|
|
|
Protocol: string(p.Protocol),
|
|
|
|
Port: uint(p.ContainerPort),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
app.Ports = nil
|
|
|
|
for _, port := range portMap {
|
|
|
|
app.Ports = append(app.Ports, port)
|
|
|
|
}
|
2015-08-21 18:47:05 +00:00
|
|
|
}
|
|
|
|
|
2016-01-08 21:19:49 +00:00
|
|
|
func verifyNonRoot(app *appctypes.App, ctx *api.SecurityContext) error {
|
|
|
|
if ctx != nil && ctx.RunAsNonRoot != nil && *ctx.RunAsNonRoot {
|
|
|
|
if ctx.RunAsUser != nil && *ctx.RunAsUser == 0 {
|
|
|
|
return fmt.Errorf("container's runAsUser breaks non-root policy")
|
|
|
|
}
|
|
|
|
if ctx.RunAsUser == nil && app.User == "0" {
|
|
|
|
return fmt.Errorf("container has no runAsUser and image will run as root")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
2015-08-21 18:47:05 +00:00
|
|
|
|
2016-01-08 21:19:49 +00:00
|
|
|
func setSupplementaryGIDs(app *appctypes.App, podCtx *api.PodSecurityContext) {
|
|
|
|
if podCtx != nil {
|
|
|
|
app.SupplementaryGIDs = app.SupplementaryGIDs[:0]
|
|
|
|
for _, v := range podCtx.SupplementalGroups {
|
|
|
|
app.SupplementaryGIDs = append(app.SupplementaryGIDs, int(v))
|
|
|
|
}
|
|
|
|
if podCtx.FSGroup != nil {
|
|
|
|
app.SupplementaryGIDs = append(app.SupplementaryGIDs, int(*podCtx.FSGroup))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// setApp merges the container spec with the image's manifest.
|
2016-03-08 19:46:40 +00:00
|
|
|
func setApp(imgManifest *appcschema.ImageManifest, c *api.Container, opts *kubecontainer.RunContainerOptions, ctx *api.SecurityContext, podCtx *api.PodSecurityContext) error {
|
|
|
|
app := imgManifest.App
|
|
|
|
|
|
|
|
// Set up Exec.
|
|
|
|
var command, args []string
|
|
|
|
cmd, ok := imgManifest.Annotations.Get(appcDockerEntrypoint)
|
|
|
|
if ok {
|
2016-03-18 23:58:55 +00:00
|
|
|
err := json.Unmarshal([]byte(cmd), &command)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("cannot unmarshal ENTRYPOINT %q: %v", cmd, err)
|
|
|
|
}
|
2016-03-08 19:46:40 +00:00
|
|
|
}
|
|
|
|
ag, ok := imgManifest.Annotations.Get(appcDockerCmd)
|
|
|
|
if ok {
|
2016-03-18 23:58:55 +00:00
|
|
|
err := json.Unmarshal([]byte(ag), &args)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("cannot unmarshal CMD %q: %v", ag, err)
|
|
|
|
}
|
2016-03-08 19:46:40 +00:00
|
|
|
}
|
|
|
|
userCommand, userArgs := kubecontainer.ExpandContainerCommandAndArgs(c, opts.Envs)
|
|
|
|
|
|
|
|
if len(userCommand) > 0 {
|
|
|
|
command = userCommand
|
|
|
|
args = nil // If 'command' is specified, then drop the default args.
|
|
|
|
}
|
|
|
|
if len(userArgs) > 0 {
|
|
|
|
args = userArgs
|
|
|
|
}
|
|
|
|
|
2016-01-28 07:14:50 +00:00
|
|
|
exec := append(command, args...)
|
|
|
|
if len(exec) > 0 {
|
|
|
|
app.Exec = exec
|
2015-04-30 01:11:30 +00:00
|
|
|
}
|
|
|
|
|
2016-01-08 21:19:49 +00:00
|
|
|
// Set UID and GIDs.
|
|
|
|
if err := verifyNonRoot(app, ctx); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if ctx != nil && ctx.RunAsUser != nil {
|
|
|
|
app.User = strconv.Itoa(int(*ctx.RunAsUser))
|
|
|
|
}
|
|
|
|
setSupplementaryGIDs(app, podCtx)
|
2015-04-30 01:11:30 +00:00
|
|
|
|
2016-01-27 19:55:56 +00:00
|
|
|
// If 'User' or 'Group' are still empty at this point,
|
|
|
|
// then apply the root UID and GID.
|
|
|
|
// TODO(yifan): Instead of using root GID, we should use
|
|
|
|
// the GID which the user is in.
|
|
|
|
if app.User == "" {
|
|
|
|
app.User = "0"
|
|
|
|
}
|
|
|
|
if app.Group == "" {
|
|
|
|
app.Group = "0"
|
|
|
|
}
|
|
|
|
|
2016-01-08 21:19:49 +00:00
|
|
|
// Set working directory.
|
2015-04-30 01:11:30 +00:00
|
|
|
if len(c.WorkingDir) > 0 {
|
|
|
|
app.WorkingDirectory = c.WorkingDir
|
|
|
|
}
|
|
|
|
|
2016-01-01 01:01:34 +00:00
|
|
|
// Notes that we don't create Mounts section in the pod manifest here,
|
|
|
|
// as Mounts will be automatically generated by rkt.
|
|
|
|
mergeMounts(app, opts.Mounts)
|
|
|
|
mergeEnv(app, opts.Envs)
|
|
|
|
mergePortMappings(app, opts.PortMappings)
|
2015-04-30 01:11:30 +00:00
|
|
|
|
2016-01-08 21:19:49 +00:00
|
|
|
return setIsolators(app, c, ctx)
|
2015-04-30 01:11:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// makePodManifest transforms a kubelet pod spec to the rkt pod manifest.
|
2015-09-28 22:46:29 +00:00
|
|
|
func (r *Runtime) makePodManifest(pod *api.Pod, pullSecrets []api.Secret) (*appcschema.PodManifest, error) {
|
2015-04-30 01:11:30 +00:00
|
|
|
manifest := appcschema.BlankPodManifest()
|
|
|
|
|
2015-11-21 00:56:35 +00:00
|
|
|
listResp, err := r.apisvc.ListPods(context.Background(), &rktapi.ListPodsRequest{
|
2015-12-17 00:52:39 +00:00
|
|
|
Detail: true,
|
|
|
|
Filters: kubernetesPodFilters(pod.UID),
|
2015-11-21 00:56:35 +00:00
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("couldn't list pods: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
restartCount := 0
|
2015-12-17 00:52:39 +00:00
|
|
|
for _, pod := range listResp.Pods {
|
2015-11-21 00:56:35 +00:00
|
|
|
manifest := &appcschema.PodManifest{}
|
2015-12-17 00:52:39 +00:00
|
|
|
err = json.Unmarshal(pod.Manifest, manifest)
|
2015-04-30 01:11:30 +00:00
|
|
|
if err != nil {
|
2015-11-21 00:56:35 +00:00
|
|
|
glog.Warningf("rkt: error unmatshaling pod manifest: %v", err)
|
|
|
|
continue
|
2015-04-30 01:11:30 +00:00
|
|
|
}
|
|
|
|
|
2015-11-21 00:56:35 +00:00
|
|
|
if countString, ok := manifest.Annotations.Get(k8sRktRestartCountAnno); ok {
|
|
|
|
num, err := strconv.Atoi(countString)
|
|
|
|
if err != nil {
|
|
|
|
glog.Warningf("rkt: error reading restart count on pod: %v", err)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if num+1 > restartCount {
|
|
|
|
restartCount = num + 1
|
|
|
|
}
|
2015-05-14 00:57:54 +00:00
|
|
|
}
|
2015-11-21 00:56:35 +00:00
|
|
|
}
|
2015-05-14 00:57:54 +00:00
|
|
|
|
2015-11-21 00:56:35 +00:00
|
|
|
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktKubeletAnno), k8sRktKubeletAnnoValue)
|
|
|
|
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktUIDAnno), string(pod.UID))
|
|
|
|
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktNameAnno), pod.Name)
|
|
|
|
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktNamespaceAnno), pod.Namespace)
|
|
|
|
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktCreationTimeAnno), strconv.FormatInt(time.Now().Unix(), 10))
|
|
|
|
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktRestartCountAnno), strconv.Itoa(restartCount))
|
2015-05-14 00:57:54 +00:00
|
|
|
|
2015-11-21 00:56:35 +00:00
|
|
|
for _, c := range pod.Spec.Containers {
|
2016-01-12 02:30:29 +00:00
|
|
|
err := r.newAppcRuntimeApp(pod, c, pullSecrets, manifest)
|
2015-08-10 18:15:13 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2015-04-30 01:11:30 +00:00
|
|
|
}
|
|
|
|
|
2015-05-06 23:51:37 +00:00
|
|
|
volumeMap, ok := r.volumeGetter.GetVolumes(pod.UID)
|
|
|
|
if !ok {
|
2015-11-20 17:54:37 +00:00
|
|
|
return nil, fmt.Errorf("cannot get the volumes for pod %q", format.Pod(pod))
|
2015-05-06 23:51:37 +00:00
|
|
|
}
|
|
|
|
|
2015-04-30 01:11:30 +00:00
|
|
|
// Set global volumes.
|
2016-01-01 01:01:34 +00:00
|
|
|
for vname, volume := range volumeMap {
|
2015-04-30 01:11:30 +00:00
|
|
|
manifest.Volumes = append(manifest.Volumes, appctypes.Volume{
|
2016-01-01 01:01:34 +00:00
|
|
|
Name: convertToACName(vname),
|
2015-04-30 01:11:30 +00:00
|
|
|
Kind: "host",
|
2016-03-23 05:12:21 +00:00
|
|
|
Source: volume.Mounter.GetPath(),
|
2015-04-30 01:11:30 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO(yifan): Set pod-level isolators once it's supported in kubernetes.
|
|
|
|
return manifest, nil
|
|
|
|
}
|
|
|
|
|
2016-04-14 19:00:51 +00:00
|
|
|
// TODO(yifan): Can make rkt handle this when '--net=host'. See https://github.com/coreos/rkt/issues/2430.
|
|
|
|
func makeHostNetworkMount(opts *kubecontainer.RunContainerOptions) (*kubecontainer.Mount, *kubecontainer.Mount) {
|
|
|
|
hostsMount := kubecontainer.Mount{
|
|
|
|
Name: "kubernetes-hostnetwork-hosts-conf",
|
|
|
|
ContainerPath: "/etc/hosts",
|
|
|
|
HostPath: "/etc/hosts",
|
|
|
|
ReadOnly: true,
|
|
|
|
}
|
|
|
|
resolvMount := kubecontainer.Mount{
|
|
|
|
Name: "kubernetes-hostnetwork-resolv-conf",
|
|
|
|
ContainerPath: "/etc/resolv.conf",
|
|
|
|
HostPath: "/etc/resolv.conf",
|
|
|
|
ReadOnly: true,
|
|
|
|
}
|
|
|
|
opts.Mounts = append(opts.Mounts, hostsMount, resolvMount)
|
|
|
|
return &hostsMount, &resolvMount
|
|
|
|
}
|
|
|
|
|
2016-01-12 02:30:29 +00:00
|
|
|
func makeContainerLogMount(opts *kubecontainer.RunContainerOptions, container *api.Container) (*kubecontainer.Mount, error) {
|
|
|
|
if opts.PodContainerDir == "" || container.TerminationMessagePath == "" {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// In docker runtime, the container log path contains the container ID.
|
|
|
|
// However, for rkt runtime, we cannot get the container ID before the
|
|
|
|
// the container is launched, so here we generate a random uuid to enable
|
|
|
|
// us to map a container's termination message path to an unique log file
|
|
|
|
// on the disk.
|
|
|
|
randomUID := util.NewUUID()
|
|
|
|
containerLogPath := path.Join(opts.PodContainerDir, string(randomUID))
|
|
|
|
fs, err := os.Create(containerLogPath)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := fs.Close(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2016-04-14 19:00:51 +00:00
|
|
|
mnt := kubecontainer.Mount{
|
2016-01-12 02:30:29 +00:00
|
|
|
// Use a random name for the termination message mount, so that
|
|
|
|
// when a container restarts, it will not overwrite the old termination
|
|
|
|
// message.
|
|
|
|
Name: fmt.Sprintf("termination-message-%s", randomUID),
|
|
|
|
ContainerPath: container.TerminationMessagePath,
|
|
|
|
HostPath: containerLogPath,
|
|
|
|
ReadOnly: false,
|
|
|
|
}
|
2016-04-14 19:00:51 +00:00
|
|
|
opts.Mounts = append(opts.Mounts, mnt)
|
2016-01-12 02:30:29 +00:00
|
|
|
|
2016-04-14 19:00:51 +00:00
|
|
|
return &mnt, nil
|
2016-01-12 02:30:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (r *Runtime) newAppcRuntimeApp(pod *api.Pod, c api.Container, pullSecrets []api.Secret, manifest *appcschema.PodManifest) error {
|
2015-11-21 00:56:35 +00:00
|
|
|
if err, _ := r.imagePuller.PullImage(pod, &c, pullSecrets); err != nil {
|
2016-01-12 02:30:29 +00:00
|
|
|
return nil
|
2015-11-21 00:56:35 +00:00
|
|
|
}
|
|
|
|
imgManifest, err := r.getImageManifest(c.Image)
|
|
|
|
if err != nil {
|
2016-01-12 02:30:29 +00:00
|
|
|
return err
|
2015-11-21 00:56:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if imgManifest.App == nil {
|
|
|
|
imgManifest.App = new(appctypes.App)
|
|
|
|
}
|
|
|
|
|
2015-12-17 00:52:39 +00:00
|
|
|
imageID, err := r.getImageID(c.Image)
|
2015-11-21 00:56:35 +00:00
|
|
|
if err != nil {
|
2016-01-12 02:30:29 +00:00
|
|
|
return err
|
2015-11-21 00:56:35 +00:00
|
|
|
}
|
2015-12-17 00:52:39 +00:00
|
|
|
hash, err := appctypes.NewHash(imageID)
|
2015-11-21 00:56:35 +00:00
|
|
|
if err != nil {
|
2016-01-12 02:30:29 +00:00
|
|
|
return err
|
2015-11-21 00:56:35 +00:00
|
|
|
}
|
|
|
|
|
2016-03-07 20:24:08 +00:00
|
|
|
// TODO: determine how this should be handled for rkt
|
|
|
|
opts, err := r.runtimeHelper.GenerateRunContainerOptions(pod, &c, "")
|
2015-11-21 00:56:35 +00:00
|
|
|
if err != nil {
|
2016-01-12 02:30:29 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// create the container log file and make a mount pair.
|
|
|
|
mnt, err := makeContainerLogMount(opts, &c)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2015-11-21 00:56:35 +00:00
|
|
|
}
|
|
|
|
|
2016-04-14 19:00:51 +00:00
|
|
|
// If run in 'hostnetwork' mode, then mount the host's /etc/resolv.conf and /etc/hosts,
|
|
|
|
// and add volumes.
|
|
|
|
var hostsMnt, resolvMnt *kubecontainer.Mount
|
|
|
|
if kubecontainer.IsHostNetworkPod(pod) {
|
|
|
|
hostsMnt, resolvMnt = makeHostNetworkMount(opts)
|
|
|
|
manifest.Volumes = append(manifest.Volumes, appctypes.Volume{
|
|
|
|
Name: convertToACName(hostsMnt.Name),
|
|
|
|
Kind: "host",
|
|
|
|
Source: hostsMnt.HostPath,
|
|
|
|
})
|
|
|
|
manifest.Volumes = append(manifest.Volumes, appctypes.Volume{
|
|
|
|
Name: convertToACName(resolvMnt.Name),
|
|
|
|
Kind: "host",
|
|
|
|
Source: resolvMnt.HostPath,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2016-01-08 21:19:49 +00:00
|
|
|
ctx := securitycontext.DetermineEffectiveSecurityContext(pod, &c)
|
2016-03-08 19:46:40 +00:00
|
|
|
if err := setApp(imgManifest, &c, opts, ctx, pod.Spec.SecurityContext); err != nil {
|
2016-01-12 02:30:29 +00:00
|
|
|
return err
|
2015-11-21 00:56:35 +00:00
|
|
|
}
|
|
|
|
|
2016-01-12 02:30:29 +00:00
|
|
|
ra := appcschema.RuntimeApp{
|
2016-01-01 01:01:34 +00:00
|
|
|
Name: convertToACName(c.Name),
|
2015-11-21 00:56:35 +00:00
|
|
|
Image: appcschema.RuntimeImage{ID: *hash},
|
|
|
|
App: imgManifest.App,
|
|
|
|
Annotations: []appctypes.Annotation{
|
|
|
|
{
|
|
|
|
Name: *appctypes.MustACIdentifier(k8sRktContainerHashAnno),
|
2016-01-01 01:01:34 +00:00
|
|
|
Value: strconv.FormatUint(kubecontainer.HashContainer(&c), 10),
|
2015-11-21 00:56:35 +00:00
|
|
|
},
|
|
|
|
},
|
2016-01-12 02:30:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if mnt != nil {
|
|
|
|
ra.Annotations = append(ra.Annotations, appctypes.Annotation{
|
|
|
|
Name: *appctypes.MustACIdentifier(k8sRktTerminationMessagePathAnno),
|
|
|
|
Value: mnt.HostPath,
|
|
|
|
})
|
|
|
|
|
|
|
|
manifest.Volumes = append(manifest.Volumes, appctypes.Volume{
|
|
|
|
Name: convertToACName(mnt.Name),
|
|
|
|
Kind: "host",
|
|
|
|
Source: mnt.HostPath,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
manifest.Apps = append(manifest.Apps, ra)
|
|
|
|
|
|
|
|
// Set global ports.
|
|
|
|
for _, port := range opts.PortMappings {
|
|
|
|
manifest.Ports = append(manifest.Ports, appctypes.ExposedPort{
|
|
|
|
Name: convertToACName(port.Name),
|
|
|
|
HostPort: uint(port.HostPort),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
2015-11-21 00:56:35 +00:00
|
|
|
}
|
|
|
|
|
2015-12-21 19:25:38 +00:00
|
|
|
func runningKubernetesPodFilters(uid types.UID) []*rktapi.PodFilter {
|
|
|
|
return []*rktapi.PodFilter{
|
|
|
|
{
|
|
|
|
States: []rktapi.PodState{
|
|
|
|
rktapi.PodState_POD_STATE_RUNNING,
|
|
|
|
},
|
|
|
|
Annotations: []*rktapi.KeyValue{
|
|
|
|
{
|
|
|
|
Key: k8sRktKubeletAnno,
|
|
|
|
Value: k8sRktKubeletAnnoValue,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Key: k8sRktUIDAnno,
|
|
|
|
Value: string(uid),
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-12-17 00:52:39 +00:00
|
|
|
func kubernetesPodFilters(uid types.UID) []*rktapi.PodFilter {
|
|
|
|
return []*rktapi.PodFilter{
|
|
|
|
{
|
|
|
|
Annotations: []*rktapi.KeyValue{
|
|
|
|
{
|
|
|
|
Key: k8sRktKubeletAnno,
|
|
|
|
Value: k8sRktKubeletAnnoValue,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Key: k8sRktUIDAnno,
|
|
|
|
Value: string(uid),
|
|
|
|
},
|
2015-11-21 00:56:35 +00:00
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-30 20:34:46 +00:00
|
|
|
func newUnitOption(section, name, value string) *unit.UnitOption {
|
|
|
|
return &unit.UnitOption{Section: section, Name: name, Value: value}
|
|
|
|
}
|
|
|
|
|
2015-08-17 23:19:25 +00:00
|
|
|
// apiPodToruntimePod converts an api.Pod to kubelet/container.Pod.
|
2015-08-13 23:39:17 +00:00
|
|
|
func apiPodToruntimePod(uuid string, pod *api.Pod) *kubecontainer.Pod {
|
2015-04-30 20:34:46 +00:00
|
|
|
p := &kubecontainer.Pod{
|
|
|
|
ID: pod.UID,
|
|
|
|
Name: pod.Name,
|
|
|
|
Namespace: pod.Namespace,
|
|
|
|
}
|
|
|
|
for i := range pod.Spec.Containers {
|
|
|
|
c := &pod.Spec.Containers[i]
|
|
|
|
p.Containers = append(p.Containers, &kubecontainer.Container{
|
2015-10-07 17:58:05 +00:00
|
|
|
ID: buildContainerID(&containerID{uuid, c.Name}),
|
2015-04-30 20:34:46 +00:00
|
|
|
Name: c.Name,
|
|
|
|
Image: c.Image,
|
2015-05-15 23:14:08 +00:00
|
|
|
Hash: kubecontainer.HashContainer(c),
|
2015-04-30 20:34:46 +00:00
|
|
|
Created: time.Now().Unix(),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
return p
|
|
|
|
}
|
|
|
|
|
2015-08-25 20:03:33 +00:00
|
|
|
// serviceFilePath returns the absolute path of the service file.
|
|
|
|
func serviceFilePath(serviceName string) string {
|
|
|
|
return path.Join(systemdServiceDir, serviceName)
|
|
|
|
}
|
|
|
|
|
2016-01-29 00:01:01 +00:00
|
|
|
// generateRunCommand crafts a 'rkt run-prepared' command with necessary parameters.
|
|
|
|
func (r *Runtime) generateRunCommand(pod *api.Pod, uuid string) (string, error) {
|
|
|
|
runPrepared := r.buildCommand("run-prepared").Args
|
|
|
|
|
2016-04-14 19:00:51 +00:00
|
|
|
var hostname string
|
|
|
|
var err error
|
2016-01-29 00:01:01 +00:00
|
|
|
// Setup network configuration.
|
2016-04-14 19:00:51 +00:00
|
|
|
if kubecontainer.IsHostNetworkPod(pod) {
|
2016-01-29 00:01:01 +00:00
|
|
|
runPrepared = append(runPrepared, "--net=host")
|
2016-04-14 19:00:51 +00:00
|
|
|
|
|
|
|
// TODO(yifan): Let runtimeHelper.GeneratePodHostNameAndDomain() to handle this.
|
|
|
|
hostname, err = os.Hostname()
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
2016-01-29 00:01:01 +00:00
|
|
|
} else {
|
|
|
|
runPrepared = append(runPrepared, fmt.Sprintf("--net=%s", defaultNetworkName))
|
|
|
|
|
2016-04-14 19:00:51 +00:00
|
|
|
// Setup DNS.
|
|
|
|
dnsServers, dnsSearches, err := r.runtimeHelper.GetClusterDNS(pod)
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
for _, server := range dnsServers {
|
|
|
|
runPrepared = append(runPrepared, fmt.Sprintf("--dns=%s", server))
|
|
|
|
}
|
|
|
|
for _, search := range dnsSearches {
|
|
|
|
runPrepared = append(runPrepared, fmt.Sprintf("--dns-search=%s", search))
|
|
|
|
}
|
|
|
|
if len(dnsServers) > 0 || len(dnsSearches) > 0 {
|
|
|
|
runPrepared = append(runPrepared, fmt.Sprintf("--dns-opt=%s", defaultDNSOption))
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO(yifan): host domain is not being used.
|
|
|
|
hostname, _ = r.runtimeHelper.GeneratePodHostNameAndDomain(pod)
|
2016-01-29 00:01:01 +00:00
|
|
|
}
|
2016-03-21 21:28:57 +00:00
|
|
|
|
|
|
|
runPrepared = append(runPrepared, fmt.Sprintf("--hostname=%s", hostname))
|
2016-01-29 00:01:01 +00:00
|
|
|
runPrepared = append(runPrepared, uuid)
|
|
|
|
return strings.Join(runPrepared, " "), nil
|
|
|
|
}
|
|
|
|
|
2015-04-30 20:34:46 +00:00
|
|
|
// preparePod will:
|
|
|
|
//
|
|
|
|
// 1. Invoke 'rkt prepare' to prepare the pod, and get the rkt pod uuid.
|
2015-09-01 02:25:26 +00:00
|
|
|
// 2. Create the unit file and save it under systemdUnitDir.
|
2015-04-30 20:34:46 +00:00
|
|
|
//
|
2015-08-26 01:50:42 +00:00
|
|
|
// On success, it will return a string that represents name of the unit file
|
|
|
|
// and the runtime pod.
|
2015-09-28 22:46:29 +00:00
|
|
|
func (r *Runtime) preparePod(pod *api.Pod, pullSecrets []api.Secret) (string, *kubecontainer.Pod, error) {
|
2015-04-30 20:34:46 +00:00
|
|
|
// Generate the pod manifest from the pod spec.
|
2015-08-17 23:19:25 +00:00
|
|
|
manifest, err := r.makePodManifest(pod, pullSecrets)
|
2015-04-30 20:34:46 +00:00
|
|
|
if err != nil {
|
2015-08-26 01:50:42 +00:00
|
|
|
return "", nil, err
|
2015-04-30 20:34:46 +00:00
|
|
|
}
|
2015-08-13 23:39:17 +00:00
|
|
|
manifestFile, err := ioutil.TempFile("", fmt.Sprintf("manifest-%s-", pod.Name))
|
2015-04-30 20:34:46 +00:00
|
|
|
if err != nil {
|
2015-08-26 01:50:42 +00:00
|
|
|
return "", nil, err
|
2015-04-30 20:34:46 +00:00
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
manifestFile.Close()
|
|
|
|
if err := os.Remove(manifestFile.Name()); err != nil {
|
|
|
|
glog.Warningf("rkt: Cannot remove temp manifest file %q: %v", manifestFile.Name(), err)
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
data, err := json.Marshal(manifest)
|
|
|
|
if err != nil {
|
2015-08-26 01:50:42 +00:00
|
|
|
return "", nil, err
|
2015-04-30 20:34:46 +00:00
|
|
|
}
|
2016-01-14 00:52:47 +00:00
|
|
|
|
|
|
|
glog.V(4).Infof("Generating pod manifest for pod %q: %v", format.Pod(pod), string(data))
|
2015-04-30 20:34:46 +00:00
|
|
|
// Since File.Write returns error if the written length is less than len(data),
|
|
|
|
// so check error is enough for us.
|
|
|
|
if _, err := manifestFile.Write(data); err != nil {
|
2015-08-26 01:50:42 +00:00
|
|
|
return "", nil, err
|
2015-04-30 20:34:46 +00:00
|
|
|
}
|
|
|
|
|
2015-08-25 20:03:33 +00:00
|
|
|
// Run 'rkt prepare' to get the rkt UUID.
|
|
|
|
cmds := []string{"prepare", "--quiet", "--pod-manifest", manifestFile.Name()}
|
2015-09-01 02:25:26 +00:00
|
|
|
if r.config.Stage1Image != "" {
|
2016-04-01 07:32:10 +00:00
|
|
|
cmds = append(cmds, "--stage1-path", r.config.Stage1Image)
|
2015-09-01 02:25:26 +00:00
|
|
|
}
|
2015-04-30 20:34:46 +00:00
|
|
|
output, err := r.runCommand(cmds...)
|
|
|
|
if err != nil {
|
2015-08-26 01:50:42 +00:00
|
|
|
return "", nil, err
|
2015-04-30 20:34:46 +00:00
|
|
|
}
|
|
|
|
if len(output) != 1 {
|
2015-08-26 01:50:42 +00:00
|
|
|
return "", nil, fmt.Errorf("invalid output from 'rkt prepare': %v", output)
|
2015-04-30 20:34:46 +00:00
|
|
|
}
|
|
|
|
uuid := output[0]
|
2015-08-13 23:39:17 +00:00
|
|
|
glog.V(4).Infof("'rkt prepare' returns %q", uuid)
|
2015-04-30 20:34:46 +00:00
|
|
|
|
2015-08-25 20:03:33 +00:00
|
|
|
// Create systemd service file for the rkt pod.
|
2016-01-29 00:01:01 +00:00
|
|
|
runPrepared, err := r.generateRunCommand(pod, uuid)
|
|
|
|
if err != nil {
|
|
|
|
return "", nil, fmt.Errorf("failed to generate 'rkt run-prepared' command: %v", err)
|
2015-08-13 23:39:17 +00:00
|
|
|
}
|
|
|
|
|
2015-09-15 16:43:59 +00:00
|
|
|
// TODO handle pod.Spec.HostPID
|
2015-08-10 08:14:01 +00:00
|
|
|
// TODO handle pod.Spec.HostIPC
|
2015-09-15 16:43:59 +00:00
|
|
|
|
2015-04-30 20:34:46 +00:00
|
|
|
units := []*unit.UnitOption{
|
|
|
|
newUnitOption("Service", "ExecStart", runPrepared),
|
2015-09-01 02:25:26 +00:00
|
|
|
// This enables graceful stop.
|
|
|
|
newUnitOption("Service", "KillMode", "mixed"),
|
2015-04-30 20:34:46 +00:00
|
|
|
}
|
|
|
|
|
2016-04-05 01:03:40 +00:00
|
|
|
serviceName := makePodServiceFileName(uuid)
|
2015-11-20 17:54:37 +00:00
|
|
|
glog.V(4).Infof("rkt: Creating service file %q for pod %q", serviceName, format.Pod(pod))
|
2015-08-25 20:03:33 +00:00
|
|
|
serviceFile, err := os.Create(serviceFilePath(serviceName))
|
2015-04-30 20:34:46 +00:00
|
|
|
if err != nil {
|
2015-08-26 01:50:42 +00:00
|
|
|
return "", nil, err
|
2015-04-30 20:34:46 +00:00
|
|
|
}
|
2015-12-21 19:25:38 +00:00
|
|
|
if _, err := io.Copy(serviceFile, unit.Serialize(units)); err != nil {
|
2015-08-26 01:50:42 +00:00
|
|
|
return "", nil, err
|
2015-08-25 20:03:33 +00:00
|
|
|
}
|
2015-12-21 19:25:38 +00:00
|
|
|
serviceFile.Close()
|
|
|
|
|
|
|
|
return serviceName, apiPodToruntimePod(uuid, pod), nil
|
2015-08-26 01:50:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// generateEvents is a helper function that generates some container
|
|
|
|
// life cycle events for containers in a pod.
|
2015-09-28 22:46:29 +00:00
|
|
|
func (r *Runtime) generateEvents(runtimePod *kubecontainer.Pod, reason string, failure error) {
|
2015-08-26 01:50:42 +00:00
|
|
|
// Set up container references.
|
|
|
|
for _, c := range runtimePod.Containers {
|
2015-10-07 17:58:05 +00:00
|
|
|
containerID := c.ID
|
2015-08-26 01:50:42 +00:00
|
|
|
id, err := parseContainerID(containerID)
|
|
|
|
if err != nil {
|
|
|
|
glog.Warningf("Invalid container ID %q", containerID)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
ref, ok := r.containerRefManager.GetRef(containerID)
|
|
|
|
if !ok {
|
|
|
|
glog.Warningf("No ref for container %q", containerID)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Note that 'rkt id' is the pod id.
|
2016-01-11 07:55:51 +00:00
|
|
|
uuid := utilstrings.ShortenString(id.uuid, 8)
|
2015-08-26 01:50:42 +00:00
|
|
|
switch reason {
|
|
|
|
case "Created":
|
2015-11-13 22:30:01 +00:00
|
|
|
r.recorder.Eventf(ref, api.EventTypeNormal, kubecontainer.CreatedContainer, "Created with rkt id %v", uuid)
|
2015-08-26 01:50:42 +00:00
|
|
|
case "Started":
|
2015-11-13 22:30:01 +00:00
|
|
|
r.recorder.Eventf(ref, api.EventTypeNormal, kubecontainer.StartedContainer, "Started with rkt id %v", uuid)
|
2015-08-26 01:50:42 +00:00
|
|
|
case "Failed":
|
2015-11-13 22:30:01 +00:00
|
|
|
r.recorder.Eventf(ref, api.EventTypeWarning, kubecontainer.FailedToStartContainer, "Failed to start with rkt id %v with error %v", uuid, failure)
|
2015-08-26 01:50:42 +00:00
|
|
|
case "Killing":
|
2015-11-13 22:30:01 +00:00
|
|
|
r.recorder.Eventf(ref, api.EventTypeNormal, kubecontainer.KillingContainer, "Killing with rkt id %v", uuid)
|
2015-08-26 01:50:42 +00:00
|
|
|
default:
|
|
|
|
glog.Errorf("rkt: Unexpected event %q", reason)
|
2015-08-25 20:03:33 +00:00
|
|
|
}
|
2015-04-30 20:34:46 +00:00
|
|
|
}
|
2015-08-26 01:50:42 +00:00
|
|
|
return
|
2015-04-30 20:34:46 +00:00
|
|
|
}
|
|
|
|
|
2015-08-25 20:03:33 +00:00
|
|
|
// RunPod first creates the unit file for a pod, and then
|
|
|
|
// starts the unit over d-bus.
|
2015-09-28 22:46:29 +00:00
|
|
|
func (r *Runtime) RunPod(pod *api.Pod, pullSecrets []api.Secret) error {
|
2015-11-20 17:54:37 +00:00
|
|
|
glog.V(4).Infof("Rkt starts to run pod: name %q.", format.Pod(pod))
|
2015-04-30 20:34:46 +00:00
|
|
|
|
2015-08-26 01:50:42 +00:00
|
|
|
name, runtimePod, prepareErr := r.preparePod(pod, pullSecrets)
|
|
|
|
|
|
|
|
// Set container references and generate events.
|
|
|
|
// If preparedPod fails, then send out 'failed' events for each container.
|
|
|
|
// Otherwise, store the container references so we can use them later to send events.
|
|
|
|
for i, c := range pod.Spec.Containers {
|
|
|
|
ref, err := kubecontainer.GenerateContainerRef(pod, &c)
|
|
|
|
if err != nil {
|
2015-11-20 17:54:37 +00:00
|
|
|
glog.Errorf("Couldn't make a ref to pod %q, container %v: '%v'", format.Pod(pod), c.Name, err)
|
2015-08-26 01:50:42 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
if prepareErr != nil {
|
2015-11-13 22:30:01 +00:00
|
|
|
r.recorder.Eventf(ref, api.EventTypeWarning, kubecontainer.FailedToCreateContainer, "Failed to create rkt container with error: %v", prepareErr)
|
2015-08-26 01:50:42 +00:00
|
|
|
continue
|
|
|
|
}
|
2015-10-07 17:58:05 +00:00
|
|
|
containerID := runtimePod.Containers[i].ID
|
2015-08-26 01:50:42 +00:00
|
|
|
r.containerRefManager.SetRef(containerID, ref)
|
|
|
|
}
|
|
|
|
|
|
|
|
if prepareErr != nil {
|
|
|
|
return prepareErr
|
2015-04-30 20:34:46 +00:00
|
|
|
}
|
|
|
|
|
2015-08-26 01:50:42 +00:00
|
|
|
r.generateEvents(runtimePod, "Created", nil)
|
|
|
|
|
2015-08-25 20:03:33 +00:00
|
|
|
// RestartUnit has the same effect as StartUnit if the unit is not running, besides it can restart
|
|
|
|
// a unit if the unit file is changed and reloaded.
|
2015-12-11 13:25:35 +00:00
|
|
|
reschan := make(chan string)
|
|
|
|
_, err := r.systemd.RestartUnit(name, "replace", reschan)
|
|
|
|
if err != nil {
|
|
|
|
r.generateEvents(runtimePod, "Failed", err)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
res := <-reschan
|
|
|
|
if res != "done" {
|
|
|
|
err := fmt.Errorf("Failed to restart unit %q: %s", name, res)
|
2015-08-26 01:50:42 +00:00
|
|
|
r.generateEvents(runtimePod, "Failed", err)
|
2015-04-30 20:34:46 +00:00
|
|
|
return err
|
|
|
|
}
|
2015-08-26 01:50:42 +00:00
|
|
|
|
|
|
|
r.generateEvents(runtimePod, "Started", nil)
|
2015-04-30 20:34:46 +00:00
|
|
|
return nil
|
|
|
|
}
|
2015-04-30 22:11:07 +00:00
|
|
|
|
2016-03-04 22:52:45 +00:00
|
|
|
func (r *Runtime) runPreStopHook(pod *api.Pod, runtimePod *kubecontainer.Pod) error {
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
var errlist []error
|
|
|
|
errCh := make(chan error, len(pod.Spec.Containers))
|
|
|
|
|
|
|
|
wg.Add(len(pod.Spec.Containers))
|
|
|
|
|
|
|
|
for i, c := range pod.Spec.Containers {
|
|
|
|
if c.Lifecycle == nil || c.Lifecycle.PreStop == nil {
|
|
|
|
wg.Done()
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
hook := c.Lifecycle.PreStop
|
|
|
|
containerID := runtimePod.Containers[i].ID
|
|
|
|
container := &pod.Spec.Containers[i]
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
if err := r.runner.Run(containerID, pod, container, hook); err != nil {
|
|
|
|
glog.Errorf("rkt: Failed to run pre-stop hook for container %q of pod %q: %v", container.Name, format.Pod(pod), err)
|
|
|
|
errCh <- err
|
|
|
|
}
|
|
|
|
wg.Done()
|
|
|
|
}()
|
|
|
|
}
|
|
|
|
|
|
|
|
wg.Wait()
|
|
|
|
close(errCh)
|
|
|
|
|
|
|
|
for err := range errCh {
|
|
|
|
errlist = append(errlist, err)
|
|
|
|
}
|
|
|
|
return errors.NewAggregate(errlist)
|
|
|
|
}
|
|
|
|
|
2015-11-21 00:56:35 +00:00
|
|
|
// convertRktPod will convert a rktapi.Pod to a kubecontainer.Pod
|
2015-12-17 00:52:39 +00:00
|
|
|
func (r *Runtime) convertRktPod(rktpod *rktapi.Pod) (*kubecontainer.Pod, error) {
|
2015-11-21 00:56:35 +00:00
|
|
|
manifest := &appcschema.PodManifest{}
|
|
|
|
err := json.Unmarshal(rktpod.Manifest, manifest)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
podUID, ok := manifest.Annotations.Get(k8sRktUIDAnno)
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("pod is missing annotation %s", k8sRktUIDAnno)
|
|
|
|
}
|
|
|
|
podName, ok := manifest.Annotations.Get(k8sRktNameAnno)
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("pod is missing annotation %s", k8sRktNameAnno)
|
|
|
|
}
|
|
|
|
podNamespace, ok := manifest.Annotations.Get(k8sRktNamespaceAnno)
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("pod is missing annotation %s", k8sRktNamespaceAnno)
|
|
|
|
}
|
|
|
|
podCreatedString, ok := manifest.Annotations.Get(k8sRktCreationTimeAnno)
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("pod is missing annotation %s", k8sRktCreationTimeAnno)
|
|
|
|
}
|
|
|
|
podCreated, err := strconv.ParseInt(podCreatedString, 10, 64)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("couldn't parse pod creation timestamp: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
kubepod := &kubecontainer.Pod{
|
|
|
|
ID: types.UID(podUID),
|
|
|
|
Name: podName,
|
|
|
|
Namespace: podNamespace,
|
|
|
|
}
|
2015-12-15 01:26:43 +00:00
|
|
|
|
|
|
|
for i, app := range rktpod.Apps {
|
|
|
|
// The order of the apps is determined by the rkt pod manifest.
|
|
|
|
// TODO(yifan): Let the server to unmarshal the annotations? https://github.com/coreos/rkt/issues/1872
|
|
|
|
hashStr, ok := manifest.Apps[i].Annotations.Get(k8sRktContainerHashAnno)
|
2015-11-21 00:56:35 +00:00
|
|
|
if !ok {
|
2015-12-15 01:26:43 +00:00
|
|
|
return nil, fmt.Errorf("app %q is missing annotation %s", app.Name, k8sRktContainerHashAnno)
|
2015-11-21 00:56:35 +00:00
|
|
|
}
|
2015-12-15 01:26:43 +00:00
|
|
|
containerHash, err := strconv.ParseUint(hashStr, 10, 64)
|
2015-11-21 00:56:35 +00:00
|
|
|
if err != nil {
|
2015-12-15 01:26:43 +00:00
|
|
|
return nil, fmt.Errorf("couldn't parse container's hash %q: %v", hashStr, err)
|
2015-11-21 00:56:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
kubepod.Containers = append(kubepod.Containers, &kubecontainer.Container{
|
2016-03-18 18:43:20 +00:00
|
|
|
ID: buildContainerID(&containerID{rktpod.Id, app.Name}),
|
|
|
|
Name: app.Name,
|
|
|
|
// By default, the version returned by rkt API service will be "latest" if not specified.
|
|
|
|
Image: fmt.Sprintf("%s:%s", app.Image.Name, app.Image.Version),
|
2015-11-21 00:56:35 +00:00
|
|
|
Hash: containerHash,
|
|
|
|
Created: podCreated,
|
2015-12-15 01:26:43 +00:00
|
|
|
State: appStateToContainerState(app.State),
|
2015-11-21 00:56:35 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
return kubepod, nil
|
|
|
|
}
|
|
|
|
|
2015-04-30 22:11:07 +00:00
|
|
|
// GetPods runs 'systemctl list-unit' and 'rkt list' to get the list of rkt pods.
|
2015-08-08 21:29:57 +00:00
|
|
|
// Then it will use the result to construct a list of container runtime pods.
|
2015-04-30 22:11:07 +00:00
|
|
|
// If all is false, then only running pods will be returned, otherwise all pods will be
|
|
|
|
// returned.
|
2015-09-28 22:46:29 +00:00
|
|
|
func (r *Runtime) GetPods(all bool) ([]*kubecontainer.Pod, error) {
|
2015-04-30 22:11:07 +00:00
|
|
|
glog.V(4).Infof("Rkt getting pods")
|
|
|
|
|
2015-11-21 00:56:35 +00:00
|
|
|
listReq := &rktapi.ListPodsRequest{
|
2015-12-17 00:52:39 +00:00
|
|
|
Detail: true,
|
|
|
|
Filters: []*rktapi.PodFilter{
|
|
|
|
{
|
|
|
|
Annotations: []*rktapi.KeyValue{
|
|
|
|
{
|
|
|
|
Key: k8sRktKubeletAnno,
|
|
|
|
Value: k8sRktKubeletAnnoValue,
|
|
|
|
},
|
2015-11-21 00:56:35 +00:00
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
if !all {
|
2015-12-17 00:52:39 +00:00
|
|
|
listReq.Filters[0].States = []rktapi.PodState{rktapi.PodState_POD_STATE_RUNNING}
|
2015-11-21 00:56:35 +00:00
|
|
|
}
|
|
|
|
listResp, err := r.apisvc.ListPods(context.Background(), listReq)
|
2015-04-30 22:11:07 +00:00
|
|
|
if err != nil {
|
2015-11-21 00:56:35 +00:00
|
|
|
return nil, fmt.Errorf("couldn't list pods: %v", err)
|
2015-04-30 22:11:07 +00:00
|
|
|
}
|
|
|
|
|
2016-03-10 00:49:29 +00:00
|
|
|
pods := make(map[types.UID]*kubecontainer.Pod)
|
2016-03-11 17:32:22 +00:00
|
|
|
var podIDs []types.UID
|
2015-12-17 00:52:39 +00:00
|
|
|
for _, pod := range listResp.Pods {
|
|
|
|
pod, err := r.convertRktPod(pod)
|
2015-11-21 00:56:35 +00:00
|
|
|
if err != nil {
|
|
|
|
glog.Warningf("rkt: Cannot construct pod from unit file: %v.", err)
|
|
|
|
continue
|
2015-04-30 22:11:07 +00:00
|
|
|
}
|
2016-03-10 00:49:29 +00:00
|
|
|
|
|
|
|
// Group pods together.
|
|
|
|
oldPod, found := pods[pod.ID]
|
|
|
|
if !found {
|
|
|
|
pods[pod.ID] = pod
|
2016-03-11 17:32:22 +00:00
|
|
|
podIDs = append(podIDs, pod.ID)
|
2016-03-10 00:49:29 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
oldPod.Containers = append(oldPod.Containers, pod.Containers...)
|
|
|
|
}
|
|
|
|
|
2016-03-11 17:32:22 +00:00
|
|
|
// Convert map to list, using the consistent order from the podIDs array.
|
2016-03-10 00:49:29 +00:00
|
|
|
var result []*kubecontainer.Pod
|
2016-03-11 17:32:22 +00:00
|
|
|
for _, id := range podIDs {
|
|
|
|
result = append(result, pods[id])
|
2015-04-30 22:11:07 +00:00
|
|
|
}
|
2016-03-10 00:49:29 +00:00
|
|
|
|
|
|
|
return result, nil
|
2015-04-30 22:11:07 +00:00
|
|
|
}
|
2015-04-30 23:58:12 +00:00
|
|
|
|
2016-03-04 22:52:45 +00:00
|
|
|
func (r *Runtime) waitPreStopHooks(pod *api.Pod, runningPod *kubecontainer.Pod) {
|
|
|
|
gracePeriod := int64(minimumGracePeriodInSeconds)
|
|
|
|
switch {
|
|
|
|
case pod.DeletionGracePeriodSeconds != nil:
|
|
|
|
gracePeriod = *pod.DeletionGracePeriodSeconds
|
|
|
|
case pod.Spec.TerminationGracePeriodSeconds != nil:
|
|
|
|
gracePeriod = *pod.Spec.TerminationGracePeriodSeconds
|
|
|
|
}
|
|
|
|
|
|
|
|
errCh := make(chan error, 1)
|
|
|
|
go func() {
|
|
|
|
if err := r.runPreStopHook(pod, runningPod); err != nil {
|
|
|
|
errCh <- err
|
|
|
|
}
|
|
|
|
close(errCh)
|
|
|
|
}()
|
|
|
|
|
|
|
|
select {
|
|
|
|
case <-time.After(time.Duration(gracePeriod) * time.Second):
|
|
|
|
glog.V(2).Infof("rkt: Some pre-stop hooks did not complete in %d seconds for pod %v", gracePeriod, format.Pod(pod))
|
|
|
|
case err := <-errCh:
|
|
|
|
if err != nil {
|
|
|
|
glog.Errorf("rkt: Some pre-stop hooks failed for pod %v: %v", format.Pod(pod), err)
|
|
|
|
} else {
|
|
|
|
glog.V(4).Infof("rkt: pre-stop hooks for pod %v completed", format.Pod(pod))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-30 23:58:12 +00:00
|
|
|
// KillPod invokes 'systemctl kill' to kill the unit that runs the pod.
|
2015-09-28 22:46:29 +00:00
|
|
|
func (r *Runtime) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) error {
|
2015-08-20 01:57:58 +00:00
|
|
|
glog.V(4).Infof("Rkt is killing pod: name %q.", runningPod.Name)
|
2015-08-26 01:50:42 +00:00
|
|
|
|
2016-03-04 22:52:45 +00:00
|
|
|
if len(runningPod.Containers) == 0 {
|
|
|
|
glog.V(4).Infof("rkt: Pod %q is already being killed, no action will be taken", runningPod.Name)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if pod != nil {
|
|
|
|
r.waitPreStopHooks(pod, &runningPod)
|
|
|
|
}
|
|
|
|
|
2016-04-05 01:03:40 +00:00
|
|
|
containerID, err := parseContainerID(runningPod.Containers[0].ID)
|
|
|
|
if err != nil {
|
|
|
|
glog.Errorf("rkt: Failed to get rkt uuid of the pod %q: %v", runningPod.Name, err)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
serviceName := makePodServiceFileName(containerID.uuid)
|
2015-08-26 01:50:42 +00:00
|
|
|
r.generateEvents(&runningPod, "Killing", nil)
|
|
|
|
for _, c := range runningPod.Containers {
|
2015-10-07 17:58:05 +00:00
|
|
|
r.containerRefManager.ClearRef(c.ID)
|
2015-08-26 01:50:42 +00:00
|
|
|
}
|
|
|
|
|
2015-10-07 21:04:41 +00:00
|
|
|
// Touch the systemd service file to update the mod time so it will
|
|
|
|
// not be garbage collected too soon.
|
|
|
|
if err := os.Chtimes(serviceFilePath(serviceName), time.Now(), time.Now()); err != nil {
|
|
|
|
glog.Errorf("rkt: Failed to change the modification time of the service file %q: %v", serviceName, err)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2015-09-01 02:25:26 +00:00
|
|
|
// Since all service file have 'KillMode=mixed', the processes in
|
|
|
|
// the unit's cgroup will receive a SIGKILL if the normal stop timeouts.
|
2015-12-11 13:25:35 +00:00
|
|
|
reschan := make(chan string)
|
2016-04-05 01:03:40 +00:00
|
|
|
if _, err = r.systemd.StopUnit(serviceName, "replace", reschan); err != nil {
|
2015-10-07 21:04:41 +00:00
|
|
|
glog.Errorf("rkt: Failed to stop unit %q: %v", serviceName, err)
|
2015-09-01 02:25:26 +00:00
|
|
|
return err
|
|
|
|
}
|
2015-12-11 13:25:35 +00:00
|
|
|
|
|
|
|
res := <-reschan
|
|
|
|
if res != "done" {
|
2016-02-23 01:07:31 +00:00
|
|
|
err := fmt.Errorf("invalid result: %s", res)
|
|
|
|
glog.Errorf("rkt: Failed to stop unit %q: %v", serviceName, err)
|
2015-12-11 13:25:35 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2015-10-07 21:04:41 +00:00
|
|
|
return nil
|
2015-08-18 00:58:09 +00:00
|
|
|
}
|
|
|
|
|
2015-10-21 20:04:10 +00:00
|
|
|
// Type returns RktType, the name identifying this container runtime.
func (r *Runtime) Type() string {
	return RktType
}
|
|
|
|
|
2015-09-28 22:46:29 +00:00
|
|
|
func (r *Runtime) Version() (kubecontainer.Version, error) {
|
2016-03-12 01:29:25 +00:00
|
|
|
r.versions.RLock()
|
|
|
|
defer r.versions.RUnlock()
|
|
|
|
return r.versions.binVersion, nil
|
2015-05-04 23:51:31 +00:00
|
|
|
}
|
|
|
|
|
2016-01-14 23:16:07 +00:00
|
|
|
func (r *Runtime) APIVersion() (kubecontainer.Version, error) {
|
2016-03-12 01:29:25 +00:00
|
|
|
r.versions.RLock()
|
|
|
|
defer r.versions.RUnlock()
|
|
|
|
return r.versions.apiVersion, nil
|
2016-03-03 10:01:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Status returns error if rkt is unhealthy, nil otherwise.
// Health is determined solely by checkVersion, called with the minimum and
// recommended rkt binary versions and the minimum appc, rkt API and systemd
// versions.
func (r *Runtime) Status() error {
	return r.checkVersion(minimumRktBinVersion, recommendedRktBinVersion, minimumAppcVersion, minimumRktApiVersion, minimumSystemdVersion)
}
|
|
|
|
|
2015-05-04 23:51:31 +00:00
|
|
|
// SyncPod syncs the running pod to match the specified desired pod.
//
// Containers in a rkt pod are restarted together: if any container needs to
// be (re)started, the whole running pod is killed (when it has containers)
// and the pod is run again. A restart is triggered when
//   - a desired container is not running and ShouldContainerBeRestarted says
//     it should be restarted,
//   - a running container's hash is non-zero and differs from the hash of
//     its spec,
//   - a running container has a non-successful liveness result and the
//     restart policy is not Never, or
//   - the running pod contains containers that were not matched to the spec.
//
// Any error from KillPod or RunPod is recorded in the returned result by the
// deferred function. The podStatus and backOff parameters are not used by
// this implementation.
func (r *Runtime) SyncPod(pod *api.Pod, podStatus api.PodStatus, internalPodStatus *kubecontainer.PodStatus, pullSecrets []api.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) {
	// err is shared with the deferred function below, so the kill/run calls
	// assign to it with '=' instead of declaring a new variable.
	var err error
	defer func() {
		if err != nil {
			result.Fail(err)
		}
	}()
	// TODO: (random-liu) Stop using running pod in SyncPod()
	// TODO: (random-liu) Rename podStatus to apiPodStatus, rename internalPodStatus to podStatus, and use new pod status as much as possible,
	// we may stop using apiPodStatus someday.
	runningPod := kubecontainer.ConvertPodStatusToRunningPod(internalPodStatus)
	// Add references to all containers.
	// Every running container starts out as "unidentified" and is removed
	// from this map once it has been matched to a healthy, unchanged spec
	// container.
	unidentifiedContainers := make(map[kubecontainer.ContainerID]*kubecontainer.Container)
	for _, c := range runningPod.Containers {
		unidentifiedContainers[c.ID] = c
	}

	restartPod := false
	for _, container := range pod.Spec.Containers {
		expectedHash := kubecontainer.HashContainer(&container)

		c := runningPod.FindContainerByName(container.Name)
		if c == nil {
			if kubecontainer.ShouldContainerBeRestarted(&container, pod, internalPodStatus) {
				glog.V(3).Infof("Container %+v is dead, but RestartPolicy says that we should restart it.", container)
				// TODO(yifan): Containers in one pod are fate-sharing at this moment, see:
				// https://github.com/appc/spec/issues/276.
				restartPod = true
				break
			}
			// Not running and not to be restarted: nothing to do for it.
			continue
		}

		// TODO: check for non-root image directives. See ../docker/manager.go#SyncPod

		// TODO(yifan): Take care of host network change.
		// A zero hash on the running container is not treated as a change.
		containerChanged := c.Hash != 0 && c.Hash != expectedHash
		if containerChanged {
			glog.Infof("Pod %q container %q hash changed (%d vs %d), it will be killed and re-created.", format.Pod(pod), container.Name, c.Hash, expectedHash)
			restartPod = true
			break
		}

		liveness, found := r.livenessManager.Get(c.ID)
		if found && liveness != proberesults.Success && pod.Spec.RestartPolicy != api.RestartPolicyNever {
			glog.Infof("Pod %q container %q is unhealthy, it will be killed and re-created.", format.Pod(pod), container.Name)
			restartPod = true
			break
		}

		// The container is running, unchanged and not failing its liveness
		// check: it is accounted for.
		delete(unidentifiedContainers, c.ID)
	}

	// If there is any unidentified containers, restart the pod.
	if len(unidentifiedContainers) > 0 {
		restartPod = true
	}

	if restartPod {
		// Kill the pod only if the pod is actually running.
		if len(runningPod.Containers) > 0 {
			if err = r.KillPod(pod, runningPod); err != nil {
				return
			}
		}
		if err = r.RunPod(pod, pullSecrets); err != nil {
			return
		}
	}
	return
}
|
|
|
|
|
2015-09-28 22:46:29 +00:00
|
|
|
// GarbageCollect collects the pods/containers.
|
|
|
|
// TODO(yifan): Enforce the gc policy, also, it would be better if we can
|
|
|
|
// just GC kubernetes pods.
|
2015-10-07 07:39:59 +00:00
|
|
|
func (r *Runtime) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy) error {
|
2015-05-04 23:51:31 +00:00
|
|
|
if err := exec.Command("systemctl", "reset-failed").Run(); err != nil {
|
2015-10-07 21:04:41 +00:00
|
|
|
glog.Errorf("rkt: Failed to reset failed systemd services: %v, continue to gc anyway...", err)
|
2015-05-04 23:51:31 +00:00
|
|
|
}
|
2015-10-07 21:04:41 +00:00
|
|
|
|
2015-10-07 07:39:59 +00:00
|
|
|
if _, err := r.runCommand("gc", "--grace-period="+gcPolicy.MinAge.String(), "--expire-prepared="+gcPolicy.MinAge.String()); err != nil {
|
2015-05-04 23:51:31 +00:00
|
|
|
glog.Errorf("rkt: Failed to gc: %v", err)
|
2015-10-07 21:04:41 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// GC all inactive systemd service files.
|
|
|
|
units, err := r.systemd.ListUnits()
|
|
|
|
if err != nil {
|
|
|
|
glog.Errorf("rkt: Failed to list units: %v", err)
|
2015-05-04 23:51:31 +00:00
|
|
|
return err
|
|
|
|
}
|
2015-10-07 21:04:41 +00:00
|
|
|
runningKubernetesUnits := sets.NewString()
|
|
|
|
for _, u := range units {
|
|
|
|
if strings.HasPrefix(u.Name, kubernetesUnitPrefix) && u.SubState == "running" {
|
|
|
|
runningKubernetesUnits.Insert(u.Name)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
files, err := ioutil.ReadDir(systemdServiceDir)
|
|
|
|
if err != nil {
|
|
|
|
glog.Errorf("rkt: Failed to read the systemd service directory: %v", err)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
for _, f := range files {
|
2015-10-19 18:23:54 +00:00
|
|
|
if strings.HasPrefix(f.Name(), kubernetesUnitPrefix) && !runningKubernetesUnits.Has(f.Name()) && f.ModTime().Before(time.Now().Add(-gcPolicy.MinAge)) {
|
2015-10-07 21:04:41 +00:00
|
|
|
glog.V(4).Infof("rkt: Removing inactive systemd service file: %v", f.Name())
|
|
|
|
if err := os.Remove(serviceFilePath(f.Name())); err != nil {
|
|
|
|
glog.Warningf("rkt: Failed to remove inactive systemd service file %v: %v", f.Name(), err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-05-04 23:51:31 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2015-08-13 23:39:17 +00:00
|
|
|
// Note: In rkt, the container ID is in the form of "UUID:appName", where
|
2015-05-04 23:51:31 +00:00
|
|
|
// appName is the container name.
|
2015-09-01 02:25:26 +00:00
|
|
|
// TODO(yifan): If the rkt is using lkvm as the stage1 image, then this function will fail.
|
2015-10-07 17:58:05 +00:00
|
|
|
func (r *Runtime) RunInContainer(containerID kubecontainer.ContainerID, cmd []string) ([]byte, error) {
|
2015-05-04 23:51:31 +00:00
|
|
|
glog.V(4).Infof("Rkt running in container.")
|
|
|
|
|
|
|
|
id, err := parseContainerID(containerID)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2015-08-13 23:39:17 +00:00
|
|
|
args := append([]string{}, "enter", fmt.Sprintf("--app=%s", id.appName), id.uuid)
|
2015-05-04 23:51:31 +00:00
|
|
|
args = append(args, cmd...)
|
|
|
|
|
2015-10-08 01:38:01 +00:00
|
|
|
result, err := r.buildCommand(args...).CombinedOutput()
|
|
|
|
if err != nil {
|
|
|
|
if exitErr, ok := err.(*exec.ExitError); ok {
|
|
|
|
err = &rktExitError{exitErr}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// rktExitError implemets /pkg/util/exec.ExitError interface.
|
|
|
|
type rktExitError struct{ *exec.ExitError }
|
|
|
|
|
|
|
|
var _ utilexec.ExitError = &rktExitError{}
|
|
|
|
|
|
|
|
func (r *rktExitError) ExitStatus() int {
|
|
|
|
if status, ok := r.Sys().(syscall.WaitStatus); ok {
|
|
|
|
return status.ExitStatus()
|
|
|
|
}
|
|
|
|
return 0
|
2015-05-04 23:51:31 +00:00
|
|
|
}
|
|
|
|
|
2015-10-07 17:58:05 +00:00
|
|
|
func (r *Runtime) AttachContainer(containerID kubecontainer.ContainerID, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool) error {
|
2015-08-18 00:58:09 +00:00
|
|
|
return fmt.Errorf("unimplemented")
|
2015-07-28 04:48:55 +00:00
|
|
|
}
|
|
|
|
|
2015-08-13 23:39:17 +00:00
|
|
|
// Note: In rkt, the container ID is in the form of "UUID:appName", where UUID is
|
|
|
|
// the rkt UUID, and appName is the container name.
|
2015-09-01 02:25:26 +00:00
|
|
|
// TODO(yifan): If the rkt is using lkvm as the stage1 image, then this function will fail.
|
2015-10-07 17:58:05 +00:00
|
|
|
func (r *Runtime) ExecInContainer(containerID kubecontainer.ContainerID, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool) error {
|
2015-05-04 23:51:31 +00:00
|
|
|
glog.V(4).Infof("Rkt execing in container.")
|
|
|
|
|
|
|
|
id, err := parseContainerID(containerID)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2015-08-13 23:39:17 +00:00
|
|
|
args := append([]string{}, "enter", fmt.Sprintf("--app=%s", id.appName), id.uuid)
|
2015-05-04 23:51:31 +00:00
|
|
|
args = append(args, cmd...)
|
|
|
|
command := r.buildCommand(args...)
|
|
|
|
|
|
|
|
if tty {
|
2015-05-08 06:36:47 +00:00
|
|
|
p, err := kubecontainer.StartPty(command)
|
2015-05-04 23:51:31 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer p.Close()
|
|
|
|
|
|
|
|
// make sure to close the stdout stream
|
|
|
|
defer stdout.Close()
|
|
|
|
|
|
|
|
if stdin != nil {
|
|
|
|
go io.Copy(p, stdin)
|
|
|
|
}
|
|
|
|
if stdout != nil {
|
|
|
|
go io.Copy(stdout, p)
|
|
|
|
}
|
|
|
|
return command.Wait()
|
|
|
|
}
|
|
|
|
if stdin != nil {
|
|
|
|
// Use an os.Pipe here as it returns true *os.File objects.
|
2015-05-20 23:00:19 +00:00
|
|
|
// This way, if you run 'kubectl exec <pod> -i bash' (no tty) and type 'exit',
|
2015-05-04 23:51:31 +00:00
|
|
|
// the call below to command.Run() can unblock because its Stdin is the read half
|
|
|
|
// of the pipe.
|
|
|
|
r, w, err := os.Pipe()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
go io.Copy(w, stdin)
|
|
|
|
|
|
|
|
command.Stdin = r
|
|
|
|
}
|
|
|
|
if stdout != nil {
|
|
|
|
command.Stdout = stdout
|
|
|
|
}
|
|
|
|
if stderr != nil {
|
|
|
|
command.Stderr = stderr
|
|
|
|
}
|
|
|
|
return command.Run()
|
|
|
|
}
|
|
|
|
|
|
|
|
// PortForward executes socat in the pod's network namespace and copies
|
|
|
|
// data between stream (representing the user's local connection on their
|
|
|
|
// computer) and the specified port in the container.
|
|
|
|
//
|
|
|
|
// TODO:
|
|
|
|
// - match cgroups of container
|
|
|
|
// - should we support nsenter + socat on the host? (current impl)
|
|
|
|
// - should we support nsenter + socat in a container, running with elevated privs and --pid=host?
|
|
|
|
//
|
|
|
|
// TODO(yifan): Merge with the same function in dockertools.
|
2015-09-01 02:25:26 +00:00
|
|
|
// TODO(yifan): If the rkt is using lkvm as the stage1 image, then this function will fail.
|
2015-09-28 22:46:29 +00:00
|
|
|
func (r *Runtime) PortForward(pod *kubecontainer.Pod, port uint16, stream io.ReadWriteCloser) error {
|
2015-05-04 23:51:31 +00:00
|
|
|
glog.V(4).Infof("Rkt port forwarding in container.")
|
|
|
|
|
2015-12-21 19:25:38 +00:00
|
|
|
listResp, err := r.apisvc.ListPods(context.Background(), &rktapi.ListPodsRequest{
|
|
|
|
Detail: true,
|
|
|
|
Filters: runningKubernetesPodFilters(pod.ID),
|
|
|
|
})
|
2015-05-04 23:51:31 +00:00
|
|
|
if err != nil {
|
2015-12-21 19:25:38 +00:00
|
|
|
return fmt.Errorf("couldn't list pods: %v", err)
|
2015-05-04 23:51:31 +00:00
|
|
|
}
|
|
|
|
|
2015-12-21 19:25:38 +00:00
|
|
|
if len(listResp.Pods) != 1 {
|
|
|
|
var podlist []string
|
|
|
|
for _, p := range listResp.Pods {
|
|
|
|
podlist = append(podlist, p.Id)
|
|
|
|
}
|
|
|
|
return fmt.Errorf("more than one running rkt pod for the kubernetes pod [%s]", strings.Join(podlist, ", "))
|
2015-05-04 23:51:31 +00:00
|
|
|
}
|
|
|
|
|
2015-09-22 20:29:51 +00:00
|
|
|
socatPath, lookupErr := exec.LookPath("socat")
|
2015-05-04 23:51:31 +00:00
|
|
|
if lookupErr != nil {
|
|
|
|
return fmt.Errorf("unable to do port forwarding: socat not found.")
|
|
|
|
}
|
|
|
|
|
2015-12-21 19:25:38 +00:00
|
|
|
args := []string{"-t", fmt.Sprintf("%d", listResp.Pods[0].Pid), "-n", socatPath, "-", fmt.Sprintf("TCP4:localhost:%d", port)}
|
2015-09-22 20:29:51 +00:00
|
|
|
|
|
|
|
nsenterPath, lookupErr := exec.LookPath("nsenter")
|
2015-05-04 23:51:31 +00:00
|
|
|
if lookupErr != nil {
|
|
|
|
return fmt.Errorf("unable to do port forwarding: nsenter not found.")
|
|
|
|
}
|
2015-09-22 20:29:51 +00:00
|
|
|
|
|
|
|
command := exec.Command(nsenterPath, args...)
|
2015-05-04 23:51:31 +00:00
|
|
|
command.Stdout = stream
|
2015-09-22 20:29:51 +00:00
|
|
|
|
|
|
|
// If we use Stdin, command.Run() won't return until the goroutine that's copying
|
|
|
|
// from stream finishes. Unfortunately, if you have a client like telnet connected
|
|
|
|
// via port forwarding, as long as the user's telnet client is connected to the user's
|
|
|
|
// local listener that port forwarding sets up, the telnet session never exits. This
|
|
|
|
// means that even if socat has finished running, command.Run() won't ever return
|
|
|
|
// (because the client still has the connection and stream open).
|
|
|
|
//
|
|
|
|
// The work around is to use StdinPipe(), as Wait() (called by Run()) closes the pipe
|
|
|
|
// when the command (socat) exits.
|
|
|
|
inPipe, err := command.StdinPipe()
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("unable to do port forwarding: error creating stdin pipe: %v", err)
|
|
|
|
}
|
|
|
|
go func() {
|
|
|
|
io.Copy(inPipe, stream)
|
|
|
|
inPipe.Close()
|
|
|
|
}()
|
|
|
|
|
2015-05-04 23:51:31 +00:00
|
|
|
return command.Run()
|
|
|
|
}
|
|
|
|
|
2015-12-12 01:09:21 +00:00
|
|
|
// appStateToContainerState converts rktapi.AppState to kubecontainer.ContainerState.
|
|
|
|
func appStateToContainerState(state rktapi.AppState) kubecontainer.ContainerState {
|
|
|
|
switch state {
|
|
|
|
case rktapi.AppState_APP_STATE_RUNNING:
|
|
|
|
return kubecontainer.ContainerStateRunning
|
|
|
|
case rktapi.AppState_APP_STATE_EXITED:
|
|
|
|
return kubecontainer.ContainerStateExited
|
|
|
|
}
|
|
|
|
return kubecontainer.ContainerStateUnknown
|
|
|
|
}
|
|
|
|
|
2015-12-21 19:25:38 +00:00
|
|
|
// getPodInfo returns the pod manifest, creation time and restart count of the pod.
|
|
|
|
func getPodInfo(pod *rktapi.Pod) (podManifest *appcschema.PodManifest, creationTime time.Time, restartCount int, err error) {
|
2015-12-17 00:52:39 +00:00
|
|
|
// TODO(yifan): The manifest is only used for getting the annotations.
|
|
|
|
// Consider to let the server to unmarshal the annotations.
|
2015-12-12 01:09:21 +00:00
|
|
|
var manifest appcschema.PodManifest
|
|
|
|
if err = json.Unmarshal(pod.Manifest, &manifest); err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
creationTimeStr, ok := manifest.Annotations.Get(k8sRktCreationTimeAnno)
|
|
|
|
if !ok {
|
|
|
|
err = fmt.Errorf("no creation timestamp in pod manifest")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
unixSec, err := strconv.ParseInt(creationTimeStr, 10, 64)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if countString, ok := manifest.Annotations.Get(k8sRktRestartCountAnno); ok {
|
|
|
|
restartCount, err = strconv.Atoi(countString)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return &manifest, time.Unix(unixSec, 0), restartCount, nil
|
|
|
|
}
|
|
|
|
|
2015-12-17 00:52:39 +00:00
|
|
|
// populateContainerStatus fills the container status according to the app's information.
|
|
|
|
func populateContainerStatus(pod rktapi.Pod, app rktapi.App, runtimeApp appcschema.RuntimeApp, restartCount int, creationTime time.Time) (*kubecontainer.ContainerStatus, error) {
|
|
|
|
hashStr, ok := runtimeApp.Annotations.Get(k8sRktContainerHashAnno)
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("No container hash in pod manifest")
|
|
|
|
}
|
|
|
|
|
|
|
|
hashNum, err := strconv.ParseUint(hashStr, 10, 64)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2016-01-12 02:30:29 +00:00
|
|
|
var reason, message string
|
|
|
|
if app.State == rktapi.AppState_APP_STATE_EXITED {
|
|
|
|
if app.ExitCode == 0 {
|
|
|
|
reason = "Completed"
|
|
|
|
} else {
|
|
|
|
reason = "Error"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
terminationMessagePath, ok := runtimeApp.Annotations.Get(k8sRktTerminationMessagePathAnno)
|
|
|
|
if ok {
|
|
|
|
if data, err := ioutil.ReadFile(terminationMessagePath); err != nil {
|
|
|
|
message = fmt.Sprintf("Error on reading termination-log %s: %v", terminationMessagePath, err)
|
|
|
|
} else {
|
|
|
|
message = string(data)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-12-17 00:52:39 +00:00
|
|
|
return &kubecontainer.ContainerStatus{
|
|
|
|
ID: buildContainerID(&containerID{uuid: pod.Id, appName: app.Name}),
|
|
|
|
Name: app.Name,
|
|
|
|
State: appStateToContainerState(app.State),
|
|
|
|
// TODO(yifan): Use the creation/start/finished timestamp when it's implemented.
|
|
|
|
CreatedAt: creationTime,
|
|
|
|
StartedAt: creationTime,
|
|
|
|
ExitCode: int(app.ExitCode),
|
2016-03-18 18:43:20 +00:00
|
|
|
// By default, the version returned by rkt API service will be "latest" if not specified.
|
|
|
|
Image: fmt.Sprintf("%s:%s", app.Image.Name, app.Image.Version),
|
|
|
|
ImageID: "rkt://" + app.Image.Id, // TODO(yifan): Add the prefix only in api.PodStatus.
|
|
|
|
Hash: hashNum,
|
2015-12-17 00:52:39 +00:00
|
|
|
// TODO(yifan): Note that now all apps share the same restart count, this might
|
|
|
|
// change once apps don't share the same lifecycle.
|
|
|
|
// See https://github.com/appc/spec/pull/547.
|
|
|
|
RestartCount: restartCount,
|
2016-01-12 02:30:29 +00:00
|
|
|
Reason: reason,
|
|
|
|
Message: message,
|
2015-12-17 00:52:39 +00:00
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
2015-12-05 00:06:25 +00:00
|
|
|
func (r *Runtime) GetPodStatus(uid types.UID, name, namespace string) (*kubecontainer.PodStatus, error) {
|
2015-12-12 01:09:21 +00:00
|
|
|
podStatus := &kubecontainer.PodStatus{
|
|
|
|
ID: uid,
|
|
|
|
Name: name,
|
|
|
|
Namespace: namespace,
|
|
|
|
}
|
|
|
|
|
2015-12-17 00:52:39 +00:00
|
|
|
listResp, err := r.apisvc.ListPods(context.Background(), &rktapi.ListPodsRequest{
|
|
|
|
Detail: true,
|
|
|
|
Filters: kubernetesPodFilters(uid),
|
|
|
|
})
|
2015-12-12 01:09:21 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("couldn't list pods: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
var latestPod *rktapi.Pod
|
|
|
|
var latestRestartCount int = -1
|
|
|
|
|
2015-12-17 00:52:39 +00:00
|
|
|
// In this loop, we group all containers from all pods together,
|
|
|
|
// also we try to find the latest pod, so we can fill other info of the pod below.
|
|
|
|
for _, pod := range listResp.Pods {
|
2015-12-21 19:25:38 +00:00
|
|
|
manifest, creationTime, restartCount, err := getPodInfo(pod)
|
2015-12-12 01:09:21 +00:00
|
|
|
if err != nil {
|
2016-01-29 19:35:04 +00:00
|
|
|
glog.Warningf("rkt: Couldn't get necessary info from the rkt pod, (uuid %q): %v", pod.Id, err)
|
2015-12-12 01:09:21 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if restartCount > latestRestartCount {
|
|
|
|
latestPod = pod
|
|
|
|
latestRestartCount = restartCount
|
|
|
|
}
|
|
|
|
|
|
|
|
for i, app := range pod.Apps {
|
|
|
|
// The order of the apps is determined by the rkt pod manifest.
|
2015-12-17 00:52:39 +00:00
|
|
|
// TODO(yifan): Save creationTime, restartCount in each app's annotation,
|
|
|
|
// so we don't need to pass them.
|
|
|
|
cs, err := populateContainerStatus(*pod, *app, manifest.Apps[i], restartCount, creationTime)
|
2015-12-12 01:09:21 +00:00
|
|
|
if err != nil {
|
2015-12-17 00:52:39 +00:00
|
|
|
glog.Warningf("rkt: Failed to populate container status(uuid %q, app %q): %v", pod.Id, app.Name, err)
|
2015-12-12 01:09:21 +00:00
|
|
|
continue
|
|
|
|
}
|
2015-12-17 00:52:39 +00:00
|
|
|
podStatus.ContainerStatuses = append(podStatus.ContainerStatuses, cs)
|
2015-12-12 01:09:21 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if latestPod != nil {
|
|
|
|
// Try to fill the IP info.
|
|
|
|
for _, n := range latestPod.Networks {
|
|
|
|
if n.Name == defaultNetworkName {
|
|
|
|
podStatus.IP = n.Ipv4
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return podStatus, nil
|
2015-11-10 22:18:47 +00:00
|
|
|
}
|