2015-04-28 23:11:37 +00:00
/ *
2016-06-03 00:25:58 +00:00
Copyright 2015 The Kubernetes Authors .
2015-04-28 23:11:37 +00:00
Licensed under the Apache License , Version 2.0 ( the "License" ) ;
you may not use this file except in compliance with the License .
You may obtain a copy of the License at
http : //www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing , software
distributed under the License is distributed on an "AS IS" BASIS ,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
See the License for the specific language governing permissions and
limitations under the License .
* /
package rkt
import (
2016-04-28 00:16:28 +00:00
"bufio"
2015-08-26 01:50:42 +00:00
"bytes"
2015-04-30 01:11:30 +00:00
"encoding/json"
2015-04-28 23:11:37 +00:00
"fmt"
2015-04-30 20:34:46 +00:00
"io"
"io/ioutil"
"os"
2015-04-28 23:11:37 +00:00
"os/exec"
2015-04-30 20:34:46 +00:00
"path"
2016-04-15 01:01:40 +00:00
"path/filepath"
2016-04-21 00:49:08 +00:00
"sort"
2015-05-04 23:51:31 +00:00
"strconv"
2015-04-28 23:11:37 +00:00
"strings"
2016-03-04 22:52:45 +00:00
"sync"
2015-10-08 01:38:01 +00:00
"syscall"
2015-04-30 20:34:46 +00:00
"time"
2015-04-28 23:11:37 +00:00
2015-08-05 22:05:17 +00:00
appcschema "github.com/appc/spec/schema"
appctypes "github.com/appc/spec/schema/types"
"github.com/coreos/go-systemd/unit"
2015-11-19 02:35:31 +00:00
rktapi "github.com/coreos/rkt/api/v1alpha"
2015-08-05 22:05:17 +00:00
"github.com/golang/glog"
2015-11-20 23:57:56 +00:00
"golang.org/x/net/context"
2015-12-30 01:17:27 +00:00
"google.golang.org/grpc"
2017-06-22 18:24:23 +00:00
"k8s.io/api/core/v1"
2017-01-17 03:38:19 +00:00
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2017-01-11 14:09:48 +00:00
kubetypes "k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/errors"
2017-01-24 14:35:22 +00:00
"k8s.io/apimachinery/pkg/util/uuid"
2017-01-11 14:09:48 +00:00
utilwait "k8s.io/apimachinery/pkg/util/wait"
2017-01-30 18:39:54 +00:00
"k8s.io/client-go/tools/record"
2017-04-14 09:33:57 +00:00
"k8s.io/client-go/tools/remotecommand"
2017-01-23 18:37:22 +00:00
"k8s.io/client-go/util/flowcontrol"
2015-08-05 22:03:47 +00:00
"k8s.io/kubernetes/pkg/credentialprovider"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
2016-07-13 00:32:24 +00:00
"k8s.io/kubernetes/pkg/kubelet/events"
2016-07-14 01:05:18 +00:00
"k8s.io/kubernetes/pkg/kubelet/images"
2016-06-13 16:40:17 +00:00
"k8s.io/kubernetes/pkg/kubelet/leaky"
2016-03-04 22:52:45 +00:00
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
2016-05-03 00:49:42 +00:00
"k8s.io/kubernetes/pkg/kubelet/network"
"k8s.io/kubernetes/pkg/kubelet/network/hairpin"
2015-10-19 22:15:59 +00:00
proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
2016-05-22 05:00:38 +00:00
"k8s.io/kubernetes/pkg/kubelet/types"
2015-11-20 17:54:37 +00:00
"k8s.io/kubernetes/pkg/kubelet/util/format"
2015-08-05 22:03:47 +00:00
"k8s.io/kubernetes/pkg/securitycontext"
2015-10-08 01:38:01 +00:00
utilexec "k8s.io/kubernetes/pkg/util/exec"
2016-06-06 23:23:16 +00:00
"k8s.io/kubernetes/pkg/util/selinux"
2016-01-11 07:55:51 +00:00
utilstrings "k8s.io/kubernetes/pkg/util/strings"
2017-07-05 17:44:41 +00:00
"k8s.io/kubernetes/pkg/util/term"
2015-04-28 23:11:37 +00:00
)
// Constants shared by the rkt runtime: default endpoints, minimum component
// versions, filesystem locations, systemd unit keys, and the annotation names
// read from or written to pod and image manifests.
const (
2016-03-19 00:22:11 +00:00
// RktType is the string "rkt"; presumably the runtime type name reported to
// the kubelet — TODO confirm against the Type() implementation.
RktType = "rkt"
// DefaultRktAPIServiceEndpoint carries the same address as
// defaultRktAPIServiceAddr declared further down in this block.
DefaultRktAPIServiceEndpoint = "localhost:15441"
2015-10-21 20:04:10 +00:00
2016-09-02 22:09:34 +00:00
// Minimum component versions. NOTE(review): presumably enforced by the
// version check that New starts through getVersions — verify there.
minimumRktBinVersion = "1.13.0"
2016-05-31 22:02:50 +00:00
minimumRktApiVersion = "1.0.0-alpha"
minimumSystemdVersion = "219"
2015-04-30 06:33:07 +00:00
2015-08-13 23:39:17 +00:00
// Filesystem locations used by the runtime.
systemdServiceDir = "/run/systemd/system"
rktDataDir = "/var/lib/rkt"
rktLocalConfigDir = "/etc/rkt"
2015-04-28 23:11:37 +00:00
2017-04-29 16:04:39 +00:00
// kubernetesUnitPrefix is prepended to the rkt pod uuid by
// makePodServiceFileName to form the unit file name.
kubernetesUnitPrefix = "k8s_"
// Section and key names; presumably those of the X-Kubernetes section
// stored in a pod's service file — TODO confirm against the unit writer.
unitKubernetesSection = "X-Kubernetes"
unitPodUID = "PodUID"
unitPodName = "PodName"
unitPodNamespace = "PodNamespace"
unitPodHostNetwork = "PodHostNetwork"
unitPodNetworkNamespace = "PodNetworkNamespace"
2015-04-28 23:11:37 +00:00
2016-04-15 01:01:40 +00:00
// k8sRktKubeletAnno is set to k8sRktKubeletAnnoValue on every pod manifest
// built by makePodManifest.
k8sRktKubeletAnno = "rkt.kubernetes.io/managed-by-kubelet"
k8sRktKubeletAnnoValue = "true"
2016-02-09 06:50:25 +00:00
k8sRktContainerHashAnno = "rkt.kubernetes.io/container-hash"
// k8sRktRestartCountAnno is read from existing pod manifests and written to
// the new one by makePodManifest.
k8sRktRestartCountAnno = "rkt.kubernetes.io/restart-count"
2016-02-08 16:13:08 +00:00
k8sRktTerminationMessagePathAnno = "rkt.kubernetes.io/termination-message-path"
2016-04-26 22:20:06 +00:00
2017-06-17 10:43:15 +00:00
k8sRktLimitNoFileAnno = "systemd-unit-option.rkt.kubernetes.io/LimitNOFILE"
2016-04-26 22:20:06 +00:00
// k8sRktStage1NameAnno is a pod annotation; when present, makePodManifest
// copies it into the pod manifest and marks the pod as requiring privilege.
//
// TODO(euank): This has significant security concerns as a stage1 image is
// effectively root.
// Furthermore, this (using an annotation) is a hack to pass an extra
// non-portable argument in. It should not be relied on to be stable.
// In the future, this might be subsumed by a first-class api object, or by a
// kitchen-sink params object (#17064).
// See discussion in #23944
// Also, do we want more granularity than path-at-the-kubelet-level and
// image/name-at-the-pod-level?
k8sRktStage1NameAnno = "rkt.alpha.kubernetes.io/stage1-name-override"
dockerPrefix = "docker://"
2015-05-04 23:51:31 +00:00
authDir = "auth.d"
// dockerAuthTemplate is a format string taking, in order, the registry, the
// user and the password (each formatted with %q).
dockerAuthTemplate = ` { "rktKind":"dockerAuth","rktVersion":"v1","registries":[%q],"credentials": { "user":%q,"password":%q}} `
2015-11-19 02:35:31 +00:00
defaultRktAPIServiceAddr = "localhost:15441"
2016-01-29 00:01:01 +00:00
// ndots specifies the minimum number of dots that a domain name must contain
// for the resolver to consider it an FQDN (fully-qualified domain name).
// We want SRV lookup names like _dns._udp.kube-dns.default.svc to be treated
// as relative, hence ndots is set to 5.
// TODO(yifan): Move this and dockertools.ndotsDNSOption to a common package.
defaultDNSOption = "ndots:5"
2016-03-08 19:46:40 +00:00
// Annotations for the ENTRYPOINT and CMD for an ACI that's converted from Docker image.
2016-10-08 01:51:49 +00:00
// Taken from https://github.com/appc/docker2aci/blob/v0.12.3/lib/common/common.go#L33
// setApp reads appcDockerEntrypoint and appcDockerCmd from the image manifest.
appcDockerEntrypoint = "appc.io/docker/entrypoint"
appcDockerCmd = "appc.io/docker/cmd"
appcDockerRegistryURL = "appc.io/docker/registryurl"
appcDockerRepository = "appc.io/docker/repository"
2016-03-04 22:52:45 +00:00
// TODO(yifan): Reuse this const with Docker runtime.
minimumGracePeriodInSeconds = 2
2016-06-07 20:22:55 +00:00
// The network name of the network when no-op plugin is being used.
// TODO(yifan): This is not ideal since today we cannot make the rkt's 'net.d' dir point to the
// CNI directory specified by kubelet. Once that is fixed, we can just use the network config
// under the CNI directory directly.
// See https://github.com/coreos/rkt/pull/2312#issuecomment-200068370.
defaultNetworkName = "rkt.kubernetes.io"
2016-06-17 21:28:30 +00:00
// defaultRequestTimeout is the default timeout of rkt requests; New uses it
// when it is given a zero requestTimeout.
2017-04-28 15:57:19 +00:00
// Value is slightly offset from 2 minutes to make timeouts due to this
// constant recognizable.
defaultRequestTimeout = 2 * time . Minute - 1 * time . Second
2016-09-06 19:20:58 +00:00
// Paths that makeHostNetworkMount copies from the host and mounts into the
// container at the same location.
etcHostsPath = "/etc/hosts"
etcResolvConfPath = "/etc/resolv.conf"
2015-04-28 23:11:37 +00:00
)
2015-09-28 22:46:29 +00:00
// Runtime implements the container runtime interface for rkt. The implementation
2015-04-28 23:11:37 +00:00
// uses systemd, so in order to run this runtime, systemd must be installed
// on the machine.
//
// Instances are built by New, which fills in the fields as noted below.
2015-09-28 22:46:29 +00:00
type Runtime struct {
2016-04-21 01:21:41 +00:00
// cli runs rkt command lines; New points it at the Runtime itself.
cli cliInterface
2015-11-19 02:35:31 +00:00
// systemd is the interface returned by newSystemd.
systemd systemdInterface
// The grpc client for rkt api-service.
apisvcConn * grpc . ClientConn
apisvc rktapi . PublicAPIClient
2016-03-19 00:22:11 +00:00
// config is the result of getConfig applied to the config passed to New.
config * Config
2015-04-28 23:11:37 +00:00
// TODO(yifan): Refactor this to be generic keyring.
dockerKeyring credentialprovider . DockerKeyring
2015-05-06 18:02:08 +00:00
containerRefManager * kubecontainer . RefManager
2016-04-21 00:49:08 +00:00
podGetter podGetter
2016-01-29 00:01:01 +00:00
runtimeHelper kubecontainer . RuntimeHelper
2015-05-06 18:02:08 +00:00
recorder record . EventRecorder
2015-10-19 22:15:59 +00:00
livenessManager proberesults . Manager
2016-07-14 01:05:18 +00:00
// imagePuller is created with images.NewImageManager in New.
imagePuller images . ImageManager
2016-03-04 22:52:45 +00:00
// runner is created with lifecycle.NewHandlerRunner in New.
runner kubecontainer . HandlerRunner
2016-04-15 01:01:40 +00:00
execer utilexec . Interface
// os is always kubecontainer.RealOS{} when the Runtime comes from New.
os kubecontainer . OSInterface
2016-12-13 23:00:34 +00:00
// Network plugin manager.
network * network . PluginManager
2016-05-03 00:49:42 +00:00
// If true, the "hairpin mode" flag is set on container interfaces.
// A false value means the kubelet just backs off from setting it,
// it might already be true.
// NOTE(review): New does not assign this field from its hairpinMode
// argument — confirm whether that is intentional.
configureHairpinMode bool
2016-04-15 01:01:40 +00:00
// used for a systemd Exec, which requires the full path.
// Both paths are resolved with execer.LookPath in New.
2016-05-03 00:49:42 +00:00
touchPath string
nsenterPath string
2015-11-19 02:35:31 +00:00
2016-03-12 01:29:25 +00:00
// versions presumably holds what getVersions collects — TODO confirm.
versions versions
2016-06-17 21:28:30 +00:00
// requestTimeout is the timeout of rkt requests.
requestTimeout time . Duration
2017-04-29 16:04:39 +00:00
2017-05-09 14:15:22 +00:00
// unitGetter reads pod information from systemd units; New points it at the
// Runtime itself.
unitGetter unitServiceGetter
2017-04-29 16:04:39 +00:00
}
// podServiceDirective holds the fields of the X-Kubernetes directive of a
// systemd service file.
// NOTE(review): the fields presumably correspond to the unitPodUID,
// unitPodName, unitPodNamespace, unitPodHostNetwork and
// unitPodNetworkNamespace keys — confirm against getKubernetesDirective.
type podServiceDirective struct {
id string
name string
namespace string
hostNetwork bool
networkNamespace kubecontainer . ContainerID
2015-04-28 23:11:37 +00:00
}
2015-09-28 22:46:29 +00:00
// Compile-time checks that *Runtime satisfies the kubecontainer.Runtime and
// kubecontainer.DirectStreamingRuntime interfaces.
var _ kubecontainer . Runtime = & Runtime { }
2016-11-03 00:42:00 +00:00
var _ kubecontainer . DirectStreamingRuntime = & Runtime { }
2015-05-01 23:12:14 +00:00
2016-04-21 00:49:08 +00:00
// podGetter looks up a pod by its UID.
// TODO(yifan): This duplicates the podGetter in dockertools.
type podGetter interface {
2016-11-18 20:50:58 +00:00
// GetPodByUID returns the pod for the given UID; the bool presumably
// reports whether the pod was found — TODO confirm with the implementer.
GetPodByUID ( kubetypes . UID ) ( * v1 . Pod , bool )
2016-04-21 00:49:08 +00:00
}
// cliInterface wraps the command line calls for testing purposes.
type cliInterface interface {
2016-05-24 01:16:01 +00:00
// RunCommand creates the rkt command and runs it with the given config.
// If the config is nil, it will use the one inferred from rkt API service.
RunCommand ( config * Config , args ... string ) ( result [ ] string , err error )
2016-04-21 00:49:08 +00:00
}
2017-05-09 14:15:22 +00:00
// unitServiceGetter wraps the opening of systemd unit files for testing purposes.
type unitServiceGetter interface {
// getKubernetesDirective returns the podServiceDirective for the given
// string argument (presumably a service file path or name — TODO confirm).
getKubernetesDirective ( string ) ( podServiceDirective , error )
// getNetworkNamespace returns the network namespace, as a ContainerID, for
// the given Kubernetes pod UID and rkt pod.
getNetworkNamespace ( kubetypes . UID , * rktapi . Pod ) ( kubecontainer . ContainerID , error )
2017-04-29 16:04:39 +00:00
}
2015-04-28 23:11:37 +00:00
// New creates the rkt container runtime which implements the container runtime interface.
// It will test if the rkt binary is in the $PATH, and whether we can get the
// version of it. If so, creates the rkt container runtime, otherwise returns an error.
2016-03-19 00:22:11 +00:00
func New (
apiEndpoint string ,
config * Config ,
2016-01-29 00:01:01 +00:00
runtimeHelper kubecontainer . RuntimeHelper ,
2015-05-06 18:02:08 +00:00
recorder record . EventRecorder ,
containerRefManager * kubecontainer . RefManager ,
2016-04-21 00:49:08 +00:00
podGetter podGetter ,
2015-10-19 22:15:59 +00:00
livenessManager proberesults . Manager ,
2016-05-22 05:00:38 +00:00
httpClient types . HttpGetter ,
2016-05-03 00:49:42 +00:00
networkPlugin network . NetworkPlugin ,
hairpinMode bool ,
2016-04-15 01:01:40 +00:00
execer utilexec . Interface ,
os kubecontainer . OSInterface ,
2016-03-09 02:58:24 +00:00
imageBackOff * flowcontrol . Backoff ,
2015-10-20 21:49:44 +00:00
serializeImagePulls bool ,
2016-09-21 21:26:17 +00:00
imagePullQPS float32 ,
imagePullBurst int ,
2016-06-17 21:28:30 +00:00
requestTimeout time . Duration ,
2015-10-20 21:49:44 +00:00
) ( * Runtime , error ) {
2015-11-19 02:35:31 +00:00
// Create dbus connection.
systemd , err := newSystemd ( )
2015-04-28 23:11:37 +00:00
if err != nil {
2015-11-19 02:35:31 +00:00
return nil , fmt . Errorf ( "rkt: cannot create systemd interface: %v" , err )
2015-04-28 23:11:37 +00:00
}
2015-11-19 02:35:31 +00:00
// TODO(yifan): Use secure connection.
2016-03-19 00:22:11 +00:00
apisvcConn , err := grpc . Dial ( apiEndpoint , grpc . WithInsecure ( ) )
2015-04-28 23:11:37 +00:00
if err != nil {
2015-11-19 02:35:31 +00:00
return nil , fmt . Errorf ( "rkt: cannot connect to rkt api service: %v" , err )
2015-04-28 23:11:37 +00:00
}
2016-03-19 00:22:11 +00:00
// TODO(yifan): Get the rkt path from API service.
if config . Path == "" {
2015-08-17 17:03:45 +00:00
// No default rkt path was set, so try to find one in $PATH.
var err error
2016-04-15 01:01:40 +00:00
config . Path , err = execer . LookPath ( "rkt" )
2015-08-17 17:03:45 +00:00
if err != nil {
return nil , fmt . Errorf ( "cannot find rkt binary: %v" , err )
}
2015-04-28 23:11:37 +00:00
}
2016-04-15 01:01:40 +00:00
touchPath , err := execer . LookPath ( "touch" )
if err != nil {
return nil , fmt . Errorf ( "cannot find touch binary: %v" , err )
}
2016-05-03 00:49:42 +00:00
nsenterPath , err := execer . LookPath ( "nsenter" )
if err != nil {
return nil , fmt . Errorf ( "cannot find nsenter binary: %v" , err )
}
2016-06-17 21:28:30 +00:00
if requestTimeout == 0 {
requestTimeout = defaultRequestTimeout
}
2015-09-28 22:46:29 +00:00
rkt := & Runtime {
2016-04-21 01:21:41 +00:00
os : kubecontainer . RealOS { } ,
2015-05-06 18:02:08 +00:00
systemd : systemd ,
2015-11-19 02:35:31 +00:00
apisvcConn : apisvcConn ,
apisvc : rktapi . NewPublicAPIClient ( apisvcConn ) ,
2015-05-06 18:02:08 +00:00
config : config ,
dockerKeyring : credentialprovider . NewDockerKeyring ( ) ,
containerRefManager : containerRefManager ,
2016-04-21 00:49:08 +00:00
podGetter : podGetter ,
2016-01-29 00:01:01 +00:00
runtimeHelper : runtimeHelper ,
2015-05-06 18:02:08 +00:00
recorder : recorder ,
2015-10-19 22:15:59 +00:00
livenessManager : livenessManager ,
2016-12-13 23:00:34 +00:00
network : network . NewPluginManager ( networkPlugin ) ,
2016-04-15 01:01:40 +00:00
execer : execer ,
touchPath : touchPath ,
2016-05-03 00:49:42 +00:00
nsenterPath : nsenterPath ,
2016-06-17 21:28:30 +00:00
requestTimeout : requestTimeout ,
2015-04-28 23:11:37 +00:00
}
2016-03-19 00:22:11 +00:00
rkt . config , err = rkt . getConfig ( rkt . config )
if err != nil {
return nil , fmt . Errorf ( "rkt: cannot get config from rkt api service: %v" , err )
}
2016-11-03 00:42:00 +00:00
cmdRunner := kubecontainer . DirectStreamingRunner ( rkt )
rkt . runner = lifecycle . NewHandlerRunner ( httpClient , cmdRunner , rkt )
2016-03-04 22:52:45 +00:00
2016-09-21 21:26:17 +00:00
rkt . imagePuller = images . NewImageManager ( recorder , rkt , imageBackOff , serializeImagePulls , imagePullQPS , imagePullBurst )
2015-04-28 23:11:37 +00:00
2016-03-12 01:29:25 +00:00
if err := rkt . getVersions ( ) ; err != nil {
return nil , fmt . Errorf ( "rkt: error getting version info: %v" , err )
}
2016-04-21 01:21:41 +00:00
rkt . cli = rkt
2017-05-09 14:15:22 +00:00
rkt . unitGetter = rkt
2016-04-21 01:21:41 +00:00
2015-04-28 23:11:37 +00:00
return rkt , nil
}
2016-05-24 01:16:01 +00:00
func buildCommand ( config * Config , args ... string ) * exec . Cmd {
cmd := exec . Command ( config . Path )
cmd . Args = append ( cmd . Args , config . buildGlobalOptions ( ) ... )
cmd . Args = append ( cmd . Args , args ... )
return cmd
2015-04-28 23:11:37 +00:00
}
2016-01-01 01:01:34 +00:00
// convertToACName turns name into an appc ACName.
//
// The sanitizing error is ignored and MustACName is used on the result: name
// is expected to already match the 'DNS_LABEL' format defined in
// pkg/api/types.go, so neither an error nor a panic should occur.
func convertToACName(name string) appctypes.ACName {
	sanitized, _ := appctypes.SanitizeACName(name)
	acName := appctypes.MustACName(sanitized)
	return *acName
}
2016-04-21 01:21:41 +00:00
// RunCommand invokes rkt binary with arguments and returns the result
2015-05-14 00:57:54 +00:00
// from stdout in a list of strings. Each string in the list is a line.
2016-05-24 01:16:01 +00:00
// If config is non-nil, it will use the given config instead of the config
// inferred from rkt API service.
func ( r * Runtime ) RunCommand ( config * Config , args ... string ) ( [ ] string , error ) {
if config == nil {
config = r . config
}
2016-06-14 12:04:38 +00:00
glog . V ( 4 ) . Infof ( "rkt: Run command: %q with config: %#v" , args , config )
2015-04-28 23:11:37 +00:00
2015-08-26 01:50:42 +00:00
var stdout , stderr bytes . Buffer
2016-05-24 01:16:01 +00:00
cmd := buildCommand ( config , args ... )
cmd . Stdout , cmd . Stderr = & stdout , & stderr
2015-08-26 01:50:42 +00:00
if err := cmd . Run ( ) ; err != nil {
return nil , fmt . Errorf ( "failed to run %v: %v\nstdout: %v\nstderr: %v" , args , err , stdout . String ( ) , stderr . String ( ) )
2015-04-28 23:11:37 +00:00
}
2015-08-26 01:50:42 +00:00
return strings . Split ( strings . TrimSpace ( stdout . String ( ) ) , "\n" ) , nil
2015-04-28 23:11:37 +00:00
}
2015-04-30 03:04:29 +00:00
2016-04-05 01:03:40 +00:00
// makePodServiceFileName constructs the unit file name for a pod using its rkt pod uuid.
func makePodServiceFileName ( uuid string ) string {
2015-08-13 23:39:17 +00:00
// TODO(yifan): Add name for readability? We need to consider the
// limit of the length.
2016-04-21 00:49:08 +00:00
return fmt . Sprintf ( "%s%s.service" , kubernetesUnitPrefix , uuid )
}
func getRktUUIDFromServiceFileName ( filename string ) string {
return strings . TrimPrefix ( strings . TrimSuffix ( filename , path . Ext ( filename ) ) , kubernetesUnitPrefix )
2015-04-30 03:04:29 +00:00
}
2015-04-30 01:11:30 +00:00
2016-01-08 21:19:49 +00:00
// setIsolators sets the apps' isolators according to the security context and resource spec.
2016-11-18 20:50:58 +00:00
func setIsolators ( app * appctypes . App , c * v1 . Container , ctx * v1 . SecurityContext ) error {
2016-01-08 21:19:49 +00:00
var isolators [ ] appctypes . Isolator
2015-04-30 01:11:30 +00:00
2016-01-08 21:19:49 +00:00
// Capabilities isolators.
if ctx != nil {
var addCaps , dropCaps [ ] string
2015-05-09 21:17:36 +00:00
2016-01-08 21:19:49 +00:00
if ctx . Capabilities != nil {
2017-01-27 02:22:13 +00:00
addCaps , dropCaps = kubecontainer . MakeCapabilities ( ctx . Capabilities . Add , ctx . Capabilities . Drop )
2015-05-05 23:02:13 +00:00
}
2016-01-08 21:19:49 +00:00
if ctx . Privileged != nil && * ctx . Privileged {
addCaps , dropCaps = allCapabilities ( ) , [ ] string { }
}
if len ( addCaps ) > 0 {
set , err := appctypes . NewLinuxCapabilitiesRetainSet ( addCaps ... )
if err != nil {
return err
}
2017-01-29 17:45:01 +00:00
isolator , err := set . AsIsolator ( )
if err != nil {
return err
}
isolators = append ( isolators , * isolator )
2016-01-08 21:19:49 +00:00
}
if len ( dropCaps ) > 0 {
set , err := appctypes . NewLinuxCapabilitiesRevokeSet ( dropCaps ... )
if err != nil {
return err
}
2017-01-29 17:45:01 +00:00
isolator , err := set . AsIsolator ( )
if err != nil {
return err
}
isolators = append ( isolators , * isolator )
2015-04-30 01:11:30 +00:00
}
}
2016-01-08 21:19:49 +00:00
// Resources isolators.
type resource struct {
limit string
request string
2015-04-30 01:11:30 +00:00
}
2016-01-29 18:43:00 +00:00
// If limit is empty, populate it with request and vice versa.
2016-11-18 20:50:58 +00:00
resources := make ( map [ v1 . ResourceName ] * resource )
2015-04-30 01:11:30 +00:00
for name , quantity := range c . Resources . Limits {
2016-01-29 18:43:00 +00:00
resources [ name ] = & resource { limit : quantity . String ( ) , request : quantity . String ( ) }
2015-04-30 01:11:30 +00:00
}
for name , quantity := range c . Resources . Requests {
r , ok := resources [ name ]
2016-01-29 18:43:00 +00:00
if ok {
r . request = quantity . String ( )
continue
2015-04-30 01:11:30 +00:00
}
2016-01-29 18:43:00 +00:00
resources [ name ] = & resource { limit : quantity . String ( ) , request : quantity . String ( ) }
2015-04-30 01:11:30 +00:00
}
2016-01-08 21:19:49 +00:00
2015-04-30 01:11:30 +00:00
for name , res := range resources {
switch name {
2016-11-18 20:50:58 +00:00
case v1 . ResourceCPU :
2016-01-08 21:19:49 +00:00
cpu , err := appctypes . NewResourceCPUIsolator ( res . request , res . limit )
if err != nil {
return err
}
isolators = append ( isolators , cpu . AsIsolator ( ) )
2016-11-18 20:50:58 +00:00
case v1 . ResourceMemory :
2016-01-08 21:19:49 +00:00
memory , err := appctypes . NewResourceMemoryIsolator ( res . request , res . limit )
if err != nil {
return err
}
isolators = append ( isolators , memory . AsIsolator ( ) )
2015-04-30 01:11:30 +00:00
default :
return fmt . Errorf ( "resource type not supported: %v" , name )
}
}
2016-01-08 21:19:49 +00:00
mergeIsolators ( app , isolators )
2015-04-30 01:11:30 +00:00
return nil
}
2016-01-08 21:19:49 +00:00
// mergeIsolators overlays isolators onto app.Isolators. An isolator whose name
// already occurs in app.Isolators replaces the first entry with that name;
// all others are appended.
//
// Replacement is only defined for the capability retain/revoke sets and the
// CPU/memory resource isolators; a name clash on any other isolator panics.
func mergeIsolators(app *appctypes.App, isolators []appctypes.Isolator) {
	for _, incoming := range isolators {
		// Locate the first existing isolator carrying the same name.
		match := -1
		for idx := range app.Isolators {
			if incoming.Name.Equals(app.Isolators[idx].Name) {
				match = idx
				break
			}
		}
		if match < 0 {
			app.Isolators = append(app.Isolators, incoming)
			continue
		}

		switch incoming.Name {
		// TODO(yifan): More fine grain merge for capability set instead of override.
		case appctypes.LinuxCapabilitiesRetainSetName,
			appctypes.LinuxCapabilitiesRevokeSetName,
			appctypes.ResourceCPUName,
			appctypes.ResourceMemoryName:
			app.Isolators[match] = incoming
		default:
			panic(fmt.Sprintf("unexpected isolator name: %v", incoming.Name))
		}
	}
}
2016-01-01 01:01:34 +00:00
// mergeEnv merges optEnv into the image's environment. A variable defined in
// the image is overridden by the one with the same name in optEnv; variables
// only present in optEnv are added.
//
// The result is deterministic: each name keeps the position of its first
// occurrence (image variables first, then the new ones from optEnv in their
// given order) and carries the value of its last occurrence. If there are no
// variables at all, app.Environment is left nil.
func mergeEnv(app *appctypes.App, optEnv []kubecontainer.EnvVar) {
	imageEnv := app.Environment
	// Rebuild into a fresh slice so imageEnv can be read while appending.
	app.Environment = nil

	// position maps a variable name to its index in app.Environment.
	position := make(map[string]int, len(imageEnv)+len(optEnv))
	set := func(name, value string) {
		if idx, seen := position[name]; seen {
			app.Environment[idx].Value = value
			return
		}
		position[name] = len(app.Environment)
		app.Environment = append(app.Environment, appctypes.EnvironmentVariable{
			Name:  name,
			Value: value,
		})
	}

	for _, e := range imageEnv {
		set(e.Name, e.Value)
	}
	for _, e := range optEnv {
		set(e.Name, e.Value)
	}
}
2016-08-17 01:18:36 +00:00
// mergeMounts merges the mountPoints with the image's mount points.
2016-01-01 01:01:34 +00:00
// The mount points defined in the image will be overridden by the ones
2016-08-17 01:18:36 +00:00
// with the same container path.
func mergeMounts ( app * appctypes . App , mountPoints [ ] appctypes . MountPoint ) {
mountMap := make ( map [ string ] appctypes . MountPoint )
2016-01-01 01:01:34 +00:00
for _ , m := range app . MountPoints {
2016-08-17 01:18:36 +00:00
mountMap [ m . Path ] = m
2016-01-01 01:01:34 +00:00
}
2016-08-17 01:18:36 +00:00
for _ , m := range mountPoints {
mountMap [ m . Path ] = m
2015-08-21 18:47:05 +00:00
}
2016-01-01 01:01:34 +00:00
app . MountPoints = nil
for _ , mount := range mountMap {
app . MountPoints = append ( app . MountPoints , mount )
}
}
2016-08-17 01:18:36 +00:00
// mergePortMappings merges the containerPorts with the image's container ports.
2016-01-01 01:01:34 +00:00
// The port mappings defined in the image will be overridden by the ones
// with the same name in optPortMappings.
2016-08-17 01:18:36 +00:00
func mergePortMappings ( app * appctypes . App , containerPorts [ ] appctypes . Port ) {
2016-01-01 01:01:34 +00:00
portMap := make ( map [ appctypes . ACName ] appctypes . Port )
for _ , p := range app . Ports {
portMap [ p . Name ] = p
}
2016-08-17 01:18:36 +00:00
for _ , p := range containerPorts {
portMap [ p . Name ] = p
2016-01-01 01:01:34 +00:00
}
app . Ports = nil
for _ , port := range portMap {
app . Ports = append ( app . Ports , port )
}
2015-08-21 18:47:05 +00:00
}
2016-11-18 20:50:58 +00:00
func verifyNonRoot ( app * appctypes . App , ctx * v1 . SecurityContext ) error {
2016-01-08 21:19:49 +00:00
if ctx != nil && ctx . RunAsNonRoot != nil && * ctx . RunAsNonRoot {
if ctx . RunAsUser != nil && * ctx . RunAsUser == 0 {
return fmt . Errorf ( "container's runAsUser breaks non-root policy" )
}
if ctx . RunAsUser == nil && app . User == "0" {
return fmt . Errorf ( "container has no runAsUser and image will run as root" )
}
}
return nil
}
2015-08-21 18:47:05 +00:00
2016-11-18 20:50:58 +00:00
func setSupplementalGIDs ( app * appctypes . App , podCtx * v1 . PodSecurityContext , supplementalGids [ ] int64 ) {
2016-07-22 21:43:24 +00:00
if podCtx != nil || len ( supplementalGids ) != 0 {
2016-01-08 21:19:49 +00:00
app . SupplementaryGIDs = app . SupplementaryGIDs [ : 0 ]
2016-07-22 21:43:24 +00:00
}
if podCtx != nil {
2016-01-08 21:19:49 +00:00
for _ , v := range podCtx . SupplementalGroups {
app . SupplementaryGIDs = append ( app . SupplementaryGIDs , int ( v ) )
}
if podCtx . FSGroup != nil {
app . SupplementaryGIDs = append ( app . SupplementaryGIDs , int ( * podCtx . FSGroup ) )
}
}
2016-07-22 21:43:24 +00:00
for _ , v := range supplementalGids {
app . SupplementaryGIDs = append ( app . SupplementaryGIDs , int ( v ) )
}
2016-01-08 21:19:49 +00:00
}
// setApp merges the container spec with the image's manifest.
2016-11-18 20:50:58 +00:00
func setApp ( imgManifest * appcschema . ImageManifest , c * v1 . Container ,
2016-08-17 01:18:36 +00:00
mountPoints [ ] appctypes . MountPoint , containerPorts [ ] appctypes . Port , envs [ ] kubecontainer . EnvVar ,
2016-11-18 20:50:58 +00:00
ctx * v1 . SecurityContext , podCtx * v1 . PodSecurityContext , supplementalGids [ ] int64 ) error {
2016-08-17 01:18:36 +00:00
2016-03-08 19:46:40 +00:00
app := imgManifest . App
// Set up Exec.
var command , args [ ] string
cmd , ok := imgManifest . Annotations . Get ( appcDockerEntrypoint )
if ok {
2016-03-18 23:58:55 +00:00
err := json . Unmarshal ( [ ] byte ( cmd ) , & command )
if err != nil {
return fmt . Errorf ( "cannot unmarshal ENTRYPOINT %q: %v" , cmd , err )
}
2016-03-08 19:46:40 +00:00
}
ag , ok := imgManifest . Annotations . Get ( appcDockerCmd )
if ok {
2016-03-18 23:58:55 +00:00
err := json . Unmarshal ( [ ] byte ( ag ) , & args )
if err != nil {
return fmt . Errorf ( "cannot unmarshal CMD %q: %v" , ag , err )
}
2016-03-08 19:46:40 +00:00
}
2016-08-17 01:18:36 +00:00
userCommand , userArgs := kubecontainer . ExpandContainerCommandAndArgs ( c , envs )
2016-03-08 19:46:40 +00:00
if len ( userCommand ) > 0 {
command = userCommand
args = nil // If 'command' is specified, then drop the default args.
}
if len ( userArgs ) > 0 {
args = userArgs
}
2016-01-28 07:14:50 +00:00
exec := append ( command , args ... )
if len ( exec ) > 0 {
app . Exec = exec
2015-04-30 01:11:30 +00:00
}
2016-01-08 21:19:49 +00:00
// Set UID and GIDs.
if err := verifyNonRoot ( app , ctx ) ; err != nil {
return err
}
if ctx != nil && ctx . RunAsUser != nil {
app . User = strconv . Itoa ( int ( * ctx . RunAsUser ) )
}
2016-07-22 21:43:24 +00:00
setSupplementalGIDs ( app , podCtx , supplementalGids )
2015-04-30 01:11:30 +00:00
2016-01-27 19:55:56 +00:00
// If 'User' or 'Group' are still empty at this point,
// then apply the root UID and GID.
2016-06-10 20:43:58 +00:00
// TODO(yifan): If only the GID is empty, rkt should be able to determine the GID
// using the /etc/passwd file in the image.
// See https://github.com/appc/docker2aci/issues/175.
// Maybe we can remove this check in the future.
2016-01-27 19:55:56 +00:00
if app . User == "" {
app . User = "0"
2016-06-10 20:43:58 +00:00
app . Group = "0"
2016-01-27 19:55:56 +00:00
}
if app . Group == "" {
2016-06-10 20:43:58 +00:00
return fmt . Errorf ( "cannot determine the GID of the app %q" , imgManifest . Name )
2016-01-27 19:55:56 +00:00
}
2016-01-08 21:19:49 +00:00
// Set working directory.
2015-04-30 01:11:30 +00:00
if len ( c . WorkingDir ) > 0 {
app . WorkingDirectory = c . WorkingDir
}
2016-01-01 01:01:34 +00:00
// Notes that we don't create Mounts section in the pod manifest here,
// as Mounts will be automatically generated by rkt.
2016-08-17 01:18:36 +00:00
mergeMounts ( app , mountPoints )
mergeEnv ( app , envs )
mergePortMappings ( app , containerPorts )
2015-04-30 01:11:30 +00:00
2016-01-08 21:19:49 +00:00
return setIsolators ( app , c , ctx )
2015-04-30 01:11:30 +00:00
}
// makePodManifest transforms a kubelet pod spec to the rkt pod manifest.
2016-11-18 20:50:58 +00:00
func ( r * Runtime ) makePodManifest ( pod * v1 . Pod , podIP string , pullSecrets [ ] v1 . Secret ) ( * appcschema . PodManifest , error ) {
2015-04-30 01:11:30 +00:00
manifest := appcschema . BlankPodManifest ( )
2016-06-17 21:28:30 +00:00
ctx , cancel := context . WithTimeout ( context . Background ( ) , r . requestTimeout )
defer cancel ( )
listResp , err := r . apisvc . ListPods ( ctx , & rktapi . ListPodsRequest {
2015-12-17 00:52:39 +00:00
Detail : true ,
Filters : kubernetesPodFilters ( pod . UID ) ,
2015-11-21 00:56:35 +00:00
} )
if err != nil {
return nil , fmt . Errorf ( "couldn't list pods: %v" , err )
}
restartCount := 0
2015-12-17 00:52:39 +00:00
for _ , pod := range listResp . Pods {
2015-11-21 00:56:35 +00:00
manifest := & appcschema . PodManifest { }
2015-12-17 00:52:39 +00:00
err = json . Unmarshal ( pod . Manifest , manifest )
2015-04-30 01:11:30 +00:00
if err != nil {
2015-11-21 00:56:35 +00:00
glog . Warningf ( "rkt: error unmatshaling pod manifest: %v" , err )
continue
2015-04-30 01:11:30 +00:00
}
2015-11-21 00:56:35 +00:00
if countString , ok := manifest . Annotations . Get ( k8sRktRestartCountAnno ) ; ok {
num , err := strconv . Atoi ( countString )
if err != nil {
glog . Warningf ( "rkt: error reading restart count on pod: %v" , err )
continue
}
if num + 1 > restartCount {
restartCount = num + 1
}
2015-05-14 00:57:54 +00:00
}
2015-11-21 00:56:35 +00:00
}
2015-05-14 00:57:54 +00:00
2016-04-26 22:20:06 +00:00
requiresPrivileged := false
2015-11-21 00:56:35 +00:00
manifest . Annotations . Set ( * appctypes . MustACIdentifier ( k8sRktKubeletAnno ) , k8sRktKubeletAnnoValue )
2016-05-22 05:00:38 +00:00
manifest . Annotations . Set ( * appctypes . MustACIdentifier ( types . KubernetesPodUIDLabel ) , string ( pod . UID ) )
manifest . Annotations . Set ( * appctypes . MustACIdentifier ( types . KubernetesPodNameLabel ) , pod . Name )
manifest . Annotations . Set ( * appctypes . MustACIdentifier ( types . KubernetesPodNamespaceLabel ) , pod . Namespace )
2016-06-13 16:40:17 +00:00
manifest . Annotations . Set ( * appctypes . MustACIdentifier ( types . KubernetesContainerNameLabel ) , leaky . PodInfraContainerName )
2015-11-21 00:56:35 +00:00
manifest . Annotations . Set ( * appctypes . MustACIdentifier ( k8sRktRestartCountAnno ) , strconv . Itoa ( restartCount ) )
2016-04-26 22:20:06 +00:00
if stage1Name , ok := pod . Annotations [ k8sRktStage1NameAnno ] ; ok {
requiresPrivileged = true
manifest . Annotations . Set ( * appctypes . MustACIdentifier ( k8sRktStage1NameAnno ) , stage1Name )
}
2015-05-14 00:57:54 +00:00
2015-11-21 00:56:35 +00:00
for _ , c := range pod . Spec . Containers {
2016-05-06 23:01:42 +00:00
err := r . newAppcRuntimeApp ( pod , podIP , c , requiresPrivileged , pullSecrets , manifest )
2015-08-10 18:15:13 +00:00
if err != nil {
return nil , err
}
2015-04-30 01:11:30 +00:00
}
// TODO(yifan): Set pod-level isolators once it's supported in kubernetes.
return manifest , nil
}
2016-07-15 19:12:04 +00:00
// copyfile reads the whole of src and writes it to dst, creating dst with
// mode 0644 or truncating it if it already exists. If src cannot be read, dst
// is not touched and the read error is returned.
func copyfile(src, dst string) error {
	contents, readErr := ioutil.ReadFile(src)
	if readErr != nil {
		return readErr
	}

	writeErr := ioutil.WriteFile(dst, contents, 0644)
	return writeErr
}
2016-04-14 19:00:51 +00:00
// TODO(yifan): Can make rkt handle this when '--net=host'. See https://github.com/coreos/rkt/issues/2430.
2016-07-15 19:12:04 +00:00
func makeHostNetworkMount ( opts * kubecontainer . RunContainerOptions ) ( * kubecontainer . Mount , * kubecontainer . Mount , error ) {
2016-09-06 19:20:58 +00:00
mountHosts , mountResolvConf := true , true
for _ , mnt := range opts . Mounts {
switch mnt . ContainerPath {
case etcHostsPath :
mountHosts = false
case etcResolvConfPath :
mountResolvConf = false
}
2016-07-15 19:12:04 +00:00
}
2016-09-06 19:20:58 +00:00
var hostsMount , resolvMount kubecontainer . Mount
if mountHosts {
hostsPath := filepath . Join ( opts . PodContainerDir , "etc-hosts" )
if err := copyfile ( etcHostsPath , hostsPath ) ; err != nil {
return nil , nil , err
}
hostsMount = kubecontainer . Mount {
Name : "kubernetes-hostnetwork-hosts-conf" ,
ContainerPath : etcHostsPath ,
HostPath : hostsPath ,
}
opts . Mounts = append ( opts . Mounts , hostsMount )
2016-04-14 19:00:51 +00:00
}
2016-09-06 19:20:58 +00:00
if mountResolvConf {
resolvPath := filepath . Join ( opts . PodContainerDir , "etc-resolv-conf" )
if err := copyfile ( etcResolvConfPath , resolvPath ) ; err != nil {
return nil , nil , err
}
resolvMount = kubecontainer . Mount {
Name : "kubernetes-hostnetwork-resolv-conf" ,
ContainerPath : etcResolvConfPath ,
HostPath : resolvPath ,
}
opts . Mounts = append ( opts . Mounts , resolvMount )
2016-04-14 19:00:51 +00:00
}
2016-07-15 19:12:04 +00:00
return & hostsMount , & resolvMount , nil
2016-04-14 19:00:51 +00:00
}
2016-04-15 01:01:40 +00:00
// podFinishedMarkerPath builds the path of the marker file that records that
// the rkt pod rktUID has exited: a file named "finished-<rktUID>" inside
// podDir.
// A missing file means the pod has not been marked as finished; when the file
// exists, podFinishedAt reports its modification time as the exit time.
func podFinishedMarkerPath(podDir string, rktUID string) string {
	markerName := "finished-" + rktUID
	return filepath.Join(podDir, markerName)
}
// podFinishedMarkCommand returns the command line "<touchPath> <marker file>"
// that creates or updates the finished marker of the rkt pod rktUID inside
// podDir.
func podFinishedMarkCommand(touchPath, podDir, rktUID string) string {
	// TODO, if the path has a `'` character in it, this breaks.
	marker := podFinishedMarkerPath(podDir, rktUID)
	return fmt.Sprintf("%s %s", touchPath, marker)
}
// podFinishedAt returns the time that a pod exited, or a zero time if it has
// not.
2016-05-22 05:00:38 +00:00
func ( r * Runtime ) podFinishedAt ( podUID kubetypes . UID , rktUID string ) time . Time {
2016-04-15 01:01:40 +00:00
markerFile := podFinishedMarkerPath ( r . runtimeHelper . GetPodDir ( podUID ) , rktUID )
stat , err := r . os . Stat ( markerFile )
if err != nil {
if ! os . IsNotExist ( err ) {
glog . Warningf ( "rkt: unexpected fs error checking pod finished marker: %v" , err )
}
return time . Time { }
}
return stat . ModTime ( )
}
2016-11-18 20:50:58 +00:00
func ( r * Runtime ) makeContainerLogMount ( opts * kubecontainer . RunContainerOptions , container * v1 . Container ) ( * kubecontainer . Mount , error ) {
2016-01-12 02:30:29 +00:00
if opts . PodContainerDir == "" || container . TerminationMessagePath == "" {
return nil , nil
}
// In docker runtime, the container log path contains the container ID.
// However, for rkt runtime, we cannot get the container ID before the
// the container is launched, so here we generate a random uuid to enable
2016-08-02 22:13:54 +00:00
// us to map a container's termination message path to a unique log file
2016-01-12 02:30:29 +00:00
// on the disk.
2016-07-26 15:13:18 +00:00
randomUID := uuid . NewUUID ( )
2016-01-12 02:30:29 +00:00
containerLogPath := path . Join ( opts . PodContainerDir , string ( randomUID ) )
2016-04-21 01:21:41 +00:00
fs , err := r . os . Create ( containerLogPath )
2016-01-12 02:30:29 +00:00
if err != nil {
return nil , err
}
if err := fs . Close ( ) ; err != nil {
return nil , err
}
2016-04-14 19:00:51 +00:00
mnt := kubecontainer . Mount {
2016-01-12 02:30:29 +00:00
// Use a random name for the termination message mount, so that
// when a container restarts, it will not overwrite the old termination
// message.
Name : fmt . Sprintf ( "termination-message-%s" , randomUID ) ,
ContainerPath : container . TerminationMessagePath ,
HostPath : containerLogPath ,
ReadOnly : false ,
}
2016-04-14 19:00:51 +00:00
opts . Mounts = append ( opts . Mounts , mnt )
2016-01-12 02:30:29 +00:00
2016-04-14 19:00:51 +00:00
return & mnt , nil
2016-01-12 02:30:29 +00:00
}
2016-11-18 20:50:58 +00:00
func ( r * Runtime ) newAppcRuntimeApp ( pod * v1 . Pod , podIP string , c v1 . Container , requiresPrivileged bool , pullSecrets [ ] v1 . Secret , manifest * appcschema . PodManifest ) error {
2016-08-17 01:18:36 +00:00
var annotations appctypes . Annotations = [ ] appctypes . Annotation {
{
Name : * appctypes . MustACIdentifier ( k8sRktContainerHashAnno ) ,
2017-01-25 23:01:41 +00:00
Value : strconv . FormatUint ( kubecontainer . HashContainerLegacy ( & c ) , 10 ) ,
2016-08-17 01:18:36 +00:00
} ,
{
Name : * appctypes . MustACIdentifier ( types . KubernetesContainerNameLabel ) ,
Value : c . Name ,
} ,
}
2016-06-24 23:33:12 +00:00
if requiresPrivileged && ! securitycontext . HasPrivilegedRequest ( & c ) {
2016-04-26 22:20:06 +00:00
return fmt . Errorf ( "cannot make %q: running a custom stage1 requires a privileged security context" , format . Pod ( pod ) )
}
2016-12-29 08:53:09 +00:00
imageRef , _ , err := r . imagePuller . EnsureImageExists ( pod , & c , pullSecrets )
if err != nil {
return err
2015-11-21 00:56:35 +00:00
}
imgManifest , err := r . getImageManifest ( c . Image )
if err != nil {
2016-01-12 02:30:29 +00:00
return err
2015-11-21 00:56:35 +00:00
}
if imgManifest . App == nil {
imgManifest . App = new ( appctypes . App )
}
2016-12-29 08:53:09 +00:00
hash , err := appctypes . NewHash ( imageRef )
2015-11-21 00:56:35 +00:00
if err != nil {
2016-01-12 02:30:29 +00:00
return err
2015-11-21 00:56:35 +00:00
}
2016-03-07 20:24:08 +00:00
// TODO: determine how this should be handled for rkt
2016-08-05 08:19:17 +00:00
opts , _ , err := r . runtimeHelper . GenerateRunContainerOptions ( pod , & c , podIP )
2015-11-21 00:56:35 +00:00
if err != nil {
2016-01-12 02:30:29 +00:00
return err
}
2016-08-17 01:18:36 +00:00
// Create additional mount for termintation message path.
mount , err := r . makeContainerLogMount ( opts , & c )
2016-01-12 02:30:29 +00:00
if err != nil {
return err
2015-11-21 00:56:35 +00:00
}
2016-08-17 01:18:36 +00:00
mounts := append ( opts . Mounts , * mount )
annotations = append ( annotations , appctypes . Annotation {
Name : * appctypes . MustACIdentifier ( k8sRktTerminationMessagePathAnno ) ,
Value : mount . HostPath ,
} )
2015-11-21 00:56:35 +00:00
2016-08-17 01:18:36 +00:00
// If run in 'hostnetwork' mode, then copy the host's /etc/resolv.conf and /etc/hosts,
// and add mounts.
2016-04-14 19:00:51 +00:00
if kubecontainer . IsHostNetworkPod ( pod ) {
2016-08-17 01:18:36 +00:00
hostsMount , resolvMount , err := makeHostNetworkMount ( opts )
2016-07-15 19:12:04 +00:00
if err != nil {
return err
}
2016-08-17 01:18:36 +00:00
mounts = append ( mounts , * hostsMount , * resolvMount )
2016-04-14 19:00:51 +00:00
}
2016-07-22 21:43:24 +00:00
supplementalGids := r . runtimeHelper . GetExtraSupplementalGroupsForPod ( pod )
2016-01-08 21:19:49 +00:00
ctx := securitycontext . DetermineEffectiveSecurityContext ( pod , & c )
2015-11-21 00:56:35 +00:00
2016-08-17 01:18:36 +00:00
volumes , mountPoints := convertKubeMounts ( mounts )
containerPorts , hostPorts := convertKubePortMappings ( opts . PortMappings )
if err := setApp ( imgManifest , & c , mountPoints , containerPorts , opts . Envs , ctx , pod . Spec . SecurityContext , supplementalGids ) ; err != nil {
return err
2016-05-11 22:51:13 +00:00
}
2016-01-12 02:30:29 +00:00
ra := appcschema . RuntimeApp {
2016-08-17 01:18:36 +00:00
Name : convertToACName ( c . Name ) ,
Image : appcschema . RuntimeImage { ID : * hash } ,
App : imgManifest . App ,
Annotations : annotations ,
2016-01-12 02:30:29 +00:00
}
2016-05-27 03:33:46 +00:00
if c . SecurityContext != nil && c . SecurityContext . ReadOnlyRootFilesystem != nil {
ra . ReadOnlyRootFS = * c . SecurityContext . ReadOnlyRootFilesystem
}
2016-01-12 02:30:29 +00:00
manifest . Apps = append ( manifest . Apps , ra )
2016-08-17 01:18:36 +00:00
manifest . Volumes = append ( manifest . Volumes , volumes ... )
manifest . Ports = append ( manifest . Ports , hostPorts ... )
2016-01-12 02:30:29 +00:00
return nil
2015-11-21 00:56:35 +00:00
}
2016-05-22 05:00:38 +00:00
func runningKubernetesPodFilters ( uid kubetypes . UID ) [ ] * rktapi . PodFilter {
2015-12-21 19:25:38 +00:00
return [ ] * rktapi . PodFilter {
{
States : [ ] rktapi . PodState {
rktapi . PodState_POD_STATE_RUNNING ,
} ,
Annotations : [ ] * rktapi . KeyValue {
{
Key : k8sRktKubeletAnno ,
Value : k8sRktKubeletAnnoValue ,
} ,
{
2016-05-22 05:00:38 +00:00
Key : types . KubernetesPodUIDLabel ,
2015-12-21 19:25:38 +00:00
Value : string ( uid ) ,
} ,
} ,
} ,
}
}
2016-05-22 05:00:38 +00:00
func kubernetesPodFilters ( uid kubetypes . UID ) [ ] * rktapi . PodFilter {
2015-12-17 00:52:39 +00:00
return [ ] * rktapi . PodFilter {
{
Annotations : [ ] * rktapi . KeyValue {
{
Key : k8sRktKubeletAnno ,
Value : k8sRktKubeletAnnoValue ,
} ,
{
2016-05-22 05:00:38 +00:00
Key : types . KubernetesPodUIDLabel ,
2015-12-17 00:52:39 +00:00
Value : string ( uid ) ,
} ,
2015-11-21 00:56:35 +00:00
} ,
} ,
}
}
2016-04-21 00:49:08 +00:00
// kubernetesPodsFilters returns a single rkt pod filter that lists only the
// kubelet marker annotation.
func kubernetesPodsFilters() []*rktapi.PodFilter {
	managedByKubelet := &rktapi.KeyValue{
		Key:   k8sRktKubeletAnno,
		Value: k8sRktKubeletAnnoValue,
	}

	filter := &rktapi.PodFilter{
		Annotations: []*rktapi.KeyValue{managedByKubelet},
	}
	return []*rktapi.PodFilter{filter}
}
2015-04-30 20:34:46 +00:00
// newUnitOption returns a pointer to a systemd unit option with the given
// section, name and value.
func newUnitOption(section, name, value string) *unit.UnitOption {
	opt := unit.UnitOption{
		Name:    name,
		Value:   value,
		Section: section,
	}
	return &opt
}
2016-11-18 20:50:58 +00:00
// apiPodToruntimePod converts an v1.Pod to kubelet/container.Pod.
func apiPodToruntimePod ( uuid string , pod * v1 . Pod ) * kubecontainer . Pod {
2015-04-30 20:34:46 +00:00
p := & kubecontainer . Pod {
ID : pod . UID ,
Name : pod . Name ,
Namespace : pod . Namespace ,
}
for i := range pod . Spec . Containers {
c := & pod . Spec . Containers [ i ]
p . Containers = append ( p . Containers , & kubecontainer . Container {
2016-04-21 06:32:28 +00:00
ID : buildContainerID ( & containerID { uuid , c . Name } ) ,
Name : c . Name ,
Image : c . Image ,
2017-01-25 23:01:41 +00:00
Hash : kubecontainer . HashContainerLegacy ( c ) ,
2015-04-30 20:34:46 +00:00
} )
}
return p
}
2015-08-25 20:03:33 +00:00
// serviceFilePath returns the path of the service file serviceName, i.e.
// serviceName joined onto systemdServiceDir.
func serviceFilePath(serviceName string) string {
	unitPath := path.Join(systemdServiceDir, serviceName)
	return unitPath
}
2016-06-07 20:22:55 +00:00
// shouldCreateNetns returns true if:
// The pod does not run in host network. And
// The pod runs inside a netns created outside of rkt.
2016-11-18 20:50:58 +00:00
func ( r * Runtime ) shouldCreateNetns ( pod * v1 . Pod ) bool {
2016-12-13 23:00:34 +00:00
return ! kubecontainer . IsHostNetworkPod ( pod ) && r . network . PluginName ( ) != network . DefaultPluginName
2016-06-07 20:22:55 +00:00
}
// usesRktHostNetwork returns true if:
// The pod runs in the host network. Or
// The pod runs inside a netns created outside of rkt.
2016-11-18 20:50:58 +00:00
func ( r * Runtime ) usesRktHostNetwork ( pod * v1 . Pod ) bool {
2016-06-07 20:22:55 +00:00
return kubecontainer . IsHostNetworkPod ( pod ) || r . shouldCreateNetns ( pod )
}
2016-01-29 00:01:01 +00:00
// generateRunCommand crafts a 'rkt run-prepared' command with necessary parameters.
2017-04-29 16:04:39 +00:00
func ( r * Runtime ) generateRunCommand ( pod * v1 . Pod , uuid , networkNamespaceID string ) ( string , error ) {
2016-08-23 19:16:30 +00:00
config := * r . config
privileged := true
for _ , c := range pod . Spec . Containers {
ctx := securitycontext . DetermineEffectiveSecurityContext ( pod , & c )
if ctx == nil || ctx . Privileged == nil || * ctx . Privileged == false {
privileged = false
break
}
}
// Use "all-run" insecure option (https://github.com/coreos/rkt/pull/2983) to take care
// of privileged pod.
// TODO(yifan): Have more granular app-level control of the insecure options.
// See: https://github.com/coreos/rkt/issues/2996.
if privileged {
config . InsecureOptions = fmt . Sprintf ( "%s,%s" , config . InsecureOptions , "all-run" )
}
runPrepared := buildCommand ( & config , "run-prepared" ) . Args
2016-01-29 00:01:01 +00:00
2016-04-28 00:16:28 +00:00
var hostname string
var err error
osInfos , err := getOSReleaseInfo ( )
if err != nil {
2016-08-19 19:05:00 +00:00
glog . Warningf ( "rkt: Failed to read the os release info: %v" , err )
} else {
// Overlay fs is not supported for SELinux yet on many distros.
// See https://github.com/coreos/rkt/issues/1727#issuecomment-173203129.
// For now, coreos carries a patch to support it: https://github.com/coreos/coreos-overlay/pull/1703
if osInfos [ "ID" ] != "coreos" && pod . Spec . SecurityContext != nil && pod . Spec . SecurityContext . SELinuxOptions != nil {
runPrepared = append ( runPrepared , "--no-overlay=true" )
}
2016-04-28 00:16:28 +00:00
}
2016-06-07 20:22:55 +00:00
// Apply '--net=host' to pod that is running on host network or inside a network namespace.
if r . usesRktHostNetwork ( pod ) {
runPrepared = append ( runPrepared , "--net=host" )
} else {
runPrepared = append ( runPrepared , fmt . Sprintf ( "--net=%s" , defaultNetworkName ) )
}
2016-05-03 00:49:42 +00:00
2016-06-07 20:22:55 +00:00
if kubecontainer . IsHostNetworkPod ( pod ) {
2016-04-14 19:00:51 +00:00
// TODO(yifan): Let runtimeHelper.GeneratePodHostNameAndDomain() to handle this.
2016-04-21 01:21:41 +00:00
hostname , err = r . os . Hostname ( )
2016-04-14 19:00:51 +00:00
if err != nil {
return "" , err
}
2016-01-29 00:01:01 +00:00
} else {
2016-04-14 19:00:51 +00:00
// Setup DNS.
2016-08-05 08:19:17 +00:00
dnsServers , dnsSearches , _ , err := r . runtimeHelper . GetClusterDNS ( pod )
2016-04-14 19:00:51 +00:00
if err != nil {
return "" , err
}
for _ , server := range dnsServers {
runPrepared = append ( runPrepared , fmt . Sprintf ( "--dns=%s" , server ) )
}
for _ , search := range dnsSearches {
runPrepared = append ( runPrepared , fmt . Sprintf ( "--dns-search=%s" , search ) )
}
if len ( dnsServers ) > 0 || len ( dnsSearches ) > 0 {
runPrepared = append ( runPrepared , fmt . Sprintf ( "--dns-opt=%s" , defaultDNSOption ) )
}
// TODO(yifan): host domain is not being used.
2016-04-14 17:45:29 +00:00
hostname , _ , err = r . runtimeHelper . GeneratePodHostNameAndDomain ( pod )
if err != nil {
return "" , err
}
2016-06-07 20:22:55 +00:00
}
2016-05-03 00:49:42 +00:00
2016-06-07 20:22:55 +00:00
runPrepared = append ( runPrepared , fmt . Sprintf ( "--hostname=%s" , hostname ) )
runPrepared = append ( runPrepared , uuid )
if r . shouldCreateNetns ( pod ) {
2016-05-03 00:49:42 +00:00
// Drop the `rkt run-prepared` into the network namespace we
// created.
// TODO: switch to 'ip netns exec' once we can depend on a new
// enough version that doesn't have bugs like
// https://bugzilla.redhat.com/show_bug.cgi?id=882047
2017-04-29 16:04:39 +00:00
nsenterExec := [ ] string { r . nsenterPath , "--net=" + netnsPathFromName ( networkNamespaceID ) , "--" }
2016-05-03 00:49:42 +00:00
runPrepared = append ( nsenterExec , runPrepared ... )
2016-01-29 00:01:01 +00:00
}
2016-05-03 00:49:42 +00:00
2016-01-29 00:01:01 +00:00
return strings . Join ( runPrepared , " " ) , nil
}
2017-04-29 16:04:39 +00:00
func ( r * Runtime ) cleanupPodNetwork ( pod * v1 . Pod , networkNamespace kubecontainer . ContainerID ) error {
2016-06-07 20:22:55 +00:00
// No-op if the pod is not running in a created netns.
if ! r . shouldCreateNetns ( pod ) {
return nil
}
2017-05-09 14:15:22 +00:00
glog . V ( 3 ) . Infof ( "Calling network plugin %s to tear down pod for %s" , r . network . PluginName ( ) , format . Pod ( pod ) )
2017-04-29 16:04:39 +00:00
teardownErr := r . network . TearDownPod ( pod . Namespace , pod . Name , networkNamespace )
2016-12-13 23:00:34 +00:00
if teardownErr != nil {
glog . Error ( teardownErr )
2016-05-03 00:49:42 +00:00
}
2017-04-29 16:04:39 +00:00
if _ , err := r . execer . Command ( "ip" , "netns" , "del" , networkNamespace . ID ) . Output ( ) ; err != nil {
2016-05-03 00:49:42 +00:00
return fmt . Errorf ( "rkt: Failed to remove network namespace for pod %s: %v" , format . Pod ( pod ) , err )
}
return teardownErr
}
2016-04-26 22:20:06 +00:00
func ( r * Runtime ) preparePodArgs ( manifest * appcschema . PodManifest , manifestFileName string ) [ ] string {
// Order of precedence for the stage1:
// 1) pod annotation (stage1 name)
// 2) kubelet configured stage1 (stage1 path)
// 3) empty; whatever rkt's compiled to default to
stage1ImageCmd := ""
if r . config . Stage1Image != "" {
2016-06-16 22:36:24 +00:00
stage1ImageCmd = "--stage1-name=" + r . config . Stage1Image
2016-04-26 22:20:06 +00:00
}
if stage1Name , ok := manifest . Annotations . Get ( k8sRktStage1NameAnno ) ; ok {
stage1ImageCmd = "--stage1-name=" + stage1Name
}
// Run 'rkt prepare' to get the rkt UUID.
cmds := [ ] string { "prepare" , "--quiet" , "--pod-manifest" , manifestFileName }
if stage1ImageCmd != "" {
cmds = append ( cmds , stage1ImageCmd )
}
return cmds
}
2016-11-18 20:50:58 +00:00
func ( r * Runtime ) getSelinuxContext ( opt * v1 . SELinuxOptions ) ( string , error ) {
2016-10-25 15:51:11 +00:00
selinuxRunner := selinux . NewSELinuxRunner ( )
2016-06-06 23:23:16 +00:00
str , err := selinuxRunner . Getfilecon ( r . config . Dir )
if err != nil {
return "" , err
}
ctx := strings . SplitN ( str , ":" , 4 )
if len ( ctx ) != 4 {
return "" , fmt . Errorf ( "malformated selinux context" )
}
if opt . User != "" {
ctx [ 0 ] = opt . User
}
if opt . Role != "" {
ctx [ 1 ] = opt . Role
}
if opt . Type != "" {
ctx [ 2 ] = opt . Type
}
if opt . Level != "" {
ctx [ 3 ] = opt . Level
}
return strings . Join ( ctx , ":" ) , nil
}
2017-02-20 17:16:41 +00:00
// constructSyslogIdentifier derives a base name for the pod that is used as
// the syslog identifier, to ease browsing its logs in the journal:
// journalctl -t podBaseName
//
// A non-empty generateName wins over podName; one trailing '-' is cut off
// unless generateName consists of that single character only.
func constructSyslogIdentifier(generateName string, podName string) string {
	switch n := len(generateName); {
	case n == 0:
		return podName
	case n > 1 && generateName[n-1] == '-':
		return generateName[:n-1]
	default:
		return generateName
	}
}
2017-06-17 10:43:15 +00:00
// setupSystemdCustomFields appends the systemd options requested through pod
// annotations to unitOptionArray and returns the resulting slice.
//
// Currently only LimitNOFILE is supported: the value of the
// k8sRktLimitNoFileAnno annotation must parse as an integer of at least 1.
// On an invalid value the unmodified slice is returned with an error.
func setupSystemdCustomFields(annotations map[string]string, unitOptionArray []*unit.UnitOption) ([]*unit.UnitOption, error) {
	// LimitNOFILE
	rawLimit := annotations[k8sRktLimitNoFileAnno]
	if rawLimit == "" {
		return unitOptionArray, nil
	}

	limit, err := strconv.Atoi(rawLimit)
	if err != nil {
		return unitOptionArray, err
	}
	if limit < 1 {
		return unitOptionArray, fmt.Errorf("invalid value for %s: %s", k8sRktLimitNoFileAnno, rawLimit)
	}

	return append(unitOptionArray, newUnitOption("Service", "LimitNOFILE", rawLimit)), nil
}
2015-04-30 20:34:46 +00:00
// preparePod will:
//
// 1. Invoke 'rkt prepare' to prepare the pod, and get the rkt pod uuid.
2015-09-01 02:25:26 +00:00
// 2. Create the unit file and save it under systemdUnitDir.
2015-04-30 20:34:46 +00:00
//
2015-08-26 01:50:42 +00:00
// On success, it will return a string that represents name of the unit file
// and the runtime pod.
2017-04-29 16:04:39 +00:00
func ( r * Runtime ) preparePod ( pod * v1 . Pod , podIP string , pullSecrets [ ] v1 . Secret , networkNamespaceID string ) ( string , * kubecontainer . Pod , error ) {
2016-04-26 22:20:06 +00:00
// Generate the appc pod manifest from the k8s pod spec.
2016-05-06 23:01:42 +00:00
manifest , err := r . makePodManifest ( pod , podIP , pullSecrets )
2015-04-30 20:34:46 +00:00
if err != nil {
2015-08-26 01:50:42 +00:00
return "" , nil , err
2015-04-30 20:34:46 +00:00
}
2015-08-13 23:39:17 +00:00
manifestFile , err := ioutil . TempFile ( "" , fmt . Sprintf ( "manifest-%s-" , pod . Name ) )
2015-04-30 20:34:46 +00:00
if err != nil {
2015-08-26 01:50:42 +00:00
return "" , nil , err
2015-04-30 20:34:46 +00:00
}
defer func ( ) {
manifestFile . Close ( )
2016-04-21 01:21:41 +00:00
if err := r . os . Remove ( manifestFile . Name ( ) ) ; err != nil {
2015-04-30 20:34:46 +00:00
glog . Warningf ( "rkt: Cannot remove temp manifest file %q: %v" , manifestFile . Name ( ) , err )
}
} ( )
data , err := json . Marshal ( manifest )
if err != nil {
2015-08-26 01:50:42 +00:00
return "" , nil , err
2015-04-30 20:34:46 +00:00
}
2016-01-14 00:52:47 +00:00
glog . V ( 4 ) . Infof ( "Generating pod manifest for pod %q: %v" , format . Pod ( pod ) , string ( data ) )
2015-04-30 20:34:46 +00:00
// Since File.Write returns error if the written length is less than len(data),
// so check error is enough for us.
if _ , err := manifestFile . Write ( data ) ; err != nil {
2015-08-26 01:50:42 +00:00
return "" , nil , err
2015-04-30 20:34:46 +00:00
}
2016-04-26 22:20:06 +00:00
prepareCmd := r . preparePodArgs ( manifest , manifestFile . Name ( ) )
2016-05-24 01:16:01 +00:00
output , err := r . cli . RunCommand ( nil , prepareCmd ... )
2015-04-30 20:34:46 +00:00
if err != nil {
2015-08-26 01:50:42 +00:00
return "" , nil , err
2015-04-30 20:34:46 +00:00
}
if len ( output ) != 1 {
2015-08-26 01:50:42 +00:00
return "" , nil , fmt . Errorf ( "invalid output from 'rkt prepare': %v" , output )
2015-04-30 20:34:46 +00:00
}
uuid := output [ 0 ]
2015-08-13 23:39:17 +00:00
glog . V ( 4 ) . Infof ( "'rkt prepare' returns %q" , uuid )
2015-04-30 20:34:46 +00:00
2015-08-25 20:03:33 +00:00
// Create systemd service file for the rkt pod.
2017-04-29 16:04:39 +00:00
runPrepared , err := r . generateRunCommand ( pod , uuid , networkNamespaceID )
2016-01-29 00:01:01 +00:00
if err != nil {
return "" , nil , fmt . Errorf ( "failed to generate 'rkt run-prepared' command: %v" , err )
2015-08-13 23:39:17 +00:00
}
2015-09-15 16:43:59 +00:00
// TODO handle pod.Spec.HostPID
2015-08-10 08:14:01 +00:00
// TODO handle pod.Spec.HostIPC
2015-09-15 16:43:59 +00:00
2016-04-15 01:01:40 +00:00
// TODO per container finishedAt, not just per pod
markPodFinished := podFinishedMarkCommand ( r . touchPath , r . runtimeHelper . GetPodDir ( pod . UID ) , uuid )
2016-06-07 20:22:55 +00:00
hostNetwork := kubecontainer . IsHostNetworkPod ( pod )
2015-04-30 20:34:46 +00:00
units := [ ] * unit . UnitOption {
newUnitOption ( "Service" , "ExecStart" , runPrepared ) ,
2016-04-15 01:01:40 +00:00
newUnitOption ( "Service" , "ExecStopPost" , markPodFinished ) ,
2015-09-01 02:25:26 +00:00
// This enables graceful stop.
newUnitOption ( "Service" , "KillMode" , "mixed" ) ,
2016-06-21 00:12:10 +00:00
newUnitOption ( "Service" , "TimeoutStopSec" , fmt . Sprintf ( "%ds" , getPodTerminationGracePeriodInSecond ( pod ) ) ) ,
2017-01-17 09:29:50 +00:00
// Ops helpers
newUnitOption ( "Unit" , "Description" , pod . Name ) ,
newUnitOption ( "Service" , "SyslogIdentifier" , constructSyslogIdentifier ( pod . GenerateName , pod . Name ) ) ,
2016-05-03 00:49:42 +00:00
// Track pod info for garbage collection
newUnitOption ( unitKubernetesSection , unitPodUID , string ( pod . UID ) ) ,
newUnitOption ( unitKubernetesSection , unitPodName , pod . Name ) ,
newUnitOption ( unitKubernetesSection , unitPodNamespace , pod . Namespace ) ,
2016-06-07 20:22:55 +00:00
newUnitOption ( unitKubernetesSection , unitPodHostNetwork , fmt . Sprintf ( "%v" , hostNetwork ) ) ,
2017-04-29 16:04:39 +00:00
newUnitOption ( unitKubernetesSection , unitPodNetworkNamespace , networkNamespaceID ) ,
2015-04-30 20:34:46 +00:00
}
2016-04-28 00:16:28 +00:00
if pod . Spec . SecurityContext != nil && pod . Spec . SecurityContext . SELinuxOptions != nil {
opt := pod . Spec . SecurityContext . SELinuxOptions
2016-06-06 23:23:16 +00:00
selinuxContext , err := r . getSelinuxContext ( opt )
if err != nil {
glog . Errorf ( "rkt: Failed to construct selinux context with selinux option %q: %v" , opt , err )
return "" , nil , err
}
2016-04-28 00:16:28 +00:00
units = append ( units , newUnitOption ( "Service" , "SELinuxContext" , selinuxContext ) )
}
2017-06-17 10:43:15 +00:00
units , err = setupSystemdCustomFields ( pod . Annotations , units )
if err != nil {
glog . Warningf ( "fail to add custom systemd fields provided by pod Annotations: %q" , err )
}
2016-04-05 01:03:40 +00:00
serviceName := makePodServiceFileName ( uuid )
2015-11-20 17:54:37 +00:00
glog . V ( 4 ) . Infof ( "rkt: Creating service file %q for pod %q" , serviceName , format . Pod ( pod ) )
2016-04-21 01:21:41 +00:00
serviceFile , err := r . os . Create ( serviceFilePath ( serviceName ) )
2015-04-30 20:34:46 +00:00
if err != nil {
2015-08-26 01:50:42 +00:00
return "" , nil , err
2015-04-30 20:34:46 +00:00
}
2015-12-21 19:25:38 +00:00
if _ , err := io . Copy ( serviceFile , unit . Serialize ( units ) ) ; err != nil {
2015-08-26 01:50:42 +00:00
return "" , nil , err
2015-08-25 20:03:33 +00:00
}
2015-12-21 19:25:38 +00:00
serviceFile . Close ( )
return serviceName , apiPodToruntimePod ( uuid , pod ) , nil
2015-08-26 01:50:42 +00:00
}
// generateEvents is a helper function that generates some container
// life cycle events for containers in a pod.
2015-09-28 22:46:29 +00:00
func ( r * Runtime ) generateEvents ( runtimePod * kubecontainer . Pod , reason string , failure error ) {
2015-08-26 01:50:42 +00:00
// Set up container references.
for _ , c := range runtimePod . Containers {
2015-10-07 17:58:05 +00:00
containerID := c . ID
2015-08-26 01:50:42 +00:00
id , err := parseContainerID ( containerID )
if err != nil {
glog . Warningf ( "Invalid container ID %q" , containerID )
continue
}
ref , ok := r . containerRefManager . GetRef ( containerID )
if ! ok {
glog . Warningf ( "No ref for container %q" , containerID )
continue
}
// Note that 'rkt id' is the pod id.
2016-01-11 07:55:51 +00:00
uuid := utilstrings . ShortenString ( id . uuid , 8 )
2015-08-26 01:50:42 +00:00
switch reason {
case "Created" :
2017-05-22 21:24:36 +00:00
r . recorder . Eventf ( events . ToObjectReference ( ref ) , v1 . EventTypeNormal , events . CreatedContainer , "Created with rkt id %v" , uuid )
2015-08-26 01:50:42 +00:00
case "Started" :
2017-05-22 21:24:36 +00:00
r . recorder . Eventf ( events . ToObjectReference ( ref ) , v1 . EventTypeNormal , events . StartedContainer , "Started with rkt id %v" , uuid )
2015-08-26 01:50:42 +00:00
case "Failed" :
2017-05-22 21:24:36 +00:00
r . recorder . Eventf ( events . ToObjectReference ( ref ) , v1 . EventTypeWarning , events . FailedToStartContainer , "Failed to start with rkt id %v with error %v" , uuid , failure )
2015-08-26 01:50:42 +00:00
case "Killing" :
2017-05-22 21:24:36 +00:00
r . recorder . Eventf ( events . ToObjectReference ( ref ) , v1 . EventTypeNormal , events . KillingContainer , "Killing with rkt id %v" , uuid )
2015-08-26 01:50:42 +00:00
default :
glog . Errorf ( "rkt: Unexpected event %q" , reason )
2015-08-25 20:03:33 +00:00
}
2015-04-30 20:34:46 +00:00
}
2015-08-26 01:50:42 +00:00
return
2015-04-30 20:34:46 +00:00
}
2017-04-29 16:04:39 +00:00
// Generate a Network Namespace based on a New UUID
// to run the Pod and all of its containers inside a dedicated unique namespace
func generateNetworkNamespaceUUID ( ) kubecontainer . ContainerID {
return kubecontainer . ContainerID { ID : fmt . Sprintf ( "%s%s" , kubernetesUnitPrefix , uuid . NewUUID ( ) ) }
2016-05-03 00:49:42 +00:00
}
// netnsPathFromName returns the path of the network namespace netnsName
// below /var/run/netns.
func netnsPathFromName(netnsName string) string {
	const netnsPathFormat = "/var/run/netns/%s"
	return fmt.Sprintf(netnsPathFormat, netnsName)
}
2016-05-06 23:01:42 +00:00
// setupPodNetwork creates a network namespace for the given pod and calls
// configured NetworkPlugin's setup function on it.
// It returns the namespace name, configured IP (if available), and an error if
2016-06-17 16:27:25 +00:00
// one occurred.
2016-06-07 20:22:55 +00:00
//
// If the pod is running in host network or is running using the no-op plugin, then nothing will be done.
2017-04-29 16:04:39 +00:00
func ( r * Runtime ) setupPodNetwork ( pod * v1 . Pod ) ( kubecontainer . ContainerID , string , error ) {
2016-12-13 23:00:34 +00:00
glog . V ( 3 ) . Infof ( "Calling network plugin %s to set up pod for %s" , r . network . PluginName ( ) , format . Pod ( pod ) )
2016-06-07 20:22:55 +00:00
2017-04-29 16:04:39 +00:00
var networkNamespace kubecontainer . ContainerID
2016-06-07 20:22:55 +00:00
// No-op if the pod is not running in a created netns.
if ! r . shouldCreateNetns ( pod ) {
2017-04-29 16:04:39 +00:00
return networkNamespace , "" , nil
2016-06-07 20:22:55 +00:00
}
2017-04-29 16:04:39 +00:00
networkNamespace = generateNetworkNamespaceUUID ( )
glog . V ( 5 ) . Infof ( "New network namespace %q generated for pod %s" , networkNamespace . ID , format . Pod ( pod ) )
2016-05-03 00:49:42 +00:00
2017-04-29 16:04:39 +00:00
// Create the network namespace for the pod
_ , err := r . execer . Command ( "ip" , "netns" , "add" , networkNamespace . ID ) . Output ( )
2016-05-03 00:49:42 +00:00
if err != nil {
2017-04-29 16:04:39 +00:00
return networkNamespace , "" , fmt . Errorf ( "failed to create pod network namespace: %v" , err )
2016-05-03 00:49:42 +00:00
}
// Set up networking with the network plugin
2017-04-29 16:04:39 +00:00
err = r . network . SetUpPod ( pod . Namespace , pod . Name , networkNamespace , pod . Annotations )
2016-05-03 00:49:42 +00:00
if err != nil {
2017-04-29 16:04:39 +00:00
return networkNamespace , "" , err
2016-05-06 23:01:42 +00:00
}
2017-04-29 16:04:39 +00:00
status , err := r . network . GetPodNetworkStatus ( pod . Namespace , pod . Name , networkNamespace )
2016-05-06 23:01:42 +00:00
if err != nil {
2017-04-29 16:04:39 +00:00
return networkNamespace , "" , err
2016-05-03 00:49:42 +00:00
}
if r . configureHairpinMode {
2017-04-29 16:04:39 +00:00
if err = hairpin . SetUpContainerPath ( netnsPathFromName ( networkNamespace . ID ) , network . DefaultInterfaceName ) ; err != nil {
2016-05-03 00:49:42 +00:00
glog . Warningf ( "Hairpin setup failed for pod %q: %v" , format . Pod ( pod ) , err )
}
}
2017-04-29 16:04:39 +00:00
return networkNamespace , status . IP . String ( ) , nil
2016-05-03 00:49:42 +00:00
}
2017-03-16 13:48:34 +00:00
// For a hostPath volume: rkt doesn't create any missing volume on the node/host so we need to create it
func createHostPathVolumes ( pod * v1 . Pod ) ( err error ) {
for _ , v := range pod . Spec . Volumes {
if v . VolumeSource . HostPath != nil {
2017-04-13 13:39:13 +00:00
_ , err = os . Stat ( v . HostPath . Path )
if os . IsNotExist ( err ) {
if err = os . MkdirAll ( v . HostPath . Path , os . ModePerm ) ; err != nil {
glog . Errorf ( "Create volume HostPath %q for Pod %q failed: %q" , v . HostPath . Path , format . Pod ( pod ) , err . Error ( ) )
return err
}
glog . V ( 4 ) . Infof ( "Created volume HostPath %q for Pod %q" , v . HostPath . Path , format . Pod ( pod ) )
2017-03-16 13:48:34 +00:00
}
}
}
return nil
}
2015-08-25 20:03:33 +00:00
// RunPod first creates the unit file for a pod, and then
// starts the unit over d-bus.
2016-11-18 20:50:58 +00:00
func ( r * Runtime ) RunPod ( pod * v1 . Pod , pullSecrets [ ] v1 . Secret ) error {
2015-11-20 17:54:37 +00:00
glog . V ( 4 ) . Infof ( "Rkt starts to run pod: name %q." , format . Pod ( pod ) )
2015-04-30 20:34:46 +00:00
2016-05-03 00:49:42 +00:00
var err error
2017-04-29 16:04:39 +00:00
var networkNamespace kubecontainer . ContainerID
2016-05-06 23:01:42 +00:00
var podIP string
2017-03-16 13:48:34 +00:00
err = createHostPathVolumes ( pod )
if err != nil {
return err
}
2017-04-29 16:04:39 +00:00
networkNamespace , podIP , err = r . setupPodNetwork ( pod )
2016-06-07 20:22:55 +00:00
if err != nil {
2017-04-29 16:04:39 +00:00
r . cleanupPodNetwork ( pod , networkNamespace )
2016-06-07 20:22:55 +00:00
return err
2016-05-03 00:49:42 +00:00
}
2017-04-29 16:04:39 +00:00
name , runtimePod , prepareErr := r . preparePod ( pod , podIP , pullSecrets , networkNamespace . ID )
2015-08-26 01:50:42 +00:00
// Set container references and generate events.
// If preparedPod fails, then send out 'failed' events for each container.
// Otherwise, store the container references so we can use them later to send events.
for i , c := range pod . Spec . Containers {
ref , err := kubecontainer . GenerateContainerRef ( pod , & c )
if err != nil {
2015-11-20 17:54:37 +00:00
glog . Errorf ( "Couldn't make a ref to pod %q, container %v: '%v'" , format . Pod ( pod ) , c . Name , err )
2015-08-26 01:50:42 +00:00
continue
}
if prepareErr != nil {
2016-11-18 20:50:58 +00:00
r . recorder . Eventf ( ref , v1 . EventTypeWarning , events . FailedToCreateContainer , "Failed to create rkt container with error: %v" , prepareErr )
2015-08-26 01:50:42 +00:00
continue
}
2015-10-07 17:58:05 +00:00
containerID := runtimePod . Containers [ i ] . ID
2015-08-26 01:50:42 +00:00
r . containerRefManager . SetRef ( containerID , ref )
}
if prepareErr != nil {
2017-04-29 16:04:39 +00:00
r . cleanupPodNetwork ( pod , networkNamespace )
2015-08-26 01:50:42 +00:00
return prepareErr
2015-04-30 20:34:46 +00:00
}
2015-08-26 01:50:42 +00:00
r . generateEvents ( runtimePod , "Created" , nil )
2015-08-25 20:03:33 +00:00
// RestartUnit has the same effect as StartUnit if the unit is not running, besides it can restart
// a unit if the unit file is changed and reloaded.
2015-12-11 13:25:35 +00:00
reschan := make ( chan string )
2016-05-03 00:49:42 +00:00
_ , err = r . systemd . RestartUnit ( name , "replace" , reschan )
2015-12-11 13:25:35 +00:00
if err != nil {
r . generateEvents ( runtimePod , "Failed" , err )
2017-04-29 16:04:39 +00:00
r . cleanupPodNetwork ( pod , networkNamespace )
2015-12-11 13:25:35 +00:00
return err
}
res := <- reschan
if res != "done" {
err := fmt . Errorf ( "Failed to restart unit %q: %s" , name , res )
2015-08-26 01:50:42 +00:00
r . generateEvents ( runtimePod , "Failed" , err )
2017-04-29 16:04:39 +00:00
r . cleanupPodNetwork ( pod , networkNamespace )
2015-04-30 20:34:46 +00:00
return err
}
2015-08-26 01:50:42 +00:00
r . generateEvents ( runtimePod , "Started" , nil )
2016-04-20 02:11:51 +00:00
// This is a temporary solution until we have a clean design on how
// kubelet handles events. See https://github.com/kubernetes/kubernetes/issues/23084.
if err := r . runLifecycleHooks ( pod , runtimePod , lifecyclePostStartHook ) ; err != nil {
2016-04-27 03:30:59 +00:00
if errKill := r . KillPod ( pod , * runtimePod , nil ) ; errKill != nil {
2016-04-20 02:11:51 +00:00
return errors . NewAggregate ( [ ] error { err , errKill } )
}
2017-04-29 16:04:39 +00:00
r . cleanupPodNetwork ( pod , networkNamespace )
2016-04-20 02:11:51 +00:00
return err
}
2015-04-30 20:34:46 +00:00
return nil
}
2015-04-30 22:11:07 +00:00
2016-11-18 20:50:58 +00:00
func ( r * Runtime ) runPreStopHook ( containerID kubecontainer . ContainerID , pod * v1 . Pod , container * v1 . Container ) error {
2016-04-20 02:11:51 +00:00
glog . V ( 4 ) . Infof ( "rkt: Running pre-stop hook for container %q of pod %q" , container . Name , format . Pod ( pod ) )
2016-05-23 20:19:54 +00:00
msg , err := r . runner . Run ( containerID , pod , container , container . Lifecycle . PreStop )
if err != nil {
ref , ok := r . containerRefManager . GetRef ( containerID )
if ! ok {
glog . Warningf ( "No ref for container %q" , containerID )
} else {
2016-11-18 20:50:58 +00:00
r . recorder . Eventf ( ref , v1 . EventTypeWarning , events . FailedPreStopHook , msg )
2016-05-23 20:19:54 +00:00
}
}
return err
2016-04-20 02:11:51 +00:00
}
2016-11-18 20:50:58 +00:00
func ( r * Runtime ) runPostStartHook ( containerID kubecontainer . ContainerID , pod * v1 . Pod , container * v1 . Container ) error {
2016-04-20 02:11:51 +00:00
glog . V ( 4 ) . Infof ( "rkt: Running post-start hook for container %q of pod %q" , container . Name , format . Pod ( pod ) )
cid , err := parseContainerID ( containerID )
if err != nil {
return fmt . Errorf ( "cannot parse container ID %v" , containerID )
}
isContainerRunning := func ( ) ( done bool , err error ) {
2016-06-17 21:28:30 +00:00
ctx , cancel := context . WithTimeout ( context . Background ( ) , r . requestTimeout )
defer cancel ( )
resp , err := r . apisvc . InspectPod ( ctx , & rktapi . InspectPodRequest { Id : cid . uuid } )
2016-04-20 02:11:51 +00:00
if err != nil {
return false , fmt . Errorf ( "failed to inspect rkt pod %q for pod %q" , cid . uuid , format . Pod ( pod ) )
}
for _ , app := range resp . Pod . Apps {
if app . Name == cid . appName {
return app . State == rktapi . AppState_APP_STATE_RUNNING , nil
}
}
return false , fmt . Errorf ( "failed to find container %q in rkt pod %q" , cid . appName , cid . uuid )
}
// TODO(yifan): Polling the pod's state for now.
timeout := time . Second * 5
pollInterval := time . Millisecond * 500
if err := utilwait . Poll ( pollInterval , timeout , isContainerRunning ) ; err != nil {
return fmt . Errorf ( "rkt: Pod %q doesn't become running in %v: %v" , format . Pod ( pod ) , timeout , err )
}
2016-05-23 20:19:54 +00:00
msg , err := r . runner . Run ( containerID , pod , container , container . Lifecycle . PostStart )
if err != nil {
ref , ok := r . containerRefManager . GetRef ( containerID )
if ! ok {
glog . Warningf ( "No ref for container %q" , containerID )
} else {
2016-11-18 20:50:58 +00:00
r . recorder . Eventf ( ref , v1 . EventTypeWarning , events . FailedPostStartHook , msg )
2016-05-23 20:19:54 +00:00
}
}
return err
2016-04-20 02:11:51 +00:00
}
// lifecycleHookType names the kind of container lifecycle hook to run.
type lifecycleHookType string

// Hook kinds accepted by runLifecycleHooks.
const (
	lifecyclePostStartHook = lifecycleHookType("post-start")
	lifecyclePreStopHook   = lifecycleHookType("pre-stop")
)
2016-11-18 20:50:58 +00:00
func ( r * Runtime ) runLifecycleHooks ( pod * v1 . Pod , runtimePod * kubecontainer . Pod , typ lifecycleHookType ) error {
2016-03-04 22:52:45 +00:00
var wg sync . WaitGroup
var errlist [ ] error
errCh := make ( chan error , len ( pod . Spec . Containers ) )
wg . Add ( len ( pod . Spec . Containers ) )
for i , c := range pod . Spec . Containers {
2016-11-18 20:50:58 +00:00
var hookFunc func ( kubecontainer . ContainerID , * v1 . Pod , * v1 . Container ) error
2016-04-20 02:11:51 +00:00
switch typ {
case lifecyclePostStartHook :
if c . Lifecycle != nil && c . Lifecycle . PostStart != nil {
hookFunc = r . runPostStartHook
}
case lifecyclePreStopHook :
if c . Lifecycle != nil && c . Lifecycle . PreStop != nil {
hookFunc = r . runPreStopHook
}
default :
errCh <- fmt . Errorf ( "Unrecognized lifecycle hook type %q for container %q in pod %q" , typ , c . Name , format . Pod ( pod ) )
}
if hookFunc == nil {
2016-03-04 22:52:45 +00:00
wg . Done ( )
continue
}
container := & pod . Spec . Containers [ i ]
2016-04-20 02:11:51 +00:00
runtimeContainer := runtimePod . FindContainerByName ( container . Name )
if runtimeContainer == nil {
// Container already gone.
wg . Done ( )
continue
}
containerID := runtimeContainer . ID
2016-03-04 22:52:45 +00:00
go func ( ) {
2016-04-20 02:11:51 +00:00
defer wg . Done ( )
if err := hookFunc ( containerID , pod , container ) ; err != nil {
glog . Errorf ( "rkt: Failed to run %s hook for container %q of pod %q: %v" , typ , container . Name , format . Pod ( pod ) , err )
2016-03-04 22:52:45 +00:00
errCh <- err
2016-04-20 02:11:51 +00:00
} else {
glog . V ( 4 ) . Infof ( "rkt: %s hook completed successfully for container %q of pod %q" , typ , container . Name , format . Pod ( pod ) )
2016-03-04 22:52:45 +00:00
}
} ( )
}
wg . Wait ( )
close ( errCh )
for err := range errCh {
errlist = append ( errlist , err )
}
return errors . NewAggregate ( errlist )
}
2015-11-21 00:56:35 +00:00
// convertRktPod will convert a rktapi.Pod to a kubecontainer.Pod
2015-12-17 00:52:39 +00:00
func ( r * Runtime ) convertRktPod ( rktpod * rktapi . Pod ) ( * kubecontainer . Pod , error ) {
2015-11-21 00:56:35 +00:00
manifest := & appcschema . PodManifest { }
err := json . Unmarshal ( rktpod . Manifest , manifest )
if err != nil {
return nil , err
}
2016-05-22 05:00:38 +00:00
podUID , ok := manifest . Annotations . Get ( types . KubernetesPodUIDLabel )
2015-11-21 00:56:35 +00:00
if ! ok {
2016-05-22 05:00:38 +00:00
return nil , fmt . Errorf ( "pod is missing annotation %s" , types . KubernetesPodUIDLabel )
2015-11-21 00:56:35 +00:00
}
2016-05-22 05:00:38 +00:00
podName , ok := manifest . Annotations . Get ( types . KubernetesPodNameLabel )
2015-11-21 00:56:35 +00:00
if ! ok {
2016-05-22 05:00:38 +00:00
return nil , fmt . Errorf ( "pod is missing annotation %s" , types . KubernetesPodNameLabel )
2015-11-21 00:56:35 +00:00
}
2016-05-22 05:00:38 +00:00
podNamespace , ok := manifest . Annotations . Get ( types . KubernetesPodNamespaceLabel )
2015-11-21 00:56:35 +00:00
if ! ok {
2016-05-22 05:00:38 +00:00
return nil , fmt . Errorf ( "pod is missing annotation %s" , types . KubernetesPodNamespaceLabel )
2015-11-21 00:56:35 +00:00
}
kubepod := & kubecontainer . Pod {
2016-05-22 05:00:38 +00:00
ID : kubetypes . UID ( podUID ) ,
2015-11-21 00:56:35 +00:00
Name : podName ,
Namespace : podNamespace ,
}
2015-12-15 01:26:43 +00:00
for i , app := range rktpod . Apps {
// The order of the apps is determined by the rkt pod manifest.
// TODO(yifan): Let the server to unmarshal the annotations? https://github.com/coreos/rkt/issues/1872
hashStr , ok := manifest . Apps [ i ] . Annotations . Get ( k8sRktContainerHashAnno )
2015-11-21 00:56:35 +00:00
if ! ok {
2015-12-15 01:26:43 +00:00
return nil , fmt . Errorf ( "app %q is missing annotation %s" , app . Name , k8sRktContainerHashAnno )
2015-11-21 00:56:35 +00:00
}
2015-12-15 01:26:43 +00:00
containerHash , err := strconv . ParseUint ( hashStr , 10 , 64 )
2015-11-21 00:56:35 +00:00
if err != nil {
2015-12-15 01:26:43 +00:00
return nil , fmt . Errorf ( "couldn't parse container's hash %q: %v" , hashStr , err )
2015-11-21 00:56:35 +00:00
}
kubepod . Containers = append ( kubepod . Containers , & kubecontainer . Container {
2016-03-18 18:43:20 +00:00
ID : buildContainerID ( & containerID { rktpod . Id , app . Name } ) ,
Name : app . Name ,
// By default, the version returned by rkt API service will be "latest" if not specified.
2016-07-21 00:06:18 +00:00
Image : fmt . Sprintf ( "%s:%s" , app . Image . Name , app . Image . Version ) ,
ImageID : app . Image . Id ,
Hash : containerHash ,
State : appStateToContainerState ( app . State ) ,
2015-11-21 00:56:35 +00:00
} )
}
return kubepod , nil
}
2016-05-05 19:04:36 +00:00
// GetPods runs 'rkt list' to get the list of rkt pods.
2015-08-08 21:29:57 +00:00
// Then it will use the result to construct a list of container runtime pods.
2015-04-30 22:11:07 +00:00
// If all is false, then only running pods will be returned, otherwise all pods will be
// returned.
2015-09-28 22:46:29 +00:00
func ( r * Runtime ) GetPods ( all bool ) ( [ ] * kubecontainer . Pod , error ) {
2015-04-30 22:11:07 +00:00
glog . V ( 4 ) . Infof ( "Rkt getting pods" )
2015-11-21 00:56:35 +00:00
listReq := & rktapi . ListPodsRequest {
2015-12-17 00:52:39 +00:00
Detail : true ,
Filters : [ ] * rktapi . PodFilter {
{
Annotations : [ ] * rktapi . KeyValue {
{
Key : k8sRktKubeletAnno ,
Value : k8sRktKubeletAnnoValue ,
} ,
2015-11-21 00:56:35 +00:00
} ,
} ,
} ,
}
if ! all {
2015-12-17 00:52:39 +00:00
listReq . Filters [ 0 ] . States = [ ] rktapi . PodState { rktapi . PodState_POD_STATE_RUNNING }
2015-11-21 00:56:35 +00:00
}
2016-06-17 21:28:30 +00:00
ctx , cancel := context . WithTimeout ( context . Background ( ) , r . requestTimeout )
defer cancel ( )
listResp , err := r . apisvc . ListPods ( ctx , listReq )
2015-04-30 22:11:07 +00:00
if err != nil {
2015-11-21 00:56:35 +00:00
return nil , fmt . Errorf ( "couldn't list pods: %v" , err )
2015-04-30 22:11:07 +00:00
}
2016-05-22 05:00:38 +00:00
pods := make ( map [ kubetypes . UID ] * kubecontainer . Pod )
var podIDs [ ] kubetypes . UID
2015-12-17 00:52:39 +00:00
for _ , pod := range listResp . Pods {
pod , err := r . convertRktPod ( pod )
2015-11-21 00:56:35 +00:00
if err != nil {
glog . Warningf ( "rkt: Cannot construct pod from unit file: %v." , err )
continue
2015-04-30 22:11:07 +00:00
}
2016-03-10 00:49:29 +00:00
// Group pods together.
oldPod , found := pods [ pod . ID ]
if ! found {
pods [ pod . ID ] = pod
2016-03-11 17:32:22 +00:00
podIDs = append ( podIDs , pod . ID )
2016-03-10 00:49:29 +00:00
continue
}
oldPod . Containers = append ( oldPod . Containers , pod . Containers ... )
}
2016-03-11 17:32:22 +00:00
// Convert map to list, using the consistent order from the podIDs array.
2016-03-10 00:49:29 +00:00
var result [ ] * kubecontainer . Pod
2016-03-11 17:32:22 +00:00
for _ , id := range podIDs {
result = append ( result , pods [ id ] )
2015-04-30 22:11:07 +00:00
}
2016-03-10 00:49:29 +00:00
return result , nil
2015-04-30 22:11:07 +00:00
}
2015-04-30 23:58:12 +00:00
2016-11-18 20:50:58 +00:00
func getPodTerminationGracePeriodInSecond ( pod * v1 . Pod ) int64 {
2016-06-21 00:12:10 +00:00
var gracePeriod int64
2016-03-04 22:52:45 +00:00
switch {
case pod . DeletionGracePeriodSeconds != nil :
gracePeriod = * pod . DeletionGracePeriodSeconds
case pod . Spec . TerminationGracePeriodSeconds != nil :
gracePeriod = * pod . Spec . TerminationGracePeriodSeconds
}
2016-06-21 00:12:10 +00:00
if gracePeriod < minimumGracePeriodInSeconds {
gracePeriod = minimumGracePeriodInSeconds
}
return gracePeriod
}
2016-11-18 20:50:58 +00:00
func ( r * Runtime ) waitPreStopHooks ( pod * v1 . Pod , runningPod * kubecontainer . Pod ) {
2016-06-21 00:12:10 +00:00
gracePeriod := getPodTerminationGracePeriodInSecond ( pod )
2016-03-04 22:52:45 +00:00
2016-04-20 02:11:51 +00:00
done := make ( chan struct { } )
2016-03-04 22:52:45 +00:00
go func ( ) {
2016-04-20 02:11:51 +00:00
if err := r . runLifecycleHooks ( pod , runningPod , lifecyclePreStopHook ) ; err != nil {
glog . Errorf ( "rkt: Some pre-stop hooks failed for pod %q: %v" , format . Pod ( pod ) , err )
2016-03-04 22:52:45 +00:00
}
2016-04-20 02:11:51 +00:00
close ( done )
2016-03-04 22:52:45 +00:00
} ( )
select {
case <- time . After ( time . Duration ( gracePeriod ) * time . Second ) :
2016-04-20 02:11:51 +00:00
glog . V ( 2 ) . Infof ( "rkt: Some pre-stop hooks did not complete in %d seconds for pod %q" , gracePeriod , format . Pod ( pod ) )
case <- done :
2016-03-04 22:52:45 +00:00
}
}
2015-04-30 23:58:12 +00:00
// KillPod invokes 'systemctl kill' to kill the unit that runs the pod.
2016-04-27 03:30:59 +00:00
// TODO: add support for gracePeriodOverride which is used in eviction scenarios
2016-11-18 20:50:58 +00:00
func ( r * Runtime ) KillPod ( pod * v1 . Pod , runningPod kubecontainer . Pod , gracePeriodOverride * int64 ) error {
2015-08-20 01:57:58 +00:00
glog . V ( 4 ) . Infof ( "Rkt is killing pod: name %q." , runningPod . Name )
2015-08-26 01:50:42 +00:00
2016-03-04 22:52:45 +00:00
if len ( runningPod . Containers ) == 0 {
glog . V ( 4 ) . Infof ( "rkt: Pod %q is already being killed, no action will be taken" , runningPod . Name )
return nil
}
if pod != nil {
r . waitPreStopHooks ( pod , & runningPod )
}
2016-04-05 01:03:40 +00:00
containerID , err := parseContainerID ( runningPod . Containers [ 0 ] . ID )
if err != nil {
glog . Errorf ( "rkt: Failed to get rkt uuid of the pod %q: %v" , runningPod . Name , err )
return err
}
serviceName := makePodServiceFileName ( containerID . uuid )
2016-06-07 20:22:55 +00:00
serviceFile := serviceFilePath ( serviceName )
2015-08-26 01:50:42 +00:00
r . generateEvents ( & runningPod , "Killing" , nil )
for _ , c := range runningPod . Containers {
2015-10-07 17:58:05 +00:00
r . containerRefManager . ClearRef ( c . ID )
2015-08-26 01:50:42 +00:00
}
2015-09-01 02:25:26 +00:00
// Since all service file have 'KillMode=mixed', the processes in
// the unit's cgroup will receive a SIGKILL if the normal stop timeouts.
2015-12-11 13:25:35 +00:00
reschan := make ( chan string )
2016-04-05 01:03:40 +00:00
if _ , err = r . systemd . StopUnit ( serviceName , "replace" , reschan ) ; err != nil {
2015-10-07 21:04:41 +00:00
glog . Errorf ( "rkt: Failed to stop unit %q: %v" , serviceName , err )
2015-09-01 02:25:26 +00:00
return err
}
2015-12-11 13:25:35 +00:00
res := <- reschan
if res != "done" {
2016-02-23 01:07:31 +00:00
err := fmt . Errorf ( "invalid result: %s" , res )
glog . Errorf ( "rkt: Failed to stop unit %q: %v" , serviceName , err )
2015-12-11 13:25:35 +00:00
return err
}
2016-06-07 20:22:55 +00:00
// Clean up networking. Use the service file to get pod details since 'pod' can be nil.
if err := r . cleanupPodNetworkFromServiceFile ( serviceFile ) ; err != nil {
glog . Errorf ( "rkt: failed to tear down network for unit %q: %v" , serviceName , err )
return err
2016-05-03 00:49:42 +00:00
}
2015-10-07 21:04:41 +00:00
return nil
2015-08-18 00:58:09 +00:00
}
2015-10-21 20:04:10 +00:00
// Type returns the identifier of this container runtime, RktType.
func (r *Runtime) Type() string {
	return RktType
}
2015-09-28 22:46:29 +00:00
func ( r * Runtime ) Version ( ) ( kubecontainer . Version , error ) {
2016-03-12 01:29:25 +00:00
r . versions . RLock ( )
defer r . versions . RUnlock ( )
return r . versions . binVersion , nil
2015-05-04 23:51:31 +00:00
}
2016-01-14 23:16:07 +00:00
func ( r * Runtime ) APIVersion ( ) ( kubecontainer . Version , error ) {
2016-03-12 01:29:25 +00:00
r . versions . RLock ( )
defer r . versions . RUnlock ( )
return r . versions . apiVersion , nil
2016-03-03 10:01:15 +00:00
}
// Status returns error if rkt is unhealthy, nil otherwise.
2016-11-02 04:39:46 +00:00
func ( r * Runtime ) Status ( ) ( * kubecontainer . RuntimeStatus , error ) {
return nil , r . checkVersion ( minimumRktBinVersion , minimumRktApiVersion , minimumSystemdVersion )
2016-01-14 23:16:07 +00:00
}
2015-05-04 23:51:31 +00:00
// SyncPod syncs the running pod to match the specified desired pod.
2016-11-18 20:50:58 +00:00
func ( r * Runtime ) SyncPod ( pod * v1 . Pod , _ v1 . PodStatus , podStatus * kubecontainer . PodStatus , pullSecrets [ ] v1 . Secret , backOff * flowcontrol . Backoff ) ( result kubecontainer . PodSyncResult ) {
2016-01-12 10:19:13 +00:00
var err error
defer func ( ) {
if err != nil {
result . Fail ( err )
}
} ( )
2015-12-22 23:05:01 +00:00
// TODO: (random-liu) Stop using running pod in SyncPod()
2016-09-24 08:17:11 +00:00
runningPod := kubecontainer . ConvertPodStatusToRunningPod ( r . Type ( ) , podStatus )
2015-05-04 23:51:31 +00:00
// Add references to all containers.
2015-10-07 17:58:05 +00:00
unidentifiedContainers := make ( map [ kubecontainer . ContainerID ] * kubecontainer . Container )
2015-05-04 23:51:31 +00:00
for _ , c := range runningPod . Containers {
unidentifiedContainers [ c . ID ] = c
}
restartPod := false
for _ , container := range pod . Spec . Containers {
2017-01-25 23:01:41 +00:00
expectedHash := kubecontainer . HashContainerLegacy ( & container )
2015-05-04 23:51:31 +00:00
c := runningPod . FindContainerByName ( container . Name )
if c == nil {
2016-09-24 08:17:11 +00:00
if kubecontainer . ShouldContainerBeRestarted ( & container , pod , podStatus ) {
2015-05-04 23:51:31 +00:00
glog . V ( 3 ) . Infof ( "Container %+v is dead, but RestartPolicy says that we should restart it." , container )
// TODO(yifan): Containers in one pod are fate-sharing at this moment, see:
// https://github.com/appc/spec/issues/276.
restartPod = true
break
}
continue
}
2015-08-10 17:30:34 +00:00
// TODO: check for non-root image directives. See ../docker/manager.go#SyncPod
2015-05-04 23:51:31 +00:00
// TODO(yifan): Take care of host network change.
containerChanged := c . Hash != 0 && c . Hash != expectedHash
if containerChanged {
2015-12-07 21:31:02 +00:00
glog . Infof ( "Pod %q container %q hash changed (%d vs %d), it will be killed and re-created." , format . Pod ( pod ) , container . Name , c . Hash , expectedHash )
2015-05-04 23:51:31 +00:00
restartPod = true
break
}
2015-10-19 22:15:59 +00:00
liveness , found := r . livenessManager . Get ( c . ID )
2016-11-18 20:50:58 +00:00
if found && liveness != proberesults . Success && pod . Spec . RestartPolicy != v1 . RestartPolicyNever {
2015-12-07 21:31:02 +00:00
glog . Infof ( "Pod %q container %q is unhealthy, it will be killed and re-created." , format . Pod ( pod ) , container . Name )
2015-05-04 23:51:31 +00:00
restartPod = true
break
}
delete ( unidentifiedContainers , c . ID )
}
// If there is any unidentified containers, restart the pod.
if len ( unidentifiedContainers ) > 0 {
restartPod = true
}
if restartPod {
2015-09-29 02:14:18 +00:00
// Kill the pod only if the pod is actually running.
if len ( runningPod . Containers ) > 0 {
2016-04-27 03:30:59 +00:00
if err = r . KillPod ( pod , runningPod , nil ) ; err != nil {
2016-01-12 10:19:13 +00:00
return
2015-09-29 02:14:18 +00:00
}
2015-05-04 23:51:31 +00:00
}
2016-01-12 10:19:13 +00:00
if err = r . RunPod ( pod , pullSecrets ) ; err != nil {
return
2015-05-04 23:51:31 +00:00
}
}
2016-01-12 10:19:13 +00:00
return
2015-05-04 23:51:31 +00:00
}
2016-04-21 00:49:08 +00:00
// podsByCreatedAt implements sort.Interface, ordering rkt pods by ascending
// CreatedAt value.
type podsByCreatedAt []*rktapi.Pod

// Len returns the number of pods.
func (pods podsByCreatedAt) Len() int {
	return len(pods)
}

// Swap exchanges the pods at positions i and j.
func (pods podsByCreatedAt) Swap(i, j int) {
	pods[j], pods[i] = pods[i], pods[j]
}

// Less reports whether the pod at i was created before the pod at j.
func (pods podsByCreatedAt) Less(i, j int) bool {
	return pods[i].CreatedAt < pods[j].CreatedAt
}
// getPodUID returns the pod's API UID, it returns
// empty UID if the UID cannot be determined.
2016-05-22 05:00:38 +00:00
func getPodUID ( pod * rktapi . Pod ) kubetypes . UID {
2016-04-21 00:49:08 +00:00
for _ , anno := range pod . Annotations {
2016-05-22 05:00:38 +00:00
if anno . Key == types . KubernetesPodUIDLabel {
return kubetypes . UID ( anno . Value )
2016-04-21 00:49:08 +00:00
}
}
2016-05-22 05:00:38 +00:00
return kubetypes . UID ( "" )
2016-04-21 00:49:08 +00:00
}
// podIsActive returns true if the pod is embryo, preparing or running.
// If a pod is prepared, it is not guaranteed to be active (e.g. the systemd
// service might fail).
func podIsActive(pod *rktapi.Pod) bool {
	switch pod.State {
	case rktapi.PodState_POD_STATE_EMBRYO,
		rktapi.PodState_POD_STATE_PREPARING,
		rktapi.PodState_POD_STATE_RUNNING:
		return true
	default:
		return false
	}
}
2016-05-03 00:49:02 +00:00
// GetNetNS returns the network namespace path for the given container
func ( r * Runtime ) GetNetNS ( containerID kubecontainer . ContainerID ) ( string , error ) {
2017-04-29 16:04:39 +00:00
// Currently the containerID is a UUID for a network namespace
// This hack is a way to create an unique network namespace for each new starting/restarting Pod
// We can do this because we played the same trick in
2016-05-03 00:49:42 +00:00
// `networkPlugin.SetUpPod` and `networkPlugin.TearDownPod`.
2017-04-29 16:04:39 +00:00
// See https://github.com/kubernetes/kubernetes/issues/45149
return netnsPathFromName ( containerID . ID ) , nil
2016-05-03 00:49:42 +00:00
}
2016-06-22 14:44:33 +00:00
// GetPodContainerID returns a ContainerID whose ID field is the pod's ID
// converted to a string. The returned error is always nil.
func (r *Runtime) GetPodContainerID(pod *kubecontainer.Pod) (kubecontainer.ContainerID, error) {
	id := kubecontainer.ContainerID{ID: string(pod.ID)}
	return id, nil
}
2017-05-09 14:15:22 +00:00
func ( r * Runtime ) getKubernetesDirective ( serviceFilePath string ) ( podService podServiceDirective , err error ) {
2016-05-03 00:49:42 +00:00
f , err := os . Open ( serviceFilePath )
if err != nil {
2017-04-29 16:04:39 +00:00
return podService , err
2016-05-03 00:49:42 +00:00
}
defer f . Close ( )
opts , err := unit . Deserialize ( f )
if err != nil {
2017-04-29 16:04:39 +00:00
return podService , err
2016-05-03 00:49:42 +00:00
}
2017-04-29 16:04:39 +00:00
var hostnetwork , networkNamespace string
2016-05-03 00:49:42 +00:00
for _ , o := range opts {
if o . Section != unitKubernetesSection {
continue
}
switch o . Name {
case unitPodUID :
2017-04-29 16:04:39 +00:00
podService . id = o . Value
2016-05-03 00:49:42 +00:00
case unitPodName :
2017-04-29 16:04:39 +00:00
podService . name = o . Value
2016-05-03 00:49:42 +00:00
case unitPodNamespace :
2017-04-29 16:04:39 +00:00
podService . namespace = o . Value
2016-06-07 20:22:55 +00:00
case unitPodHostNetwork :
hostnetwork = o . Value
2017-04-29 16:04:39 +00:00
case unitPodNetworkNamespace :
networkNamespace = o . Value
2016-05-03 00:49:42 +00:00
}
2017-04-29 16:04:39 +00:00
if podService . id != "" && podService . name != "" && podService . namespace != "" && hostnetwork != "" && networkNamespace != "" {
podService . hostNetwork , err = strconv . ParseBool ( hostnetwork )
podService . networkNamespace = kubecontainer . ContainerID { ID : networkNamespace }
2016-06-07 20:22:55 +00:00
if err != nil {
2017-04-29 16:04:39 +00:00
return podService , err
2016-06-07 20:22:55 +00:00
}
2017-04-29 16:04:39 +00:00
return podService , nil
2016-05-03 00:49:42 +00:00
}
}
2017-04-29 16:04:39 +00:00
return podService , fmt . Errorf ( "failed to parse pod from file %s" , serviceFilePath )
2016-05-03 00:49:02 +00:00
}
2016-07-06 22:44:15 +00:00
// DeleteContainer is not implemented for rkt; it always returns an
// "unimplemented" error.
func (r *Runtime) DeleteContainer(containerID kubecontainer.ContainerID) error {
	err := fmt.Errorf("unimplemented")
	return err
}
2017-04-29 16:04:39 +00:00
// getPodSystemdServiceFiles lists systemdServiceDir and returns the entries
// whose name starts with kubernetesUnitPrefix, i.e. the systemd units for k8s
// Pods. If the directory cannot be read, the error is logged and returned
// together with whatever ReadDir returned.
func (r *Runtime) getPodSystemdServiceFiles() ([]os.FileInfo, error) {
	// Get all the current units.
	entries, err := r.os.ReadDir(systemdServiceDir)
	if err != nil {
		glog.Errorf("rkt: Failed to read the systemd service directory: %v", err)
		return entries, err
	}

	// Keep only k8s unit files, filtering in place over the same backing array.
	kept := entries[:0]
	for i := range entries {
		if !strings.HasPrefix(entries[i].Name(), kubernetesUnitPrefix) {
			continue
		}
		kept = append(kept, entries[i])
	}
	return kept, nil
}
2015-09-28 22:46:29 +00:00
// GarbageCollect collects the pods/containers.
2016-04-21 00:49:08 +00:00
// After one GC iteration:
// - The deleted pods will be removed.
// - If the number of containers exceeds gcPolicy.MaxContainers,
// then containers whose ages are older than gcPolicy.minAge will
// be removed.
2017-05-22 18:00:22 +00:00
func ( r * Runtime ) GarbageCollect ( gcPolicy kubecontainer . ContainerGCPolicy , allSourcesReady bool , _ bool ) error {
2016-04-21 00:49:08 +00:00
var errlist [ ] error
var totalInactiveContainers int
var inactivePods [ ] * rktapi . Pod
var removeCandidates [ ] * rktapi . Pod
var allPods = map [ string ] * rktapi . Pod { }
glog . V ( 4 ) . Infof ( "rkt: Garbage collecting triggered with policy %v" , gcPolicy )
// GC all inactive systemd service files and pods.
2017-04-29 16:04:39 +00:00
files , err := r . getPodSystemdServiceFiles ( )
2016-04-21 00:49:08 +00:00
if err != nil {
return err
2015-10-07 21:04:41 +00:00
}
2016-06-17 21:28:30 +00:00
ctx , cancel := context . WithTimeout ( context . Background ( ) , r . requestTimeout )
defer cancel ( )
resp , err := r . apisvc . ListPods ( ctx , & rktapi . ListPodsRequest { Filters : kubernetesPodsFilters ( ) } )
2015-10-07 21:04:41 +00:00
if err != nil {
2016-04-21 00:49:08 +00:00
glog . Errorf ( "rkt: Failed to list pods: %v" , err )
2015-05-04 23:51:31 +00:00
return err
}
2016-04-21 00:49:08 +00:00
// Mark inactive pods.
for _ , pod := range resp . Pods {
allPods [ pod . Id ] = pod
if ! podIsActive ( pod ) {
uid := getPodUID ( pod )
2016-05-22 05:00:38 +00:00
if uid == kubetypes . UID ( "" ) {
2016-04-21 00:49:08 +00:00
glog . Errorf ( "rkt: Cannot get the UID of pod %q, pod is broken, will remove it" , pod . Id )
removeCandidates = append ( removeCandidates , pod )
continue
}
_ , found := r . podGetter . GetPodByUID ( uid )
2016-06-14 21:45:41 +00:00
if ! found && allSourcesReady {
2016-04-21 00:49:08 +00:00
removeCandidates = append ( removeCandidates , pod )
continue
}
inactivePods = append ( inactivePods , pod )
totalInactiveContainers = totalInactiveContainers + len ( pod . Apps )
2015-10-07 21:04:41 +00:00
}
}
2016-04-21 00:49:08 +00:00
// Remove any orphan service files.
2015-10-07 21:04:41 +00:00
for _ , f := range files {
2016-04-21 00:49:08 +00:00
serviceName := f . Name ( )
2017-04-29 16:04:39 +00:00
rktUUID := getRktUUIDFromServiceFileName ( serviceName )
if _ , ok := allPods [ rktUUID ] ; ! ok {
glog . V ( 4 ) . Infof ( "rkt: No rkt pod found for service file %q, will remove it" , serviceName )
2016-05-03 00:49:42 +00:00
2017-05-09 14:15:22 +00:00
if err := r . cleanupByPodId ( rktUUID ) ; err != nil {
errlist = append ( errlist , fmt . Errorf ( "rkt: Failed to clean up rkt pod %q: %v" , rktUUID , err ) )
2015-10-07 21:04:41 +00:00
}
}
}
2016-04-21 00:49:08 +00:00
sort . Sort ( podsByCreatedAt ( inactivePods ) )
// Enforce GCPolicy.MaxContainers.
for _ , pod := range inactivePods {
if totalInactiveContainers <= gcPolicy . MaxContainers {
break
}
creationTime := time . Unix ( 0 , pod . CreatedAt )
if creationTime . Add ( gcPolicy . MinAge ) . Before ( time . Now ( ) ) {
// The pod is old and we are exceeding the MaxContainers limit.
// Delete the pod.
removeCandidates = append ( removeCandidates , pod )
totalInactiveContainers = totalInactiveContainers - len ( pod . Apps )
}
}
2017-04-29 16:04:39 +00:00
// Remove pods and their service files.
2016-04-21 00:49:08 +00:00
for _ , pod := range removeCandidates {
2017-05-09 14:15:22 +00:00
if err := r . removePod ( pod ) ; err != nil {
2016-04-21 00:49:08 +00:00
errlist = append ( errlist , fmt . Errorf ( "rkt: Failed to clean up rkt pod %q: %v" , pod . Id , err ) )
}
}
return errors . NewAggregate ( errlist )
}
2017-04-29 16:04:39 +00:00
// Read kubernetes pod UUID, namespace, netns and name from systemd service file and
2016-05-03 00:49:42 +00:00
// use that to clean up any pod network that may still exist.
2016-06-07 20:22:55 +00:00
func ( r * Runtime ) cleanupPodNetworkFromServiceFile ( serviceFilePath string ) error {
2017-05-09 14:15:22 +00:00
podService , err := r . unitGetter . getKubernetesDirective ( serviceFilePath )
2016-06-07 20:22:55 +00:00
if err != nil {
return err
2016-05-03 00:49:42 +00:00
}
2016-11-18 20:50:58 +00:00
return r . cleanupPodNetwork ( & v1 . Pod {
2017-01-17 03:38:19 +00:00
ObjectMeta : metav1 . ObjectMeta {
2017-04-29 16:04:39 +00:00
UID : kubetypes . UID ( podService . id ) ,
Name : podService . name ,
Namespace : podService . namespace ,
2016-06-07 20:22:55 +00:00
} ,
2016-11-18 20:50:58 +00:00
Spec : v1 . PodSpec {
2017-04-29 16:04:39 +00:00
HostNetwork : podService . hostNetwork ,
2016-06-07 20:22:55 +00:00
} ,
2017-04-29 16:04:39 +00:00
} , podService . networkNamespace )
2016-05-03 00:49:42 +00:00
}
2017-05-09 14:15:22 +00:00
// Remove the touched file created by ExecStartPost in the systemd service file
func ( r * Runtime ) removeFinishedMarkerFile ( serviceName string ) error {
2016-05-03 00:49:42 +00:00
serviceFile := serviceFilePath ( serviceName )
2017-05-09 14:15:22 +00:00
podDetail , err := r . unitGetter . getKubernetesDirective ( serviceFile )
if err != nil {
return err
}
podDir := r . runtimeHelper . GetPodDir ( kubetypes . UID ( podDetail . id ) )
finishedFile := podFinishedMarkerPath ( podDir , getRktUUIDFromServiceFileName ( serviceName ) )
return r . os . Remove ( finishedFile )
}
2016-05-03 00:49:42 +00:00
2017-05-09 14:15:22 +00:00
// Iter over each container in the pod to delete its termination log file
func ( r * Runtime ) removeTerminationFiles ( pod * rktapi . Pod ) ( errlist [ ] error ) {
// container == app
for _ , app := range pod . Apps {
for _ , annotation := range app . Annotations {
if annotation . GetKey ( ) == k8sRktTerminationMessagePathAnno {
if err := r . os . Remove ( annotation . GetValue ( ) ) ; err != nil {
errlist = append ( errlist , fmt . Errorf ( "rkt: Failed to remove for pod %q container file %v" , pod . Id , err ) )
}
}
}
2016-06-07 20:22:55 +00:00
}
2017-05-09 14:15:22 +00:00
return errlist
}
func ( r * Runtime ) cleanupByPodId ( podID string ) ( errlist [ ] error ) {
serviceName := makePodServiceFileName ( podID )
serviceFile := serviceFilePath ( serviceName )
2016-05-03 00:49:42 +00:00
2017-05-09 14:15:22 +00:00
if err := r . cleanupPodNetworkFromServiceFile ( serviceFile ) ; err != nil {
errlist = append ( errlist , fmt . Errorf ( "rkt: Failed to clean up pod network from service %q: %v, the network may not be around already" , serviceName , err ) )
2016-04-21 00:49:08 +00:00
}
2017-05-09 14:15:22 +00:00
// GC finished marker, termination-log file, systemd service files as well.
2016-05-05 19:04:36 +00:00
if err := r . systemd . ResetFailedUnit ( serviceName ) ; err != nil {
2017-05-09 14:15:22 +00:00
errlist = append ( errlist , fmt . Errorf ( "rkt: Failed to reset the failed systemd service %q: %v" , serviceName , err ) )
}
if err := r . removeFinishedMarkerFile ( serviceName ) ; err != nil {
errlist = append ( errlist , fmt . Errorf ( "rkt: Failed to remove finished file %q for unit %q: %v" , serviceName , podID , err ) )
2016-05-05 19:04:36 +00:00
}
2016-05-03 00:49:42 +00:00
if err := r . os . Remove ( serviceFile ) ; err != nil {
2017-05-09 14:15:22 +00:00
errlist = append ( errlist , fmt . Errorf ( "rkt: Failed to remove service file %q for pod %q: %v" , serviceFile , podID , err ) )
}
return errlist
}
// removePod calls 'rkt rm $UUID' to delete a rkt pod,
// it also remove the systemd service file,
// the finished-* marker and the termination-log files
// related to the pod.
func ( r * Runtime ) removePod ( pod * rktapi . Pod ) error {
var errlist [ ] error
glog . V ( 4 ) . Infof ( "rkt: GC is removing pod %q" , pod )
if err := r . cleanupByPodId ( pod . Id ) ; err != nil {
errlist = append ( errlist , fmt . Errorf ( "rkt: Failed to remove pod %q: %v" , pod . Id , err ) )
}
if err := r . removeTerminationFiles ( pod ) ; err != nil {
errlist = append ( errlist , fmt . Errorf ( "rkt: Failed to clean up pod TerminationMessageFile %q: %v" , pod . Id , err ) )
}
if _ , err := r . cli . RunCommand ( nil , "rm" , pod . Id ) ; err != nil {
errlist = append ( errlist , fmt . Errorf ( "rkt: Failed to remove pod %q: %v" , pod . Id , err ) )
2016-04-21 00:49:08 +00:00
}
return errors . NewAggregate ( errlist )
2015-05-04 23:51:31 +00:00
}
2016-07-13 14:06:24 +00:00
// rktExitError implements /pkg/util/exec.ExitError interface.
2015-10-08 01:38:01 +00:00
type rktExitError struct { * exec . ExitError }
var _ utilexec . ExitError = & rktExitError { }
func ( r * rktExitError ) ExitStatus ( ) int {
if status , ok := r . Sys ( ) . ( syscall . WaitStatus ) ; ok {
return status . ExitStatus ( )
}
return 0
2015-05-04 23:51:31 +00:00
}
2016-06-01 18:39:40 +00:00
// newRktExitError wraps e in a *rktExitError when e is an *exec.ExitError.
// Any other value, including nil, is returned unchanged.
func newRktExitError(e error) error {
	switch exitErr := e.(type) {
	case *exec.ExitError:
		return &rktExitError{exitErr}
	default:
		return e
	}
}
2017-02-15 10:34:49 +00:00
func ( r * Runtime ) AttachContainer ( containerID kubecontainer . ContainerID , stdin io . Reader , stdout , stderr io . WriteCloser , tty bool , resize <- chan remotecommand . TerminalSize ) error {
2015-08-18 00:58:09 +00:00
return fmt . Errorf ( "unimplemented" )
2015-07-28 04:48:55 +00:00
}
2015-08-13 23:39:17 +00:00
// Note: In rkt, the container ID is in the form of "UUID:appName", where UUID is
// the rkt UUID, and appName is the container name.
2015-09-01 02:25:26 +00:00
// TODO(yifan): If the rkt is using lkvm as the stage1 image, then this function will fail.
2017-02-15 10:34:49 +00:00
func ( r * Runtime ) ExecInContainer ( containerID kubecontainer . ContainerID , cmd [ ] string , stdin io . Reader , stdout , stderr io . WriteCloser , tty bool , resize <- chan remotecommand . TerminalSize , timeout time . Duration ) error {
2015-05-04 23:51:31 +00:00
glog . V ( 4 ) . Infof ( "Rkt execing in container." )
id , err := parseContainerID ( containerID )
if err != nil {
return err
}
2016-04-26 22:20:06 +00:00
args := [ ] string { "enter" , fmt . Sprintf ( "--app=%s" , id . appName ) , id . uuid }
2015-05-04 23:51:31 +00:00
args = append ( args , cmd ... )
2016-05-24 01:16:01 +00:00
command := buildCommand ( r . config , args ... )
2015-05-04 23:51:31 +00:00
if tty {
2015-05-08 06:36:47 +00:00
p , err := kubecontainer . StartPty ( command )
2015-05-04 23:51:31 +00:00
if err != nil {
return err
}
defer p . Close ( )
// make sure to close the stdout stream
defer stdout . Close ( )
2017-02-15 10:34:49 +00:00
kubecontainer . HandleResizing ( resize , func ( size remotecommand . TerminalSize ) {
2017-07-05 17:44:41 +00:00
term . SetSize ( p . Fd ( ) , size )
2016-04-18 16:54:44 +00:00
} )
2015-05-04 23:51:31 +00:00
if stdin != nil {
go io . Copy ( p , stdin )
}
if stdout != nil {
go io . Copy ( stdout , p )
}
2016-06-01 18:39:40 +00:00
return newRktExitError ( command . Wait ( ) )
2015-05-04 23:51:31 +00:00
}
if stdin != nil {
// Use an os.Pipe here as it returns true *os.File objects.
2015-05-20 23:00:19 +00:00
// This way, if you run 'kubectl exec <pod> -i bash' (no tty) and type 'exit',
2015-05-04 23:51:31 +00:00
// the call below to command.Run() can unblock because its Stdin is the read half
// of the pipe.
2016-04-21 01:21:41 +00:00
r , w , err := r . os . Pipe ( )
2015-05-04 23:51:31 +00:00
if err != nil {
2016-06-01 18:39:40 +00:00
return newRktExitError ( err )
2015-05-04 23:51:31 +00:00
}
go io . Copy ( w , stdin )
command . Stdin = r
}
if stdout != nil {
command . Stdout = stdout
}
if stderr != nil {
command . Stderr = stderr
}
2016-06-01 18:39:40 +00:00
return newRktExitError ( command . Run ( ) )
2015-05-04 23:51:31 +00:00
}
// PortForward executes socat in the pod's network namespace and copies
// data between stream (representing the user's local connection on their
// computer) and the specified port in the container.
//
// TODO:
// - match cgroups of container
// - should we support nsenter + socat on the host? (current impl)
// - should we support nsenter + socat in a container, running with elevated privs and --pid=host?
//
// TODO(yifan): Merge with the same function in dockertools.
2017-01-07 05:06:19 +00:00
func ( r * Runtime ) PortForward ( pod * kubecontainer . Pod , port int32 , stream io . ReadWriteCloser ) error {
2015-05-04 23:51:31 +00:00
glog . V ( 4 ) . Infof ( "Rkt port forwarding in container." )
2016-06-17 21:28:30 +00:00
ctx , cancel := context . WithTimeout ( context . Background ( ) , r . requestTimeout )
defer cancel ( )
listResp , err := r . apisvc . ListPods ( ctx , & rktapi . ListPodsRequest {
2015-12-21 19:25:38 +00:00
Detail : true ,
Filters : runningKubernetesPodFilters ( pod . ID ) ,
} )
2015-05-04 23:51:31 +00:00
if err != nil {
2015-12-21 19:25:38 +00:00
return fmt . Errorf ( "couldn't list pods: %v" , err )
2015-05-04 23:51:31 +00:00
}
2015-12-21 19:25:38 +00:00
if len ( listResp . Pods ) != 1 {
var podlist [ ] string
for _ , p := range listResp . Pods {
podlist = append ( podlist , p . Id )
}
return fmt . Errorf ( "more than one running rkt pod for the kubernetes pod [%s]" , strings . Join ( podlist , ", " ) )
2015-05-04 23:51:31 +00:00
}
2016-09-05 08:27:30 +00:00
listPod := listResp . Pods [ 0 ]
2015-05-04 23:51:31 +00:00
2015-09-22 20:29:51 +00:00
socatPath , lookupErr := exec . LookPath ( "socat" )
2015-05-04 23:51:31 +00:00
if lookupErr != nil {
return fmt . Errorf ( "unable to do port forwarding: socat not found." )
}
2016-09-05 08:27:30 +00:00
// Check in config and in annotations if we're running kvm flavor
isKvm := strings . Contains ( r . config . Stage1Image , "kvm" )
for _ , anno := range listPod . Annotations {
if anno . Key == k8sRktStage1NameAnno {
isKvm = strings . Contains ( anno . Value , "kvm" )
break
}
}
2015-09-22 20:29:51 +00:00
2016-09-05 08:27:30 +00:00
var args [ ] string
var fwCaller string
if isKvm {
podNetworks := listPod . GetNetworks ( )
if podNetworks == nil {
return fmt . Errorf ( "unable to get networks" )
}
args = [ ] string { "-" , fmt . Sprintf ( "TCP4:%s:%d" , podNetworks [ 0 ] . Ipv4 , port ) }
fwCaller = socatPath
} else {
args = [ ] string { "-t" , fmt . Sprintf ( "%d" , listPod . Pid ) , "-n" , socatPath , "-" , fmt . Sprintf ( "TCP4:localhost:%d" , port ) }
nsenterPath , lookupErr := exec . LookPath ( "nsenter" )
if lookupErr != nil {
return fmt . Errorf ( "unable to do port forwarding: nsenter not found" )
}
fwCaller = nsenterPath
2015-05-04 23:51:31 +00:00
}
2015-09-22 20:29:51 +00:00
2016-09-05 08:27:30 +00:00
command := exec . Command ( fwCaller , args ... )
2015-05-04 23:51:31 +00:00
command . Stdout = stream
2015-09-22 20:29:51 +00:00
// If we use Stdin, command.Run() won't return until the goroutine that's copying
// from stream finishes. Unfortunately, if you have a client like telnet connected
// via port forwarding, as long as the user's telnet client is connected to the user's
// local listener that port forwarding sets up, the telnet session never exits. This
// means that even if socat has finished running, command.Run() won't ever return
// (because the client still has the connection and stream open).
//
// The work around is to use StdinPipe(), as Wait() (called by Run()) closes the pipe
// when the command (socat) exits.
inPipe , err := command . StdinPipe ( )
if err != nil {
return fmt . Errorf ( "unable to do port forwarding: error creating stdin pipe: %v" , err )
}
go func ( ) {
io . Copy ( inPipe , stream )
inPipe . Close ( )
} ( )
2015-05-04 23:51:31 +00:00
return command . Run ( )
}
2016-10-28 23:53:33 +00:00
// UpdatePodCIDR is the hook for updating the runtime config with the podCIDR.
// For rkt it is currently a no-op that ignores podCIDR and always returns
// nil; it is implemented only to satisfy the cri.
func (r *Runtime) UpdatePodCIDR(podCIDR string) error {
	return nil
}
2015-12-12 01:09:21 +00:00
// appStateToContainerState maps a rktapi.AppState onto the matching
// kubecontainer.ContainerState: running and exited are translated directly,
// every other app state becomes ContainerStateUnknown.
func appStateToContainerState(state rktapi.AppState) kubecontainer.ContainerState {
	if state == rktapi.AppState_APP_STATE_RUNNING {
		return kubecontainer.ContainerStateRunning
	}
	if state == rktapi.AppState_APP_STATE_EXITED {
		return kubecontainer.ContainerStateExited
	}
	return kubecontainer.ContainerStateUnknown
}
2015-12-21 19:25:38 +00:00
// getPodInfo returns the pod manifest, creation time and restart count of the pod.
2016-04-15 01:01:40 +00:00
func getPodInfo ( pod * rktapi . Pod ) ( podManifest * appcschema . PodManifest , restartCount int , err error ) {
2015-12-17 00:52:39 +00:00
// TODO(yifan): The manifest is only used for getting the annotations.
// Consider to let the server to unmarshal the annotations.
2015-12-12 01:09:21 +00:00
var manifest appcschema . PodManifest
if err = json . Unmarshal ( pod . Manifest , & manifest ) ; err != nil {
return
}
if countString , ok := manifest . Annotations . Get ( k8sRktRestartCountAnno ) ; ok {
restartCount , err = strconv . Atoi ( countString )
if err != nil {
return
}
}
2016-04-15 01:01:40 +00:00
return & manifest , restartCount , nil
2015-12-12 01:09:21 +00:00
}
2015-12-17 00:52:39 +00:00
// populateContainerStatus fills the container status according to the app's information.
2016-04-15 01:01:40 +00:00
func populateContainerStatus ( pod rktapi . Pod , app rktapi . App , runtimeApp appcschema . RuntimeApp , restartCount int , finishedTime time . Time ) ( * kubecontainer . ContainerStatus , error ) {
2015-12-17 00:52:39 +00:00
hashStr , ok := runtimeApp . Annotations . Get ( k8sRktContainerHashAnno )
if ! ok {
return nil , fmt . Errorf ( "No container hash in pod manifest" )
}
hashNum , err := strconv . ParseUint ( hashStr , 10 , 64 )
if err != nil {
return nil , err
}
2016-01-12 02:30:29 +00:00
var reason , message string
if app . State == rktapi . AppState_APP_STATE_EXITED {
if app . ExitCode == 0 {
reason = "Completed"
} else {
reason = "Error"
}
}
terminationMessagePath , ok := runtimeApp . Annotations . Get ( k8sRktTerminationMessagePathAnno )
if ok {
if data , err := ioutil . ReadFile ( terminationMessagePath ) ; err != nil {
message = fmt . Sprintf ( "Error on reading termination-log %s: %v" , terminationMessagePath , err )
} else {
message = string ( data )
}
}
2016-04-15 01:01:40 +00:00
createdTime := time . Unix ( 0 , pod . CreatedAt )
startedTime := time . Unix ( 0 , pod . StartedAt )
2015-12-17 00:52:39 +00:00
return & kubecontainer . ContainerStatus {
2016-04-15 01:01:40 +00:00
ID : buildContainerID ( & containerID { uuid : pod . Id , appName : app . Name } ) ,
Name : app . Name ,
State : appStateToContainerState ( app . State ) ,
CreatedAt : createdTime ,
StartedAt : startedTime ,
FinishedAt : finishedTime ,
ExitCode : int ( app . ExitCode ) ,
2016-03-18 18:43:20 +00:00
// By default, the version returned by rkt API service will be "latest" if not specified.
Image : fmt . Sprintf ( "%s:%s" , app . Image . Name , app . Image . Version ) ,
2016-11-18 20:50:58 +00:00
ImageID : "rkt://" + app . Image . Id , // TODO(yifan): Add the prefix only in v1.PodStatus.
2016-03-18 18:43:20 +00:00
Hash : hashNum ,
2015-12-17 00:52:39 +00:00
// TODO(yifan): Note that now all apps share the same restart count, this might
// change once apps don't share the same lifecycle.
// See https://github.com/appc/spec/pull/547.
RestartCount : restartCount ,
2016-01-12 02:30:29 +00:00
Reason : reason ,
Message : message ,
2015-12-17 00:52:39 +00:00
} , nil
}
2017-04-29 16:04:39 +00:00
// from a running systemd unit, return the network namespace of a Pod
// this field is inside the X-Kubernetes directive
2017-05-09 14:15:22 +00:00
func ( r * Runtime ) getNetworkNamespace ( uid kubetypes . UID , latestPod * rktapi . Pod ) ( networkNamespace kubecontainer . ContainerID , err error ) {
2017-04-29 16:04:39 +00:00
serviceFiles , err := r . getPodSystemdServiceFiles ( )
if err != nil {
return networkNamespace , err
}
for _ , f := range serviceFiles {
fileName := f . Name ( )
if latestPod . Id == getRktUUIDFromServiceFileName ( fileName ) {
2017-05-09 14:15:22 +00:00
podService , err := r . unitGetter . getKubernetesDirective ( serviceFilePath ( fileName ) )
2017-04-29 16:04:39 +00:00
if err != nil {
return networkNamespace , err
}
return podService . networkNamespace , nil
}
}
return networkNamespace , fmt . Errorf ( "Pod %q containing rktPod %q haven't find a corresponding NetworkNamespace in %d systemd units" , uid , latestPod . Id , len ( serviceFiles ) )
}
2016-04-15 01:01:40 +00:00
// GetPodStatus returns the status for a pod specified by a given UID, name,
// and namespace. It will attempt to find pod's information via a request to
// the rkt api server.
// An error will be returned if the api server returns an error. If the api
// server doesn't error, but doesn't provide meaningful information about the
// pod, a status with no information (other than the passed in arguments) is
// returned anyways.
2016-05-22 05:00:38 +00:00
func ( r * Runtime ) GetPodStatus ( uid kubetypes . UID , name , namespace string ) ( * kubecontainer . PodStatus , error ) {
2015-12-12 01:09:21 +00:00
podStatus := & kubecontainer . PodStatus {
ID : uid ,
Name : name ,
Namespace : namespace ,
}
2016-06-17 21:28:30 +00:00
ctx , cancel := context . WithTimeout ( context . Background ( ) , r . requestTimeout )
defer cancel ( )
listResp , err := r . apisvc . ListPods ( ctx , & rktapi . ListPodsRequest {
2015-12-17 00:52:39 +00:00
Detail : true ,
Filters : kubernetesPodFilters ( uid ) ,
} )
2015-12-12 01:09:21 +00:00
if err != nil {
return nil , fmt . Errorf ( "couldn't list pods: %v" , err )
}
2016-06-07 20:22:55 +00:00
var latestPod * rktapi . Pod
2015-12-12 01:09:21 +00:00
var latestRestartCount int = - 1
2015-12-17 00:52:39 +00:00
// In this loop, we group all containers from all pods together,
// also we try to find the latest pod, so we can fill other info of the pod below.
for _ , pod := range listResp . Pods {
2016-04-15 01:01:40 +00:00
manifest , restartCount , err := getPodInfo ( pod )
2015-12-12 01:09:21 +00:00
if err != nil {
2016-01-29 19:35:04 +00:00
glog . Warningf ( "rkt: Couldn't get necessary info from the rkt pod, (uuid %q): %v" , pod . Id , err )
2015-12-12 01:09:21 +00:00
continue
}
if restartCount > latestRestartCount {
2016-06-07 20:22:55 +00:00
latestPod = pod
2015-12-12 01:09:21 +00:00
latestRestartCount = restartCount
}
2016-04-15 01:01:40 +00:00
finishedTime := r . podFinishedAt ( uid , pod . Id )
2015-12-12 01:09:21 +00:00
for i , app := range pod . Apps {
// The order of the apps is determined by the rkt pod manifest.
2016-04-15 01:01:40 +00:00
cs , err := populateContainerStatus ( * pod , * app , manifest . Apps [ i ] , restartCount , finishedTime )
2015-12-12 01:09:21 +00:00
if err != nil {
2015-12-17 00:52:39 +00:00
glog . Warningf ( "rkt: Failed to populate container status(uuid %q, app %q): %v" , pod . Id , app . Name , err )
2015-12-12 01:09:21 +00:00
continue
}
2015-12-17 00:52:39 +00:00
podStatus . ContainerStatuses = append ( podStatus . ContainerStatuses , cs )
2015-12-12 01:09:21 +00:00
}
}
2017-04-29 16:04:39 +00:00
if latestPod == nil {
glog . Warningf ( "No latestPod: rkt api-svc returns [%d]rktPods, cannot fill podStatus.IP" , len ( listResp . Pods ) )
return podStatus , nil
}
2016-06-07 20:22:55 +00:00
// If we are running no-op network plugin, then get the pod IP from the rkt pod status.
2016-12-13 23:00:34 +00:00
if r . network . PluginName ( ) == network . DefaultPluginName {
2017-04-29 16:04:39 +00:00
for _ , n := range latestPod . Networks {
if n . Name == defaultNetworkName {
podStatus . IP = n . Ipv4
break
2016-06-07 20:22:55 +00:00
}
}
2017-04-29 16:04:39 +00:00
return podStatus , nil
}
2017-05-09 14:15:22 +00:00
networkNamespace , err := r . unitGetter . getNetworkNamespace ( uid , latestPod )
2017-04-29 16:04:39 +00:00
if err != nil {
glog . Warningf ( "networkNamespace: %v" , err )
}
status , err := r . network . GetPodNetworkStatus ( namespace , name , networkNamespace )
if err != nil {
glog . Warningf ( "rkt: %v" , err )
} else if status != nil {
// status can be nil when the pod is running on the host network,
// in which case the pod IP will be populated by the upper layer.
podStatus . IP = status . IP . String ( )
2015-12-12 01:09:21 +00:00
}
return podStatus , nil
2015-11-10 22:18:47 +00:00
}
2016-04-28 00:16:28 +00:00
// getOSReleaseInfo reads /etc/os-release and returns a map
// that contains the key value pairs in that file.
func getOSReleaseInfo ( ) ( map [ string ] string , error ) {
result := make ( map [ string ] string )
path := "/etc/os-release"
f , err := os . Open ( path )
if err != nil {
return nil , err
}
defer f . Close ( )
scanner := bufio . NewScanner ( f )
for scanner . Scan ( ) {
line := scanner . Text ( )
2016-08-19 21:49:51 +00:00
if len ( strings . TrimSpace ( line ) ) == 0 {
// Skips empty lines
continue
}
2016-04-28 00:16:28 +00:00
info := strings . SplitN ( line , "=" , 2 )
if len ( info ) != 2 {
2016-08-19 19:05:00 +00:00
glog . Warningf ( "Unexpected entry in os-release %q" , line )
continue
2016-04-28 00:16:28 +00:00
}
result [ info [ 0 ] ] = info [ 1 ]
}
if err := scanner . Err ( ) ; err != nil {
return nil , err
}
return result , nil
}
2016-08-17 01:18:36 +00:00
// convertKubeMounts translates kubelet mounts into appc volumes and mount
// points.
//
// One "host" volume, named with a fresh UUID, is created per unique host
// path; its read-only flag is that of the first mount using the path. One
// mount point is created per unique container path; later mounts targeting an
// already used container path are ignored with a warning. The returned slices
// are built by iterating maps, so their order is not fixed.
func convertKubeMounts(mounts []kubecontainer.Mount) ([]appctypes.Volume, []appctypes.MountPoint) {
	volumesByHostPath := make(map[string]*appctypes.Volume)
	mountPointsByPath := make(map[string]*appctypes.MountPoint)

	for _, mnt := range mounts {
		// Fresh variable per iteration: the volume keeps a pointer to it.
		readOnly := mnt.ReadOnly

		vol, known := volumesByHostPath[mnt.HostPath]
		if !known {
			vol = &appctypes.Volume{
				Name:     *appctypes.MustACName(string(uuid.NewUUID())),
				Kind:     "host",
				Source:   mnt.HostPath,
				ReadOnly: &readOnly,
			}
			volumesByHostPath[mnt.HostPath] = vol
		}

		if _, duplicate := mountPointsByPath[mnt.ContainerPath]; duplicate {
			glog.Warningf("Multiple mount points with the same container path %v, ignore it", mnt)
			continue
		}
		mountPointsByPath[mnt.ContainerPath] = &appctypes.MountPoint{
			Name:     vol.Name,
			Path:     mnt.ContainerPath,
			ReadOnly: readOnly,
		}
	}

	volumes := make([]appctypes.Volume, 0, len(volumesByHostPath))
	for _, vol := range volumesByHostPath {
		volumes = append(volumes, *vol)
	}

	mountPoints := make([]appctypes.MountPoint, 0, len(mountPointsByPath))
	for _, mp := range mountPointsByPath {
		mountPoints = append(mountPoints, *mp)
	}

	return volumes, mountPoints
}
// convertKubePortMappings translates kubelet port mappings into appc
// container ports and exposed host ports.
//
// Mappings without a host port (HostPort == 0) are dropped. For every
// remaining mapping one container port and one host port are emitted, both
// carrying the same AC name derived from PortMapping.Name, which is how the
// two are tied together.
func convertKubePortMappings(portMappings []kubecontainer.PortMapping) ([]appctypes.Port, []appctypes.ExposedPort) {
	count := len(portMappings)
	containerPorts := make([]appctypes.Port, 0, count)
	hostPorts := make([]appctypes.ExposedPort, 0, count)

	for i := range portMappings {
		mapping := &portMappings[i]

		// This matches the docker code's behaviour.
		if mapping.HostPort == 0 {
			continue
		}

		portName := convertToACName(mapping.Name)
		containerPort := appctypes.Port{
			Name:     portName,
			Protocol: string(mapping.Protocol),
			Port:     uint(mapping.ContainerPort),
		}
		hostPort := appctypes.ExposedPort{
			Name:     portName,
			HostPort: uint(mapping.HostPort),
		}

		containerPorts = append(containerPorts, containerPort)
		hostPorts = append(hostPorts, hostPort)
	}

	return containerPorts, hostPorts
}