/*
Copyright 2014 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e

import (
	"bytes"
	"fmt"
	"io"
	"math"
	"math/rand"
	"os"
	"os/exec"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"k8s.io/kubernetes/pkg/api"
	apierrs "k8s.io/kubernetes/pkg/api/errors"
	"k8s.io/kubernetes/pkg/api/resource"
	"k8s.io/kubernetes/pkg/client/cache"
	client "k8s.io/kubernetes/pkg/client/unversioned"
	"k8s.io/kubernetes/pkg/client/unversioned/clientcmd"
	clientcmdapi "k8s.io/kubernetes/pkg/client/unversioned/clientcmd/api"
	"k8s.io/kubernetes/pkg/cloudprovider"
	"k8s.io/kubernetes/pkg/fields"
	"k8s.io/kubernetes/pkg/kubectl"
	"k8s.io/kubernetes/pkg/labels"
	"k8s.io/kubernetes/pkg/runtime"
	"k8s.io/kubernetes/pkg/util"
	deploymentUtil "k8s.io/kubernetes/pkg/util/deployment"
	"k8s.io/kubernetes/pkg/util/sets"
	"k8s.io/kubernetes/pkg/util/wait"
	"k8s.io/kubernetes/pkg/watch"

	"github.com/davecgh/go-spew/spew"
	"golang.org/x/crypto/ssh"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)

const (
	// Initial pod start can be delayed O(minutes) by slow docker pulls.
	// TODO: Make this 30 seconds once #4566 is resolved.
	podStartTimeout = 5 * time.Minute

	// How long to wait for a service endpoint to be resolvable.
	serviceStartTimeout = 1 * time.Minute

	// String used to mark pod deletion.
	nonExist = "NonExist"

	// How often to poll pods and nodes.
	poll = 5 * time.Second

	// Service accounts are provisioned after namespace creation.
	// A service account is required to support pod creation in a namespace as part of admission control.
	serviceAccountProvisionTimeout = 2 * time.Minute

	// How long to try single API calls (like 'get' or 'list'). Used to prevent
	// transient failures from failing tests.
	singleCallTimeout = 30 * time.Second

	// How long nodes have to be "ready" when a test begins. They should already
	// be "ready" before the test starts, so this is small.
	nodeReadyInitialTimeout = 20 * time.Second

	// How long pods have to be "ready" when a test begins.
	podReadyBeforeTimeout = 2 * time.Minute

	podRespondingTimeout     = 2 * time.Minute
	serviceRespondingTimeout = 2 * time.Minute

	// How wide to print pod names, by default. Useful for aligning printing to
	// quickly scan through output.
	podPrintWidth = 55
)

type CloudConfig struct {
	ProjectID         string
	Zone              string
	Cluster           string
	MasterName        string
	NodeInstanceGroup string
	NumNodes          int
	ClusterTag        string

	Provider cloudprovider.Interface
}

type TestContextType struct {
	KubeConfig            string
	KubeContext           string
	CertDir               string
	Host                  string
	RepoRoot              string
	Provider              string
	CloudConfig           CloudConfig
	KubectlPath           string
	OutputDir             string
	prefix                string
	MinStartupPods        int
	UpgradeTarget         string
	PrometheusPushGateway string
	VerifyServiceAccount  bool
}

var testContext TestContextType

func SetTestContext(t TestContextType) {
	testContext = t
}

type ContainerFailures struct {
	status   *api.ContainerStateTerminated
	restarts int
}

// Convenient wrapper around cache.Store that returns a list of api.Pod instead of interface{}.
type podStore struct {
	cache.Store
	stopCh chan struct{}
}

func newPodStore(c *client.Client, namespace string, label labels.Selector, field fields.Selector) *podStore {
	lw := &cache.ListWatch{
		ListFunc: func() (runtime.Object, error) {
			return c.Pods(namespace).List(label, field)
		},
		WatchFunc: func(rv string) (watch.Interface, error) {
			return c.Pods(namespace).Watch(label, field, rv)
		},
	}
	store := cache.NewStore(cache.MetaNamespaceKeyFunc)
	stopCh := make(chan struct{})
	cache.NewReflector(lw, &api.Pod{}, store, 0).RunUntil(stopCh)
	return &podStore{store, stopCh}
}

func (s *podStore) List() []*api.Pod {
	objects := s.Store.List()
	pods := make([]*api.Pod, 0)
	for _, o := range objects {
		pods = append(pods, o.(*api.Pod))
	}
	return pods
}

func (s *podStore) Stop() {
	close(s.stopCh)
}
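
// Usage sketch (the selector below is illustrative, not taken from any test):
// a podStore lets a test poll its local reflector cache instead of re-listing
// pods through the API on every iteration.
//
//	store := newPodStore(c, ns, labels.SelectorFromSet(labels.Set(map[string]string{"name": "my-rc"})), fields.Everything())
//	defer store.Stop()
//	pods := store.List() // []*api.Pod served from the cache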

type RCConfig struct {
	Client        *client.Client
	Image         string
	Command       []string
	Name          string
	Namespace     string
	PollInterval  time.Duration
	Timeout       time.Duration
	PodStatusFile *os.File
	Replicas      int
	CpuRequest    int64 // millicores
	CpuLimit      int64 // millicores
	MemRequest    int64 // bytes
	MemLimit      int64 // bytes

	// Env vars, set the same for every pod.
	Env map[string]string

	// Extra labels added to every pod.
	Labels map[string]string

	// Ports to declare in the container (map of name to containerPort).
	Ports map[string]int

	// Pointer to a list of pods; if non-nil, will be set to a list of pods
	// created by this RC by RunRC.
	CreatedPods *[]*api.Pod

	// Maximum allowable container failures. If exceeded, RunRC returns an error.
	// Defaults to replicas*0.01 (but at least 1) if unspecified.
	MaxContainerFailures *int
}
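
// Usage sketch (the image name and counts below are illustrative): only Client,
// Image, Name, Namespace and Replicas are required; RunRC (defined further
// down) defaults PollInterval and Timeout when they are left zero.
//
//	err := RunRC(RCConfig{
//		Client:    c,
//		Image:     "gcr.io/google_containers/pause",
//		Name:      "my-rc",
//		Namespace: ns,
//		Replicas:  2,
//	})
//	expectNoError(err)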

func nowStamp() string {
	return time.Now().Format(time.StampMilli)
}

func Logf(format string, a ...interface{}) {
	fmt.Fprintf(GinkgoWriter, nowStamp()+": INFO: "+format+"\n", a...)
}

func Failf(format string, a ...interface{}) {
	Fail(nowStamp()+": "+fmt.Sprintf(format, a...), 1)
}

func Skipf(format string, args ...interface{}) {
	Skip(nowStamp() + ": " + fmt.Sprintf(format, args...))
}

func SkipUnlessNodeCountIsAtLeast(minNodeCount int) {
	if testContext.CloudConfig.NumNodes < minNodeCount {
		Skipf("Requires at least %d nodes (not %d)", minNodeCount, testContext.CloudConfig.NumNodes)
	}
}

func SkipIfProviderIs(unsupportedProviders ...string) {
	if providerIs(unsupportedProviders...) {
		Skipf("Not supported for providers %v (found %s)", unsupportedProviders, testContext.Provider)
	}
}

func SkipUnlessProviderIs(supportedProviders ...string) {
	if !providerIs(supportedProviders...) {
		Skipf("Only supported for providers %v (not %s)", supportedProviders, testContext.Provider)
	}
}

func providerIs(providers ...string) bool {
	for _, provider := range providers {
		if strings.ToLower(provider) == strings.ToLower(testContext.Provider) {
			return true
		}
	}
	return false
}

type podCondition func(pod *api.Pod) (bool, error)

// podReady returns whether pod has a condition of Ready with a status of true.
func podReady(pod *api.Pod) bool {
	for _, cond := range pod.Status.Conditions {
		if cond.Type == api.PodReady && cond.Status == api.ConditionTrue {
			return true
		}
	}
	return false
}

// logPodStates logs basic info of provided pods for debugging.
func logPodStates(pods []api.Pod) {
	// Find maximum widths for pod, node, and phase strings for column printing.
	maxPodW, maxNodeW, maxPhaseW, maxGraceW := len("POD"), len("NODE"), len("PHASE"), len("GRACE")
	for i := range pods {
		pod := &pods[i]
		if len(pod.ObjectMeta.Name) > maxPodW {
			maxPodW = len(pod.ObjectMeta.Name)
		}
		if len(pod.Spec.NodeName) > maxNodeW {
			maxNodeW = len(pod.Spec.NodeName)
		}
		if len(pod.Status.Phase) > maxPhaseW {
			maxPhaseW = len(pod.Status.Phase)
		}
	}
	// Increase widths by one to separate by a single space.
	maxPodW++
	maxNodeW++
	maxPhaseW++
	maxGraceW++

	// Log pod info. * does space padding, - makes them left-aligned.
	Logf("%-[1]*[2]s %-[3]*[4]s %-[5]*[6]s %-[7]*[8]s %[9]s",
		maxPodW, "POD", maxNodeW, "NODE", maxPhaseW, "PHASE", maxGraceW, "GRACE", "CONDITIONS")
	for _, pod := range pods {
		grace := ""
		if pod.DeletionGracePeriodSeconds != nil {
			grace = fmt.Sprintf("%ds", *pod.DeletionGracePeriodSeconds)
		}
		Logf("%-[1]*[2]s %-[3]*[4]s %-[5]*[6]s %[7]s %[8]s",
			maxPodW, pod.ObjectMeta.Name, maxNodeW, pod.Spec.NodeName, maxPhaseW, pod.Status.Phase, grace, pod.Status.Conditions)
	}
	Logf("") // Final empty line helps for readability.
}

// podRunningReady checks whether pod p's phase is running and it has a ready
// condition of status true.
func podRunningReady(p *api.Pod) (bool, error) {
	// Check the phase is running.
	if p.Status.Phase != api.PodRunning {
		return false, fmt.Errorf("want pod '%s' on '%s' to be '%v' but was '%v'",
			p.ObjectMeta.Name, p.Spec.NodeName, api.PodRunning, p.Status.Phase)
	}
	// Check the ready condition is true.
	if !podReady(p) {
		return false, fmt.Errorf("pod '%s' on '%s' didn't have condition {%v %v}; conditions: %v",
			p.ObjectMeta.Name, p.Spec.NodeName, api.PodReady, api.ConditionTrue, p.Status.Conditions)
	}
	return true, nil
}

// hasReplicationControllersForPod checks whether a pod is controlled by a replication controller in the list.
func hasReplicationControllersForPod(rcs *api.ReplicationControllerList, pod api.Pod) bool {
	for _, rc := range rcs.Items {
		selector := labels.SelectorFromSet(rc.Spec.Selector)
		if selector.Matches(labels.Set(pod.ObjectMeta.Labels)) {
			return true
		}
	}
	return false
}

// waitForPodsRunningReady waits up to timeout to ensure that all pods in
// namespace ns are either running and ready, or failed but controlled by a
// replication controller. Also, it ensures that at least minPods are running
// and ready. It has separate behavior from other 'wait for' pods functions in
// that it requests the list of pods on every iteration. This is useful, for
// example, in cluster startup, because the number of pods increases while
// waiting.
func waitForPodsRunningReady(ns string, minPods int, timeout time.Duration) error {
	c, err := loadClient()
	if err != nil {
		return err
	}
	start := time.Now()
	Logf("Waiting up to %v for all pods (need at least %d) in namespace '%s' to be running and ready",
		timeout, minPods, ns)
	if wait.Poll(poll, timeout, func() (bool, error) {
		// We get the new list of pods and replication controllers in every
		// iteration because more pods come online during startup and we want to
		// ensure they are also checked.
		rcList, err := c.ReplicationControllers(ns).List(labels.Everything())
		if err != nil {
			Logf("Error getting replication controllers in namespace '%s': %v", ns, err)
			return false, nil
		}
		replicas := 0
		for _, rc := range rcList.Items {
			replicas += rc.Spec.Replicas
		}

		podList, err := c.Pods(ns).List(labels.Everything(), fields.Everything())
		if err != nil {
			Logf("Error getting pods in namespace '%s': %v", ns, err)
			return false, nil
		}
		nOk, replicaOk, badPods := 0, 0, []api.Pod{}
		for _, pod := range podList.Items {
			if res, err := podRunningReady(&pod); res && err == nil {
				nOk++
				if hasReplicationControllersForPod(rcList, pod) {
					replicaOk++
				}
			} else {
				if pod.Status.Phase != api.PodFailed {
					Logf("The status of Pod %s is %s, waiting for it to be either Running or Failed", pod.ObjectMeta.Name, pod.Status.Phase)
					badPods = append(badPods, pod)
				} else if !hasReplicationControllersForPod(rcList, pod) {
					Logf("Pod %s is Failed, but it's not controlled by a ReplicationController", pod.ObjectMeta.Name)
					badPods = append(badPods, pod)
				}
				// ignore failed pods that are controlled by a replication controller
			}
		}

		Logf("%d / %d pods in namespace '%s' are running and ready (%d seconds elapsed)",
			nOk, len(podList.Items), ns, int(time.Since(start).Seconds()))
		Logf("expected %d pod replicas in namespace '%s', %d are Running and Ready.", replicas, ns, replicaOk)

		if replicaOk == replicas && nOk >= minPods && len(badPods) == 0 {
			return true, nil
		}
		logPodStates(badPods)
		return false, nil
	}) != nil {
		return fmt.Errorf("Not all pods in namespace '%s' running and ready within %v", ns, timeout)
	}
	return nil
}

func waitForServiceAccountInNamespace(c *client.Client, ns, serviceAccountName string, timeout time.Duration) error {
	Logf("Waiting up to %v for service account %s to be provisioned in ns %s", timeout, serviceAccountName, ns)
	for start := time.Now(); time.Since(start) < timeout; time.Sleep(poll) {
		sa, err := c.ServiceAccounts(ns).Get(serviceAccountName)
		if apierrs.IsNotFound(err) {
			Logf("Get service account %s in ns %s failed, ignoring for %v: %v", serviceAccountName, ns, poll, err)
			continue
		}
		if err != nil {
			Logf("Get service account %s in ns %s failed: %v", serviceAccountName, ns, err)
			return err
		}
		if len(sa.Secrets) == 0 {
			Logf("Service account %s in ns %s had 0 secrets, ignoring for %v: %v", serviceAccountName, ns, poll, err)
			continue
		}
		Logf("Service account %s in ns %s with secrets found. (%v)", serviceAccountName, ns, time.Since(start))
		return nil
	}
	return fmt.Errorf("Service account %s in namespace %s not ready within %v", serviceAccountName, ns, timeout)
}

func waitForPodCondition(c *client.Client, ns, podName, desc string, timeout time.Duration, condition podCondition) error {
	Logf("Waiting up to %[1]v for pod %-[2]*[3]s status to be %[4]s", timeout, podPrintWidth, podName, desc)
	for start := time.Now(); time.Since(start) < timeout; time.Sleep(poll) {
		pod, err := c.Pods(ns).Get(podName)
		if err != nil {
			// Aligning this text makes it much more readable
			Logf("Get pod %-[1]*[2]s in namespace '%[3]s' failed, ignoring for %[4]v. Error: %[5]v",
				podPrintWidth, podName, ns, poll, err)
			continue
		}
		done, err := condition(pod)
		if done {
			return err
		}
		Logf("Waiting for pod %-[1]*[2]s in namespace '%[3]s' status to be '%[4]s'"+
			"(found phase: %[5]q, readiness: %[6]t) (%[7]v elapsed)",
			podPrintWidth, podName, ns, desc, pod.Status.Phase, podReady(pod), time.Since(start))
	}
	return fmt.Errorf("gave up waiting for pod '%s' to be '%s' after %v", podName, desc, timeout)
}

// waitForDefaultServiceAccountInNamespace waits for the default service account to be provisioned.
// The default service account is what is associated with pods when they do not specify a service account;
// as a result, pods cannot be provisioned in a namespace until the service account is provisioned.
func waitForDefaultServiceAccountInNamespace(c *client.Client, namespace string) error {
	return waitForServiceAccountInNamespace(c, namespace, "default", serviceAccountProvisionTimeout)
}

// waitForPersistentVolumePhase waits for a PersistentVolume to be in a specific phase or until timeout occurs, whichever comes first.
func waitForPersistentVolumePhase(phase api.PersistentVolumePhase, c *client.Client, pvName string, poll, timeout time.Duration) error {
	Logf("Waiting up to %v for PersistentVolume %s to have phase %s", timeout, pvName, phase)
	for start := time.Now(); time.Since(start) < timeout; time.Sleep(poll) {
		pv, err := c.PersistentVolumes().Get(pvName)
		if err != nil {
			Logf("Get persistent volume %s failed, ignoring for %v: %v", pvName, poll, err)
			continue
		}
		if pv.Status.Phase == phase {
			Logf("PersistentVolume %s found and phase=%s (%v)", pvName, phase, time.Since(start))
			return nil
		}
		Logf("PersistentVolume %s found but phase is %s instead of %s.", pvName, pv.Status.Phase, phase)
	}
	return fmt.Errorf("PersistentVolume %s not in phase %s within %v", pvName, phase, timeout)
}

// createTestingNS should be used by every test; note that we append a common prefix to the provided test name.
// Please see NewFramework instead of using this directly.
func createTestingNS(baseName string, c *client.Client) (*api.Namespace, error) {
	namespaceObj := &api.Namespace{
		ObjectMeta: api.ObjectMeta{
			GenerateName: fmt.Sprintf("e2e-tests-%v-", baseName),
			Namespace:    "",
		},
		Status: api.NamespaceStatus{},
	}
	// Be robust about making the namespace creation call.
	var got *api.Namespace
	if err := wait.Poll(poll, singleCallTimeout, func() (bool, error) {
		var err error
		got, err = c.Namespaces().Create(namespaceObj)
		if err != nil {
			return false, nil
		}
		return true, nil
	}); err != nil {
		return nil, err
	}

	if testContext.VerifyServiceAccount {
		if err := waitForDefaultServiceAccountInNamespace(c, got.Name); err != nil {
			return nil, err
		}
	}
	return got, nil
}
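
// Usage sketch (the base name and timeout are illustrative): tests that do not
// go through NewFramework typically pair createTestingNS with deleteNS below.
//
//	ns, err := createTestingNS("my-suite", c)
//	expectNoError(err)
//	defer func() { expectNoError(deleteNS(c, ns.Name, 5*time.Minute)) }()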

// checkTestingNSDeletedExcept checks whether all existing e2e-based namespaces are in the Terminating state
// and waits until they are finally deleted. It ignores the namespace named by skip.
func checkTestingNSDeletedExcept(c *client.Client, skip string) error {
	// TODO: Since we don't have support for bulk resource deletion in the API,
	// while deleting a namespace we are deleting all objects from that namespace
	// one by one (one deletion == one API call). This basically exposes us to
	// throttling - currently controller-manager has a limit of max 20 QPS.
	// Once #10217 is implemented and used in namespace-controller, deleting all
	// objects from a given namespace should be much faster and we will be able
	// to lower this timeout.
	// However, now Density test is producing ~26000 events and Load capacity test
	// is producing ~35000 events, thus assuming there are no other requests it will
	// take ~30 minutes to fully delete the namespace. Thus I'm setting it to 60
	// minutes to avoid any timeouts here.
	timeout := 60 * time.Minute

	Logf("Waiting for terminating namespaces to be deleted...")
	for start := time.Now(); time.Since(start) < timeout; time.Sleep(15 * time.Second) {
		namespaces, err := c.Namespaces().List(labels.Everything(), fields.Everything())
		if err != nil {
			Logf("Listing namespaces failed: %v", err)
			continue
		}
		terminating := 0
		for _, ns := range namespaces.Items {
			if strings.HasPrefix(ns.ObjectMeta.Name, "e2e-tests-") && ns.ObjectMeta.Name != skip {
				if ns.Status.Phase == api.NamespaceActive {
					return fmt.Errorf("Namespace %s is active", ns.ObjectMeta.Name)
				}
				terminating++
			}
		}
		if terminating == 0 {
			return nil
		}
	}
	return fmt.Errorf("Waiting for terminating namespaces to be deleted timed out")
}

// deleteNS deletes the provided namespace, waits for it to be completely deleted, and then checks
// whether there are any pods remaining in a non-terminating state.
func deleteNS(c *client.Client, namespace string, timeout time.Duration) error {
	if err := c.Namespaces().Delete(namespace); err != nil {
		return err
	}

	err := wait.Poll(5*time.Second, timeout, func() (bool, error) {
		if _, err := c.Namespaces().Get(namespace); err != nil {
			if apierrs.IsNotFound(err) {
				return true, nil
			}
			Logf("Error while waiting for namespace to be terminated: %v", err)
			return false, nil
		}
		return false, nil
	})

	// check for pods that were not deleted
	remaining := []string{}
	missingTimestamp := false
	if pods, perr := c.Pods(namespace).List(labels.Everything(), fields.Everything()); perr == nil {
		for _, pod := range pods.Items {
			Logf("Pod %s %s on node %s remains, has deletion timestamp %s", namespace, pod.Name, pod.Spec.NodeName, pod.DeletionTimestamp)
			remaining = append(remaining, pod.Name)
			if pod.DeletionTimestamp == nil {
				missingTimestamp = true
			}
		}
	}

	// a timeout occurred
	if err != nil {
		if missingTimestamp {
			return fmt.Errorf("namespace %s was not deleted within limit: %v, some pods were not marked with a deletion timestamp, pods remaining: %v", namespace, err, remaining)
		}
		return fmt.Errorf("namespace %s was not deleted within limit: %v, pods remaining: %v", namespace, err, remaining)
	}

	// pods were not deleted but the namespace was deleted
	if len(remaining) > 0 {
		return fmt.Errorf("pods remained within namespace %s after deletion: %v", namespace, remaining)
	}
	return nil
}

func waitForPodRunningInNamespace(c *client.Client, podName string, namespace string) error {
	return waitForPodCondition(c, namespace, podName, "running", podStartTimeout, func(pod *api.Pod) (bool, error) {
		if pod.Status.Phase == api.PodRunning {
			Logf("Found pod '%s' on node '%s'", podName, pod.Spec.NodeName)
			return true, nil
		}
		if pod.Status.Phase == api.PodFailed {
			return true, fmt.Errorf("Giving up; pod went into failed status: \n%s", spew.Sprintf("%#v", pod))
		}
		return false, nil
	})
}

// waitForPodNotPending returns an error if it took too long for the pod to go out of pending state.
func waitForPodNotPending(c *client.Client, ns, podName string) error {
	return waitForPodCondition(c, ns, podName, "!pending", podStartTimeout, func(pod *api.Pod) (bool, error) {
		if pod.Status.Phase != api.PodPending {
			Logf("Saw pod '%s' in namespace '%s' out of pending state (found '%q')", podName, ns, pod.Status.Phase)
			return true, nil
		}
		return false, nil
	})
}

// waitForPodSuccessInNamespace returns nil if the pod reached state success, or an error if it reached failure or ran too long.
func waitForPodSuccessInNamespace(c *client.Client, podName string, contName string, namespace string) error {
	return waitForPodCondition(c, namespace, podName, "success or failure", podStartTimeout, func(pod *api.Pod) (bool, error) {
		// Cannot use pod.Status.Phase == api.PodSucceeded/api.PodFailed due to #2632
		ci, ok := api.GetContainerStatus(pod.Status.ContainerStatuses, contName)
		if !ok {
			Logf("No Status.Info for container '%s' in pod '%s' yet", contName, podName)
		} else {
			if ci.State.Terminated != nil {
				if ci.State.Terminated.ExitCode == 0 {
					By("Saw pod success")
					return true, nil
				}
				return true, fmt.Errorf("pod '%s' terminated with failure: %+v", podName, ci.State.Terminated)
			}
			Logf("Nil State.Terminated for container '%s' in pod '%s' in namespace '%s' so far", contName, podName, namespace)
		}
		return false, nil
	})
}

// waitForRCPodOnNode returns the pod from the given replication controller (described by rcName) which is scheduled on the given node.
// In case of failure or too long waiting time, an error is returned.
func waitForRCPodOnNode(c *client.Client, ns, rcName, node string) (*api.Pod, error) {
	label := labels.SelectorFromSet(labels.Set(map[string]string{"name": rcName}))
	var p *api.Pod = nil
	err := wait.Poll(10*time.Second, 5*time.Minute, func() (bool, error) {
		Logf("Waiting for pod %s to appear on node %s", rcName, node)
		pods, err := c.Pods(ns).List(label, fields.Everything())
		if err != nil {
			return false, err
		}
		for _, pod := range pods.Items {
			if pod.Spec.NodeName == node {
				Logf("Pod %s found on node %s", pod.Name, node)
				p = &pod
				return true, nil
			}
		}
		return false, nil
	})
	return p, err
}

func waitForPodToDisappear(c *client.Client, ns, podName string, label labels.Selector, interval, timeout time.Duration) error {
	return wait.Poll(interval, timeout, func() (bool, error) {
		Logf("Waiting for pod %s to disappear", podName)
		pods, err := c.Pods(ns).List(label, fields.Everything())
		if err != nil {
			return false, err
		}
		found := false
		for _, pod := range pods.Items {
			if pod.Name == podName {
				Logf("Pod %s still exists", podName)
				found = true
			}
		}
		if !found {
			Logf("Pod %s no longer exists", podName)
			return true, nil
		}
		return false, nil
	})
}

// waitForRCPodToDisappear returns nil if the pod from the given replication controller (described by rcName) no longer exists.
// In case of failure or too long waiting time, an error is returned.
func waitForRCPodToDisappear(c *client.Client, ns, rcName, podName string) error {
	label := labels.SelectorFromSet(labels.Set(map[string]string{"name": rcName}))
	return waitForPodToDisappear(c, ns, podName, label, 20*time.Second, 5*time.Minute)
}

// waitForService waits until the service appears (exist == true), or disappears (exist == false).
func waitForService(c *client.Client, namespace, name string, exist bool, interval, timeout time.Duration) error {
	err := wait.Poll(interval, timeout, func() (bool, error) {
		_, err := c.Services(namespace).Get(name)
		switch {
		case err == nil:
			if !exist {
				return false, nil
			}
			Logf("Service %s in namespace %s found.", name, namespace)
			return true, nil
		case apierrs.IsNotFound(err):
			if exist {
				return false, nil
			}
			Logf("Service %s in namespace %s disappeared.", name, namespace)
			return true, nil
		default:
			Logf("Get service %s in namespace %s failed: %v", name, namespace, err)
			return false, nil
		}
	})
	if err != nil {
		stateMsg := map[bool]string{true: "to appear", false: "to disappear"}
		return fmt.Errorf("error waiting for service %s/%s %s: %v", namespace, name, stateMsg[exist], err)
	}
	return nil
}

// waitForServiceEndpointsNum waits until the number of endpoints that implement the service reaches expectNum.
func waitForServiceEndpointsNum(c *client.Client, namespace, serviceName string, expectNum int, interval, timeout time.Duration) error {
	return wait.Poll(interval, timeout, func() (bool, error) {
		Logf("Waiting for service %s to have %d endpoints", serviceName, expectNum)
		list, err := c.Endpoints(namespace).List(labels.Everything())
		if err != nil {
			return false, err
		}
		for _, e := range list.Items {
			if e.Name == serviceName && countEndpointsNum(&e) == expectNum {
				return true, nil
			}
		}
		return false, nil
	})
}

func countEndpointsNum(e *api.Endpoints) int {
	num := 0
	for _, sub := range e.Subsets {
		num += len(sub.Addresses)
	}
	return num
}

// waitForReplicationController waits until the RC appears (exist == true), or disappears (exist == false).
func waitForReplicationController(c *client.Client, namespace, name string, exist bool, interval, timeout time.Duration) error {
	err := wait.Poll(interval, timeout, func() (bool, error) {
		_, err := c.ReplicationControllers(namespace).Get(name)
		if err != nil {
			Logf("Get ReplicationController %s in namespace %s failed (%v).", name, namespace, err)
			return !exist, nil
		}
		Logf("ReplicationController %s in namespace %s found.", name, namespace)
		return exist, nil
	})
	if err != nil {
		stateMsg := map[bool]string{true: "to appear", false: "to disappear"}
		return fmt.Errorf("error waiting for ReplicationController %s/%s %s: %v", namespace, name, stateMsg[exist], err)
	}
	return nil
}

// podResponseChecker is a context for checking pods' responses by issuing GETs to them and verifying that the reply contains the pod name.
type podResponseChecker struct {
	c              *client.Client
	ns             string
	label          labels.Selector
	controllerName string
	respondName    bool // Whether the pod should respond with its own name.
	pods           *api.PodList
}

// checkAllResponses issues GETs to all pods in the context and verifies that they reply with their pod name.
func (r podResponseChecker) checkAllResponses() (done bool, err error) {
	successes := 0
	currentPods, err := r.c.Pods(r.ns).List(r.label, fields.Everything())
	Expect(err).NotTo(HaveOccurred())
	for i, pod := range r.pods.Items {
		// Check that the replica list remains unchanged, otherwise we have problems.
		if !isElementOf(pod.UID, currentPods) {
			return false, fmt.Errorf("pod with UID %s is no longer a member of the replica set. Must have been restarted for some reason. Current replica set: %v", pod.UID, currentPods)
		}
		body, err := r.c.Get().
			Prefix("proxy").
			Namespace(r.ns).
			Resource("pods").
			Name(string(pod.Name)).
			Do().
			Raw()
		if err != nil {
			Logf("Controller %s: Failed to GET from replica %d [%s]: %v:", r.controllerName, i+1, pod.Name, err)
			continue
		}
		// The response checker expects the pod's name unless !respondName, in
		// which case it just checks for a non-empty response.
		got := string(body)
		what := ""
		if r.respondName {
			what = "expected"
			want := pod.Name
			if got != want {
				Logf("Controller %s: Replica %d [%s] expected response %q but got %q",
					r.controllerName, i+1, pod.Name, want, got)
				continue
			}
		} else {
			what = "non-empty"
			if len(got) == 0 {
				Logf("Controller %s: Replica %d [%s] expected non-empty response",
					r.controllerName, i+1, pod.Name)
				continue
			}
		}
		successes++
		Logf("Controller %s: Got %s result from replica %d [%s]: %q, %d of %d required successes so far",
			r.controllerName, what, i+1, pod.Name, got, successes, len(r.pods.Items))
	}
	if successes < len(r.pods.Items) {
		return false, nil
	}
	return true, nil
}

func podsResponding(c *client.Client, ns, name string, wantName bool, pods *api.PodList) error {
	By("trying to dial each unique pod")
	label := labels.SelectorFromSet(labels.Set(map[string]string{"name": name}))
	return wait.Poll(poll, podRespondingTimeout, podResponseChecker{c, ns, label, name, wantName, pods}.checkAllResponses)
}

func serviceResponding(c *client.Client, ns, name string) error {
	By(fmt.Sprintf("trying to dial the service %s.%s via the proxy", ns, name))

	return wait.Poll(poll, serviceRespondingTimeout, func() (done bool, err error) {
		body, err := c.Get().
			Prefix("proxy").
			Namespace(ns).
			Resource("services").
			Name(name).
			Do().
			Raw()
		if err != nil {
			Logf("Failed to GET from service %s: %v:", name, err)
			return false, nil
		}
		got := string(body)
		if len(got) == 0 {
			Logf("Service %s: expected non-empty response", name)
			return false, err // stop polling
		}
		Logf("Service %s: found nonempty answer: %s", name, got)
		return true, nil
	})
}

func loadConfig() (*client.Config, error) {
	switch {
	case testContext.KubeConfig != "":
		fmt.Printf(">>> testContext.KubeConfig: %s\n", testContext.KubeConfig)
		c, err := clientcmd.LoadFromFile(testContext.KubeConfig)
		if err != nil {
			return nil, fmt.Errorf("error loading KubeConfig: %v", err.Error())
		}
		if testContext.KubeContext != "" {
			fmt.Printf(">>> testContext.KubeContext: %s\n", testContext.KubeContext)
			c.CurrentContext = testContext.KubeContext
		}
		return clientcmd.NewDefaultClientConfig(*c, &clientcmd.ConfigOverrides{ClusterInfo: clientcmdapi.Cluster{Server: testContext.Host}}).ClientConfig()
	default:
		return nil, fmt.Errorf("KubeConfig must be specified to load client config")
	}
}

func loadClient() (*client.Client, error) {
	config, err := loadConfig()
	if err != nil {
		return nil, fmt.Errorf("error creating client: %v", err.Error())
	}
	c, err := client.New(config)
	if err != nil {
		return nil, fmt.Errorf("error creating client: %v", err.Error())
	}
	if c.Timeout == 0 {
		c.Timeout = singleCallTimeout
	}
	return c, nil
}
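
// Usage sketch: helpers in this file that do not take a client construct one
// themselves from the test context; tests needing their own client do the same.
//
//	c, err := loadClient()
//	expectNoError(err)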

// randomSuffix provides a random string to append to pods, services, and rcs.
// TODO: Allow service names to have the same form as names
//       for pods and replication controllers so we don't
//       need to use such a function and can instead
//       use the UUID utility function.
func randomSuffix() string {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	return strconv.Itoa(r.Int() % 10000)
}

func expectNoError(err error, explain ...interface{}) {
	ExpectWithOffset(1, err).NotTo(HaveOccurred(), explain...)
}

// cleanup stops everything from filePath in namespace ns and checks that everything matching the selectors in that namespace is correctly stopped.
func cleanup(filePath string, ns string, selectors ...string) {
	By("using delete to clean up resources")
	var nsArg string
	if ns != "" {
		nsArg = fmt.Sprintf("--namespace=%s", ns)
	}
	runKubectl("stop", "--grace-period=0", "-f", filePath, nsArg)

	for _, selector := range selectors {
		resources := runKubectl("get", "rc,svc", "-l", selector, "--no-headers", nsArg)
		if resources != "" {
			Failf("Resources left running after stop:\n%s", resources)
		}
		pods := runKubectl("get", "pods", "-l", selector, nsArg, "-o", "go-template={{ range .items }}{{ if not .metadata.deletionTimestamp }}{{ .metadata.name }}{{ \"\\n\" }}{{ end }}{{ end }}")
		if pods != "" {
			Failf("Pods left unterminated after stop:\n%s", pods)
		}
	}
}
// validatorFn is the function which individual tests will implement.
// we may want it to return more than just an error, at some point.
type validatorFn func(c *client.Client, podID string) error

// validateController is a generic mechanism for testing RCs that are running.
// It takes a container name, a test name, and a validator function which is plugged in by a specific test.
// "containername": this is grepped for.
// "containerImage": this is the name of the image we expect to be launched. Not to be confused with images (kitten.jpg) which are validated.
// "testname": which gets bubbled up to the logging/failure messages if errors happen.
// "validator" function: this function is given a podID and a client, and it can do some specific validations that way.
func validateController(c *client.Client, containerImage string, replicas int, containername string, testname string, validator validatorFn, ns string) {
	getPodsTemplate := "--template={{range.items}}{{.metadata.name}} {{end}}"

	// NB: kubectl adds the "exists" function to the standard template functions.
	// This lets us check to see if the "running" entry exists for each of the containers
	// we care about. Exists will never return an error and it's safe to check a chain of
	// things, any one of which may not exist. In the below template, all of info,
	// containername, and running might be nil, so the normal index function isn't very
	// helpful.
	// This template is unit-tested in kubectl, so if you change it, update the unit test.
	// You can read about the syntax here: http://golang.org/pkg/text/template/.
	getContainerStateTemplate := fmt.Sprintf(`--template={{if (exists . "status" "containerStatuses")}}{{range .status.containerStatuses}}{{if (and (eq .name "%s") (exists . "state" "running"))}}true{{end}}{{end}}{{end}}`, containername)

	getImageTemplate := fmt.Sprintf(`--template={{if (exists . "status" "containerStatuses")}}{{range .status.containerStatuses}}{{if eq .name "%s"}}{{.image}}{{end}}{{end}}{{end}}`, containername)

	By(fmt.Sprintf("waiting for all containers in %s pods to come up.", testname)) //testname should be selector
waitLoop:
	for start := time.Now(); time.Since(start) < podStartTimeout; time.Sleep(5 * time.Second) {
		getPodsOutput := runKubectl("get", "pods", "-o", "template", getPodsTemplate, "--api-version=v1", "-l", testname, fmt.Sprintf("--namespace=%v", ns))
		pods := strings.Fields(getPodsOutput)
		if numPods := len(pods); numPods != replicas {
			By(fmt.Sprintf("Replicas for %s: expected=%d actual=%d", testname, replicas, numPods))
			continue
		}
		var runningPods []string
		for _, podID := range pods {
			running := runKubectl("get", "pods", podID, "-o", "template", getContainerStateTemplate, "--api-version=v1", fmt.Sprintf("--namespace=%v", ns))
			if running != "true" {
				Logf("%s is created but not running", podID)
				continue waitLoop
			}

			currentImage := runKubectl("get", "pods", podID, "-o", "template", getImageTemplate, "--api-version=v1", fmt.Sprintf("--namespace=%v", ns))
			if currentImage != containerImage {
				Logf("%s is created but running wrong image; expected: %s, actual: %s", podID, containerImage, currentImage)
				continue waitLoop
			}

			// Call the generic validator function here.
			// This might validate for example, that (1) getting a url works and (2) url is serving correct content.
			if err := validator(c, podID); err != nil {
				Logf("%s is running right image but validator function failed: %v", podID, err)
				continue waitLoop
			}
			Logf("%s is verified up and running", podID)
			runningPods = append(runningPods, podID)
		}
		// If we reach here, then all our checks passed.
		if len(runningPods) == replicas {
			return
		}
	}
	// Reaching here means that one or more checks failed multiple times. Assuming it's not a race condition, something is broken.
	Failf("Timed out after %v seconds waiting for %s pods to reach valid state", podStartTimeout.Seconds(), testname)
}

// kubectlCmd runs the kubectl executable through the wrapper script.
func kubectlCmd(args ...string) *exec.Cmd {
	defaultArgs := []string{}

	// Reference a --server option so tests can run anywhere.
	if testContext.Host != "" {
		defaultArgs = append(defaultArgs, "--"+clientcmd.FlagAPIServer+"="+testContext.Host)
	}
	if testContext.KubeConfig != "" {
		defaultArgs = append(defaultArgs, "--"+clientcmd.RecommendedConfigPathFlag+"="+testContext.KubeConfig)

		// Reference the KubeContext
		if testContext.KubeContext != "" {
			defaultArgs = append(defaultArgs, "--"+clientcmd.FlagContext+"="+testContext.KubeContext)
		}
	} else {
		if testContext.CertDir != "" {
			defaultArgs = append(defaultArgs,
				fmt.Sprintf("--certificate-authority=%s", filepath.Join(testContext.CertDir, "ca.crt")),
				fmt.Sprintf("--client-certificate=%s", filepath.Join(testContext.CertDir, "kubecfg.crt")),
				fmt.Sprintf("--client-key=%s", filepath.Join(testContext.CertDir, "kubecfg.key")))
		}
	}
	kubectlArgs := append(defaultArgs, args...)

	// We allow users to specify a path to kubectl, so you can test either "kubectl" or "cluster/kubectl.sh"
	// and so on.
	cmd := exec.Command(testContext.KubectlPath, kubectlArgs...)

	// caller will invoke this and wait on it.
	return cmd
}

// kubectlBuilder is used to build, customize, and execute a kubectl Command.
// Add more functions to customize the builder as needed.
type kubectlBuilder struct {
	cmd *exec.Cmd
}

func newKubectlCommand(args ...string) *kubectlBuilder {
	b := new(kubectlBuilder)
	b.cmd = kubectlCmd(args...)
	return b
}

func (b kubectlBuilder) withStdinData(data string) *kubectlBuilder {
	b.cmd.Stdin = strings.NewReader(data)
	return &b
}

func (b kubectlBuilder) withStdinReader(reader io.Reader) *kubectlBuilder {
	b.cmd.Stdin = reader
	return &b
}

func (b kubectlBuilder) exec() string {
	var stdout, stderr bytes.Buffer
	cmd := b.cmd
	cmd.Stdout, cmd.Stderr = &stdout, &stderr

	Logf("Running '%s %s'", cmd.Path, strings.Join(cmd.Args[1:], " ")) // skip arg[0] as it is printed separately
	if err := cmd.Run(); err != nil {
		Failf("Error running %v:\nCommand stdout:\n%v\nstderr:\n%v\n", cmd, cmd.Stdout, cmd.Stderr)
		return ""
	}
	Logf(stdout.String())
	// TODO: trimspace should be unnecessary after switching to use kubectl binary directly
	return strings.TrimSpace(stdout.String())
}

// runKubectl is a convenience wrapper over kubectlBuilder.
func runKubectl(args ...string) string {
	return newKubectlCommand(args...).exec()
}
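
// Usage sketch (the manifest variable is illustrative): runKubectl covers the
// common case; the builder form is for commands that need stdin.
//
//	out := runKubectl("get", "pods", fmt.Sprintf("--namespace=%v", ns))
//	created := newKubectlCommand("create", "-f", "-").withStdinData(manifest).exec()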

func startCmdAndStreamOutput(cmd *exec.Cmd) (stdout, stderr io.ReadCloser, err error) {
	stdout, err = cmd.StdoutPipe()
	if err != nil {
		return
	}
	stderr, err = cmd.StderrPipe()
	if err != nil {
		return
	}
	Logf("Asynchronously running '%s %s'", cmd.Path, strings.Join(cmd.Args, " "))
	err = cmd.Start()
	return
}

// tryKill is a rough equivalent of ctrl+c for cleaning up processes. Intended to be run in defer.
func tryKill(cmd *exec.Cmd) {
	if err := cmd.Process.Kill(); err != nil {
		Logf("ERROR failed to kill command %v! The process may leak", cmd)
	}
}

// testContainerOutputInNamespace runs the given pod in the given namespace and waits
// for all of the containers in the podSpec to move into the 'Success' status. It retrieves
// the exact container log and searches for lines of expected output.
func testContainerOutputInNamespace(scenarioName string, c *client.Client, pod *api.Pod, containerIndex int, expectedOutput []string, ns string) {
	By(fmt.Sprintf("Creating a pod to test %v", scenarioName))

	defer c.Pods(ns).Delete(pod.Name, api.NewDeleteOptions(0))
	if _, err := c.Pods(ns).Create(pod); err != nil {
		Failf("Failed to create pod: %v", err)
	}

	// Wait for client pod to complete.
	var containerName string
	for id, container := range pod.Spec.Containers {
		expectNoError(waitForPodSuccessInNamespace(c, pod.Name, container.Name, ns))
		if id == containerIndex {
			containerName = container.Name
		}
	}
	if containerName == "" {
		Failf("Invalid container index: %d", containerIndex)
	}

	// Grab its logs. Get host first.
	podStatus, err := c.Pods(ns).Get(pod.Name)
	if err != nil {
		Failf("Failed to get pod status: %v", err)
	}

	By(fmt.Sprintf("Trying to get logs from node %s pod %s container %s: %v",
		podStatus.Spec.NodeName, podStatus.Name, containerName, err))
	var logs []byte
	start := time.Now()

	// Sometimes the actual containers take a second to get started, try to get logs for 60s
	for time.Now().Sub(start) < (60 * time.Second) {
		err = nil
		logs, err = c.Get().
			Prefix("proxy").
			Resource("nodes").
			Name(podStatus.Spec.NodeName).
			Suffix("containerLogs", ns, podStatus.Name, containerName).
			Do().
			Raw()
		if err == nil && strings.Contains(string(logs), "Internal Error") {
			err = fmt.Errorf("Fetched log contains \"Internal Error\": %q.", string(logs))
		}
		if err != nil {
			By(fmt.Sprintf("Warning: Failed to get logs from node %q pod %q container %q. %v",
				podStatus.Spec.NodeName, podStatus.Name, containerName, err))
			time.Sleep(5 * time.Second)
			continue
		}
		By(fmt.Sprintf("Successfully fetched pod logs:%v\n", string(logs)))
		break
	}

	for _, m := range expectedOutput {
		Expect(string(logs)).To(ContainSubstring(m), "%q in container output", m)
	}
}

// podInfo contains pod information useful for debugging e2e tests.
type podInfo struct {
	oldHostname string
	oldPhase    string
	hostname    string
	phase       string
}

// PodDiff is a map of pod name to podInfos
type PodDiff map[string]*podInfo

// Print formats and prints the given PodDiff.
func (p PodDiff) Print(ignorePhases sets.String) {
	for name, info := range p {
		if ignorePhases.Has(info.phase) {
			continue
		}
		if info.phase == nonExist {
			Logf("Pod %v was deleted, had phase %v and host %v", name, info.oldPhase, info.oldHostname)
			continue
		}
		phaseChange, hostChange := false, false
		msg := fmt.Sprintf("Pod %v ", name)
		if info.oldPhase != info.phase {
			phaseChange = true
			if info.oldPhase == nonExist {
				msg += fmt.Sprintf("in phase %v ", info.phase)
			} else {
				msg += fmt.Sprintf("went from phase: %v -> %v ", info.oldPhase, info.phase)
			}
		}
		if info.oldHostname != info.hostname {
			hostChange = true
			if info.oldHostname == nonExist || info.oldHostname == "" {
				msg += fmt.Sprintf("assigned host %v ", info.hostname)
			} else {
				msg += fmt.Sprintf("went from host: %v -> %v ", info.oldHostname, info.hostname)
			}
		}
		if phaseChange || hostChange {
			Logf(msg)
		}
	}
}

// Diff computes a PodDiff given 2 lists of pods.
func Diff(oldPods []*api.Pod, curPods []*api.Pod) PodDiff {
	podInfoMap := PodDiff{}

	// New pods will show up in the curPods list but not in oldPods. They have oldhostname/phase == nonexist.
	for _, pod := range curPods {
		podInfoMap[pod.Name] = &podInfo{hostname: pod.Spec.NodeName, phase: string(pod.Status.Phase), oldHostname: nonExist, oldPhase: nonExist}
	}

	// Deleted pods will show up in the oldPods list but not in curPods. They have a hostname/phase == nonexist.
	for _, pod := range oldPods {
		if info, ok := podInfoMap[pod.Name]; ok {
			info.oldHostname, info.oldPhase = pod.Spec.NodeName, string(pod.Status.Phase)
		} else {
			podInfoMap[pod.Name] = &podInfo{hostname: nonExist, phase: nonExist, oldHostname: pod.Spec.NodeName, oldPhase: string(pod.Status.Phase)}
		}
	}
	return podInfoMap
}
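
// Usage sketch: callers typically snapshot the pod list before and after an
// event of interest and log only the interesting transitions, e.g.
//
//	oldPods := podStore.List()
//	// ... perturb the cluster ...
//	Diff(oldPods, podStore.List()).Print(sets.NewString(string(api.PodRunning)))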
2015-05-06 15:03:22 +00:00
// RunRC Launches (and verifies correctness) of a Replication Controller
2015-06-01 18:27:14 +00:00
// and will wait for all pods it spawns to become "Running".
2015-05-06 15:03:22 +00:00
// It's the caller's responsibility to clean up externally (i.e. use the
// namespace lifecycle for handling cleanup).
2015-05-26 14:24:46 +00:00
func RunRC ( config RCConfig ) error {
2015-07-07 01:11:30 +00:00
// Don't force tests to fail if they don't care about containers restarting.
var maxContainerFailures int
if config . MaxContainerFailures == nil {
maxContainerFailures = int ( math . Max ( 1.0 , float64 ( config . Replicas ) * .01 ) )
} else {
maxContainerFailures = * config . MaxContainerFailures
}
2015-06-10 11:59:30 +00:00
label := labels . SelectorFromSet ( labels . Set ( map [ string ] string { "name" : config . Name } ) )
2015-06-03 18:59:29 +00:00
2015-06-10 11:59:30 +00:00
By ( fmt . Sprintf ( "%v Creating replication controller %s" , time . Now ( ) , config . Name ) )
2015-05-08 00:11:48 +00:00
rc := & api . ReplicationController {
2015-04-22 19:08:08 +00:00
ObjectMeta : api . ObjectMeta {
2015-06-10 11:59:30 +00:00
Name : config . Name ,
2015-04-22 19:08:08 +00:00
} ,
Spec : api . ReplicationControllerSpec {
2015-06-10 11:59:30 +00:00
Replicas : config . Replicas ,
2015-04-22 19:08:08 +00:00
Selector : map [ string ] string {
2015-06-10 11:59:30 +00:00
"name" : config . Name ,
2015-04-22 19:08:08 +00:00
} ,
Template : & api . PodTemplateSpec {
ObjectMeta : api . ObjectMeta {
2015-06-10 11:59:30 +00:00
Labels : map [ string ] string { "name" : config . Name } ,
2015-04-22 19:08:08 +00:00
} ,
Spec : api . PodSpec {
Containers : [ ] api . Container {
{
2015-07-07 12:29:16 +00:00
Name : config . Name ,
Image : config . Image ,
Command : config . Command ,
Ports : [ ] api . ContainerPort { { ContainerPort : 80 } } ,
2015-04-22 19:08:08 +00:00
} ,
} ,
} ,
} ,
} ,
2015-05-08 00:11:48 +00:00
}
2015-06-11 22:55:25 +00:00
if config . Env != nil {
for k , v := range config . Env {
c := & rc . Spec . Template . Spec . Containers [ 0 ]
c . Env = append ( c . Env , api . EnvVar { Name : k , Value : v } )
}
}
if config . Labels != nil {
for k , v := range config . Labels {
rc . Spec . Template . ObjectMeta . Labels [ k ] = v
}
}
if config . Ports != nil {
for k , v := range config . Ports {
c := & rc . Spec . Template . Spec . Containers [ 0 ]
c . Ports = append ( c . Ports , api . ContainerPort { Name : k , ContainerPort : v } )
}
}
if config . CpuLimit > 0 || config . MemLimit > 0 {
rc . Spec . Template . Spec . Containers [ 0 ] . Resources . Limits = api . ResourceList { }
}
if config . CpuLimit > 0 {
rc . Spec . Template . Spec . Containers [ 0 ] . Resources . Limits [ api . ResourceCPU ] = * resource . NewMilliQuantity ( config . CpuLimit , resource . DecimalSI )
}
if config . MemLimit > 0 {
rc . Spec . Template . Spec . Containers [ 0 ] . Resources . Limits [ api . ResourceMemory ] = * resource . NewQuantity ( config . MemLimit , resource . DecimalSI )
}
if config . CpuRequest > 0 || config . MemRequest > 0 {
rc . Spec . Template . Spec . Containers [ 0 ] . Resources . Requests = api . ResourceList { }
}
if config . CpuRequest > 0 {
rc . Spec . Template . Spec . Containers [ 0 ] . Resources . Requests [ api . ResourceCPU ] = * resource . NewMilliQuantity ( config . CpuRequest , resource . DecimalSI )
}
if config . MemRequest > 0 {
rc . Spec . Template . Spec . Containers [ 0 ] . Resources . Requests [ api . ResourceMemory ] = * resource . NewQuantity ( config . MemRequest , resource . DecimalSI )
}
_ , err := config . Client . ReplicationControllers ( config . Namespace ) . Create ( rc )
if err != nil {
return fmt . Errorf ( "Error creating replication controller: %v" , err )
}
Logf ( "%v Created replication controller with name: %v, namespace: %v, replica count: %v" , time . Now ( ) , rc . Name , config . Namespace , rc . Spec . Replicas )
podStore := newPodStore ( config . Client , config . Namespace , label , fields . Everything ( ) )
defer podStore . Stop ( )
interval := config . PollInterval
if interval <= 0 {
interval = 10 * time . Second
}
timeout := config . Timeout
if timeout <= 0 {
timeout = 5 * time . Minute
}
oldPods := make ( [ ] * api . Pod , 0 )
oldRunning := 0
lastChange := time . Now ( )
for oldRunning != config . Replicas {
time . Sleep ( interval )
terminating := 0
running := 0
runningButNotReady := 0
waiting := 0
pending := 0
unknown := 0
inactive := 0
failedContainers := 0
containerRestartNodes := sets . NewString ( )
pods := podStore . List ( )
created := [ ] * api . Pod { }
for _ , p := range pods {
if p . DeletionTimestamp != nil {
terminating ++
continue
}
created = append ( created , p )
if p . Status . Phase == api . PodRunning {
ready := false
for _ , c := range p . Status . Conditions {
if c . Type == api . PodReady && c . Status == api . ConditionTrue {
ready = true
break
}
}
if ready {
// Only count a pod as running when it is also ready.
running ++
} else {
runningButNotReady ++
}
for _ , v := range FailedContainers ( p ) {
failedContainers = failedContainers + v . restarts
containerRestartNodes . Insert ( p . Spec . NodeName )
}
} else if p . Status . Phase == api . PodPending {
if p . Spec . NodeName == "" {
waiting ++
} else {
pending ++
}
} else if p . Status . Phase == api . PodSucceeded || p . Status . Phase == api . PodFailed {
inactive ++
} else if p . Status . Phase == api . PodUnknown {
unknown ++
}
}
pods = created
if config . CreatedPods != nil {
* config . CreatedPods = pods
}
Logf ( "%v %v Pods: %d out of %d created, %d running, %d pending, %d waiting, %d inactive, %d terminating, %d unknown, %d runningButNotReady " ,
time . Now ( ) , rc . Name , len ( pods ) , config . Replicas , running , pending , waiting , inactive , terminating , unknown , runningButNotReady )
promPushRunningPending ( running , pending )
if config . PodStatusFile != nil {
fmt . Fprintf ( config . PodStatusFile , "%s, %d, running, %d, pending, %d, waiting, %d, inactive, %d, unknown, %d, runningButNotReady\n" , time . Now ( ) , running , pending , waiting , inactive , unknown , runningButNotReady )
}
if failedContainers > maxContainerFailures {
dumpNodeDebugInfo ( config . Client , containerRestartNodes . List ( ) )
return fmt . Errorf ( "%d containers failed which is more than allowed %d" , failedContainers , maxContainerFailures )
}
if len ( pods ) < len ( oldPods ) || len ( pods ) > config . Replicas {
// This failure mode includes:
// kubelet is dead, so node controller deleted pods and rc creates more
// - diagnose by noting the pod diff below.
// pod is unhealthy, so replication controller creates another to take its place
// - diagnose by comparing the previous "2 Pod states" lines for inactive pods
errorStr := fmt . Sprintf ( "Number of reported pods changed: %d vs %d" , len ( pods ) , len ( oldPods ) )
Logf ( "%v, pods that changed since the last iteration:" , errorStr )
Diff ( oldPods , pods ) . Print ( sets . NewString ( ) )
return fmt . Errorf ( errorStr )
}
if len ( pods ) > len ( oldPods ) || running > oldRunning {
lastChange = time . Now ( )
}
oldPods = pods
oldRunning = running
if time . Since ( lastChange ) > timeout {
dumpPodDebugInfo ( config . Client , pods )
break
}
}
if oldRunning != config . Replicas {
if pods , err := config . Client . Pods ( api . NamespaceAll ) . List ( labels . Everything ( ) , fields . Everything ( ) ) ; err == nil {
for _ , pod := range pods . Items {
Logf ( "Pod %s\t%s\t%s\t%s" , pod . Namespace , pod . Name , pod . Spec . NodeName , pod . DeletionTimestamp )
}
} else {
Logf ( "Can't list pod debug info: %v" , err )
}
return fmt . Errorf ( "Only %d pods started out of %d" , oldRunning , config . Replicas )
}
return nil
}
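// A hypothetical sketch of driving RunRC from a test. The controller name,
// image, and replica count below are illustrative assumptions, not values used
// elsewhere in this file; cleanup is done explicitly since RunRC leaves it to
// the caller.
func exampleRunRC(c *client.Client, ns string) error {
	config := RCConfig{
		Client:    c,
		Name:      "example-rc",
		Namespace: ns,
		Image:     "gcr.io/google_containers/pause",
		Replicas:  3,
	}
	if err := RunRC(config); err != nil {
		return err
	}
	// Scale up, then delete the controller together with the pods it spawned.
	if err := ScaleRC(c, ns, config.Name, 5, true); err != nil {
		return err
	}
	return DeleteRC(c, ns, config.Name)
}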
func dumpPodDebugInfo ( c * client . Client , pods [ ] * api . Pod ) {
badNodes := sets . NewString ( )
for _ , p := range pods {
if p . Status . Phase != api . PodRunning {
if p . Spec . NodeName != "" {
Logf ( "Pod %v assigned to host %v (IP: %v) in %v" , p . Name , p . Spec . NodeName , p . Status . HostIP , p . Status . Phase )
badNodes . Insert ( p . Spec . NodeName )
} else {
Logf ( "Pod %v still unassigned" , p . Name )
}
}
}
dumpNodeDebugInfo ( c , badNodes . List ( ) )
}
func dumpAllPodInfo ( c * client . Client ) {
pods , err := c . Pods ( "" ) . List ( labels . Everything ( ) , fields . Everything ( ) )
if err != nil {
Logf ( "unable to fetch pod debug info: %v" , err )
}
logPodStates ( pods . Items )
}
func dumpNodeDebugInfo ( c * client . Client , nodeNames [ ] string ) {
for _ , n := range nodeNames {
Logf ( "\nLogging kubelet events for node %v" , n )
for _ , e := range getNodeEvents ( c , n ) {
Logf ( "source %v message %v reason %v first ts %v last ts %v, involved obj %+v" ,
e . Source , e . Message , e . Reason , e . FirstTimestamp , e . LastTimestamp , e . InvolvedObject )
}
Logf ( "\nLogging pods the kubelet thinks is on node %v" , n )
podList , err := GetKubeletPods ( c , n )
if err != nil {
Logf ( "Unable to retrieve kubelet pods for node %v" , n )
continue
}
for _ , p := range podList . Items {
Logf ( "%v started at %v (%d container statuses recorded)" , p . Name , p . Status . StartTime , len ( p . Status . ContainerStatuses ) )
for _ , c := range p . Status . ContainerStatuses {
Logf ( "\tContainer %v ready: %v, restart count %v" ,
c . Name , c . Ready , c . RestartCount )
}
}
HighLatencyKubeletOperations ( c , 10 * time . Second , n )
// TODO: Log node resource info
}
}
// getNodeEvents returns the kubelet events from the given node. This includes
// kubelet restart and node-unhealthy events. Note that listing events like this
// will mess with latency metrics; beware of calling it during a test.
func getNodeEvents ( c * client . Client , nodeName string ) [ ] api . Event {
events , err := c . Events ( api . NamespaceSystem ) . List (
labels . Everything ( ) ,
fields . Set {
"involvedObject.kind" : "Node" ,
"involvedObject.name" : nodeName ,
"involvedObject.namespace" : api . NamespaceAll ,
"source" : "kubelet" ,
} . AsSelector ( ) )
if err != nil {
Logf ( "Unexpected error retrieving node events %v" , err )
return [ ] api . Event { }
}
return events . Items
}
func ScaleRC ( c * client . Client , ns , name string , size uint , wait bool ) error {
By ( fmt . Sprintf ( "%v Scaling replication controller %s in namespace %s to %d" , time . Now ( ) , name , ns , size ) )
scaler , err := kubectl . ScalerFor ( "ReplicationController" , c )
if err != nil {
return err
}
waitForScale := kubectl . NewRetryParams ( 5 * time . Second , 1 * time . Minute )
waitForReplicas := kubectl . NewRetryParams ( 5 * time . Second , 5 * time . Minute )
if err = scaler . Scale ( ns , name , size , nil , waitForScale , waitForReplicas ) ; err != nil {
return err
}
if ! wait {
return nil
}
return waitForRCPodsRunning ( c , ns , name )
}
// Wait up to 10 minutes for pods to become Running.
func waitForRCPodsRunning ( c * client . Client , ns , rcName string ) error {
running := false
label := labels . SelectorFromSet ( labels . Set ( map [ string ] string { "name" : rcName } ) )
podStore := newPodStore ( c , ns , label , fields . Everything ( ) )
defer podStore . Stop ( )
waitLoop :
for start := time . Now ( ) ; time . Since ( start ) < 10 * time . Minute ; time . Sleep ( 5 * time . Second ) {
pods := podStore . List ( )
for _ , p := range pods {
if p . Status . Phase != api . PodRunning {
continue waitLoop
}
}
running = true
break
}
if ! running {
return fmt . Errorf ( "Timeout while waiting for replication controller %s pods to be running" , rcName )
}
return nil
}
// Waits up to podListTimeout for getting pods with a certain label.
func waitForPodsWithLabel ( c * client . Client , ns string , label labels . Selector ) ( pods * api . PodList , err error ) {
for t := time . Now ( ) ; time . Since ( t ) < podListTimeout ; time . Sleep ( poll ) {
pods , err = c . Pods ( ns ) . List ( label , fields . Everything ( ) )
Expect ( err ) . NotTo ( HaveOccurred ( ) )
if len ( pods . Items ) > 0 {
break
}
}
if pods == nil || len ( pods . Items ) == 0 {
err = fmt . Errorf ( "Timeout while waiting for pods with label %v" , label )
}
return
}
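// An illustrative sketch of waitForPodsWithLabel: list pods carrying a
// hypothetical "app=example" label and log how many were found.
func exampleListPodsByLabel(c *client.Client, ns string) error {
	selector := labels.SelectorFromSet(labels.Set(map[string]string{"app": "example"}))
	pods, err := waitForPodsWithLabel(c, ns, selector)
	if err != nil {
		return err
	}
	Logf("found %d pods with label %v", len(pods.Items), selector)
	return nil
}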
// DeleteRC deletes a replication controller and all the pods it spawned.
func DeleteRC ( c * client . Client , ns , name string ) error {
By ( fmt . Sprintf ( "%v Deleting replication controller %s in namespace %s" , time . Now ( ) , name , ns ) )
rc , err := c . ReplicationControllers ( ns ) . Get ( name )
if err != nil {
if apierrs . IsNotFound ( err ) {
Logf ( "RC %s was already deleted: %v" , name , err )
return nil
}
return err
}
reaper , err := kubectl . ReaperForReplicationController ( c , 10 * time . Minute )
if err != nil {
if apierrs . IsNotFound ( err ) {
Logf ( "RC %s was already deleted: %v" , name , err )
return nil
}
return err
}
startTime := time . Now ( )
_ , err = reaper . Stop ( ns , name , 0 , api . NewDeleteOptions ( 0 ) )
if apierrs . IsNotFound ( err ) {
Logf ( "RC %s was already deleted: %v" , name , err )
return nil
}
deleteRCTime := time . Now ( ) . Sub ( startTime )
Logf ( "Deleting RC %s took: %v" , name , deleteRCTime )
if err == nil {
err = waitForRCPodsGone ( c , rc )
}
terminatePodTime := time . Now ( ) . Sub ( startTime ) - deleteRCTime
Logf ( "Terminating RC %s pods took: %v" , name , terminatePodTime )
return err
}
// waitForRCPodsGone waits until there are no pods reported under an RC's selector (because the pods
// have completed termination).
func waitForRCPodsGone ( c * client . Client , rc * api . ReplicationController ) error {
return wait . Poll ( poll , 2 * time . Minute , func ( ) ( bool , error ) {
if pods , err := c . Pods ( rc . Namespace ) . List ( labels . SelectorFromSet ( rc . Spec . Selector ) , fields . Everything ( ) ) ; err == nil && len ( pods . Items ) == 0 {
return true , nil
}
return false , nil
} )
}
// Waits for the deployment to reach the desired state.
// Returns an error if the minAvailable or maxCreated constraint is violated at any time.
func waitForDeploymentStatus ( c * client . Client , ns , deploymentName string , desiredUpdatedReplicas , minAvailable , maxCreated int ) error {
return wait . Poll ( poll , 2 * time . Minute , func ( ) ( bool , error ) {
deployment , err := c . Deployments ( ns ) . Get ( deploymentName )
if err != nil {
return false , err
}
oldRCs , err := deploymentUtil . GetOldRCs ( * deployment , c )
if err != nil {
return false , err
}
newRC , err := deploymentUtil . GetNewRC ( * deployment , c )
if err != nil {
return false , err
}
if newRC == nil {
// New RC hasn't been created yet.
return false , nil
}
allRCs := append ( oldRCs , newRC )
totalCreated := deploymentUtil . GetReplicaCountForRCs ( allRCs )
totalAvailable , err := deploymentUtil . GetAvailablePodsForRCs ( c , allRCs )
if err != nil {
return false , err
}
if totalCreated > maxCreated {
return false , fmt . Errorf ( "total pods created: %d, more than the max allowed: %d" , totalCreated , maxCreated )
}
if totalAvailable < minAvailable {
return false , fmt . Errorf ( "total pods available: %d, less than the min required: %d" , totalAvailable , minAvailable )
}
if deployment . Status . Replicas == desiredUpdatedReplicas &&
deployment . Status . UpdatedReplicas == desiredUpdatedReplicas {
// Verify RCs.
if deploymentUtil . GetReplicaCountForRCs ( oldRCs ) != 0 {
return false , fmt . Errorf ( "old RCs are not fully scaled down" )
}
if deploymentUtil . GetReplicaCountForRCs ( [ ] * api . ReplicationController { newRC } ) != desiredUpdatedReplicas {
return false , fmt . Errorf ( "new RCs is not fully scaled up" )
}
return true , nil
}
return false , nil
} )
}
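// A hedged usage sketch for waitForDeploymentStatus: wait for a hypothetical
// deployment to finish a rolling update in which at most one extra pod may be
// created and at most one pod may be unavailable. The deployment name and
// replica count are assumptions for illustration only.
func exampleWaitForDeployment(c *client.Client, ns string) error {
	const replicas = 3
	return waitForDeploymentStatus(c, ns, "example-deployment", replicas, replicas-1, replicas+1)
}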
// Convenience wrapper around listing nodes that supports retries.
func listNodes ( c * client . Client , label labels . Selector , field fields . Selector ) ( * api . NodeList , error ) {
var nodes * api . NodeList
var errLast error
if wait . Poll ( poll , singleCallTimeout , func ( ) ( bool , error ) {
nodes , errLast = c . Nodes ( ) . List ( label , field )
return errLast == nil , nil
} ) != nil {
return nil , fmt . Errorf ( "listNodes() failed with last error: %v" , errLast )
}
return nodes , nil
}
// FailedContainers inspects all containers in a pod and returns failure
// information for containers that have failed or been restarted.
// A map is returned where the key is the containerID and the value is a
// struct containing the restart and failure information.
func FailedContainers ( pod * api . Pod ) map [ string ] ContainerFailures {
var state ContainerFailures
states := make ( map [ string ] ContainerFailures )
statuses := pod . Status . ContainerStatuses
if len ( statuses ) == 0 {
return nil
} else {
for _ , status := range statuses {
if status . State . Terminated != nil {
states [ status . ContainerID ] = ContainerFailures { status : status . State . Terminated }
} else if status . LastTerminationState . Terminated != nil {
states [ status . ContainerID ] = ContainerFailures { status : status . LastTerminationState . Terminated }
}
if status . RestartCount > 0 {
var ok bool
if state , ok = states [ status . ContainerID ] ; ! ok {
state = ContainerFailures { }
}
state . restarts = status . RestartCount
states [ status . ContainerID ] = state
}
}
}
return states
}
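// A short sketch of consuming FailedContainers: sum the restarts recorded for
// all containers in a pod. Purely illustrative.
func exampleCountRestarts(pod *api.Pod) int {
	total := 0
	for _, failure := range FailedContainers(pod) {
		total += failure.restarts
	}
	return total
}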
// BadEvents prints a histogram of the events and returns the number of bad events.
func BadEvents ( events [ ] * api . Event ) int {
type histogramKey struct {
reason string
source string
}
histogram := make ( map [ histogramKey ] int )
for _ , e := range events {
histogram [ histogramKey { reason : e . Reason , source : e . Source . Component } ] ++
}
for key , number := range histogram {
Logf ( "- reason: %s, source: %s -> %d" , key . reason , key . source , number )
}
badPatterns := [ ] string { "kill" , "fail" }
badEvents := 0
for key , number := range histogram {
for _ , s := range badPatterns {
if strings . Contains ( key . reason , s ) {
Logf ( "WARNING %d events from %s with reason: %s" , number , key . source , key . reason )
badEvents += number
break
}
}
}
return badEvents
}
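// An illustrative sketch of how BadEvents classifies events: any reason
// containing "kill" or "fail" is counted as bad. The two events below are
// fabricated solely for this example.
func exampleBadEvents() int {
	events := []*api.Event{
		{Reason: "failedScheduling", Source: api.EventSource{Component: "scheduler"}},
		{Reason: "pulled", Source: api.EventSource{Component: "kubelet"}},
	}
	// Returns 1: only "failedScheduling" matches a bad pattern.
	return BadEvents(events)
}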
// NodeSSHHosts returns SSH-able host names for all nodes. It returns an error
// if it can't find an external IP for every node, though it still returns all
// hosts that it found in that case.
func NodeSSHHosts ( c * client . Client ) ( [ ] string , error ) {
var hosts [ ] string
nodelist , err := c . Nodes ( ) . List ( labels . Everything ( ) , fields . Everything ( ) )
if err != nil {
return hosts , fmt . Errorf ( "error getting nodes: %v" , err )
}
for _ , n := range nodelist . Items {
for _ , addr := range n . Status . Addresses {
// Use the first external IP address we find on the node, and
// use at most one per node.
// TODO(roberthbailey): Use the "preferred" address for the node, once
// such a thing is defined (#2462).
if addr . Type == api . NodeExternalIP {
hosts = append ( hosts , addr . Address + ":22" )
break
}
}
}
// Error if any node didn't have an external IP.
if len ( hosts ) != len ( nodelist . Items ) {
return hosts , fmt . Errorf (
"only found %d external IPs on nodes, but found %d nodes. Nodelist: %v" ,
len ( hosts ) , len ( nodelist . Items ) , nodelist )
}
return hosts , nil
}
// SSH synchronously SSHs to a node running on provider and runs cmd. If there
// is no error performing the SSH, the stdout, stderr, and exit code are
// returned.
func SSH ( cmd , host , provider string ) ( string , string , int , error ) {
return sshCore ( cmd , host , provider , false )
}
// SSHVerbose is just like SSH, but it logs the command, user, host, stdout,
// stderr, exit code, and error.
func SSHVerbose ( cmd , host , provider string ) ( string , string , int , error ) {
return sshCore ( cmd , host , provider , true )
}
func sshCore ( cmd , host , provider string , verbose bool ) ( string , string , int , error ) {
// Get a signer for the provider.
signer , err := getSigner ( provider )
if err != nil {
return "" , "" , 0 , fmt . Errorf ( "error getting signer for provider %s: '%v'" , provider , err )
}
// RunSSHCommand will default to Getenv("USER") if user == "", but we're
// defaulting here as well for logging clarity.
user := os . Getenv ( "KUBE_SSH_USER" )
if user == "" {
user = os . Getenv ( "USER" )
}
stdout , stderr , code , err := util . RunSSHCommand ( cmd , user , host , signer )
if verbose {
remote := fmt . Sprintf ( "%s@%s" , user , host )
Logf ( "[%s] Running `%s`" , remote , cmd )
Logf ( "[%s] stdout: %q" , remote , stdout )
Logf ( "[%s] stderr: %q" , remote , stderr )
Logf ( "[%s] exit code: %d" , remote , code )
Logf ( "[%s] error: %v" , remote , err )
}
return stdout , stderr , code , err
}
// getSigner returns an ssh.Signer for the provider ("gce", etc.) that can be
// used to SSH to their nodes.
func getSigner ( provider string ) ( ssh . Signer , error ) {
// Get the directory in which SSH keys are located.
keydir := filepath . Join ( os . Getenv ( "HOME" ) , ".ssh" )
// Select the key itself to use. When implementing more providers here,
// please also add them to any SSH tests that are disabled because of signer
// support.
keyfile := ""
switch provider {
case "gce" , "gke" :
keyfile = "google_compute_engine"
case "aws" :
keyfile = "kube_aws_rsa"
default :
return nil , fmt . Errorf ( "getSigner(...) not implemented for %s" , provider )
}
key := filepath . Join ( keydir , keyfile )
return util . MakePrivateKeySignerFromFile ( key )
}
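// A hedged sketch tying NodeSSHHosts and SSH together: run a command on every
// node and log any non-zero exit code. The "uptime" command is an arbitrary
// example.
func exampleRunOnAllNodes(c *client.Client, provider string) error {
	hosts, err := NodeSSHHosts(c)
	if err != nil {
		return err
	}
	for _, host := range hosts {
		stdout, stderr, code, err := SSH("uptime", host, provider)
		if err != nil || code != 0 {
			Logf("uptime on %s failed (code %d, err %v): stdout %q, stderr %q",
				host, code, err, stdout, stderr)
		}
	}
	return nil
}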
// checkPodsRunningReady returns whether all pods whose names are listed in podNames
// in namespace ns are running and ready, using c and waiting at most timeout.
func checkPodsRunningReady ( c * client . Client , ns string , podNames [ ] string , timeout time . Duration ) bool {
np , desc := len ( podNames ) , "running and ready"
Logf ( "Waiting up to %v for the following %d pods to be %s: %s" , timeout , np , desc , podNames )
result := make ( chan bool , len ( podNames ) )
for ix := range podNames {
// Launch off pod readiness checkers.
go func ( name string ) {
err := waitForPodCondition ( c , ns , name , desc , timeout , podRunningReady )
result <- err == nil
} ( podNames [ ix ] )
}
// Wait for them all to finish.
success := true
// TODO(a-robinson): Change to `for range` syntax and remove logging once we
// support only Go >= 1.4.
for _ , podName := range podNames {
if ! <- result {
Logf ( "Pod %-[1]*[2]s failed to be %[3]s." , podPrintWidth , podName , desc )
success = false
}
}
Logf ( "Wanted all %d pods to be %s. Result: %t. Pods: %v" , np , desc , success , podNames )
return success
}
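// An illustrative sketch: verify that a known set of pods is running and ready
// before a test proceeds. The pod names and the two-minute timeout are
// assumptions.
func exampleCheckPods(c *client.Client, ns string) error {
	names := []string{"example-pod-0", "example-pod-1"}
	if !checkPodsRunningReady(c, ns, names, 2*time.Minute) {
		return fmt.Errorf("pods %v did not become running and ready", names)
	}
	return nil
}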
// waitForNodeToBeReady returns whether node name is ready within timeout.
func waitForNodeToBeReady ( c * client . Client , name string , timeout time . Duration ) bool {
return waitForNodeToBe ( c , name , true , timeout )
}
// waitForNodeToBeNotReady returns whether node name is not ready (i.e. the
// readiness condition is anything but ready, e.g. false or unknown) within
// timeout.
func waitForNodeToBeNotReady ( c * client . Client , name string , timeout time . Duration ) bool {
return waitForNodeToBe ( c , name , false , timeout )
}
func isNodeReadySetAsExpected ( node * api . Node , wantReady bool ) bool {
// Check the node readiness condition (logging all).
for i , cond := range node . Status . Conditions {
Logf ( "Node %s condition %d/%d: type: %v, status: %v, reason: %q, message: %q, last transition time: %v" ,
node . Name , i + 1 , len ( node . Status . Conditions ) , cond . Type , cond . Status ,
cond . Reason , cond . Message , cond . LastTransitionTime )
// Ensure that the condition type is readiness and the status
// matches as desired.
if cond . Type == api . NodeReady && ( cond . Status == api . ConditionTrue ) == wantReady {
Logf ( "Successfully found node %s readiness to be %t" , node . Name , wantReady )
return true
}
}
return false
}
// waitForNodeToBe returns whether node name's readiness state matches wantReady
// within timeout. If wantReady is true, it will ensure the node is ready; if
// it's false, it ensures the node is in any state other than ready (e.g. not
// ready or unknown).
func waitForNodeToBe ( c * client . Client , name string , wantReady bool , timeout time . Duration ) bool {
Logf ( "Waiting up to %v for node %s readiness to be %t" , timeout , name , wantReady )
for start := time . Now ( ) ; time . Since ( start ) < timeout ; time . Sleep ( poll ) {
node , err := c . Nodes ( ) . Get ( name )
if err != nil {
Logf ( "Couldn't get node %s" , name )
continue
}
if isNodeReadySetAsExpected ( node , wantReady ) {
return true
}
}
Logf ( "Node %s didn't reach desired readiness (%t) within %v" , name , wantReady , timeout )
return false
}
// allNodesReady checks whether all registered nodes are ready within the given timeout.
func allNodesReady ( c * client . Client , timeout time . Duration ) error {
Logf ( "Waiting up to %v for all nodes to be ready" , timeout )
var notReady [ ] api . Node
err := wait . Poll ( poll , timeout , func ( ) ( bool , error ) {
notReady = nil
nodes , err := c . Nodes ( ) . List ( labels . Everything ( ) , fields . Everything ( ) )
if err != nil {
return false , err
}
for _ , node := range nodes . Items {
if ! isNodeReadySetAsExpected ( & node , true ) {
notReady = append ( notReady , node )
}
}
return len ( notReady ) == 0 , nil
} )
if err != nil && err != wait . ErrWaitTimeout {
return err
}
if len ( notReady ) > 0 {
return fmt . Errorf ( "Not ready nodes: %v" , notReady )
}
return nil
}
// Filters nodes in NodeList in place, removing nodes that do not
// satisfy the given condition
// TODO: consider merging with pkg/client/cache.NodeLister
func filterNodes ( nodeList * api . NodeList , fn func ( node api . Node ) bool ) {
var l [ ] api . Node
for _ , node := range nodeList . Items {
if fn ( node ) {
l = append ( l , node )
}
}
nodeList . Items = l
}
// parseKVLines parses output that looks like lines containing "<key>: <val>"
// and returns <val> if <key> is found. Otherwise, it returns the empty string.
func parseKVLines ( output , key string ) string {
delim := ":"
key = key + delim
for _ , line := range strings . Split ( output , "\n" ) {
pieces := strings . SplitAfterN ( line , delim , 2 )
if len ( pieces ) != 2 {
continue
}
k , v := pieces [ 0 ] , pieces [ 1 ]
if k == key {
return strings . TrimSpace ( v )
}
}
return ""
}
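// A tiny illustrative sketch of parseKVLines on "key: value" style output; the
// input string is fabricated.
func exampleParseKVLines() string {
	out := "Name: example\nStatus: Running\n"
	// Returns "Running".
	return parseKVLines(out, "Status")
}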
func restartKubeProxy ( host string ) error {
// TODO: Make it work for all providers.
if ! providerIs ( "gce" , "gke" , "aws" ) {
return fmt . Errorf ( "unsupported provider: %s" , testContext . Provider )
}
_ , _ , code , err := SSH ( "sudo /etc/init.d/kube-proxy restart" , host , testContext . Provider )
if err != nil || code != 0 {
return fmt . Errorf ( "couldn't restart kube-proxy: %v (code %v)" , err , code )
}
return nil
}
func restartApiserver ( ) error {
// TODO: Make it work for all providers.
if ! providerIs ( "gce" , "gke" , "aws" ) {
return fmt . Errorf ( "unsupported provider: %s" , testContext . Provider )
}
var command string
if providerIs ( "gce" , "gke" ) {
command = "sudo docker ps | grep /kube-apiserver | cut -d ' ' -f 1 | xargs sudo docker kill"
} else {
command = "sudo /etc/init.d/kube-apiserver restart"
}
_ , _ , code , err := SSH ( command , getMasterHost ( ) + ":22" , testContext . Provider )
if err != nil || code != 0 {
return fmt . Errorf ( "couldn't restart apiserver: %v (code %v)" , err , code )
}
return nil
}
func waitForApiserverUp ( c * client . Client ) error {
for start := time . Now ( ) ; time . Since ( start ) < time . Minute ; time . Sleep ( 5 * time . Second ) {
body , err := c . Get ( ) . AbsPath ( "/healthz" ) . Do ( ) . Raw ( )
if err == nil && string ( body ) == "ok" {
return nil
}
}
return fmt . Errorf ( "waiting for apiserver timed out" )
}
// waitForClusterSize waits until the cluster has the desired size and there are no not-ready nodes in it.
func waitForClusterSize ( c * client . Client , size int , timeout time . Duration ) error {
for start := time . Now ( ) ; time . Since ( start ) < timeout ; time . Sleep ( 20 * time . Second ) {
nodes , err := c . Nodes ( ) . List ( labels . Everything ( ) , fields . Everything ( ) )
if err != nil {
Logf ( "Failed to list nodes: %v" , err )
continue
}
numNodes := len ( nodes . Items )
// Filter out not-ready nodes.
filterNodes ( nodes , func ( node api . Node ) bool {
return isNodeReadySetAsExpected ( & node , true )
} )
numReady := len ( nodes . Items )
if numNodes == size && numReady == size {
Logf ( "Cluster has reached the desired size %d" , size )
return nil
}
Logf ( "Waiting for cluster size %d, current size %d, not ready nodes %d" , size , numNodes , numNodes - numReady )
}
return fmt . Errorf ( "timeout waiting %v for cluster size to be %d" , timeout , size )
}
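// A hedged sketch of how a cluster-resize test might combine waitForClusterSize
// with allNodesReady; the target size and timeouts are illustrative assumptions.
func exampleWaitForResize(c *client.Client, targetSize int) error {
	if err := waitForClusterSize(c, targetSize, 10*time.Minute); err != nil {
		return err
	}
	return allNodesReady(c, 2*time.Minute)
}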