2016-02-22 18:52:20 +00:00
/*
Copyright 2016 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2e_node
import (
"flag"
"fmt"
"io/ioutil"
2016-06-10 02:25:36 +00:00
"math/rand"
2016-02-22 18:52:20 +00:00
"net/http"
"os"
"os/exec"
2016-05-23 20:16:47 +00:00
"path"
2016-06-02 22:28:32 +00:00
"reflect"
"strconv"
2016-02-22 18:52:20 +00:00
"strings"
2016-06-02 22:28:32 +00:00
"syscall"
2016-02-22 18:52:20 +00:00
"time"
"github.com/golang/glog"
)
2016-03-04 22:07:35 +00:00
// serverStartTimeout is the -server-start-timeout flag. startServer uses it as
// the upper bound on how long it keeps polling a service's health-check URL.
var serverStartTimeout = flag.Duration(
	"server-start-timeout",
	120*time.Second,
	"Time to wait for each server to become healthy.",
)
2016-05-23 20:16:47 +00:00
// reportDir is the -report-dir flag. Service output files and collected logs
// are written beneath it; getLogFiles does nothing when it is empty.
var reportDir = flag.String(
	"report-dir",
	"",
	"Path to the directory where the JUnit XML reports should be saved. Default is empty, which doesn't generate these reports.",
)
2016-02-22 18:52:20 +00:00
type e2eService struct {
2016-06-10 02:25:36 +00:00
etcdCmd * killCmd
2016-05-23 20:16:47 +00:00
etcdDataDir string
2016-06-10 02:25:36 +00:00
apiServerCmd * killCmd
kubeletCmd * killCmd
2016-05-23 20:16:47 +00:00
kubeletStaticPodDir string
nodeName string
2016-06-10 02:25:36 +00:00
logFiles map [ string ] logFileData
}
// logFileData describes where one log of interest can be collected from.
//
// The field order is significant: values are built with positional composite
// literals elsewhere in this file.
type logFileData struct {
	// files lists candidate paths on non-journald machines; getLogFiles copies
	// the first one that exists and can be copied.
	files []string
	// journalctlCommand holds the arguments appended to `journalctl` on
	// journald machines. When empty, the log is skipped on those machines.
	journalctlCommand []string
}
2016-02-24 21:12:42 +00:00
func newE2eService ( nodeName string ) * e2eService {
2016-06-10 02:25:36 +00:00
// Special log files that need to be collected for additional debugging.
var logFiles = map [ string ] logFileData {
"kern.log" : { [ ] string { "/var/log/kern.log" } , [ ] string { "-k" } } ,
"docker.log" : { [ ] string { "/var/log/docker.log" , "/var/log/upstart/docker.log" } , [ ] string { "-u" , "docker" } } ,
}
return & e2eService { nodeName : nodeName , logFiles : logFiles }
2016-02-22 18:52:20 +00:00
}
// start verifies that the kubelet and kube-apiserver binaries can be located
// and then launches etcd, kube-apiserver and the kubelet, in that order. It
// returns the first error encountered and does not start later services after
// a failure.
//
// A kill handle returned by a start* helper is recorded even when that helper
// also reports an error (for example a health-check timeout), so that stop can
// still terminate a process or systemd unit that was launched but never became
// healthy.
func (es *e2eService) start() error {
	for _, bin := range []string{"kubelet", "kube-apiserver"} {
		if _, err := getK8sBin(bin); err != nil {
			return err
		}
	}

	cmd, err := es.startEtcd()
	if cmd != nil {
		es.etcdCmd = cmd
	}
	if err != nil {
		return err
	}

	cmd, err = es.startApiServer()
	if cmd != nil {
		es.apiServerCmd = cmd
	}
	if err != nil {
		return err
	}

	cmd, err = es.startKubeletServer()
	if cmd != nil {
		es.kubeletCmd = cmd
	}
	return err
}
2016-05-25 22:04:02 +00:00
// Get logs of interest either via journalctl or by creating sym links.
// Since we scp files from the remote directory, symlinks will be treated as normal files and file contents will be copied over.
func ( es * e2eService ) getLogFiles ( ) {
// Nothing to do if report dir is not specified.
if * reportDir == "" {
return
}
journaldFound := isJournaldAvailable ( )
2016-06-10 02:25:36 +00:00
for targetFileName , logFileData := range es . logFiles {
2016-05-25 22:04:02 +00:00
targetLink := path . Join ( * reportDir , targetFileName )
if journaldFound {
// Skip log files that do not have an equivalent in journald based machines.
if len ( logFileData . journalctlCommand ) == 0 {
continue
}
out , err := exec . Command ( "sudo" , append ( [ ] string { "journalctl" } , logFileData . journalctlCommand ... ) ... ) . CombinedOutput ( )
if err != nil {
glog . Errorf ( "failed to get %q from journald: %v, %v" , targetFileName , string ( out ) , err )
} else {
if err = ioutil . WriteFile ( targetLink , out , 0755 ) ; err != nil {
glog . Errorf ( "failed to write logs to %q: %v" , targetLink , err )
}
}
continue
}
for _ , file := range logFileData . files {
if _ , err := os . Stat ( file ) ; err != nil {
// Expected file not found on this distro.
continue
}
if err := copyLogFile ( file , targetLink ) ; err != nil {
glog . Error ( err )
} else {
break
}
}
}
}
// copyLogFile copies src to target with `sudo cp` and then makes the copy
// world readable with `sudo chmod a+r`. It returns an error that includes the
// combined output of whichever command failed.
func copyLogFile(src, target string) error {
	if out, err := exec.Command("sudo", "cp", src, target).CombinedOutput(); err != nil {
		// Convert the output to a string; formatting the raw []byte with %v
		// would print a list of byte values rather than the message.
		return fmt.Errorf("failed to copy %q to %q: %v, %v", src, target, string(out), err)
	}
	if out, err := exec.Command("sudo", "chmod", "a+r", target).CombinedOutput(); err != nil {
		return fmt.Errorf("failed to make log file %q world readable: %v, %v", target, string(out), err)
	}
	return nil
}
// isJournaldAvailable reports whether a journalctl executable can be found on
// the current PATH.
func isJournaldAvailable() bool {
	if _, lookupErr := exec.LookPath("journalctl"); lookupErr != nil {
		return false
	}
	return true
}
2016-02-22 18:52:20 +00:00
func ( es * e2eService ) stop ( ) {
2016-06-10 02:25:36 +00:00
if err := es . stopService ( es . kubeletCmd ) ; err != nil {
2016-06-02 22:28:32 +00:00
glog . Errorf ( "Failed to stop kubelet: %v" , err )
2016-02-22 18:52:20 +00:00
}
2016-04-18 05:00:59 +00:00
if es . kubeletStaticPodDir != "" {
err := os . RemoveAll ( es . kubeletStaticPodDir )
if err != nil {
glog . Errorf ( "Failed to delete kubelet static pod directory %s.\n%v" , es . kubeletStaticPodDir , err )
}
}
2016-06-10 02:25:36 +00:00
if err := es . stopService ( es . apiServerCmd ) ; err != nil {
2016-06-02 22:28:32 +00:00
glog . Errorf ( "Failed to stop kube-apiserver: %v" , err )
2016-02-22 18:52:20 +00:00
}
2016-06-10 02:25:36 +00:00
if err := es . stopService ( es . etcdCmd ) ; err != nil {
2016-06-02 22:28:32 +00:00
glog . Errorf ( "Failed to stop etcd: %v" , err )
2016-02-22 18:52:20 +00:00
}
if es . etcdDataDir != "" {
err := os . RemoveAll ( es . etcdDataDir )
if err != nil {
glog . Errorf ( "Failed to delete etcd data directory %s.\n%v" , es . etcdDataDir , err )
}
}
}
2016-06-10 02:25:36 +00:00
func ( es * e2eService ) startEtcd ( ) ( * killCmd , error ) {
2016-02-22 18:52:20 +00:00
dataDir , err := ioutil . TempDir ( "" , "node-e2e" )
if err != nil {
return nil , err
}
es . etcdDataDir = dataDir
2016-02-26 23:06:25 +00:00
cmd := exec . Command ( "etcd" )
// Execute etcd in the data directory instead of using --data-dir because the flag sometimes requires additional
// configuration (e.g. --name in version 0.4.9)
cmd . Dir = es . etcdDataDir
hcc := newHealthCheckCommand (
"http://127.0.0.1:4001/v2/keys/" , // Trailing slash is required,
cmd ,
2016-05-23 20:16:47 +00:00
"etcd.log" )
2016-06-10 02:25:36 +00:00
return & killCmd { name : "etcd" , cmd : cmd } , es . startServer ( hcc )
2016-02-22 18:52:20 +00:00
}
2016-06-10 02:25:36 +00:00
func ( es * e2eService ) startApiServer ( ) ( * killCmd , error ) {
2016-02-26 23:06:25 +00:00
cmd := exec . Command ( "sudo" , getApiServerBin ( ) ,
"--etcd-servers" , "http://127.0.0.1:4001" ,
"--insecure-bind-address" , "0.0.0.0" ,
"--service-cluster-ip-range" , "10.0.0.1/24" ,
2016-04-07 00:11:13 +00:00
"--kubelet-port" , "10250" ,
"--allow-privileged" , "true" ,
2016-05-23 20:16:47 +00:00
"--v" , "8" , "--logtostderr" ,
2016-04-07 00:11:13 +00:00
)
2016-02-26 23:06:25 +00:00
hcc := newHealthCheckCommand (
"http://127.0.0.1:8080/healthz" ,
cmd ,
2016-05-23 20:16:47 +00:00
"kube-apiserver.log" )
2016-06-10 02:25:36 +00:00
return & killCmd { name : "kube-apiserver" , cmd : cmd } , es . startServer ( hcc )
2016-02-22 18:52:20 +00:00
}
2016-06-10 02:25:36 +00:00
func ( es * e2eService ) startKubeletServer ( ) ( * killCmd , error ) {
2016-04-18 05:00:59 +00:00
dataDir , err := ioutil . TempDir ( "" , "node-e2e-pod" )
if err != nil {
return nil , err
}
es . kubeletStaticPodDir = dataDir
2016-06-10 02:25:36 +00:00
var killOverride * exec . Cmd
2016-06-09 22:18:55 +00:00
cmdArgs := [ ] string { }
if systemdRun , err := exec . LookPath ( "systemd-run" ) ; err == nil {
// On systemd services, detection of a service / unit works reliably while
// detection of a process started from an ssh session does not work.
// Since kubelet will typically be run as a service it also makes more
// sense to test it that way
2016-06-10 02:25:36 +00:00
unitName := fmt . Sprintf ( "kubelet-%d.service" , rand . Int31 ( ) )
cmdArgs = append ( cmdArgs , systemdRun , "--unit=" + unitName , getKubeletServerBin ( ) )
killOverride = exec . Command ( "sudo" , "systemctl" , "kill" , unitName )
es . logFiles [ "kubelet.log" ] = logFileData {
journalctlCommand : [ ] string { "-u" , unitName } ,
}
2016-06-09 22:18:55 +00:00
} else {
cmdArgs = append ( cmdArgs , getKubeletServerBin ( ) )
}
cmdArgs = append ( cmdArgs ,
2016-02-26 23:06:25 +00:00
"--api-servers" , "http://127.0.0.1:8080" ,
"--address" , "0.0.0.0" ,
"--port" , "10250" ,
2016-03-14 18:18:27 +00:00
"--hostname-override" , es . nodeName , // Required because hostname is inconsistent across hosts
"--volume-stats-agg-period" , "10s" , // Aggregate volumes frequently so tests don't need to wait as long
2016-04-07 00:11:13 +00:00
"--allow-privileged" , "true" ,
2016-04-21 22:34:28 +00:00
"--serialize-image-pulls" , "false" ,
2016-04-18 05:00:59 +00:00
"--config" , es . kubeletStaticPodDir ,
"--file-check-frequency" , "10s" , // Check file frequently so tests won't wait too long
2016-05-23 20:16:47 +00:00
"--v" , "8" , "--logtostderr" ,
2016-03-14 18:18:27 +00:00
)
2016-06-09 22:18:55 +00:00
cmd := exec . Command ( "sudo" , cmdArgs ... )
2016-02-26 23:06:25 +00:00
hcc := newHealthCheckCommand (
"http://127.0.0.1:10255/healthz" ,
cmd ,
2016-05-23 20:16:47 +00:00
"kubelet.log" )
2016-06-10 02:25:36 +00:00
return & killCmd { name : "kubelet" , cmd : cmd , override : killOverride } , es . startServer ( hcc )
2016-02-22 18:52:20 +00:00
}
2016-02-26 23:06:25 +00:00
func ( es * e2eService ) startServer ( cmd * healthCheckCommand ) error {
2016-02-22 18:52:20 +00:00
cmdErrorChan := make ( chan error )
go func ( ) {
2016-05-23 20:16:47 +00:00
defer close ( cmdErrorChan )
// Create the output filename
outPath := path . Join ( * reportDir , cmd . outputFilename )
outfile , err := os . Create ( outPath )
if err != nil {
cmdErrorChan <- fmt . Errorf ( "Failed to create file %s for `%s` %v." , outPath , cmd , err )
return
}
defer outfile . Close ( )
defer outfile . Sync ( )
// Set the command to write the output file
cmd . Cmd . Stdout = outfile
cmd . Cmd . Stderr = outfile
2016-06-02 22:28:32 +00:00
// Killing the sudo command should kill the server as well.
attrs := & syscall . SysProcAttr { }
// Hack to set linux-only field without build tags.
deathSigField := reflect . ValueOf ( attrs ) . Elem ( ) . FieldByName ( "Pdeathsig" )
if deathSigField . IsValid ( ) {
deathSigField . Set ( reflect . ValueOf ( syscall . SIGKILL ) )
} else {
cmdErrorChan <- fmt . Errorf ( "Failed to set Pdeathsig field (non-linux build)" )
return
}
cmd . Cmd . SysProcAttr = attrs
2016-05-23 20:16:47 +00:00
// Run the command
err = cmd . Run ( )
2016-02-22 18:52:20 +00:00
if err != nil {
2016-05-23 20:16:47 +00:00
cmdErrorChan <- fmt . Errorf ( "%s Failed with error \"%v\". Output written to: %s" , cmd , err , outPath )
return
2016-02-22 18:52:20 +00:00
}
} ( )
endTime := time . Now ( ) . Add ( * serverStartTimeout )
for endTime . After ( time . Now ( ) ) {
select {
case err := <- cmdErrorChan :
2016-02-26 23:06:25 +00:00
return err
2016-02-22 18:52:20 +00:00
case <- time . After ( time . Second ) :
2016-02-26 23:06:25 +00:00
resp , err := http . Get ( cmd . HealthCheckUrl )
2016-02-22 18:52:20 +00:00
if err == nil && resp . StatusCode == http . StatusOK {
2016-02-26 23:06:25 +00:00
return nil
2016-02-22 18:52:20 +00:00
}
}
}
2016-02-26 23:06:25 +00:00
return fmt . Errorf ( "Timeout waiting for service %s" , cmd )
2016-02-22 18:52:20 +00:00
}
2016-06-10 02:25:36 +00:00
// stopService kills the service behind cmd and returns any error from doing
// so. A nil cmd means the service was never started (for example because an
// earlier service failed to start); that is treated as already stopped rather
// than dereferencing the nil handle.
func (es *e2eService) stopService(cmd *killCmd) error {
	if cmd == nil {
		return nil
	}
	return cmd.Kill()
}
// killCmd describes how to stop one started service.
//
// By default the process behind cmd is looked up by pid and signalled. When
// override is set, that command is run instead to stop the service.
type killCmd struct {
	// name identifies the service in log and error messages only.
	name string
	// cmd is the command whose process should be killed.
	cmd *exec.Cmd
	// override, if non-nil, is run instead of signalling cmd's process.
	override *exec.Cmd
}
func ( k * killCmd ) Kill ( ) error {
if k . override != nil {
return k . override . Run ( )
}
name := k . name
cmd := k . cmd
2016-06-02 22:28:32 +00:00
if cmd == nil || cmd . Process == nil {
glog . V ( 2 ) . Infof ( "%s not running" , name )
return nil
}
pid := cmd . Process . Pid
if pid <= 1 {
return fmt . Errorf ( "invalid PID %d for %s" , pid , name )
}
// Attempt to shut down the process in a friendly manner before forcing it.
waitChan := make ( chan error )
go func ( ) {
_ , err := cmd . Process . Wait ( )
waitChan <- err
close ( waitChan )
} ( )
const timeout = 10 * time . Second
for _ , signal := range [ ] string { "-TERM" , "-KILL" } {
glog . V ( 2 ) . Infof ( "Killing process %d (%s) with %s" , pid , name , signal )
_ , err := exec . Command ( "sudo" , "kill" , signal , strconv . Itoa ( pid ) ) . Output ( )
if err != nil {
glog . Errorf ( "Error signaling process %d (%s) with %s: %v" , pid , name , signal , err )
continue
}
select {
case err := <- waitChan :
if err != nil {
return fmt . Errorf ( "error stopping %s: %v" , name , err )
}
// Success!
return nil
case <- time . After ( timeout ) :
// Continue.
}
}
return fmt . Errorf ( "unable to stop %s" , name )
}
2016-02-22 18:52:20 +00:00
// healthCheckCommand pairs a command to run with the URL used to decide
// whether the service it starts is healthy and the file its output goes to.
type healthCheckCommand struct {
	// Cmd is the embedded command that startServer runs.
	*exec.Cmd
	// HealthCheckUrl is polled with HTTP GET until it returns 200 OK.
	HealthCheckUrl string
	// outputFilename is the file name, relative to the report directory, that
	// receives the command's stdout and stderr.
	outputFilename string
}
2016-05-23 20:16:47 +00:00
func newHealthCheckCommand ( healthCheckUrl string , cmd * exec . Cmd , filename string ) * healthCheckCommand {
2016-02-26 23:06:25 +00:00
return & healthCheckCommand {
HealthCheckUrl : healthCheckUrl ,
Cmd : cmd ,
2016-05-23 20:16:47 +00:00
outputFilename : filename ,
2016-02-26 23:06:25 +00:00
}
2016-02-22 18:52:20 +00:00
}
func ( hcc * healthCheckCommand ) String ( ) string {
2016-02-26 23:06:25 +00:00
return fmt . Sprintf ( "`%s %s` health-check: %s" , hcc . Path , strings . Join ( hcc . Args , " " ) , hcc . HealthCheckUrl )
2016-02-22 18:52:20 +00:00
}