Refactor RunRemote to support TestSuite interface.

pull/6/head
Random-Liu 2016-11-29 17:55:18 -08:00
parent 99dc80ccc2
commit bca5aea5ba
4 changed files with 213 additions and 150 deletions

View File

@ -21,11 +21,15 @@ import (
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/golang/glog"
"k8s.io/kubernetes/test/e2e_node/builder"
)
// NodeE2ERemote contains the specific functions in the node e2e test suite.
type NodeE2ERemote struct{}
func InitNodeE2ERemote() TestSuite {
@ -33,12 +37,7 @@ func InitNodeE2ERemote() TestSuite {
return &NodeE2ERemote{}
}
const (
localGCIMounterPath = "cluster/gce/gci/mounter/mounter"
CNIRelease = "07a8a28637e97b22eb8dfe710eeae1344f69d16e"
CNIDirectory = "cni"
CNIURL = "https://storage.googleapis.com/kubernetes-release/network-plugins/cni-" + CNIRelease + ".tar.gz"
)
const localGCIMounterPath = "cluster/gce/gci/mounter/mounter"
// SetupTestPackage sets up the test package with binaries k8s required for node e2e tests
func (n *NodeE2ERemote) SetupTestPackage(tardir string) error {
@ -71,8 +70,7 @@ func (n *NodeE2ERemote) SetupTestPackage(tardir string) error {
if err != nil {
return fmt.Errorf("Could not find K8s root dir! Err: %v", err)
}
localSource := "cluster/gce/gci/mounter/mounter"
source := filepath.Join(k8sDir, localSource)
source := filepath.Join(k8sDir, localGCIMounterPath)
// Require the GCI mounter script, we want to make sure the remote test runner stays up to date if the mounter file moves
if _, err := os.Stat(source); err != nil {
@ -94,7 +92,73 @@ func (n *NodeE2ERemote) SetupTestPackage(tardir string) error {
return nil
}
// RunTest runs test on the node.
func (n *NodeE2ERemote) RunTest(host, workspace, results, junitFilePrefix, testArgs, ginkgoFlags string, timeout time.Duration) (string, error) {
return "", fmt.Errorf("not implemented")
// updateGCIMounterPath updates kubelet flags to set gci mounter path. This will only take effect for
// GCI image.
func updateGCIMounterPath(args, host, workspace string) (string, error) {
// Determine if tests will run on a GCI node.
output, err := SSH(host, "cat", "/etc/os-release")
if err != nil {
return args, fmt.Errorf("issue detecting node's OS via node's /etc/os-release. Err: %v, Output:\n%s", err, output)
}
if !strings.Contains(output, "ID=gci") {
// This is not a GCI image
return args, nil
}
// If we are testing on a GCI node, we chmod 544 the mounter and specify a different mounter path in the test args.
// We do this here because the local var `workspace` tells us which /tmp/node-e2e-%d is relevant to the current test run.
// Determine if the GCI mounter script exists locally.
k8sDir, err := builder.GetK8sRootDir()
if err != nil {
return args, fmt.Errorf("could not find K8s root dir! Err: %v", err)
}
source := filepath.Join(k8sDir, localGCIMounterPath)
// Require the GCI mounter script, we want to make sure the remote test runner stays up to date if the mounter file moves
if _, err = os.Stat(source); err != nil {
return args, fmt.Errorf("could not find GCI mounter script at %q! If this script has been (re)moved, please update the e2e node remote test runner accordingly! Err: %v", source, err)
}
glog.Infof("GCI node and GCI mounter both detected, modifying --experimental-mounter-path accordingly")
// Note this implicitly requires the script to be where we expect in the tarball, so if that location changes the error
// here will tell us to update the remote test runner.
mounterPath := filepath.Join(workspace, localGCIMounterPath)
output, err = SSH(host, "sh", "-c", fmt.Sprintf("'chmod 544 %s'", mounterPath))
if err != nil {
return args, fmt.Errorf("unabled to chmod 544 GCI mounter script. Err: %v, Output:\n%s", err, output)
}
// Insert args at beginning of test args, so any values from command line take precedence
args = fmt.Sprintf("--kubelet-flags=--experimental-mounter-path=%s ", mounterPath) + args
return args, nil
}
// RunTest runs test on the node.
func (n *NodeE2ERemote) RunTest(host, workspace, results, junitFilePrefix, testArgs, ginkgoArgs string, timeout time.Duration) (string, error) {
// Install the cni plugin.
if err := installCNI(host, workspace); err != nil {
return "", err
}
// Configure iptables firewall rules
if err := configureFirewall(host); err != nil {
return "", err
}
// Kill any running node processes
cleanupNodeProcesses(host)
testArgs, err := updateGCIMounterPath(testArgs, host, workspace)
if err != nil {
return "", err
}
// Run the tests
glog.Infof("Starting tests on %q", host)
cmd := getSSHCommand(" && ",
fmt.Sprintf("cd %s", workspace),
fmt.Sprintf("timeout -k 30s %fs ./ginkgo %s ./e2e_node.test -- --logtostderr --v 4 --node-name=%s --report-dir=%s --report-prefix=%s %s",
timeout.Seconds(), ginkgoArgs, host, results, junitFilePrefix, testArgs),
)
return SSH(host, "sh", "-c", cmd)
}

View File

@ -23,12 +23,10 @@ import (
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/golang/glog"
utilerrors "k8s.io/kubernetes/pkg/util/errors"
"k8s.io/kubernetes/test/e2e_node/builder"
)
var testTimeoutSeconds = flag.Duration("test-timeout", 45*time.Minute, "How long (in golang duration format) to wait for ginkgo tests to complete.")
@ -63,177 +61,54 @@ func CreateTestArchive(suite TestSuite) (string, error) {
return filepath.Join(dir, archiveName), nil
}
const (
CNIRelease = "07a8a28637e97b22eb8dfe710eeae1344f69d16e"
CNIDirectory = "cni"
CNIURL = "https://storage.googleapis.com/kubernetes-release/network-plugins/cni-" + CNIRelease + ".tar.gz"
)
// Returns the command output, whether the exit was ok, and any errors
func RunRemote(archive string, host string, cleanup bool, junitFilePrefix string, testArgs string, ginkgoFlags string) (string, bool, error) {
// TODO(random-liu): junitFilePrefix is not prefix actually, the file name is junit-junitFilePrefix.xml. Change the variable name.
func RunRemote(suite TestSuite, archive string, host string, cleanup bool, junitFilePrefix string, testArgs string, ginkgoArgs string) (string, bool, error) {
// Create the temp staging directory
glog.Infof("Staging test binaries on %s", host)
glog.Infof("Staging test binaries on %q", host)
workspace := fmt.Sprintf("/tmp/node-e2e-%s", getTimestamp())
// Do not sudo here, so that we can use scp to copy test archive to the directdory.
if output, err := SSHNoSudo(host, "mkdir", workspace); err != nil {
// Exit failure with the error
return "", false, fmt.Errorf("failed to create workspace directory: %v output: %q", err, output)
return "", false, fmt.Errorf("failed to create workspace directory %q on host %q: %v output: %q", workspace, host, err, output)
}
if cleanup {
defer func() {
output, err := SSH(host, "rm", "-rf", workspace)
if err != nil {
glog.Errorf("failed to cleanup workspace %s on host %v. Output:\n%s", workspace, err, output)
glog.Errorf("failed to cleanup workspace %q on host %q: %v. Output:\n%s", workspace, host, err, output)
}
}()
}
// Install the cni plugin.
cniPath := filepath.Join(workspace, CNIDirectory)
cmd := getSSHCommand(" ; ",
fmt.Sprintf("mkdir -p %s", cniPath),
fmt.Sprintf("wget -O - %s | tar -xz -C %s", CNIURL, cniPath),
)
if output, err := SSH(host, "sh", "-c", cmd); err != nil {
// Exit failure with the error
return "", false, fmt.Errorf("failed to install cni plugin: %v output: %q", err, output)
}
// Configure iptables firewall rules
// TODO: consider calling bootstrap script to configure host based on OS
output, err := SSH(host, "iptables", "-L", "INPUT")
if err != nil {
return "", false, fmt.Errorf("failed to get iptables INPUT: %v output: %q", err, output)
}
if strings.Contains(output, "Chain INPUT (policy DROP)") {
cmd = getSSHCommand("&&",
"(iptables -C INPUT -w -p TCP -j ACCEPT || iptables -A INPUT -w -p TCP -j ACCEPT)",
"(iptables -C INPUT -w -p UDP -j ACCEPT || iptables -A INPUT -w -p UDP -j ACCEPT)",
"(iptables -C INPUT -w -p ICMP -j ACCEPT || iptables -A INPUT -w -p ICMP -j ACCEPT)")
output, err := SSH(host, "sh", "-c", cmd)
if err != nil {
return "", false, fmt.Errorf("failed to configured firewall: %v output: %v", err, output)
}
}
output, err = SSH(host, "iptables", "-L", "FORWARD")
if err != nil {
return "", false, fmt.Errorf("failed to get iptables FORWARD: %v output: %q", err, output)
}
if strings.Contains(output, "Chain FORWARD (policy DROP)") {
cmd = getSSHCommand("&&",
"(iptables -C FORWARD -w -p TCP -j ACCEPT || iptables -A FORWARD -w -p TCP -j ACCEPT)",
"(iptables -C FORWARD -w -p UDP -j ACCEPT || iptables -A FORWARD -w -p UDP -j ACCEPT)",
"(iptables -C FORWARD -w -p ICMP -j ACCEPT || iptables -A FORWARD -w -p ICMP -j ACCEPT)")
output, err = SSH(host, "sh", "-c", cmd)
if err != nil {
return "", false, fmt.Errorf("failed to configured firewall: %v output: %v", err, output)
}
}
// Copy the archive to the staging directory
if output, err = runSSHCommand("scp", archive, fmt.Sprintf("%s:%s/", GetHostnameOrIp(host), workspace)); err != nil {
if output, err := runSSHCommand("scp", archive, fmt.Sprintf("%s:%s/", GetHostnameOrIp(host), workspace)); err != nil {
// Exit failure with the error
return "", false, fmt.Errorf("failed to copy test archive: %v, output: %q", err, output)
}
// Kill any running node processes
cmd = getSSHCommand(" ; ",
"pkill kubelet",
"pkill kube-apiserver",
"pkill etcd",
)
// No need to log an error if pkill fails since pkill will fail if the commands are not running.
// If we are unable to stop existing running k8s processes, we should see messages in the kubelet/apiserver/etcd
// logs about failing to bind the required ports.
glog.Infof("Killing any existing node processes on %s", host)
SSH(host, "sh", "-c", cmd)
// Extract the archive
cmd = getSSHCommand(" && ",
cmd := getSSHCommand(" && ",
fmt.Sprintf("cd %s", workspace),
fmt.Sprintf("tar -xzvf ./%s", archiveName),
)
glog.Infof("Extracting tar on %s", host)
if output, err = SSH(host, "sh", "-c", cmd); err != nil {
glog.Infof("Extracting tar on %q", host)
if output, err := SSH(host, "sh", "-c", cmd); err != nil {
// Exit failure with the error
return "", false, fmt.Errorf("failed to extract test archive: %v, output: %q", err, output)
}
// If we are testing on a GCI node, we chmod 544 the mounter and specify a different mounter path in the test args.
// We do this here because the local var `workspace` tells us which /tmp/node-e2e-%d is relevant to the current test run.
glog.Infof("Running test on %q", host)
output, err := suite.RunTest(host, workspace, filepath.Join(workspace, "results"), junitFilePrefix, testArgs, ginkgoArgs, *testTimeoutSeconds)
// Determine if the GCI mounter script exists locally.
k8sDir, err := builder.GetK8sRootDir()
if err != nil {
return "", false, fmt.Errorf("Could not find K8s root dir! Err: %v", err)
}
localSource := "cluster/gce/gci/mounter/mounter"
source := filepath.Join(k8sDir, localSource)
// Require the GCI mounter script, we want to make sure the remote test runner stays up to date if the mounter file moves
if _, err = os.Stat(source); err != nil {
return "", false, fmt.Errorf("Could not find GCI mounter script at %q! If this script has been (re)moved, please update the e2e node remote test runner accordingly! Err: %v", source, err)
}
// Determine if tests will run on a GCI node.
output, err = SSH(host, "sh", "-c", "'cat /etc/os-release'")
if err != nil {
glog.Errorf("Issue detecting node's OS via node's /etc/os-release. Err: %v, Output:\n%s", err, output)
return "", false, fmt.Errorf("Issue detecting node's OS via node's /etc/os-release. Err: %v, Output:\n%s", err, output)
}
if strings.Contains(output, "ID=gci") {
glog.Infof("GCI node and GCI mounter both detected, modifying --experimental-mounter-path accordingly")
// Note this implicitly requires the script to be where we expect in the tarball, so if that location changes the error
// here will tell us to update the remote test runner.
mounterPath := filepath.Join(workspace, "cluster/gce/gci/mounter/mounter")
output, err = SSH(host, "sh", "-c", fmt.Sprintf("'chmod 544 %s'", mounterPath))
if err != nil {
glog.Errorf("Unable to chmod 544 GCI mounter script. Err: %v, Output:\n%s", err, output)
return "", false, err
}
// Insert args at beginning of testArgs, so any values from command line take precedence
testArgs = fmt.Sprintf("--kubelet-flags=--experimental-mounter-path=%s ", mounterPath) + testArgs
}
// Run the tests
cmd = getSSHCommand(" && ",
fmt.Sprintf("cd %s", workspace),
fmt.Sprintf("timeout -k 30s %fs ./ginkgo %s ./e2e_node.test -- --logtostderr --v 4 --node-name=%s --report-dir=%s/results --report-prefix=%s %s",
testTimeoutSeconds.Seconds(), ginkgoFlags, host, workspace, junitFilePrefix, testArgs),
)
aggErrs := []error{}
glog.Infof("Starting tests on %s", host)
output, err = SSH(host, "sh", "-c", cmd)
// Do not log the output here, let the caller deal with the test output.
if err != nil {
aggErrs = append(aggErrs, err)
// Encountered an unexpected error. The remote test harness may not
// have finished retrieved and stored all the logs in this case. Try
// to get some logs for debugging purposes.
// TODO: This is a best-effort, temporary hack that only works for
// journald nodes. We should have a more robust way to collect logs.
var (
logName = "system.log"
logPath = fmt.Sprintf("/tmp/%s-%s", getTimestamp(), logName)
destPath = fmt.Sprintf("%s/%s-%s", *resultsDir, host, logName)
)
glog.Infof("Test failed unexpectedly. Attempting to retreiving system logs (only works for nodes with journald)")
// Try getting the system logs from journald and store it to a file.
// Don't reuse the original test directory on the remote host because
// it could've be been removed if the node was rebooted.
if output, err := SSH(host, "sh", "-c", fmt.Sprintf("'journalctl --system --all > %s'", logPath)); err == nil {
glog.Infof("Got the system logs from journald; copying it back...")
if output, err := runSSHCommand("scp", fmt.Sprintf("%s:%s", GetHostnameOrIp(host), logPath), destPath); err != nil {
glog.Infof("Failed to copy the log: err: %v, output: %q", err, output)
}
} else {
glog.Infof("Failed to run journactl (normal if it doesn't exist on the node): %v, output: %q", err, output)
}
collectSystemLog(host, workspace)
}
glog.Infof("Copying test artifacts from %s", host)
glog.Infof("Copying test artifacts from %q", host)
scpErr := getTestArtifacts(host, workspace)
if scpErr != nil {
aggErrs = append(aggErrs, scpErr)
@ -267,6 +142,33 @@ func getTestArtifacts(host, testDir string) error {
return nil
}
// collectSystemLog is a temporary hack to collect system log when encountered on
// unexpected error.
func collectSystemLog(host, workspace string) {
// Encountered an unexpected error. The remote test harness may not
// have finished retrieved and stored all the logs in this case. Try
// to get some logs for debugging purposes.
// TODO: This is a best-effort, temporary hack that only works for
// journald nodes. We should have a more robust way to collect logs.
var (
logName = "system.log"
logPath = fmt.Sprintf("/tmp/%s-%s", getTimestamp(), logName)
destPath = fmt.Sprintf("%s/%s-%s", *resultsDir, host, logName)
)
glog.Infof("Test failed unexpectedly. Attempting to retreiving system logs (only works for nodes with journald)")
// Try getting the system logs from journald and store it to a file.
// Don't reuse the original test directory on the remote host because
// it could've be been removed if the node was rebooted.
if output, err := SSH(host, "sh", "-c", fmt.Sprintf("'journalctl --system --all > %s'", logPath)); err == nil {
glog.Infof("Got the system logs from journald; copying it back...")
if output, err := runSSHCommand("scp", fmt.Sprintf("%s:%s", GetHostnameOrIp(host), logPath), destPath); err != nil {
glog.Infof("Failed to copy the log: err: %v, output: %q", err, output)
}
} else {
glog.Infof("Failed to run journactl (normal if it doesn't exist on the node): %v, output: %q", err, output)
}
}
// WriteLog is a temporary function to make it possible to write log
// in the runner. This is used to collect serial console log.
// TODO(random-liu): Use the log-dump script in cluster e2e.

View File

@ -0,0 +1,97 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package remote
import (
"fmt"
"path/filepath"
"strings"
"github.com/golang/glog"
)
// utils.go contains functions used accross test suites.
const (
cniRelease = "07a8a28637e97b22eb8dfe710eeae1344f69d16e"
cniDirectory = "cni"
cniURL = "https://storage.googleapis.com/kubernetes-release/network-plugins/cni-" + cniRelease + ".tar.gz"
)
// Install the cni plugin.
func installCNI(host, workspace string) error {
glog.Infof("Install CNI on %q", host)
cniPath := filepath.Join(workspace, cniDirectory)
cmd := getSSHCommand(" ; ",
fmt.Sprintf("mkdir -p %s", cniPath),
fmt.Sprintf("wget -O - %s | tar -xz -C %s", cniURL, cniPath),
)
if output, err := SSH(host, "sh", "-c", cmd); err != nil {
return fmt.Errorf("failed to install cni plugin on %q: %v output: %q", host, err, output)
}
return nil
}
// configureFirewall configures iptable firewall rules.
func configureFirewall(host string) error {
glog.Infof("Configure iptables firewall rules on %q", host)
// TODO: consider calling bootstrap script to configure host based on OS
output, err := SSH(host, "iptables", "-L", "INPUT")
if err != nil {
return fmt.Errorf("failed to get iptables INPUT on %q: %v output: %q", host, err, output)
}
if strings.Contains(output, "Chain INPUT (policy DROP)") {
cmd := getSSHCommand("&&",
"(iptables -C INPUT -w -p TCP -j ACCEPT || iptables -A INPUT -w -p TCP -j ACCEPT)",
"(iptables -C INPUT -w -p UDP -j ACCEPT || iptables -A INPUT -w -p UDP -j ACCEPT)",
"(iptables -C INPUT -w -p ICMP -j ACCEPT || iptables -A INPUT -w -p ICMP -j ACCEPT)")
output, err := SSH(host, "sh", "-c", cmd)
if err != nil {
return fmt.Errorf("failed to configured firewall on %q: %v output: %v", host, err, output)
}
}
output, err = SSH(host, "iptables", "-L", "FORWARD")
if err != nil {
return fmt.Errorf("failed to get iptables FORWARD on %q: %v output: %q", host, err, output)
}
if strings.Contains(output, "Chain FORWARD (policy DROP)") {
cmd := getSSHCommand("&&",
"(iptables -C FORWARD -w -p TCP -j ACCEPT || iptables -A FORWARD -w -p TCP -j ACCEPT)",
"(iptables -C FORWARD -w -p UDP -j ACCEPT || iptables -A FORWARD -w -p UDP -j ACCEPT)",
"(iptables -C FORWARD -w -p ICMP -j ACCEPT || iptables -A FORWARD -w -p ICMP -j ACCEPT)")
output, err = SSH(host, "sh", "-c", cmd)
if err != nil {
return fmt.Errorf("failed to configured firewall on %q: %v output: %v", host, err, output)
}
}
return nil
}
// cleanupNodeProcesses kills all running node processes may conflict with the test.
func cleanupNodeProcesses(host string) {
glog.Infof("Killing any existing node processes on %q", host)
cmd := getSSHCommand(" ; ",
"pkill kubelet",
"pkill kube-apiserver",
"pkill etcd",
"pkill e2e_node.test",
)
// No need to log an error if pkill fails since pkill will fail if the commands are not running.
// If we are unable to stop existing running k8s processes, we should see messages in the kubelet/apiserver/etcd
// logs about failing to bind the required ports.
SSH(host, "sh", "-c", cmd)
}

View File

@ -382,7 +382,7 @@ func testHost(host string, deleteFiles bool, junitFilePrefix string, ginkgoFlags
}
}
output, exitOk, err := remote.RunRemote(path, host, deleteFiles, junitFilePrefix, *testArgs, ginkgoFlagsStr)
output, exitOk, err := remote.RunRemote(suite, path, host, deleteFiles, junitFilePrefix, *testArgs, ginkgoFlagsStr)
return &TestResult{
output: output,
err: err,