Merge pull request #11778 from mbforbes/sshLog

Improve SSH logging for debugging
pull/6/head
Jerzy Szczepkowski 2015-07-24 09:36:52 +02:00
commit 6488ac5512
9 changed files with 153 additions and 89 deletions

View File

@ -26,6 +26,8 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
sshutil "github.com/GoogleCloudPlatform/kubernetes/test/e2e/ssh"
"golang.org/x/crypto/ssh"
. "github.com/onsi/ginkgo"
@ -355,7 +357,7 @@ func waitForReplicationControllerInAddonTest(c *client.Client, addonNamespace, n
// kubernetes v1.0 is released. In particular the code of sshExec.
func getMasterSSHClient() (*ssh.Client, error) {
// Get a signer for the provider.
signer, err := getSigner(testContext.Provider)
signer, err := sshutil.GetSigner(testContext.Provider)
if err != nil {
return nil, fmt.Errorf("error getting signer for provider %s: '%v'", testContext.Provider, err)
}

View File

@ -22,6 +22,8 @@ import (
"net"
"strings"
"sync"
"github.com/GoogleCloudPlatform/kubernetes/test/e2e/ssh"
)
type command struct {
@ -44,7 +46,7 @@ func coreDump(dir string) {
}
// Get all nodes' external IPs.
hosts, err := NodeSSHHosts(c)
hosts, err := ssh.NodeSSHHosts(c)
if err != nil {
fmt.Printf("Error getting node hostnames: %v", err)
return
@ -85,7 +87,7 @@ func logCore(cmds []command, hosts []string, dir, provider string) {
defer wg.Done()
logfile := fmt.Sprintf("%s/%s-%s.log", dir, host, cmd.component)
fmt.Printf("Writing to %s.\n", logfile)
stdout, stderr, _, err := SSH(cmd.cmd, host, provider)
stdout, stderr, _, err := ssh.SSH(cmd.cmd, host, provider)
if err != nil {
fmt.Printf("Error running command: %v\n", err)
}

View File

@ -23,6 +23,7 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util/wait"
"github.com/GoogleCloudPlatform/kubernetes/test/e2e/ssh"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
@ -103,7 +104,7 @@ func doEtcdFailure(failCommand, fixCommand string) {
}
func masterExec(cmd string) {
stdout, stderr, code, err := SSH(cmd, getMasterHost()+":22", testContext.Provider)
stdout, stderr, code, err := ssh.SSH(cmd, getMasterHost()+":22", testContext.Provider)
Expect(err).NotTo(HaveOccurred())
if code != 0 {
Failf("master exec command, '%v' failed with exitcode %v: \n\tstdout: %v\n\tstderr: %v", cmd, code, stdout, stderr)

View File

@ -24,6 +24,7 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
"github.com/GoogleCloudPlatform/kubernetes/test/e2e/ssh"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
@ -138,7 +139,7 @@ func issueSSHCommand(node *api.Node, provider, cmd string) error {
return fmt.Errorf("couldn't find external IP address for node %s", node.Name)
}
Logf("Calling %s on %s", cmd, node.Name)
if _, _, code, err := SSH(cmd, host, provider); code != 0 || err != nil {
if _, _, code, err := ssh.SSH(cmd, host, provider); code != 0 || err != nil {
return fmt.Errorf("when running %s on %s, got %d and %v", cmd, node.Name, code, err)
}
return nil

View File

@ -26,12 +26,13 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/latest"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/aws"
"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util/wait"
"github.com/GoogleCloudPlatform/kubernetes/test/e2e/ssh"
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/aws"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
@ -365,7 +366,7 @@ func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replica
// may fail). Manual intervention is required in such case (recreating the
// cluster solves the problem too).
err := wait.Poll(time.Millisecond*100, time.Second*30, func() (bool, error) {
_, _, code, err := SSH(undropCmd, host, testContext.Provider)
_, _, code, err := ssh.SSH(undropCmd, host, testContext.Provider)
if code == 0 && err == nil {
return true, nil
} else {
@ -386,7 +387,7 @@ func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replica
// We could also block network traffic from the master(s)s to this node,
// but blocking it one way is sufficient for this test.
dropCmd := fmt.Sprintf("sudo iptables --insert %s", iptablesRule)
if _, _, code, err := SSH(dropCmd, host, testContext.Provider); code != 0 || err != nil {
if _, _, code, err := ssh.SSH(dropCmd, host, testContext.Provider); code != 0 || err != nil {
Failf("Expected 0 exit code and nil error when running %s on %s, got %d and %v",
dropCmd, node.Name, code, err)
}

View File

@ -21,6 +21,7 @@ import (
"strings"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/test/e2e/ssh"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
@ -41,7 +42,7 @@ var _ = Describe("SSH", func() {
It("should SSH to all nodes and run commands", func() {
// Get all nodes' external IPs.
By("Getting all nodes' SSH-able IP addresses")
hosts, err := NodeSSHHosts(c)
hosts, err := ssh.NodeSSHHosts(c)
if err != nil {
Failf("Error getting node hostnames: %v", err)
}
@ -65,7 +66,7 @@ var _ = Describe("SSH", func() {
for _, testCase := range testCases {
By(fmt.Sprintf("SSH'ing to all nodes and running %s", testCase.cmd))
for _, host := range hosts {
stdout, stderr, code, err := SSH(testCase.cmd, host, testContext.Provider)
stdout, stderr, code, err := ssh.SSH(testCase.cmd, host, testContext.Provider)
stdout, stderr = strings.TrimSpace(stdout), strings.TrimSpace(stderr)
if err != testCase.expectedError {
Failf("Ran %s on %s, got error %v, expected %v", testCase.cmd, host, err, testCase.expectedError)
@ -79,19 +80,12 @@ var _ = Describe("SSH", func() {
if code != testCase.expectedCode {
Failf("Ran %s on %s, got exit code %d, expected %d", testCase.cmd, host, code, testCase.expectedCode)
}
// Show stdout, stderr for logging purposes.
if len(stdout) > 0 {
Logf("Got stdout from %s: %s", host, strings.TrimSpace(stdout))
}
if len(stderr) > 0 {
Logf("Got stderr from %s: %s", host, strings.TrimSpace(stderr))
}
}
}
// Quickly test that SSH itself errors correctly.
By("SSH'ing to a nonexistent host")
if _, _, _, err = SSH(`echo "hello"`, "i.do.not.exist", testContext.Provider); err == nil {
if _, _, _, err = ssh.SSH(`echo "hello"`, "i.do.not.exist", testContext.Provider); err == nil {
Failf("Expected error trying to SSH to nonexistent host.")
}
})

21
test/e2e/ssh/doc.go Normal file
View File

@ -0,0 +1,21 @@
/*
Copyright 2014 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package ssh contains utilities for ssh'ing into machines for e2e tests.
//
// It hooks into both the Golang SSH implementation and the core SSH utilities
// in Kubernetes.
package ssh

113
test/e2e/ssh/ssh.go Normal file
View File

@ -0,0 +1,113 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ssh
import (
"fmt"
"os"
"path/filepath"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/golang/glog"
"golang.org/x/crypto/ssh"
)
// NodeSSHHosts returns SSH-able host names for all nodes. It returns an error
// if it can't find an external IP for every node, though it still returns all
// hosts that it found in that case.
func NodeSSHHosts(c *client.Client) ([]string, error) {
var hosts []string
nodelist, err := c.Nodes().List(labels.Everything(), fields.Everything())
if err != nil {
return hosts, fmt.Errorf("error getting nodes: %v", err)
}
for _, n := range nodelist.Items {
for _, addr := range n.Status.Addresses {
// Use the first external IP address we find on the node, and
// use at most one per node.
// TODO(mbforbes): Use the "preferred" address for the node, once
// such a thing is defined (#2462).
if addr.Type == api.NodeExternalIP {
hosts = append(hosts, addr.Address+":22")
break
}
}
}
// Error if any node didn't have an external IP.
if len(hosts) != len(nodelist.Items) {
return hosts, fmt.Errorf(
"only found %d external IPs on nodes, but found %d nodes. Nodelist: %v",
len(hosts), len(nodelist.Items), nodelist)
}
return hosts, nil
}
// SSH synchronously SSHs to a node running on provider and runs cmd. If there
// is no error performing the SSH, the stdout, stderr, and exit code are
// returned.
func SSH(cmd, host, provider string) (string, string, int, error) {
// Get a signer for the provider.
signer, err := GetSigner(provider)
if err != nil {
return "", "", 0, fmt.Errorf("error getting signer for provider %s: '%v'", provider, err)
}
user := os.Getenv("KUBE_SSH_USER")
if user == "" {
user = os.Getenv("USER")
}
remote := fmt.Sprintf("%s@%s", user, host)
glog.Infof("Running `%s` on %s", cmd, remote)
stdout, stderr, code, err := util.RunSSHCommand(cmd, user, host, signer)
glog.Infof("[%s] stdout: %q", remote, stdout)
glog.Infof("[%s] stderr: %q", remote, stderr)
glog.Infof("[%s] exit code: %d", remote, code)
glog.Infof("[%s] error: %v", remote, err)
return stdout, stderr, code, err
}
// getSigner returns an ssh.Signer for the provider ("gce", etc.) that can be
// used to SSH to their nodes.
// TODO(mbforbes): Make GetSigner unexported by doing the refactor proposed as a
// TODO in addon_update.go (it copy/pastes much of this code).
func GetSigner(provider string) (ssh.Signer, error) {
// Get the directory in which SSH keys are located.
keydir := filepath.Join(os.Getenv("HOME"), ".ssh")
// Select the key itself to use. When implementing more providers here,
// please also add them to any SSH tests that are disabled because of signer
// support.
keyfile := ""
switch provider {
case "gce", "gke":
keyfile = "google_compute_engine"
case "aws":
keyfile = "kube_aws_rsa"
default:
return nil, fmt.Errorf("GetSigner(...) not implemented for %s", provider)
}
key := filepath.Join(keydir, keyfile)
glog.Infof("Using SSH key: %s", key)
return util.MakePrivateKeySignerFromFile(key)
}

View File

@ -48,7 +48,6 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
"github.com/davecgh/go-spew/spew"
"golang.org/x/crypto/ssh"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
@ -1347,76 +1346,6 @@ func BadEvents(events []*api.Event) int {
return badEvents
}
// NodeSSHHosts returns SSH-able host names for all nodes. It returns an error
// if it can't find an external IP for every node, though it still returns all
// hosts that it found in that case.
func NodeSSHHosts(c *client.Client) ([]string, error) {
var hosts []string
nodelist, err := c.Nodes().List(labels.Everything(), fields.Everything())
if err != nil {
return hosts, fmt.Errorf("error getting nodes: %v", err)
}
for _, n := range nodelist.Items {
for _, addr := range n.Status.Addresses {
// Use the first external IP address we find on the node, and
// use at most one per node.
// TODO(mbforbes): Use the "preferred" address for the node, once
// such a thing is defined (#2462).
if addr.Type == api.NodeExternalIP {
hosts = append(hosts, addr.Address+":22")
break
}
}
}
// Error if any node didn't have an external IP.
if len(hosts) != len(nodelist.Items) {
return hosts, fmt.Errorf(
"only found %d external IPs on nodes, but found %d nodes. Nodelist: %v",
len(hosts), len(nodelist.Items), nodelist)
}
return hosts, nil
}
// SSH synchronously SSHs to a node running on provider and runs cmd. If there
// is no error performing the SSH, the stdout, stderr, and exit code are
// returned.
func SSH(cmd, host, provider string) (string, string, int, error) {
// Get a signer for the provider.
signer, err := getSigner(provider)
if err != nil {
return "", "", 0, fmt.Errorf("error getting signer for provider %s: '%v'", provider, err)
}
user := os.Getenv("KUBE_SSH_USER")
// RunSSHCommand will default to Getenv("USER") if user == ""
return util.RunSSHCommand(cmd, user, host, signer)
}
// getSigner returns an ssh.Signer for the provider ("gce", etc.) that can be
// used to SSH to their nodes.
func getSigner(provider string) (ssh.Signer, error) {
// Get the directory in which SSH keys are located.
keydir := filepath.Join(os.Getenv("HOME"), ".ssh")
// Select the key itself to use. When implementing more providers here,
// please also add them to any SSH tests that are disabled because of signer
// support.
keyfile := ""
switch provider {
case "gce", "gke":
keyfile = "google_compute_engine"
case "aws":
keyfile = "kube_aws_rsa"
default:
return nil, fmt.Errorf("getSigner(...) not implemented for %s", provider)
}
key := filepath.Join(keydir, keyfile)
Logf("Using SSH key: %s", key)
return util.MakePrivateKeySignerFromFile(key)
}
// checkPodsRunning returns whether all pods whose names are listed in podNames
// in namespace ns are running and ready, using c and waiting at most timeout.
func checkPodsRunningReady(c *client.Client, ns string, podNames []string, timeout time.Duration) bool {