2015-01-23 18:03:04 +00:00
|
|
|
/*
|
|
|
|
Copyright 2014 Google Inc. All rights reserved.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2015-04-28 23:05:26 +00:00
|
|
|
package prober
|
2015-01-23 18:03:04 +00:00
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
2015-04-29 00:51:21 +00:00
|
|
|
"io"
|
2015-01-23 18:03:04 +00:00
|
|
|
"strconv"
|
2015-01-29 04:35:49 +00:00
|
|
|
"time"
|
2015-01-23 18:03:04 +00:00
|
|
|
|
|
|
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
|
2015-04-17 22:54:28 +00:00
|
|
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
|
2015-03-23 17:14:30 +00:00
|
|
|
kubecontainer "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/container"
|
2015-01-23 18:03:04 +00:00
|
|
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
|
|
|
|
execprobe "github.com/GoogleCloudPlatform/kubernetes/pkg/probe/exec"
|
|
|
|
httprobe "github.com/GoogleCloudPlatform/kubernetes/pkg/probe/http"
|
|
|
|
tcprobe "github.com/GoogleCloudPlatform/kubernetes/pkg/probe/tcp"
|
|
|
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
|
|
|
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/util/exec"
|
2015-03-06 23:00:45 +00:00
|
|
|
|
2015-01-23 18:03:04 +00:00
|
|
|
"github.com/golang/glog"
|
|
|
|
)
|
|
|
|
|
2015-02-16 07:44:55 +00:00
|
|
|
// maxProbeRetries is the attempt budget handed to runProbeWithRetries by the
// liveness and readiness checks.
const maxProbeRetries int = 3
|
2015-02-02 18:51:52 +00:00
|
|
|
|
2015-04-28 23:05:26 +00:00
|
|
|
// Prober checks the healthiness of a container.
|
|
|
|
type Prober interface {
|
|
|
|
Probe(pod *api.Pod, status api.PodStatus, container api.Container, containerID string, createdAt int64) (probe.Result, error)
|
|
|
|
}
|
|
|
|
|
2015-04-29 00:51:21 +00:00
|
|
|
type ContainerCommandRunner interface {
|
|
|
|
RunInContainer(containerID string, cmd []string) ([]byte, error)
|
|
|
|
ExecInContainer(containerID string, cmd []string, in io.Reader, out, err io.WriteCloser, tty bool) error
|
|
|
|
PortForward(pod *kubecontainer.Pod, port uint16, stream io.ReadWriteCloser) error
|
|
|
|
}
|
|
|
|
|
2015-04-28 23:05:26 +00:00
|
|
|
// Prober helps to check the liveness/readiness of a container.
|
2015-04-22 18:55:04 +00:00
|
|
|
type prober struct {
|
2015-04-29 00:51:21 +00:00
|
|
|
exec execprobe.ExecProber
|
|
|
|
http httprobe.HTTPProber
|
|
|
|
tcp tcprobe.TCPProber
|
|
|
|
// TODO(vmarmol): Remove when we remove the circular dependency to DockerManager.
|
|
|
|
Runner ContainerCommandRunner
|
2015-04-17 22:54:28 +00:00
|
|
|
|
|
|
|
readinessManager *kubecontainer.ReadinessManager
|
|
|
|
refManager *kubecontainer.RefManager
|
|
|
|
recorder record.EventRecorder
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewProber creates a Prober, it takes a command runner and
|
|
|
|
// several container info managers.
|
2015-04-28 23:05:26 +00:00
|
|
|
func New(
|
2015-04-29 00:51:21 +00:00
|
|
|
runner ContainerCommandRunner,
|
2015-04-17 22:54:28 +00:00
|
|
|
readinessManager *kubecontainer.ReadinessManager,
|
|
|
|
refManager *kubecontainer.RefManager,
|
2015-04-28 23:05:26 +00:00
|
|
|
recorder record.EventRecorder) Prober {
|
2015-04-17 22:54:28 +00:00
|
|
|
|
2015-04-22 18:55:04 +00:00
|
|
|
return &prober{
|
2015-04-17 22:54:28 +00:00
|
|
|
exec: execprobe.New(),
|
|
|
|
http: httprobe.New(),
|
|
|
|
tcp: tcprobe.New(),
|
2015-04-29 00:51:21 +00:00
|
|
|
Runner: runner,
|
2015-04-17 22:54:28 +00:00
|
|
|
|
|
|
|
readinessManager: readinessManager,
|
|
|
|
refManager: refManager,
|
|
|
|
recorder: recorder,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-28 23:05:26 +00:00
|
|
|
// New prober for use in tests.
|
|
|
|
func NewTestProber(
|
|
|
|
exec execprobe.ExecProber,
|
|
|
|
readinessManager *kubecontainer.ReadinessManager,
|
|
|
|
refManager *kubecontainer.RefManager,
|
|
|
|
recorder record.EventRecorder) Prober {
|
|
|
|
|
|
|
|
return &prober{
|
|
|
|
exec: exec,
|
|
|
|
readinessManager: readinessManager,
|
|
|
|
refManager: refManager,
|
|
|
|
recorder: recorder,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-17 22:54:28 +00:00
|
|
|
// Probe checks the liveness/readiness of the given container.
|
2015-03-06 23:00:45 +00:00
|
|
|
// If the container's liveness probe is unsuccessful, set readiness to false.
|
|
|
|
// If liveness is successful, do a readiness check and set readiness accordingly.
|
2015-04-22 18:55:04 +00:00
|
|
|
func (pb *prober) Probe(pod *api.Pod, status api.PodStatus, container api.Container, containerID string, createdAt int64) (probe.Result, error) {
|
2015-03-06 23:00:45 +00:00
|
|
|
// Probe liveness.
|
2015-04-17 22:54:28 +00:00
|
|
|
live, err := pb.probeLiveness(pod, status, container, containerID, createdAt)
|
2015-03-06 23:00:45 +00:00
|
|
|
if err != nil {
|
2015-03-07 01:19:32 +00:00
|
|
|
glog.V(1).Infof("Liveness probe errored: %v", err)
|
2015-04-17 22:54:28 +00:00
|
|
|
pb.readinessManager.SetReadiness(containerID, false)
|
2015-03-06 23:00:45 +00:00
|
|
|
return probe.Unknown, err
|
|
|
|
}
|
|
|
|
if live != probe.Success {
|
2015-03-07 01:19:32 +00:00
|
|
|
glog.V(1).Infof("Liveness probe unsuccessful: %v", live)
|
2015-04-17 22:54:28 +00:00
|
|
|
pb.readinessManager.SetReadiness(containerID, false)
|
2015-03-06 23:00:45 +00:00
|
|
|
return live, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Probe readiness.
|
2015-04-17 22:54:28 +00:00
|
|
|
ready, err := pb.probeReadiness(pod, status, container, containerID, createdAt)
|
2015-03-06 23:00:45 +00:00
|
|
|
if err == nil && ready == probe.Success {
|
2015-03-07 01:19:32 +00:00
|
|
|
glog.V(3).Infof("Readiness probe successful: %v", ready)
|
2015-04-17 22:54:28 +00:00
|
|
|
pb.readinessManager.SetReadiness(containerID, true)
|
2015-03-06 23:00:45 +00:00
|
|
|
return probe.Success, nil
|
|
|
|
}
|
|
|
|
|
2015-03-07 01:19:32 +00:00
|
|
|
glog.V(1).Infof("Readiness probe failed/errored: %v, %v", ready, err)
|
2015-04-17 22:54:28 +00:00
|
|
|
pb.readinessManager.SetReadiness(containerID, false)
|
2015-03-06 23:00:45 +00:00
|
|
|
|
2015-04-17 22:54:28 +00:00
|
|
|
ref, ok := pb.refManager.GetRef(containerID)
|
2015-03-06 23:00:45 +00:00
|
|
|
if !ok {
|
|
|
|
glog.Warningf("No ref for pod '%v' - '%v'", containerID, container.Name)
|
2015-04-01 23:40:31 +00:00
|
|
|
return probe.Success, err
|
2015-03-06 23:00:45 +00:00
|
|
|
}
|
2015-04-01 23:40:31 +00:00
|
|
|
|
|
|
|
if ready != probe.Success {
|
2015-04-17 22:54:28 +00:00
|
|
|
pb.recorder.Eventf(ref, "unhealthy", "Readiness Probe Failed %v - %v", containerID, container.Name)
|
2015-04-01 23:40:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return probe.Success, nil
|
2015-03-06 23:00:45 +00:00
|
|
|
}
|
|
|
|
|
2015-04-17 22:54:28 +00:00
|
|
|
// probeLiveness probes the liveness of a container.
|
2015-03-06 23:00:45 +00:00
|
|
|
// If the initalDelay since container creation on liveness probe has not passed the probe will return probe.Success.
|
2015-04-22 18:55:04 +00:00
|
|
|
func (pb *prober) probeLiveness(pod *api.Pod, status api.PodStatus, container api.Container, containerID string, createdAt int64) (probe.Result, error) {
|
2015-03-06 23:00:45 +00:00
|
|
|
p := container.LivenessProbe
|
|
|
|
if p == nil {
|
|
|
|
return probe.Success, nil
|
|
|
|
}
|
2015-03-18 21:28:27 +00:00
|
|
|
if time.Now().Unix()-createdAt < p.InitialDelaySeconds {
|
2015-03-06 23:00:45 +00:00
|
|
|
return probe.Success, nil
|
|
|
|
}
|
2015-04-17 22:54:28 +00:00
|
|
|
return pb.runProbeWithRetries(p, pod, status, container, containerID, maxProbeRetries)
|
2015-03-06 23:00:45 +00:00
|
|
|
}
|
|
|
|
|
2015-04-17 22:54:28 +00:00
|
|
|
// probeReadiness probes the readiness of a container.
|
2015-03-06 23:00:45 +00:00
|
|
|
// If the initial delay on the readiness probe has not passed the probe will return probe.Failure.
|
2015-04-22 18:55:04 +00:00
|
|
|
func (pb *prober) probeReadiness(pod *api.Pod, status api.PodStatus, container api.Container, containerID string, createdAt int64) (probe.Result, error) {
|
2015-03-06 23:00:45 +00:00
|
|
|
p := container.ReadinessProbe
|
2015-02-02 18:51:52 +00:00
|
|
|
if p == nil {
|
|
|
|
return probe.Success, nil
|
|
|
|
}
|
2015-03-18 21:28:27 +00:00
|
|
|
if time.Now().Unix()-createdAt < p.InitialDelaySeconds {
|
2015-03-06 23:00:45 +00:00
|
|
|
return probe.Failure, nil
|
2015-02-02 18:51:52 +00:00
|
|
|
}
|
2015-04-17 22:54:28 +00:00
|
|
|
return pb.runProbeWithRetries(p, pod, status, container, containerID, maxProbeRetries)
|
2015-03-07 00:52:31 +00:00
|
|
|
}
|
2015-03-06 23:00:45 +00:00
|
|
|
|
2015-03-07 00:52:31 +00:00
|
|
|
// runProbeWithRetries tries to probe the container in a finite loop, it returns the last result
|
|
|
|
// if it never succeeds.
|
2015-04-22 18:55:04 +00:00
|
|
|
func (pb *prober) runProbeWithRetries(p *api.Probe, pod *api.Pod, status api.PodStatus, container api.Container, containerID string, retires int) (probe.Result, error) {
|
2015-03-06 23:00:45 +00:00
|
|
|
var err error
|
2015-03-07 00:52:31 +00:00
|
|
|
var result probe.Result
|
|
|
|
for i := 0; i < retires; i++ {
|
2015-04-17 22:54:28 +00:00
|
|
|
result, err = pb.runProbe(p, pod, status, container, containerID)
|
2015-02-02 18:51:52 +00:00
|
|
|
if result == probe.Success {
|
2015-03-07 00:52:31 +00:00
|
|
|
return probe.Success, nil
|
2015-02-02 18:51:52 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return result, err
|
|
|
|
}
|
|
|
|
|
2015-04-22 18:55:04 +00:00
|
|
|
func (pb *prober) runProbe(p *api.Probe, pod *api.Pod, status api.PodStatus, container api.Container, containerID string) (probe.Result, error) {
|
2015-02-16 07:44:55 +00:00
|
|
|
timeout := time.Duration(p.TimeoutSeconds) * time.Second
|
2015-01-23 18:03:04 +00:00
|
|
|
if p.Exec != nil {
|
2015-03-30 21:09:18 +00:00
|
|
|
glog.V(4).Infof("Exec-Probe Pod: %v, Container: %v", pod, container)
|
2015-04-17 22:54:28 +00:00
|
|
|
return pb.exec.Probe(pb.newExecInContainer(pod, container, containerID))
|
2015-01-23 18:03:04 +00:00
|
|
|
}
|
|
|
|
if p.HTTPGet != nil {
|
|
|
|
port, err := extractPort(p.HTTPGet.Port, container)
|
|
|
|
if err != nil {
|
|
|
|
return probe.Unknown, err
|
|
|
|
}
|
2015-01-29 04:35:49 +00:00
|
|
|
host, port, path := extractGetParams(p.HTTPGet, status, port)
|
2015-03-30 21:09:18 +00:00
|
|
|
glog.V(4).Infof("HTTP-Probe Host: %v, Port: %v, Path: %v", host, port, path)
|
2015-04-17 22:54:28 +00:00
|
|
|
return pb.http.Probe(host, port, path, timeout)
|
2015-01-23 18:03:04 +00:00
|
|
|
}
|
|
|
|
if p.TCPSocket != nil {
|
|
|
|
port, err := extractPort(p.TCPSocket.Port, container)
|
|
|
|
if err != nil {
|
|
|
|
return probe.Unknown, err
|
|
|
|
}
|
2015-03-30 21:09:18 +00:00
|
|
|
glog.V(4).Infof("TCP-Probe PodIP: %v, Port: %v, Timeout: %v", status.PodIP, port, timeout)
|
2015-04-17 22:54:28 +00:00
|
|
|
return pb.tcp.Probe(status.PodIP, port, timeout)
|
2015-01-23 18:03:04 +00:00
|
|
|
}
|
|
|
|
glog.Warningf("Failed to find probe builder for %s %+v", container.Name, container.LivenessProbe)
|
|
|
|
return probe.Unknown, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func extractGetParams(action *api.HTTPGetAction, status api.PodStatus, port int) (string, int, string) {
|
|
|
|
host := action.Host
|
|
|
|
if host == "" {
|
|
|
|
host = status.PodIP
|
|
|
|
}
|
|
|
|
return host, port, action.Path
|
|
|
|
}
|
|
|
|
|
|
|
|
func extractPort(param util.IntOrString, container api.Container) (int, error) {
|
|
|
|
port := -1
|
|
|
|
var err error
|
|
|
|
switch param.Kind {
|
|
|
|
case util.IntstrInt:
|
2015-01-25 02:48:55 +00:00
|
|
|
port := param.IntVal
|
|
|
|
if port > 0 && port < 65536 {
|
|
|
|
return port, nil
|
|
|
|
}
|
|
|
|
return port, fmt.Errorf("invalid port number: %v", port)
|
2015-01-23 18:03:04 +00:00
|
|
|
case util.IntstrString:
|
|
|
|
port = findPortByName(container, param.StrVal)
|
|
|
|
if port == -1 {
|
|
|
|
// Last ditch effort - maybe it was an int stored as string?
|
|
|
|
if port, err = strconv.Atoi(param.StrVal); err != nil {
|
|
|
|
return port, err
|
|
|
|
}
|
|
|
|
}
|
2015-01-25 02:48:55 +00:00
|
|
|
if port > 0 && port < 65536 {
|
|
|
|
return port, nil
|
|
|
|
}
|
|
|
|
return port, fmt.Errorf("invalid port number: %v", port)
|
2015-01-23 18:03:04 +00:00
|
|
|
default:
|
|
|
|
return port, fmt.Errorf("IntOrString had no kind: %+v", param)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// findPortByName is a helper function to look up a port in a container by name.
|
|
|
|
// Returns the HostPort if found, -1 if not found.
|
|
|
|
func findPortByName(container api.Container, portName string) int {
|
|
|
|
for _, port := range container.Ports {
|
|
|
|
if port.Name == portName {
|
|
|
|
return port.HostPort
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return -1
|
|
|
|
}
|
|
|
|
|
|
|
|
// execInContainer wraps a closure so it can be handed to the exec prober as
// a command: its CombinedOutput method simply invokes run.
type execInContainer struct {
	// run performs the command and returns its output bytes and error.
	run func() ([]byte, error)
}
|
|
|
|
|
2015-04-22 18:55:04 +00:00
|
|
|
func (p *prober) newExecInContainer(pod *api.Pod, container api.Container, containerID string) exec.Cmd {
|
2015-01-23 18:03:04 +00:00
|
|
|
return execInContainer{func() ([]byte, error) {
|
2015-04-29 00:51:21 +00:00
|
|
|
return p.Runner.RunInContainer(containerID, container.LivenessProbe.Exec.Command)
|
2015-01-23 18:03:04 +00:00
|
|
|
}}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (eic execInContainer) CombinedOutput() ([]byte, error) {
|
|
|
|
return eic.run()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (eic execInContainer) SetDir(dir string) {
|
|
|
|
//unimplemented
|
|
|
|
}
|