mirror of https://github.com/k3s-io/k3s
Merge pull request #10925 from bprashanth/sidecar_exec
Sidecar container capable of servicing exec style liveness probes over httppull/6/head
commit
5540570e44
|
@ -0,0 +1,19 @@
|
|||
# Copyright 2015 The Kubernetes Authors. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Minimal image that runs the exechealthz binary as its entrypoint.
FROM busybox
MAINTAINER Prashanth B <beeps@google.com>
# COPY (not ADD) is used because these are plain local files. The destinations
# are absolute so the binary always lands at the path ENTRYPOINT expects,
# regardless of the image's working directory.
COPY exechealthz /exechealthz
COPY README.md /README.md
ENTRYPOINT ["/exechealthz"]
|
|
@ -0,0 +1,17 @@
|
|||
# Builds the exechealthz binary, packages it into a docker image and pushes it.
# None of these targets produce a file with the target's own name, so they are
# declared phony to keep stray files named e.g. "server" from masking them.
.PHONY: all server container push clean

all: push

# 0.0 shouldn't clobber any released builds
TAG = 0.0
PREFIX = gcr.io/google_containers/exechealthz

# server is a convenience alias for the binary it builds.
server: exechealthz

# The real file target: only rebuilt when the source changes.
exechealthz: exechealthz.go
	CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags '-w' -o exechealthz ./exechealthz.go

container: server
	docker build -t $(PREFIX):$(TAG) .

push: container
	gcloud docker push $(PREFIX):$(TAG)

clean:
	rm -f exechealthz
|
|
@ -0,0 +1,99 @@
|
|||
# Exec healthz server
|
||||
|
||||
The exec healthz server is a sidecar container meant to serve as a liveness-exec-over-http bridge. It isolates pods from the idiosyncrasies of container runtime exec implementations.
|
||||
|
||||
## Examples:
|
||||
|
||||
### Run the healthz server directly on localhost:
|
||||
|
||||
```shell
|
||||
$ make server
|
||||
$ ./exechealthz -cmd "ls /tmp/test"
|
||||
$ curl http://localhost:8080/healthz
|
||||
Healthz probe error: Result of last exec: ls: cannot access /tmp/test: No such file or directory
|
||||
, at 2015-07-08 17:59:45.698036238 -0700 PDT, error exit status 2
|
||||
$ touch /tmp/test
|
||||
$ curl http://localhost:8080/healthz
|
||||
ok
|
||||
```
|
||||
|
||||
### Run the healthz server in a docker container:
|
||||
|
||||
The [docker daemon](https://docs.docker.com/userguide/) needs to be running on your host.
|
||||
```shell
|
||||
$ make container PREFIX=mycontainer/test
|
||||
$ docker run -itP -p 8080:8080 mycontainer/test:0.0 -cmd "ls /tmp/test"
|
||||
$ curl http://localhost:8080/healthz
|
||||
Healthz probe error: Result of last exec: ls: cannot access /tmp/test: No such file or directory
|
||||
, at 2015-07-08 18:00:57.698103532 -0700 PDT, error exit status 2
|
||||
|
||||
$ docker ps
|
||||
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
|
||||
8e86f8accfa6 mycontainer/test:0.0 "/exechealthz -cm" 27 seconds ago Up 26 seconds 0.0.0.0:8080->8080/tcp loving_albattani
|
||||
$ docker exec -it 8e86f8accfa6 touch /tmp/test
|
||||
$ curl http://localhost:8080/healthz
|
||||
ok
|
||||
```
|
||||
|
||||
### Run the healthz server in a kubernetes pod:
|
||||
|
||||
You need a running [kubernetes cluster](../../docs/getting-started-guides/README.md).
|
||||
Create a pod.json that looks like:
|
||||
```json
|
||||
{
|
||||
"kind": "Pod",
|
||||
"apiVersion": "v1",
|
||||
"metadata": {
|
||||
"name": "simple"
|
||||
},
|
||||
"spec": {
|
||||
"containers": [
|
||||
{
|
||||
"name": "healthz",
|
||||
"image": "gcr.io/google_containers/exechealthz:1.0",
|
||||
"args": [
|
||||
"-cmd=nslookup localhost"
|
||||
],
|
||||
"ports": [
|
||||
{
|
||||
"containerPort": 8080,
|
||||
"protocol": "TCP"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
And run the pod on your cluster using kubectl:
|
||||
```shell
|
||||
$ kubectl create -f pod.json
|
||||
pods/simple
|
||||
$ kubectl get pods -o wide
|
||||
NAME READY STATUS RESTARTS AGE NODE
|
||||
simple 0/1 Pending 0 3s node
|
||||
```
|
||||
|
||||
SSH into the node (note that the recommended way to access a server in a container is through a [service](../../docs/services.md); the example that follows is just to illustrate how the kubelet performs an http liveness probe):
|
||||
```shell
|
||||
node$ kubectl get pods simple -o json | grep podIP
|
||||
"podIP": "10.1.0.2",
|
||||
|
||||
node$ curl http://10.1.0.2:8080/healthz
|
||||
ok
|
||||
```
|
||||
|
||||
### Run the healthz server as a sidecar container for liveness probes of another container:
|
||||
Create a pod.json with 2 containers, one of which is the healthz probe and the other, the container being health checked. The
|
||||
pod.json example file in this directory does exactly that. If you create the pod the same way you created the pod in the previous
|
||||
example, the kubelet on the node will periodically perform a health check similar to what you did manually and restart the container
|
||||
when it fails. Explore [liveness probes](../../examples/liveness/README.md).
|
||||
|
||||
## Limitations:
|
||||
* Doesn't handle SIGTERM, which means `docker stop` on this container can take longer than it needs to.
|
||||
* Doesn't sanity check the probe command. You should set the -period and -latency parameters of exechealthz appropriately.
|
||||
* Only ever returns 503 or 200.
|
||||
|
||||
|
||||
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/contrib/exec-healthz/README.md?pixel)]()
|
|
@ -0,0 +1,161 @@
|
|||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// A tiny web server that returns 200 on its healthz endpoint if the command
|
||||
// passed in via -cmd exits with 0. Returns 503 otherwise.
|
||||
// Usage: exechealthz -port 8080 -period 2s -latency 30s -cmd 'nslookup localhost >/dev/null'
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TODO:
|
||||
// 1. Sigterm handler for docker stop
|
||||
// 2. Meaningful default healthz
|
||||
// 3. 404 for unknown endpoints
|
||||
|
||||
var (
|
||||
port = flag.Int("port", 8080, "Port number to serve /healthz.")
|
||||
cmd = flag.String("cmd", "echo healthz", "Command to run in response to a GET on /healthz. If the given command exits with 0, /healthz will respond with a 200.")
|
||||
period = flag.Duration("period", 2*time.Second, "Period to run the given cmd in an async worker.")
|
||||
maxLatency = flag.Duration("latency", 30*time.Second, "If the async worker hasn't updated the probe command output in this long, return a 503.")
|
||||
// prober is the async worker running the cmd, the output of which is used to service /healthz.
|
||||
prober *execWorker
|
||||
)
|
||||
|
||||
// execResult is a snapshot of the most recent probe command execution
// recorded by the execWorker.
type execResult struct {
	// output is the output captured from the command.
	output []byte
	// err is the error returned by running the command, nil on success.
	err error
	// ts is the time at which the result was recorded.
	ts time.Time
}

// String renders the result as a single human-readable line containing the
// command output, the timestamp and the error (or "None" when there was no
// error).
func (r execResult) String() string {
	// Default to the "None" placeholder; a non-nil error is formatted by
	// fmt through its Error method.
	var failure interface{} = "None"
	if r.err != nil {
		failure = r.err
	}
	return fmt.Sprintf("Result of last exec: %s, at %v, error %v", r.output, r.ts, failure)
}
|
||||
|
||||
// execWorker provides an async interface to exec.
|
||||
type execWorker struct {
|
||||
result execResult
|
||||
mutex sync.Mutex
|
||||
period time.Duration
|
||||
probeCmd string
|
||||
stopCh chan struct{}
|
||||
}
|
||||
|
||||
// getResults returns the results of the latest execWorker run.
|
||||
// The caller should treat returned results as read-only.
|
||||
func (h *execWorker) getResults() execResult {
|
||||
h.mutex.Lock()
|
||||
defer h.mutex.Unlock()
|
||||
return h.result
|
||||
}
|
||||
|
||||
// start attemtps to run the probeCmd every `period` seconds.
|
||||
// Meant to be called as a goroutine.
|
||||
func (h *execWorker) start() {
|
||||
ticker := time.NewTicker(h.period)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
// If the command takes > period, the command runs continuously.
|
||||
case <-ticker.C:
|
||||
log.Printf("Worker running %v", *cmd)
|
||||
output, err := exec.Command("sh", "-c", *cmd).CombinedOutput()
|
||||
ts := time.Now()
|
||||
func() {
|
||||
h.mutex.Lock()
|
||||
defer h.mutex.Unlock()
|
||||
h.result = execResult{output, err, ts}
|
||||
}()
|
||||
case <-h.stopCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// newExecWorker is a constructor for execWorker.
|
||||
func newExecWorker(probeCmd string, execPeriod time.Duration) *execWorker {
|
||||
return &execWorker{
|
||||
// Initializing the result with a timestamp here allows us to
|
||||
// wait maxLatency for the worker goroutine to start, and for each
|
||||
// iteration of the worker to complete.
|
||||
result: execResult{[]byte{}, nil, time.Now()},
|
||||
period: execPeriod,
|
||||
probeCmd: probeCmd,
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
links := []struct {
|
||||
link, desc string
|
||||
}{
|
||||
{"/healthz", "healthz probe. Returns \"ok\" if the command given through -cmd exits with 0."},
|
||||
{"/quit", "Cause this container to exit."},
|
||||
}
|
||||
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
||||
fmt.Fprintf(w, "<b> Kubernetes healthz sidecar container </b><br/><br/>")
|
||||
for _, v := range links {
|
||||
fmt.Fprintf(w, `<a href="%v">%v: %v</a><br/>`, v.link, v.link, v.desc)
|
||||
}
|
||||
})
|
||||
|
||||
http.HandleFunc("/quit", func(w http.ResponseWriter, r *http.Request) {
|
||||
log.Printf("Shutdown requested via /quit by %v", r.RemoteAddr)
|
||||
os.Exit(0)
|
||||
})
|
||||
prober = newExecWorker(*cmd, *period)
|
||||
defer close(prober.stopCh)
|
||||
go prober.start()
|
||||
|
||||
http.HandleFunc("/healthz", healthzHandler)
|
||||
log.Fatal(http.ListenAndServe(fmt.Sprintf("0.0.0.0:%d", *port), nil))
|
||||
}
|
||||
|
||||
func healthzHandler(w http.ResponseWriter, r *http.Request) {
|
||||
log.Printf("Client ip %v requesting /healthz probe servicing cmd %v", r.RemoteAddr, *cmd)
|
||||
result := prober.getResults()
|
||||
|
||||
// return 503 if the last command exec returned a non-zero status, or the worker
|
||||
// hasn't run in maxLatency (including when the worker goroutine is cpu starved,
|
||||
// because the pod is probably unavailable too).
|
||||
if result.err != nil {
|
||||
msg := fmt.Sprintf("Healthz probe error: %v", result)
|
||||
log.Printf(msg)
|
||||
http.Error(w, msg, http.StatusServiceUnavailable)
|
||||
} else if time.Since(result.ts) > *maxLatency {
|
||||
msg := fmt.Sprintf("Latest result too old to be useful: %v.", result)
|
||||
log.Printf(msg)
|
||||
http.Error(w, msg, http.StatusServiceUnavailable)
|
||||
} else {
|
||||
fmt.Fprintf(w, "ok")
|
||||
}
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
{
|
||||
"kind": "Pod",
|
||||
"apiVersion": "v1",
|
||||
"metadata": {
|
||||
"name": "simple"
|
||||
},
|
||||
"spec": {
|
||||
"containers": [
|
||||
{
|
||||
"name": "healthz",
|
||||
"image": "gcr.io/google_containers/exechealthz:1.0",
|
||||
"args": [
|
||||
"-cmd=nslookup localhost"
|
||||
],
|
||||
"ports": [
|
||||
{
|
||||
"containerPort": 8080,
|
||||
"protocol": "TCP"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name":"test-container",
|
||||
"image":"ubuntu:14.04",
|
||||
"command": ["bash", "-c", "while true; do sleep 100; done"],
|
||||
"livenessProbe": {
|
||||
"httpGet": {
|
||||
"path": "/healthz",
|
||||
"port":8080
|
||||
},
|
||||
"initialDelaySeconds": 10,
|
||||
"timeoutSeconds": 2
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue