mirror of https://github.com/k3s-io/k3s
Merge pull request #53318 from sjenning/fix-http-probe-conn-pools
Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions at https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md.

create separate transports for liveness and readiness probes

There is currently an issue with the http2 connection pools in golang such that two GETs to the same host:port using the same Transport can collide, and one is rejected with `http2: no cached connection was available`. This happens with readiness and liveness probes when the intervals line up so that worker goroutines invoke the two probes at exactly the same time. The result is a transient probe error that shows up in the events. If the failureThreshold is 1 (an extreme setting), it would cause a pod restart.

This PR creates a separate `httprobe` instance for readiness and liveness probes so that they do not share a Transport and connection pool.

Fixes https://github.com/kubernetes/kubernetes/issues/49740

@smarterclayton @jhorwit2
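For illustration only (this is not the kubelet code): a minimal, self-contained Go sketch of the pattern the PR applies, giving each probe type its own client with its own Transport so that simultaneous probes against the same host:port never contend for a shared connection pool. The helper name and health endpoint URL below are made up for the example.

```go
package main

import (
	"net/http"
	"time"
)

// newProbeClient returns a client with its own Transport, and therefore its
// own connection pool. (Hypothetical helper, for illustration only.)
func newProbeClient(timeout time.Duration) *http.Client {
	return &http.Client{
		Timeout:   timeout,
		Transport: &http.Transport{}, // independent pool per probe type
	}
}

func main() {
	// Separate clients for the two probe types, mirroring the PR's
	// readinessHttp/livenessHttp split; a single shared client would make
	// concurrent probes to the same host:port contend for one pool.
	livenessClient := newProbeClient(time.Second)
	readinessClient := newProbeClient(time.Second)

	// Hypothetical health endpoint; both probes may fire at the same instant.
	url := "http://127.0.0.1:10254/healthz"
	go func() { _, _ = livenessClient.Get(url) }()
	go func() { _, _ = readinessClient.Get(url) }()

	time.Sleep(2 * time.Second)
}
```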
commit 6ed207374f
@@ -46,10 +46,14 @@ const maxProbeRetries = 3
 
 // Prober helps to check the liveness/readiness of a container.
 type prober struct {
-	exec   execprobe.ExecProber
-	http   httprobe.HTTPProber
-	tcp    tcprobe.TCPProber
-	runner kubecontainer.ContainerCommandRunner
+	exec execprobe.ExecProber
+	// probe types needs different httprobe instances so they don't
+	// share a connection pool which can cause collsions to the
+	// same host:port and transient failures. See #49740.
+	readinessHttp httprobe.HTTPProber
+	livenessHttp  httprobe.HTTPProber
+	tcp           tcprobe.TCPProber
+	runner        kubecontainer.ContainerCommandRunner
 
 	refManager *kubecontainer.RefManager
 	recorder   record.EventRecorder
@@ -63,12 +67,13 @@ func newProber(
 	recorder record.EventRecorder) *prober {
 
 	return &prober{
-		exec:       execprobe.New(),
-		http:       httprobe.New(),
-		tcp:        tcprobe.New(),
-		runner:     runner,
-		refManager: refManager,
-		recorder:   recorder,
+		exec:          execprobe.New(),
+		readinessHttp: httprobe.New(),
+		livenessHttp:  httprobe.New(),
+		tcp:           tcprobe.New(),
+		runner:        runner,
+		refManager:    refManager,
+		recorder:      recorder,
 	}
 }
 
@@ -90,7 +95,7 @@ func (pb *prober) probe(probeType probeType, pod *v1.Pod, status v1.PodStatus, c
 		return results.Success, nil
 	}
 
-	result, output, err := pb.runProbeWithRetries(probeSpec, pod, status, container, containerID, maxProbeRetries)
+	result, output, err := pb.runProbeWithRetries(probeType, probeSpec, pod, status, container, containerID, maxProbeRetries)
 	if err != nil || result != probe.Success {
 		// Probe failed in one way or another.
 		ref, hasRef := pb.refManager.GetRef(containerID)
@@ -116,12 +121,12 @@ func (pb *prober) probe(probeType probeType, pod *v1.Pod, status v1.PodStatus, c
 
 // runProbeWithRetries tries to probe the container in a finite loop, it returns the last result
 // if it never succeeds.
-func (pb *prober) runProbeWithRetries(p *v1.Probe, pod *v1.Pod, status v1.PodStatus, container v1.Container, containerID kubecontainer.ContainerID, retries int) (probe.Result, string, error) {
+func (pb *prober) runProbeWithRetries(probeType probeType, p *v1.Probe, pod *v1.Pod, status v1.PodStatus, container v1.Container, containerID kubecontainer.ContainerID, retries int) (probe.Result, string, error) {
 	var err error
 	var result probe.Result
 	var output string
 	for i := 0; i < retries; i++ {
-		result, output, err = pb.runProbe(p, pod, status, container, containerID)
+		result, output, err = pb.runProbe(probeType, p, pod, status, container, containerID)
 		if err == nil {
 			return result, output, nil
 		}
@@ -139,7 +144,7 @@ func buildHeader(headerList []v1.HTTPHeader) http.Header {
 	return headers
 }
 
-func (pb *prober) runProbe(p *v1.Probe, pod *v1.Pod, status v1.PodStatus, container v1.Container, containerID kubecontainer.ContainerID) (probe.Result, string, error) {
+func (pb *prober) runProbe(probeType probeType, p *v1.Probe, pod *v1.Pod, status v1.PodStatus, container v1.Container, containerID kubecontainer.ContainerID) (probe.Result, string, error) {
 	timeout := time.Duration(p.TimeoutSeconds) * time.Second
 	if p.Exec != nil {
 		glog.V(4).Infof("Exec-Probe Pod: %v, Container: %v, Command: %v", pod, container, p.Exec.Command)
@@ -161,7 +166,11 @@ func (pb *prober) runProbe(p *v1.Probe, pod *v1.Pod, status v1.PodStatus, contai
 		url := formatURL(scheme, host, port, path)
 		headers := buildHeader(p.HTTPGet.HTTPHeaders)
 		glog.V(4).Infof("HTTP-Probe Headers: %v", headers)
-		return pb.http.Probe(url, headers, timeout)
+		if probeType == liveness {
+			return pb.livenessHttp.Probe(url, headers, timeout)
+		} else { // readiness
+			return pb.readinessHttp.Probe(url, headers, timeout)
+		}
 	}
 	if p.TCPSocket != nil {
 		port, err := extractPort(p.TCPSocket.Port, container)