Merge pull request #71617 from RobertKrawitz/branch-issue71614

Issue 71614: Protect log message maps
Kubernetes Prow Robot 2018-12-03 14:03:38 -08:00 committed by GitHub
commit c7598e8844
2 changed files with 18 additions and 0 deletions
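
For context on the pattern being applied: both files keep a lastError map (the last error string seen per key) and an errorPrinted map (when that error was last logged), and this PR wraps every access to those maps in a sync.Mutex so concurrent callers cannot race on them. A minimal, self-contained sketch of the idea, with illustrative names (errorDeduper, shouldLog) that are not the kubelet's actual types:

package main

import (
	"fmt"
	"sync"
	"time"
)

// identicalErrorDelay mirrors the constant referenced in the diff below;
// the exact value here is only illustrative.
const identicalErrorDelay = 1 * time.Minute

// errorDeduper is a hypothetical stand-in for the runtime manager's
// error-dedup state: both maps are guarded by a single mutex.
type errorDeduper struct {
	mu           sync.Mutex
	lastError    map[string]string
	errorPrinted map[string]time.Time
}

func newErrorDeduper() *errorDeduper {
	return &errorDeduper{
		lastError:    map[string]string{},
		errorPrinted: map[string]time.Time{},
	}
}

// shouldLog reports whether err for key should be logged now, and records it
// if so. Every read and write of the maps happens under the lock, because
// even a map read races with a concurrent write.
func (d *errorDeduper) shouldLog(key string, err error) bool {
	d.mu.Lock()
	defer d.mu.Unlock()
	last, ok := d.lastError[key]
	if !ok || err.Error() != last || time.Since(d.errorPrinted[key]) >= identicalErrorDelay {
		d.lastError[key] = err.Error()
		d.errorPrinted[key] = time.Now()
		return true
	}
	return false
}

func main() {
	d := newErrorDeduper()
	err := fmt.Errorf("runtime unavailable")
	fmt.Println(d.shouldLog("container-1", err)) // true: first occurrence
	fmt.Println(d.shouldLog("container-1", err)) // false: same error within identicalErrorDelay
}

The diff below applies this kind of locking around the existing dedup checks rather than restructuring them.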

View File

@@ -20,6 +20,7 @@ import (
"errors"
"fmt"
"os"
"sync"
"time"
cadvisorapi "github.com/google/cadvisor/info/v1"
@@ -131,6 +132,7 @@ type kubeGenericRuntimeManager struct {
// Time last per-container error message was printed
errorPrinted map[string]time.Time
errorMapLock sync.Mutex
}
// KubeGenericRuntime is a interface contains interfaces for container runtime and command.
@@ -830,6 +832,8 @@ func (m *kubeGenericRuntimeManager) killPodWithSyncResult(pod *v1.Pod, runningPo
}
func (m *kubeGenericRuntimeManager) cleanupErrorTimeouts() {
m.errorMapLock.Lock()
defer m.errorMapLock.Unlock()
for name, timeout := range m.errorPrinted {
if time.Now().Sub(timeout) >= identicalErrorDelay {
delete(m.errorPrinted, name)
@@ -886,6 +890,8 @@ func (m *kubeGenericRuntimeManager) GetPodStatus(uid kubetypes.UID, name, namesp
// Get statuses of all containers visible in the pod.
containerStatuses, err := m.getPodContainerStatuses(uid, name, namespace)
m.errorMapLock.Lock()
defer m.errorMapLock.Unlock()
if err != nil {
lastMsg, ok := m.lastError[podFullName]
if !ok || err.Error() != lastMsg || time.Now().Sub(m.errorPrinted[podFullName]) >= identicalErrorDelay {
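
The same lock also has to cover the cleanup pass that expires old entries (cleanupErrorTimeouts above) and, in the second file below, the outright deletes that StopContainer and RemoveContainer perform when a container goes away. Extending the illustrative errorDeduper sketch from the top of this page (still an assumption-laden sketch, not the kubelet code):

// cleanupExpired mirrors cleanupErrorTimeouts: drop entries whose last-print
// time is older than identicalErrorDelay. Deleting keys while ranging over a
// Go map is safe; the lock is held for the whole sweep.
func (d *errorDeduper) cleanupExpired() {
	d.mu.Lock()
	defer d.mu.Unlock()
	for key, printed := range d.errorPrinted {
		if time.Since(printed) >= identicalErrorDelay {
			delete(d.errorPrinted, key)
			delete(d.lastError, key)
		}
	}
}

// forget mirrors the StopContainer/RemoveContainer change: drop a key's state
// entirely, holding the lock only for the two deletes.
func (d *errorDeduper) forget(key string) {
	d.mu.Lock()
	delete(d.lastError, key)
	delete(d.errorPrinted, key)
	d.mu.Unlock()
}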

View File

@@ -21,6 +21,7 @@ import (
"errors"
"fmt"
"strings"
"sync"
"time"
"google.golang.org/grpc"
@@ -40,6 +41,7 @@ type RemoteRuntimeService struct {
lastError map[string]string
// Time last per-container error message was printed
errorPrinted map[string]time.Time
errorMapLock sync.Mutex
}
const (
@@ -236,8 +238,10 @@ func (r *RemoteRuntimeService) StopContainer(containerID string, timeout int64)
ctx, cancel := getContextWithTimeout(t)
defer cancel()
r.errorMapLock.Lock()
delete(r.lastError, containerID)
delete(r.errorPrinted, containerID)
r.errorMapLock.Unlock()
_, err := r.runtimeClient.StopContainer(ctx, &runtimeapi.StopContainerRequest{
ContainerId: containerID,
Timeout: timeout,
@@ -256,8 +260,10 @@ func (r *RemoteRuntimeService) RemoveContainer(containerID string) error {
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
r.errorMapLock.Lock()
delete(r.lastError, containerID)
delete(r.errorPrinted, containerID)
r.errorMapLock.Unlock()
_, err := r.runtimeClient.RemoveContainer(ctx, &runtimeapi.RemoveContainerRequest{
ContainerId: containerID,
})
@@ -287,6 +293,8 @@ func (r *RemoteRuntimeService) ListContainers(filter *runtimeapi.ContainerFilter
// Clean up any expired last-error timers
func (r *RemoteRuntimeService) cleanupErrorTimeouts() {
r.errorMapLock.Lock()
defer r.errorMapLock.Unlock()
for ID, timeout := range r.errorPrinted {
if time.Now().Sub(timeout) >= identicalErrorDelay {
delete(r.lastError, ID)
@@ -304,6 +312,8 @@ func (r *RemoteRuntimeService) ContainerStatus(containerID string) (*runtimeapi.
ContainerId: containerID,
})
r.cleanupErrorTimeouts()
r.errorMapLock.Lock()
defer r.errorMapLock.Unlock()
if err != nil {
// Don't spam the log with endless messages about the same failure.
lastMsg, ok := r.lastError[containerID]
@@ -491,6 +501,8 @@ func (r *RemoteRuntimeService) ContainerStats(containerID string) (*runtimeapi.C
ContainerId: containerID,
})
r.cleanupErrorTimeouts()
r.errorMapLock.Lock()
defer r.errorMapLock.Unlock()
if err != nil {
lastMsg, ok := r.lastError[containerID]
if !ok || err.Error() != lastMsg || time.Now().Sub(r.errorPrinted[containerID]) >= identicalErrorDelay {
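
Why the lock matters: these status and stats paths can be called from multiple kubelet goroutines, and unsynchronized writes to a Go map can abort the process with "concurrent map writes" or be flagged by the race detector. A quick way to exercise the illustrative deduper from the sketches above under contention (hammer, the key names, and the counts are all made up for the example) is something like the following, run with -race enabled:

// hammer drives the deduper from several goroutines at once; with the mutex
// in place a -race run is clean, and with the Lock/Unlock pairs removed the
// race detector reports the same kind of data race this PR addresses.
func hammer() {
	d := newErrorDeduper()
	err := fmt.Errorf("transient runtime error")
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			key := fmt.Sprintf("container-%d", id%3) // a few goroutines share each key
			for j := 0; j < 1000; j++ {
				d.shouldLog(key, err)
				d.cleanupExpired()
			}
		}(i)
	}
	wg.Wait()
}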