fix node and kubelet start times

pull/564/head
David Ashpole 2018-12-05 15:07:52 -08:00
parent 2bd14ea60e
commit 54e581930c
10 changed files with 154 additions and 33 deletions

View File

@ -18,6 +18,7 @@ go_library(
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library", "//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
"//pkg/kubelet/cm:go_default_library", "//pkg/kubelet/cm:go_default_library",
"//pkg/kubelet/container:go_default_library", "//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/util:go_default_library",
"//pkg/kubelet/util/format:go_default_library", "//pkg/kubelet/util/format:go_default_library",
"//pkg/volume:go_default_library", "//pkg/volume:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library",

View File

@ -19,7 +19,11 @@ package stats
import ( import (
"fmt" "fmt"
"k8s.io/klog"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1" statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/kubelet/util"
) )
type SummaryProvider interface { type SummaryProvider interface {
@ -32,6 +36,11 @@ type SummaryProvider interface {
// summaryProviderImpl implements the SummaryProvider interface. // summaryProviderImpl implements the SummaryProvider interface.
type summaryProviderImpl struct { type summaryProviderImpl struct {
// kubeletCreationTime is the time at which the summaryProvider was created.
kubeletCreationTime metav1.Time
// systemBootTime is the time at which the system was started
systemBootTime metav1.Time
provider StatsProvider provider StatsProvider
} }
@ -40,7 +49,18 @@ var _ SummaryProvider = &summaryProviderImpl{}
// NewSummaryProvider returns a SummaryProvider using the stats provided by the // NewSummaryProvider returns a SummaryProvider using the stats provided by the
// specified statsProvider. // specified statsProvider.
func NewSummaryProvider(statsProvider StatsProvider) SummaryProvider { func NewSummaryProvider(statsProvider StatsProvider) SummaryProvider {
return &summaryProviderImpl{statsProvider} kubeletCreationTime := metav1.Now()
bootTime, err := util.GetBootTime()
if err != nil {
// bootTime will be zero if we encounter an error getting the boot time.
klog.Warningf("Error getting system boot time. Node metrics will have an incorrect start time: %v", err)
}
return &summaryProviderImpl{
kubeletCreationTime: kubeletCreationTime,
systemBootTime: metav1.NewTime(bootTime),
provider: statsProvider,
}
} }
func (sp *summaryProviderImpl) Get(updateStats bool) (*statsapi.Summary, error) { func (sp *summaryProviderImpl) Get(updateStats bool) (*statsapi.Summary, error) {
@ -77,7 +97,7 @@ func (sp *summaryProviderImpl) Get(updateStats bool) (*statsapi.Summary, error)
CPU: rootStats.CPU, CPU: rootStats.CPU,
Memory: rootStats.Memory, Memory: rootStats.Memory,
Network: networkStats, Network: networkStats,
StartTime: rootStats.StartTime, StartTime: sp.systemBootTime,
Fs: rootFsStats, Fs: rootFsStats,
Runtime: &statsapi.RuntimeStats{ImageFs: imageFsStats}, Runtime: &statsapi.RuntimeStats{ImageFs: imageFsStats},
Rlimit: rlimit, Rlimit: rlimit,

View File

@ -21,6 +21,7 @@ package stats
import ( import (
"k8s.io/klog" "k8s.io/klog"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1" statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/kubelet/cm" "k8s.io/kubernetes/pkg/kubelet/cm"
) )
@ -29,11 +30,12 @@ func (sp *summaryProviderImpl) GetSystemContainersStats(nodeConfig cm.NodeConfig
systemContainers := map[string]struct { systemContainers := map[string]struct {
name string name string
forceStatsUpdate bool forceStatsUpdate bool
startTime metav1.Time
}{ }{
statsapi.SystemContainerKubelet: {nodeConfig.KubeletCgroupsName, false}, statsapi.SystemContainerKubelet: {name: nodeConfig.KubeletCgroupsName, forceStatsUpdate: false, startTime: sp.kubeletCreationTime},
statsapi.SystemContainerRuntime: {nodeConfig.RuntimeCgroupsName, false}, statsapi.SystemContainerRuntime: {name: nodeConfig.RuntimeCgroupsName, forceStatsUpdate: false},
statsapi.SystemContainerMisc: {nodeConfig.SystemCgroupsName, false}, statsapi.SystemContainerMisc: {name: nodeConfig.SystemCgroupsName, forceStatsUpdate: false},
statsapi.SystemContainerPods: {sp.provider.GetPodCgroupRoot(), updateStats}, statsapi.SystemContainerPods: {name: sp.provider.GetPodCgroupRoot(), forceStatsUpdate: updateStats},
} }
for sys, cont := range systemContainers { for sys, cont := range systemContainers {
// skip if cgroup name is undefined (not all system containers are required) // skip if cgroup name is undefined (not all system containers are required)
@ -48,6 +50,11 @@ func (sp *summaryProviderImpl) GetSystemContainersStats(nodeConfig cm.NodeConfig
// System containers don't have a filesystem associated with them. // System containers don't have a filesystem associated with them.
s.Logs, s.Rootfs = nil, nil s.Logs, s.Rootfs = nil, nil
s.Name = sys s.Name = sys
// if we know the start time of a system container, use that instead of the start time provided by cAdvisor
if !cont.startTime.IsZero() {
s.StartTime = cont.startTime
}
stats = append(stats, *s) stats = append(stats, *s)
} }
@ -58,11 +65,12 @@ func (sp *summaryProviderImpl) GetSystemContainersCPUAndMemoryStats(nodeConfig c
systemContainers := map[string]struct { systemContainers := map[string]struct {
name string name string
forceStatsUpdate bool forceStatsUpdate bool
startTime metav1.Time
}{ }{
statsapi.SystemContainerKubelet: {nodeConfig.KubeletCgroupsName, false}, statsapi.SystemContainerKubelet: {name: nodeConfig.KubeletCgroupsName, forceStatsUpdate: false, startTime: sp.kubeletCreationTime},
statsapi.SystemContainerRuntime: {nodeConfig.RuntimeCgroupsName, false}, statsapi.SystemContainerRuntime: {name: nodeConfig.RuntimeCgroupsName, forceStatsUpdate: false},
statsapi.SystemContainerMisc: {nodeConfig.SystemCgroupsName, false}, statsapi.SystemContainerMisc: {name: nodeConfig.SystemCgroupsName, forceStatsUpdate: false},
statsapi.SystemContainerPods: {sp.provider.GetPodCgroupRoot(), updateStats}, statsapi.SystemContainerPods: {name: sp.provider.GetPodCgroupRoot(), forceStatsUpdate: updateStats},
} }
for sys, cont := range systemContainers { for sys, cont := range systemContainers {
// skip if cgroup name is undefined (not all system containers are required) // skip if cgroup name is undefined (not all system containers are required)
@ -75,6 +83,11 @@ func (sp *summaryProviderImpl) GetSystemContainersCPUAndMemoryStats(nodeConfig c
continue continue
} }
s.Name = sys s.Name = sys
// if we know the start time of a system container, use that instead of the start time provided by cAdvisor
if !cont.startTime.IsZero() {
s.StartTime = cont.startTime
}
stats = append(stats, *s) stats = append(stats, *s)
} }

View File

@ -83,12 +83,14 @@ func TestSummaryProviderGetStats(t *testing.T) {
On("GetCgroupStats", "/kubelet", false).Return(cgroupStatsMap["/kubelet"].cs, cgroupStatsMap["/kubelet"].ns, nil). On("GetCgroupStats", "/kubelet", false).Return(cgroupStatsMap["/kubelet"].cs, cgroupStatsMap["/kubelet"].ns, nil).
On("GetCgroupStats", "/kubepods", true).Return(cgroupStatsMap["/pods"].cs, cgroupStatsMap["/pods"].ns, nil) On("GetCgroupStats", "/kubepods", true).Return(cgroupStatsMap["/pods"].cs, cgroupStatsMap["/pods"].ns, nil)
provider := NewSummaryProvider(mockStatsProvider) kubeletCreationTime := metav1.Now()
systemBootTime := metav1.Now()
provider := summaryProviderImpl{kubeletCreationTime: kubeletCreationTime, systemBootTime: systemBootTime, provider: mockStatsProvider}
summary, err := provider.Get(true) summary, err := provider.Get(true)
assert.NoError(err) assert.NoError(err)
assert.Equal(summary.Node.NodeName, "test-node") assert.Equal(summary.Node.NodeName, "test-node")
assert.Equal(summary.Node.StartTime, cgroupStatsMap["/"].cs.StartTime) assert.Equal(summary.Node.StartTime, systemBootTime)
assert.Equal(summary.Node.CPU, cgroupStatsMap["/"].cs.CPU) assert.Equal(summary.Node.CPU, cgroupStatsMap["/"].cs.CPU)
assert.Equal(summary.Node.Memory, cgroupStatsMap["/"].cs.Memory) assert.Equal(summary.Node.Memory, cgroupStatsMap["/"].cs.Memory)
assert.Equal(summary.Node.Network, cgroupStatsMap["/"].ns) assert.Equal(summary.Node.Network, cgroupStatsMap["/"].ns)
@ -98,7 +100,7 @@ func TestSummaryProviderGetStats(t *testing.T) {
assert.Equal(len(summary.Node.SystemContainers), 4) assert.Equal(len(summary.Node.SystemContainers), 4)
assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{ assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{
Name: "kubelet", Name: "kubelet",
StartTime: cgroupStatsMap["/kubelet"].cs.StartTime, StartTime: kubeletCreationTime,
CPU: cgroupStatsMap["/kubelet"].cs.CPU, CPU: cgroupStatsMap["/kubelet"].cs.CPU,
Memory: cgroupStatsMap["/kubelet"].cs.Memory, Memory: cgroupStatsMap["/kubelet"].cs.Memory,
Accelerators: cgroupStatsMap["/kubelet"].cs.Accelerators, Accelerators: cgroupStatsMap["/kubelet"].cs.Accelerators,

View File

@ -34,6 +34,8 @@ go_test(
go_library( go_library(
name = "go_default_library", name = "go_default_library",
srcs = [ srcs = [
"boottime_util_darwin.go",
"boottime_util_linux.go",
"doc.go", "doc.go",
"util.go", "util.go",
"util_unix.go", "util_unix.go",

View File

@ -0,0 +1,44 @@
// +build darwin
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"fmt"
"syscall"
"time"
"unsafe"
"golang.org/x/sys/unix"
)
// GetBootTime returns the time at which the machine was started, rounded down
// to a whole second.
//
// The value is read from the "kern.boottime" sysctl, whose raw output is
// reinterpreted as a syscall.Timeval. An error is returned if the sysctl call
// fails or if the returned buffer does not have exactly the size of a Timeval;
// in both cases the returned time is the zero time.Time.
func GetBootTime() (time.Time, error) {
	output, err := unix.SysctlRaw("kern.boottime")
	if err != nil {
		return time.Time{}, err
	}
	var timeval syscall.Timeval
	// Guard the unsafe cast below: the buffer must be exactly one Timeval.
	if len(output) != int(unsafe.Sizeof(timeval)) {
		return time.Time{}, fmt.Errorf("unexpected output when calling sysctl kern.boottime. Expected len(output) to be %v, but got %v",
			int(unsafe.Sizeof(timeval)), len(output))
	}
	timeval = *(*syscall.Timeval)(unsafe.Pointer(&output[0]))
	sec, nsec := timeval.Unix()
	return time.Unix(sec, nsec).Truncate(time.Second), nil
}

View File

@ -0,0 +1,36 @@
// +build freebsd linux
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"fmt"
"time"
"golang.org/x/sys/unix"
)
// GetBootTime computes the machine's start time by subtracting the uptime
// reported by unix.Sysinfo (interpreted as whole seconds) from the current
// wall-clock time. The result is rounded down to a whole second.
//
// If the Sysinfo call fails, the zero time.Time and a descriptive error are
// returned.
func GetBootTime() (time.Time, error) {
	// Sample the clock before querying the uptime, matching the order in which
	// the two values are combined below.
	now := time.Now()

	var si unix.Sysinfo_t
	err := unix.Sysinfo(&si)
	if err != nil {
		return time.Time{}, fmt.Errorf("error getting system uptime: %s", err)
	}

	uptime := time.Duration(si.Uptime) * time.Second
	return now.Add(-uptime).Truncate(time.Second), nil
}

View File

@ -45,3 +45,8 @@ func UnlockPath(fileHandles []uintptr) {
func LocalEndpoint(path, file string) string { func LocalEndpoint(path, file string) string {
return "" return ""
} }
// GetBootTime is the placeholder used in builds without a platform-specific
// implementation. It always returns the zero time.Time together with an error
// stating that the operation is unsupported.
func GetBootTime() (time.Time, error) {
	var zero time.Time
	return zero, fmt.Errorf("GetBootTime is unsupported in this build")
}

View File

@ -23,6 +23,7 @@ import (
"net" "net"
"net/url" "net/url"
"strings" "strings"
"syscall"
"time" "time"
"github.com/Microsoft/go-winio" "github.com/Microsoft/go-winio"
@ -112,3 +113,15 @@ func LocalEndpoint(path, file string) string {
} }
return u.String() + "//./pipe/" + file return u.String() + "//./pipe/" + file
} }
// tickCount is the lazily resolved GetTickCount64 procedure from kernel32.dll.
var tickCount = syscall.NewLazyDLL("kernel32.dll").NewProc("GetTickCount64")

// GetBootTime computes the machine's start time by subtracting the value
// returned by GetTickCount64 (interpreted as milliseconds) from the current
// wall-clock time. The result is rounded down to a whole second.
//
// The error value returned by the procedure call is treated as success only
// when it is a syscall.Errno equal to zero; otherwise the zero time.Time and
// that error are returned.
func GetBootTime() (time.Time, error) {
	// Sample the clock before reading the tick count, matching the order in
	// which the two values are combined below.
	now := time.Now()

	ticks, _, callErr := tickCount.Call()
	errno, isErrno := callErr.(syscall.Errno)
	if !isErrno || errno != 0 {
		return time.Time{}, callErr
	}

	elapsed := time.Duration(ticks) * time.Millisecond
	return now.Add(-elapsed).Truncate(time.Second), nil
}

View File

@ -22,7 +22,6 @@ import (
"fmt" "fmt"
"os" "os"
"path" "path"
"syscall"
"time" "time"
"k8s.io/api/core/v1" "k8s.io/api/core/v1"
@ -34,6 +33,7 @@ import (
clientset "k8s.io/client-go/kubernetes" clientset "k8s.io/client-go/kubernetes"
coreclientset "k8s.io/client-go/kubernetes/typed/core/v1" coreclientset "k8s.io/client-go/kubernetes/typed/core/v1"
nodeutil "k8s.io/kubernetes/pkg/api/v1/node" nodeutil "k8s.io/kubernetes/pkg/api/v1/node"
"k8s.io/kubernetes/pkg/kubelet/util"
"k8s.io/kubernetes/test/e2e/framework" "k8s.io/kubernetes/test/e2e/framework"
. "github.com/onsi/ginkgo" . "github.com/onsi/ginkgo"
@ -97,8 +97,11 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete
BeforeEach(func() { BeforeEach(func() {
By("Calculate Lookback duration") By("Calculate Lookback duration")
var err error var err error
nodeTime, bootTime, err = getNodeTime()
nodeTime = time.Now()
bootTime, err = util.GetBootTime()
Expect(err).To(BeNil()) Expect(err).To(BeNil())
// Set lookback duration longer than node up time. // Set lookback duration longer than node up time.
// Assume the test won't take more than 1 hour, in fact it usually only takes 90 seconds. // Assume the test won't take more than 1 hour, in fact it usually only takes 90 seconds.
lookback = nodeTime.Sub(bootTime) + time.Hour lookback = nodeTime.Sub(bootTime) + time.Hour
@ -387,24 +390,6 @@ func injectLog(file string, timestamp time.Time, log string, num int) error {
return nil return nil
} }
// getNodeTime reports the node's current time followed by its boot time.
// The boot time is the current time minus the uptime reported by
// syscall.Sysinfo (interpreted as whole seconds). If Sysinfo fails, both
// times are the zero time.Time and the error is returned unchanged.
func getNodeTime() (time.Time, time.Time, error) {
	// The clock is sampled before the uptime, so the derived boot time can be
	// slightly earlier than the real one. That skew does not affect the
	// correctness of the test result.
	now := time.Now()

	var si syscall.Sysinfo_t
	err := syscall.Sysinfo(&si)
	if err != nil {
		return time.Time{}, time.Time{}, err
	}

	uptime := time.Duration(si.Uptime) * time.Second
	return now, now.Add(-uptime), nil
}
// verifyEvents verifies there are num specific events generated // verifyEvents verifies there are num specific events generated
func verifyEvents(e coreclientset.EventInterface, options metav1.ListOptions, num int, reason, message string) error { func verifyEvents(e coreclientset.EventInterface, options metav1.ListOptions, num int, reason, message string) error {
events, err := e.List(options) events, err := e.List(options)