mirror of https://github.com/k3s-io/k3s
185 lines
5.6 KiB
Go
185 lines
5.6 KiB
Go
// Copyright 2015 Google Inc. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// Maintains the summary of aggregated minute, hour, and day stats.
|
|
// For a container running for more than a day, amount of tracked data can go up to
|
|
// 40 KB when cpu and memory are tracked. We'll start by enabling collection for the
|
|
// node, followed by docker, and then all containers as we understand the usage pattern
|
|
// better
|
|
// TODO(rjnagal): Optimize the size if we start running it for every container.
|
|
package summary
|
|
|
|
import (
|
|
"fmt"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/google/cadvisor/info/v1"
|
|
info "github.com/google/cadvisor/info/v2"
|
|
)
|
|
|
|
// Usage fields we track for generating percentiles.
|
|
type secondSample struct {
|
|
Timestamp time.Time // time when the sample was recorded.
|
|
Cpu uint64 // cpu usage
|
|
Memory uint64 // memory usage
|
|
}
|
|
|
|
type availableResources struct {
|
|
Cpu bool
|
|
Memory bool
|
|
}
|
|
|
|
type StatsSummary struct {
|
|
// Resources being tracked for this container.
|
|
available availableResources
|
|
// list of second samples. The list is cleared when a new minute samples is generated.
|
|
secondSamples []*secondSample
|
|
// minute percentiles. We track 24 * 60 maximum samples.
|
|
minuteSamples *SamplesBuffer
|
|
// latest derived instant, minute, hour, and day stats. Instant sample updated every second.
|
|
// Others updated every minute.
|
|
derivedStats info.DerivedStats // Guarded by dataLock.
|
|
dataLock sync.RWMutex
|
|
}
|
|
|
|
// Adds a new seconds sample.
|
|
// If enough seconds samples are collected, a minute sample is generated and derived
|
|
// stats are updated.
|
|
func (s *StatsSummary) AddSample(stat v1.ContainerStats) error {
|
|
sample := secondSample{}
|
|
sample.Timestamp = stat.Timestamp
|
|
if s.available.Cpu {
|
|
sample.Cpu = stat.Cpu.Usage.Total
|
|
}
|
|
if s.available.Memory {
|
|
sample.Memory = stat.Memory.WorkingSet
|
|
}
|
|
s.secondSamples = append(s.secondSamples, &sample)
|
|
s.updateLatestUsage()
|
|
// TODO(jnagal): Use 'available' to avoid unnecessary computation.
|
|
numSamples := len(s.secondSamples)
|
|
elapsed := time.Nanosecond
|
|
if numSamples > 1 {
|
|
start := s.secondSamples[0].Timestamp
|
|
end := s.secondSamples[numSamples-1].Timestamp
|
|
elapsed = end.Sub(start)
|
|
}
|
|
if elapsed > 60*time.Second {
|
|
// Make a minute sample. This works with dynamic housekeeping as long
|
|
// as we keep max dynamic houskeeping period close to a minute.
|
|
minuteSample := GetMinutePercentiles(s.secondSamples)
|
|
// Clear seconds samples. Keep the latest sample for continuity.
|
|
// Copying and resizing helps avoid slice re-allocation.
|
|
s.secondSamples[0] = s.secondSamples[numSamples-1]
|
|
s.secondSamples = s.secondSamples[:1]
|
|
s.minuteSamples.Add(minuteSample)
|
|
err := s.updateDerivedStats()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *StatsSummary) updateLatestUsage() {
|
|
usage := info.InstantUsage{}
|
|
numStats := len(s.secondSamples)
|
|
if numStats < 1 {
|
|
return
|
|
}
|
|
latest := s.secondSamples[numStats-1]
|
|
usage.Memory = latest.Memory
|
|
if numStats > 1 {
|
|
previous := s.secondSamples[numStats-2]
|
|
cpu, err := getCPURate(*latest, *previous)
|
|
if err == nil {
|
|
usage.Cpu = cpu
|
|
}
|
|
}
|
|
|
|
s.dataLock.Lock()
|
|
defer s.dataLock.Unlock()
|
|
s.derivedStats.LatestUsage = usage
|
|
s.derivedStats.Timestamp = latest.Timestamp
|
|
}
|
|
|
|
// Generate new derived stats based on current minute stats samples.
|
|
func (s *StatsSummary) updateDerivedStats() error {
|
|
derived := info.DerivedStats{}
|
|
derived.Timestamp = time.Now()
|
|
minuteSamples := s.minuteSamples.RecentStats(1)
|
|
if len(minuteSamples) != 1 {
|
|
return fmt.Errorf("failed to retrieve minute stats")
|
|
}
|
|
derived.MinuteUsage = *minuteSamples[0]
|
|
hourUsage, err := s.getDerivedUsage(60)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to compute hour stats: %v", err)
|
|
}
|
|
dayUsage, err := s.getDerivedUsage(60 * 24)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to compute day usage: %v", err)
|
|
}
|
|
derived.HourUsage = hourUsage
|
|
derived.DayUsage = dayUsage
|
|
|
|
s.dataLock.Lock()
|
|
defer s.dataLock.Unlock()
|
|
derived.LatestUsage = s.derivedStats.LatestUsage
|
|
s.derivedStats = derived
|
|
|
|
return nil
|
|
}
|
|
|
|
// helper method to get hour and daily derived stats
|
|
func (s *StatsSummary) getDerivedUsage(n int) (info.Usage, error) {
|
|
if n < 1 {
|
|
return info.Usage{}, fmt.Errorf("invalid number of samples requested: %d", n)
|
|
}
|
|
samples := s.minuteSamples.RecentStats(n)
|
|
numSamples := len(samples)
|
|
if numSamples < 1 {
|
|
return info.Usage{}, fmt.Errorf("failed to retrieve any minute stats")
|
|
}
|
|
// We generate derived stats even with partial data.
|
|
usage := GetDerivedPercentiles(samples)
|
|
// Assumes we have equally placed minute samples.
|
|
usage.PercentComplete = int32(numSamples * 100 / n)
|
|
return usage, nil
|
|
}
|
|
|
|
// Return the latest calculated derived stats.
|
|
func (s *StatsSummary) DerivedStats() (info.DerivedStats, error) {
|
|
s.dataLock.RLock()
|
|
defer s.dataLock.RUnlock()
|
|
|
|
return s.derivedStats, nil
|
|
}
|
|
|
|
func New(spec v1.ContainerSpec) (*StatsSummary, error) {
|
|
summary := StatsSummary{}
|
|
if spec.HasCpu {
|
|
summary.available.Cpu = true
|
|
}
|
|
if spec.HasMemory {
|
|
summary.available.Memory = true
|
|
}
|
|
if !summary.available.Cpu && !summary.available.Memory {
|
|
return nil, fmt.Errorf("none of the resources are being tracked")
|
|
}
|
|
summary.minuteSamples = NewSamplesBuffer(60 /* one hour */)
|
|
return &summary, nil
|
|
}
|