/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package framework

import (
	"bytes"
	"fmt"
	"math"
	"sort"
	"strconv"
	"strings"
	"sync"
	"text/tabwriter"
	"time"

	"k8s.io/kubernetes/pkg/api/v1"
	clientset "k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
	utilruntime "k8s.io/kubernetes/pkg/util/runtime"
	"k8s.io/kubernetes/pkg/util/system"
)

const (
	resourceDataGatheringPeriod = 60 * time.Second
	probeDuration               = 15 * time.Second
)

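// ResourceConstraint specifies an upper bound on the resource usage of a single
// container: CPU in cores and memory working set in bytes.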
type ResourceConstraint struct {
	CPUConstraint    float64
	MemoryConstraint uint64
}

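// SingleContainerSummary is one data point for a single container: its name,
// CPU usage in cores, and memory working set in bytes.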
type SingleContainerSummary struct {
	Name string
	Cpu  float64
	Mem  uint64
}

// ResourceUsageSummary maps a percentile (kept as a string, because JSON does not
// accept integer keys) to the per-container summaries computed at that percentile.
type ResourceUsageSummary map[string][]SingleContainerSummary

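// PrintHumanReadable renders the summary as a tab-aligned table, one section per percentile.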
func (s *ResourceUsageSummary) PrintHumanReadable() string {
	buf := &bytes.Buffer{}
	w := tabwriter.NewWriter(buf, 1, 0, 1, ' ', 0)
	for perc, summaries := range *s {
		buf.WriteString(fmt.Sprintf("%v percentile:\n", perc))
		fmt.Fprintf(w, "container\tcpu(cores)\tmemory(MB)\n")
		for _, summary := range summaries {
			fmt.Fprintf(w, "%q\t%.3f\t%.2f\n", summary.Name, summary.Cpu, float64(summary.Mem)/(1024*1024))
		}
		w.Flush()
	}
	return buf.String()
}

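// PrintJSON renders the summary as pretty-printed JSON.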
func (s *ResourceUsageSummary) PrintJSON() string {
	return PrettyPrintJSON(*s)
}

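// computePercentiles sorts the per-container usage samples and, for each requested
// percentile, picks the sample at index ceil(n*perc/100)-1 of the sorted series.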
func computePercentiles(timeSeries []ResourceUsagePerContainer, percentilesToCompute []int) map[int]ResourceUsagePerContainer {
	if len(timeSeries) == 0 {
		return make(map[int]ResourceUsagePerContainer)
	}
	dataMap := make(map[string]*usageDataPerContainer)
	for i := range timeSeries {
		for name, data := range timeSeries[i] {
			if dataMap[name] == nil {
				// Preallocate capacity only; the slices are filled by the appends below.
				dataMap[name] = &usageDataPerContainer{
					cpuData:        make([]float64, 0, len(timeSeries)),
					memUseData:     make([]uint64, 0, len(timeSeries)),
					memWorkSetData: make([]uint64, 0, len(timeSeries)),
				}
			}
			dataMap[name].cpuData = append(dataMap[name].cpuData, data.CPUUsageInCores)
			dataMap[name].memUseData = append(dataMap[name].memUseData, data.MemoryUsageInBytes)
			dataMap[name].memWorkSetData = append(dataMap[name].memWorkSetData, data.MemoryWorkingSetInBytes)
		}
	}
	for _, v := range dataMap {
		sort.Float64s(v.cpuData)
		sort.Sort(uint64arr(v.memUseData))
		sort.Sort(uint64arr(v.memWorkSetData))
	}

	result := make(map[int]ResourceUsagePerContainer)
	for _, perc := range percentilesToCompute {
		data := make(ResourceUsagePerContainer)
		for k, v := range dataMap {
			percentileIndex := int(math.Ceil(float64(len(v.cpuData)*perc)/100)) - 1
			data[k] = &ContainerResourceUsage{
				Name:                    k,
				CPUUsageInCores:         v.cpuData[percentileIndex],
				MemoryUsageInBytes:      v.memUseData[percentileIndex],
				MemoryWorkingSetInBytes: v.memWorkSetData[percentileIndex],
			}
		}
		result[perc] = data
	}
	return result
}

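// leftMergeData merges the container entries from right into left for every
// percentile present in left; percentiles that appear only in right are ignored.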
func leftMergeData(left, right map[int]ResourceUsagePerContainer) map[int]ResourceUsagePerContainer {
	result := make(map[int]ResourceUsagePerContainer)
	for percentile, data := range left {
		result[percentile] = data
		if _, ok := right[percentile]; !ok {
			continue
		}
		for k, v := range right[percentile] {
			result[percentile][k] = v
		}
	}
	return result
}

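// resourceGatherWorker polls resource usage of the containers on a single node
// (or of the kubemark master components) until stopCh is closed.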
type resourceGatherWorker struct {
	c                    clientset.Interface
	nodeName             string
	wg                   *sync.WaitGroup
	containerIDToNameMap map[string]string
	containerIDs         []string
	stopCh               chan struct{}
	dataSeries           []ResourceUsagePerContainer
	finished             bool
	inKubemark           bool
}

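// singleProbe collects one resource usage sample and appends it to the worker's data series.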
func (w *resourceGatherWorker) singleProbe() {
	data := make(ResourceUsagePerContainer)
	if w.inKubemark {
		kubemarkData := GetKubemarkMasterComponentsResourceUsage()
		if kubemarkData == nil {
			return
		}
		for k, v := range kubemarkData {
			data[k] = &ContainerResourceUsage{
				Name:                    v.Name,
				MemoryWorkingSetInBytes: v.MemoryWorkingSetInBytes,
				CPUUsageInCores:         v.CPUUsageInCores,
			}
		}
	} else {
		nodeUsage, err := getOneTimeResourceUsageOnNode(w.c, w.nodeName, probeDuration, func() []string { return w.containerIDs }, true)
		if err != nil {
			Logf("Error while reading data from %v: %v", w.nodeName, err)
			return
		}
		for k, v := range nodeUsage {
			data[w.containerIDToNameMap[k]] = v
		}
	}
	w.dataSeries = append(w.dataSeries, data)
}

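// gather waits initialSleep (so that workers are staggered in time), then probes
// every resourceDataGatheringPeriod until stopCh is closed.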
func (w *resourceGatherWorker) gather(initialSleep time.Duration) {
	defer utilruntime.HandleCrash()
	defer w.wg.Done()
	defer Logf("Closing worker for %v", w.nodeName)
	defer func() { w.finished = true }()
	select {
	case <-time.After(initialSleep):
		w.singleProbe()
		for {
			select {
			case <-time.After(resourceDataGatheringPeriod):
				w.singleProbe()
			case <-w.stopCh:
				return
			}
		}
	case <-w.stopCh:
		return
	}
}

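// getKubeSystemContainersResourceUsage starts all workers, spreading their initial
// probes across one gathering period, and blocks until every worker has finished.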
func (g *containerResourceGatherer) getKubeSystemContainersResourceUsage(c clientset.Interface) {
	if len(g.workers) == 0 {
		return
	}
	delayPeriod := resourceDataGatheringPeriod / time.Duration(len(g.workers))
	delay := time.Duration(0)
	for i := range g.workers {
		go g.workers[i].gather(delay)
		delay += delayPeriod
	}
	g.workerWg.Wait()
}

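// containerResourceGatherer owns one resourceGatherWorker per monitored node and
// aggregates the data those workers collect.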
type containerResourceGatherer struct {
	client               clientset.Interface
	stopCh               chan struct{}
	workers              []resourceGatherWorker
	workerWg             sync.WaitGroup
	containerIDToNameMap map[string]string
	containerIDs         []string
	options              ResourceGathererOptions
}

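// ResourceGathererOptions controls where data is gathered from: the kubemark master
// components, only the master node, or all nodes.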
type ResourceGathererOptions struct {
	inKubemark bool
	masterOnly bool
}

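// NewResourceUsageGatherer lists the kube-system pods and the cluster nodes and
// creates one worker per node that should be monitored (a single kubemark worker
// when options.inKubemark is set). Illustrative usage, with hypothetical client and
// constraints supplied by the caller:
//
//	gatherer, err := NewResourceUsageGatherer(c, ResourceGathererOptions{masterOnly: true})
//	if err != nil {
//		return err
//	}
//	go gatherer.startGatheringData()
//	// ... run the workload under test ...
//	summary, err := gatherer.stopAndSummarize([]int{50, 90, 99}, constraints)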
func NewResourceUsageGatherer(c clientset.Interface, options ResourceGathererOptions) (*containerResourceGatherer, error) {
	g := containerResourceGatherer{
		client:               c,
		stopCh:               make(chan struct{}),
		containerIDToNameMap: make(map[string]string),
		containerIDs:         make([]string, 0),
		options:              options,
	}

	if options.inKubemark {
		g.workerWg.Add(1)
		g.workers = append(g.workers, resourceGatherWorker{
			inKubemark: true,
			stopCh:     g.stopCh,
			wg:         &g.workerWg,
			finished:   false,
		})
	} else {
		pods, err := c.Core().Pods("kube-system").List(v1.ListOptions{})
		if err != nil {
			Logf("Error while listing Pods: %v", err)
			return nil, err
		}
		for _, pod := range pods.Items {
			for _, container := range pod.Status.ContainerStatuses {
				containerID := strings.TrimPrefix(container.ContainerID, "docker:/")
				g.containerIDToNameMap[containerID] = pod.Name + "/" + container.Name
				g.containerIDs = append(g.containerIDs, containerID)
			}
		}
		nodeList, err := c.Core().Nodes().List(v1.ListOptions{})
		if err != nil {
			Logf("Error while listing Nodes: %v", err)
			return nil, err
		}

		for _, node := range nodeList.Items {
			if !options.masterOnly || system.IsMasterNode(node.Name) {
				g.workerWg.Add(1)
				g.workers = append(g.workers, resourceGatherWorker{
					c:                    c,
					nodeName:             node.Name,
					wg:                   &g.workerWg,
					containerIDToNameMap: g.containerIDToNameMap,
					containerIDs:         g.containerIDs,
					stopCh:               g.stopCh,
					finished:             false,
					inKubemark:           false,
				})
				if options.masterOnly {
					break
				}
			}
		}
	}
	return &g, nil
}

// startGatheringData blocks until stopAndSummarize is called.
func (g *containerResourceGatherer) startGatheringData() {
	g.getKubeSystemContainersResourceUsage(g.client)
}

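// stopAndSummarize stops all workers, waits up to two minutes for them to finish,
// computes the requested percentiles of the gathered data, and returns a summary
// together with an error listing any violated per-container constraints.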
func (g *containerResourceGatherer) stopAndSummarize(percentiles []int, constraints map[string]ResourceConstraint) (*ResourceUsageSummary, error) {
	close(g.stopCh)
	Logf("Closed stop channel. Waiting for %v workers", len(g.workers))
	finished := make(chan struct{})
	go func() {
		g.workerWg.Wait()
		finished <- struct{}{}
	}()
	select {
	case <-finished:
		Logf("Waitgroup finished.")
	case <-time.After(2 * time.Minute):
		unfinished := make([]string, 0)
		for i := range g.workers {
			if !g.workers[i].finished {
				unfinished = append(unfinished, g.workers[i].nodeName)
			}
		}
		Logf("Timed out while waiting for waitgroup, some workers failed to finish: %v", unfinished)
	}

	if len(percentiles) == 0 {
		Logf("Warning! Empty percentile list for stopAndSummarize.")
		return &ResourceUsageSummary{}, fmt.Errorf("Failed to get any resource usage data")
	}
	// Workers have been stopped; merge the data series they collected.
	data := make(map[int]ResourceUsagePerContainer)
	for i := range g.workers {
		if g.workers[i].finished {
			stats := computePercentiles(g.workers[i].dataSeries, percentiles)
			data = leftMergeData(stats, data)
		}
	}

	sortedKeys := []string{}
	for name := range data[percentiles[0]] {
		sortedKeys = append(sortedKeys, name)
	}
	sort.Strings(sortedKeys)
	violatedConstraints := make([]string, 0)
	summary := make(ResourceUsageSummary)
	for _, perc := range percentiles {
		for _, name := range sortedKeys {
			usage := data[perc][name]
			summary[strconv.Itoa(perc)] = append(summary[strconv.Itoa(perc)], SingleContainerSummary{
				Name: name,
				Cpu:  usage.CPUUsageInCores,
				Mem:  usage.MemoryWorkingSetInBytes,
			})
			// Verify constraints only against the 99th percentile of resource usage.
			if perc == 99 {
				// Name has the form <pod_name>/<container_name>.
				containerName := strings.Split(name, "/")[1]
				if constraint, ok := constraints[containerName]; ok {
					if usage.CPUUsageInCores > constraint.CPUConstraint {
						violatedConstraints = append(
							violatedConstraints,
							fmt.Sprintf("Container %v is using %v/%v CPU",
								name,
								usage.CPUUsageInCores,
								constraint.CPUConstraint,
							),
						)
					}
					if usage.MemoryWorkingSetInBytes > constraint.MemoryConstraint {
						violatedConstraints = append(
							violatedConstraints,
							fmt.Sprintf("Container %v is using %v/%v MB of memory",
								name,
								float64(usage.MemoryWorkingSetInBytes)/(1024*1024),
								float64(constraint.MemoryConstraint)/(1024*1024),
							),
						)
					}
				}
			}
		}
	}
	if len(violatedConstraints) > 0 {
		return &summary, fmt.Errorf(strings.Join(violatedConstraints, "\n"))
	}
	return &summary, nil
}