diff --git a/pkg/metrics/api_server_metrics.go b/pkg/metrics/api_server_metrics.go
new file mode 100644
index 0000000000..56fc5f5550
--- /dev/null
+++ b/pkg/metrics/api_server_metrics.go
@@ -0,0 +1,77 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package metrics
+
+import (
+	"k8s.io/kubernetes/pkg/util/sets"
+
+	"github.com/prometheus/common/model"
+)
+
+// KnownApiServerMetrics maps the name of every metric the API server is
+// expected to expose (beyond the common ones) to the label names expected on
+// that metric. The keys are handed to parseMetrics as the set of known names.
+var KnownApiServerMetrics = map[string][]string{
+	"apiserver_request_count": {"verb", "resource", "client", "code"},
+	"apiserver_request_latencies_bucket": {"verb", "resource", "le"},
+	"apiserver_request_latencies_count": {"verb", "resource"},
+	"apiserver_request_latencies_sum": {"verb", "resource"},
+	"apiserver_request_latencies_summary": {"verb", "resource", "quantile"},
+	"apiserver_request_latencies_summary_count": {"verb", "resource"},
+	"apiserver_request_latencies_summary_sum": {"verb", "resource"},
+	"etcd_helper_cache_entry_count": {},
+	"etcd_helper_cache_hit_count": {},
+	"etcd_helper_cache_miss_count": {},
+	"etcd_request_cache_add_latencies_summary": {"quantile"},
+	"etcd_request_cache_add_latencies_summary_count": {},
+	"etcd_request_cache_add_latencies_summary_sum": {},
+	"etcd_request_cache_get_latencies_summary": {"quantile"},
+	"etcd_request_cache_get_latencies_summary_count": {},
+	"etcd_request_cache_get_latencies_summary_sum": {},
+	"etcd_request_latencies_summary": {"operation", "type", "quantile"},
+	"etcd_request_latencies_summary_count": {"operation", "type"},
+	"etcd_request_latencies_summary_sum": {"operation", "type"},
+	"get_token_count": {},
+	"get_token_fail_count": {},
+	"rest_client_request_latency_microseconds": {"url", "verb", "quantile"},
+	"rest_client_request_latency_microseconds_count": {"url", "verb"},
+	"rest_client_request_latency_microseconds_sum": {"url", "verb"},
+	"rest_client_request_status_codes": {"code", "host", "method"},
+}
+
+// ApiServerMetrics is the Metrics map produced when parsing the API server's
+// /metrics output.
+type ApiServerMetrics Metrics
+
+// NewApiServerMetrics returns a map that already holds an empty, non-nil
+// sample list for every entry created by NewMetrics and for every key of
+// KnownApiServerMetrics.
+func NewApiServerMetrics() ApiServerMetrics {
+	samples := NewMetrics()
+	for name := range KnownApiServerMetrics {
+		samples[name] = model.Samples{}
+	}
+	return ApiServerMetrics(samples)
+}
+
+// parseApiServerMetrics parses data into a fresh ApiServerMetrics.
+// Names that parseMetrics does not recognise are inserted into unknownMetrics.
+// When parseMetrics fails, an empty ApiServerMetrics and that error are returned.
+func parseApiServerMetrics(data string, unknownMetrics sets.String) (ApiServerMetrics, error) {
+	parsed := NewApiServerMetrics()
+	err := parseMetrics(data, KnownApiServerMetrics, (*Metrics)(&parsed), unknownMetrics)
+	if err != nil {
+		return ApiServerMetrics{}, err
+	}
+	return parsed, nil
+}
+
+// getMetricsFromApiServer issues a GET for the "/metrics" request URI through
+// the grabber's client and returns the raw response body as a string.
+func (g *MetricsGrabber) getMetricsFromApiServer() (string, error) {
+	body, err := g.client.Get().RequestURI("/metrics").Do().Raw()
+	if err != nil {
+		return "", err
+	}
+	return string(body), nil
+}
diff --git a/pkg/metrics/controller_manager_metrics.go b/pkg/metrics/controller_manager_metrics.go
new file mode 100644
index 0000000000..84ac63720f
--- /dev/null
+++ b/pkg/metrics/controller_manager_metrics.go
@@ -0,0 +1,59 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package metrics
+
+import (
+	"k8s.io/kubernetes/pkg/util/sets"
+
+	"github.com/prometheus/common/model"
+)
+
+// KnownControllerManagerMetrics maps the name of every metric the controller
+// manager is expected to expose (beyond the common ones) to the label names
+// expected on that metric. The keys are handed to parseMetrics as known names.
+var KnownControllerManagerMetrics = map[string][]string{
+	"etcd_helper_cache_entry_count": {},
+	"etcd_helper_cache_hit_count": {},
+	"etcd_helper_cache_miss_count": {},
+	"etcd_request_cache_add_latencies_summary": {"quantile"},
+	"etcd_request_cache_add_latencies_summary_count": {},
+	"etcd_request_cache_add_latencies_summary_sum": {},
+	"etcd_request_cache_get_latencies_summary": {"quantile"},
+	"etcd_request_cache_get_latencies_summary_count": {},
+	"etcd_request_cache_get_latencies_summary_sum": {},
+	"get_token_count": {},
+	"get_token_fail_count": {},
+	"rest_client_request_latency_microseconds": {"url", "verb", "quantile"},
+	"rest_client_request_latency_microseconds_count": {"url", "verb"},
+	"rest_client_request_latency_microseconds_sum": {"url", "verb"},
+	"rest_client_request_status_codes": {"method", "code", "host"},
+}
+
+// ControllerManagerMetrics is the Metrics map produced when parsing the
+// controller manager's metrics output.
+type ControllerManagerMetrics Metrics
+
+// NewControllerManagerMetrics returns a map that already holds an empty,
+// non-nil sample list for every entry created by NewMetrics and for every key
+// of KnownControllerManagerMetrics.
+func NewControllerManagerMetrics() ControllerManagerMetrics {
+	samples := NewMetrics()
+	for name := range KnownControllerManagerMetrics {
+		samples[name] = model.Samples{}
+	}
+	return ControllerManagerMetrics(samples)
+}
+
+// parseControllerManagerMetrics parses data into a fresh ControllerManagerMetrics.
+// Names that parseMetrics does not recognise are inserted into unknownMetrics.
+// When parseMetrics fails, an empty ControllerManagerMetrics and that error
+// are returned.
+func parseControllerManagerMetrics(data string, unknownMetrics sets.String) (ControllerManagerMetrics, error) {
+	parsed := NewControllerManagerMetrics()
+	err := parseMetrics(data, KnownControllerManagerMetrics, (*Metrics)(&parsed), unknownMetrics)
+	if err != nil {
+		return ControllerManagerMetrics{}, err
+	}
+	return parsed, nil
+}
diff --git a/pkg/metrics/generic_metrics.go b/pkg/metrics/generic_metrics.go
new file mode 100644
index 0000000000..fc2d701d03
--- /dev/null
+++ b/pkg/metrics/generic_metrics.go
@@ -0,0 +1,119 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package metrics
+
+import (
+	"fmt"
+	"io"
+	"strings"
+
+	"k8s.io/kubernetes/pkg/util/sets"
+
+	"github.com/golang/glog"
+	"github.com/prometheus/common/expfmt"
+	"github.com/prometheus/common/model"
+)
+
+// CommonMetrics maps the name of every metric that all components are expected
+// to expose to the label names expected on that metric. parseMetrics accepts
+// these names in addition to the component-specific set it is given.
+var CommonMetrics = map[string][]string{
+	"process_start_time_seconds": {},
+	"process_resident_memory_bytes": {},
+	"process_virtual_memory_bytes": {},
+	"process_cpu_seconds_total": {},
+	"process_max_fds": {},
+	"process_open_fds": {},
+
+	"http_request_size_bytes": {"handler", "quantile"},
+	"http_request_size_bytes_count": {"handler"},
+	"http_request_size_bytes_sum": {"handler"},
+	"http_request_duration_microseconds": {"handler", "quantile"},
+	"http_request_duration_microseconds_count": {"handler"},
+	"http_request_duration_microseconds_sum": {"handler"},
+	"http_requests_total": {"handler", "method", "code"},
+
+	"http_response_size_bytes": {"handler", "quantile"},
+	"http_response_size_bytes_count": {"handler"},
+	"http_response_size_bytes_sum": {"handler"},
+
+	"ssh_tunnel_open_fail_count": {},
+	"ssh_tunnel_open_count": {},
+
+	"go_gc_duration_seconds": {"quantile"},
+	"go_gc_duration_seconds_count": {},
+	"go_gc_duration_seconds_sum": {},
+	"go_goroutines": {},
+
+	"kubernetes_build_info": {"major", "minor", "gitCommit", "gitTreeState", "gitVersion"},
+}
+
+// Metrics maps a metric name to the samples collected for it.
+type Metrics map[string]model.Samples
+
+// NewMetrics returns a Metrics map holding an empty, non-nil sample list for
+// every key of CommonMetrics.
+func NewMetrics() Metrics {
+	result := make(Metrics)
+	for metric := range CommonMetrics {
+		result[metric] = make(model.Samples, 0)
+	}
+	return result
+}
+
+// parseMetrics decodes data as Prometheus text-format metrics and files every
+// sample under its metric name.
+//
+// A sample whose name is a key of knownMetrics or of CommonMetrics is appended
+// to (*output)[name]. Any other sample is logged and its name is inserted into
+// unknownMetrics. output and unknownMetrics are written to, so both must be
+// non-nil.
+//
+// The error from creating the decoder is returned as-is. A failed Decode call
+// is logged and skipped; reaching io.EOF ends parsing and returns nil.
+func parseMetrics(data string, knownMetrics map[string][]string, output *Metrics, unknownMetrics sets.String) error {
+	dec, err := expfmt.NewDecoder(strings.NewReader(data), expfmt.FmtText)
+	if err != nil {
+		return err
+	}
+	decoder := expfmt.SampleDecoder{
+		Dec:  dec,
+		Opts: &expfmt.DecodeOptions{},
+	}
+
+	// The loop only exits through the io.EOF return below, so no statement
+	// after it would ever run.
+	for {
+		var v model.Vector
+		if err = decoder.Decode(&v); err != nil {
+			if err == io.EOF {
+				// Expected loop termination condition.
+				return nil
+			}
+			// Report why the decode failed, then try the next Decode call
+			// instead of aborting the whole parse.
+			glog.Warningf("Invalid Decode. Skipping. %v", err)
+			continue
+		}
+		for _, sample := range v {
+			name := string(sample.Metric[model.MetricNameLabel])
+			_, isCommonMetric := CommonMetrics[name]
+			_, isKnownMetric := knownMetrics[name]
+			if isKnownMetric || isCommonMetric {
+				(*output)[name] = append((*output)[name], sample)
+			} else {
+				// Warningf, not Warning: the message contains a format verb.
+				glog.Warningf("Unknown metric %v", sample)
+				unknownMetrics.Insert(name)
+			}
+		}
+	}
+}
+
+// getMetricsFromPod requests the "metrics" suffix of the pod addressed as
+// "<podName>:<port>" in namespace, using the "proxy" prefix on the grabber's
+// client, and returns the raw response body as a string.
+func (g *MetricsGrabber) getMetricsFromPod(podName string, namespace string, port int) (string, error) {
+	rawOutput, err := g.client.Get().
+		Prefix("proxy").
+		Namespace(namespace).
+		Resource("pods").
+		Name(fmt.Sprintf("%v:%v", podName, port)).
+		Suffix("metrics").
+		Do().Raw()
+	if err != nil {
+		return "", err
+	}
+	return string(rawOutput), nil
+}
diff --git a/pkg/metrics/kubelet_metrics.go b/pkg/metrics/kubelet_metrics.go
new file mode 100644
index 0000000000..78751cc146
--- /dev/null
+++ b/pkg/metrics/kubelet_metrics.go
@@ -0,0 +1,130 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package metrics
+
+import (
+	"fmt"
+
+	"k8s.io/kubernetes/pkg/util/sets"
+
+	"github.com/prometheus/common/model"
+)
+
+// KnownKubeletMetrics maps the name of every metric a Kubelet is expected to
+// expose (beyond the common ones) to the label names expected on that metric.
+// The keys are handed to parseMetrics as the set of known names.
+var KnownKubeletMetrics = map[string][]string{
+	"cadvisor_version_info": {"cadvisorRevision", "cadvisorVersion", "dockerVersion", "kernelVersion", "osVersion"},
+	"container_cpu_system_seconds_total": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_cpu_usage_seconds_total": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name", "cpu"},
+	"container_cpu_user_seconds_total": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_fs_io_current": {"device", "id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_fs_io_time_seconds_total": {"device", "id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_fs_io_time_weighted_seconds_total": {"device", "id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_fs_limit_bytes": {"device", "id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_fs_read_seconds_total": {"device", "id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_fs_reads_merged_total": {"device", "id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_fs_reads_total": {"device", "id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_fs_sector_reads_total": {"device", "id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_fs_sector_writes_total": {"device", "id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_fs_usage_bytes": {"device", "id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_fs_write_seconds_total": {"device", "id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_fs_writes_merged_total": {"device", "id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_fs_writes_total": {"device", "id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_last_seen": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_memory_failcnt": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_memory_failures_total": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name", "scope", "type"},
+	"container_memory_usage_bytes": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_memory_working_set_bytes": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_network_receive_bytes_total": {"id", "interface", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_network_receive_errors_total": {"id", "image", "interface", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_network_receive_packets_dropped_total": {"id", "image", "interface", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_network_receive_packets_total": {"id", "image", "interface", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_network_transmit_bytes_total": {"id", "interface", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_network_transmit_errors_total": {"id", "interface", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_network_transmit_packets_dropped_total": {"id", "interface", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_network_transmit_packets_total": {"id", "interface", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_scrape_error": {},
+	"container_spec_cpu_shares": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_spec_memory_limit_bytes": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_spec_memory_swap_limit_bytes": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_start_time_seconds": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
+	"container_tasks_state": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name", "state"},
+	"get_token_count": {},
+	"get_token_fail_count": {},
+	"kubelet_container_manager_latency_microseconds": {"operation_type", "quantile"},
+	"kubelet_container_manager_latency_microseconds_count": {"operation_type"},
+	"kubelet_container_manager_latency_microseconds_sum": {"operation_type"},
+	"kubelet_containers_per_pod_count": {"quantile"},
+	"kubelet_containers_per_pod_count_count": {},
+	"kubelet_containers_per_pod_count_sum": {},
+	"kubelet_docker_errors": {"operation_type"},
+	"kubelet_docker_operations_latency_microseconds": {"operation_type", "quantile"},
+	"kubelet_docker_operations_latency_microseconds_count": {"operation_type"},
+	"kubelet_docker_operations_latency_microseconds_sum": {"operation_type"},
+	"kubelet_generate_pod_status_latency_microseconds": {"quantile"},
+	"kubelet_generate_pod_status_latency_microseconds_count": {},
+	"kubelet_generate_pod_status_latency_microseconds_sum": {},
+	"kubelet_pod_start_latency_microseconds": {"quantile"},
+	"kubelet_pod_start_latency_microseconds_count": {},
+	"kubelet_pod_start_latency_microseconds_sum": {},
+	"kubelet_pod_worker_latency_microseconds": {"operation_type", "quantile"},
+	"kubelet_pod_worker_latency_microseconds_count": {"operation_type"},
+	"kubelet_pod_worker_latency_microseconds_sum": {"operation_type"},
+	"kubelet_pod_worker_start_latency_microseconds": {"quantile"},
+	"kubelet_pod_worker_start_latency_microseconds_count": {},
+	"kubelet_pod_worker_start_latency_microseconds_sum": {},
+	"kubelet_running_container_count": {},
+	"kubelet_running_pod_count": {},
+	"kubelet_sync_pods_latency_microseconds": {"quantile"},
+	"kubelet_sync_pods_latency_microseconds_count": {},
+	"kubelet_sync_pods_latency_microseconds_sum": {},
+	"machine_cpu_cores": {},
+	"machine_memory_bytes": {},
+	"rest_client_request_latency_microseconds": {"quantile", "url", "verb"},
+	"rest_client_request_latency_microseconds_count": {"url", "verb"},
+	"rest_client_request_latency_microseconds_sum": {"url", "verb"},
+	"rest_client_request_status_codes": {"code", "host", "method"},
+}
+
+// KubeletMetrics is the Metrics map produced when parsing one Kubelet's
+// metrics output.
+type KubeletMetrics Metrics
+
+// NewKubeletMetrics returns a map that already holds an empty, non-nil sample
+// list for every entry created by NewMetrics and for every key of
+// KnownKubeletMetrics.
+func NewKubeletMetrics() KubeletMetrics {
+	samples := NewMetrics()
+	for name := range KnownKubeletMetrics {
+		samples[name] = model.Samples{}
+	}
+	return KubeletMetrics(samples)
+}
+
+// parseKubeletMetrics parses data into a fresh KubeletMetrics.
+// Names that parseMetrics does not recognise are inserted into unknownMetrics.
+// When parseMetrics fails, an empty KubeletMetrics and that error are returned.
+func parseKubeletMetrics(data string, unknownMetrics sets.String) (KubeletMetrics, error) {
+	parsed := NewKubeletMetrics()
+	err := parseMetrics(data, KnownKubeletMetrics, (*Metrics)(&parsed), unknownMetrics)
+	if err != nil {
+		return KubeletMetrics{}, err
+	}
+	return parsed, nil
+}
+
+// getMetricsFromNode requests the "metrics" suffix of the node addressed as
+// "<nodeName>:<kubeletPort>", using the "proxy" prefix on the grabber's
+// client, and returns the raw response body as a string.
+func (g *MetricsGrabber) getMetricsFromNode(nodeName string) (string, error) {
+	target := fmt.Sprintf("%v:%v", nodeName, g.kubeletPort)
+	request := g.client.Get().Prefix("proxy").Resource("nodes").Name(target).Suffix("metrics")
+	body, err := request.Do().Raw()
+	if err != nil {
+		return "", err
+	}
+	return string(body), nil
+}
diff --git a/pkg/metrics/metrics_grabber.go b/pkg/metrics/metrics_grabber.go
new file mode 100644
index 0000000000..5583b8d7d2
--- /dev/null
+++ b/pkg/metrics/metrics_grabber.go
@@ -0,0 +1,136 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package metrics
+
+import (
+	"fmt"
+	"strings"
+
+	"k8s.io/kubernetes/pkg/api"
+	client "k8s.io/kubernetes/pkg/client/unversioned"
+	"k8s.io/kubernetes/pkg/master/ports"
+	"k8s.io/kubernetes/pkg/util/sets"
+
+	"github.com/golang/glog"
+)
+
+// MetricsCollection bundles the metrics of every component type in one value.
+// KubeletMetrics is a map of per-Kubelet results.
+// NOTE(review): the map key is presumably the node name - confirm against the
+// code that fills it.
+type MetricsCollection struct {
+	ApiServerMetrics         ApiServerMetrics
+	ControllerManagerMetrics ControllerManagerMetrics
+	KubeletMetrics           map[string]KubeletMetrics
+	SchedulerMetrics         SchedulerMetrics
+}
+
+// MetricsGrabber fetches raw metrics through an API server client and parses
+// them. It stores the client, one enable flag per component type, the Kubelet
+// port used when proxying to nodes, and the master node's name together with a
+// flag saying whether a master node was found.
+type MetricsGrabber struct {
+	client                    *client.Client
+	grabFromApiServer         bool
+	grabFromControllerManager bool
+	grabFromKubelets          bool
+	grabFromScheduler         bool
+	kubeletPort               int
+	masterName                string
+	registeredMaster          bool
+}
+
+// TODO: find a better way of figuring out if given node is a registered master.
+func isMasterNode(node *api.Node) bool {
+	// A node counts as the master purely because its name ends in "master".
+	return strings.HasSuffix(node.Name, "master")
+}
+
+// NewMetricsGrabber lists the Nodes known to the API server behind c and
+// builds a MetricsGrabber from what it finds.
+//
+// The name of a node recognised by isMasterNode is remembered as the master
+// name; the Kubelet port is taken from a non-master node. The four booleans
+// select which component types the grabber is meant to collect from. When no
+// master node is registered, scheduler and controllers are forced to false and
+// a warning is logged.
+//
+// An error is returned only when listing the Nodes fails.
+func NewMetricsGrabber(c *client.Client, kubelets bool, scheduler bool, controllers bool, apiServer bool) (*MetricsGrabber, error) {
+	kubeletPort := 0
+	registeredMaster := false
+	masterName := ""
+	nodeList, err := c.Nodes().List(api.ListOptions{})
+	if err != nil {
+		return nil, err
+	}
+	if len(nodeList.Items) < 1 {
+		glog.Warning("Can't find any Nodes in the API server to grab metrics from")
+	}
+	for i := range nodeList.Items {
+		// Point into the slice instead of copying each Node and taking the
+		// address of the loop variable.
+		node := &nodeList.Items[i]
+		if isMasterNode(node) {
+			registeredMaster = true
+			masterName = node.Name
+		} else {
+			// We assume that all Kubelets run on the same port, except possibly Master's Kubelet.
+			// This will need to be changed if the assumption is proven wrong.
+			kubeletPort = node.Status.DaemonEndpoints.KubeletEndpoint.Port
+		}
+	}
+	if !registeredMaster {
+		scheduler = false
+		controllers = false
+		glog.Warningf("Master node is not registered. Grabbing metrics from Scheduler and ControllerManager is disabled.")
+	}
+
+	return &MetricsGrabber{
+		client:                    c,
+		grabFromApiServer:         apiServer,
+		grabFromControllerManager: controllers,
+		grabFromKubelets:          kubelets,
+		grabFromScheduler:         scheduler,
+		kubeletPort:               kubeletPort,
+		masterName:                masterName,
+		registeredMaster:          registeredMaster,
+	}, nil
+}
+
+// GrabFromKubelet fetches and parses the metrics of the Kubelet on nodeName.
+// Names that are not recognised are inserted into unknownMetrics. It fails
+// without contacting the node when no Kubelet port was found at construction
+// time (kubeletPort is still 0).
+func (g *MetricsGrabber) GrabFromKubelet(nodeName string, unknownMetrics sets.String) (KubeletMetrics, error) {
+	if g.kubeletPort == 0 {
+		return KubeletMetrics{}, fmt.Errorf("MetricsGrabber wasn't able to find Kubelet port during startup. Skipping Kubelet's metrics gathering.")
+	}
+	output, err := g.getMetricsFromNode(nodeName)
+	if err != nil {
+		return KubeletMetrics{}, err
+	}
+	return parseKubeletMetrics(output, unknownMetrics)
+}
+
+// GrabFromScheduler fetches and parses the metrics of the pod named
+// "kube-scheduler-<masterName>" in the system namespace. Names that are not
+// recognised are inserted into unknownMetrics. It fails without issuing a
+// request when no master node was registered at construction time.
+func (g *MetricsGrabber) GrabFromScheduler(unknownMetrics sets.String) (SchedulerMetrics, error) {
+	if !g.registeredMaster {
+		return SchedulerMetrics{}, fmt.Errorf("Master's Kubelet is not registered. Skipping Scheduler's metrics gathering.")
+	}
+	output, err := g.getMetricsFromPod(fmt.Sprintf("%v-%v", "kube-scheduler", g.masterName), api.NamespaceSystem, ports.SchedulerPort)
+	if err != nil {
+		return SchedulerMetrics{}, err
+	}
+	return parseSchedulerMetrics(output, unknownMetrics)
+}
+
+// GrabFromControllerManager fetches and parses the metrics of the pod named
+// "kube-controller-manager-<masterName>" in the system namespace. Names that
+// are not recognised are inserted into unknownMetrics. It fails without
+// issuing a request when no master node was registered at construction time.
+func (g *MetricsGrabber) GrabFromControllerManager(unknownMetrics sets.String) (ControllerManagerMetrics, error) {
+	if !g.registeredMaster {
+		return ControllerManagerMetrics{}, fmt.Errorf("Master's Kubelet is not registered. Skipping ControllerManager's metrics gathering.")
+	}
+	output, err := g.getMetricsFromPod(fmt.Sprintf("%v-%v", "kube-controller-manager", g.masterName), api.NamespaceSystem, ports.ControllerManagerPort)
+	if err != nil {
+		return ControllerManagerMetrics{}, err
+	}
+	return parseControllerManagerMetrics(output, unknownMetrics)
+}
+
+// GrabFromApiServer fetches and parses the API server's own metrics. Names
+// that are not recognised are inserted into unknownMetrics. A failed fetch is
+// returned to the caller together with an empty ApiServerMetrics.
+func (g *MetricsGrabber) GrabFromApiServer(unknownMetrics sets.String) (ApiServerMetrics, error) {
+	output, err := g.getMetricsFromApiServer()
+	if err != nil {
+		// Propagate the failure; returning nil here would make a failed
+		// fetch look like a successful scrape with no samples.
+		return ApiServerMetrics{}, err
+	}
+	return parseApiServerMetrics(output, unknownMetrics)
+}
+
+// Grab is a placeholder: it currently ignores unknownMetrics and the grabber's
+// enable flags and always returns an empty MetricsCollection with a nil error.
+func (g *MetricsGrabber) Grab(unknownMetrics sets.String) (MetricsCollection, error) {
+	return MetricsCollection{}, nil
+}
diff --git a/pkg/metrics/scheduler_metrics.go b/pkg/metrics/scheduler_metrics.go
new file mode 100644
index 0000000000..70c06f8de8
--- /dev/null
+++ b/pkg/metrics/scheduler_metrics.go
@@ -0,0 +1,58 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package metrics
+
+import (
+	"k8s.io/kubernetes/pkg/util/sets"
+
+	"github.com/prometheus/common/model"
+)
+
+// KnownSchedulerMetrics maps the name of every metric the scheduler is
+// expected to expose (beyond the common ones) to the label names expected on
+// that metric. The keys are handed to parseMetrics as the set of known names.
+var KnownSchedulerMetrics = map[string][]string{
+	"rest_client_request_latency_microseconds": {"url", "verb", "quantile"},
+	"rest_client_request_latency_microseconds_count": {"url", "verb"},
+	"rest_client_request_latency_microseconds_sum": {"url", "verb"},
+	"rest_client_request_status_codes": {"code", "host", "method"},
+	"scheduler_binding_latency_microseconds": {"quantile"},
+	"scheduler_binding_latency_microseconds_count": {},
+	"scheduler_binding_latency_microseconds_sum": {},
+	"scheduler_binding_ratelimiter_saturation": {},
+	"scheduler_e2e_scheduling_latency_microseconds": {"quantile"},
+	"scheduler_e2e_scheduling_latency_microseconds_count": {},
+	"scheduler_e2e_scheduling_latency_microseconds_sum": {},
+	"scheduler_scheduling_algorithm_latency_microseconds": {"quantile"},
+	"scheduler_scheduling_algorithm_latency_microseconds_count": {},
+	"scheduler_scheduling_algorithm_latency_microseconds_sum": {},
+}
+
+// SchedulerMetrics is the Metrics map produced when parsing the scheduler's
+// metrics output.
+type SchedulerMetrics Metrics
+
+// NewSchedulerMetrics returns a map that already holds an empty, non-nil
+// sample list for every entry created by NewMetrics and for every key of
+// KnownSchedulerMetrics.
+func NewSchedulerMetrics() SchedulerMetrics {
+	samples := NewMetrics()
+	for name := range KnownSchedulerMetrics {
+		samples[name] = model.Samples{}
+	}
+	return SchedulerMetrics(samples)
+}
+
+// parseSchedulerMetrics parses data into a fresh SchedulerMetrics.
+// Names that parseMetrics does not recognise are inserted into unknownMetrics.
+// When parseMetrics fails, an empty SchedulerMetrics and that error are returned.
+func parseSchedulerMetrics(data string, unknownMetrics sets.String) (SchedulerMetrics, error) {
+	parsed := NewSchedulerMetrics()
+	err := parseMetrics(data, KnownSchedulerMetrics, (*Metrics)(&parsed), unknownMetrics)
+	if err != nil {
+		return SchedulerMetrics{}, err
+	}
+	return parsed, nil
+}
diff --git a/test/e2e/metrics_grabber_test.go b/test/e2e/metrics_grabber_test.go
new file mode 100644
index 0000000000..908dbc64f7
--- /dev/null
+++ b/test/e2e/metrics_grabber_test.go
@@ -0,0 +1,153 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e
+
+import (
+	"strings"
+
+	"k8s.io/kubernetes/pkg/api"
+	client "k8s.io/kubernetes/pkg/client/unversioned"
+	"k8s.io/kubernetes/pkg/metrics"
+	"k8s.io/kubernetes/pkg/util/sets"
+
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+)
+
+// validateLabelSet checks data against the expectations in labelSet, which
+// maps a metric name to the label names assumed for it.
+//
+// Every metric named in labelSet must be present as a key of data. For each
+// observed sample of such a metric, every label that is neither prefixed with
+// "__" nor listed in labelSet is recorded in invalidLabels under the metric's
+// name.
+//
+// Missing = Assumed minus Observed, Invalid = Observed minus Assumed
+//
+// NOTE: only the Invalid direction is computed. missingLabels is accepted for
+// the caller's benefit but is never written to by this function.
+func validateLabelSet(labelSet map[string][]string, data metrics.Metrics, invalidLabels map[string]sets.String, missingLabels map[string]sets.String) {
+	for metric, labels := range labelSet {
+		vector, found := data[metric]
+		Expect(found).To(BeTrue())
+		knownLabels := sets.NewString(labels...)
+		for _, observation := range vector {
+			for label := range observation.Metric {
+				name := string(label)
+				// Omit Prometheus internal labels (those prefixed with "__");
+				// everything else must be a known label for this metric.
+				if strings.HasPrefix(name, "__") || knownLabels.Has(name) {
+					continue
+				}
+				if _, ok := invalidLabels[metric]; !ok {
+					invalidLabels[metric] = sets.NewString()
+				}
+				invalidLabels[metric].Insert(name)
+			}
+		}
+	}
+}
+
+// checkMetrics validates response against both metrics.CommonMetrics and
+// assumedMetrics, then expects that no invalid labels were recorded. The
+// second expectation, on unknownLabels, always holds because validateLabelSet
+// never writes to the map it is passed as missingLabels.
+func checkMetrics(response metrics.Metrics, assumedMetrics map[string][]string) {
+	invalidLabels := make(map[string]sets.String)
+	unknownLabels := make(map[string]sets.String)
+	validateLabelSet(metrics.CommonMetrics, response, invalidLabels, unknownLabels)
+	validateLabelSet(assumedMetrics, response, invalidLabels, unknownLabels)
+
+	Expect(unknownLabels).To(BeEmpty())
+	Expect(invalidLabels).To(BeEmpty())
+}
+
+var _ = Describe("MetricsGrabber", func() {
+	framework := NewFramework("metrics-grabber")
+	var c *client.Client
+	var grabber *metrics.MetricsGrabber
+	BeforeEach(func() {
+		var err error
+		c = framework.Client
+		grabber, err = metrics.NewMetricsGrabber(c, true, true, true, true)
+		expectNoError(err)
+	})
+
+	// masterRegistered lists the Nodes and reports whether any of them has a
+	// name ending in "master". The Scheduler and ControllerManager specs use it
+	// to decide whether they can run.
+	masterRegistered := func() bool {
+		nodes, err := c.Nodes().List(api.ListOptions{})
+		expectNoError(err)
+		for i := range nodes.Items {
+			if strings.HasSuffix(nodes.Items[i].Name, "master") {
+				return true
+			}
+		}
+		return false
+	}
+
+	It("should grab all metrics from API server.", func() {
+		By("Connecting to /metrics endpoint")
+		unknownMetrics := sets.NewString()
+		response, err := grabber.GrabFromApiServer(unknownMetrics)
+		expectNoError(err)
+		Expect(unknownMetrics).To(BeEmpty())
+
+		checkMetrics(metrics.Metrics(response), metrics.KnownApiServerMetrics)
+	})
+
+	It("should grab all metrics from a Kubelet.", func() {
+		By("Connecting proxying to Node through the API server")
+		nodes := ListSchedulableNodesOrDie(c)
+		Expect(nodes.Items).NotTo(BeEmpty())
+		unknownMetrics := sets.NewString()
+		response, err := grabber.GrabFromKubelet(nodes.Items[0].Name, unknownMetrics)
+		expectNoError(err)
+		Expect(unknownMetrics).To(BeEmpty())
+
+		checkMetrics(metrics.Metrics(response), metrics.KnownKubeletMetrics)
+	})
+
+	It("should grab all metrics from a Scheduler.", func() {
+		By("Connecting proxying to Pod through the API server")
+		// Check if master Node is registered
+		if !masterRegistered() {
+			Logf("Master node is not registered. Skipping testing Scheduler metrics.")
+			return
+		}
+		unknownMetrics := sets.NewString()
+		response, err := grabber.GrabFromScheduler(unknownMetrics)
+		expectNoError(err)
+		Expect(unknownMetrics).To(BeEmpty())
+
+		checkMetrics(metrics.Metrics(response), metrics.KnownSchedulerMetrics)
+	})
+
+	It("should grab all metrics from a ControllerManager.", func() {
+		By("Connecting proxying to Pod through the API server")
+		// Check if master Node is registered
+		if !masterRegistered() {
+			Logf("Master node is not registered. Skipping testing ControllerManager metrics.")
+			return
+		}
+		unknownMetrics := sets.NewString()
+		response, err := grabber.GrabFromControllerManager(unknownMetrics)
+		expectNoError(err)
+		Expect(unknownMetrics).To(BeEmpty())
+
+		checkMetrics(metrics.Metrics(response), metrics.KnownControllerManagerMetrics)
+	})
+})