mirror of https://github.com/k3s-io/k3s
Merge pull request #45614 from yujuhong/container-metrics
Automatic merge from submit-queue (batch tested with PRs 45809, 46515, 46484, 46516, 45614) CRI: add methods for container stats **What this PR does / why we need it**: Define methods in CRI to get container stats. **Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: Part of https://github.com/kubernetes/features/issues/290; addresses #27097 **Special notes for your reviewer**: This PR defines the *minimum required* container metrics for the existing components to function, loosely based on the previous discussion on [core metrics](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/core-metrics-pipeline.md) as well as the existing cadvisor/summary APIs. Two new RPC calls are added to the RuntimeService: `ContainerStats` and `ListContainerStats`. The former retrieves stats for a given container, while the latter gets stats for all containers in one call. The stats gathering time of each subsystem can vary substantially (e.g., cpu vs. disk), so even though the on-demand model is preferred due to its simplicity, we’d rather give the container runtime more flexibility to determine the collection frequency for each subsystem*. As a trade-off, each piece of stats for the subsystem must contain a timestamp to let kubelet know how fresh/recent the stats are. In the future, we should also recommend a guideline for how recent the stats should be in order to ensure the reliability (e.g., eviction) and the responsiveness (e.g., autoscaling) of the kubernetes cluster. The next step is to plumb this through kubelet so that kubelet can choose to consume container stats from CRI or cadvisor. **Alternatively, we can add calls to get stats of individual subsystems. 
However, kubelet does not have the complete knowledge of the runtime environment, so this would only lead to unnecessary complexity in kubelet.* **Release note**: ```release-note Augment CRI to support retrieving container stats from the runtime. ```pull/6/head
commit
e903c58c9e
|
@ -72,12 +72,23 @@ type PodSandboxManager interface {
|
|||
PortForward(*runtimeapi.PortForwardRequest) (*runtimeapi.PortForwardResponse, error)
|
||||
}
|
||||
|
||||
// ContainerStatsManager contains methods for retriving the container
|
||||
// statistics.
|
||||
type ContainerStatsManager interface {
|
||||
// ContainerStats returns stats of the container. If the container does not
|
||||
// exist, the call returns an error.
|
||||
ContainerStats(req *runtimeapi.ContainerStatsRequest) (*runtimeapi.ContainerStatsResponse, error)
|
||||
// ListContainerStats returns stats of all running containers.
|
||||
ListContainerStats(req *runtimeapi.ListContainerStatsRequest) (*runtimeapi.ListContainerStatsResponse, error)
|
||||
}
|
||||
|
||||
// RuntimeService interface should be implemented by a container runtime.
|
||||
// The methods should be thread-safe.
|
||||
type RuntimeService interface {
|
||||
RuntimeVersioner
|
||||
ContainerManager
|
||||
PodSandboxManager
|
||||
ContainerStatsManager
|
||||
|
||||
// UpdateRuntimeConfig updates runtime configuration if specified
|
||||
UpdateRuntimeConfig(runtimeConfig *runtimeapi.RuntimeConfig) error
|
||||
|
@ -98,5 +109,5 @@ type ImageManagerService interface {
|
|||
// RemoveImage removes the image.
|
||||
RemoveImage(image *runtimeapi.ImageSpec) error
|
||||
// ImageFsInfo returns information of the filesystem that is used to store images.
|
||||
ImageFsInfo() (*runtimeapi.FsInfo, error)
|
||||
ImageFsInfo(req *runtimeapi.ImageFsInfoRequest) (*runtimeapi.ImageFsInfoResponse, error)
|
||||
}
|
||||
|
|
|
@ -126,7 +126,7 @@ func (r *FakeImageService) RemoveImage(image *runtimeapi.ImageSpec) error {
|
|||
}
|
||||
|
||||
// ImageFsInfo returns information of the filesystem that is used to store images.
|
||||
func (r *FakeImageService) ImageFsInfo() (*runtimeapi.FsInfo, error) {
|
||||
func (r *FakeImageService) ImageFsInfo(req *runtimeapi.ImageFsInfoRequest) (*runtimeapi.ImageFsInfoResponse, error) {
|
||||
r.Lock()
|
||||
defer r.Unlock()
|
||||
|
||||
|
|
|
@ -389,3 +389,11 @@ func (r *FakeRuntimeService) Attach(req *runtimeapi.AttachRequest) (*runtimeapi.
|
|||
func (r *FakeRuntimeService) UpdateRuntimeConfig(runtimeCOnfig *runtimeapi.RuntimeConfig) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *FakeRuntimeService) ContainerStats(req *runtimeapi.ContainerStatsRequest) (*runtimeapi.ContainerStatsResponse, error) {
|
||||
return nil, fmt.Errorf("Not implemented")
|
||||
}
|
||||
|
||||
func (r *FakeRuntimeService) ListContainerStats(req *runtimeapi.ListContainerStatsRequest) (*runtimeapi.ListContainerStatsResponse, error) {
|
||||
return nil, fmt.Errorf("Not implemented")
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -71,6 +71,12 @@ service RuntimeService {
|
|||
// PortForward prepares a streaming endpoint to forward ports from a PodSandbox.
|
||||
rpc PortForward(PortForwardRequest) returns (PortForwardResponse) {}
|
||||
|
||||
// ContainerStats returns stats of the container. If the container does not
|
||||
// exist, the call returns an error.
|
||||
rpc ContainerStats(ContainerStatsRequest) returns (ContainerStatsResponse) {}
|
||||
// ListContainerStats returns stats of all running containers.
|
||||
rpc ListContainerStats(ListContainerStatsRequest) returns (ListContainerStatsResponse) {}
|
||||
|
||||
// UpdateRuntimeConfig updates the runtime configuration based on the given request.
|
||||
rpc UpdateRuntimeConfig(UpdateRuntimeConfigRequest) returns (UpdateRuntimeConfigResponse) {}
|
||||
|
||||
|
@ -971,33 +977,105 @@ message UInt64Value {
|
|||
uint64 value = 1;
|
||||
}
|
||||
|
||||
// FsInfo contains data about filesystem usage.
|
||||
message FsInfo {
|
||||
// The block device name associated with the filesystem.
|
||||
string device = 1;
|
||||
// The root directory for the images.
|
||||
string path = 2;
|
||||
// CapacityBytes represents the total capacity (bytes) of the filesystems
|
||||
// underlying storage.
|
||||
UInt64Value capacity_bytes = 3;
|
||||
// AvailableBytes represents the storage space available (bytes) for the
|
||||
// filesystem.
|
||||
UInt64Value available_bytes = 4;
|
||||
// StorageIdentifier uniquely identifies the storage.
|
||||
message StorageIdentifier{
|
||||
// UUID of the device.
|
||||
string uuid = 1;
|
||||
}
|
||||
|
||||
// FilesystemUsage provides the filesystem usage information.
|
||||
message FilesystemUsage {
|
||||
// Timestamp in nanoseconds at which the information was collected. Must be > 0.
|
||||
int64 timestamp = 1;
|
||||
// The underlying storage of the filesystem.
|
||||
StorageIdentifier storage_id = 2;
|
||||
// UsedBytes represents the bytes used for images on the filesystem.
|
||||
// This may differ from the total bytes used on the filesystem and may not
|
||||
// equal CapacityBytes - AvailableBytes.
|
||||
UInt64Value used_bytes = 5;
|
||||
// InodesCapacity represents the total inodes in the filesystem.
|
||||
UInt64Value inodes_capacity = 6;
|
||||
// InodesAvailable represents the free inodes in the filesystem.
|
||||
UInt64Value inodes_available = 7;
|
||||
UInt64Value used_bytes = 3;
|
||||
// InodesUsed represents the inodes used by the images.
|
||||
// This may not equal InodesCapacity - InodesAvailable because the underlying
|
||||
// filesystem may also be used for purposes other than storing images.
|
||||
UInt64Value inodes_used = 8;
|
||||
UInt64Value inodes_used = 4;
|
||||
}
|
||||
|
||||
message ImageFsInfoResponse {
|
||||
// filesystem information of images.
|
||||
FsInfo fs_info = 1;
|
||||
// Information of image filesystem(s).
|
||||
repeated FilesystemUsage image_filesystems = 1;
|
||||
}
|
||||
|
||||
message ContainerStatsRequest{
|
||||
// ID of the container for which to retrieve stats.
|
||||
string container_id = 1;
|
||||
}
|
||||
|
||||
message ContainerStatsResponse {
|
||||
// Stats of the container.
|
||||
ContainerStats stats = 1;
|
||||
}
|
||||
|
||||
message ListContainerStatsRequest{
|
||||
// Filter for the list request.
|
||||
ContainerStatsFilter filter = 1;
|
||||
}
|
||||
|
||||
// ContainerStatsFilter is used to filter containers.
|
||||
// All those fields are combined with 'AND'
|
||||
message ContainerStatsFilter {
|
||||
// ID of the container.
|
||||
string id = 1;
|
||||
// ID of the PodSandbox.
|
||||
string pod_sandbox_id = 2;
|
||||
// LabelSelector to select matches.
|
||||
// Only api.MatchLabels is supported for now and the requirements
|
||||
// are ANDed. MatchExpressions is not supported yet.
|
||||
map<string, string> label_selector = 3;
|
||||
}
|
||||
|
||||
message ListContainerStatsResponse {
|
||||
// Stats of the containers.
|
||||
repeated ContainerStats stats = 1;
|
||||
}
|
||||
|
||||
// ContainerAttributes provides basic information of the container.
|
||||
message ContainerAttributes {
|
||||
// ID of the container.
|
||||
string id = 1;
|
||||
// Metadata of the container.
|
||||
ContainerMetadata metadata = 2;
|
||||
// Key-value pairs that may be used to scope and select individual resources.
|
||||
map<string,string> labels = 3;
|
||||
// Unstructured key-value map holding arbitrary metadata.
|
||||
// Annotations MUST NOT be altered by the runtime; the value of this field
|
||||
// MUST be identical to that of the corresponding ContainerConfig used to
|
||||
// instantiate the Container this status represents.
|
||||
map<string,string> annotations = 4;
|
||||
}
|
||||
|
||||
// ContainerStats provides the resource usage statistics for a container.
|
||||
message ContainerStats {
|
||||
// Information of the container.
|
||||
ContainerAttributes attributes = 1;
|
||||
// CPU usage gathered from the container.
|
||||
CpuUsage cpu = 2;
|
||||
// Memory usage gathered from the container.
|
||||
MemoryUsage memory = 3;
|
||||
// Usage of the writeable layer.
|
||||
FilesystemUsage writable_layer = 4;
|
||||
}
|
||||
|
||||
// CpuUsage provides the CPU usage information.
|
||||
message CpuUsage {
|
||||
// Timestamp in nanoseconds at which the information was collected. Must be > 0.
|
||||
int64 timestamp = 1;
|
||||
// Cumulative CPU usage (sum across all cores) since object creation.
|
||||
UInt64Value usage_core_nano_seconds = 2;
|
||||
}
|
||||
|
||||
// MemoryUsage provides the memory usage information.
|
||||
message MemoryUsage {
|
||||
// Timestamp in nanoseconds at which the information was collected. Must be > 0.
|
||||
int64 timestamp = 1;
|
||||
// The amount of working set memory in bytes.
|
||||
UInt64Value working_set_bytes = 2;
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ go_library(
|
|||
"docker_legacy.go",
|
||||
"docker_sandbox.go",
|
||||
"docker_service.go",
|
||||
"docker_stats.go",
|
||||
"docker_streaming.go",
|
||||
"exec.go",
|
||||
"helpers.go",
|
||||
|
|
|
@ -134,7 +134,7 @@ func getImageRef(client libdocker.Interface, image string) (string, error) {
|
|||
}
|
||||
|
||||
// ImageFsInfo returns information of the filesystem that is used to store images.
|
||||
func (ds *dockerService) ImageFsInfo() (*runtimeapi.FsInfo, error) {
|
||||
func (ds *dockerService) ImageFsInfo(req *runtimeapi.ImageFsInfoRequest) (*runtimeapi.ImageFsInfoResponse, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package dockershim
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1"
|
||||
)
|
||||
|
||||
// DockerService does not implement container stats.
|
||||
func (ds *dockerService) ContainerStats(*runtimeapi.ContainerStatsRequest) (*runtimeapi.ContainerStatsResponse, error) {
|
||||
return nil, fmt.Errorf("Not implemented")
|
||||
}
|
||||
|
||||
func (ds *dockerService) ListContainerStats(*runtimeapi.ListContainerStatsRequest) (*runtimeapi.ListContainerStatsResponse, error) {
|
||||
return nil, fmt.Errorf("Not implemented")
|
||||
}
|
|
@ -220,3 +220,11 @@ func (d *dockerService) RemoveImage(ctx context.Context, r *runtimeapi.RemoveIma
|
|||
func (d *dockerService) ImageFsInfo(ctx context.Context, r *runtimeapi.ImageFsInfoRequest) (*runtimeapi.ImageFsInfoResponse, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
func (d *dockerService) ContainerStats(ctx context.Context, r *runtimeapi.ContainerStatsRequest) (*runtimeapi.ContainerStatsResponse, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
func (d *dockerService) ListContainerStats(ctx context.Context, r *runtimeapi.ListContainerStatsRequest) (*runtimeapi.ListContainerStatsResponse, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
|
|
@ -203,6 +203,24 @@ func (in instrumentedRuntimeService) ListPodSandbox(filter *runtimeapi.PodSandbo
|
|||
return out, err
|
||||
}
|
||||
|
||||
func (in instrumentedRuntimeService) ContainerStats(req *runtimeapi.ContainerStatsRequest) (*runtimeapi.ContainerStatsResponse, error) {
|
||||
const operation = "container_stats"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
out, err := in.service.ContainerStats(req)
|
||||
recordError(operation, err)
|
||||
return out, err
|
||||
}
|
||||
|
||||
func (in instrumentedRuntimeService) ListContainerStats(req *runtimeapi.ListContainerStatsRequest) (*runtimeapi.ListContainerStatsResponse, error) {
|
||||
const operation = "list_container_stats"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
out, err := in.service.ListContainerStats(req)
|
||||
recordError(operation, err)
|
||||
return out, err
|
||||
}
|
||||
|
||||
func (in instrumentedRuntimeService) PortForward(req *runtimeapi.PortForwardRequest) (*runtimeapi.PortForwardResponse, error) {
|
||||
const operation = "port_forward"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
@ -257,11 +275,11 @@ func (in instrumentedImageManagerService) RemoveImage(image *runtimeapi.ImageSpe
|
|||
return err
|
||||
}
|
||||
|
||||
func (in instrumentedImageManagerService) ImageFsInfo() (*runtimeapi.FsInfo, error) {
|
||||
func (in instrumentedImageManagerService) ImageFsInfo(req *runtimeapi.ImageFsInfoRequest) (*runtimeapi.ImageFsInfoResponse, error) {
|
||||
const operation = "image_fs_info"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
fsInfo, err := in.service.ImageFsInfo()
|
||||
fsInfo, err := in.service.ImageFsInfo(req)
|
||||
recordError(operation, err)
|
||||
return fsInfo, nil
|
||||
}
|
||||
|
|
|
@ -135,6 +135,6 @@ func (r *RemoteImageService) RemoveImage(image *runtimeapi.ImageSpec) error {
|
|||
}
|
||||
|
||||
// ImageFsInfo returns information of the filesystem that is used to store images.
|
||||
func (r *RemoteImageService) ImageFsInfo() (*runtimeapi.FsInfo, error) {
|
||||
func (r *RemoteImageService) ImageFsInfo(req *runtimeapi.ImageFsInfoRequest) (*runtimeapi.ImageFsInfoResponse, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
|
|
@ -420,3 +420,11 @@ func (r *RemoteRuntimeService) Status() (*runtimeapi.RuntimeStatus, error) {
|
|||
|
||||
return resp.Status, nil
|
||||
}
|
||||
|
||||
func (r *RemoteRuntimeService) ContainerStats(req *runtimeapi.ContainerStatsRequest) (*runtimeapi.ContainerStatsResponse, error) {
|
||||
return nil, fmt.Errorf("Not implemented")
|
||||
}
|
||||
|
||||
func (r *RemoteRuntimeService) ListContainerStats(req *runtimeapi.ListContainerStatsRequest) (*runtimeapi.ListContainerStatsResponse, error) {
|
||||
return nil, fmt.Errorf("Not implemented")
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue