// Copyright 2014 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package v1 import ( "reflect" "time" ) type CpuSpec struct { Limit uint64 `json:"limit"` MaxLimit uint64 `json:"max_limit"` Mask string `json:"mask,omitempty"` Quota uint64 `json:"quota,omitempty"` Period uint64 `json:"period,omitempty"` } type MemorySpec struct { // The amount of memory requested. Default is unlimited (-1). // Units: bytes. Limit uint64 `json:"limit,omitempty"` // The amount of guaranteed memory. Default is 0. // Units: bytes. Reservation uint64 `json:"reservation,omitempty"` // The amount of swap space requested. Default is unlimited (-1). // Units: bytes. SwapLimit uint64 `json:"swap_limit,omitempty"` } type ProcessSpec struct { Limit uint64 `json:"limit,omitempty"` } type ContainerSpec struct { // Time at which the container was created. CreationTime time.Time `json:"creation_time,omitempty"` // Metadata labels associated with this container. Labels map[string]string `json:"labels,omitempty"` // Metadata envs associated with this container. Only whitelisted envs are added. Envs map[string]string `json:"envs,omitempty"` HasCpu bool `json:"has_cpu"` Cpu CpuSpec `json:"cpu,omitempty"` HasMemory bool `json:"has_memory"` Memory MemorySpec `json:"memory,omitempty"` HasHugetlb bool `json:"has_hugetlb"` HasNetwork bool `json:"has_network"` HasProcesses bool `json:"has_processes"` Processes ProcessSpec `json:"processes,omitempty"` HasFilesystem bool `json:"has_filesystem"` // HasDiskIo when true, indicates that DiskIo stats will be available. HasDiskIo bool `json:"has_diskio"` HasCustomMetrics bool `json:"has_custom_metrics"` CustomMetrics []MetricSpec `json:"custom_metrics,omitempty"` // Image name used for this container. Image string `json:"image,omitempty"` } // Container reference contains enough information to uniquely identify a container type ContainerReference struct { // The container id Id string `json:"id,omitempty"` // The absolute name of the container. This is unique on the machine. Name string `json:"name"` // Other names by which the container is known within a certain namespace. // This is unique within that namespace. Aliases []string `json:"aliases,omitempty"` // Namespace under which the aliases of a container are unique. // An example of a namespace is "docker" for Docker containers. Namespace string `json:"namespace,omitempty"` } // Sorts by container name. type ContainerReferenceSlice []ContainerReference func (s ContainerReferenceSlice) Len() int { return len(s) } func (s ContainerReferenceSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } func (s ContainerReferenceSlice) Less(i, j int) bool { return s[i].Name < s[j].Name } // ContainerInfoRequest is used when users check a container info from the REST API. // It specifies how much data users want to get about a container type ContainerInfoRequest struct { // Max number of stats to return. Specify -1 for all stats currently available. // Default: 60 NumStats int `json:"num_stats,omitempty"` // Start time for which to query information. // If omitted, the beginning of time is assumed. Start time.Time `json:"start,omitempty"` // End time for which to query information. // If omitted, current time is assumed. End time.Time `json:"end,omitempty"` } // Returns a ContainerInfoRequest with all default values specified. func DefaultContainerInfoRequest() ContainerInfoRequest { return ContainerInfoRequest{ NumStats: 60, } } func (r *ContainerInfoRequest) Equals(other ContainerInfoRequest) bool { return r.NumStats == other.NumStats && r.Start.Equal(other.Start) && r.End.Equal(other.End) } type ContainerInfo struct { ContainerReference // The direct subcontainers of the current container. Subcontainers []ContainerReference `json:"subcontainers,omitempty"` // The isolation used in the container. Spec ContainerSpec `json:"spec,omitempty"` // Historical statistics gathered from the container. Stats []*ContainerStats `json:"stats,omitempty"` } // TODO(vmarmol): Refactor to not need this equality comparison. // ContainerInfo may be (un)marshaled by json or other en/decoder. In that // case, the Timestamp field in each stats/sample may not be precisely // en/decoded. This will lead to small but acceptable differences between a // ContainerInfo and its encode-then-decode version. Eq() is used to compare // two ContainerInfo accepting small difference (<10ms) of Time fields. func (ci *ContainerInfo) Eq(b *ContainerInfo) bool { // If both ci and b are nil, then Eq() returns true if ci == nil { return b == nil } if b == nil { return ci == nil } // For fields other than time.Time, we will compare them precisely. // This would require that any slice should have same order. if !reflect.DeepEqual(ci.ContainerReference, b.ContainerReference) { return false } if !reflect.DeepEqual(ci.Subcontainers, b.Subcontainers) { return false } if !ci.Spec.Eq(&b.Spec) { return false } for i, expectedStats := range b.Stats { selfStats := ci.Stats[i] if !expectedStats.Eq(selfStats) { return false } } return true } func (s *ContainerSpec) Eq(b *ContainerSpec) bool { // Creation within 1s of each other. diff := s.CreationTime.Sub(b.CreationTime) if (diff > time.Second) || (diff < -time.Second) { return false } if s.HasCpu != b.HasCpu { return false } if !reflect.DeepEqual(s.Cpu, b.Cpu) { return false } if s.HasMemory != b.HasMemory { return false } if !reflect.DeepEqual(s.Memory, b.Memory) { return false } if s.HasHugetlb != b.HasHugetlb { return false } if s.HasNetwork != b.HasNetwork { return false } if s.HasProcesses != b.HasProcesses { return false } if s.HasFilesystem != b.HasFilesystem { return false } if s.HasDiskIo != b.HasDiskIo { return false } if s.HasCustomMetrics != b.HasCustomMetrics { return false } if s.Image != b.Image { return false } return true } func (ci *ContainerInfo) StatsAfter(ref time.Time) []*ContainerStats { n := len(ci.Stats) + 1 for i, s := range ci.Stats { if s.Timestamp.After(ref) { n = i break } } if n > len(ci.Stats) { return nil } return ci.Stats[n:] } func (ci *ContainerInfo) StatsStartTime() time.Time { var ret time.Time for _, s := range ci.Stats { if s.Timestamp.Before(ret) || ret.IsZero() { ret = s.Timestamp } } return ret } func (ci *ContainerInfo) StatsEndTime() time.Time { var ret time.Time for i := len(ci.Stats) - 1; i >= 0; i-- { s := ci.Stats[i] if s.Timestamp.After(ret) { ret = s.Timestamp } } return ret } // This mirrors kernel internal structure. type LoadStats struct { // Number of sleeping tasks. NrSleeping uint64 `json:"nr_sleeping"` // Number of running tasks. NrRunning uint64 `json:"nr_running"` // Number of tasks in stopped state NrStopped uint64 `json:"nr_stopped"` // Number of tasks in uninterruptible state NrUninterruptible uint64 `json:"nr_uninterruptible"` // Number of tasks waiting on IO NrIoWait uint64 `json:"nr_io_wait"` } // CPU usage time statistics. type CpuUsage struct { // Total CPU usage. // Unit: nanoseconds. Total uint64 `json:"total"` // Per CPU/core usage of the container. // Unit: nanoseconds. PerCpu []uint64 `json:"per_cpu_usage,omitempty"` // Time spent in user space. // Unit: nanoseconds. User uint64 `json:"user"` // Time spent in kernel space. // Unit: nanoseconds. System uint64 `json:"system"` } // Cpu Completely Fair Scheduler statistics. type CpuCFS struct { // Total number of elapsed enforcement intervals. Periods uint64 `json:"periods"` // Total number of times tasks in the cgroup have been throttled. ThrottledPeriods uint64 `json:"throttled_periods"` // Total time duration for which tasks in the cgroup have been throttled. // Unit: nanoseconds. ThrottledTime uint64 `json:"throttled_time"` } // Cpu Aggregated scheduler statistics type CpuSchedstat struct { // https://www.kernel.org/doc/Documentation/scheduler/sched-stats.txt // time spent on the cpu RunTime uint64 `json:"run_time"` // time spent waiting on a runqueue RunqueueTime uint64 `json:"runqueue_time"` // # of timeslices run on this cpu RunPeriods uint64 `json:"run_periods"` } // All CPU usage metrics are cumulative from the creation of the container type CpuStats struct { Usage CpuUsage `json:"usage"` CFS CpuCFS `json:"cfs"` Schedstat CpuSchedstat `json:"schedstat"` // Smoothed average of number of runnable threads x 1000. // We multiply by thousand to avoid using floats, but preserving precision. // Load is smoothed over the last 10 seconds. Instantaneous value can be read // from LoadStats.NrRunning. LoadAverage int32 `json:"load_average"` } type PerDiskStats struct { Device string `json:"device"` Major uint64 `json:"major"` Minor uint64 `json:"minor"` Stats map[string]uint64 `json:"stats"` } type DiskIoStats struct { IoServiceBytes []PerDiskStats `json:"io_service_bytes,omitempty"` IoServiced []PerDiskStats `json:"io_serviced,omitempty"` IoQueued []PerDiskStats `json:"io_queued,omitempty"` Sectors []PerDiskStats `json:"sectors,omitempty"` IoServiceTime []PerDiskStats `json:"io_service_time,omitempty"` IoWaitTime []PerDiskStats `json:"io_wait_time,omitempty"` IoMerged []PerDiskStats `json:"io_merged,omitempty"` IoTime []PerDiskStats `json:"io_time,omitempty"` } type HugetlbStats struct { // current res_counter usage for hugetlb Usage uint64 `json:"usage,omitempty"` // maximum usage ever recorded. MaxUsage uint64 `json:"max_usage,omitempty"` // number of times hugetlb usage allocation failure. Failcnt uint64 `json:"failcnt"` } type MemoryStats struct { // Current memory usage, this includes all memory regardless of when it was // accessed. // Units: Bytes. Usage uint64 `json:"usage"` // Maximum memory usage recorded. // Units: Bytes. MaxUsage uint64 `json:"max_usage"` // Number of bytes of page cache memory. // Units: Bytes. Cache uint64 `json:"cache"` // The amount of anonymous and swap cache memory (includes transparent // hugepages). // Units: Bytes. RSS uint64 `json:"rss"` // The amount of swap currently used by the processes in this cgroup // Units: Bytes. Swap uint64 `json:"swap"` // The amount of memory used for mapped files (includes tmpfs/shmem) MappedFile uint64 `json:"mapped_file"` // The amount of working set memory, this includes recently accessed memory, // dirty memory, and kernel memory. Working set is <= "usage". // Units: Bytes. WorkingSet uint64 `json:"working_set"` Failcnt uint64 `json:"failcnt"` ContainerData MemoryStatsMemoryData `json:"container_data,omitempty"` HierarchicalData MemoryStatsMemoryData `json:"hierarchical_data,omitempty"` } type CPUSetStats struct { MemoryMigrate uint64 `json:"memory_migrate"` } type MemoryNumaStats struct { File map[uint8]uint64 `json:"file,omitempty"` Anon map[uint8]uint64 `json:"anon,omitempty"` Unevictable map[uint8]uint64 `json:"unevictable,omitempty"` } type MemoryStatsMemoryData struct { Pgfault uint64 `json:"pgfault"` Pgmajfault uint64 `json:"pgmajfault"` NumaStats MemoryNumaStats `json:"numa_stats,omitempty"` } type InterfaceStats struct { // The name of the interface. Name string `json:"name"` // Cumulative count of bytes received. RxBytes uint64 `json:"rx_bytes"` // Cumulative count of packets received. RxPackets uint64 `json:"rx_packets"` // Cumulative count of receive errors encountered. RxErrors uint64 `json:"rx_errors"` // Cumulative count of packets dropped while receiving. RxDropped uint64 `json:"rx_dropped"` // Cumulative count of bytes transmitted. TxBytes uint64 `json:"tx_bytes"` // Cumulative count of packets transmitted. TxPackets uint64 `json:"tx_packets"` // Cumulative count of transmit errors encountered. TxErrors uint64 `json:"tx_errors"` // Cumulative count of packets dropped while transmitting. TxDropped uint64 `json:"tx_dropped"` } type NetworkStats struct { InterfaceStats `json:",inline"` Interfaces []InterfaceStats `json:"interfaces,omitempty"` // TCP connection stats (Established, Listen...) Tcp TcpStat `json:"tcp"` // TCP6 connection stats (Established, Listen...) Tcp6 TcpStat `json:"tcp6"` // UDP connection stats Udp UdpStat `json:"udp"` // UDP6 connection stats Udp6 UdpStat `json:"udp6"` // TCP advanced stats TcpAdvanced TcpAdvancedStat `json:"tcp_advanced"` } type TcpStat struct { // Count of TCP connections in state "Established" Established uint64 // Count of TCP connections in state "Syn_Sent" SynSent uint64 // Count of TCP connections in state "Syn_Recv" SynRecv uint64 // Count of TCP connections in state "Fin_Wait1" FinWait1 uint64 // Count of TCP connections in state "Fin_Wait2" FinWait2 uint64 // Count of TCP connections in state "Time_Wait TimeWait uint64 // Count of TCP connections in state "Close" Close uint64 // Count of TCP connections in state "Close_Wait" CloseWait uint64 // Count of TCP connections in state "Listen_Ack" LastAck uint64 // Count of TCP connections in state "Listen" Listen uint64 // Count of TCP connections in state "Closing" Closing uint64 } type TcpAdvancedStat struct { // The algorithm used to determine the timeout value used for // retransmitting unacknowledged octets, ref: RFC2698, default 1 RtoAlgorithm uint64 // The minimum value permitted by a TCP implementation for the // retransmission timeout, measured in milliseconds, default 200ms RtoMin uint64 // The maximum value permitted by a TCP implementation for the // retransmission timeout, measured in milliseconds, default 120s RtoMax uint64 // The limit on the total number of TCP connections the entity // can support., default -1, i.e. infinity MaxConn int64 // The number of times TCP connections have made a direct // transition to the SYN-SENT state from the CLOSED state. ActiveOpens uint64 // The number of times TCP connections have made a direct // transition to the SYN-RCVD state from the LISTEN state. PassiveOpens uint64 // The number of times TCP connections have made a direct // transition to the CLOSED state from either the SYN-SENT // state or the SYN-RCVD state, plus the number of times TCP // connections have made a direct transition to the LISTEN // state from the SYN-RCVD state. AttemptFails uint64 // The number of times TCP connections have made a direct // transition to the CLOSED state from either the ESTABLISHED // state or the CLOSE-WAIT state. EstabResets uint64 // The number of TCP connections for which the current state // is either ESTABLISHED or CLOSE- WAIT. CurrEstab uint64 // The total number of segments received, including those // received in error. InSegs uint64 // The total number of segments sent, including those on // current connections but excluding those containing only // retransmitted octets. OutSegs uint64 // The total number of segments retransmitted - that is, the // number of TCP segments transmitted containing one or more // previously transmitted octets. RetransSegs uint64 // The total number of segments received in error (e.g., bad // TCP checksums). InErrs uint64 // The number of TCP segments sent containing the RST flag. OutRsts uint64 // The number of IP Packets with checksum errors InCsumErrors uint64 // The number of resets received for embryonic SYN_RECV sockets EmbryonicRsts uint64 // The number of SYN cookies sent SyncookiesSent uint64 // The number of SYN cookies received SyncookiesRecv uint64 // The number of invalid SYN cookies received SyncookiesFailed uint64 // The number of packets pruned from receive queue because of socket buffer overrun PruneCalled uint64 // The number of packets pruned from receive queue RcvPruned uint64 // The number of packets dropped from out-of-order queue because of socket buffer overrun OfoPruned uint64 // The number of ICMP packets dropped because they were out-of-window OutOfWindowIcmps uint64 // The number of ICMP packets dropped because socket was locked LockDroppedIcmps uint64 // The number of TCP sockets finished time wait in fast timer TW uint64 // The number of time wait sockets recycled by time stamp TWRecycled uint64 // The number of TCP sockets finished time wait in slow timer TWKilled uint64 // counter, if no more mem for TIME-WAIT struct, +1 TCPTimeWaitOverflow uint64 // The number of RTO timer first timeout times TCPTimeouts uint64 // The number of fake timeouts detected by F-RTO TCPSpuriousRTOs uint64 // The number of send Tail Loss Probe (TLP) times by Probe Timeout(PTO) TCPLossProbes uint64 // The number of recovery times by TLP TCPLossProbeRecovery uint64 // The number of RTO failed times when in Recovery state, and remote end has no sack TCPRenoRecoveryFail uint64 // The number of RTO failed times when in Recovery state, and remote end has sack TCPSackRecoveryFail uint64 // The number of RTO failed times when in TCP_CA_Disorder state, and remote end has no sack TCPRenoFailures uint64 // The number of RTO failed times when in TCP_CA_Disorder state, and remote end has sack TCPSackFailures uint64 // The number of RTO failed times when in TCP_CA_Loss state, TCPLossFailures uint64 // The number of delayed acks sent DelayedACKs uint64 // The number of delayed acks further delayed because of locked socket DelayedACKLocked uint64 // The number of quick ack mode was activated times DelayedACKLost uint64 // The number of times the listen queue of a socket overflowed ListenOverflows uint64 // The number of SYNs to LISTEN sockets dropped ListenDrops uint64 // The number of packet headers predicted TCPHPHits uint64 // The number of acknowledgments not containing data payload received TCPPureAcks uint64 // The number of predicted acknowledgments TCPHPAcks uint64 // The number of times recovered from packet loss due to fast retransmit TCPRenoRecovery uint64 // The number of SACK retransmits failed TCPSackRecovery uint64 // The number of bad SACK blocks received TCPSACKReneging uint64 // The number of detected reordering times using FACK TCPFACKReorder uint64 // The number of detected reordering times using SACK TCPSACKReorder uint64 // The number of detected reordering times using Reno TCPRenoReorder uint64 // The number of detected reordering times using time stamp TCPTSReorder uint64 // The number of congestion windows fully recovered without slow start TCPFullUndo uint64 // The number of congestion windows partially recovered using Hoe heuristic TCPPartialUndo uint64 // The number of congestion windows recovered without slow start by DSACK TCPDSACKUndo uint64 // The number of congestion windows recovered without slow start after partial ack TCPLossUndo uint64 // The number of fast retransmits TCPFastRetrans uint64 // The number of retransmits in slow start TCPSlowStartRetrans uint64 // The number of retransmits lost TCPLostRetransmit uint64 // The number of retransmits failed, including FastRetrans, SlowStartRetrans TCPRetransFail uint64 // he number of packets collapsed in receive queue due to low socket buffer TCPRcvCollapsed uint64 // The number of DSACKs sent for old packets TCPDSACKOldSent uint64 // The number of DSACKs sent for out of order packets TCPDSACKOfoSent uint64 // The number of DSACKs received TCPDSACKRecv uint64 // The number of DSACKs for out of order packets received TCPDSACKOfoRecv uint64 // The number of connections reset due to unexpected data TCPAbortOnData uint64 // The number of connections reset due to early user close TCPAbortOnClose uint64 // The number of connections aborted due to memory pressure TCPAbortOnMemory uint64 // The number of connections aborted due to timeout TCPAbortOnTimeout uint64 // The number of connections aborted after user close in linger timeout TCPAbortOnLinger uint64 // The number of times unable to send RST due to no memory TCPAbortFailed uint64 // The number of TCP ran low on memory times TCPMemoryPressures uint64 // The number of TCP cumulative duration of // memory pressure events, by ms TCPMemoryPressuresChrono uint64 // The number of SACKs discard TCPSACKDiscard uint64 // The number of DSACKs ignore old TCPDSACKIgnoredOld uint64 // The number of DSACKs ignore no undo TCPDSACKIgnoredNoUndo uint64 // The number of MD5 not found TCPMD5NotFound uint64 // The number of MD5 unexpected TCPMD5Unexpected uint64 // The number of MD5 failed TCPMD5Failure uint64 // The number of Sack shifted TCPSackShifted uint64 // The number of Sack merged TCPSackMerged uint64 // The number of Sack shift fall back TCPSackShiftFallback uint64 // The number of Backlog drop TCPBacklogDrop uint64 // The number of PFmemalloc drop PFMemallocDrop uint64 // The number of memalloc drop TCPMinTTLDrop uint64 // The number of DeferAccept drop TCPDeferAcceptDrop uint64 // The number of IP reverse path filter IPReversePathFilter uint64 // The number of request full do cookies TCPReqQFullDoCookies uint64 // The number of request full drop TCPReqQFullDrop uint64 // number of successful outbound TFO connections TCPFastOpenActive uint64 // number of SYN-ACK packets received that did not acknowledge data // sent in the SYN packet and caused a retransmissions without SYN data. TCPFastOpenActiveFail uint64 // number of successful inbound TFO connections TCPFastOpenPassive uint64 // number of inbound SYN packets with TFO cookie that was invalid TCPFastOpenPassiveFail uint64 // number of inbound SYN packets that will have TFO disabled because // the socket has exceeded the max queue length TCPFastOpenListenOverflow uint64 // number of inbound SYN packets requesting TFO with TFO set but no cookie TCPFastOpenCookieReqd uint64 // number of SYN and SYN/ACK retransmits to break down retransmissions // into SYN, fast-retransmits, timeout retransmits, etc. TCPSynRetrans uint64 // number of outgoing packets with original data // (excluding retransmission but including data-in-SYN). TCPOrigDataSent uint64 // The number of active connections rejected because of time stamp PAWSActive uint64 // The number of packetes rejected in established connections because of timestamp PAWSEstab uint64 } type UdpStat struct { // Count of UDP sockets in state "Listen" Listen uint64 // Count of UDP packets dropped by the IP stack Dropped uint64 // Count of packets Queued for Receieve RxQueued uint64 // Count of packets Queued for Transmit TxQueued uint64 } type FsStats struct { // The block device name associated with the filesystem. Device string `json:"device,omitempty"` // Type of the filesytem. Type string `json:"type"` // Number of bytes that can be consumed by the container on this filesystem. Limit uint64 `json:"capacity"` // Number of bytes that is consumed by the container on this filesystem. Usage uint64 `json:"usage"` // Base Usage that is consumed by the container's writable layer. // This field is only applicable for docker container's as of now. BaseUsage uint64 `json:"base_usage"` // Number of bytes available for non-root user. Available uint64 `json:"available"` // HasInodes when true, indicates that Inodes info will be available. HasInodes bool `json:"has_inodes"` // Number of Inodes Inodes uint64 `json:"inodes"` // Number of available Inodes InodesFree uint64 `json:"inodes_free"` // Number of reads completed // This is the total number of reads completed successfully. ReadsCompleted uint64 `json:"reads_completed"` // Number of reads merged // Reads and writes which are adjacent to each other may be merged for // efficiency. Thus two 4K reads may become one 8K read before it is // ultimately handed to the disk, and so it will be counted (and queued) // as only one I/O. This field lets you know how often this was done. ReadsMerged uint64 `json:"reads_merged"` // Number of sectors read // This is the total number of sectors read successfully. SectorsRead uint64 `json:"sectors_read"` // Number of milliseconds spent reading // This is the total number of milliseconds spent by all reads (as // measured from __make_request() to end_that_request_last()). ReadTime uint64 `json:"read_time"` // Number of writes completed // This is the total number of writes completed successfully. WritesCompleted uint64 `json:"writes_completed"` // Number of writes merged // See the description of reads merged. WritesMerged uint64 `json:"writes_merged"` // Number of sectors written // This is the total number of sectors written successfully. SectorsWritten uint64 `json:"sectors_written"` // Number of milliseconds spent writing // This is the total number of milliseconds spent by all writes (as // measured from __make_request() to end_that_request_last()). WriteTime uint64 `json:"write_time"` // Number of I/Os currently in progress // The only field that should go to zero. Incremented as requests are // given to appropriate struct request_queue and decremented as they finish. IoInProgress uint64 `json:"io_in_progress"` // Number of milliseconds spent doing I/Os // This field increases so long as field 9 is nonzero. IoTime uint64 `json:"io_time"` // weighted number of milliseconds spent doing I/Os // This field is incremented at each I/O start, I/O completion, I/O // merge, or read of these stats by the number of I/Os in progress // (field 9) times the number of milliseconds spent doing I/O since the // last update of this field. This can provide an easy measure of both // I/O completion time and the backlog that may be accumulating. WeightedIoTime uint64 `json:"weighted_io_time"` } type AcceleratorStats struct { // Make of the accelerator (nvidia, amd, google etc.) Make string `json:"make"` // Model of the accelerator (tesla-p100, tesla-k80 etc.) Model string `json:"model"` // ID of the accelerator. ID string `json:"id"` // Total accelerator memory. // unit: bytes MemoryTotal uint64 `json:"memory_total"` // Total accelerator memory allocated. // unit: bytes MemoryUsed uint64 `json:"memory_used"` // Percent of time over the past sample period during which // the accelerator was actively processing. DutyCycle uint64 `json:"duty_cycle"` } // PerfStat represents value of a single monitored perf event. type PerfStat struct { PerfValue // CPU that perf event was measured on. Cpu int `json:"cpu"` } type PerfValue struct { // Indicates scaling ratio for an event: time_running/time_enabled // (amount of time that event was being measured divided by // amount of time that event was enabled for). // value 1.0 indicates that no multiplexing occurred. Value close // to 0 indicates that event was measured for short time and event's // value might be inaccurate. // See: https://lwn.net/Articles/324756/ ScalingRatio float64 `json:"scaling_ratio"` // Value represents value of perf event retrieved from OS. It is // normalized against ScalingRatio and takes multiplexing into // consideration. Value uint64 `json:"value"` // Name is human readable name of an event. Name string `json:"name"` } // MemoryBandwidthStats corresponds to MBM (Memory Bandwidth Monitoring). // See: https://01.org/cache-monitoring-technology // See: https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt type MemoryBandwidthStats struct { // The 'mbm_total_bytes'. TotalBytes uint64 `json:"mbm_total_bytes,omitempty"` // The 'mbm_local_bytes'. LocalBytes uint64 `json:"mbm_local_bytes,omitempty"` } // CacheStats corresponds to CMT (Cache Monitoring Technology). // See: https://01.org/cache-monitoring-technology // See: https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt type CacheStats struct { // The 'llc_occupancy'. LLCOccupancy uint64 `json:"llc_occupancy,omitempty"` } // ResctrlStats corresponds to statistics from Resource Control. type ResctrlStats struct { // Each NUMA Node statistics corresponds to one element in the array. MemoryBandwidth []MemoryBandwidthStats `json:"memory_bandwidth,omitempty"` Cache []CacheStats `json:"cache,omitempty"` } // PerfUncoreStat represents value of a single monitored perf uncore event. type PerfUncoreStat struct { PerfValue // Socket that perf event was measured on. Socket int `json:"socket"` // PMU is Performance Monitoring Unit which collected these stats. PMU string `json:"pmu"` } type UlimitSpec struct { Name string `json:"name"` SoftLimit int64 `json:"soft_limit"` HardLimit int64 `json:"hard_limit"` } type ProcessStats struct { // Number of processes ProcessCount uint64 `json:"process_count"` // Number of open file descriptors FdCount uint64 `json:"fd_count"` // Number of sockets SocketCount uint64 `json:"socket_count"` // Number of threads currently in container ThreadsCurrent uint64 `json:"threads_current,omitempty"` // Maxium number of threads allowed in container ThreadsMax uint64 `json:"threads_max,omitempty"` // Ulimits for the top-level container process Ulimits []UlimitSpec `json:"ulimits,omitempty"` } type ContainerStats struct { // The time of this stat point. Timestamp time.Time `json:"timestamp"` Cpu CpuStats `json:"cpu,omitempty"` DiskIo DiskIoStats `json:"diskio,omitempty"` Memory MemoryStats `json:"memory,omitempty"` Hugetlb map[string]HugetlbStats `json:"hugetlb,omitempty"` Network NetworkStats `json:"network,omitempty"` // Filesystem statistics Filesystem []FsStats `json:"filesystem,omitempty"` // Task load stats TaskStats LoadStats `json:"task_stats,omitempty"` // Metrics for Accelerators. Each Accelerator corresponds to one element in the array. Accelerators []AcceleratorStats `json:"accelerators,omitempty"` // ProcessStats for Containers Processes ProcessStats `json:"processes,omitempty"` // Custom metrics from all collectors CustomMetrics map[string][]MetricVal `json:"custom_metrics,omitempty"` // Statistics originating from perf events PerfStats []PerfStat `json:"perf_stats,omitempty"` // Statistics originating from perf uncore events. // Applies only for root container. PerfUncoreStats []PerfUncoreStat `json:"perf_uncore_stats,omitempty"` // Referenced memory ReferencedMemory uint64 `json:"referenced_memory,omitempty"` // Resource Control (resctrl) statistics Resctrl ResctrlStats `json:"resctrl,omitempty"` CpuSet CPUSetStats `json:"cpuset,omitempty"` } func timeEq(t1, t2 time.Time, tolerance time.Duration) bool { // t1 should not be later than t2 if t1.After(t2) { t1, t2 = t2, t1 } diff := t2.Sub(t1) return diff <= tolerance } const ( // 10ms, i.e. 0.01s timePrecision time.Duration = 10 * time.Millisecond ) // This function is useful because we do not require precise time // representation. func (a *ContainerStats) Eq(b *ContainerStats) bool { if !timeEq(a.Timestamp, b.Timestamp, timePrecision) { return false } return a.StatsEq(b) } // Checks equality of the stats values. func (a *ContainerStats) StatsEq(b *ContainerStats) bool { // TODO(vmarmol): Consider using this through reflection. if !reflect.DeepEqual(a.Cpu, b.Cpu) { return false } if !reflect.DeepEqual(a.Memory, b.Memory) { return false } if !reflect.DeepEqual(a.Hugetlb, b.Hugetlb) { return false } if !reflect.DeepEqual(a.DiskIo, b.DiskIo) { return false } if !reflect.DeepEqual(a.Network, b.Network) { return false } if !reflect.DeepEqual(a.Processes, b.Processes) { return false } if !reflect.DeepEqual(a.Filesystem, b.Filesystem) { return false } if !reflect.DeepEqual(a.TaskStats, b.TaskStats) { return false } if !reflect.DeepEqual(a.Accelerators, b.Accelerators) { return false } if !reflect.DeepEqual(a.CustomMetrics, b.CustomMetrics) { return false } return true } // Event contains information general to events such as the time at which they // occurred, their specific type, and the actual event. Event types are // differentiated by the EventType field of Event. type Event struct { // the absolute container name for which the event occurred ContainerName string `json:"container_name"` // the time at which the event occurred Timestamp time.Time `json:"timestamp"` // the type of event. EventType is an enumerated type EventType EventType `json:"event_type"` // the original event object and all of its extraneous data, ex. an // OomInstance EventData EventData `json:"event_data,omitempty"` } // EventType is an enumerated type which lists the categories under which // events may fall. The Event field EventType is populated by this enum. type EventType string const ( EventOom EventType = "oom" EventOomKill EventType = "oomKill" EventContainerCreation EventType = "containerCreation" EventContainerDeletion EventType = "containerDeletion" ) // Extra information about an event. Only one type will be set. type EventData struct { // Information about an OOM kill event. OomKill *OomKillEventData `json:"oom,omitempty"` } // Information related to an OOM kill instance type OomKillEventData struct { // process id of the killed process Pid int `json:"pid"` // The name of the killed process ProcessName string `json:"process_name"` }