Merge pull request #57136 from k82cn/k8s_54313

Automatic merge from submit-queue (batch tested with PRs 57136, 59920). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Updated PID pressure node condition.

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:
part of #54313 

**Release note**:

```release-note
Updated PID pressure node condition
```
Kubernetes Submit Queue 2018-02-16 10:35:33 -08:00 committed by GitHub
commit eac5bc0035
18 changed files with 297 additions and 6 deletions

@@ -56,6 +56,19 @@ type NodeStats struct {
// Stats about the underlying container runtime.
// +optional
Runtime *RuntimeStats `json:"runtime,omitempty"`
// Stats about the rlimit of the system.
// +optional
Rlimit *RlimitStats `json:"rlimit,omitempty"`
}
// RlimitStats are stats about the OS rlimits.
type RlimitStats struct {
Time metav1.Time `json:"time"`
// The max PID of the OS.
MaxPID *int64 `json:"maxpid,omitempty"`
// The number of running processes in the OS.
NumOfRunningProcesses *int64 `json:"curproc,omitempty"`
}
// RuntimeStats are stats pertaining to the underlying container runtime.

@@ -40,6 +40,8 @@ const (
SignalAllocatableMemoryAvailable Signal = "allocatableMemory.available"
// SignalAllocatableNodeFsAvailable is amount of local storage available for pod allocation
SignalAllocatableNodeFsAvailable Signal = "allocatableNodeFs.available"
// SignalPIDAvailable is amount of PID available for pod allocation
SignalPIDAvailable Signal = "pid.available"
)
// ThresholdOperator is the operator used to express a Threshold.
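
For reference, the new signal slots into the existing threshold machinery like any other eviction signal. A minimal, hypothetical sketch (assuming the `Threshold`, `ThresholdValue`, and `OpLessThan` definitions that already live in `pkg/kubelet/eviction/api`) of expressing "evict when fewer than 300 PIDs remain" could look like:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)

func main() {
	// Hypothetical hard-eviction threshold on the new pid.available signal:
	// trigger when fewer than 300 PIDs are left on the node.
	threshold := evictionapi.Threshold{
		Signal:   evictionapi.SignalPIDAvailable,
		Operator: evictionapi.OpLessThan,
		Value: evictionapi.ThresholdValue{
			Quantity: resource.NewQuantity(300, resource.DecimalSI),
		},
	}
	fmt.Printf("evict when %s %s %s\n", threshold.Signal, threshold.Operator, threshold.Value.Quantity)
}
```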

@@ -101,7 +101,8 @@ func NewManager(
containerGC ContainerGC,
recorder record.EventRecorder,
nodeRef *v1.ObjectReference,
clock clock.Clock) (Manager, lifecycle.PodAdmitHandler) {
clock clock.Clock,
) (Manager, lifecycle.PodAdmitHandler) {
manager := &managerImpl{
clock: clock,
killPodFunc: killPodFunc,
@@ -176,6 +177,13 @@ func (m *managerImpl) IsUnderDiskPressure() bool {
return hasNodeCondition(m.nodeConditions, v1.NodeDiskPressure)
}
// IsUnderPIDPressure returns true if the node is under PID pressure.
func (m *managerImpl) IsUnderPIDPressure() bool {
m.RLock()
defer m.RUnlock()
return hasNodeCondition(m.nodeConditions, v1.NodePIDPressure)
}
func startMemoryThresholdNotifier(thresholds []evictionapi.Threshold, observations signalObservations, hard bool, handler thresholdNotifierHandlerFunc) error {
for _, threshold := range thresholds {
if threshold.Signal != evictionapi.SignalMemoryAvailable || hard != isHardEvictionThreshold(threshold) {

@@ -72,6 +72,7 @@ func init() {
signalToNodeCondition[evictionapi.SignalNodeFsAvailable] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalImageFsInodesFree] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalNodeFsInodesFree] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalPIDAvailable] = v1.NodePIDPressure
// map signals to resources (and vice-versa)
signalToResource = map[evictionapi.Signal]v1.ResourceName{}
@@ -771,6 +772,17 @@ func makeSignalObservations(summary *statsapi.Summary, capacityProvider Capacity
}
}
if rlimit := summary.Node.Rlimit; rlimit != nil {
if rlimit.NumOfRunningProcesses != nil && rlimit.MaxPID != nil {
available := int64(*rlimit.MaxPID) - int64(*rlimit.NumOfRunningProcesses)
result[evictionapi.SignalPIDAvailable] = signalObservation{
available: resource.NewQuantity(available, resource.BinarySI),
capacity: resource.NewQuantity(int64(*rlimit.MaxPID), resource.BinarySI),
time: rlimit.Time,
}
}
}
if memoryAllocatableCapacity, ok := capacityProvider.GetCapacity()[v1.ResourceMemory]; ok {
memoryAllocatableAvailable := memoryAllocatableCapacity.Copy()
if reserved, exists := capacityProvider.GetNodeAllocatableReservation()[v1.ResourceMemory]; exists {
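
The observation itself is simple arithmetic: available PIDs are the kernel's pid_max minus the number of running processes, with pid_max doubling as the capacity. A self-contained sketch of that derivation (using a local stand-in for the `RlimitStats` fields and hypothetical sample values) follows:

```go
package main

import "fmt"

// rlimitStats is a local stand-in for the MaxPID / NumOfRunningProcesses
// fields of statsapi.RlimitStats, so the sketch compiles on its own.
type rlimitStats struct {
	MaxPID                *int64
	NumOfRunningProcesses *int64
}

// pidAvailable mirrors the logic in makeSignalObservations: both fields must
// be present, available = MaxPID - NumOfRunningProcesses, capacity = MaxPID.
func pidAvailable(r *rlimitStats) (available, capacity int64, ok bool) {
	if r == nil || r.MaxPID == nil || r.NumOfRunningProcesses == nil {
		return 0, 0, false
	}
	return *r.MaxPID - *r.NumOfRunningProcesses, *r.MaxPID, true
}

func main() {
	maxPID, procs := int64(32768), int64(431) // hypothetical sample values
	if available, capacity, ok := pidAvailable(&rlimitStats{MaxPID: &maxPID, NumOfRunningProcesses: &procs}); ok {
		fmt.Printf("pid.available = %d of %d\n", available, capacity)
	}
}
```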

@@ -60,6 +60,9 @@ type Manager interface {
// IsUnderDiskPressure returns true if the node is under disk pressure.
IsUnderDiskPressure() bool
// IsUnderPIDPressure returns true if the node is under PID pressure.
IsUnderPIDPressure() bool
}
// DiskInfoProvider is responsible for informing the manager how disk is configured.

@@ -853,6 +853,62 @@ func (kl *Kubelet) setNodeMemoryPressureCondition(node *v1.Node) {
}
}
// setNodePIDPressureCondition for the node.
// TODO: this needs to move somewhere centralized...
func (kl *Kubelet) setNodePIDPressureCondition(node *v1.Node) {
currentTime := metav1.NewTime(kl.clock.Now())
var condition *v1.NodeCondition
// Check if NodePIDPressure condition already exists and if it does, just pick it up for update.
for i := range node.Status.Conditions {
if node.Status.Conditions[i].Type == v1.NodePIDPressure {
condition = &node.Status.Conditions[i]
}
}
newCondition := false
// If the NodePIDPressure condition doesn't exist, create one
if condition == nil {
condition = &v1.NodeCondition{
Type: v1.NodePIDPressure,
Status: v1.ConditionUnknown,
}
// cannot be appended to node.Status.Conditions here because it gets
// copied to the slice. So if we append to the slice here none of the
// updates we make below are reflected in the slice.
newCondition = true
}
// Update the heartbeat time
condition.LastHeartbeatTime = currentTime
// Note: The conditions below take care of the case when a new NodePIDPressure condition is
// created and as well as the case when the condition already exists. When a new condition
// is created its status is set to v1.ConditionUnknown which matches either
// condition.Status != v1.ConditionTrue or
// condition.Status != v1.ConditionFalse in the conditions below depending on whether
// the kubelet is under PID pressure or not.
if kl.evictionManager.IsUnderPIDPressure() {
if condition.Status != v1.ConditionTrue {
condition.Status = v1.ConditionTrue
condition.Reason = "KubeletHasInsufficientPID"
condition.Message = "kubelet has insufficient PID available"
condition.LastTransitionTime = currentTime
kl.recordNodeStatusEvent(v1.EventTypeNormal, "NodeHasInsufficientPID")
}
} else if condition.Status != v1.ConditionFalse {
condition.Status = v1.ConditionFalse
condition.Reason = "KubeletHasSufficientPID"
condition.Message = "kubelet has sufficient PID available"
condition.LastTransitionTime = currentTime
kl.recordNodeStatusEvent(v1.EventTypeNormal, "NodeHasSufficientPID")
}
if newCondition {
node.Status.Conditions = append(node.Status.Conditions, *condition)
}
}
// setNodeDiskPressureCondition for the node.
// TODO: this needs to move somewhere centralized...
func (kl *Kubelet) setNodeDiskPressureCondition(node *v1.Node) {
@@ -995,6 +1051,7 @@ func (kl *Kubelet) defaultNodeStatusFuncs() []func(*v1.Node) error {
withoutError(kl.setNodeOODCondition),
withoutError(kl.setNodeMemoryPressureCondition),
withoutError(kl.setNodeDiskPressureCondition),
withoutError(kl.setNodePIDPressureCondition),
withoutError(kl.setNodeReadyCondition),
withoutError(kl.setNodeVolumesInUseStatus),
withoutError(kl.recordNodeSchedulableEvent),
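
The net effect on the Node object is one more entry in `status.conditions`. A sketch of the condition the code above reports for a healthy node (reason and message strings taken from the diff; the surrounding program is illustrative only):

```go
package main

import (
	"fmt"

	"k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	now := metav1.Now()
	// What setNodePIDPressureCondition appends when the eviction manager
	// reports no PID pressure.
	condition := v1.NodeCondition{
		Type:               v1.NodePIDPressure,
		Status:             v1.ConditionFalse,
		Reason:             "KubeletHasSufficientPID",
		Message:            "kubelet has sufficient PID available",
		LastHeartbeatTime:  now,
		LastTransitionTime: now,
	}
	fmt.Printf("%s=%s (%s)\n", condition.Type, condition.Status, condition.Reason)
}
```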

@@ -279,6 +279,14 @@ func TestUpdateNewNodeStatus(t *testing.T) {
LastHeartbeatTime: metav1.Time{},
LastTransitionTime: metav1.Time{},
},
{
Type: v1.NodePIDPressure,
Status: v1.ConditionFalse,
Reason: "KubeletHasSufficientPID",
Message: fmt.Sprintf("kubelet has sufficient PID available"),
LastHeartbeatTime: metav1.Time{},
LastTransitionTime: metav1.Time{},
},
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
@@ -335,7 +343,8 @@ func TestUpdateNewNodeStatus(t *testing.T) {
}
// Version skew workaround. See: https://github.com/kubernetes/kubernetes/issues/16961
assert.Equal(t, v1.NodeReady, updatedNode.Status.Conditions[len(updatedNode.Status.Conditions)-1].Type, "NotReady should be last")
assert.Equal(t, v1.NodeReady, updatedNode.Status.Conditions[len(updatedNode.Status.Conditions)-1].Type,
"NotReady should be last")
assert.Len(t, updatedNode.Status.Images, maxImagesInNodeStatus)
assert.True(t, apiequality.Semantic.DeepEqual(expectedNode, updatedNode), "%s", diff.ObjectDiff(expectedNode, updatedNode))
}
@@ -387,6 +396,14 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
{
Type: v1.NodePIDPressure,
Status: v1.ConditionFalse,
Reason: "KubeletHasSufficientPID",
Message: fmt.Sprintf("kubelet has sufficient PID available"),
LastHeartbeatTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
LastTransitionTime: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
@@ -454,6 +471,14 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
LastHeartbeatTime: metav1.Time{},
LastTransitionTime: metav1.Time{},
},
{
Type: v1.NodePIDPressure,
Status: v1.ConditionFalse,
Reason: "KubeletHasSufficientPID",
Message: fmt.Sprintf("kubelet has sufficient PID available"),
LastHeartbeatTime: metav1.Time{},
LastTransitionTime: metav1.Time{},
},
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
@@ -650,6 +675,14 @@ func TestUpdateNodeStatusWithRuntimeStateError(t *testing.T) {
LastHeartbeatTime: metav1.Time{},
LastTransitionTime: metav1.Time{},
},
{
Type: v1.NodePIDPressure,
Status: v1.ConditionFalse,
Reason: "KubeletHasSufficientPID",
Message: fmt.Sprintf("kubelet has sufficient PID available"),
LastHeartbeatTime: metav1.Time{},
LastTransitionTime: metav1.Time{},
},
{}, //placeholder
},
NodeInfo: v1.NodeSystemInfo{

@@ -176,6 +176,7 @@ func (fk *fakeKubelet) ListVolumesForPod(podUID types.UID) (map[string]volume.Vo
func (_ *fakeKubelet) RootFsStats() (*statsapi.FsStats, error) { return nil, nil }
func (_ *fakeKubelet) ListPodStats() ([]statsapi.PodStats, error) { return nil, nil }
func (_ *fakeKubelet) ImageFsStats() (*statsapi.FsStats, error) { return nil, nil }
func (_ *fakeKubelet) RlimitStats() (*statsapi.RlimitStats, error) { return nil, nil }
func (_ *fakeKubelet) GetCgroupStats(cgroupName string, updateStats bool) (*statsapi.ContainerStats, *statsapi.NetworkStats, error) {
return nil, nil, nil
}

@@ -78,6 +78,9 @@ type StatsProvider interface {
ListVolumesForPod(podUID types.UID) (map[string]volume.Volume, bool)
// GetPods returns the specs of all the pods running on this node.
GetPods() []*v1.Pod
// RlimitStats returns the rlimit stats of the system.
RlimitStats() (*statsapi.RlimitStats, error)
}
type handler struct {

@@ -67,6 +67,10 @@ func (sp *summaryProviderImpl) Get(updateStats bool) (*statsapi.Summary, error)
if err != nil {
return nil, fmt.Errorf("failed to list pod stats: %v", err)
}
rlimit, err := sp.provider.RlimitStats()
if err != nil {
return nil, fmt.Errorf("failed to get rlimit stats: %v", err)
}
nodeStats := statsapi.NodeStats{
NodeName: node.Name,
@@ -76,6 +80,7 @@ func (sp *summaryProviderImpl) Get(updateStats bool) (*statsapi.Summary, error)
StartTime: rootStats.StartTime,
Fs: rootFsStats,
Runtime: &statsapi.RuntimeStats{ImageFs: imageFsStats},
Rlimit: rlimit,
}
systemContainers := map[string]string{

@@ -58,6 +58,7 @@ func TestSummaryProvider(t *testing.T) {
"/misc": {cs: getContainerStats(), ns: getNetworkStats()},
"/kubelet": {cs: getContainerStats(), ns: getNetworkStats()},
}
rlimitStats = getRlimitStats()
)
assert := assert.New(t)
@@ -69,6 +70,7 @@ func TestSummaryProvider(t *testing.T) {
On("ListPodStats").Return(podStats, nil).
On("ImageFsStats").Return(imageFsStats, nil).
On("RootFsStats").Return(rootFsStats, nil).
On("RlimitStats").Return(rlimitStats, nil).
On("GetCgroupStats", "/", true).Return(cgroupStatsMap["/"].cs, cgroupStatsMap["/"].ns, nil).
On("GetCgroupStats", "/runtime", false).Return(cgroupStatsMap["/runtime"].cs, cgroupStatsMap["/runtime"].ns, nil).
On("GetCgroupStats", "/misc", false).Return(cgroupStatsMap["/misc"].cs, cgroupStatsMap["/misc"].ns, nil).
@@ -141,3 +143,10 @@ func getNetworkStats() *statsapi.NetworkStats {
f.Fuzz(v)
return v
}
func getRlimitStats() *statsapi.RlimitStats {
f := fuzz.New().NilChance(0)
v := &statsapi.RlimitStats{}
f.Fuzz(v)
return v
}

@@ -278,3 +278,26 @@ func (_m *StatsProvider) RootFsStats() (*v1alpha1.FsStats, error) {
return r0, r1
}
// RlimitStats provides a mock function with given fields:
func (_m *StatsProvider) RlimitStats() (*v1alpha1.RlimitStats, error) {
ret := _m.Called()
var r0 *v1alpha1.RlimitStats
if rf, ok := ret.Get(0).(func() *v1alpha1.RlimitStats); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*v1alpha1.RlimitStats)
}
}
var r1 error
if rf, ok := ret.Get(1).(func() error); ok {
r1 = rf()
} else {
r1 = ret.Error(1)
}
return r0, r1
}

@@ -7,7 +7,42 @@ go_library(
"cri_stats_provider.go",
"helper.go",
"stats_provider.go",
] + select({
"@io_bazel_rules_go//go/platform:android": [
"stats_provider_unsupported.go",
],
"@io_bazel_rules_go//go/platform:darwin": [
"stats_provider_unsupported.go",
],
"@io_bazel_rules_go//go/platform:dragonfly": [
"stats_provider_unsupported.go",
],
"@io_bazel_rules_go//go/platform:freebsd": [
"stats_provider_unsupported.go",
],
"@io_bazel_rules_go//go/platform:linux": [
"stats_provider_linux.go",
],
"@io_bazel_rules_go//go/platform:nacl": [
"stats_provider_unsupported.go",
],
"@io_bazel_rules_go//go/platform:netbsd": [
"stats_provider_unsupported.go",
],
"@io_bazel_rules_go//go/platform:openbsd": [
"stats_provider_unsupported.go",
],
"@io_bazel_rules_go//go/platform:plan9": [
"stats_provider_unsupported.go",
],
"@io_bazel_rules_go//go/platform:solaris": [
"stats_provider_unsupported.go",
],
"@io_bazel_rules_go//go/platform:windows": [
"stats_provider_unsupported.go",
],
"//conditions:default": [],
}),
importpath = "k8s.io/kubernetes/pkg/kubelet/stats",
visibility = ["//visibility:public"],
deps = [

@@ -77,6 +77,7 @@ type StatsProvider struct {
podManager kubepod.Manager
runtimeCache kubecontainer.RuntimeCache
containerStatsProvider
rlimitStatsProvider
}
// containerStatsProvider is an interface that provides the stats of the
@@ -87,6 +88,10 @@ type containerStatsProvider interface {
ImageFsDevice() (string, error)
}
type rlimitStatsProvider interface {
RlimitStats() (*statsapi.RlimitStats, error)
}
// GetCgroupStats returns the stats of the cgroup with the cgroupName. Note that
// this function doesn't generate filesystem stats.
func (p *StatsProvider) GetCgroupStats(cgroupName string, updateStats bool) (*statsapi.ContainerStats, *statsapi.NetworkStats, error) {

@@ -0,0 +1,48 @@
// +build linux
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package stats
import (
"io/ioutil"
"strconv"
"syscall"
"time"
"k8s.io/apimachinery/pkg/apis/meta/v1"
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
)
func (p *StatsProvider) RlimitStats() (*statsapi.RlimitStats, error) {
rlimit := &statsapi.RlimitStats{}
if content, err := ioutil.ReadFile("/proc/sys/kernel/pid_max"); err == nil {
if maxPid, err := strconv.ParseInt(string(content[:len(content)-1]), 10, 64); err == nil {
rlimit.MaxPID = &maxPid
}
}
var info syscall.Sysinfo_t
syscall.Sysinfo(&info)
procs := int64(info.Procs)
rlimit.NumOfRunningProcesses = &procs
rlimit.Time = v1.NewTime(time.Now())
return rlimit, nil
}
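
For completeness, the two kernel sources the Linux implementation reads can be checked by hand outside the kubelet; a small Linux-only sketch (not part of this PR) would be:

```go
// +build linux

package main

import (
	"fmt"
	"io/ioutil"
	"strconv"
	"strings"
	"syscall"
)

func main() {
	// Upper bound on PIDs, the same file RlimitStats reads.
	content, err := ioutil.ReadFile("/proc/sys/kernel/pid_max")
	if err != nil {
		panic(err)
	}
	maxPID, err := strconv.ParseInt(strings.TrimSpace(string(content)), 10, 64)
	if err != nil {
		panic(err)
	}

	// Number of current processes, the same sysinfo(2) field RlimitStats uses.
	var info syscall.Sysinfo_t
	if err := syscall.Sysinfo(&info); err != nil {
		panic(err)
	}

	fmt.Printf("pid_max=%d procs=%d pid.available=%d\n", maxPID, int64(info.Procs), maxPID-int64(info.Procs))
}
```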

@@ -0,0 +1,27 @@
// +build !linux
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package stats
import (
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
)
func (p *StatsProvider) RlimitStats() (*statsapi.RlimitStats, error) {
return nil, nil
}

@@ -3962,6 +3962,8 @@ const (
NodeMemoryPressure NodeConditionType = "MemoryPressure"
// NodeDiskPressure means the kubelet is under pressure due to insufficient available disk.
NodeDiskPressure NodeConditionType = "DiskPressure"
// NodePIDPressure means the kubelet is under pressure due to insufficient available PIDs.
NodePIDPressure NodeConditionType = "PIDPressure"
// NodeNetworkUnavailable means that network for the node is not correctly configured.
NodeNetworkUnavailable NodeConditionType = "NetworkUnavailable"
// NodeKubeletConfigOk indicates whether the kubelet is correctly configured

@@ -277,6 +277,11 @@ var _ = framework.KubeDescribe("Summary API", func() {
"InodesUsed": bounded(0, 1E8),
}),
}),
"Rlimit": ptrMatchAllFields(gstruct.Fields{
"Time": recent(maxStatsAge),
"MaxPID": bounded(0, 1E8),
"NumOfRunningProcesses": bounded(0, 1E8),
}),
}),
// Ignore extra pods since the tests run in parallel.
"Pods": gstruct.MatchElements(summaryObjectID, gstruct.IgnoreExtras, gstruct.Elements{