Merge pull request #6155 from simon3z/node-ready-event

nodes: record an event on NodeReady changes
Victor Marmol 2015-03-30 09:32:02 -07:00
commit 2f6c3d9f4d
3 changed files with 35 additions and 4 deletions
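
For orientation: this change makes both the node controller and the kubelet emit an Event whenever a node's NodeReady condition flips — "offline" on Ready→NotReady, "online" on the reverse. The recorder call shape visible throughout the diff is Eventf(object, reason, messageFmt, args...). A minimal sketch of that pattern, assuming an interface shaped like the era's pkg/client/record recorder; the EventRecorder name and announceOffline helper below are illustrative, not code from this PR:

package sketch

import (
	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/types"
)

// EventRecorder is an assumed interface matching the Eventf call shape
// used in this diff.
type EventRecorder interface {
	Eventf(object runtime.Object, reason, messageFmt string, args ...interface{})
}

// announceOffline mirrors the diff's usage: build an ObjectReference
// identifying the node, then record a short reason plus a formatted message.
func announceOffline(recorder EventRecorder, node *api.Node) {
	ref := &api.ObjectReference{Kind: "Node", Name: node.Name, UID: types.UID(node.Name)}
	recorder.Eventf(ref, "offline", "Node %s is now offline", node.Name)
}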

View File

@@ -31,6 +31,7 @@ import (
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/types"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
 	"github.com/golang/glog"
 )
@@ -314,6 +315,9 @@ func (nc *NodeController) DoCheck(node *api.Node) []api.NodeCondition {
 			// status. Keep listing pods to sanity check if pods are all deleted makes more sense.
 			nc.deletePods(node.Name)
 		}
+		if oldReadyCondition != nil && oldReadyCondition.Status == api.ConditionTrue {
+			nc.recordNodeOfflineEvent(node)
+		}
 	}
 	conditions = append(conditions, *newReadyCondition)
@@ -424,6 +428,18 @@ func (nc *NodeController) PopulateAddresses(nodes *api.NodeList) (*api.NodeList, error) {
 	return nodes, nil
 }
 
+func (nc *NodeController) recordNodeOfflineEvent(node *api.Node) {
+	ref := &api.ObjectReference{
+		Kind:      "Node",
+		Name:      node.Name,
+		UID:       types.UID(node.Name),
+		Namespace: "",
+	}
+	// TODO: This requires a transaction, either both node status is updated
+	// and event is recorded or neither should happen, see issue #6055.
+	nc.recorder.Eventf(ref, "offline", "Node %s is now offline", node.Name)
+}
+
 // MonitorNodeStatus verifies node status are constantly updated by kubelet, and if not,
 // post "NodeReady==ConditionUnknown". It also evicts all pods if node is not ready or
 // not reachable for a long period of time.
@@ -477,6 +493,10 @@ func (nc *NodeController) MonitorNodeStatus() error {
 				readyCondition.LastProbeTime = lastReadyCondition.LastProbeTime
 				readyCondition.LastTransitionTime = nc.now()
 			}
+			if readyCondition.Status != api.ConditionTrue &&
+				lastReadyCondition.Status == api.ConditionTrue {
+				nc.recordNodeOfflineEvent(node)
+			}
 		}
 		_, err = nc.kubeClient.Nodes().Update(node)
 		if err != nil {
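
Worth noting about the two call sites above: the offline event is gated on an actual transition (the previous Ready condition was ConditionTrue and the new one is not), so a node that stays down produces one event rather than one per sync pass. The predicate, distilled into a hypothetical helper for clarity (not part of the PR):

package sketch

import "github.com/GoogleCloudPlatform/kubernetes/pkg/api"

// transitionedOffline reports whether a node has just left the Ready
// state; old may be nil for a node that has never reported Ready.
func transitionedOffline(old, cur *api.NodeCondition) bool {
	return old != nil && old.Status == api.ConditionTrue &&
		cur.Status != api.ConditionTrue
}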

View File

@@ -30,6 +30,7 @@ import (
 	apierrors "github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
 	fake_cloud "github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/fake"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
@@ -911,7 +912,7 @@ func TestSyncProbedNodeStatusTransitionTime(t *testing.T) {
 	}
 	for _, item := range table {
-		nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, nil, 10, time.Minute)
+		nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, &record.FakeRecorder{}, 10, time.Minute)
 		nodeController.lookupIP = func(host string) ([]net.IP, error) { return nil, fmt.Errorf("lookup %v: no such host", host) }
 		nodeController.now = func() util.Time { return fakeNow }
 		if err := nodeController.SyncProbedNodeStatus(); err != nil {
@@ -1064,7 +1065,7 @@ func TestSyncProbedNodeStatusEvictPods(t *testing.T) {
 	}
 	for _, item := range table {
-		nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, nil, 10, 5*time.Minute)
+		nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, &record.FakeRecorder{}, 10, 5*time.Minute)
 		nodeController.lookupIP = func(host string) ([]net.IP, error) { return nil, fmt.Errorf("lookup %v: no such host", host) }
 		if err := nodeController.SyncProbedNodeStatus(); err != nil {
 			t.Errorf("unexpected error: %v", err)
@@ -1222,7 +1223,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
 	}
 	for _, item := range table {
-		nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, nil, nil, 10, item.evictionTimeout)
+		nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, nil, &record.FakeRecorder{}, 10, item.evictionTimeout)
 		nodeController.now = func() util.Time { return fakeNow }
 		if err := nodeController.MonitorNodeStatus(); err != nil {
 			t.Errorf("unexpected error: %v", err)
@@ -1404,7 +1405,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
 	}
 	for _, item := range table {
-		nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, nil, nil, 10, 5*time.Minute)
+		nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, nil, &record.FakeRecorder{}, 10, 5*time.Minute)
 		nodeController.now = func() util.Time { return fakeNow }
 		if err := nodeController.MonitorNodeStatus(); err != nil {
 			t.Errorf("unexpected error: %v", err)
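
The test changes are mechanical but required: the recorder argument to NewNodeController used to be nil, and the controller now calls nc.recorder.Eventf during status checks, which would panic through a nil interface. Each table-driven test therefore injects &record.FakeRecorder{}, which satisfies the recorder interface without touching the API server. A sketch of the minimal surface such a no-op recorder must provide here (the real record.FakeRecorder may do more; this is only the shape these tests depend on):

package sketch

import "github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"

// noopRecorder is a hypothetical stand-in for record.FakeRecorder.
type noopRecorder struct{}

// Eventf discards the event; these tests assert on node status and pod
// eviction behavior, not on recorded events.
func (noopRecorder) Eventf(object runtime.Object, reason, messageFmt string, args ...interface{}) {}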

View File

@@ -1734,6 +1734,12 @@ func (kl *Kubelet) updateNodeStatus() error {
 	return fmt.Errorf("Update node status exceeds retry count")
 }
 
+func (kl *Kubelet) recordNodeOnlineEvent() {
+	// TODO: This requires a transaction, either both node status is updated
+	// and event is recorded or neither should happen, see issue #6055.
+	kl.recorder.Eventf(kl.getNodeReference(), "online", "Node %s is now online", kl.hostname)
+}
+
 // tryUpdateNodeStatus tries to update node status to master.
 func (kl *Kubelet) tryUpdateNodeStatus() error {
 	node, err := kl.kubeClient.Nodes().Get(kl.hostname)
@@ -1774,6 +1780,9 @@ func (kl *Kubelet) tryUpdateNodeStatus() error {
 	for i := range node.Status.Conditions {
 		if node.Status.Conditions[i].Type == api.NodeReady {
 			newCondition.LastTransitionTime = node.Status.Conditions[i].LastTransitionTime
+			if node.Status.Conditions[i].Status != api.ConditionTrue {
+				kl.recordNodeOnlineEvent()
+			}
 			node.Status.Conditions[i] = newCondition
 			updated = true
 		}
@@ -1781,6 +1790,7 @@ func (kl *Kubelet) tryUpdateNodeStatus() error {
 	if !updated {
 		newCondition.LastTransitionTime = currentTime
 		node.Status.Conditions = append(node.Status.Conditions, newCondition)
+		kl.recordNodeOnlineEvent()
 	}
 	_, err = kl.kubeClient.Nodes().Update(node)
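
The kubelet side is symmetric: "online" is recorded when the stored NodeReady condition was anything other than ConditionTrue, or when the Ready condition is appended for the first time. Distilled into a hypothetical predicate (and note the TODO carried in both files: per issue #6055 the event and the status write are not transactional, so a failed Nodes().Update can leave an event with no matching status change):

package sketch

import "github.com/GoogleCloudPlatform/kubernetes/pkg/api"

// shouldRecordOnline mirrors the two kubelet call sites above: a
// NotReady-to-Ready flip, or a freshly created Ready condition.
func shouldRecordOnline(stored *api.NodeCondition) bool {
	return stored == nil || stored.Status != api.ConditionTrue
}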