From 0e603743468f83fa7577399aab5b5883b82dd899 Mon Sep 17 00:00:00 2001 From: Federico Simoncelli Date: Mon, 30 Mar 2015 09:21:01 -0400 Subject: [PATCH] nodes: record an event on NodeReady changes Signed-off-by: Federico Simoncelli --- .../controller/nodecontroller.go | 20 +++++++++++++++++++ .../controller/nodecontroller_test.go | 9 +++++---- pkg/kubelet/kubelet.go | 10 ++++++++++ 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/pkg/cloudprovider/controller/nodecontroller.go b/pkg/cloudprovider/controller/nodecontroller.go index 02753143a0..4dc87cb1a7 100644 --- a/pkg/cloudprovider/controller/nodecontroller.go +++ b/pkg/cloudprovider/controller/nodecontroller.go @@ -31,6 +31,7 @@ import ( "github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider" "github.com/GoogleCloudPlatform/kubernetes/pkg/labels" "github.com/GoogleCloudPlatform/kubernetes/pkg/probe" + "github.com/GoogleCloudPlatform/kubernetes/pkg/types" "github.com/GoogleCloudPlatform/kubernetes/pkg/util" "github.com/golang/glog" ) @@ -314,6 +315,9 @@ func (nc *NodeController) DoCheck(node *api.Node) []api.NodeCondition { // status. Keep listing pods to sanity check if pods are all deleted makes more sense. nc.deletePods(node.Name) } + if oldReadyCondition != nil && oldReadyCondition.Status == api.ConditionTrue { + nc.recordNodeOfflineEvent(node) + } } conditions = append(conditions, *newReadyCondition) @@ -424,6 +428,18 @@ func (nc *NodeController) PopulateAddresses(nodes *api.NodeList) (*api.NodeList, return nodes, nil } +func (nc *NodeController) recordNodeOfflineEvent(node *api.Node) { + ref := &api.ObjectReference{ + Kind: "Node", + Name: node.Name, + UID: types.UID(node.Name), + Namespace: "", + } + // TODO: This requires a transaction: either both the node status is updated + // and the event is recorded, or neither should happen; see issue #6055. 
+ nc.recorder.Eventf(ref, "offline", "Node %s is now offline", node.Name) +} + // MonitorNodeStatus verifies node status are constantly updated by kubelet, and if not, // post "NodeReady==ConditionUnknown". It also evicts all pods if node is not ready or // not reachable for a long period of time. @@ -477,6 +493,10 @@ func (nc *NodeController) MonitorNodeStatus() error { readyCondition.LastProbeTime = lastReadyCondition.LastProbeTime readyCondition.LastTransitionTime = nc.now() } + if readyCondition.Status != api.ConditionTrue && + lastReadyCondition.Status == api.ConditionTrue { + nc.recordNodeOfflineEvent(node) + } } _, err = nc.kubeClient.Nodes().Update(node) if err != nil { diff --git a/pkg/cloudprovider/controller/nodecontroller_test.go b/pkg/cloudprovider/controller/nodecontroller_test.go index 7324a04b4a..53f9fc6729 100644 --- a/pkg/cloudprovider/controller/nodecontroller_test.go +++ b/pkg/cloudprovider/controller/nodecontroller_test.go @@ -30,6 +30,7 @@ import ( apierrors "github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors" "github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource" "github.com/GoogleCloudPlatform/kubernetes/pkg/client" + "github.com/GoogleCloudPlatform/kubernetes/pkg/client/record" fake_cloud "github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/fake" "github.com/GoogleCloudPlatform/kubernetes/pkg/fields" "github.com/GoogleCloudPlatform/kubernetes/pkg/labels" @@ -911,7 +912,7 @@ func TestSyncProbedNodeStatusTransitionTime(t *testing.T) { } for _, item := range table { - nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, nil, 10, time.Minute) + nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, &record.FakeRecorder{}, 10, time.Minute) nodeController.lookupIP = func(host string) ([]net.IP, error) { return nil, fmt.Errorf("lookup %v: no such host", host) } nodeController.now = func() util.Time { return 
fakeNow } if err := nodeController.SyncProbedNodeStatus(); err != nil { @@ -1064,7 +1065,7 @@ func TestSyncProbedNodeStatusEvictPods(t *testing.T) { } for _, item := range table { - nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, nil, 10, 5*time.Minute) + nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, &record.FakeRecorder{}, 10, 5*time.Minute) nodeController.lookupIP = func(host string) ([]net.IP, error) { return nil, fmt.Errorf("lookup %v: no such host", host) } if err := nodeController.SyncProbedNodeStatus(); err != nil { t.Errorf("unexpected error: %v", err) @@ -1222,7 +1223,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) { } for _, item := range table { - nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, nil, nil, 10, item.evictionTimeout) + nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, nil, &record.FakeRecorder{}, 10, item.evictionTimeout) nodeController.now = func() util.Time { return fakeNow } if err := nodeController.MonitorNodeStatus(); err != nil { t.Errorf("unexpected error: %v", err) @@ -1404,7 +1405,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) { } for _, item := range table { - nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, nil, nil, 10, 5*time.Minute) + nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, nil, &record.FakeRecorder{}, 10, 5*time.Minute) nodeController.now = func() util.Time { return fakeNow } if err := nodeController.MonitorNodeStatus(); err != nil { t.Errorf("unexpected error: %v", err) diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index a4d159f000..fe8feafa64 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -1734,6 +1734,12 @@ func (kl *Kubelet) updateNodeStatus() error { return 
fmt.Errorf("Update node status exceeds retry count") } +func (kl *Kubelet) recordNodeOnlineEvent() { + // TODO: This requires a transaction: either both the node status is updated + // and the event is recorded, or neither should happen; see issue #6055. + kl.recorder.Eventf(kl.getNodeReference(), "online", "Node %s is now online", kl.hostname) +} + // tryUpdateNodeStatus tries to update node status to master. func (kl *Kubelet) tryUpdateNodeStatus() error { node, err := kl.kubeClient.Nodes().Get(kl.hostname) @@ -1774,6 +1780,9 @@ func (kl *Kubelet) tryUpdateNodeStatus() error { for i := range node.Status.Conditions { if node.Status.Conditions[i].Type == api.NodeReady { newCondition.LastTransitionTime = node.Status.Conditions[i].LastTransitionTime + if node.Status.Conditions[i].Status != api.ConditionTrue { + kl.recordNodeOnlineEvent() + } node.Status.Conditions[i] = newCondition updated = true } @@ -1781,6 +1790,7 @@ func (kl *Kubelet) tryUpdateNodeStatus() error { if !updated { newCondition.LastTransitionTime = currentTime node.Status.Conditions = append(node.Status.Conditions, newCondition) + kl.recordNodeOnlineEvent() } _, err = kl.kubeClient.Nodes().Update(node)