Merge pull request #5818 from simon3z/boot-id

nodecontroller: record node reboot event
pull/6/head
David Oppenheimer 2015-03-25 13:50:21 -07:00
commit 76889fa3ae
12 changed files with 62 additions and 17 deletions

View File

@ -39,6 +39,7 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/latest"
"github.com/GoogleCloudPlatform/kubernetes/pkg/apiserver"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
nodeControllerPkg "github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/controller"
replicationControllerPkg "github.com/GoogleCloudPlatform/kubernetes/pkg/controller"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet"
@ -218,7 +219,8 @@ func startComponents(firstManifestURL, secondManifestURL, apiVersion string) (st
nodeResources := &api.NodeResources{}
nodeController := nodeControllerPkg.NewNodeController(nil, "", machineList, nodeResources, cl, fakeKubeletClient{}, 10, 5*time.Minute)
nodeController := nodeControllerPkg.NewNodeController(nil, "", machineList, nodeResources, cl, fakeKubeletClient{},
record.FromSource(api.EventSource{Component: "controllermanager"}), 10, 5*time.Minute)
nodeController.Run(5*time.Second, true, false)
cadvisorInterface := new(cadvisor.Fake)

View File

@ -29,6 +29,7 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider"
nodeControllerPkg "github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/controller"
replicationControllerPkg "github.com/GoogleCloudPlatform/kubernetes/pkg/controller"
@ -176,7 +177,8 @@ func (s *CMServer) Run(_ []string) error {
}
nodeController := nodeControllerPkg.NewNodeController(cloud, s.MinionRegexp, s.MachineList, nodeResources,
kubeClient, kubeletClient, s.RegisterRetryCount, s.PodEvictionTimeout)
kubeClient, kubeletClient, record.FromSource(api.EventSource{Component: "controllermanager"}),
s.RegisterRetryCount, s.PodEvictionTimeout)
nodeController.Run(s.NodeSyncPeriod, s.SyncNodeList, s.SyncNodeStatus)
resourceQuotaManager := resourcequota.NewResourceQuotaManager(kubeClient)

View File

@ -34,6 +34,7 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/testapi"
"github.com/GoogleCloudPlatform/kubernetes/pkg/apiserver"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
nodeControllerPkg "github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/controller"
"github.com/GoogleCloudPlatform/kubernetes/pkg/controller"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/cadvisor"
@ -128,7 +129,8 @@ func runControllerManager(machineList []string, cl *client.Client, nodeMilliCPU,
}
kubeClient := &client.HTTPKubeletClient{Client: http.DefaultClient, Port: ports.KubeletPort}
nodeController := nodeControllerPkg.NewNodeController(nil, "", machineList, nodeResources, cl, kubeClient, 10, 5*time.Minute)
nodeController := nodeControllerPkg.NewNodeController(nil, "", machineList, nodeResources, cl, kubeClient,
record.FromSource(api.EventSource{Component: "controllermanager"}), 10, 5*time.Minute)
nodeController.Run(10*time.Second, true, true)
endpoints := service.NewEndpointController(cl)

View File

@ -971,6 +971,8 @@ type NodeSystemInfo struct {
MachineID string `json:"machineID"`
// SystemUUID is the system-uuid reported by the node
SystemUUID string `json:"systemUUID"`
// BootID is the boot-id reported by the node
BootID string `json:"bootID"`
}
// NodeStatus is information about the current status of a node.

View File

@ -782,6 +782,8 @@ type NodeSystemInfo struct {
MachineID string `json:"machineID" description:"machine id is the machine-id reported by the node"`
// SystemUUID is the system-uuid reported by the node
SystemUUID string `json:"systemUUID" description:"system uuid is the system-uuid reported by the node"`
// BootID is the boot-id reported by the node
BootID string `json:"bootID" description:"boot id is the boot-id reported by the node"`
}
// NodeStatus is information about the current status of a node.

View File

@ -783,6 +783,8 @@ type NodeSystemInfo struct {
MachineID string `json:"machineID" description:"machine id is the machine-id reported by the node"`
// SystemUUID is the system-uuid reported by the node
SystemUUID string `json:"systemUUID" description:"system uuid is the system-uuid reported by the node"`
// BootID is the boot-id reported by the node
BootID string `json:"bootID" description:"boot id is the boot-id reported by the node"`
}
// NodeStatus is information about the current status of a node.

View File

@ -969,6 +969,8 @@ type NodeSystemInfo struct {
MachineID string `json:"machineID"`
// SystemUUID is the system-uuid reported by the node
SystemUUID string `json:"systemUUID"`
// BootID is the boot-id reported by the node
BootID string `json:"bootID" description:"boot id is the boot-id reported by the node"`
}
// NodeStatus is information about the current status of a node.

View File

@ -27,6 +27,7 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
apierrors "github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
@ -73,6 +74,7 @@ type NodeController struct {
nodes []string
kubeClient client.Interface
kubeletClient client.KubeletClient
recorder record.EventRecorder
registerRetryCount int
podEvictionTimeout time.Duration
// Method for easy mocking in unittest.
@ -88,6 +90,7 @@ func NewNodeController(
staticResources *api.NodeResources,
kubeClient client.Interface,
kubeletClient client.KubeletClient,
recorder record.EventRecorder,
registerRetryCount int,
podEvictionTimeout time.Duration) *NodeController {
return &NodeController{
@ -97,6 +100,7 @@ func NewNodeController(
staticResources: staticResources,
kubeClient: kubeClient,
kubeletClient: kubeletClient,
recorder: recorder,
registerRetryCount: registerRetryCount,
podEvictionTimeout: podEvictionTimeout,
lookupIP: net.LookupIP,
@ -120,6 +124,7 @@ func (nc *NodeController) Run(period time.Duration, syncNodeList, syncNodeStatus
// Register initial set of nodes with their status set.
var nodes *api.NodeList
var err error
record.StartRecording(nc.kubeClient.Events(""))
if nc.isRunningCloudProvider() {
if syncNodeList {
if nodes, err = nc.GetCloudNodesWithSpec(); err != nil {
@ -289,6 +294,16 @@ func (nc *NodeController) populateNodeInfo(node *api.Node) error {
for key, value := range nodeInfo.Capacity {
node.Spec.Capacity[key] = value
}
if node.Status.NodeInfo.BootID != "" &&
node.Status.NodeInfo.BootID != nodeInfo.NodeSystemInfo.BootID {
ref := &api.ObjectReference{
Kind: "Minion",
Name: node.Name,
UID: node.UID,
Namespace: api.NamespaceDefault,
}
nc.recorder.Eventf(ref, "rebooted", "Node %s has been rebooted", node.Name)
}
node.Status.NodeInfo = nodeInfo.NodeSystemInfo
return nil
}

View File

@ -247,7 +247,7 @@ func TestRegisterNodes(t *testing.T) {
for _, machine := range item.machines {
nodes.Items = append(nodes.Items, *newNode(machine))
}
nodeController := NewNodeController(nil, "", item.machines, &api.NodeResources{}, item.fakeNodeHandler, nil, 10, time.Minute)
nodeController := NewNodeController(nil, "", item.machines, &api.NodeResources{}, item.fakeNodeHandler, nil, nil, 10, time.Minute)
err := nodeController.RegisterNodes(&nodes, item.retryCount, time.Millisecond)
if !item.expectedFail && err != nil {
t.Errorf("unexpected error: %v", err)
@ -305,7 +305,7 @@ func TestCreateGetStaticNodesWithSpec(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(nil, "", item.machines, &api.NodeResources{}, nil, nil, 10, time.Minute)
nodeController := NewNodeController(nil, "", item.machines, &api.NodeResources{}, nil, nil, nil, 10, time.Minute)
nodes, err := nodeController.GetStaticNodesWithSpec()
if err != nil {
t.Errorf("unexpected error: %v", err)
@ -366,7 +366,7 @@ func TestCreateGetCloudNodesWithSpec(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(item.fakeCloud, ".*", nil, &api.NodeResources{}, nil, nil, 10, time.Minute)
nodeController := NewNodeController(item.fakeCloud, ".*", nil, &api.NodeResources{}, nil, nil, nil, 10, time.Minute)
nodes, err := nodeController.GetCloudNodesWithSpec()
if err != nil {
t.Errorf("unexpected error: %v", err)
@ -463,7 +463,7 @@ func TestSyncCloudNodes(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(item.fakeCloud, item.matchRE, nil, &api.NodeResources{}, item.fakeNodeHandler, nil, 10, time.Minute)
nodeController := NewNodeController(item.fakeCloud, item.matchRE, nil, &api.NodeResources{}, item.fakeNodeHandler, nil, nil, 10, time.Minute)
if err := nodeController.SyncCloudNodes(); err != nil {
t.Errorf("unexpected error: %v", err)
}
@ -545,7 +545,7 @@ func TestSyncCloudNodesEvictPods(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(item.fakeCloud, item.matchRE, nil, &api.NodeResources{}, item.fakeNodeHandler, nil, 10, time.Minute)
nodeController := NewNodeController(item.fakeCloud, item.matchRE, nil, &api.NodeResources{}, item.fakeNodeHandler, nil, nil, 10, time.Minute)
if err := nodeController.SyncCloudNodes(); err != nil {
t.Errorf("unexpected error: %v", err)
}
@ -647,7 +647,7 @@ func TestNodeConditionsCheck(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(nil, "", nil, nil, nil, item.fakeKubeletClient, 10, time.Minute)
nodeController := NewNodeController(nil, "", nil, nil, nil, item.fakeKubeletClient, nil, 10, time.Minute)
nodeController.now = func() util.Time { return fakeNow }
conditions := nodeController.DoCheck(item.node)
if !reflect.DeepEqual(item.expectedConditions, conditions) {
@ -678,7 +678,7 @@ func TestPopulateNodeAddresses(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(item.fakeCloud, ".*", nil, nil, nil, nil, 10, time.Minute)
nodeController := NewNodeController(item.fakeCloud, ".*", nil, nil, nil, nil, nil, 10, time.Minute)
result, err := nodeController.PopulateAddresses(item.nodes)
// In case of IP querying error, we should continue.
if err != nil {
@ -767,7 +767,7 @@ func TestSyncProbedNodeStatus(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(item.fakeCloud, ".*", nil, nil, item.fakeNodeHandler, item.fakeKubeletClient, 10, time.Minute)
nodeController := NewNodeController(item.fakeCloud, ".*", nil, nil, item.fakeNodeHandler, item.fakeKubeletClient, nil, 10, time.Minute)
nodeController.now = func() util.Time { return fakeNow }
if err := nodeController.SyncProbedNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)
@ -870,7 +870,7 @@ func TestSyncProbedNodeStatusTransitionTime(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, 10, time.Minute)
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, nil, 10, time.Minute)
nodeController.lookupIP = func(host string) ([]net.IP, error) { return nil, fmt.Errorf("lookup %v: no such host", host) }
nodeController.now = func() util.Time { return fakeNow }
if err := nodeController.SyncProbedNodeStatus(); err != nil {
@ -1023,7 +1023,7 @@ func TestSyncProbedNodeStatusEvictPods(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, 10, 5*time.Minute)
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, nil, 10, 5*time.Minute)
nodeController.lookupIP = func(host string) ([]net.IP, error) { return nil, fmt.Errorf("lookup %v: no such host", host) }
if err := nodeController.SyncProbedNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)
@ -1181,7 +1181,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, nil, 10, item.evictionTimeout)
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, nil, nil, 10, item.evictionTimeout)
nodeController.now = func() util.Time { return fakeNow }
if err := nodeController.MonitorNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)
@ -1342,7 +1342,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, nil, 10, 5*time.Minute)
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, nil, nil, 10, 5*time.Minute)
nodeController.now = func() util.Time { return fakeNow }
if err := nodeController.MonitorNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)

View File

@ -1922,6 +1922,7 @@ func (kl *Kubelet) tryUpdateNodeStatus() error {
} else {
node.Status.NodeInfo.MachineID = info.MachineID
node.Status.NodeInfo.SystemUUID = info.SystemUUID
node.Status.NodeInfo.BootID = info.BootID
node.Spec.Capacity = CapacityFromMachineInfo(info)
}

View File

@ -3128,7 +3128,13 @@ func TestUpdateNewNodeStatus(t *testing.T) {
kubeClient.MinionsList = api.NodeList{Items: []api.Node{
{ObjectMeta: api.ObjectMeta{Name: "testnode"}},
}}
machineInfo := &cadvisorApi.MachineInfo{MachineID: "123", SystemUUID: "abc", NumCores: 2, MemoryCapacity: 1024}
machineInfo := &cadvisorApi.MachineInfo{
MachineID: "123",
SystemUUID: "abc",
BootID: "1b3",
NumCores: 2,
MemoryCapacity: 1024,
}
mockCadvisor.On("MachineInfo").Return(machineInfo, nil)
expectedNode := &api.Node{
ObjectMeta: api.ObjectMeta{Name: "testnode"},
@ -3151,6 +3157,7 @@ func TestUpdateNewNodeStatus(t *testing.T) {
NodeInfo: api.NodeSystemInfo{
MachineID: "123",
SystemUUID: "abc",
BootID: "1b3",
},
},
}
@ -3205,7 +3212,13 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
},
},
}}
machineInfo := &cadvisorApi.MachineInfo{MachineID: "123", SystemUUID: "abc", NumCores: 2, MemoryCapacity: 1024}
machineInfo := &cadvisorApi.MachineInfo{
MachineID: "123",
SystemUUID: "abc",
BootID: "1b3",
NumCores: 2,
MemoryCapacity: 1024,
}
mockCadvisor.On("MachineInfo").Return(machineInfo, nil)
expectedNode := &api.Node{
ObjectMeta: api.ObjectMeta{Name: "testnode"},
@ -3228,6 +3241,7 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
NodeInfo: api.NodeSystemInfo{
MachineID: "123",
SystemUUID: "abc",
BootID: "1b3",
},
},
}

View File

@ -341,6 +341,7 @@ func (s *Server) handleNodeInfoVersioned(w http.ResponseWriter, req *http.Reques
NodeSystemInfo: api.NodeSystemInfo{
MachineID: info.MachineID,
SystemUUID: info.SystemUUID,
BootID: info.BootID,
},
})