From b86dc9c0545b6e49290db8346cddf5394eccd28c Mon Sep 17 00:00:00 2001 From: Szymon Scharmach Date: Tue, 3 Oct 2017 17:09:29 +0200 Subject: [PATCH] Make CoreID's platform unique --- pkg/kubelet/cm/cpumanager/topology/BUILD | 1 + .../cm/cpumanager/topology/topology.go | 32 +++++++- .../cm/cpumanager/topology/topology_test.go | 78 +++++++++++++++++++ 3 files changed, 109 insertions(+), 2 deletions(-) diff --git a/pkg/kubelet/cm/cpumanager/topology/BUILD b/pkg/kubelet/cm/cpumanager/topology/BUILD index a2a34307d7..176e0ea5c8 100644 --- a/pkg/kubelet/cm/cpumanager/topology/BUILD +++ b/pkg/kubelet/cm/cpumanager/topology/BUILD @@ -9,6 +9,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/kubelet/cm/cpuset:go_default_library", + "//vendor/github.com/golang/glog:go_default_library", "//vendor/github.com/google/cadvisor/info/v1:go_default_library", ], ) diff --git a/pkg/kubelet/cm/cpumanager/topology/topology.go b/pkg/kubelet/cm/cpumanager/topology/topology.go index a03cddbad8..2491abe6c0 100644 --- a/pkg/kubelet/cm/cpumanager/topology/topology.go +++ b/pkg/kubelet/cm/cpumanager/topology/topology.go @@ -18,7 +18,9 @@ package topology import ( "fmt" + "sort" + "github.com/golang/glog" cadvisorapi "github.com/google/cadvisor/info/v1" "k8s.io/kubernetes/pkg/kubelet/cm/cpuset" ) @@ -145,15 +147,22 @@ func Discover(machineInfo *cadvisorapi.MachineInfo) (*CPUTopology, error) { } CPUDetails := CPUDetails{} - numCPUs := machineInfo.NumCores numPhysicalCores := 0 + var coreID int + var err error + for _, socket := range machineInfo.Topology { numPhysicalCores += len(socket.Cores) for _, core := range socket.Cores { + if coreID, err = getUniqueCoreID(core.Threads); err != nil { + glog.Errorf("could not get unique coreID for socket: %d core %d threads: %v", + socket.Id, core.Id, core.Threads) + return nil, err + } for _, cpu := range core.Threads { CPUDetails[cpu] = CPUInfo{ - CoreID: core.Id, + CoreID: coreID, SocketID: socket.Id, } } @@ -167,3 +176,22 @@ func 
Discover(machineInfo *cadvisorapi.MachineInfo) (*CPUTopology, error) { CPUDetails: CPUDetails, }, nil } + +// getUniqueCoreID computes the coreID as the lowest cpuID +// in the given threads slice. This ensures that coreIDs are +// platform unique (unlike the IDs cAdvisor reports, which are only socket unique). +func getUniqueCoreID(threads []int) (coreID int, err error) { + err = nil + if len(threads) == 0 { + return 0, fmt.Errorf("no cpus provided") + } + + if len(threads) != cpuset.NewCPUSet(threads...).Size() { + return 0, fmt.Errorf("cpus provided are not unique") + } + + tmpThreads := make([]int, len(threads)) + copy(tmpThreads, threads) + sort.Ints(tmpThreads) + return tmpThreads[0], err +} diff --git a/pkg/kubelet/cm/cpumanager/topology/topology_test.go b/pkg/kubelet/cm/cpumanager/topology/topology_test.go index ef7e83fd10..c8441ed245 100644 --- a/pkg/kubelet/cm/cpumanager/topology/topology_test.go +++ b/pkg/kubelet/cm/cpumanager/topology/topology_test.go @@ -103,6 +103,84 @@ func Test_Discover(t *testing.T) { }, wantErr: false, }, + { + name: "DualSocketHT - non unique Core'ID's", + args: &cadvisorapi.MachineInfo{ + NumCores: 12, + Topology: []cadvisorapi.Node{ + {Id: 0, + Cores: []cadvisorapi.Core{ + {Id: 0, Threads: []int{0, 6}}, + {Id: 1, Threads: []int{1, 7}}, + {Id: 2, Threads: []int{2, 8}}, + }, + }, + {Id: 1, + Cores: []cadvisorapi.Core{ + {Id: 0, Threads: []int{3, 9}}, + {Id: 1, Threads: []int{4, 10}}, + {Id: 2, Threads: []int{5, 11}}, + }, + }, + }, + }, + want: &CPUTopology{ + NumCPUs: 12, + NumSockets: 2, + NumCores: 6, + CPUDetails: map[int]CPUInfo{ + 0: {CoreID: 0, SocketID: 0}, + 1: {CoreID: 1, SocketID: 0}, + 2: {CoreID: 2, SocketID: 0}, + 3: {CoreID: 3, SocketID: 1}, + 4: {CoreID: 4, SocketID: 1}, + 5: {CoreID: 5, SocketID: 1}, + 6: {CoreID: 0, SocketID: 0}, + 7: {CoreID: 1, SocketID: 0}, + 8: {CoreID: 2, SocketID: 0}, + 9: {CoreID: 3, SocketID: 1}, + 10: {CoreID: 4, SocketID: 1}, + 11: {CoreID: 5, SocketID: 1}, + }, + }, + wantErr: false, + }, +
{ + name: "OneSocketHT fail", + args: &cadvisorapi.MachineInfo{ + NumCores: 8, + Topology: []cadvisorapi.Node{ + {Id: 0, + Cores: []cadvisorapi.Core{ + {Id: 0, Threads: []int{0, 4}}, + {Id: 1, Threads: []int{1, 5}}, + {Id: 2, Threads: []int{2, 2}}, // Invalid: duplicate thread ID - should fail here + {Id: 3, Threads: []int{3, 7}}, + }, + }, + }, + }, + want: &CPUTopology{}, + wantErr: true, + }, + { + name: "OneSocketHT fail", + args: &cadvisorapi.MachineInfo{ + NumCores: 8, + Topology: []cadvisorapi.Node{ + {Id: 0, + Cores: []cadvisorapi.Core{ + {Id: 0, Threads: []int{0, 4}}, + {Id: 1, Threads: []int{1, 5}}, + {Id: 2, Threads: []int{2, 6}}, + {Id: 3, Threads: []int{}}, // Invalid: core has no threads - should fail here + }, + }, + }, + }, + want: &CPUTopology{}, + wantErr: true, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) {