Merge pull request #53328 from intelsdi-x/lscpu_fix

Automatic merge from submit-queue (batch tested with PRs 53297, 53328). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Cpu Manager - make CoreID's platform unique

**What this PR does / why we need it**:
The CPU Manager uses the topology reported by cAdvisor (`/proc/cpuinfo`), where core IDs are unique only within a socket, not across the whole platform. This causes problems on multi-socket platforms.

All code assumes that core IDs are unique across the platform, so the `Discover` function has been changed to assign CoreID as the lowest cpuID among all CPUs belonging to the same core. This can be expressed as:
`CoreID=min(cpuID's on the same core)`

Since cpuIDs are platform-unique, the above guarantees that CoreIDs will also be platform-unique.



**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #53323
pull/6/head
Kubernetes Submit Queue 2017-10-10 11:20:37 -07:00 committed by GitHub
commit ec116fdc73
3 changed files with 109 additions and 2 deletions

View File

@ -9,6 +9,7 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//pkg/kubelet/cm/cpuset:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
],
)

View File

@ -18,7 +18,9 @@ package topology
import (
"fmt"
"sort"
"github.com/golang/glog"
cadvisorapi "github.com/google/cadvisor/info/v1"
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
)
@ -145,15 +147,22 @@ func Discover(machineInfo *cadvisorapi.MachineInfo) (*CPUTopology, error) {
}
CPUDetails := CPUDetails{}
numCPUs := machineInfo.NumCores
numPhysicalCores := 0
var coreID int
var err error
for _, socket := range machineInfo.Topology {
numPhysicalCores += len(socket.Cores)
for _, core := range socket.Cores {
if coreID, err = getUniqueCoreID(core.Threads); err != nil {
glog.Errorf("could not get unique coreID for socket: %d core %d threads: %v",
socket.Id, core.Id, core.Threads)
return nil, err
}
for _, cpu := range core.Threads {
CPUDetails[cpu] = CPUInfo{
CoreID: core.Id,
CoreID: coreID,
SocketID: socket.Id,
}
}
@ -167,3 +176,22 @@ func Discover(machineInfo *cadvisorapi.MachineInfo) (*CPUTopology, error) {
CPUDetails: CPUDetails,
}, nil
}
// getUniqueCoreID computes a core ID for the given threads, which are the
// logical CPU IDs that share one physical core. The core ID is defined as the
// lowest CPU ID in threads. This assures that core IDs are platform unique
// (as opposed to what cAdvisor reports, which is only socket unique).
//
// It returns an error if threads is empty or if it contains the same CPU ID
// more than once. On error the returned core ID is 0 and must not be used.
// The threads slice passed in is never modified.
func getUniqueCoreID(threads []int) (coreID int, err error) {
	if len(threads) == 0 {
		return 0, fmt.Errorf("no cpus provided")
	}

	// Sort a private copy so the caller's slice keeps its original order.
	sorted := make([]int, len(threads))
	copy(sorted, threads)
	sort.Ints(sorted)

	// Once sorted, equal CPU IDs sit next to each other, so a single pass
	// over neighbouring elements detects any duplicate.
	for i := 1; i < len(sorted); i++ {
		if sorted[i] == sorted[i-1] {
			return 0, fmt.Errorf("cpus provided are not unique")
		}
	}

	// The smallest CPU ID is the first element of the sorted copy.
	return sorted[0], nil
}

View File

@ -103,6 +103,84 @@ func Test_Discover(t *testing.T) {
},
wantErr: false,
},
{
name: "DualSocketHT - non unique Core'ID's",
args: &cadvisorapi.MachineInfo{
NumCores: 12,
Topology: []cadvisorapi.Node{
{Id: 0,
Cores: []cadvisorapi.Core{
{Id: 0, Threads: []int{0, 6}},
{Id: 1, Threads: []int{1, 7}},
{Id: 2, Threads: []int{2, 8}},
},
},
{Id: 1,
Cores: []cadvisorapi.Core{
{Id: 0, Threads: []int{3, 9}},
{Id: 1, Threads: []int{4, 10}},
{Id: 2, Threads: []int{5, 11}},
},
},
},
},
want: &CPUTopology{
NumCPUs: 12,
NumSockets: 2,
NumCores: 6,
CPUDetails: map[int]CPUInfo{
0: {CoreID: 0, SocketID: 0},
1: {CoreID: 1, SocketID: 0},
2: {CoreID: 2, SocketID: 0},
3: {CoreID: 3, SocketID: 1},
4: {CoreID: 4, SocketID: 1},
5: {CoreID: 5, SocketID: 1},
6: {CoreID: 0, SocketID: 0},
7: {CoreID: 1, SocketID: 0},
8: {CoreID: 2, SocketID: 0},
9: {CoreID: 3, SocketID: 1},
10: {CoreID: 4, SocketID: 1},
11: {CoreID: 5, SocketID: 1},
},
},
wantErr: false,
},
{
name: "OneSocketHT fail",
args: &cadvisorapi.MachineInfo{
NumCores: 8,
Topology: []cadvisorapi.Node{
{Id: 0,
Cores: []cadvisorapi.Core{
{Id: 0, Threads: []int{0, 4}},
{Id: 1, Threads: []int{1, 5}},
{Id: 2, Threads: []int{2, 2}}, // Wrong case - should fail here
{Id: 3, Threads: []int{3, 7}},
},
},
},
},
want: &CPUTopology{},
wantErr: true,
},
{
name: "OneSocketHT fail",
args: &cadvisorapi.MachineInfo{
NumCores: 8,
Topology: []cadvisorapi.Node{
{Id: 0,
Cores: []cadvisorapi.Core{
{Id: 0, Threads: []int{0, 4}},
{Id: 1, Threads: []int{1, 5}},
{Id: 2, Threads: []int{2, 6}},
{Id: 3, Threads: []int{}}, // Wrong case - should fail here
},
},
},
},
want: &CPUTopology{},
wantErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {