mirror of https://github.com/k3s-io/k3s
Merge pull request #53328 from intelsdi-x/lscpu_fix
Automatic merge from submit-queue (batch tested with PRs 53297, 53328). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. CPU Manager - make CoreIDs platform-unique **What this PR does / why we need it**: The CPU Manager uses the topology reported by cAdvisor (from `/proc/cpuinfo`), where coreIDs are unique only within a socket, not across the whole platform; this causes problems on multi-socket platforms. All of the code assumes that coreIDs are unique across the platform, so the `Discover` function has been changed to assign CoreID as the lowest cpuID among all CPUs belonging to the same core. This can be expressed as: `CoreID = min(cpuIDs on the same core)`. Since cpuIDs are platform-unique, this guarantees that CoreIDs are platform-unique as well. **Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #53323 pull/6/head
commit
ec116fdc73
|
@ -9,6 +9,7 @@ go_library(
|
|||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//pkg/kubelet/cm/cpuset:go_default_library",
|
||||
"//vendor/github.com/golang/glog:go_default_library",
|
||||
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
|
||||
],
|
||||
)
|
||||
|
|
|
@ -18,7 +18,9 @@ package topology
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"github.com/golang/glog"
|
||||
cadvisorapi "github.com/google/cadvisor/info/v1"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
)
|
||||
|
@ -145,15 +147,22 @@ func Discover(machineInfo *cadvisorapi.MachineInfo) (*CPUTopology, error) {
|
|||
}
|
||||
|
||||
CPUDetails := CPUDetails{}
|
||||
|
||||
numCPUs := machineInfo.NumCores
|
||||
numPhysicalCores := 0
|
||||
var coreID int
|
||||
var err error
|
||||
|
||||
for _, socket := range machineInfo.Topology {
|
||||
numPhysicalCores += len(socket.Cores)
|
||||
for _, core := range socket.Cores {
|
||||
if coreID, err = getUniqueCoreID(core.Threads); err != nil {
|
||||
glog.Errorf("could not get unique coreID for socket: %d core %d threads: %v",
|
||||
socket.Id, core.Id, core.Threads)
|
||||
return nil, err
|
||||
}
|
||||
for _, cpu := range core.Threads {
|
||||
CPUDetails[cpu] = CPUInfo{
|
||||
CoreID: core.Id,
|
||||
CoreID: coreID,
|
||||
SocketID: socket.Id,
|
||||
}
|
||||
}
|
||||
|
@ -167,3 +176,22 @@ func Discover(machineInfo *cadvisorapi.MachineInfo) (*CPUTopology, error) {
|
|||
CPUDetails: CPUDetails,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// getUniqueCoreID returns the core ID to use for a physical core, given the
// cpuIDs of the hardware threads that belong to it. The result is the lowest
// cpuID in threads; because cpuIDs are platform unique, the derived coreID is
// platform unique as well (as opposed to the coreIDs cAdvisor reports, which
// are only socket unique).
//
// threads is not modified. An error is returned, with a zero coreID, when
// threads is empty or when it contains the same cpuID more than once.
func getUniqueCoreID(threads []int) (int, error) {
	if len(threads) == 0 {
		return 0, fmt.Errorf("no cpus provided")
	}

	// Sort a private copy so the caller's slice keeps its original order.
	sorted := make([]int, len(threads))
	copy(sorted, threads)
	sort.Ints(sorted)

	// Once sorted, a repeated cpuID always sits next to its duplicate, so a
	// single pass over neighbours is enough to detect non-unique input.
	for i := 1; i < len(sorted); i++ {
		if sorted[i] == sorted[i-1] {
			return 0, fmt.Errorf("cpus provided are not unique")
		}
	}

	return sorted[0], nil
}
|
||||
|
|
|
@ -103,6 +103,84 @@ func Test_Discover(t *testing.T) {
|
|||
},
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "DualSocketHT - non unique Core'ID's",
|
||||
args: &cadvisorapi.MachineInfo{
|
||||
NumCores: 12,
|
||||
Topology: []cadvisorapi.Node{
|
||||
{Id: 0,
|
||||
Cores: []cadvisorapi.Core{
|
||||
{Id: 0, Threads: []int{0, 6}},
|
||||
{Id: 1, Threads: []int{1, 7}},
|
||||
{Id: 2, Threads: []int{2, 8}},
|
||||
},
|
||||
},
|
||||
{Id: 1,
|
||||
Cores: []cadvisorapi.Core{
|
||||
{Id: 0, Threads: []int{3, 9}},
|
||||
{Id: 1, Threads: []int{4, 10}},
|
||||
{Id: 2, Threads: []int{5, 11}},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
want: &CPUTopology{
|
||||
NumCPUs: 12,
|
||||
NumSockets: 2,
|
||||
NumCores: 6,
|
||||
CPUDetails: map[int]CPUInfo{
|
||||
0: {CoreID: 0, SocketID: 0},
|
||||
1: {CoreID: 1, SocketID: 0},
|
||||
2: {CoreID: 2, SocketID: 0},
|
||||
3: {CoreID: 3, SocketID: 1},
|
||||
4: {CoreID: 4, SocketID: 1},
|
||||
5: {CoreID: 5, SocketID: 1},
|
||||
6: {CoreID: 0, SocketID: 0},
|
||||
7: {CoreID: 1, SocketID: 0},
|
||||
8: {CoreID: 2, SocketID: 0},
|
||||
9: {CoreID: 3, SocketID: 1},
|
||||
10: {CoreID: 4, SocketID: 1},
|
||||
11: {CoreID: 5, SocketID: 1},
|
||||
},
|
||||
},
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "OneSocketHT fail",
|
||||
args: &cadvisorapi.MachineInfo{
|
||||
NumCores: 8,
|
||||
Topology: []cadvisorapi.Node{
|
||||
{Id: 0,
|
||||
Cores: []cadvisorapi.Core{
|
||||
{Id: 0, Threads: []int{0, 4}},
|
||||
{Id: 1, Threads: []int{1, 5}},
|
||||
{Id: 2, Threads: []int{2, 2}}, // Wrong case - should fail here
|
||||
{Id: 3, Threads: []int{3, 7}},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
want: &CPUTopology{},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "OneSocketHT fail",
|
||||
args: &cadvisorapi.MachineInfo{
|
||||
NumCores: 8,
|
||||
Topology: []cadvisorapi.Node{
|
||||
{Id: 0,
|
||||
Cores: []cadvisorapi.Core{
|
||||
{Id: 0, Threads: []int{0, 4}},
|
||||
{Id: 1, Threads: []int{1, 5}},
|
||||
{Id: 2, Threads: []int{2, 6}},
|
||||
{Id: 3, Threads: []int{}}, // Wrong case - should fail here
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
want: &CPUTopology{},
|
||||
wantErr: true,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
|
|
Loading…
Reference in New Issue