fix(container): inaccurate healthy container count [BE-2290] (#1114)

pull/12512/merge
Oscar Zhou 2025-09-01 17:01:13 +12:00 committed by GitHub
parent 541f281b29
commit 7cf6bb78d6
8 changed files with 347 additions and 112 deletions

View File

@ -1,37 +0,0 @@
package docker
import "github.com/docker/docker/api/types"
type ContainerStats struct {
Running int `json:"running"`
Stopped int `json:"stopped"`
Healthy int `json:"healthy"`
Unhealthy int `json:"unhealthy"`
Total int `json:"total"`
}
func CalculateContainerStats(containers []types.Container) ContainerStats {
var running, stopped, healthy, unhealthy int
for _, container := range containers {
switch container.State {
case "running":
running++
case "healthy":
running++
healthy++
case "unhealthy":
running++
unhealthy++
case "exited", "stopped":
stopped++
}
}
return ContainerStats{
Running: running,
Stopped: stopped,
Healthy: healthy,
Unhealthy: unhealthy,
Total: len(containers),
}
}

View File

@ -1,27 +0,0 @@
package docker
import (
"testing"
"github.com/docker/docker/api/types"
"github.com/stretchr/testify/assert"
)
func TestCalculateContainerStats(t *testing.T) {
containers := []types.Container{
{State: "running"},
{State: "running"},
{State: "exited"},
{State: "stopped"},
{State: "healthy"},
{State: "unhealthy"},
}
stats := CalculateContainerStats(containers)
assert.Equal(t, 4, stats.Running)
assert.Equal(t, 2, stats.Stopped)
assert.Equal(t, 1, stats.Healthy)
assert.Equal(t, 1, stats.Unhealthy)
assert.Equal(t, 6, stats.Total)
}

View File

@ -0,0 +1,92 @@
package stats
import (
"context"
"errors"
"sync"
"github.com/docker/docker/api/types/container"
)
type ContainerStats struct {
Running int `json:"running"`
Stopped int `json:"stopped"`
Healthy int `json:"healthy"`
Unhealthy int `json:"unhealthy"`
Total int `json:"total"`
}
type DockerClient interface {
ContainerInspect(ctx context.Context, containerID string) (container.InspectResponse, error)
}
func CalculateContainerStats(ctx context.Context, cli DockerClient, containers []container.Summary) (ContainerStats, error) {
var running, stopped, healthy, unhealthy int
var mu sync.Mutex
var wg sync.WaitGroup
semaphore := make(chan struct{}, 5)
var aggErr error
var aggMu sync.Mutex
for i := range containers {
id := containers[i].ID
semaphore <- struct{}{}
wg.Go(func() {
defer func() { <-semaphore }()
containerInspection, err := cli.ContainerInspect(ctx, id)
stat := ContainerStats{}
if err != nil {
aggMu.Lock()
aggErr = errors.Join(aggErr, err)
aggMu.Unlock()
return
}
stat = getContainerStatus(containerInspection.State)
mu.Lock()
running += stat.Running
stopped += stat.Stopped
healthy += stat.Healthy
unhealthy += stat.Unhealthy
mu.Unlock()
})
}
wg.Wait()
return ContainerStats{
Running: running,
Stopped: stopped,
Healthy: healthy,
Unhealthy: unhealthy,
Total: len(containers),
}, aggErr
}
func getContainerStatus(state *container.State) ContainerStats {
stat := ContainerStats{}
if state == nil {
return stat
}
switch state.Status {
case container.StateRunning:
stat.Running++
case container.StateExited, container.StateDead:
stat.Stopped++
}
if state.Health != nil {
switch state.Health.Status {
case container.Healthy:
stat.Healthy++
case container.Unhealthy:
stat.Unhealthy++
}
}
return stat
}

View File

@ -0,0 +1,234 @@
package stats
import (
"context"
"errors"
"testing"
"time"
"github.com/docker/docker/api/types/container"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
)
// MockDockerClient implements the DockerClient interface for testing
type MockDockerClient struct {
mock.Mock
}
func (m *MockDockerClient) ContainerInspect(ctx context.Context, containerID string) (container.InspectResponse, error) {
args := m.Called(ctx, containerID)
return args.Get(0).(container.InspectResponse), args.Error(1)
}
func TestCalculateContainerStats(t *testing.T) {
mockClient := new(MockDockerClient)
// Test containers - using enough containers to test concurrent processing
containers := []container.Summary{
{ID: "container1"},
{ID: "container2"},
{ID: "container3"},
{ID: "container4"},
{ID: "container5"},
{ID: "container6"},
{ID: "container7"},
{ID: "container8"},
{ID: "container9"},
{ID: "container10"},
}
// Setup mock expectations with different container states to test various scenarios
containerStates := []struct {
id string
status string
health *container.Health
expected ContainerStats
}{
{"container1", container.StateRunning, &container.Health{Status: container.Healthy}, ContainerStats{Running: 1, Stopped: 0, Healthy: 1, Unhealthy: 0}},
{"container2", container.StateRunning, &container.Health{Status: container.Unhealthy}, ContainerStats{Running: 1, Stopped: 0, Healthy: 0, Unhealthy: 1}},
{"container3", container.StateRunning, nil, ContainerStats{Running: 1, Stopped: 0, Healthy: 0, Unhealthy: 0}},
{"container4", container.StateExited, nil, ContainerStats{Running: 0, Stopped: 1, Healthy: 0, Unhealthy: 0}},
{"container5", container.StateDead, nil, ContainerStats{Running: 0, Stopped: 1, Healthy: 0, Unhealthy: 0}},
{"container6", container.StateRunning, &container.Health{Status: container.Healthy}, ContainerStats{Running: 1, Stopped: 0, Healthy: 1, Unhealthy: 0}},
{"container7", container.StateRunning, &container.Health{Status: container.Unhealthy}, ContainerStats{Running: 1, Stopped: 0, Healthy: 0, Unhealthy: 1}},
{"container8", container.StateExited, nil, ContainerStats{Running: 0, Stopped: 1, Healthy: 0, Unhealthy: 0}},
{"container9", container.StateRunning, nil, ContainerStats{Running: 1, Stopped: 0, Healthy: 0, Unhealthy: 0}},
{"container10", container.StateDead, nil, ContainerStats{Running: 0, Stopped: 1, Healthy: 0, Unhealthy: 0}},
}
expected := ContainerStats{}
// Setup mock expectations for all containers with artificial delays to simulate real Docker calls
for _, state := range containerStates {
mockClient.On("ContainerInspect", mock.Anything, state.id).Return(container.InspectResponse{
ContainerJSONBase: &container.ContainerJSONBase{
State: &container.State{
Status: state.status,
Health: state.health,
},
},
}, nil).After(50 * time.Millisecond) // Simulate 50ms Docker API call
expected.Running += state.expected.Running
expected.Stopped += state.expected.Stopped
expected.Healthy += state.expected.Healthy
expected.Unhealthy += state.expected.Unhealthy
expected.Total++
}
// Call the function and measure time
startTime := time.Now()
stats, err := CalculateContainerStats(context.Background(), mockClient, containers)
require.NoError(t, err, "failed to calculate container stats")
duration := time.Since(startTime)
// Assert results
assert.Equal(t, expected, stats)
assert.Equal(t, expected.Running, stats.Running)
assert.Equal(t, expected.Stopped, stats.Stopped)
assert.Equal(t, expected.Healthy, stats.Healthy)
assert.Equal(t, expected.Unhealthy, stats.Unhealthy)
assert.Equal(t, 10, stats.Total)
// Verify concurrent processing by checking that all mock calls were made
mockClient.AssertExpectations(t)
// Test concurrency: With 5 workers and 10 containers taking 50ms each:
// Sequential would take: 10 * 50ms = 500ms
sequentialTime := 10 * 50 * time.Millisecond
// Verify that concurrent processing is actually faster than sequential
// Allow some overhead for goroutine scheduling
assert.Less(t, duration, sequentialTime, "Concurrent processing should be faster than sequential")
// Concurrent should take: ~100-150ms (depending on scheduling)
assert.Less(t, duration, 150*time.Millisecond, "Concurrent processing should be significantly faster")
assert.Greater(t, duration, 100*time.Millisecond, "Concurrent processing should be longer than 100ms")
}
func TestCalculateContainerStatsAllErrors(t *testing.T) {
mockClient := new(MockDockerClient)
// Test containers
containers := []container.Summary{
{ID: "container1"},
{ID: "container2"},
}
// Setup mock expectations with all calls returning errors
mockClient.On("ContainerInspect", mock.Anything, "container1").Return(container.InspectResponse{}, errors.New("network error"))
mockClient.On("ContainerInspect", mock.Anything, "container2").Return(container.InspectResponse{}, errors.New("permission denied"))
// Call the function
stats, err := CalculateContainerStats(context.Background(), mockClient, containers)
// Assert that an error was returned
require.Error(t, err, "should return error when all containers fail to inspect")
assert.Contains(t, err.Error(), "network error", "error should contain one of the original error messages")
assert.Contains(t, err.Error(), "permission denied", "error should contain the other original error message")
// Assert that stats are zero since no containers were successfully processed
expectedStats := ContainerStats{
Running: 0,
Stopped: 0,
Healthy: 0,
Unhealthy: 0,
Total: 2, // total containers processed
}
assert.Equal(t, expectedStats, stats)
// Verify all mock calls were made
mockClient.AssertExpectations(t)
}
func TestGetContainerStatus(t *testing.T) {
testCases := []struct {
name string
state *container.State
expected ContainerStats
}{
{
name: "running healthy container",
state: &container.State{
Status: container.StateRunning,
Health: &container.Health{
Status: container.Healthy,
},
},
expected: ContainerStats{
Running: 1,
Stopped: 0,
Healthy: 1,
Unhealthy: 0,
},
},
{
name: "running unhealthy container",
state: &container.State{
Status: container.StateRunning,
Health: &container.Health{
Status: container.Unhealthy,
},
},
expected: ContainerStats{
Running: 1,
Stopped: 0,
Healthy: 0,
Unhealthy: 1,
},
},
{
name: "running container without health check",
state: &container.State{
Status: container.StateRunning,
},
expected: ContainerStats{
Running: 1,
Stopped: 0,
Healthy: 0,
Unhealthy: 0,
},
},
{
name: "exited container",
state: &container.State{
Status: container.StateExited,
},
expected: ContainerStats{
Running: 0,
Stopped: 1,
Healthy: 0,
Unhealthy: 0,
},
},
{
name: "dead container",
state: &container.State{
Status: container.StateDead,
},
expected: ContainerStats{
Running: 0,
Stopped: 1,
Healthy: 0,
Unhealthy: 0,
},
},
{
name: "nil state",
state: nil,
expected: ContainerStats{
Running: 0,
Stopped: 0,
Healthy: 0,
Unhealthy: 0,
},
},
}
for _, testCase := range testCases {
t.Run(testCase.name, func(t *testing.T) {
stat := getContainerStatus(testCase.state)
assert.Equal(t, testCase.expected, stat)
})
}
}

View File

@ -11,7 +11,7 @@ import (
"github.com/docker/docker/api/types/volume"
portainer "github.com/portainer/portainer/api"
"github.com/portainer/portainer/api/dataservices"
"github.com/portainer/portainer/api/docker"
"github.com/portainer/portainer/api/docker/stats"
"github.com/portainer/portainer/api/http/errors"
"github.com/portainer/portainer/api/http/handler/docker/utils"
"github.com/portainer/portainer/api/http/middlewares"
@ -26,12 +26,12 @@ type imagesCounters struct {
}
type dashboardResponse struct {
Containers docker.ContainerStats `json:"containers"`
Services int `json:"services"`
Images imagesCounters `json:"images"`
Volumes int `json:"volumes"`
Networks int `json:"networks"`
Stacks int `json:"stacks"`
Containers stats.ContainerStats `json:"containers"`
Services int `json:"services"`
Images imagesCounters `json:"images"`
Volumes int `json:"volumes"`
Networks int `json:"networks"`
Stacks int `json:"stacks"`
}
// @id dockerDashboard
@ -144,13 +144,18 @@ func (h *Handler) dashboard(w http.ResponseWriter, r *http.Request) *httperror.H
stackCount = len(stacks)
}
containersStats, err := stats.CalculateContainerStats(r.Context(), cli, containers)
if err != nil {
return httperror.InternalServerError("Unable to retrieve Docker containers stats", err)
}
resp = dashboardResponse{
Images: imagesCounters{
Total: len(images),
Size: totalSize,
},
Services: len(services),
Containers: docker.CalculateContainerStats(containers),
Containers: containersStats,
Networks: len(networks),
Volumes: len(volumes),
Stacks: stackCount,

1
go.mod
View File

@ -254,6 +254,7 @@ require (
github.com/spf13/cast v1.7.0 // indirect
github.com/spf13/cobra v1.9.1 // indirect
github.com/spf13/pflag v1.0.7 // indirect
github.com/stretchr/objx v0.5.2 // indirect
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
github.com/theupdateframework/notary v0.7.0 // indirect
github.com/tilt-dev/fsnotify v1.4.8-0.20220602155310-fff9c274a375 // indirect

4
go.sum
View File

@ -785,8 +785,6 @@ github.com/zmap/zcrypto v0.0.0-20241123155728-2916694fa469/go.mod h1:sUuKi10EbW7
github.com/zmap/zlint/v3 v3.0.0/go.mod h1:paGwFySdHIBEMJ61YjoqT4h7Ge+fdYG4sUQhnTb1lJ8=
github.com/zmap/zlint/v3 v3.6.4 h1:r2kHfRF7mIsxW0IH4Og2iZnrlpCLTZBFjnXy1x/ZnZI=
github.com/zmap/zlint/v3 v3.6.4/go.mod h1:KQLVUquVaO5YJDl5a4k/7RPIbIW2v66+sRoBPNZusI8=
go.etcd.io/bbolt v1.4.0 h1:TU77id3TnN/zKr7CO/uk+fBCwF2jGcMuw2B/FMAzYIk=
go.etcd.io/bbolt v1.4.0/go.mod h1:AsD+OCi/qPN1giOX1aiLAha3o1U8rAz65bvN4j0sRuk=
go.etcd.io/bbolt v1.4.3 h1:dEadXpI6G79deX5prL3QRNP6JB8UxVkqo4UPnHaNXJo=
go.etcd.io/bbolt v1.4.3/go.mod h1:tKQlpPaYCVFctUIgFKFnAlvbmB3tpy1vkTnDWohtc0E=
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
@ -955,8 +953,6 @@ golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA=
golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=

View File

@ -10,6 +10,7 @@ import (
portainer "github.com/portainer/portainer/api"
"github.com/portainer/portainer/api/docker/consts"
"github.com/portainer/portainer/api/docker/stats"
edgeutils "github.com/portainer/portainer/pkg/edge"
networkingutils "github.com/portainer/portainer/pkg/networking"
@ -131,7 +132,8 @@ func dockerSnapshotSwarmServices(snapshot *portainer.DockerSnapshot, cli *client
}
func dockerSnapshotContainers(snapshot *portainer.DockerSnapshot, cli *client.Client) error {
containers, err := cli.ContainerList(context.Background(), dockercontainer.ListOptions{All: true})
ctx := context.Background()
containers, err := cli.ContainerList(ctx, dockercontainer.ListOptions{All: true})
if err != nil {
return err
}
@ -207,7 +209,10 @@ func dockerSnapshotContainers(snapshot *portainer.DockerSnapshot, cli *client.Cl
snapshot.GpuUseAll = gpuUseAll
snapshot.GpuUseList = gpuUseList
stats := calculateContainerStats(containers)
stats, err := stats.CalculateContainerStats(ctx, cli, containers)
if err != nil {
return fmt.Errorf("failed to calculate container stats: %w", err)
}
snapshot.ContainerCount = stats.Total
snapshot.RunningContainerCount = stats.Running
@ -344,37 +349,3 @@ func isPodman(version types.Version) bool {
return false
}
type ContainerStats struct {
Running int
Stopped int
Healthy int
Unhealthy int
Total int
}
func calculateContainerStats(containers []types.Container) ContainerStats {
var running, stopped, healthy, unhealthy int
for _, container := range containers {
switch container.State {
case "running":
running++
case "healthy":
running++
healthy++
case "unhealthy":
running++
unhealthy++
case "exited", "stopped":
stopped++
}
}
return ContainerStats{
Running: running,
Stopped: stopped,
Healthy: healthy,
Unhealthy: unhealthy,
Total: len(containers),
}
}