// Mirror of https://github.com/portainer/portainer
// (370 lines, 9.6 KiB, Go)
package snapshot
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"strings"
|
|
"time"
|
|
|
|
portainer "github.com/portainer/portainer/api"
|
|
"github.com/portainer/portainer/api/docker/consts"
|
|
edgeutils "github.com/portainer/portainer/pkg/edge"
|
|
networkingutils "github.com/portainer/portainer/pkg/networking"
|
|
|
|
"github.com/docker/docker/api/types"
|
|
"github.com/docker/docker/api/types/container"
|
|
_container "github.com/docker/docker/api/types/container"
|
|
"github.com/docker/docker/api/types/image"
|
|
"github.com/docker/docker/api/types/volume"
|
|
"github.com/docker/docker/client"
|
|
"github.com/docker/docker/pkg/stdcopy"
|
|
"github.com/rs/zerolog/log"
|
|
"github.com/segmentio/encoding/json"
|
|
)
|
|
|
|
func CreateDockerSnapshot(cli *client.Client) (*portainer.DockerSnapshot, error) {
|
|
if _, err := cli.Ping(context.Background()); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
dockerSnapshot := &portainer.DockerSnapshot{}
|
|
|
|
if err := dockerSnapshotInfo(dockerSnapshot, cli); err != nil {
|
|
log.Warn().Err(err).Msg("unable to snapshot engine information")
|
|
}
|
|
|
|
if dockerSnapshot.Swarm {
|
|
if err := dockerSnapshotSwarmServices(dockerSnapshot, cli); err != nil {
|
|
log.Warn().Err(err).Msg("unable to snapshot Swarm services")
|
|
}
|
|
|
|
if err := dockerSnapshotNodes(dockerSnapshot, cli); err != nil {
|
|
log.Warn().Err(err).Msg("unable to snapshot Swarm nodes")
|
|
}
|
|
}
|
|
|
|
if err := dockerSnapshotContainers(dockerSnapshot, cli); err != nil {
|
|
log.Warn().Err(err).Msg("unable to snapshot containers")
|
|
}
|
|
|
|
if err := dockerSnapshotImages(dockerSnapshot, cli); err != nil {
|
|
log.Warn().Err(err).Msg("unable to snapshot images")
|
|
}
|
|
|
|
if err := dockerSnapshotVolumes(dockerSnapshot, cli); err != nil {
|
|
log.Warn().Err(err).Msg("unable to snapshot volumes")
|
|
}
|
|
|
|
if err := dockerSnapshotNetworks(dockerSnapshot, cli); err != nil {
|
|
log.Warn().Err(err).Msg("unable to snapshot networks")
|
|
}
|
|
|
|
if err := dockerSnapshotVersion(dockerSnapshot, cli); err != nil {
|
|
log.Warn().Err(err).Msg("unable to snapshot engine version")
|
|
}
|
|
|
|
dockerSnapshot.Time = time.Now().Unix()
|
|
|
|
return dockerSnapshot, nil
|
|
}
|
|
|
|
func dockerSnapshotInfo(snapshot *portainer.DockerSnapshot, cli *client.Client) error {
|
|
info, err := cli.Info(context.Background())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
snapshot.Swarm = info.Swarm.ControlAvailable
|
|
snapshot.DockerVersion = info.ServerVersion
|
|
snapshot.TotalCPU = info.NCPU
|
|
snapshot.TotalMemory = info.MemTotal
|
|
snapshot.SnapshotRaw.Info = info
|
|
|
|
return nil
|
|
}
|
|
|
|
func dockerSnapshotNodes(snapshot *portainer.DockerSnapshot, cli *client.Client) error {
|
|
nodes, err := cli.NodeList(context.Background(), types.NodeListOptions{})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var nanoCpus, totalMem int64
|
|
|
|
for _, node := range nodes {
|
|
nanoCpus += node.Description.Resources.NanoCPUs
|
|
totalMem += node.Description.Resources.MemoryBytes
|
|
}
|
|
|
|
snapshot.TotalCPU = int(nanoCpus / 1e9)
|
|
snapshot.TotalMemory = totalMem
|
|
snapshot.NodeCount = len(nodes)
|
|
|
|
return nil
|
|
}
|
|
|
|
func dockerSnapshotSwarmServices(snapshot *portainer.DockerSnapshot, cli *client.Client) error {
|
|
stacks := make(map[string]struct{})
|
|
|
|
services, err := cli.ServiceList(context.Background(), types.ServiceListOptions{})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, service := range services {
|
|
for k, v := range service.Spec.Labels {
|
|
if k == "com.docker.stack.namespace" {
|
|
stacks[v] = struct{}{}
|
|
}
|
|
}
|
|
}
|
|
|
|
snapshot.ServiceCount = len(services)
|
|
snapshot.StackCount += len(stacks)
|
|
|
|
return nil
|
|
}
|
|
|
|
func dockerSnapshotContainers(snapshot *portainer.DockerSnapshot, cli *client.Client) error {
|
|
containers, err := cli.ContainerList(context.Background(), container.ListOptions{All: true})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
stacks := make(map[string]struct{})
|
|
gpuUseSet := make(map[string]struct{})
|
|
gpuUseAll := false
|
|
|
|
for _, container := range containers {
|
|
for k, v := range container.Labels {
|
|
if k == consts.ComposeStackNameLabel {
|
|
stacks[v] = struct{}{}
|
|
}
|
|
}
|
|
|
|
if container.State != "running" {
|
|
continue
|
|
}
|
|
|
|
// Snapshot GPUs
|
|
response, err := cli.ContainerInspect(context.Background(), container.ID)
|
|
if err != nil && !snapshot.Swarm {
|
|
return err
|
|
} else if err != nil {
|
|
// Inspect a container will fail when the container runs on a different
|
|
// Swarm node, so it is better to log the error instead of return error
|
|
// when the Swarm mode is enabled
|
|
if !strings.Contains(err.Error(), "No such container") {
|
|
return err
|
|
}
|
|
|
|
// It is common to have containers running on different Swarm nodes,
|
|
// so we just log the error in the debug level
|
|
log.Debug().Str("container", container.ID).Err(err).Msg("unable to inspect container in other Swarm nodes")
|
|
|
|
continue
|
|
}
|
|
|
|
var gpuOptions *_container.DeviceRequest
|
|
|
|
for _, deviceRequest := range response.HostConfig.Resources.DeviceRequests {
|
|
if deviceRequest.Driver == "nvidia" || deviceRequest.Capabilities[0][0] == "gpu" {
|
|
gpuOptions = &deviceRequest
|
|
}
|
|
}
|
|
|
|
if gpuOptions == nil {
|
|
continue
|
|
}
|
|
|
|
if gpuOptions.Count == -1 {
|
|
gpuUseAll = true
|
|
}
|
|
|
|
for _, id := range gpuOptions.DeviceIDs {
|
|
gpuUseSet[id] = struct{}{}
|
|
}
|
|
}
|
|
|
|
gpuUseList := make([]string, 0, len(gpuUseSet))
|
|
for gpuUse := range gpuUseSet {
|
|
gpuUseList = append(gpuUseList, gpuUse)
|
|
}
|
|
|
|
snapshot.GpuUseAll = gpuUseAll
|
|
snapshot.GpuUseList = gpuUseList
|
|
|
|
stats := calculateContainerStats(containers)
|
|
|
|
snapshot.ContainerCount = stats.Total
|
|
snapshot.RunningContainerCount = stats.Running
|
|
snapshot.StoppedContainerCount = stats.Stopped
|
|
snapshot.HealthyContainerCount = stats.Healthy
|
|
snapshot.UnhealthyContainerCount = stats.Unhealthy
|
|
snapshot.StackCount += len(stacks)
|
|
|
|
for _, container := range containers {
|
|
snapshot.SnapshotRaw.Containers = append(snapshot.SnapshotRaw.Containers, portainer.DockerContainerSnapshot{Container: container})
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func dockerSnapshotImages(snapshot *portainer.DockerSnapshot, cli *client.Client) error {
|
|
images, err := cli.ImageList(context.Background(), image.ListOptions{})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
snapshot.ImageCount = len(images)
|
|
snapshot.SnapshotRaw.Images = images
|
|
|
|
return nil
|
|
}
|
|
|
|
func dockerSnapshotVolumes(snapshot *portainer.DockerSnapshot, cli *client.Client) error {
|
|
volumes, err := cli.VolumeList(context.Background(), volume.ListOptions{})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
snapshot.VolumeCount = len(volumes.Volumes)
|
|
snapshot.SnapshotRaw.Volumes = volumes
|
|
|
|
return nil
|
|
}
|
|
|
|
func dockerSnapshotNetworks(snapshot *portainer.DockerSnapshot, cli *client.Client) error {
|
|
networks, err := cli.NetworkList(context.Background(), types.NetworkListOptions{})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
snapshot.SnapshotRaw.Networks = networks
|
|
|
|
return nil
|
|
}
|
|
|
|
func dockerSnapshotVersion(snapshot *portainer.DockerSnapshot, cli *client.Client) error {
|
|
version, err := cli.ServerVersion(context.Background())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
snapshot.SnapshotRaw.Version = version
|
|
snapshot.IsPodman = isPodman(version)
|
|
|
|
return nil
|
|
}
|
|
|
|
// DockerSnapshotDiagnostics returns the diagnostics data for the agent
|
|
func DockerSnapshotDiagnostics(cli *client.Client, edgeKey string) (*portainer.DiagnosticsData, error) {
|
|
containerID := os.Getenv("HOSTNAME")
|
|
snapshot := &portainer.DockerSnapshot{
|
|
DiagnosticsData: &portainer.DiagnosticsData{
|
|
DNS: make(map[string]string),
|
|
Telnet: make(map[string]string),
|
|
},
|
|
}
|
|
|
|
if err := dockerSnapshotContainerErrorLogs(snapshot, cli, containerID); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if edgeKey == "" {
|
|
return snapshot.DiagnosticsData, nil
|
|
}
|
|
|
|
url, err := edgeutils.GetPortainerURLFromEdgeKey(edgeKey)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get portainer URL from edge key: %w", err)
|
|
}
|
|
|
|
snapshot.DiagnosticsData.DNS["edge-to-portainer"] = networkingutils.ProbeDNSConnection(url)
|
|
snapshot.DiagnosticsData.Telnet["edge-to-portainer"] = networkingutils.ProbeTelnetConnection(url)
|
|
|
|
return snapshot.DiagnosticsData, nil
|
|
}
|
|
|
|
// DockerSnapshotContainerErrorLogs returns the 5 most recent error logs of the agent container
|
|
// this will primarily be used for agent snapshot
|
|
func dockerSnapshotContainerErrorLogs(snapshot *portainer.DockerSnapshot, cli *client.Client, containerId string) error {
|
|
if containerId == "" {
|
|
return nil
|
|
}
|
|
|
|
rd, err := cli.ContainerLogs(context.Background(), containerId, container.LogsOptions{
|
|
ShowStdout: false,
|
|
ShowStderr: true,
|
|
Tail: "5",
|
|
Timestamps: true,
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get container logs: %w", err)
|
|
}
|
|
defer rd.Close()
|
|
|
|
var stdOut, stdErr bytes.Buffer
|
|
if _, err := stdcopy.StdCopy(&stdErr, &stdOut, rd); err != nil {
|
|
return fmt.Errorf("failed to copy error logs: %w", err)
|
|
}
|
|
|
|
var logs []map[string]string
|
|
jsonLogs, err := json.Marshal(logs)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to marshal logs to JSON: %w", err)
|
|
}
|
|
|
|
snapshot.DiagnosticsData.Log = string(jsonLogs)
|
|
|
|
return nil
|
|
}
|
|
|
|
// isPodman checks if the version is for Podman by checking if any of the components contain "podman".
|
|
// If it's podman, a component name should be "Podman Engine"
|
|
func isPodman(version types.Version) bool {
|
|
for _, component := range version.Components {
|
|
if strings.Contains(strings.ToLower(component.Name), "podman") {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// ContainerStats holds the container counters computed for a snapshot.
type ContainerStats struct {
	// Running and Stopped are the numbers of containers counted as running
	// and as stopped.
	Running, Stopped int
	// Healthy and Unhealthy are the numbers of containers counted as healthy
	// and as unhealthy.
	Healthy, Unhealthy int
	// Total is the number of containers considered, whatever their state.
	Total int
}
|
|
|
|
func calculateContainerStats(containers []types.Container) ContainerStats {
|
|
var running, stopped, healthy, unhealthy int
|
|
for _, container := range containers {
|
|
switch container.State {
|
|
case "running":
|
|
running++
|
|
case "healthy":
|
|
running++
|
|
healthy++
|
|
case "unhealthy":
|
|
running++
|
|
unhealthy++
|
|
case "exited", "stopped":
|
|
stopped++
|
|
}
|
|
}
|
|
|
|
return ContainerStats{
|
|
Running: running,
|
|
Stopped: stopped,
|
|
Healthy: healthy,
|
|
Unhealthy: unhealthy,
|
|
Total: len(containers),
|
|
}
|
|
}
|