From fd916bc8a22174c4f5471ffe5cc8b7fcbdebad21 Mon Sep 17 00:00:00 2001
From: Ali <83188384+testA113@users.noreply.github.com>
Date: Fri, 3 Mar 2023 14:47:10 +1300
Subject: [PATCH] feat(gpu): rework docker GPU for UI performance [EE-4918]
(#8518)
---
api/cmd/portainer/main.go | 24 ++-
api/datastore/migrate_post_init.go | 116 ++++++++++++++
api/datastore/migrator/migrate_dbversion90.go | 34 +++-
.../test_data/output_24_to_latest.json | 2 +
.../endpoints/endpoint_settings_update.go | 12 ++
api/kubernetes/cli/client.go | 23 +--
api/portainer.go | 3 +
app/assets/css/vendor-override.css | 5 -
.../imageRegistry/por-image-registry.html | 2 +-
app/docker/react/components/index.ts | 9 +-
.../create/createContainerController.js | 19 ++-
.../containers/create/createcontainer.html | 31 ++--
app/docker/views/dashboard/dashboard.html | 7 +-
...ocker-features-configuration.controller.js | 71 +++++++--
.../docker-features-configuration.html | 43 ++++-
app/portainer/react/components/envronments.ts | 2 +-
app/portainer/react/components/index.ts | 7 +
app/portainer/services/authentication.js | 1 -
.../views/endpoints/edit/endpoint.html | 22 ++-
.../endpoints/edit/endpointController.js | 25 +--
app/react-tools/test-mocks.ts | 1 +
app/react/components/.keep | 0
.../InsightsBox/InsightsBox.stories.tsx | 18 +++
.../InsightsBox/InsightsBox.test.tsx | 16 ++
.../components/InsightsBox/InsightsBox.tsx | 63 ++++++++
app/react/components/InsightsBox/index.ts | 1 +
.../components/InsightsBox/insights-store.ts | 35 +++++
.../InputList/InputList.module.css | 9 --
.../form-components/InputList/InputList.tsx | 148 ++++++++++--------
.../docker/containers/CreateView/Gpu.tsx | 39 +++--
.../ContainersDatatable/columns/index.tsx | 2 +-
app/react/docker/containers/utils.ts | 16 +-
.../host/SetupView}/GpusList.tsx | 5 +-
.../environment.service/create.ts | 10 --
.../environments/queries/useEnvironment.ts | 11 +-
app/react/portainer/environments/types.ts | 1 +
.../WizardDocker/APITab/APIForm.tsx | 35 ++++-
.../APITab/APIForm.validation.tsx | 3 -
.../WizardDocker/APITab/APITab.tsx | 5 +-
.../WizardDocker/APITab/types.ts | 2 -
.../WizardDocker/SocketTab/SocketForm.tsx | 35 ++++-
.../SocketTab/SocketForm.validation.tsx | 3 -
.../WizardDocker/SocketTab/SocketTab.tsx | 8 +-
.../WizardDocker/SocketTab/types.ts | 2 -
.../WizardDocker/WizardDocker.tsx | 2 +
.../shared/AgentForm/AgentForm.tsx | 7 +-
.../shared/AgentForm/AgentForm.validation.tsx | 2 -
.../EdgeAgentForm/EdgeAgentForm.tsx | 11 +-
.../EdgeAgentForm/EdgeAgentForm.validation.ts | 2 -
.../EdgeAgentTab/EdgeAgentForm/types.ts | 2 -
.../shared/EdgeAgentTab/EdgeAgentTab.tsx | 3 -
.../shared/Hardware/Hardware.tsx | 22 ---
52 files changed, 692 insertions(+), 285 deletions(-)
create mode 100644 api/datastore/migrate_post_init.go
delete mode 100644 app/react/components/.keep
create mode 100644 app/react/components/InsightsBox/InsightsBox.stories.tsx
create mode 100644 app/react/components/InsightsBox/InsightsBox.test.tsx
create mode 100644 app/react/components/InsightsBox/InsightsBox.tsx
create mode 100644 app/react/components/InsightsBox/index.ts
create mode 100644 app/react/components/InsightsBox/insights-store.ts
rename app/react/{portainer/environments/wizard/EnvironmentsCreationView/shared/Hardware => docker/host/SetupView}/GpusList.tsx (90%)
delete mode 100644 app/react/portainer/environments/wizard/EnvironmentsCreationView/shared/Hardware/Hardware.tsx
diff --git a/api/cmd/portainer/main.go b/api/cmd/portainer/main.go
index 9a0285f0d..a2a0d545b 100644
--- a/api/cmd/portainer/main.go
+++ b/api/cmd/portainer/main.go
@@ -689,20 +689,16 @@ func buildServer(flags *portainer.CLIFlags) portainer.Server {
log.Fatal().Err(err).Msg("failed initializing upgrade service")
}
- // FIXME: In 2.16 we changed the way ingress controller permissions are
- // stored. Instead of being stored as annotation on an ingress rule, we keep
- // them in our database. However, in order to run the migration we need an
- // admin kube client to run lookup the old ingress rules and compare them
- // with the current existing ingress classes.
- //
- // Unfortunately, our migrations run as part of the database initialization
- // and our kubeclients require an initialized database. So it is not
- // possible to do this migration as part of our normal flow. We DO have a
- // migration which toggles a boolean in kubernetes configuration that
- // indicated that this "post init" migration should be run. If/when this is
- // resolved we can remove this function.
- err = kubernetesClientFactory.PostInitMigrateIngresses()
- if err != nil {
+ // Our normal migrations run as part of the database initialization
+ // but some more complex migrations require access to a kubernetes or docker
+ // client. Therefore we run a separate migration process just before
+ // starting the server.
+ postInitMigrator := datastore.NewPostInitMigrator(
+ kubernetesClientFactory,
+ dockerClientFactory,
+ dataStore,
+ )
+ if err := postInitMigrator.PostInitMigrate(); err != nil {
log.Fatal().Err(err).Msg("failure during post init migrations")
}
diff --git a/api/datastore/migrate_post_init.go b/api/datastore/migrate_post_init.go
new file mode 100644
index 000000000..dab0139b6
--- /dev/null
+++ b/api/datastore/migrate_post_init.go
@@ -0,0 +1,116 @@
+package datastore
+
+import (
+ "context"
+
+ "github.com/docker/docker/api/types"
+ portainer "github.com/portainer/portainer/api"
+ "github.com/portainer/portainer/api/dataservices"
+ "github.com/portainer/portainer/api/docker"
+ "github.com/portainer/portainer/api/kubernetes/cli"
+ "github.com/rs/zerolog/log"
+)
+
+type PostInitMigrator struct {
+ kubeFactory *cli.ClientFactory // used to run the per-endpoint ingress migration
+ dockerFactory *docker.ClientFactory // used to create docker clients for the GPU migration
+ dataStore dataservices.DataStore // used to list endpoints and persist their updates
+}
+
+// NewPostInitMigrator returns a PostInitMigrator that uses the given client factories and data store.
+func NewPostInitMigrator(
+	kubeFactory *cli.ClientFactory,
+	dockerFactory *docker.ClientFactory,
+	dataStore dataservices.DataStore,
+) *PostInitMigrator {
+	m := &PostInitMigrator{dataStore: dataStore}
+	m.kubeFactory = kubeFactory
+	m.dockerFactory = dockerFactory
+	return m
+}
+
+// PostInitMigrate runs the ingress migration followed by the GPU migration.
+// An ingress migration error is returned as-is and the GPU migration is then not run.
+func (migrator *PostInitMigrator) PostInitMigrate() error {
+	err := migrator.PostInitMigrateIngresses()
+	if err == nil {
+		migrator.PostInitMigrateGPUs()
+	}
+	return err
+}
+
+// PostInitMigrateIngresses runs the ingress migration on every endpoint that is still flagged
+// with PostInitMigrations.MigrateIngresses. A failure on one endpoint is logged at debug level
+// and the loop carries on; only a failure to list the endpoints is returned.
+func (migrator *PostInitMigrator) PostInitMigrateIngresses() error {
+	endpoints, err := migrator.dataStore.Endpoint().Endpoints()
+	if err != nil {
+		return err
+	}
+	for i := range endpoints {
+		// Skip instead of returning: endpoints later in the list may still be flagged.
+		if !endpoints[i].PostInitMigrations.MigrateIngresses {
+			continue
+		}
+		if err := migrator.kubeFactory.MigrateEndpointIngresses(&endpoints[i]); err != nil {
+			log.Debug().Err(err).Msg("failure migrating endpoint ingresses")
+		}
+	}
+	return nil
+}
+
+// PostInitMigrateGPUs will check all docker endpoints for containers with GPUs and set EnableGPUManagement to true if any are found
+// If there's an error with an environment, we'll log it and move on to the next one
+func (migrator *PostInitMigrator) PostInitMigrateGPUs() {
+	environments, err := migrator.dataStore.Endpoint().Endpoints()
+	if err != nil {
+		log.Err(err).Msg("failure getting endpoints")
+		return
+	}
+	for i := range environments {
+		environment := &environments[i]
+		if environment.Type != portainer.DockerEnvironment || !environment.PostInitMigrations.MigrateGPUs {
+			continue // skip rather than return: environments later in the list may still need migrating
+		}
+		environment.PostInitMigrations.MigrateGPUs = false // cleared up front so we don't run this again
+		if err := migrator.dataStore.Endpoint().UpdateEndpoint(environment.ID, environment); err != nil {
+			log.Err(err).Msg("failure updating environment: " + environment.Name)
+			continue
+		}
+		if migrator.environmentUsesGPUs(environment) {
+			environment.EnableGPUManagement = true
+			if err := migrator.dataStore.Endpoint().UpdateEndpoint(environment.ID, environment); err != nil {
+				log.Err(err).Msg("failure updating environment: " + environment.Name)
+			}
+		}
+	}
+}
+
+// environmentUsesGPUs reports whether any container in the environment has an nvidia device request; errors are logged and count as no GPU found
+func (migrator *PostInitMigrator) environmentUsesGPUs(environment *portainer.Endpoint) bool {
+	dockerClient, err := migrator.dockerFactory.CreateClient(environment, "", nil)
+	if err != nil {
+		log.Err(err).Msg("failure creating docker client for environment: " + environment.Name)
+		return false
+	}
+	defer dockerClient.Close() // runs when this helper returns, i.e. once per environment
+	containers, err := dockerClient.ContainerList(context.Background(), types.ContainerListOptions{All: true})
+	if err != nil {
+		log.Err(err).Msg("failed to list containers")
+		return false
+	}
+	for _, container := range containers {
+		// https://www.sobyte.net/post/2022-10/go-docker/ has nice documentation on the docker client with GPUs
+		containerDetails, err := dockerClient.ContainerInspect(context.Background(), container.ID)
+		if err != nil {
+			log.Err(err).Msg("failed to inspect container")
+			continue // one unreadable container should not hide GPUs used by the others
+		}
+		for _, deviceRequest := range containerDetails.HostConfig.Resources.DeviceRequests {
+			if deviceRequest.Driver == "nvidia" {
+				return true
+			}
+		}
+	}
+	return false
+}
diff --git a/api/datastore/migrator/migrate_dbversion90.go b/api/datastore/migrator/migrate_dbversion90.go
index 4d890a40a..fe107188d 100644
--- a/api/datastore/migrator/migrate_dbversion90.go
+++ b/api/datastore/migrator/migrate_dbversion90.go
@@ -3,11 +3,16 @@ package migrator
import (
"github.com/rs/zerolog/log"
+ portainer "github.com/portainer/portainer/api"
portainerDsErrors "github.com/portainer/portainer/api/dataservices/errors"
)
func (m *Migrator) migrateDBVersionToDB90() error {
- if err := m.updateUserThemForDB90(); err != nil {
+ if err := m.updateUserThemeForDB90(); err != nil {
+ return err
+ }
+
+ if err := m.updateEnableGpuManagementFeatures(); err != nil {
return err
}
@@ -39,7 +44,7 @@ func (m *Migrator) updateEdgeStackStatusForDB90() error {
return nil
}
-func (m *Migrator) updateUserThemForDB90() error {
+func (m *Migrator) updateUserThemeForDB90() error {
log.Info().Msg("updating existing user theme settings")
users, err := m.userService.Users()
@@ -60,3 +65,28 @@ func (m *Migrator) updateUserThemForDB90() error {
return nil
}
+
+// updateEnableGpuManagementFeatures flags every docker environment for the post-init GPU
+// migration and enables GPU management on those that already have GPUs configured.
+func (m *Migrator) updateEnableGpuManagementFeatures() error {
+	endpoints, err := m.endpointService.Endpoints()
+	if err != nil {
+		return err
+	}
+	for i := range endpoints {
+		endpoint := &endpoints[i]
+		if endpoint.Type != portainer.DockerEnvironment {
+			continue
+		}
+		// request the post-init GPU migration so that it runs only on the 2.18 upgrade
+		endpoint.PostInitMigrations.MigrateGPUs = true
+		// an environment that already lists one or more gpus gets GPU management switched on
+		if len(endpoint.Gpus) != 0 {
+			endpoint.EnableGPUManagement = true
+		}
+		if updateErr := m.endpointService.UpdateEndpoint(endpoint.ID, endpoint); updateErr != nil {
+			return updateErr
+		}
+	}
+	return nil
+}
diff --git a/api/datastore/test_data/output_24_to_latest.json b/api/datastore/test_data/output_24_to_latest.json
index a4d4b2ac8..d5c2e3be1 100644
--- a/api/datastore/test_data/output_24_to_latest.json
+++ b/api/datastore/test_data/output_24_to_latest.json
@@ -46,6 +46,7 @@
},
"EdgeCheckinInterval": 0,
"EdgeKey": "",
+ "EnableGPUManagement": false,
"Gpus": [],
"GroupId": 1,
"Id": 1,
@@ -71,6 +72,7 @@
"LastCheckInDate": 0,
"Name": "local",
"PostInitMigrations": {
+ "MigrateGPUs": true,
"MigrateIngresses": true
},
"PublicURL": "",
diff --git a/api/http/handler/endpoints/endpoint_settings_update.go b/api/http/handler/endpoints/endpoint_settings_update.go
index 68713f385..34a9abf6a 100644
--- a/api/http/handler/endpoints/endpoint_settings_update.go
+++ b/api/http/handler/endpoints/endpoint_settings_update.go
@@ -28,6 +28,10 @@ type endpointSettingsUpdatePayload struct {
AllowSysctlSettingForRegularUsers *bool `json:"allowSysctlSettingForRegularUsers" example:"true"`
// Whether host management features are enabled
EnableHostManagementFeatures *bool `json:"enableHostManagementFeatures" example:"true"`
+
+ EnableGPUManagement *bool `json:"enableGPUManagement" example:"false"`
+
+ Gpus []portainer.Pair `json:"gpus"`
}
func (payload *endpointSettingsUpdatePayload) Validate(r *http.Request) error {
@@ -107,6 +111,14 @@ func (handler *Handler) endpointSettingsUpdate(w http.ResponseWriter, r *http.Re
securitySettings.EnableHostManagementFeatures = *payload.EnableHostManagementFeatures
}
+ if payload.EnableGPUManagement != nil {
+ endpoint.EnableGPUManagement = *payload.EnableGPUManagement
+ }
+
+ if payload.Gpus != nil {
+ endpoint.Gpus = payload.Gpus
+ }
+
endpoint.SecuritySettings = securitySettings
err = handler.DataStore.Endpoint().UpdateEndpoint(portainer.EndpointID(endpointID), endpoint)
diff --git a/api/kubernetes/cli/client.go b/api/kubernetes/cli/client.go
index d15d486bb..fc77a7786 100644
--- a/api/kubernetes/cli/client.go
+++ b/api/kubernetes/cli/client.go
@@ -12,7 +12,6 @@ import (
"github.com/pkg/errors"
portainer "github.com/portainer/portainer/api"
"github.com/portainer/portainer/api/dataservices"
- "github.com/rs/zerolog/log"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
@@ -221,27 +220,7 @@ func buildLocalClient() (*kubernetes.Clientset, error) {
return kubernetes.NewForConfig(config)
}
-func (factory *ClientFactory) PostInitMigrateIngresses() error {
- endpoints, err := factory.dataStore.Endpoint().Endpoints()
- if err != nil {
- return err
- }
- for i := range endpoints {
- // Early exit if we do not need to migrate!
- if endpoints[i].PostInitMigrations.MigrateIngresses == false {
- return nil
- }
-
- err := factory.migrateEndpointIngresses(&endpoints[i])
- if err != nil {
- log.Debug().Err(err).Msg("failure migrating endpoint ingresses")
- }
- }
-
- return nil
-}
-
-func (factory *ClientFactory) migrateEndpointIngresses(e *portainer.Endpoint) error {
+func (factory *ClientFactory) MigrateEndpointIngresses(e *portainer.Endpoint) error {
// classes is a list of controllers which have been manually added to the
// cluster setup view. These need to all be allowed globally, but then
// blocked in specific namespaces which they were not previously allowed in.
diff --git a/api/portainer.go b/api/portainer.go
index 918c32aaa..49b53f97d 100644
--- a/api/portainer.go
+++ b/api/portainer.go
@@ -402,6 +402,8 @@ type (
Version string `example:"1.0.0"`
}
+ EnableGPUManagement bool `json:"EnableGPUManagement"`
+
// Deprecated fields
// Deprecated in DBVersion == 4
TLS bool `json:"TLS,omitempty"`
@@ -502,6 +504,7 @@ type (
// EndpointPostInitMigrations
EndpointPostInitMigrations struct {
MigrateIngresses bool `json:"MigrateIngresses"`
+ MigrateGPUs bool `json:"MigrateGPUs"`
}
// Extension represents a deprecated Portainer extension
diff --git a/app/assets/css/vendor-override.css b/app/assets/css/vendor-override.css
index 48bbfffc6..6135d55d9 100644
--- a/app/assets/css/vendor-override.css
+++ b/app/assets/css/vendor-override.css
@@ -358,11 +358,6 @@ input:-webkit-autofill {
}
/* Overide Vendor CSS */
-
-.btn-link:hover {
- color: var(--text-link-hover-color) !important;
-}
-
.multiSelect.inlineBlock button {
margin: 0;
}
diff --git a/app/docker/components/imageRegistry/por-image-registry.html b/app/docker/components/imageRegistry/por-image-registry.html
index 142b3139a..22eab8cd5 100644
--- a/app/docker/components/imageRegistry/por-image-registry.html
+++ b/app/docker/components/imageRegistry/por-image-registry.html
@@ -36,7 +36,7 @@
title="Search image on Docker Hub"
target="_blank"
>
-