mirror of https://github.com/k3s-io/k3s
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
136 lines
4.9 KiB
136 lines
4.9 KiB
package certmonitor |
|
|
|
import ( |
|
"context" |
|
"crypto/x509" |
|
"fmt" |
|
"os" |
|
"path/filepath" |
|
"strings" |
|
"time" |
|
|
|
daemonconfig "github.com/k3s-io/k3s/pkg/daemons/config" |
|
"github.com/k3s-io/k3s/pkg/daemons/control/deps" |
|
"github.com/k3s-io/k3s/pkg/metrics" |
|
"github.com/k3s-io/k3s/pkg/util" |
|
"github.com/k3s-io/k3s/pkg/util/services" |
|
"github.com/k3s-io/k3s/pkg/version" |
|
"github.com/prometheus/client_golang/prometheus" |
|
certutil "github.com/rancher/dynamiclistener/cert" |
|
"github.com/rancher/wrangler/v3/pkg/merr" |
|
"github.com/sirupsen/logrus" |
|
corev1 "k8s.io/api/core/v1" |
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" |
|
"k8s.io/apimachinery/pkg/types" |
|
"k8s.io/apimachinery/pkg/util/wait" |
|
) |
|
|
|
var ( |
|
// Check certificates twice an hour. Kubernetes events have a TTL of 1 hour by default, |
|
// so similar events should be aggregated and refreshed by the event recorder as long |
|
// as they are created within the TTL period. |
|
certCheckInterval = time.Minute * 30 |
|
|
|
controllerName = version.Program + "-cert-monitor" |
|
|
|
certificateExpirationSeconds = prometheus.NewGaugeVec(prometheus.GaugeOpts{ |
|
Name: version.Program + "_certificate_expiration_seconds", |
|
Help: "Remaining lifetime on the certificate.", |
|
}, []string{"subject", "usages"}) |
|
) |
|
|
|
// Setup starts the certificate expiration monitor |
|
func Setup(ctx context.Context, nodeConfig *daemonconfig.Node, dataDir string) error { |
|
logrus.Debugf("Starting %s with monitoring period %s", controllerName, certCheckInterval) |
|
metrics.DefaultRegisterer.MustRegister(certificateExpirationSeconds) |
|
|
|
client, err := util.GetClientSet(nodeConfig.AgentConfig.KubeConfigKubelet) |
|
if err != nil { |
|
return err |
|
} |
|
|
|
recorder := util.BuildControllerEventRecorder(client, controllerName, metav1.NamespaceDefault) |
|
|
|
// This is consistent with events attached to the node generated by the kubelet |
|
// https://github.com/kubernetes/kubernetes/blob/612130dd2f4188db839ea5c2dea07a96b0ad8d1c/pkg/kubelet/kubelet.go#L479-L485 |
|
nodeRef := &corev1.ObjectReference{ |
|
Kind: "Node", |
|
Name: nodeConfig.AgentConfig.NodeName, |
|
UID: types.UID(nodeConfig.AgentConfig.NodeName), |
|
Namespace: "", |
|
} |
|
|
|
// Create a dummy controlConfig just to hold the paths for the server certs |
|
controlConfig := daemonconfig.Control{ |
|
DataDir: filepath.Join(dataDir, "server"), |
|
Runtime: &daemonconfig.ControlRuntime{}, |
|
} |
|
deps.CreateRuntimeCertFiles(&controlConfig) |
|
|
|
caMap := map[string][]string{} |
|
nodeList := services.Agent |
|
if _, err := os.Stat(controlConfig.DataDir); err == nil { |
|
nodeList = services.All |
|
caMap, err = services.FilesForServices(controlConfig, services.CA) |
|
if err != nil { |
|
return err |
|
} |
|
} |
|
|
|
nodeMap, err := services.FilesForServices(controlConfig, nodeList) |
|
if err != nil { |
|
return err |
|
} |
|
|
|
go wait.Until(func() { |
|
logrus.Debugf("Running %s certificate expiration check", controllerName) |
|
if err := checkCerts(nodeMap, time.Hour*24*daemonconfig.CertificateRenewDays); err != nil { |
|
message := fmt.Sprintf("Node certificates require attention - restart %s on this node to trigger automatic rotation: %v", version.Program, err) |
|
recorder.Event(nodeRef, corev1.EventTypeWarning, "CertificateExpirationWarning", message) |
|
} |
|
if err := checkCerts(caMap, time.Hour*24*365); err != nil { |
|
message := fmt.Sprintf("Certificate authority certificates require attention - check %s documentation and begin planning rotation: %v", version.Program, err) |
|
recorder.Event(nodeRef, corev1.EventTypeWarning, "CACertificateExpirationWarning", message) |
|
|
|
} |
|
}, certCheckInterval, ctx.Done()) |
|
|
|
return nil |
|
} |
|
|
|
func checkCerts(fileMap map[string][]string, warningPeriod time.Duration) error { |
|
errs := merr.Errors{} |
|
now := time.Now() |
|
warn := now.Add(warningPeriod) |
|
|
|
for service, files := range fileMap { |
|
for _, file := range files { |
|
basename := filepath.Base(file) |
|
certs, _ := certutil.CertsFromFile(file) |
|
for _, cert := range certs { |
|
usages := []string{} |
|
if cert.KeyUsage&x509.KeyUsageCertSign != 0 { |
|
usages = append(usages, "CertSign") |
|
} |
|
for _, eku := range cert.ExtKeyUsage { |
|
switch eku { |
|
case x509.ExtKeyUsageServerAuth: |
|
usages = append(usages, "ServerAuth") |
|
case x509.ExtKeyUsageClientAuth: |
|
usages = append(usages, "ClientAuth") |
|
} |
|
} |
|
certificateExpirationSeconds.WithLabelValues(cert.Subject.String(), strings.Join(usages, ",")).Set(cert.NotAfter.Sub(now).Seconds()) |
|
if now.Before(cert.NotBefore) { |
|
errs = append(errs, fmt.Errorf("%s/%s: certificate %s is not valid before %s", service, basename, cert.Subject, cert.NotBefore.Format(time.RFC3339))) |
|
} else if now.After(cert.NotAfter) { |
|
errs = append(errs, fmt.Errorf("%s/%s: certificate %s expired at %s", service, basename, cert.Subject, cert.NotAfter.Format(time.RFC3339))) |
|
} else if warn.After(cert.NotAfter) { |
|
errs = append(errs, fmt.Errorf("%s/%s: certificate %s will expire within %d days at %s", service, basename, cert.Subject, int(warningPeriod.Hours()/24), cert.NotAfter.Format(time.RFC3339))) |
|
} |
|
} |
|
} |
|
} |
|
|
|
return merr.NewErrors(errs...) |
|
}
|
|
|