mirror of https://github.com/k3s-io/k3s
Merge pull request #30116 from Random-Liu/start-e2e-services-in-separate-process
Automatic merge from submit-queue Node Conformance Test: Start e2e services in a separate process For https://github.com/kubernetes/kubernetes/issues/30122. This is the first step of https://github.com/kubernetes/kubernetes/issues/30174. This PR added a `start-services-only` mode in the test suite, and start all e2e services in a separate process by run the test binary again in `start-services-only` mode.pull/6/head
commit
457a151fdb
|
@ -289,6 +289,7 @@ log-flush-frequency
|
|||
long-running-request-regexp
|
||||
low-diskspace-threshold-mb
|
||||
make-symlinks
|
||||
manifest-path
|
||||
manifest-url
|
||||
manifest-url-header
|
||||
masquerade-all
|
||||
|
@ -421,6 +422,7 @@ rkt-stage1-image
|
|||
root-ca-file
|
||||
root-dir
|
||||
run-proxy
|
||||
run-services-mode
|
||||
runtime-cgroups
|
||||
runtime-config
|
||||
runtime-request-timeout
|
||||
|
|
|
@ -67,14 +67,24 @@ type TestContextType struct {
|
|||
CreateTestingNS CreateTestingNSFn
|
||||
// If set to true test will dump data about the namespace in which test was running.
|
||||
DumpLogsOnFailure bool
|
||||
// If the garbage collector is enabled in the kube-apiserver and kube-controller-manager.
|
||||
GarbageCollectorEnabled bool
|
||||
// Node e2e specific test context
|
||||
NodeTestContextType
|
||||
}
|
||||
|
||||
// NodeTestContextType is part of TestContextType, it is shared by all node e2e test.
|
||||
type NodeTestContextType struct {
|
||||
// Name of the node to run tests on (node e2e suite only).
|
||||
NodeName string
|
||||
// DisableKubenet disables kubenet when starting kubelet.
|
||||
DisableKubenet bool
|
||||
// Whether to enable the QoS Cgroup Hierarchy or not
|
||||
CgroupsPerQOS bool
|
||||
// The hard eviction thresholds
|
||||
EvictionHard string
|
||||
// If the garbage collector is enabled in the kube-apiserver and kube-controller-manager.
|
||||
GarbageCollectorEnabled bool
|
||||
// ManifestPath is the static pod manifest path.
|
||||
ManifestPath string
|
||||
}
|
||||
|
||||
type CloudConfig struct {
|
||||
|
@ -112,7 +122,6 @@ func RegisterCommonFlags() {
|
|||
flag.StringVar(&TestContext.Host, "host", "http://127.0.0.1:8080", "The host, or apiserver, to connect to")
|
||||
flag.StringVar(&TestContext.ReportPrefix, "report-prefix", "", "Optional prefix for JUnit XML reports. Default is empty, which doesn't prepend anything to the default name.")
|
||||
flag.StringVar(&TestContext.ReportDir, "report-dir", "", "Path to the directory where the JUnit XML reports should be saved. Default is empty, which doesn't generate these reports.")
|
||||
flag.BoolVar(&TestContext.GarbageCollectorEnabled, "garbage-collector-enabled", false, "Set to true if the garbage collector is enabled in the kube-apiserver and kube-controller-manager, then some tests will rely on the garbage collector to delete dependent resources.")
|
||||
}
|
||||
|
||||
// Register flags specific to the cluster e2e test suite.
|
||||
|
@ -149,11 +158,16 @@ func RegisterClusterFlags() {
|
|||
flag.StringVar(&TestContext.UpgradeTarget, "upgrade-target", "ci/latest", "Version to upgrade to (e.g. 'release/stable', 'release/latest', 'ci/latest', '0.19.1', '0.19.1-669-gabac8c8') if doing an upgrade test.")
|
||||
flag.StringVar(&TestContext.PrometheusPushGateway, "prom-push-gateway", "", "The URL to prometheus gateway, so that metrics can be pushed during e2es and scraped by prometheus. Typically something like 127.0.0.1:9091.")
|
||||
flag.BoolVar(&TestContext.CleanStart, "clean-start", false, "If true, purge all namespaces except default and system before running tests. This serves to Cleanup test namespaces from failed/interrupted e2e runs in a long-lived cluster.")
|
||||
flag.BoolVar(&TestContext.GarbageCollectorEnabled, "garbage-collector-enabled", false, "Set to true if the garbage collector is enabled in the kube-apiserver and kube-controller-manager, then some tests will rely on the garbage collector to delete dependent resources.")
|
||||
}
|
||||
|
||||
// Register flags specific to the node e2e test suite.
|
||||
func RegisterNodeFlags() {
|
||||
flag.StringVar(&TestContext.NodeName, "node-name", "", "Name of the node to run tests on (node e2e suite only).")
|
||||
// TODO(random-liu): Remove kubelet related flags when we move the kubelet start logic out of the test.
|
||||
// TODO(random-liu): Find someway to get kubelet configuration, and automatic config and filter test based on the configuration.
|
||||
flag.BoolVar(&TestContext.DisableKubenet, "disable-kubenet", false, "If true, start kubelet without kubenet. (default false)")
|
||||
flag.BoolVar(&TestContext.CgroupsPerQOS, "cgroups-per-qos", false, "Enable creation of QoS cgroup hierarchy, if true top level QoS and pod cgroups are created.")
|
||||
flag.StringVar(&TestContext.EvictionHard, "eviction-hard", "memory.available<250Mi", "The hard eviction thresholds. If set, pods get evicted when the specified resources drop below the thresholds.")
|
||||
flag.StringVar(&TestContext.ManifestPath, "manifest-path", "", "The path to the static pod manifest file.")
|
||||
}
|
||||
|
|
|
@ -45,41 +45,30 @@ import (
|
|||
"github.com/onsi/ginkgo/config"
|
||||
more_reporters "github.com/onsi/ginkgo/reporters"
|
||||
. "github.com/onsi/gomega"
|
||||
"github.com/spf13/pflag"
|
||||
)
|
||||
|
||||
var e2es *e2eService
|
||||
|
||||
// context is the test context shared by all parallel nodes.
|
||||
// Originally we setup the test environment and initialize global variables
|
||||
// in BeforeSuite, and then used the global variables in the test.
|
||||
// However, after we make the test parallel, ginkgo will run all tests
|
||||
// in several parallel test nodes. And for each test node, the BeforeSuite
|
||||
// and AfterSuite will be run.
|
||||
// We don't want to start services (kubelet, apiserver and etcd) for all
|
||||
// parallel nodes, but we do want to set some globally shared variable which
|
||||
// could be used in test.
|
||||
// We have to use SynchronizedBeforeSuite to achieve that. The first
|
||||
// function of SynchronizedBeforeSuite is only called once, and the second
|
||||
// function is called in each parallel test node. The result returned by
|
||||
// the first function will be the parameter of the second function.
|
||||
// So we'll start all services and initialize the shared context in the first
|
||||
// function, and propagate the context to all parallel test nodes in the
|
||||
// second function.
|
||||
// Notice no lock is needed for shared context, because context should only be
|
||||
// initialized in the first function in SynchronizedBeforeSuite. After that
|
||||
// it should never be modified.
|
||||
var context SharedContext
|
||||
var e2es *E2EServices
|
||||
|
||||
var prePullImages = flag.Bool("prepull-images", true, "If true, prepull images so image pull failures do not cause test failures.")
|
||||
var runServicesMode = flag.Bool("run-services-mode", false, "If true, only run services (etcd, apiserver) in current process, and not run test.")
|
||||
|
||||
func init() {
|
||||
framework.RegisterCommonFlags()
|
||||
framework.RegisterNodeFlags()
|
||||
pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
|
||||
// Mark the run-services-mode flag as hidden to prevent user from using it.
|
||||
pflag.CommandLine.MarkHidden("run-services-mode")
|
||||
}
|
||||
|
||||
func TestE2eNode(t *testing.T) {
|
||||
flag.Parse()
|
||||
|
||||
pflag.Parse()
|
||||
if *runServicesMode {
|
||||
// If run-services-mode is specified, only run services in current process.
|
||||
RunE2EServices()
|
||||
return
|
||||
}
|
||||
// If run-services-mode is not specified, run test.
|
||||
rand.Seed(time.Now().UTC().UnixNano())
|
||||
RegisterFailHandler(Fail)
|
||||
reporters := []Reporter{}
|
||||
|
@ -103,6 +92,8 @@ var _ = SynchronizedBeforeSuite(func() []byte {
|
|||
if *buildServices {
|
||||
Expect(buildGo()).To(Succeed())
|
||||
}
|
||||
|
||||
// Initialize node name here, so that the following code can get right node name.
|
||||
if framework.TestContext.NodeName == "" {
|
||||
hostname, err := os.Hostname()
|
||||
if err != nil {
|
||||
|
@ -110,7 +101,6 @@ var _ = SynchronizedBeforeSuite(func() []byte {
|
|||
}
|
||||
framework.TestContext.NodeName = hostname
|
||||
}
|
||||
|
||||
// Pre-pull the images tests depend on so we can fail immediately if there is an image pull issue
|
||||
// This helps with debugging test flakes since it is hard to tell when a test failure is due to image pulling.
|
||||
if *prePullImages {
|
||||
|
@ -124,11 +114,10 @@ var _ = SynchronizedBeforeSuite(func() []byte {
|
|||
// We should mask locksmithd when provisioning the machine.
|
||||
maskLocksmithdOnCoreos()
|
||||
|
||||
shared := &SharedContext{}
|
||||
if *startServices {
|
||||
e2es = newE2eService(framework.TestContext.NodeName, framework.TestContext.CgroupsPerQOS, framework.TestContext.EvictionHard, shared)
|
||||
if err := e2es.start(); err != nil {
|
||||
Fail(fmt.Sprintf("Unable to start node services.\n%v", err))
|
||||
e2es = NewE2EServices()
|
||||
if err := e2es.Start(); err != nil {
|
||||
glog.Fatalf("Unable to start node services: %v", err)
|
||||
}
|
||||
glog.Infof("Node services started. Running tests...")
|
||||
} else {
|
||||
|
@ -136,33 +125,31 @@ var _ = SynchronizedBeforeSuite(func() []byte {
|
|||
}
|
||||
|
||||
glog.Infof("Starting namespace controller")
|
||||
// TODO(random-liu): Move namespace controller into namespace services.
|
||||
startNamespaceController()
|
||||
|
||||
// Reference common test to make the import valid.
|
||||
commontest.CurrentSuite = commontest.NodeE2E
|
||||
|
||||
// Share the node name with the other test nodes.
|
||||
shared.NodeName = framework.TestContext.NodeName
|
||||
data, err := json.Marshal(shared)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
data, err := json.Marshal(&framework.TestContext.NodeTestContextType)
|
||||
if err != nil {
|
||||
glog.Fatalf("Failed to serialize node test context: %v", err)
|
||||
}
|
||||
return data
|
||||
}, func(data []byte) {
|
||||
// Set the shared context got from the synchronized initialize function
|
||||
shared := &SharedContext{}
|
||||
Expect(json.Unmarshal(data, shared)).To(Succeed())
|
||||
context = *shared
|
||||
|
||||
framework.TestContext.NodeName = shared.NodeName
|
||||
// The node test context is updated in the first function, update it on every test node.
|
||||
err := json.Unmarshal(data, &framework.TestContext.NodeTestContextType)
|
||||
if err != nil {
|
||||
glog.Fatalf("Failed to deserialize node test context: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
// Tear down the kubelet on the node
|
||||
var _ = SynchronizedAfterSuite(func() {}, func() {
|
||||
if e2es != nil {
|
||||
e2es.getLogFiles()
|
||||
if *startServices && *stopServices {
|
||||
glog.Infof("Stopping node services...")
|
||||
e2es.stop()
|
||||
e2es.Stop()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ import (
|
|||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
|
@ -33,22 +34,122 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"github.com/kardianos/osext"
|
||||
|
||||
"k8s.io/kubernetes/test/e2e/framework"
|
||||
)
|
||||
|
||||
// TODO(random-liu): Move this file to a separate package.
|
||||
var serverStartTimeout = flag.Duration("server-start-timeout", time.Second*120, "Time to wait for each server to become healthy.")
|
||||
|
||||
type e2eService struct {
|
||||
killCmds []*killCmd
|
||||
rmDirs []string
|
||||
// E2EServices starts and stops e2e services in a separate process. The test uses it to start and
|
||||
// stop all e2e services.
|
||||
type E2EServices struct {
|
||||
services *server
|
||||
}
|
||||
|
||||
context *SharedContext
|
||||
etcdDataDir string
|
||||
nodeName string
|
||||
logFiles map[string]logFileData
|
||||
cgroupsPerQOS bool
|
||||
evictionHard string
|
||||
func NewE2EServices() *E2EServices {
|
||||
return &E2EServices{}
|
||||
}
|
||||
|
||||
// services.log is the combined log of all services
|
||||
const servicesLogFile = "services.log"
|
||||
|
||||
// Start starts the e2e services in another process, it returns when all e2e
|
||||
// services are ready.
|
||||
// We want to statically link e2e services into the test binary, but we don't
|
||||
// want their glog to pollute the test result. So we run the binary in run-
|
||||
// services-mode to start e2e services in another process.
|
||||
func (e *E2EServices) Start() error {
|
||||
var err error
|
||||
// Create the manifest path for kubelet.
|
||||
// TODO(random-liu): Remove related logic when we move kubelet starting logic out of the test.
|
||||
framework.TestContext.ManifestPath, err = ioutil.TempDir("", "node-e2e-pod")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create static pod manifest directory: %v", err)
|
||||
}
|
||||
testBin, err := osext.Executable()
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't get current binary: %v", err)
|
||||
}
|
||||
// TODO(random-liu): Add sudo after we statically link apiserver and etcd, because apiserver needs
|
||||
// sudo. We can't add sudo now, because etcd may not be in PATH of root.
|
||||
startCmd := exec.Command(testBin,
|
||||
"--run-services-mode",
|
||||
"--server-start-timeout", serverStartTimeout.String(),
|
||||
"--report-dir", framework.TestContext.ReportDir,
|
||||
// TODO(random-liu): Remove the following flags after we move kubelet starting logic
|
||||
// out of the test.
|
||||
"--node-name", framework.TestContext.NodeName,
|
||||
"--disable-kubenet="+strconv.FormatBool(framework.TestContext.DisableKubenet),
|
||||
"--cgroups-per-qos="+strconv.FormatBool(framework.TestContext.CgroupsPerQOS),
|
||||
"--manifest-path", framework.TestContext.ManifestPath,
|
||||
"--eviction-hard", framework.TestContext.EvictionHard,
|
||||
)
|
||||
e.services = newServer("services", startCmd, nil, getHealthCheckURLs(), servicesLogFile)
|
||||
return e.services.start()
|
||||
}
|
||||
|
||||
// Stop stops the e2e services.
|
||||
func (e *E2EServices) Stop() error {
|
||||
defer func() {
|
||||
// Cleanup the manifest path for kubelet.
|
||||
manifestPath := framework.TestContext.ManifestPath
|
||||
if manifestPath != "" {
|
||||
err := os.RemoveAll(manifestPath)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to delete static pod manifest directory %s.\n%v", manifestPath, err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
if e.services == nil {
|
||||
glog.Errorf("can't stop e2e services, because `services` is nil")
|
||||
}
|
||||
return e.services.kill()
|
||||
}
|
||||
|
||||
// RunE2EServices actually start the e2e services. This function is used to
|
||||
// start e2e services in current process. This is only used in run-services-mode.
|
||||
func RunE2EServices() {
|
||||
e := newE2EService()
|
||||
if err := e.run(); err != nil {
|
||||
glog.Fatalf("Failed to run e2e services: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Ports of different e2e services.
|
||||
const (
|
||||
etcdPort = "4001"
|
||||
apiserverPort = "8080"
|
||||
kubeletPort = "10250"
|
||||
kubeletReadOnlyPort = "10255"
|
||||
)
|
||||
|
||||
// Health check urls of different e2e services.
|
||||
var (
|
||||
etcdHealthCheckURL = getEndpoint(etcdPort) + "/v2/keys/" // Trailing slash is required,
|
||||
apiserverHealthCheckURL = getEndpoint(apiserverPort) + "/healthz"
|
||||
kubeletHealthCheckURL = getEndpoint(kubeletReadOnlyPort) + "/healthz"
|
||||
)
|
||||
|
||||
// getEndpoint generates endpoint url from service port.
|
||||
func getEndpoint(port string) string {
|
||||
return "http://127.0.0.1:" + port
|
||||
}
|
||||
|
||||
func getHealthCheckURLs() []string {
|
||||
return []string{
|
||||
etcdHealthCheckURL,
|
||||
apiserverHealthCheckURL,
|
||||
kubeletHealthCheckURL,
|
||||
}
|
||||
}
|
||||
|
||||
// e2eService is used internally in this file to start e2e services in current process.
|
||||
type e2eService struct {
|
||||
services []*server
|
||||
rmDirs []string
|
||||
logFiles map[string]logFileData
|
||||
}
|
||||
|
||||
type logFileData struct {
|
||||
|
@ -63,7 +164,7 @@ const (
|
|||
defaultEtcdPath = "/tmp/etcd"
|
||||
)
|
||||
|
||||
func newE2eService(nodeName string, cgroupsPerQOS bool, evictionHard string, context *SharedContext) *e2eService {
|
||||
func newE2EService() *e2eService {
|
||||
// Special log files that need to be collected for additional debugging.
|
||||
var logFiles = map[string]logFileData{
|
||||
"kern.log": {[]string{"/var/log/kern.log"}, []string{"-k"}},
|
||||
|
@ -71,13 +172,25 @@ func newE2eService(nodeName string, cgroupsPerQOS bool, evictionHard string, con
|
|||
"cloud-init.log": {[]string{"/var/log/cloud-init.log"}, []string{"-u", "cloud*"}},
|
||||
}
|
||||
|
||||
return &e2eService{
|
||||
context: context,
|
||||
nodeName: nodeName,
|
||||
logFiles: logFiles,
|
||||
cgroupsPerQOS: cgroupsPerQOS,
|
||||
evictionHard: evictionHard,
|
||||
return &e2eService{logFiles: logFiles}
|
||||
}
|
||||
|
||||
// terminationSignals are signals that cause the program to exit in the
|
||||
// supported platforms (linux, darwin, windows).
|
||||
var terminationSignals = []os.Signal{syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT}
|
||||
|
||||
// run starts all e2e services and wait for the termination signal. Once receives the
|
||||
// termination signal, it will stop the e2e services gracefully.
|
||||
func (es *e2eService) run() error {
|
||||
defer es.stop()
|
||||
if err := es.start(); err != nil {
|
||||
return err
|
||||
}
|
||||
// Wait until receiving a termination signal.
|
||||
sig := make(chan os.Signal, 1)
|
||||
signal.Notify(sig, terminationSignals...)
|
||||
<-sig
|
||||
return nil
|
||||
}
|
||||
|
||||
func (es *e2eService) start() error {
|
||||
|
@ -88,25 +201,23 @@ func (es *e2eService) start() error {
|
|||
return err
|
||||
}
|
||||
|
||||
cmd, err := es.startEtcd()
|
||||
s, err := es.startEtcd()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
es.killCmds = append(es.killCmds, cmd)
|
||||
es.rmDirs = append(es.rmDirs, es.etcdDataDir)
|
||||
es.services = append(es.services, s)
|
||||
|
||||
cmd, err = es.startApiServer()
|
||||
s, err = es.startApiServer()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
es.killCmds = append(es.killCmds, cmd)
|
||||
es.services = append(es.services, s)
|
||||
|
||||
cmd, err = es.startKubeletServer()
|
||||
s, err = es.startKubeletServer()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
es.killCmds = append(es.killCmds, cmd)
|
||||
es.rmDirs = append(es.rmDirs, es.context.PodConfigPath)
|
||||
es.services = append(es.services, s)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
@ -167,9 +278,10 @@ func isJournaldAvailable() bool {
|
|||
}
|
||||
|
||||
func (es *e2eService) stop() {
|
||||
for _, k := range es.killCmds {
|
||||
if err := k.Kill(); err != nil {
|
||||
glog.Errorf("Failed to stop %v: %v", k.name, err)
|
||||
es.getLogFiles()
|
||||
for _, s := range es.services {
|
||||
if err := s.kill(); err != nil {
|
||||
glog.Errorf("Failed to stop %v: %v", s.name, err)
|
||||
}
|
||||
}
|
||||
for _, d := range es.rmDirs {
|
||||
|
@ -180,12 +292,13 @@ func (es *e2eService) stop() {
|
|||
}
|
||||
}
|
||||
|
||||
func (es *e2eService) startEtcd() (*killCmd, error) {
|
||||
func (es *e2eService) startEtcd() (*server, error) {
|
||||
dataDir, err := ioutil.TempDir("", "node-e2e")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
es.etcdDataDir = dataDir
|
||||
// Mark the dataDir as directories to remove.
|
||||
es.rmDirs = append(es.rmDirs, dataDir)
|
||||
var etcdPath string
|
||||
// CoreOS ships a binary named 'etcd' which is really old, so prefer 'etcd2' if it exists
|
||||
etcdPath, err = exec.LookPath("etcd2")
|
||||
|
@ -205,37 +318,36 @@ func (es *e2eService) startEtcd() (*killCmd, error) {
|
|||
"--advertise-client-urls=http://0.0.0.0:2379,http://0.0.0.0:4001")
|
||||
// Execute etcd in the data directory instead of using --data-dir because the flag sometimes requires additional
|
||||
// configuration (e.g. --name in version 0.4.9)
|
||||
cmd.Dir = es.etcdDataDir
|
||||
hcc := newHealthCheckCommand(
|
||||
"http://127.0.0.1:4001/v2/keys/", // Trailing slash is required,
|
||||
cmd.Dir = dataDir
|
||||
server := newServer(
|
||||
"etcd",
|
||||
cmd,
|
||||
nil,
|
||||
[]string{etcdHealthCheckURL},
|
||||
"etcd.log")
|
||||
return &killCmd{name: "etcd", cmd: cmd}, es.startServer(hcc)
|
||||
return server, server.start()
|
||||
}
|
||||
|
||||
func (es *e2eService) startApiServer() (*killCmd, error) {
|
||||
func (es *e2eService) startApiServer() (*server, error) {
|
||||
cmd := exec.Command("sudo", getApiServerBin(),
|
||||
"--etcd-servers", "http://127.0.0.1:4001",
|
||||
"--etcd-servers", getEndpoint(etcdPort),
|
||||
"--insecure-bind-address", "0.0.0.0",
|
||||
"--service-cluster-ip-range", "10.0.0.1/24",
|
||||
"--kubelet-port", "10250",
|
||||
"--kubelet-port", kubeletPort,
|
||||
"--allow-privileged", "true",
|
||||
"--v", LOG_VERBOSITY_LEVEL, "--logtostderr",
|
||||
)
|
||||
hcc := newHealthCheckCommand(
|
||||
"http://127.0.0.1:8080/healthz",
|
||||
server := newServer(
|
||||
"apiserver",
|
||||
cmd,
|
||||
nil,
|
||||
[]string{apiserverHealthCheckURL},
|
||||
"kube-apiserver.log")
|
||||
return &killCmd{name: "kube-apiserver", cmd: cmd}, es.startServer(hcc)
|
||||
return server, server.start()
|
||||
}
|
||||
|
||||
func (es *e2eService) startKubeletServer() (*killCmd, error) {
|
||||
dataDir, err := ioutil.TempDir("", "node-e2e-pod")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
es.context.PodConfigPath = dataDir
|
||||
var killOverride *exec.Cmd
|
||||
func (es *e2eService) startKubeletServer() (*server, error) {
|
||||
var killCommand *exec.Cmd
|
||||
cmdArgs := []string{}
|
||||
if systemdRun, err := exec.LookPath("systemd-run"); err == nil {
|
||||
// On systemd services, detection of a service / unit works reliably while
|
||||
|
@ -244,7 +356,7 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) {
|
|||
// sense to test it that way
|
||||
unitName := fmt.Sprintf("kubelet-%d.service", rand.Int31())
|
||||
cmdArgs = append(cmdArgs, systemdRun, "--unit="+unitName, getKubeletServerBin())
|
||||
killOverride = exec.Command("sudo", "systemctl", "kill", unitName)
|
||||
killCommand = exec.Command("sudo", "systemctl", "kill", unitName)
|
||||
es.logFiles["kubelet.log"] = logFileData{
|
||||
journalctlCommand: []string{"-u", unitName},
|
||||
}
|
||||
|
@ -258,26 +370,27 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) {
|
|||
)
|
||||
}
|
||||
cmdArgs = append(cmdArgs,
|
||||
"--api-servers", "http://127.0.0.1:8080",
|
||||
"--api-servers", getEndpoint(apiserverPort),
|
||||
"--address", "0.0.0.0",
|
||||
"--port", "10250",
|
||||
"--hostname-override", es.nodeName, // Required because hostname is inconsistent across hosts
|
||||
"--port", kubeletPort,
|
||||
"--read-only-port", kubeletReadOnlyPort,
|
||||
"--hostname-override", framework.TestContext.NodeName, // Required because hostname is inconsistent across hosts
|
||||
"--volume-stats-agg-period", "10s", // Aggregate volumes frequently so tests don't need to wait as long
|
||||
"--allow-privileged", "true",
|
||||
"--serialize-image-pulls", "false",
|
||||
"--config", es.context.PodConfigPath,
|
||||
"--config", framework.TestContext.ManifestPath,
|
||||
"--file-check-frequency", "10s", // Check file frequently so tests won't wait too long
|
||||
"--v", LOG_VERBOSITY_LEVEL, "--logtostderr",
|
||||
"--pod-cidr=10.180.0.0/24", // Assign a fixed CIDR to the node because there is no node controller.
|
||||
"--eviction-hard", es.evictionHard,
|
||||
"--eviction-hard", framework.TestContext.EvictionHard,
|
||||
"--eviction-pressure-transition-period", "30s",
|
||||
)
|
||||
if es.cgroupsPerQOS {
|
||||
if framework.TestContext.CgroupsPerQOS {
|
||||
cmdArgs = append(cmdArgs,
|
||||
"--cgroups-per-qos", "true",
|
||||
)
|
||||
}
|
||||
if !*disableKubenet {
|
||||
if !framework.TestContext.DisableKubenet {
|
||||
cwd, err := os.Getwd()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -288,31 +401,99 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) {
|
|||
}
|
||||
|
||||
cmd := exec.Command("sudo", cmdArgs...)
|
||||
hcc := newHealthCheckCommand(
|
||||
"http://127.0.0.1:10255/healthz",
|
||||
server := newServer(
|
||||
"kubelet",
|
||||
cmd,
|
||||
killCommand,
|
||||
[]string{kubeletHealthCheckURL},
|
||||
"kubelet.log")
|
||||
return &killCmd{name: "kubelet", cmd: cmd, override: killOverride}, es.startServer(hcc)
|
||||
return server, server.start()
|
||||
}
|
||||
|
||||
func (es *e2eService) startServer(cmd *healthCheckCommand) error {
|
||||
cmdErrorChan := make(chan error)
|
||||
// server manages a server started and killed with commands.
|
||||
type server struct {
|
||||
// name is the name of the server, it is only used for logging.
|
||||
name string
|
||||
// startCommand is the command used to start the server
|
||||
startCommand *exec.Cmd
|
||||
// killCommand is the command used to stop the server. It is not required. If it
|
||||
// is not specified, `sudo kill` will be used to stop the server.
|
||||
killCommand *exec.Cmd
|
||||
// healthCheckUrls is the urls used to check whether the server is ready.
|
||||
healthCheckUrls []string
|
||||
// outFilename is the name of the log file. The stdout and stderr of the server
|
||||
// will be redirected to this file.
|
||||
outFilename string
|
||||
}
|
||||
|
||||
func newServer(name string, start, kill *exec.Cmd, urls []string, filename string) *server {
|
||||
return &server{
|
||||
name: name,
|
||||
startCommand: start,
|
||||
killCommand: kill,
|
||||
healthCheckUrls: urls,
|
||||
outFilename: filename,
|
||||
}
|
||||
}
|
||||
|
||||
// commandToString format command to string.
|
||||
func commandToString(c *exec.Cmd) string {
|
||||
if c == nil {
|
||||
return ""
|
||||
}
|
||||
return strings.Join(append([]string{c.Path}, c.Args[1:]...), " ")
|
||||
}
|
||||
|
||||
func (s *server) String() string {
|
||||
return fmt.Sprintf("server %q start-command: `%s`, kill-command: `%s`, health-check: %v, output-file: %q", s.name,
|
||||
commandToString(s.startCommand), commandToString(s.killCommand), s.healthCheckUrls, s.outFilename)
|
||||
}
|
||||
|
||||
// readinessCheck checks whether services are ready via the health check urls. Once there is
|
||||
// an error in errCh, the function will stop waiting and return the error.
|
||||
// TODO(random-liu): Move this to util
|
||||
func readinessCheck(urls []string, errCh <-chan error) error {
|
||||
endTime := time.Now().Add(*serverStartTimeout)
|
||||
for endTime.After(time.Now()) {
|
||||
select {
|
||||
case err := <-errCh:
|
||||
return err
|
||||
case <-time.After(time.Second):
|
||||
ready := true
|
||||
for _, url := range urls {
|
||||
resp, err := http.Get(url)
|
||||
if err != nil || resp.StatusCode != http.StatusOK {
|
||||
ready = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if ready {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("e2e service readiness check timeout %v", *serverStartTimeout)
|
||||
}
|
||||
|
||||
func (s *server) start() error {
|
||||
errCh := make(chan error)
|
||||
go func() {
|
||||
defer close(cmdErrorChan)
|
||||
defer close(errCh)
|
||||
|
||||
// Create the output filename
|
||||
outPath := path.Join(framework.TestContext.ReportDir, cmd.outputFilename)
|
||||
outPath := path.Join(framework.TestContext.ReportDir, s.outFilename)
|
||||
outfile, err := os.Create(outPath)
|
||||
if err != nil {
|
||||
cmdErrorChan <- fmt.Errorf("Failed to create file %s for `%s` %v.", outPath, cmd, err)
|
||||
errCh <- fmt.Errorf("failed to create file %q for `%s` %v.", outPath, s, err)
|
||||
return
|
||||
}
|
||||
defer outfile.Close()
|
||||
defer outfile.Sync()
|
||||
|
||||
cmd := s.startCommand
|
||||
// Set the command to write the output file
|
||||
cmd.Cmd.Stdout = outfile
|
||||
cmd.Cmd.Stderr = outfile
|
||||
cmd.Stdout = outfile
|
||||
cmd.Stderr = outfile
|
||||
|
||||
// Death of this test process should kill the server as well.
|
||||
attrs := &syscall.SysProcAttr{}
|
||||
|
@ -321,63 +502,41 @@ func (es *e2eService) startServer(cmd *healthCheckCommand) error {
|
|||
if deathSigField.IsValid() {
|
||||
deathSigField.Set(reflect.ValueOf(syscall.SIGTERM))
|
||||
} else {
|
||||
cmdErrorChan <- fmt.Errorf("Failed to set Pdeathsig field (non-linux build)")
|
||||
errCh <- fmt.Errorf("failed to set Pdeathsig field (non-linux build)")
|
||||
return
|
||||
}
|
||||
cmd.Cmd.SysProcAttr = attrs
|
||||
cmd.SysProcAttr = attrs
|
||||
|
||||
// Run the command
|
||||
err = cmd.Run()
|
||||
if err != nil {
|
||||
cmdErrorChan <- fmt.Errorf("%s Failed with error \"%v\". Output written to: %s", cmd, err, outPath)
|
||||
errCh <- fmt.Errorf("failed to run server start command %q: %v", commandToString(cmd), err)
|
||||
return
|
||||
}
|
||||
}()
|
||||
|
||||
endTime := time.Now().Add(*serverStartTimeout)
|
||||
for endTime.After(time.Now()) {
|
||||
select {
|
||||
case err := <-cmdErrorChan:
|
||||
return err
|
||||
case <-time.After(time.Second):
|
||||
resp, err := http.Get(cmd.HealthCheckUrl)
|
||||
if err == nil && resp.StatusCode == http.StatusOK {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("Timeout waiting for service %s", cmd)
|
||||
return readinessCheck(s.healthCheckUrls, errCh)
|
||||
}
|
||||
|
||||
// killCmd is a struct to kill a given cmd. The cmd member specifies a command
|
||||
// to find the pid of and attempt to kill.
|
||||
// If the override field is set, that will be used instead to kill the command.
|
||||
// name is only used for logging
|
||||
type killCmd struct {
|
||||
name string
|
||||
cmd *exec.Cmd
|
||||
override *exec.Cmd
|
||||
}
|
||||
func (s *server) kill() error {
|
||||
name := s.name
|
||||
cmd := s.startCommand
|
||||
|
||||
func (k *killCmd) Kill() error {
|
||||
name := k.name
|
||||
cmd := k.cmd
|
||||
|
||||
if k.override != nil {
|
||||
return k.override.Run()
|
||||
if s.killCommand != nil {
|
||||
return s.killCommand.Run()
|
||||
}
|
||||
|
||||
if cmd == nil {
|
||||
return fmt.Errorf("Could not kill %s because both `override` and `cmd` are nil", name)
|
||||
return fmt.Errorf("could not kill %q because both `killCommand` and `startCommand` are nil", name)
|
||||
}
|
||||
|
||||
if cmd.Process == nil {
|
||||
glog.V(2).Infof("%s not running", name)
|
||||
glog.V(2).Infof("%q not running", name)
|
||||
return nil
|
||||
}
|
||||
pid := cmd.Process.Pid
|
||||
if pid <= 1 {
|
||||
return fmt.Errorf("invalid PID %d for %s", pid, name)
|
||||
return fmt.Errorf("invalid PID %d for %q", pid, name)
|
||||
}
|
||||
|
||||
// Attempt to shut down the process in a friendly manner before forcing it.
|
||||
|
@ -413,7 +572,7 @@ func (k *killCmd) Kill() error {
|
|||
select {
|
||||
case err := <-waitChan:
|
||||
if err != nil {
|
||||
return fmt.Errorf("error stopping %s: %v", name, err)
|
||||
return fmt.Errorf("error stopping %q: %v", name, err)
|
||||
}
|
||||
// Success!
|
||||
return nil
|
||||
|
@ -422,23 +581,5 @@ func (k *killCmd) Kill() error {
|
|||
}
|
||||
}
|
||||
|
||||
return fmt.Errorf("unable to stop %s", name)
|
||||
}
|
||||
|
||||
type healthCheckCommand struct {
|
||||
*exec.Cmd
|
||||
HealthCheckUrl string
|
||||
outputFilename string
|
||||
}
|
||||
|
||||
func newHealthCheckCommand(healthCheckUrl string, cmd *exec.Cmd, filename string) *healthCheckCommand {
|
||||
return &healthCheckCommand{
|
||||
HealthCheckUrl: healthCheckUrl,
|
||||
Cmd: cmd,
|
||||
outputFilename: filename,
|
||||
}
|
||||
}
|
||||
|
||||
func (hcc *healthCheckCommand) String() string {
|
||||
return fmt.Sprintf("`%s` health-check: %s", strings.Join(append([]string{hcc.Path}, hcc.Args[1:]...), " "), hcc.HealthCheckUrl)
|
||||
return fmt.Errorf("unable to stop %q", name)
|
||||
}
|
||||
|
|
|
@ -44,7 +44,7 @@ var _ = framework.KubeDescribe("MirrorPod", func() {
|
|||
mirrorPodName = staticPodName + "-" + framework.TestContext.NodeName
|
||||
|
||||
By("create the static pod")
|
||||
err := createStaticPod(context.PodConfigPath, staticPodName, ns, ImageRegistry[nginxImage], api.RestartPolicyAlways)
|
||||
err := createStaticPod(framework.TestContext.ManifestPath, staticPodName, ns, ImageRegistry[nginxImage], api.RestartPolicyAlways)
|
||||
Expect(err).ShouldNot(HaveOccurred())
|
||||
|
||||
By("wait for the mirror pod to be running")
|
||||
|
@ -60,7 +60,7 @@ var _ = framework.KubeDescribe("MirrorPod", func() {
|
|||
|
||||
By("update the static pod container image")
|
||||
image := ImageRegistry[pauseImage]
|
||||
err = createStaticPod(context.PodConfigPath, staticPodName, ns, image, api.RestartPolicyAlways)
|
||||
err = createStaticPod(framework.TestContext.ManifestPath, staticPodName, ns, image, api.RestartPolicyAlways)
|
||||
Expect(err).ShouldNot(HaveOccurred())
|
||||
|
||||
By("wait for the mirror pod to be updated")
|
||||
|
@ -106,7 +106,7 @@ var _ = framework.KubeDescribe("MirrorPod", func() {
|
|||
})
|
||||
AfterEach(func() {
|
||||
By("delete the static pod")
|
||||
err := deleteStaticPod(context.PodConfigPath, staticPodName, ns)
|
||||
err := deleteStaticPod(framework.TestContext.ManifestPath, staticPodName, ns)
|
||||
Expect(err).ShouldNot(HaveOccurred())
|
||||
|
||||
By("wait for the mirror pod to disappear")
|
||||
|
|
|
@ -22,12 +22,6 @@ import (
|
|||
|
||||
var kubeletAddress = flag.String("kubelet-address", "http://127.0.0.1:10255", "Host and port of the kubelet")
|
||||
|
||||
var disableKubenet = flag.Bool("disable-kubenet", false, "If true, start kubelet without kubenet")
|
||||
var buildServices = flag.Bool("build-services", true, "If true, build local executables")
|
||||
var startServices = flag.Bool("start-services", true, "If true, start local node services")
|
||||
var stopServices = flag.Bool("stop-services", true, "If true, stop local node services after running tests")
|
||||
|
||||
type SharedContext struct {
|
||||
NodeName string
|
||||
PodConfigPath string
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue