Add perf test automation

pull/998/head
galal-hussein 2019-10-31 23:06:54 +02:00
parent 31a615fc0f
commit 128eff9b0e
37 changed files with 2440 additions and 0 deletions

7
tests/perf/.gitignore vendored Normal file

@ -0,0 +1,7 @@
.terraform*
*.tfstate*
*.tfvars*
*.plan*
*tests_results*
*junit.xml
*kubeconfig.yaml

21
tests/perf/Makefile Normal file

@ -0,0 +1,21 @@
MODULE := $(shell basename $$PWD)
.PHONY: init config apply destroy clean test
init:
@scripts/perf init
config:
@scripts/perf config
apply:
@scripts/perf apply
destroy:
@scripts/perf destroy
clean:
@scripts/perf clean
test:
@scripts/test test_load
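
A typical end-to-end run with these targets looks like the following (a sketch, assuming scripts/config has been edited and AWS credentials for the rancher-eng profile used by the Terraform modules are available):

	make init      # terraform init for the server and agents modules
	make apply     # render variables.tfvars, then plan/apply the server first, agents second
	make test      # run the clusterloader load test against the provisioned cluster
	make destroy   # tear down agents, then the server
	make clean     # remove local plans, state, and tfvars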

44
tests/perf/agents/data.tf Normal file

@ -0,0 +1,44 @@
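# Read the server module's local state to discover its outputs (most importantly
# public_ip, which main.tf uses as the K3S_URL join address for the agent pool).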
data "terraform_remote_state" "server" {
backend = "local"
config = {
path = "${path.module}/../server/server.tfstate"
}
}
data "aws_vpc" "default" {
default = true
}
data "aws_subnet_ids" "available" {
vpc_id = data.aws_vpc.default.id
}
data "aws_subnet" "selected" {
id = "${tolist(data.aws_subnet_ids.available.ids)[1]}"
}
data "aws_ami" "ubuntu" {
most_recent = true
owners = ["099720109477"]
filter {
name = "name"
values = ["ubuntu-minimal/images/*/ubuntu-bionic-18.04-*"]
}
filter {
name = "virtualization-type"
values = ["hvm"]
}
filter {
name = "root-device-type"
values = ["ebs"]
}
filter {
name = "architecture"
values = ["x86_64"]
}
}

33
tests/perf/agents/files/pool_worker_userdata.tmpl Normal file

@ -0,0 +1,33 @@
#cloud-config
%{ if length(extra_ssh_keys) > 0 }
ssh_authorized_keys:
%{ for ssh_key in extra_ssh_keys }
- ${ssh_key}
%{ endfor }
%{ endif }
runcmd:
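# Raise ARP-cache, file-descriptor, and TCP buffer limits so a single agent node
# can sustain the pod and connection churn generated by the load tests.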
- echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf
- echo "fs.file-max = 12000500" >> /etc/sysctl.conf
- echo "fs.nr_open = 20000500" >> /etc/sysctl.conf
- echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf
- echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf
- echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf
- echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf
- echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf
- ulimit -n 20000000
- echo "# <domain> <type> <item> <value>" >> /etc/security/limits.d/limits.conf
- echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf
- echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf
- sysctl -p
- apt-get update
- apt-get install -y software-properties-common
- apt-get install -y resolvconf linux-headers-$(uname -r) && echo "nameserver 1.1.1.1" > /etc/resolvconf/resolv.conf.d/tail && systemctl start resolvconf
- DEBIAN_FRONTEND=noninteractive apt-get upgrade -y
- wget https://raw.githubusercontent.com/galal-hussein/k3s/k3s_with_kine_fix/k3s
- cp k3s /usr/local/bin/k3s
- chmod +x /usr/local/bin/k3s
- until (curl -sfL https://get.k3s.io | K3S_URL=https://${k3s_url}:6443 K3S_CLUSTER_SECRET="${k3s_cluster_secret}" INSTALL_K3S_VERSION="${install_k3s_version}" sh -); do echo 'Error installing k3s agent'; sleep 1; done

79
tests/perf/agents/main.tf Normal file

@ -0,0 +1,79 @@
terraform {
backend "local" {
path = "pool.tfstate"
}
}
locals {
name = var.name
k3s_cluster_secret = "pvc-6476dcaf-73a0-11e9-b8e5-06943b744282"
}
provider "aws" {
region = "us-west-2"
profile = "rancher-eng"
}
resource "aws_security_group" "k3s" {
name = "${local.name}-pool"
vpc_id = data.aws_vpc.default.id
ingress {
from_port = 22
to_port = 22
protocol = "TCP"
cidr_blocks = ["0.0.0.0/0"]
}
ingress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
ingress {
from_port = 0
to_port = 0
protocol = "-1"
self = true
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
}
module "k3s-pool-worker-asg" {
source = "terraform-aws-modules/autoscaling/aws"
version = "3.0.0"
name = "${local.name}-pool"
asg_name = "${local.name}-pool"
instance_type = var.worker_instance_type
image_id = data.aws_ami.ubuntu.id
user_data = base64encode(templatefile("${path.module}/files/pool_worker_userdata.tmpl", { k3s_url = data.terraform_remote_state.server.outputs.public_ip, k3s_cluster_secret = local.k3s_cluster_secret, extra_ssh_keys = var.extra_ssh_keys, install_k3s_version = var.k3s_version }))
ebs_optimized = true
desired_capacity = var.node_count
health_check_type = "EC2"
max_size = var.node_count
min_size = var.node_count
vpc_zone_identifier = [data.aws_subnet.selected.id]
spot_price = "0.680"
security_groups = [
aws_security_group.k3s.id,
]
lc_name = "${local.name}-pool"
root_block_device = [
{
volume_size = "100"
volume_type = "gp2"
},
]
}

28
tests/perf/agents/variables.tf Normal file

@ -0,0 +1,28 @@
variable "node_count" {
description = "Number of nodes to run k3s agents on."
type = number
# default = 10
}
variable "worker_instance_type" {
type = string
default = "t3.2xlarge"
}
variable "extra_ssh_keys" {
type = list
default = []
description = "Extra ssh keys to inject into Rancher instances"
}
variable "k3s_version" {
default = "v0.9.1"
type = string
description = "Version of K3S to install"
}
variable "name" {
default = "k3s-loadtest"
type = string
description = "Name to identify this cluster"
}

4
tests/perf/agents/versions.tf Normal file

@ -0,0 +1,4 @@
terraform {
required_version = ">= 0.12"
}

28
tests/perf/scripts/config Executable file

@ -0,0 +1,28 @@
## MAIN VARIABLES ##
####################
CLUSTER_NAME="hgalal-k3s"
K3S_VERSION="v0.10.0"
EXTRA_SSH_KEYS="ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDZBAE6I9J733HJfCBVu7iWSUuJ7th0U4P4IFfpFDca52n/Hk4yFFr8SPR8JJc1n42c3vEVCbExp/MD4ihqEBy9+pLewxA+fkb7UAT4cT2eLfvZdTTVe8KSiw6lVN6tWSoNXmNqY+wH7zWQ04lfjXPa/c01L1n2XwV/O+5xii9vEuSxN9YhfQ/s61SdLFqQ5yS8gPsM0qQW+bFt5KGGbapqztDO+h9lxGbZRcRAKbCzZ5kF1mhjI/+VubTWKtoVLCumjzjYqILYyx9g/mLSo26qjDEZvtwBQB9KLugDAtnalLVp0HgivC5YfLHr8PxViVSHfIIKS2DhUpn07jr8eKi9"
PRIVATE_KEY_PATH="/home/hussein/.ssh/id_rsa" # must be an absolute path
## K3S SERVER VARIABLES ##
##########################
K3S_HA=1
MASTER_COUNT=3
DB_INSTANCE_TYPE="db.m4.4xlarge"
SERVER_INSTANCE_TYPE="m5.2xlarge"
DEBUG=1
## PROMETHEUS SERVER VARIABLES ##
#################################
PROM_WORKER_NODE_COUNT=1
PROM_HOST="prometheus-load.eng.rancher.space"
GRAF_HOST="prometheus-load.eng.rancher.space"
## K3S AGENTS VARIABLES ##
##########################
AGENT_NODE_COUNT=100
WORKER_INSTANCE_TYPE="m5.xlarge"
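
For reference, with the values above, "scripts/perf config" writes tests/perf/server/variables.tfvars roughly as follows (the agents module gets an analogous file; host, key, and ssh values omitted here):

	name = "hgalal-k3s"
	db_instance_type = "db.m4.4xlarge"
	server_instance_type = "m5.2xlarge"
	master_count = 3
	k3s_ha = 1
	k3s_version = "v0.10.0"
	prom_worker_node_count = 1
	debug = 1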

83
tests/perf/scripts/perf Executable file

@ -0,0 +1,83 @@
#!/bin/bash -ex
TERRAFORM_PLAN_CMD="terraform plan --var-file variables.tfvars --out k3s.plan"
TERRAFORM_APPLY_CMD="terraform apply k3s.plan"
TERRAFORM_DESTROY_CMD="terraform destroy --var-file variables.tfvars --force"
for bin in docker kubectl terraform; do
type $bin >/dev/null 2>&1 || { echo "$bin is not in the path. Please make sure it is installed and in PATH."; exit 1; }
done
init() {
for i in server agents; do
pushd $i
terraform init
popd
done
}
apply() {
# init terraform
config
# Run apply for server and agents
for i in server agents; do
if [ $i == "agents" ]; then
echo "Sleeping 1 minute until server(s) is initialized"
sleep 60
fi
pushd $i
$TERRAFORM_PLAN_CMD
$TERRAFORM_APPLY_CMD
popd
done
}
config() {
source scripts/config
pushd ./server
cat <<MAIN > variables.tfvars
name = "${CLUSTER_NAME}"
db_instance_type = "${DB_INSTANCE_TYPE}"
server_instance_type = "${SERVER_INSTANCE_TYPE}"
extra_ssh_keys = ["${EXTRA_SSH_KEYS}"]
master_count = ${MASTER_COUNT}
k3s_ha = ${K3S_HA}
k3s_version = "${K3S_VERSION}"
prom_worker_node_count = ${PROM_WORKER_NODE_COUNT}
prom_host = "${PROM_HOST}"
graf_host = "${GRAF_HOST}"
ssh_key_path = "${PRIVATE_KEY_PATH}"
debug = ${DEBUG}
MAIN
popd
pushd ./agents
cat <<MAIN > variables.tfvars
name = "${CLUSTER_NAME}"
node_count = ${AGENT_NODE_COUNT}
extra_ssh_keys = ["${EXTRA_SSH_KEYS}"]
k3s_version = "${K3S_VERSION}"
worker_instance_type = "${WORKER_INSTANCE_TYPE}"
MAIN
popd
}
clean() {
# clean server and agents
for i in server agents; do
pushd $i
rm -f *.plan *.tfvars *.tfstate*
popd
done
}
destroy() {
for i in agents server; do
pushd $i
terraform destroy --var-file variables.tfvars --force
popd
done
clean
}
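# Dispatch: run the function named by the first CLI argument (init|config|apply|destroy|clean).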
"$@"

48
tests/perf/scripts/test Executable file

@ -0,0 +1,48 @@
#!/bin/bash -ex
test_load() {
source scripts/config
masterips=`terraform output -state=server/server.tfstate | grep k3s_server_ips | cut -d "=" -f 2`
pushd tests/
docker run -v $PRIVATE_KEY_PATH:/opt/priv_key \
-e KUBE_SSH_USER=ubuntu \
-e LOCAL_SSH_KEY=/opt/priv_key \
-it -v $PWD/:/opt/k3s/perf-tests husseingalal/clusterloader:dev \
clusterloader --testconfig /opt/k3s/perf-tests/load/config.yaml \
--kubeconfig /opt/k3s/perf-tests/kubeconfig.yaml \
--masterip $masterips \
--provider=local \
--report-dir /opt/k3s/perf-tests/load_tests_results \
--enable-prometheus-server \
--tear-down-prometheus-server=0
popd
}
test_density() {
source scripts/config
masterips=`terraform output -state=server/server.tfstate | grep k3s_server_ips | cut -d "=" -f 2`
pushd tests/
docker run -e KUBE_SSH_USER=ubuntu \
-v $PRIVATE_KEY_PATH:/opt/priv_key \
-e LOCAL_SSH_KEY=/opt/priv_key \
-it -v $PWD/:/opt/k3s/perf-tests husseingalal/clusterloader:dev \
clusterloader --testconfig /opt/k3s/perf-tests/density/config.yaml \
--kubeconfig /opt/k3s/perf-tests/kubeconfig.yaml \
--masterip $masterips \
--provider=local \
--report-dir /opt/k3s/perf-tests/density_tests_results \
--enable-prometheus-server \
--tear-down-prometheus-server=0
popd
}
clean() {
# clean kubeconfig
pushd tests/
rm -f kubeconfig.yaml
rm -rf load_tests_results/
rm -rf density_tests_results/
popd
}
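# Dispatch: run the function named by the first CLI argument (test_load|test_density|clean).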
"$@"

52
tests/perf/server/data.tf Normal file

@ -0,0 +1,52 @@
data "aws_vpc" "default" {
default = true
}
data "aws_subnet_ids" "available" {
vpc_id = data.aws_vpc.default.id
}
data "aws_subnet" "selected" {
id = "${tolist(data.aws_subnet_ids.available.ids)[1]}"
}
data "aws_ami" "ubuntu" {
most_recent = true
owners = ["099720109477"]
filter {
name = "name"
values = ["ubuntu-minimal/images/*/ubuntu-bionic-18.04-*"]
}
filter {
name = "virtualization-type"
values = ["hvm"]
}
filter {
name = "root-device-type"
values = ["ebs"]
}
filter {
name = "architecture"
values = ["x86_64"]
}
}
data "template_file" "metrics" {
template = file("${path.module}/files/metrics.yaml")
vars = {
prom_worker_node_count = local.prom_worker_node_count
}
}
data "template_file" "k3s-prom-yaml" {
template = file("${path.module}/files/prom.yaml")
vars = {
prom_host = var.prom_host
graf_host = var.graf_host
prom_worker_node_count = local.prom_worker_node_count
}
}

227
tests/perf/server/files/metrics.yaml Normal file

@ -0,0 +1,227 @@
%{ if prom_worker_node_count != 0 }
---
apiVersion: rbac.authorization.k8s.io/v1
# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: kube-state-metrics
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: kube-state-metrics
subjects:
- kind: ServiceAccount
name: kube-state-metrics
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
name: kube-state-metrics
rules:
- apiGroups: [""]
resources:
- configmaps
- secrets
- nodes
- pods
- services
- resourcequotas
- replicationcontrollers
- limitranges
- persistentvolumeclaims
- persistentvolumes
- namespaces
- endpoints
verbs: ["list", "watch"]
- apiGroups: ["extensions"]
resources:
- daemonsets
- deployments
- replicasets
- ingresses
verbs: ["list", "watch"]
- apiGroups: ["apps"]
resources:
- daemonsets
- deployments
- replicasets
- statefulsets
verbs: ["list", "watch"]
- apiGroups: ["batch"]
resources:
- cronjobs
- jobs
verbs: ["list", "watch"]
- apiGroups: ["autoscaling"]
resources:
- horizontalpodautoscalers
verbs: ["list", "watch"]
- apiGroups: ["policy"]
resources:
- poddisruptionbudgets
verbs: ["list", "watch"]
- apiGroups: ["certificates.k8s.io"]
resources:
- certificatesigningrequests
verbs: ["list", "watch"]
- apiGroups: ["storage.k8s.io"]
resources:
- storageclasses
verbs: ["list", "watch"]
- apiGroups: ["autoscaling.k8s.io"]
resources:
- verticalpodautoscalers
verbs: ["list", "watch"]
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
k8s-app: kube-state-metrics
name: kube-state-metrics
namespace: kube-system
spec:
selector:
matchLabels:
k8s-app: kube-state-metrics
replicas: 1
template:
metadata:
labels:
k8s-app: kube-state-metrics
spec:
serviceAccountName: kube-state-metrics
containers:
- name: kube-state-metrics
image: quay.io/coreos/kube-state-metrics:v1.7.2
ports:
- name: http-metrics
containerPort: 8080
- name: telemetry
containerPort: 8081
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 5
timeoutSeconds: 5
readinessProbe:
httpGet:
path: /
port: 8080
initialDelaySeconds: 5
timeoutSeconds: 5
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: kube-state-metrics
namespace: kube-system
---
apiVersion: v1
kind: Service
metadata:
name: kube-state-metrics
namespace: kube-system
labels:
k8s-app: kube-state-metrics
annotations:
prometheus.io/scrape: 'true'
spec:
ports:
- name: http-metrics
port: 8080
targetPort: http-metrics
protocol: TCP
- name: telemetry
port: 8081
targetPort: telemetry
protocol: TCP
selector:
k8s-app: kube-state-metrics
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: slo-monitor
subjects:
- kind: ServiceAccount
name: slo-monitor
namespace: kube-system
roleRef:
kind: ClusterRole
name: slo-monitor
apiGroup: rbac.authorization.k8s.io
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: slo-monitor
namespace: kube-system
rules:
- apiGroups: [""]
resources: ["pods", "events"]
verbs: ["get", "watch", "list"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: slo-monitor
namespace: kube-system
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: slo-monitor
namespace: kube-system
labels:
app: slo-monitor
spec:
selector:
matchLabels:
app: slo-monitor
template:
metadata:
labels:
app: slo-monitor
annotations:
prometheus.io/scrape: "true"
spec:
containers:
- name: slo-monitor
image: gcr.io/google-containers/slo-monitor:0.12.0
command:
- /slo-monitor
- --alsologtostderr=true
imagePullPolicy: Always
ports:
- name: metrics
containerPort: 8080
resources:
requests:
cpu: 300m
memory: 100Mi
limits:
cpu: 300m
memory: 100Mi
restartPolicy: Always
serviceAccountName: slo-monitor
---
apiVersion: v1
kind: Service
metadata:
name: slo-monitor
namespace: kube-system
labels:
app: slo-monitor
spec:
selector:
app: slo-monitor
ports:
- name: metrics
port: 80
targetPort: metrics
type: ClusterIP
%{ endif }

86
tests/perf/server/files/prom.yaml Normal file

@ -0,0 +1,86 @@
%{ if prom_worker_node_count != 0 }
---
apiVersion: v1
kind: Namespace
metadata:
name: monitoring
---
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
name: prometheus
namespace: kube-system
spec:
chart: https://raw.githubusercontent.com/galal-hussein/charts/master/prometheus-9.2.0.tgz
targetNamespace: monitoring
valuesContent: |-
alertmanager:
nodeSelector:
prom: "true"
persistentVolume:
enabled: false
kubeStateMetrics:
nodeSelector:
prom: "true"
nodeExporter:
nodeSelector:
prom: "true"
server:
nodeSelector:
prom: "true"
ingress:
enabled: true
hosts:
- ${prom_host}
persistentVolume:
enabled: false
pushgateway:
nodeSelector:
prom: "true"
persistentVolume:
enabled: false
serverFiles:
prometheus.yml:
scrape_configs:
- job_name: prometheus
static_configs:
- targets:
- localhost:9090
- job_name: kubernetes-apiservers
scrape_interval: 10s
scrape_timeout: 10s
metrics_path: /metrics
scheme: https
kubernetes_sd_configs:
- api_server: null
role: endpoints
namespaces:
names: []
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
separator: ;
regex: default;kubernetes;https
replacement: $1
action: keep
---
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
name: grafana
namespace: kube-system
spec:
chart: stable/grafana
targetNamespace: monitoring
valuesContent: |-
ingress:
enabled: true
hosts:
- ${graf_host}
nodeSelector:
prom: "true"
%{ endif }

55
tests/perf/server/files/server_userdata.tmpl Normal file

@ -0,0 +1,55 @@
#cloud-config
%{ if length(extra_ssh_keys) > 0 }
ssh_authorized_keys:
%{ for ssh_key in extra_ssh_keys }
- ${ssh_key}
%{ endfor }
%{ endif }
write_files:
- path: /var/lib/rancher/k3s/server/manifests/metrics.yaml
permissions: "0755"
owner: root:root
encoding: b64
content: ${metrics_yaml}
- path: /var/lib/rancher/k3s/server/manifests/prom.yaml
permissions: "0755"
owner: root:root
encoding: b64
content: ${prom_yaml}
runcmd:
- echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf
- echo "fs.file-max = 12000500" >> /etc/sysctl.conf
- echo "fs.nr_open = 20000500" >> /etc/sysctl.conf
- echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf
- echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf
- echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf
- echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf
- echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf
- ulimit -n 20000000
- echo "# <domain> <type> <item> <value>" >> /etc/security/limits.d/limits.conf
- echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf
- echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf
- sysctl -p
- apt-get update
- apt-get install -y git vim software-properties-common resolvconf linux-headers-$(uname -r)
- echo "nameserver 1.1.1.1" > /etc/resolvconf/resolv.conf.d/tail
- echo "RateLimitIntervalSec=0" >> /etc/systemd/journald.conf
- echo "RateLimitBurst=0" >> /etc/systemd/journald.conf
- systemctl restart systemd-journald.service
- systemctl start resolvconf
- wget https://raw.githubusercontent.com/galal-hussein/k3s/k3s_with_kine_fix/k3s
- cp k3s /usr/local/bin/k3s
- chmod +x /usr/local/bin/k3s
%{if master_index != 0 }
- sleep 20
%{ endif }
- until (curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_DOWNLOAD=true K3S_CLUSTER_SECRET="${k3s_cluster_secret}" INSTALL_K3S_VERSION="${install_k3s_version}" INSTALL_K3S_EXEC="${k3s_server_args} --cluster-cidr=10.0.0.0/8 --no-deploy traefik --no-deploy servicelb --tls-san ${lb_address} %{ if use_ha == "true" } --storage-endpoint="postgres://${db_username}:${db_password}@${db_address}:5432/${db_name}" %{ if master_index == 0 }--bootstrap-save%{ endif } %{ endif }" sh -); do echo 'Error installing k3s'; sleep 1; done
%{if debug != 0 }
- sed -i 's/bin\/k3s/bin\/k3s --debug/g' /etc/systemd/system/k3s.service
- systemctl daemon-reload
- systemctl restart k3s
%{ endif }

29
tests/perf/server/files/worker_userdata.tmpl Normal file

@ -0,0 +1,29 @@
#cloud-config
%{ if length(extra_ssh_keys) > 0 }
ssh_authorized_keys:
%{ for ssh_key in extra_ssh_keys }
- ${ssh_key}
%{ endfor }
%{ endif }
runcmd:
- echo "net.ipv4.neigh.default.gc_interval = 3600" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_stale_time = 3600" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_thresh3 = 16384" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_thresh2 = 8192" >> /etc/sysctl.conf
- echo "net.ipv4.neigh.default.gc_thresh1 = 4096" >> /etc/sysctl.conf
- echo "fs.file-max = 12000500" >> /etc/sysctl.conf
- echo "fs.nr_open = 20000500" >> /etc/sysctl.conf
- echo "net.ipv4.tcp_mem = '10000000 10000000 10000000'" >> /etc/sysctl.conf
- echo "net.ipv4.tcp_rmem = '1024 4096 16384'" >> /etc/sysctl.conf
- echo "net.ipv4.tcp_wmem = '1024 4096 16384'" >> /etc/sysctl.conf
- echo "net.core.rmem_max = 16384" >> /etc/sysctl.conf
- echo "net.core.wmem_max = 16384" >> /etc/sysctl.conf
- ulimit -n 20000
- echo "# <domain> <type> <item> <value>" >> /etc/security/limits.d/limits.conf
- echo " * soft nofile 20000" >> /etc/security/limits.d/limits.conf
- echo " * hard nofile 20000" >> /etc/security/limits.d/limits.conf
- sysctl -p
- wget https://raw.githubusercontent.com/galal-hussein/k3s/k3s_with_kine_fix/k3s
- cp k3s /usr/local/bin/k3s
- chmod +x /usr/local/bin/k3s
- until (curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=${install_k3s_version} INSTALL_K3S_EXEC="${k3s_exec}" K3S_URL=https://${k3s_url}:6443 K3S_CLUSTER_SECRET="${k3s_cluster_secret}" sh -); do echo 'k3s did not install correctly'; sleep 1; done

188
tests/perf/server/main.tf Normal file

@ -0,0 +1,188 @@
terraform {
backend "local" {
path = "server.tfstate"
}
}
locals {
name = var.name
k3s_cluster_secret = var.k3s_cluster_secret
install_k3s_version = var.k3s_version
prom_worker_node_count = var.prom_worker_node_count
}
provider "aws" {
region = "us-west-2"
profile = "rancher-eng"
}
resource "aws_security_group" "k3s" {
name = "${local.name}-sg"
vpc_id = data.aws_vpc.default.id
ingress {
from_port = 22
to_port = 22
protocol = "TCP"
cidr_blocks = ["0.0.0.0/0"]
}
ingress {
from_port = 6443
to_port = 6443
protocol = "TCP"
cidr_blocks = ["0.0.0.0/0"]
}
ingress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
ingress {
from_port = 0
to_port = 0
protocol = "-1"
self = true
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
}
resource "aws_db_instance" "k3s_db" {
count = "${var.k3s_ha}"
allocated_storage = 100 # at 100 GB, gp2 baseline would be only ~300 IOPS, hence io1 with provisioned IOPS
storage_type = "io1"
iops = "3000"
engine = "postgres"
engine_version = "11.5"
instance_class = "${var.db_instance_type}"
name = "${var.db_name}"
username = "${var.db_username}"
password = "${var.db_password}"
skip_final_snapshot = true
multi_az = false
}
resource "aws_lb" "k3s-master-nlb" {
name = "${local.name}-nlb"
internal = false
load_balancer_type = "network"
subnets = [data.aws_subnet.selected.id]
}
resource "aws_lb_target_group" "k3s-master-nlb-tg" {
name = "${local.name}-nlb-tg"
port = "6443"
protocol = "TCP"
vpc_id = data.aws_vpc.default.id
deregistration_delay = "300"
health_check {
interval = "30"
port = "6443"
protocol = "TCP"
healthy_threshold = "10"
unhealthy_threshold = "10"
}
}
resource "aws_lb_listener" "k3s-master-nlb-tg" {
load_balancer_arn = "${aws_lb.k3s-master-nlb.arn}"
port = "6443"
protocol = "TCP"
default_action {
target_group_arn = "${aws_lb_target_group.k3s-master-nlb-tg.arn}"
type = "forward"
}
}
resource "aws_lb_target_group_attachment" "test" {
count = "${var.master_count}"
target_group_arn = "${aws_lb_target_group.k3s-master-nlb-tg.arn}"
target_id = "${aws_spot_instance_request.k3s-server[count.index].spot_instance_id}"
port = 6443
}
resource "aws_spot_instance_request" "k3s-server" {
count = "${var.master_count}"
instance_type = var.server_instance_type
ami = data.aws_ami.ubuntu.id
user_data = base64encode(templatefile("${path.module}/files/server_userdata.tmpl",
{
extra_ssh_keys = var.extra_ssh_keys,
metrics_yaml = base64encode(data.template_file.metrics.rendered),
prom_yaml = base64encode(data.template_file.k3s-prom-yaml.rendered),
k3s_cluster_secret = local.k3s_cluster_secret,
install_k3s_version = local.install_k3s_version,
k3s_server_args = var.k3s_server_args,
db_address = aws_db_instance.k3s_db[0].address,
db_name = aws_db_instance.k3s_db[0].name,
db_username = aws_db_instance.k3s_db[0].username,
db_password = aws_db_instance.k3s_db[0].password,
use_ha = "${var.k3s_ha == 1 ? "true": "false"}",
master_index = count.index,
lb_address = aws_lb.k3s-master-nlb.dns_name,
prom_worker_node_count = local.prom_worker_node_count,
debug = var.debug,}))
wait_for_fulfillment = true
security_groups = [
aws_security_group.k3s.name,
]
root_block_device {
volume_size = "100"
volume_type = "gp2"
}
tags = {
Name = "${local.name}-server-${count.index}"
}
provisioner "local-exec" {
command = "sleep 10"
}
}
module "k3s-prom-worker-asg" {
source = "terraform-aws-modules/autoscaling/aws"
version = "3.0.0"
name = "${local.name}-prom-worker"
asg_name = "${local.name}-prom-worker"
instance_type = "m5.large"
image_id = data.aws_ami.ubuntu.id
user_data = base64encode(templatefile("${path.module}/files/worker_userdata.tmpl", { extra_ssh_keys = var.extra_ssh_keys, k3s_url = aws_lb.k3s-master-nlb.dns_name, k3s_cluster_secret = local.k3s_cluster_secret, install_k3s_version = local.install_k3s_version, k3s_exec = "--node-label prom=true" }))
desired_capacity = local.prom_worker_node_count
health_check_type = "EC2"
max_size = local.prom_worker_node_count
min_size = local.prom_worker_node_count
vpc_zone_identifier = [data.aws_subnet.selected.id]
spot_price = "0.340"
security_groups = [
aws_security_group.k3s.id,
]
lc_name = "${local.name}-prom-worker"
root_block_device = [
{
volume_size = "100"
volume_type = "gp2"
},
]
}
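# Fetch the generated kubeconfig from the first server, rewriting localhost/127.0.0.1
# to the NLB address so the test harness can reach the API server externally.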
resource "null_resource" "get-kubeconfig" {
provisioner "local-exec" {
interpreter = ["bash", "-c"]
command = "until ssh -i ${var.ssh_key_path} ubuntu@${aws_spot_instance_request.k3s-server[0].public_ip} 'sudo sed \"s/localhost/$aws_lb.k3s-master-nlb.dns_name}/g;s/127.0.0.1/${aws_lb.k3s-master-nlb.dns_name}/g\" /etc/rancher/k3s/k3s.yaml' >| ../tests/kubeconfig.yaml; do sleep 5; done"
}
}

15
tests/perf/server/outputs.tf Normal file

@ -0,0 +1,15 @@
output "public_ip" {
value = aws_lb.k3s-master-nlb.dns_name
}
output "install_k3s_version" {
value = local.install_k3s_version
}
output "k3s_cluster_secret" {
value = local.k3s_cluster_secret
}
output "k3s_server_ips" {
value = join(",", aws_spot_instance_request.k3s-server.*.public_ip)
}

78
tests/perf/server/variables.tf Normal file

@ -0,0 +1,78 @@
variable "server_instance_type" {
# default = "c4.8xlarge"
}
variable "k3s_version" {
default = "v0.9.1"
type = string
description = "Version of K3S to install"
}
variable "k3s_server_args" {
default = ""
}
variable "prom_worker_node_count" {
default = 0
type = number
description = "The number of workers to create labeled for prometheus"
}
variable "k3s_cluster_secret" {
default = "pvc-6476dcaf-73a0-11e9-b8e5-06943b744282"
type = string
description = "Cluster secret for k3s cluster registration"
}
variable "prom_host" {
default = ""
}
variable "graf_host" {
default = ""
}
variable "name" {
default = "k3s-loadtest"
type = string
description = "Name to identify this cluster"
}
variable "ssh_key_path" {
default = "~/.ssh/id_rsa"
type = string
description = "Path of the private key to ssh to the nodes"
}
variable "extra_ssh_keys" {
type = list
default = []
description = "Extra ssh keys to inject into Rancher instances"
}
variable "k3s_ha" {
default = 0
description = "Enable k3s in HA mode"
}
variable "db_instance_type" {
}
variable "db_name" {
default = "k3s"
}
variable "db_username" {
default = "postgres"
}
variable "db_password" {
default = "b58bf234c4bd0133fc7a92b782e498a6"
}
variable "master_count" {
default = 1
description = "Count of k3s master servers"
}
variable "debug" {
default = 0
description = "Enable Debug log"
}

4
tests/perf/server/versions.tf Normal file

@ -0,0 +1,4 @@
terraform {
required_version = ">= 0.12"
}


@ -0,0 +1 @@
NODE_MODE: masteranddns


@ -0,0 +1 @@
NODE_MODE: masteranddns


@ -0,0 +1 @@
PODS_PER_NODE: 95

248
tests/perf/tests/density/config.yaml Normal file

@ -0,0 +1,248 @@
# ASSUMPTIONS:
# - Underlying cluster should have 100+ nodes.
# - Number of nodes should be divisible by NODES_PER_NAMESPACE (default 100).
#Constants
{{$DENSITY_RESOURCE_CONSTRAINTS_FILE := DefaultParam .DENSITY_RESOURCE_CONSTRAINTS_FILE ""}}
{{$NODE_MODE := DefaultParam .NODE_MODE "allnodes"}}
{{$NODES_PER_NAMESPACE := DefaultParam .NODES_PER_NAMESPACE 100}}
{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}}
{{$DENSITY_TEST_THROUGHPUT := DefaultParam .DENSITY_TEST_THROUGHPUT 20}}
# LATENCY_POD_MEMORY and LATENCY_POD_CPU are calculated for 1-core 4GB node.
# Increasing allocation of both memory and cpu by 10%
# decreases the value of priority function in scheduler by one point.
# This results in decreased probability of choosing the same node again.
{{$LATENCY_POD_CPU := DefaultParam .LATENCY_POD_CPU 100}}
{{$LATENCY_POD_MEMORY := DefaultParam .LATENCY_POD_MEMORY 350}}
{{$MIN_LATENCY_PODS := 500}}
{{$MIN_SATURATION_PODS_TIMEOUT := 180}}
{{$ENABLE_CHAOSMONKEY := DefaultParam .ENABLE_CHAOSMONKEY false}}
{{$ENABLE_PROMETHEUS_API_RESPONSIVENESS := DefaultParam .ENABLE_PROMETHEUS_API_RESPONSIVENESS false}}
{{$ENABLE_SYSTEM_POD_METRICS:= DefaultParam .ENABLE_SYSTEM_POD_METRICS true}}
{{$USE_SIMPLE_LATENCY_QUERY := DefaultParam .USE_SIMPLE_LATENCY_QUERY false}}
#Variables
{{$namespaces := DivideInt .Nodes $NODES_PER_NAMESPACE}}
{{$podsPerNamespace := MultiplyInt $PODS_PER_NODE $NODES_PER_NAMESPACE}}
{{$totalPods := MultiplyInt $podsPerNamespace $namespaces}}
{{$latencyReplicas := DivideInt (MaxInt $MIN_LATENCY_PODS .Nodes) $namespaces}}
{{$totalLatencyPods := MultiplyInt $namespaces $latencyReplicas}}
{{$saturationDeploymentTimeout := DivideFloat $totalPods $DENSITY_TEST_THROUGHPUT | AddInt $MIN_SATURATION_PODS_TIMEOUT}}
# saturationDeploymentHardTimeout must be at least 20m to make sure that ~10m node
# failure won't fail the test. See https://github.com/kubernetes/kubernetes/issues/73461#issuecomment-467338711
{{$saturationDeploymentHardTimeout := MaxInt $saturationDeploymentTimeout 1200}}
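# Worked example with the defaults (100 nodes, 30 pods/node, throughput 20):
# totalPods = 100 * 30 = 3000, saturationDeploymentTimeout = 3000/20 + 180 = 330s,
# saturationDeploymentHardTimeout = max(330, 1200) = 1200s.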
name: density
automanagedNamespaces: {{$namespaces}}
tuningSets:
- name: Uniform5qps
qpsLoad:
qps: 5
{{if $ENABLE_CHAOSMONKEY}}
chaosMonkey:
nodeFailure:
failureRate: 0.01
interval: 1m
jitterFactor: 10.0
simulatedDowntime: 10m
{{end}}
steps:
- name: Starting measurements
measurements:
- Identifier: APIResponsiveness
Method: APIResponsiveness
Params:
action: reset
- Identifier: APIResponsivenessPrometheus
Method: APIResponsivenessPrometheus
Params:
action: start
# TODO(oxddr): figure out how many probers to run in function of cluster
- Identifier: InClusterNetworkLatency
Method: InClusterNetworkLatency
Params:
action: start
replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}}
- Identifier: DnsLookupLatency
Method: DnsLookupLatency
Params:
action: start
replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}}
- Identifier: TestMetrics
Method: TestMetrics
Params:
action: start
nodeMode: {{$NODE_MODE}}
resourceConstraints: {{$DENSITY_RESOURCE_CONSTRAINTS_FILE}}
systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}}
- name: Starting saturation pod measurements
measurements:
- Identifier: SaturationPodStartupLatency
Method: PodStartupLatency
Params:
action: start
labelSelector: group = saturation
threshold: {{$saturationDeploymentTimeout}}s
- Identifier: WaitForRunningSaturationDeployments
Method: WaitForControlledPodsRunning
Params:
action: start
apiVersion: apps/v1
kind: Deployment
labelSelector: group = saturation
operationTimeout: {{$saturationDeploymentHardTimeout}}s
- Identifier: SchedulingThroughput
Method: SchedulingThroughput
Params:
action: start
labelSelector: group = saturation
- name: Creating saturation pods
phases:
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 1
tuningSet: Uniform5qps
objectBundle:
- basename: saturation-deployment
objectTemplatePath: deployment.yaml
templateFillMap:
Replicas: {{$podsPerNamespace}}
Group: saturation
CpuRequest: 1m
MemoryRequest: 10M
- name: Collecting saturation pod measurements
measurements:
- Identifier: WaitForRunningSaturationDeployments
Method: WaitForControlledPodsRunning
Params:
action: gather
- measurements:
- Identifier: SaturationPodStartupLatency
Method: PodStartupLatency
Params:
action: gather
- measurements:
- Identifier: SchedulingThroughput
Method: SchedulingThroughput
Params:
action: gather
- name: Starting latency pod measurements
measurements:
- Identifier: PodStartupLatency
Method: PodStartupLatency
Params:
action: start
labelSelector: group = latency
- Identifier: WaitForRunningLatencyDeployments
Method: WaitForControlledPodsRunning
Params:
action: start
apiVersion: apps/v1
kind: Deployment
labelSelector: group = latency
operationTimeout: 15m
- name: Creating latency pods
phases:
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: {{$latencyReplicas}}
tuningSet: Uniform5qps
objectBundle:
- basename: latency-deployment
objectTemplatePath: deployment.yaml
templateFillMap:
Replicas: 1
Group: latency
CpuRequest: {{$LATENCY_POD_CPU}}m
MemoryRequest: {{$LATENCY_POD_MEMORY}}M
- name: Waiting for latency pods to be running
measurements:
- Identifier: WaitForRunningLatencyDeployments
Method: WaitForControlledPodsRunning
Params:
action: gather
- name: Deleting latency pods
phases:
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 0
tuningSet: Uniform5qps
objectBundle:
- basename: latency-deployment
objectTemplatePath: deployment.yaml
- name: Waiting for latency pods to be deleted
measurements:
- Identifier: WaitForRunningLatencyDeployments
Method: WaitForControlledPodsRunning
Params:
action: gather
- name: Collecting pod startup latency
measurements:
- Identifier: PodStartupLatency
Method: PodStartupLatency
Params:
action: gather
- name: Deleting saturation pods
phases:
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 0
tuningSet: Uniform5qps
objectBundle:
- basename: saturation-deployment
objectTemplatePath: deployment.yaml
- name: Waiting for saturation pods to be deleted
measurements:
- Identifier: WaitForRunningSaturationDeployments
Method: WaitForControlledPodsRunning
Params:
action: gather
- name: Collecting measurements
measurements:
- Identifier: APIResponsiveness
Method: APIResponsiveness
Params:
action: gather
- Identifier: APIResponsivenessPrometheusSimple
Method: APIResponsivenessPrometheus
Params:
action: gather
{{if $ENABLE_PROMETHEUS_API_RESPONSIVENESS}}
enableViolations: true
{{end}}
useSimpleLatencyQuery: true
summaryName: APIResponsivenessPrometheus_simple
{{if not $USE_SIMPLE_LATENCY_QUERY}}
- Identifier: APIResponsivenessPrometheus
Method: APIResponsivenessPrometheus
Params:
action: gather
{{end}}
- Identifier: InClusterNetworkLatency
Method: InClusterNetworkLatency
Params:
action: gather
- Identifier: DnsLookupLatency
Method: DnsLookupLatency
Params:
action: gather
- Identifier: TestMetrics
Method: TestMetrics
Params:
action: gather
systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}}

37
tests/perf/tests/density/deployment.yaml Normal file

@ -0,0 +1,37 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{.Name}}
labels:
group: {{.Group}}
spec:
replicas: {{.Replicas}}
selector:
matchLabels:
name: {{.Name}}
template:
metadata:
labels:
name: {{.Name}}
group: {{.Group}}
spec:
containers:
- image: k8s.gcr.io/pause:3.1
imagePullPolicy: IfNotPresent
name: {{.Name}}
ports:
resources:
requests:
cpu: {{.CpuRequest}}
memory: {{.MemoryRequest}}
# Add not-ready/unreachable tolerations for 15 minutes so that node
# failure doesn't trigger pod deletion.
tolerations:
- key: "node.kubernetes.io/not-ready"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 900
- key: "node.kubernetes.io/unreachable"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 900

765
tests/perf/tests/load/config.yaml Normal file

@ -0,0 +1,765 @@
# ASSUMPTIONS:
# - Underlying cluster should have 100+ nodes.
# - Number of nodes should be divisible by NODES_PER_NAMESPACE (default 100).
# - The number of created SVCs is half the number of created Deployments.
# - Only half of Deployments will be assigned 1-1 to existing SVCs.
#Constants
{{$NODE_MODE := DefaultParam .NODE_MODE "allnodes"}}
{{$NODES_PER_NAMESPACE := DefaultParam .NODES_PER_NAMESPACE 100}}
{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}}
{{$LOAD_TEST_THROUGHPUT := DefaultParam .LOAD_TEST_THROUGHPUT 10}}
{{$BIG_GROUP_SIZE := 1000}}
{{$MEDIUM_GROUP_SIZE := 500}}
{{$SMALL_GROUP_SIZE := 50}}
{{$SMALL_STATEFUL_SETS_PER_NAMESPACE := 1}}
{{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE := 1}}
{{$ENABLE_CHAOSMONKEY := DefaultParam .ENABLE_CHAOSMONKEY false}}
{{$PROMETHEUS_SCRAPE_KUBE_PROXY := DefaultParam .PROMETHEUS_SCRAPE_KUBE_PROXY true}}
{{$ENABLE_PROMETHEUS_API_RESPONSIVENESS := DefaultParam .ENABLE_PROMETHEUS_API_RESPONSIVENESS false}}
{{$ENABLE_CONFIGMAPS := DefaultParam .ENABLE_CONFIGMAPS false}}
{{$ENABLE_DAEMONSETS := DefaultParam .ENABLE_DAEMONSETS false}}
{{$ENABLE_JOBS := DefaultParam .ENABLE_JOBS false}}
{{$ENABLE_PVS := DefaultParam .ENABLE_PVS false}}
{{$ENABLE_SECRETS := DefaultParam .ENABLE_SECRETS false}}
{{$ENABLE_STATEFULSETS := DefaultParam .ENABLE_STATEFULSETS false}}
{{$ENABLE_NETWORKPOLICIES := DefaultParam .ENABLE_NETWORKPOLICIES false}}
{{$ENABLE_SYSTEM_POD_METRICS:= DefaultParam .ENABLE_SYSTEM_POD_METRICS true}}
{{$USE_SIMPLE_LATENCY_QUERY := DefaultParam .USE_SIMPLE_LATENCY_QUERY false}}
#Variables
{{$namespaces := DivideInt .Nodes $NODES_PER_NAMESPACE}}
{{$totalPods := MultiplyInt $namespaces $NODES_PER_NAMESPACE $PODS_PER_NODE}}
{{$podsPerNamespace := DivideInt $totalPods $namespaces}}
{{$saturationTime := DivideInt $totalPods $LOAD_TEST_THROUGHPUT}}
# bigDeployments - 1/4 of namespace pods should be in big Deployments.
{{$bigDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $BIG_GROUP_SIZE)}}
# mediumDeployments - 1/4 of namespace pods should be in medium Deployments.
{{$mediumDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $MEDIUM_GROUP_SIZE)}}
# smallDeployments - 1/2 of namespace pods should be in small Deployments.
{{$smallDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 2 $SMALL_GROUP_SIZE)}}
# If StatefulSets are enabled reduce the number of small and medium deployments per namespace
{{$smallDeploymentsPerNamespace := SubtractInt $smallDeploymentsPerNamespace (IfThenElse $ENABLE_STATEFULSETS $SMALL_STATEFUL_SETS_PER_NAMESPACE 0)}}
{{$mediumDeploymentsPerNamespace := SubtractInt $mediumDeploymentsPerNamespace (IfThenElse $ENABLE_STATEFULSETS $MEDIUM_STATEFUL_SETS_PER_NAMESPACE 0)}}
# If Jobs are enabled reduce the number of small, medium, big deployments per namespace.
{{$smallDeploymentsPerNamespace := SubtractInt $smallDeploymentsPerNamespace (IfThenElse $ENABLE_JOBS 1 0)}}
{{$mediumDeploymentsPerNamespace := SubtractInt $mediumDeploymentsPerNamespace (IfThenElse $ENABLE_JOBS 1 0)}}
{{$bigDeploymentsPerNamespace := SubtractInt $bigDeploymentsPerNamespace (IfThenElse $ENABLE_JOBS 1 0)}}
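# Worked example with the defaults (100 nodes per namespace, 30 pods/node, StatefulSets
# and Jobs disabled): podsPerNamespace = 3000, giving per namespace 3000/(4*1000) = 0 big,
# 3000/(4*500) = 1 medium, and 3000/(2*50) = 30 small deployments.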
name: load
automanagedNamespaces: {{$namespaces}}
tuningSets:
- name: Sequence
parallelismLimitedLoad:
parallelismLimit: 1
- name: RandomizedSaturationTimeLimited
RandomizedTimeLimitedLoad:
timeLimit: {{$saturationTime}}s
- name: RandomizedScalingTimeLimited
RandomizedTimeLimitedLoad:
# The expected number of created/deleted pods is totalPods/4 when scaling,
# as each RS changes its size from X to a uniform random value in [X/2, 3X/2].
# To match 10 [pods/s] requirement, we need to divide saturationTime by 4.
timeLimit: {{DivideInt $saturationTime 4}}s
{{if $ENABLE_CHAOSMONKEY}}
chaosMonkey:
nodeFailure:
failureRate: 0.01
interval: 1m
jitterFactor: 10.0
simulatedDowntime: 10m
{{end}}
steps:
- name: Starting measurements
measurements:
- Identifier: APIResponsiveness
Method: APIResponsiveness
Params:
action: reset
- Identifier: APIResponsivenessPrometheus
Method: APIResponsivenessPrometheus
Params:
action: start
- Identifier: PodStartupLatency
Method: PodStartupLatency
Params:
action: start
labelSelector: group = load
threshold: 1h
- Identifier: InClusterNetworkLatency
Method: InClusterNetworkLatency
Params:
action: start
replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}}
- Identifier: DnsLookupLatency
Method: DnsLookupLatency
Params:
action: start
replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}}
{{if $PROMETHEUS_SCRAPE_KUBE_PROXY}}
- Identifier: NetworkProgrammingLatency
Method: NetworkProgrammingLatency
Params:
action: start
{{end}}
- Identifier: TestMetrics
Method: TestMetrics
Params:
action: start
nodeMode: {{$NODE_MODE}}
systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}}
- name: Creating SVCs
phases:
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: {{DivideInt (AddInt $bigDeploymentsPerNamespace 1) 2}}
tuningSet: Sequence
objectBundle:
- basename: big-service
objectTemplatePath: service.yaml
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: {{DivideInt (AddInt $mediumDeploymentsPerNamespace 1) 2}}
tuningSet: Sequence
objectBundle:
- basename: medium-service
objectTemplatePath: service.yaml
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: {{DivideInt (AddInt $smallDeploymentsPerNamespace 1) 2}}
tuningSet: Sequence
objectBundle:
- basename: small-service
objectTemplatePath: service.yaml
{{if $ENABLE_DAEMONSETS}}
- name: Creating PriorityClass for DaemonSets
phases:
- replicasPerNamespace: 1
tuningSet: Sequence
objectBundle:
- basename: daemonset-priorityclass
objectTemplatePath: daemonset-priorityclass.yaml
{{end}}
- name: Starting measurement for waiting for pods
measurements:
- Identifier: WaitForRunningDeployments
Method: WaitForControlledPodsRunning
Params:
action: start
apiVersion: apps/v1
kind: Deployment
labelSelector: group = load
operationTimeout: 15m
{{if $ENABLE_STATEFULSETS}}
- Identifier: WaitForRunningStatefulSets
Method: WaitForControlledPodsRunning
Params:
action: start
apiVersion: apps/v1
kind: StatefulSet
labelSelector: group = load
operationTimeout: 15m
{{end}}
{{if $ENABLE_DAEMONSETS}}
- Identifier: WaitForRunningDaemonSets
Method: WaitForControlledPodsRunning
Params:
action: start
apiVersion: apps/v1
kind: DaemonSet
labelSelector: group = load
operationTimeout: 15m
{{end}}
{{if $ENABLE_JOBS}}
- Identifier: WaitForRunningJobs
Method: WaitForControlledPodsRunning
Params:
action: start
apiVersion: batch/v1
kind: Job
labelSelector: group = load
operationTimeout: 15m
{{end}}
- name: Creating objects
phases:
{{if $ENABLE_DAEMONSETS}}
- namespaceRange:
min: 1
max: 1
replicasPerNamespace: 1
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
- basename: daemonset
objectTemplatePath: daemonset.yaml
templateFillMap:
Image: k8s.gcr.io/pause:3.0
{{end}}
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: {{$bigDeploymentsPerNamespace}}
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
{{if $ENABLE_CONFIGMAPS}}
- basename: big-deployment
objectTemplatePath: configmap.yaml
{{end}}
{{if $ENABLE_SECRETS}}
- basename: big-deployment
objectTemplatePath: secret.yaml
{{end}}
{{if $ENABLE_NETWORKPOLICIES}}
- basename: big-deployment
objectTemplatePath: networkpolicy.yaml
{{end}}
- basename: big-deployment
objectTemplatePath: deployment.yaml
templateFillMap:
ReplicasMin: {{$BIG_GROUP_SIZE}}
ReplicasMax: {{$BIG_GROUP_SIZE}}
SvcName: big-service
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: {{$mediumDeploymentsPerNamespace}}
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
{{if $ENABLE_CONFIGMAPS}}
- basename: medium-deployment
objectTemplatePath: configmap.yaml
{{end}}
{{if $ENABLE_SECRETS}}
- basename: medium-deployment
objectTemplatePath: secret.yaml
{{end}}
{{if $ENABLE_NETWORKPOLICIES}}
- basename: medium-deployment
objectTemplatePath: networkpolicy.yaml
{{end}}
- basename: medium-deployment
objectTemplatePath: deployment.yaml
templateFillMap:
ReplicasMin: {{$MEDIUM_GROUP_SIZE}}
ReplicasMax: {{$MEDIUM_GROUP_SIZE}}
SvcName: medium-service
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: {{$smallDeploymentsPerNamespace}}
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
{{if $ENABLE_CONFIGMAPS}}
- basename: small-deployment
objectTemplatePath: configmap.yaml
{{end}}
{{if $ENABLE_SECRETS}}
- basename: small-deployment
objectTemplatePath: secret.yaml
{{end}}
{{if $ENABLE_NETWORKPOLICIES}}
- basename: small-deployment
objectTemplatePath: networkpolicy.yaml
{{end}}
- basename: small-deployment
objectTemplatePath: deployment.yaml
templateFillMap:
ReplicasMin: {{$SMALL_GROUP_SIZE}}
ReplicasMax: {{$SMALL_GROUP_SIZE}}
SvcName: small-service
{{if $ENABLE_STATEFULSETS}}
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}}
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
- basename: small-statefulset
objectTemplatePath: statefulset_service.yaml
- basename: small-statefulset
objectTemplatePath: statefulset.yaml
templateFillMap:
ReplicasMin: {{$SMALL_GROUP_SIZE}}
ReplicasMax: {{$SMALL_GROUP_SIZE}}
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}}
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
- basename: medium-statefulset
objectTemplatePath: statefulset_service.yaml
- basename: medium-statefulset
objectTemplatePath: statefulset.yaml
templateFillMap:
ReplicasMin: {{$MEDIUM_GROUP_SIZE}}
ReplicasMax: {{$MEDIUM_GROUP_SIZE}}
{{end}}
{{if $ENABLE_JOBS}}
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 1
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
- basename: small-job
objectTemplatePath: job.yaml
templateFillMap:
ReplicasMin: {{$SMALL_GROUP_SIZE}}
ReplicasMax: {{$SMALL_GROUP_SIZE}}
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 1
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
- basename: medium-job
objectTemplatePath: job.yaml
templateFillMap:
ReplicasMin: {{$MEDIUM_GROUP_SIZE}}
ReplicasMax: {{$MEDIUM_GROUP_SIZE}}
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 1
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
- basename: big-job
objectTemplatePath: job.yaml
templateFillMap:
ReplicasMin: {{$BIG_GROUP_SIZE}}
ReplicasMax: {{$BIG_GROUP_SIZE}}
{{end}}
- name: Waiting for pods to be running
measurements:
- Identifier: WaitForRunningDeployments
Method: WaitForControlledPodsRunning
Params:
action: gather
{{if $ENABLE_STATEFULSETS}}
- Identifier: WaitForRunningStatefulSets
Method: WaitForControlledPodsRunning
Params:
action: gather
{{end}}
{{if $ENABLE_DAEMONSETS}}
- Identifier: WaitForRunningDaemonSets
Method: WaitForControlledPodsRunning
Params:
action: gather
{{end}}
{{if $ENABLE_JOBS}}
- Identifier: WaitForRunningJobs
Method: WaitForControlledPodsRunning
Params:
action: gather
{{end}}
- name: Scaling and updating objects
phases:
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: {{$bigDeploymentsPerNamespace}}
tuningSet: RandomizedScalingTimeLimited
objectBundle:
- basename: big-deployment
objectTemplatePath: deployment.yaml
templateFillMap:
ReplicasMin: {{MultiplyInt $BIG_GROUP_SIZE 0.5}}
ReplicasMax: {{MultiplyInt $BIG_GROUP_SIZE 1.5}}
SvcName: big-service
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: {{$mediumDeploymentsPerNamespace}}
tuningSet: RandomizedScalingTimeLimited
objectBundle:
- basename: medium-deployment
objectTemplatePath: deployment.yaml
templateFillMap:
ReplicasMin: {{MultiplyInt $MEDIUM_GROUP_SIZE 0.5}}
ReplicasMax: {{MultiplyInt $MEDIUM_GROUP_SIZE 1.5}}
SvcName: medium-service
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: {{$smallDeploymentsPerNamespace}}
tuningSet: RandomizedScalingTimeLimited
objectBundle:
- basename: small-deployment
objectTemplatePath: deployment.yaml
templateFillMap:
ReplicasMin: {{MultiplyInt $SMALL_GROUP_SIZE 0.5}}
ReplicasMax: {{MultiplyInt $SMALL_GROUP_SIZE 1.5}}
SvcName: small-service
{{if $ENABLE_STATEFULSETS}}
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}}
tuningSet: RandomizedScalingTimeLimited
objectBundle:
- basename: small-statefulset
objectTemplatePath: statefulset.yaml
templateFillMap:
ReplicasMin: {{MultiplyInt $SMALL_GROUP_SIZE 0.5}}
ReplicasMax: {{MultiplyInt $SMALL_GROUP_SIZE 1.5}}
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}}
tuningSet: RandomizedScalingTimeLimited
objectBundle:
- basename: medium-statefulset
objectTemplatePath: statefulset.yaml
templateFillMap:
ReplicasMin: {{MultiplyInt $MEDIUM_GROUP_SIZE 0.5}}
ReplicasMax: {{MultiplyInt $MEDIUM_GROUP_SIZE 1.5}}
{{end}}
{{if $ENABLE_DAEMONSETS}}
- namespaceRange:
min: 1
max: 1
replicasPerNamespace: 1
tuningSet: RandomizedScalingTimeLimited
objectBundle:
- basename: daemonset
objectTemplatePath: daemonset.yaml
templateFillMap:
Image: k8s.gcr.io/pause:3.1
{{end}}
{{if $ENABLE_JOBS}}
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 1
tuningSet: RandomizedScalingTimeLimited
objectBundle:
- basename: small-job
objectTemplatePath: job.yaml
templateFillMap:
ReplicasMin: {{MultiplyInt $SMALL_GROUP_SIZE 0.5}}
ReplicasMax: {{MultiplyInt $SMALL_GROUP_SIZE 1.5}}
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 1
tuningSet: RandomizedScalingTimeLimited
objectBundle:
- basename: medium-job
objectTemplatePath: job.yaml
templateFillMap:
ReplicasMin: {{MultiplyInt $MEDIUM_GROUP_SIZE 0.5}}
ReplicasMax: {{MultiplyInt $MEDIUM_GROUP_SIZE 1.5}}
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 1
tuningSet: RandomizedScalingTimeLimited
objectBundle:
- basename: big-job
objectTemplatePath: job.yaml
templateFillMap:
ReplicasMin: {{MultiplyInt $BIG_GROUP_SIZE 0.5}}
ReplicasMax: {{MultiplyInt $BIG_GROUP_SIZE 1.5}}
{{end}}
- name: Waiting for objects to become scaled
measurements:
- Identifier: WaitForRunningDeployments
Method: WaitForControlledPodsRunning
Params:
action: gather
{{if $ENABLE_STATEFULSETS}}
- Identifier: WaitForRunningStatefulSets
Method: WaitForControlledPodsRunning
Params:
action: gather
{{end}}
{{if $ENABLE_DAEMONSETS}}
- Identifier: WaitForRunningDaemonSets
Method: WaitForControlledPodsRunning
Params:
action: gather
{{end}}
{{if $ENABLE_JOBS}}
- Identifier: WaitForRunningJobs
Method: WaitForControlledPodsRunning
Params:
action: gather
{{end}}
- name: Deleting objects
phases:
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 0
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
- basename: big-deployment
objectTemplatePath: deployment.yaml
{{if $ENABLE_CONFIGMAPS}}
- basename: big-deployment
objectTemplatePath: configmap.yaml
{{end}}
{{if $ENABLE_SECRETS}}
- basename: big-deployment
objectTemplatePath: secret.yaml
{{end}}
{{if $ENABLE_NETWORKPOLICIES}}
- basename: big-deployment
objectTemplatePath: networkpolicy.yaml
{{end}}
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 0
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
- basename: medium-deployment
objectTemplatePath: deployment.yaml
{{if $ENABLE_CONFIGMAPS}}
- basename: medium-deployment
objectTemplatePath: configmap.yaml
{{end}}
{{if $ENABLE_SECRETS}}
- basename: medium-deployment
objectTemplatePath: secret.yaml
{{end}}
{{if $ENABLE_NETWORKPOLICIES}}
- basename: medium-deployment
objectTemplatePath: networkpolicy.yaml
{{end}}
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 0
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
- basename: small-deployment
objectTemplatePath: deployment.yaml
{{if $ENABLE_CONFIGMAPS}}
- basename: small-deployment
objectTemplatePath: configmap.yaml
{{end}}
{{if $ENABLE_SECRETS}}
- basename: small-deployment
objectTemplatePath: secret.yaml
{{end}}
{{if $ENABLE_NETWORKPOLICIES}}
- basename: small-deployment
objectTemplatePath: networkpolicy.yaml
{{end}}
{{if $ENABLE_STATEFULSETS}}
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 0
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
- basename: small-statefulset
objectTemplatePath: statefulset.yaml
- basename: small-statefulset
objectTemplatePath: statefulset_service.yaml
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 0
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
- basename: medium-statefulset
objectTemplatePath: statefulset.yaml
- basename: medium-statefulset
objectTemplatePath: statefulset_service.yaml
{{end}}
{{if $ENABLE_DAEMONSETS}}
- namespaceRange:
min: 1
max: 1
replicasPerNamespace: 0
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
- basename: daemonset
objectTemplatePath: daemonset.yaml
{{end}}
{{if $ENABLE_JOBS}}
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 0
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
- basename: small-job
objectTemplatePath: job.yaml
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 0
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
- basename: medium-job
objectTemplatePath: job.yaml
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 0
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
- basename: big-job
objectTemplatePath: job.yaml
{{end}}
# If both StatefulSets and PVs were enabled we need to delete PVs manually.
{{if and $ENABLE_STATEFULSETS $ENABLE_PVS}}
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 0
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
{{range $ssIndex := Seq $SMALL_STATEFUL_SETS_PER_NAMESPACE}}
- basename: pv-small-statefulset-{{$ssIndex}}
objectTemplatePath: pvc.yaml
listUnknownObjectOptions:
labelSelector:
matchLabels:
name: small-statefulset-{{$ssIndex}}
{{end}}
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 0
tuningSet: RandomizedSaturationTimeLimited
objectBundle:
{{range $ssIndex := Seq $MEDIUM_STATEFUL_SETS_PER_NAMESPACE}}
- basename: pv-medium-statefulset-{{$ssIndex}}
objectTemplatePath: pvc.yaml
listUnknownObjectOptions:
labelSelector:
matchLabels:
name: medium-statefulset-{{$ssIndex}}
{{end}}
{{end}}
- name: Waiting for pods to be deleted
measurements:
- Identifier: WaitForRunningDeployments
Method: WaitForControlledPodsRunning
Params:
action: gather
{{if $ENABLE_STATEFULSETS}}
- Identifier: WaitForRunningStatefulSets
Method: WaitForControlledPodsRunning
Params:
action: gather
{{end}}
{{if $ENABLE_DAEMONSETS}}
- Identifier: WaitForRunningDaemonSets
Method: WaitForControlledPodsRunning
Params:
action: gather
{{end}}
{{if $ENABLE_JOBS}}
- Identifier: WaitForRunningJobs
Method: WaitForControlledPodsRunning
Params:
action: gather
{{end}}
{{if and $ENABLE_STATEFULSETS $ENABLE_PVS}}
- Identifier: WaitForPVCsToBeDeleted
Method: WaitForBoundPVCs
Params:
desiredPVCCount: 0
labelSelector: group = load
timeout: 15m
{{end}}
{{if $ENABLE_DAEMONSETS}}
- name: Deleting PriorityClass for DaemonSets
phases:
- replicasPerNamespace: 0
tuningSet: Sequence
objectBundle:
- basename: daemonset-priorityclass
objectTemplatePath: daemonset-priorityclass.yaml
{{end}}
- name: Deleting SVCs
phases:
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 0
tuningSet: Sequence
objectBundle:
- basename: big-service
objectTemplatePath: service.yaml
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 0
tuningSet: Sequence
objectBundle:
- basename: medium-service
objectTemplatePath: service.yaml
- namespaceRange:
min: 1
max: {{$namespaces}}
replicasPerNamespace: 0
tuningSet: Sequence
objectBundle:
- basename: small-service
objectTemplatePath: service.yaml
- name: Collecting measurements
measurements:
- Identifier: APIResponsiveness
Method: APIResponsiveness
Params:
action: gather
- Identifier: APIResponsivenessPrometheusSimple
Method: APIResponsivenessPrometheus
Params:
action: gather
{{if $ENABLE_PROMETHEUS_API_RESPONSIVENESS}}
enableViolations: true
{{end}}
useSimpleLatencyQuery: true
summaryName: APIResponsivenessPrometheus_simple
{{if not $USE_SIMPLE_LATENCY_QUERY}}
- Identifier: APIResponsivenessPrometheus
Method: APIResponsivenessPrometheus
Params:
action: gather
{{end}}
- Identifier: PodStartupLatency
Method: PodStartupLatency
Params:
action: gather
- Identifier: InClusterNetworkLatency
Method: InClusterNetworkLatency
Params:
action: gather
- Identifier: DnsLookupLatency
Method: DnsLookupLatency
Params:
action: gather
{{if $PROMETHEUS_SCRAPE_KUBE_PROXY}}
- Identifier: NetworkProgrammingLatency
Method: NetworkProgrammingLatency
Params:
action: gather
{{end}}
- Identifier: TestMetrics
Method: TestMetrics
Params:
action: gather
systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}}

9
tests/perf/tests/load/configmap.yaml Normal file

@ -0,0 +1,9 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: {{.Name}}
data:
data.yaml: |-
a: 1
b: 2
c: 3

9
tests/perf/tests/load/daemonset-priorityclass.yaml Normal file

@ -0,0 +1,9 @@
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: {{.Name}}
value: 1000000
globalDefault: false
description: "Designated priority class to be used for DaemonSet pods. This is
to make sure they have higher priority than other test pods and there is always
place for them on each node, see kubernetes/kubernetes#82818."

41
tests/perf/tests/load/daemonset.yaml Normal file

@ -0,0 +1,41 @@
{{$Image := DefaultParam .Image "k8s.gcr.io/pause:3.1"}}
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: {{.Name}}
labels:
group: load
spec:
updateStrategy:
rollingUpdate:
maxUnavailable: {{MaxInt 10 (DivideInt .Nodes 20)}} # 5% of nodes, but not less than 10
selector:
matchLabels:
name: {{.Name}}
template:
metadata:
labels:
group: load
name: {{.Name}}
spec:
containers:
- name: {{.Name}}
image: {{$Image}}
resources:
requests:
cpu: 10m
memory: "10M"
priorityClassName: daemonset-priorityclass-0 # Name is autogenerated, hence the -0 suffix.
terminationGracePeriodSeconds: 1
# Add not-ready/unreachable tolerations for 15 minutes so that node
# failure doesn't trigger pod deletion.
tolerations:
- key: "node.kubernetes.io/not-ready"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 900
- key: "node.kubernetes.io/unreachable"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 900

63
tests/perf/tests/load/deployment.yaml Normal file

@ -0,0 +1,63 @@
{{$EnableConfigMaps := DefaultParam .ENABLE_CONFIGMAPS false}}
{{$EnableSecrets := DefaultParam .ENABLE_SECRETS false}}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{.Name}}
labels:
group: load
svc: {{.SvcName}}-{{.Index}}
spec:
replicas: {{RandIntRange .ReplicasMin .ReplicasMax}}
selector:
matchLabels:
name: {{.Name}}
template:
metadata:
labels:
group: load
name: {{.Name}}
svc: {{.SvcName}}-{{.Index}}
spec:
containers:
- image: k8s.gcr.io/pause:3.1
name: {{.Name}}
resources:
requests:
cpu: 10m
memory: "10M"
volumeMounts:
{{if and $EnableConfigMaps (eq (Mod .Index 20) 0 19) }} # .Index % 20 in {0,19} - 10% deployments will have ConfigMap
- name: configmap
mountPath: /var/configmap
{{end}}
{{if and $EnableSecrets (eq (Mod .Index 20) 10 19) }} # .Index % 20 in {10,19} - 10% deployments will have Secret
- name: secret
mountPath: /var/secret
{{end}}
dnsPolicy: Default
terminationGracePeriodSeconds: 1
# Add not-ready/unreachable tolerations for 15 minutes so that node
# failure doesn't trigger pod deletion.
tolerations:
- key: "node.kubernetes.io/not-ready"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 900
- key: "node.kubernetes.io/unreachable"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 900
volumes:
{{if and $EnableConfigMaps (eq (Mod .Index 20) 0 19) }} # .Index % 20 in {0,19} - 10% deployments will have ConfigMap
- name: configmap
configMap:
name: {{.BaseName}}-{{.Index}}
{{end}}
{{if and $EnableSecrets (eq (Mod .Index 20) 10 19) }} # .Index % 20 in {10,19} - 10% deployments will have Secret
- name: secret
secret:
secretName: {{.BaseName}}-{{.Index}}
{{end}}

39
tests/perf/tests/load/job.yaml Normal file

@ -0,0 +1,39 @@
apiVersion: batch/v1
kind: Job
metadata:
name: {{.Name}}
labels:
group: load
spec:
manualSelector: true
parallelism: {{RandIntRange .ReplicasMin .ReplicasMax}}
selector:
matchLabels:
name: {{.Name}}
template:
metadata:
labels:
group: load
name: {{.Name}}
spec:
containers:
- name: {{.Name}}
# TODO(#799): We should test the "run-to-completion" workflow and hence don't use pause pods.
image: k8s.gcr.io/pause:3.1
resources:
requests:
cpu: 10m
memory: "10M"
restartPolicy: Never
terminationGracePeriodSeconds: 1
# Add not-ready/unreachable tolerations for 15 minutes so that node
# failure doesn't trigger pod deletion.
tolerations:
- key: "node.kubernetes.io/not-ready"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 900
- key: "node.kubernetes.io/unreachable"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 900

19
tests/perf/tests/load/networkpolicy.yaml Normal file

@ -0,0 +1,19 @@
{{if eq (Mod .Index 10) 0}} # Create for only 10% of deployments
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: {{.Name}}
spec:
podSelector:
matchLabels:
name: {{.BaseName}}-{{.Index}}
policyTypes:
- Egress
egress:
- to:
- ipBlock:
cidr: 10.0.0.0/24
ports:
- protocol: TCP
port: 8080
{{end}}

4
tests/perf/tests/load/pvc.yaml Normal file

@ -0,0 +1,4 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: {{.Name}}

7
tests/perf/tests/load/secret.yaml Normal file

@ -0,0 +1,7 @@
apiVersion: v1
kind: Secret
metadata:
name: {{.Name}}
type: Opaque
data:
password: c2NhbGFiaWxpdHkK

16
tests/perf/tests/load/service.yaml Normal file

@ -0,0 +1,16 @@
{{$SetServiceProxyLabel := DefaultParam .SetServiceProxyLabel false}}
apiVersion: v1
kind: Service
metadata:
name: {{.Name}}
{{if and $SetServiceProxyLabel (eq (Mod .Index 2) 0)}}
labels:
service.kubernetes.io/service-proxy-name: foo
{{end}}
spec:
selector:
svc: {{.Name}}
ports:
- port: 80
targetPort: 80

61
tests/perf/tests/load/statefulset.yaml Normal file

@ -0,0 +1,61 @@
{{$EnablePVs := DefaultParam .ENABLE_PVS false}}
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: {{.Name}}
labels:
group: load
spec:
podManagementPolicy: Parallel
selector:
matchLabels:
group: load
name: {{.Name}}
serviceName: {{.Name}}
replicas: {{RandIntRange .ReplicasMin .ReplicasMax}}
template:
metadata:
labels:
group: load
name: {{.Name}}
spec:
containers:
- name: {{.Name}}
image: k8s.gcr.io/pause:3.1
ports:
- containerPort: 80
name: web
resources:
requests:
cpu: 10m
memory: "10M"
{{if $EnablePVs}}
volumeMounts:
- name: pv
mountPath: /var/pv
{{end}}
terminationGracePeriodSeconds: 1
# Add not-ready/unreachable tolerations for 15 minutes so that node
# failure doesn't trigger pod deletion.
tolerations:
- key: "node.kubernetes.io/not-ready"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 900
- key: "node.kubernetes.io/unreachable"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 900
{{if $EnablePVs}}
# NOTE: PVs created this way should be cleaned-up manually, as deleting the StatefulSet doesn't automatically delete PVs.
# To avoid deleting all the PVs at once during namespace deletion, they should be deleted explicitly via Phase.
volumeClaimTemplates:
- metadata:
name: pv
spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: 100Mi
{{end}}

10
tests/perf/tests/load/statefulset_service.yaml Normal file

@ -0,0 +1,10 @@
apiVersion: v1
kind: Service
metadata:
name: {{.Name}}
labels:
name: {{.Name}}
spec:
clusterIP: None
selector:
name: {{.Name}}