Update cluster/gce scripts to support Windows nodes.

pull/564/head
Peter Hornyack 2019-01-30 19:46:45 -08:00
parent 1f7e9fd9a2
commit f0f7829934
15 changed files with 2975 additions and 131 deletions

View File

@ -321,6 +321,9 @@ function find-tar() {
# KUBE_MANIFESTS_TAR
function find-release-tars() {
SERVER_BINARY_TAR=$(find-tar kubernetes-server-linux-amd64.tar.gz)
if [[ "${NUM_WINDOWS_NODES}" -gt "0" && "${USE_RELEASE_NODE_BINARIES:-false}" == "false" ]]; then
NODE_BINARY_TAR=$(find-tar kubernetes-node-windows-amd64.tar.gz)
fi
# This tarball is used by GCI, Ubuntu Trusty, and Container Linux.
KUBE_MANIFESTS_TAR=

View File

@ -14,26 +14,36 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Returns the total number of Linux and Windows nodes in the cluster.
#
# NUM_WINDOWS_NODES is treated as 0 when it is unset or empty, so that
# configurations which predate Windows node support still produce a valid
# count instead of an arithmetic syntax error.
#
# Vars assumed:
#   NUM_NODES
# Vars optionally read:
#   NUM_WINDOWS_NODES
# Outputs:
#   the node count on stdout
function get-num-nodes {
  echo "$((${NUM_NODES} + ${NUM_WINDOWS_NODES:-0}))"
}
# Vars assumed:
# NUM_NODES
# NUM_WINDOWS_NODES
function get-master-size {
local suggested_master_size=1
if [[ "${NUM_NODES}" -gt "5" ]]; then
if [[ "$(get-num-nodes)" -gt "5" ]]; then
suggested_master_size=2
fi
if [[ "${NUM_NODES}" -gt "10" ]]; then
if [[ "$(get-num-nodes)" -gt "10" ]]; then
suggested_master_size=4
fi
if [[ "${NUM_NODES}" -gt "100" ]]; then
if [[ "$(get-num-nodes)" -gt "100" ]]; then
suggested_master_size=8
fi
if [[ "${NUM_NODES}" -gt "250" ]]; then
if [[ "$(get-num-nodes)" -gt "250" ]]; then
suggested_master_size=16
fi
if [[ "${NUM_NODES}" -gt "500" ]]; then
if [[ "$(get-num-nodes)" -gt "500" ]]; then
suggested_master_size=32
fi
if [[ "${NUM_NODES}" -gt "3000" ]]; then
if [[ "$(get-num-nodes)" -gt "3000" ]]; then
suggested_master_size=64
fi
echo "${suggested_master_size}"
@ -41,12 +51,13 @@ function get-master-size {
# Vars assumed:
# NUM_NODES
# NUM_WINDOWS_NODES
function get-master-root-disk-size() {
local suggested_master_root_disk_size="20GB"
if [[ "${NUM_NODES}" -gt "500" ]]; then
if [[ "$(get-num-nodes)" -gt "500" ]]; then
suggested_master_root_disk_size="100GB"
fi
if [[ "${NUM_NODES}" -gt "3000" ]]; then
if [[ "$(get-num-nodes)" -gt "3000" ]]; then
suggested_master_root_disk_size="500GB"
fi
echo "${suggested_master_root_disk_size}"
@ -54,12 +65,13 @@ function get-master-root-disk-size() {
# Vars assumed:
# NUM_NODES
# NUM_WINDOWS_NODES
function get-master-disk-size() {
local suggested_master_disk_size="20GB"
if [[ "${NUM_NODES}" -gt "500" ]]; then
if [[ "$(get-num-nodes)" -gt "500" ]]; then
suggested_master_disk_size="100GB"
fi
if [[ "${NUM_NODES}" -gt "3000" ]]; then
if [[ "$(get-num-nodes)" -gt "3000" ]]; then
suggested_master_disk_size="200GB"
fi
echo "${suggested_master_disk_size}"
@ -72,13 +84,13 @@ function get-node-ip-range {
return
fi
local suggested_range="10.40.0.0/22"
if [[ "${NUM_NODES}" -gt 1000 ]]; then
if [[ "$(get-num-nodes)" -gt 1000 ]]; then
suggested_range="10.40.0.0/21"
fi
if [[ "${NUM_NODES}" -gt 2000 ]]; then
if [[ "$(get-num-nodes)" -gt 2000 ]]; then
suggested_range="10.40.0.0/20"
fi
if [[ "${NUM_NODES}" -gt 4000 ]]; then
if [[ "$(get-num-nodes)" -gt 4000 ]]; then
suggested_range="10.40.0.0/19"
fi
echo "${suggested_range}"
@ -86,13 +98,13 @@ function get-node-ip-range {
function get-cluster-ip-range {
local suggested_range="10.64.0.0/14"
if [[ "${NUM_NODES}" -gt 1000 ]]; then
if [[ "$(get-num-nodes)" -gt 1000 ]]; then
suggested_range="10.64.0.0/13"
fi
if [[ "${NUM_NODES}" -gt 2000 ]]; then
if [[ "$(get-num-nodes)" -gt 2000 ]]; then
suggested_range="10.64.0.0/12"
fi
if [[ "${NUM_NODES}" -gt 4000 ]]; then
if [[ "$(get-num-nodes)" -gt 4000 ]]; then
suggested_range="10.64.0.0/11"
fi
echo "${suggested_range}"
@ -114,3 +126,26 @@ function get-alias-range-size() {
# NOTE: Avoid giving nodes empty scopes, because kubelet needs a service account
# in order to initialize properly.
NODE_SCOPES="${NODE_SCOPES:-monitoring,logging-write,storage-ro}"
# Root directory for Kubernetes files on Windows nodes.
WINDOWS_K8S_DIR="C:\etc\kubernetes"
# Directory where Kubernetes binaries will be installed on Windows nodes.
WINDOWS_NODE_DIR="${WINDOWS_K8S_DIR}\node\bin"
# Directory where Kubernetes log files will be stored on Windows nodes.
WINDOWS_LOGS_DIR="${WINDOWS_K8S_DIR}\logs"
# Directory where CNI binaries will be stored on Windows nodes.
WINDOWS_CNI_DIR="${WINDOWS_K8S_DIR}\cni"
# Directory where CNI config files will be stored on Windows nodes.
WINDOWS_CNI_CONFIG_DIR="${WINDOWS_K8S_DIR}\cni\config"
# Pod manifests directory on Windows nodes.
WINDOWS_MANIFESTS_DIR="${WINDOWS_K8S_DIR}\manifests"
# Directory where cert/key files will be stored on Windows nodes.
WINDOWS_PKI_DIR="${WINDOWS_K8S_DIR}\pki"
# Path for kubelet config file on Windows nodes.
WINDOWS_KUBELET_CONFIG_FILE="${WINDOWS_K8S_DIR}\kubelet-config.yaml"
# Path for kubeconfig file on Windows nodes.
WINDOWS_KUBECONFIG_FILE="${WINDOWS_K8S_DIR}\kubelet.kubeconfig"
# Path for bootstrap kubeconfig file on Windows nodes.
WINDOWS_BOOTSTRAP_KUBECONFIG_FILE="${WINDOWS_K8S_DIR}\kubelet.bootstrap-kubeconfig"
# Path for kube-proxy kubeconfig file on Windows nodes.
WINDOWS_KUBEPROXY_KUBECONFIG_FILE="${WINDOWS_K8S_DIR}\kubeproxy.kubeconfig"

View File

@ -29,6 +29,7 @@ RELEASE_REGION_FALLBACK=${RELEASE_REGION_FALLBACK:-false}
REGIONAL_KUBE_ADDONS=${REGIONAL_KUBE_ADDONS:-true}
NODE_SIZE=${NODE_SIZE:-n1-standard-2}
NUM_NODES=${NUM_NODES:-3}
NUM_WINDOWS_NODES=${NUM_WINDOWS_NODES:-0}
MASTER_SIZE=${MASTER_SIZE:-n1-standard-$(get-master-size)}
MASTER_MIN_CPU_ARCHITECTURE=${MASTER_MIN_CPU_ARCHITECTURE:-} # To allow choosing better architectures.
MASTER_DISK_TYPE=pd-ssd
@ -44,6 +45,7 @@ NODE_LOCAL_SSDS=${NODE_LOCAL_SSDS:-0}
# fluentd is not running as a manifest pod with appropriate label.
# TODO(piosz): remove this in 1.8
NODE_LABELS="${KUBE_NODE_LABELS:-beta.kubernetes.io/fluentd-ds-ready=true}"
WINDOWS_NODE_LABELS="${WINDOWS_NODE_LABELS:-}"
# An extension to local SSDs allowing users to specify block/fs and SCSI/NVMe devices
# Format of this variable will be "#,scsi/nvme,block/fs" you can specify multiple
@ -63,6 +65,7 @@ MIG_WAIT_UNTIL_STABLE_TIMEOUT=${MIG_WAIT_UNTIL_STABLE_TIMEOUT:-1800}
MASTER_OS_DISTRIBUTION=${KUBE_MASTER_OS_DISTRIBUTION:-${KUBE_OS_DISTRIBUTION:-gci}}
NODE_OS_DISTRIBUTION=${KUBE_NODE_OS_DISTRIBUTION:-${KUBE_OS_DISTRIBUTION:-gci}}
WINDOWS_NODE_OS_DISTRIBUTION=${WINDOWS_NODE_OS_DISTRIBUTION:-win1803}
if [[ "${MASTER_OS_DISTRIBUTION}" == "cos" ]]; then
MASTER_OS_DISTRIBUTION="gci"
@ -173,15 +176,19 @@ HEAPSTER_MACHINE_TYPE="${HEAPSTER_MACHINE_TYPE:-}"
# NON_MASTER_NODE_LABELS are labels will only be applied on non-master nodes.
NON_MASTER_NODE_LABELS="${KUBE_NON_MASTER_NODE_LABELS:-}"
WINDOWS_NON_MASTER_NODE_LABELS="${WINDOWS_NON_MASTER_NODE_LABELS:-}"
if [[ "${PREEMPTIBLE_MASTER}" == "true" ]]; then
NODE_LABELS="${NODE_LABELS},cloud.google.com/gke-preemptible=true"
WINDOWS_NODE_LABELS="${WINDOWS_NODE_LABELS},cloud.google.com/gke-preemptible=true"
elif [[ "${PREEMPTIBLE_NODE}" == "true" ]]; then
NON_MASTER_NODE_LABELS="${NON_MASTER_NODE_LABELS},cloud.google.com/gke-preemptible=true"
WINDOWS_NON_MASTER_NODE_LABELS="${WINDOWS_NON_MASTER_NODE_LABELS},cloud.google.com/gke-preemptible=true"
fi
# To avoid running Calico on a node that is not configured appropriately,
# label each Node so that the DaemonSet can run the Pods only on ready Nodes.
# Windows nodes do not support Calico.
if [[ ${NETWORK_POLICY_PROVIDER:-} == "calico" ]]; then
NON_MASTER_NODE_LABELS="${NON_MASTER_NODE_LABELS:+${NON_MASTER_NODE_LABELS},}projectcalico.org/ds-ready=true"
fi
@ -194,6 +201,7 @@ CUSTOM_TYPHA_DEPLOYMENT_YAML="${KUBE_CUSTOM_TYPHA_DEPLOYMENT_YAML:-}"
# To avoid running netd on a node that is not configured appropriately,
# label each Node so that the DaemonSet can run the Pods only on ready Nodes.
# Windows nodes do not support netd.
if [[ ${ENABLE_NETD:-} == "true" ]]; then
NON_MASTER_NODE_LABELS="${NON_MASTER_NODE_LABELS:+${NON_MASTER_NODE_LABELS},}cloud.google.com/gke-netd-ready=true"
fi
@ -467,3 +475,7 @@ ENABLE_NODE_TERMINATION_HANDLER="${ENABLE_NODE_TERMINATION_HANDLER:-false}"
if [[ "${NODE_TERMINATION_HANDLER_IMAGE:-}" ]]; then
PROVIDER_VARS="${PROVIDER_VARS:-} NODE_TERMINATION_HANDLER_IMAGE"
fi
# Taint Windows nodes by default to prevent Linux workloads from being
# scheduled onto them.
WINDOWS_NODE_TAINTS="${WINDOWS_NODE_TAINTS:-node.kubernetes.io/os=windows:NoSchedule}"

View File

@ -29,6 +29,7 @@ RELEASE_REGION_FALLBACK=${RELEASE_REGION_FALLBACK:-false}
REGIONAL_KUBE_ADDONS=${REGIONAL_KUBE_ADDONS:-true}
NODE_SIZE=${NODE_SIZE:-n1-standard-2}
NUM_NODES=${NUM_NODES:-3}
NUM_WINDOWS_NODES=${NUM_WINDOWS_NODES:-0}
MASTER_SIZE=${MASTER_SIZE:-n1-standard-$(get-master-size)}
MASTER_MIN_CPU_ARCHITECTURE=${MASTER_MIN_CPU_ARCHITECTURE:-} # To allow choosing better architectures.
MASTER_DISK_TYPE=pd-ssd
@ -44,6 +45,7 @@ NODE_LOCAL_SSDS=${NODE_LOCAL_SSDS:-0}
# fluentd is not running as a manifest pod with appropriate label.
# TODO(piosz): remove this in 1.8
NODE_LABELS="${KUBE_NODE_LABELS:-beta.kubernetes.io/fluentd-ds-ready=true}"
WINDOWS_NODE_LABELS="${WINDOWS_NODE_LABELS:-}"
# An extension to local SSDs allowing users to specify block/fs and SCSI/NVMe devices
# Format of this variable will be "#,scsi/nvme,block/fs" you can specify multiple
@ -66,6 +68,8 @@ MIG_WAIT_UNTIL_STABLE_TIMEOUT=${MIG_WAIT_UNTIL_STABLE_TIMEOUT:-1800}
MASTER_OS_DISTRIBUTION=${KUBE_MASTER_OS_DISTRIBUTION:-${KUBE_OS_DISTRIBUTION:-gci}}
NODE_OS_DISTRIBUTION=${KUBE_NODE_OS_DISTRIBUTION:-${KUBE_OS_DISTRIBUTION:-gci}}
WINDOWS_NODE_OS_DISTRIBUTION=${WINDOWS_NODE_OS_DISTRIBUTION:-win1803}
if [[ "${MASTER_OS_DISTRIBUTION}" == "cos" ]]; then
MASTER_OS_DISTRIBUTION="gci"
fi
@ -81,7 +85,7 @@ fi
# To avoid failing large tests due to some flakes in starting nodes, allow
# for a small percentage of nodes to not start during cluster startup.
ALLOWED_NOTREADY_NODES="${ALLOWED_NOTREADY_NODES:-$((NUM_NODES / 100))}"
ALLOWED_NOTREADY_NODES="${ALLOWED_NOTREADY_NODES:-$(($(get-num-nodes) / 100))}"
# By default a cluster will be started with the master and nodes
# on Container-optimized OS (cos, previously known as gci). If
@ -215,11 +219,14 @@ KUBEPROXY_TEST_ARGS="${KUBEPROXY_TEST_ARGS:-} ${TEST_CLUSTER_API_CONTENT_TYPE}"
# NON_MASTER_NODE_LABELS are labels will only be applied on non-master nodes.
NON_MASTER_NODE_LABELS="${KUBE_NON_MASTER_NODE_LABELS:-}"
WINDOWS_NON_MASTER_NODE_LABELS="${WINDOWS_NON_MASTER_NODE_LABELS:-}"
if [[ "${PREEMPTIBLE_MASTER}" == "true" ]]; then
NODE_LABELS="${NODE_LABELS},cloud.google.com/gke-preemptible=true"
WINDOWS_NODE_LABELS="${WINDOWS_NODE_LABELS},cloud.google.com/gke-preemptible=true"
elif [[ "${PREEMPTIBLE_NODE}" == "true" ]]; then
NON_MASTER_NODE_LABELS="${NON_MASTER_NODE_LABELS},cloud.google.com/gke-preemptible=true"
WINDOWS_NON_MASTER_NODE_LABELS="${WINDOWS_NON_MASTER_NODE_LABELS},cloud.google.com/gke-preemptible=true"
fi
# Optional: Enable netd.
@ -230,6 +237,7 @@ CUSTOM_TYPHA_DEPLOYMENT_YAML="${KUBE_CUSTOM_TYPHA_DEPLOYMENT_YAML:-}"
# To avoid running netd on a node that is not configured appropriately,
# label each Node so that the DaemonSet can run the Pods only on ready Nodes.
# Windows nodes do not support netd.
if [[ ${ENABLE_NETD:-} == "true" ]]; then
NON_MASTER_NODE_LABELS="${NON_MASTER_NODE_LABELS:+${NON_MASTER_NODE_LABELS},}cloud.google.com/gke-netd-ready=true"
fi
@ -238,6 +246,7 @@ ENABLE_NODELOCAL_DNS="${KUBE_ENABLE_NODELOCAL_DNS:-false}"
# To avoid running Calico on a node that is not configured appropriately,
# label each Node so that the DaemonSet can run the Pods only on ready Nodes.
# Windows nodes do not support Calico.
if [[ ${NETWORK_POLICY_PROVIDER:-} == "calico" ]]; then
NON_MASTER_NODE_LABELS="${NON_MASTER_NODE_LABELS:+${NON_MASTER_NODE_LABELS},}projectcalico.org/ds-ready=true"
fi
@ -486,3 +495,7 @@ ENABLE_NODE_TERMINATION_HANDLER="${ENABLE_NODE_TERMINATION_HANDLER:-false}"
if [[ "${NODE_TERMINATION_HANDLER_IMAGE:-}" ]]; then
PROVIDER_VARS="${PROVIDER_VARS:-} NODE_TERMINATION_HANDLER_IMAGE"
fi
# Taint Windows nodes by default to prevent Linux workloads from being
# scheduled onto them.
WINDOWS_NODE_TAINTS="${WINDOWS_NODE_TAINTS:-node.kubernetes.io/os=windows:NoSchedule}"

View File

@ -17,7 +17,7 @@
# A library of helper functions and constant for GCI distro
source "${KUBE_ROOT}/cluster/gce/gci/helper.sh"
function get-node-instance-metadata {
function get-node-instance-metadata-from-file {
local metadata=""
metadata+="kube-env=${KUBE_TEMP}/node-kube-env.yaml,"
metadata+="kubelet-config=${KUBE_TEMP}/node-kubelet-config.yaml,"
@ -34,8 +34,8 @@ function get-node-instance-metadata {
}
# $1: template name (required).
function create-node-instance-template {
function create-linux-node-instance-template {
local template_name="$1"
ensure-gci-metadata-files
create-node-template "$template_name" "${scope_flags[*]}" "$(get-node-instance-metadata)"
create-node-template "${template_name}" "${scope_flags[*]}" "$(get-node-instance-metadata-from-file)" "" "linux"
}

View File

@ -32,6 +32,8 @@ else
exit 1
fi
source "${KUBE_ROOT}/cluster/gce/${WINDOWS_NODE_OS_DISTRIBUTION}/node-helper.sh"
if [[ "${MASTER_OS_DISTRIBUTION}" == "trusty" || "${MASTER_OS_DISTRIBUTION}" == "gci" || "${MASTER_OS_DISTRIBUTION}" == "ubuntu" ]]; then
source "${KUBE_ROOT}/cluster/gce/${MASTER_OS_DISTRIBUTION}/master-helper.sh"
else
@ -57,7 +59,14 @@ fi
# Sets node image based on the specified os distro. Currently this function only
# supports gci and debian.
function set-node-image() {
#
# Requires:
# NODE_OS_DISTRIBUTION
# Sets:
# DEFAULT_GCI_PROJECT
# NODE_IMAGE
# NODE_IMAGE_PROJECT
function set-linux-node-image() {
if [[ "${NODE_OS_DISTRIBUTION}" == "gci" ]]; then
DEFAULT_GCI_PROJECT=google-containers
if [[ "${GCI_VERSION}" == "cos"* ]]; then
@ -71,9 +80,29 @@ function set-node-image() {
fi
}
set-node-image
# Picks the image project and image family used for Windows nodes, based on
# the configured Windows distribution. An unrecognized distribution is
# reported on stderr and terminates the shell with status 1.
#
# Requires:
#   WINDOWS_NODE_OS_DISTRIBUTION
# Sets:
#   WINDOWS_NODE_IMAGE_FAMILY
#   WINDOWS_NODE_IMAGE_PROJECT
function set-windows-node-image() {
  WINDOWS_NODE_IMAGE_PROJECT="windows-cloud"
  case "${WINDOWS_NODE_OS_DISTRIBUTION}" in
    win1803)
      WINDOWS_NODE_IMAGE_FAMILY="windows-1803-core-for-containers"
      ;;
    win2019)
      WINDOWS_NODE_IMAGE_FAMILY="windows-2019-core-for-containers"
      ;;
    win1809)
      WINDOWS_NODE_IMAGE_FAMILY="windows-1809-core-for-containers"
      ;;
    *)
      echo "Unknown WINDOWS_NODE_OS_DISTRIBUTION ${WINDOWS_NODE_OS_DISTRIBUTION}" >&2
      exit 1
      ;;
  esac
}
# Verfiy cluster autoscaler configuration.
set-linux-node-image
set-windows-node-image
# Verify cluster autoscaler configuration.
if [[ "${ENABLE_CLUSTER_AUTOSCALER}" == "true" ]]; then
if [[ -z $AUTOSCALER_MIN_NODES ]]; then
echo "AUTOSCALER_MIN_NODES not set."
@ -342,7 +371,7 @@ function upload-tars() {
fi
}
# Detect minions created in the minion group
# Detect Linux and Windows nodes created in the instance group.
#
# Assumed vars:
# NODE_INSTANCE_PREFIX
@ -535,23 +564,29 @@ function write-master-env {
KUBERNETES_MASTER_NAME="${MASTER_NAME}"
fi
construct-kubelet-flags true
build-kube-env true "${KUBE_TEMP}/master-kube-env.yaml"
build-kubelet-config true "${KUBE_TEMP}/master-kubelet-config.yaml"
construct-linux-kubelet-flags true
build-linux-kube-env true "${KUBE_TEMP}/master-kube-env.yaml"
build-kubelet-config true "linux" "${KUBE_TEMP}/master-kubelet-config.yaml"
build-kube-master-certs "${KUBE_TEMP}/kube-master-certs.yaml"
}
function write-node-env {
function write-linux-node-env {
if [[ -z "${KUBERNETES_MASTER_NAME:-}" ]]; then
KUBERNETES_MASTER_NAME="${MASTER_NAME}"
fi
construct-kubelet-flags false
build-kube-env false "${KUBE_TEMP}/node-kube-env.yaml"
build-kubelet-config false "${KUBE_TEMP}/node-kubelet-config.yaml"
construct-linux-kubelet-flags false
build-linux-kube-env false "${KUBE_TEMP}/node-kube-env.yaml"
build-kubelet-config false "linux" "${KUBE_TEMP}/node-kubelet-config.yaml"
}
function build-node-labels {
# Renders the Windows node configuration files under KUBE_TEMP: computes the
# Windows kubelet flags, then writes the Windows kube-env yaml and the Windows
# kubelet config yaml.
#
# Vars assumed:
#   KUBE_TEMP
function write-windows-node-env {
  construct-windows-kubelet-flags

  local windows_kube_env_yaml="${KUBE_TEMP}/windows-node-kube-env.yaml"
  build-windows-kube-env "${windows_kube_env_yaml}"

  local windows_kubelet_config_yaml="${KUBE_TEMP}/windows-node-kubelet-config.yaml"
  build-kubelet-config false "windows" "${windows_kubelet_config_yaml}"
}
function build-linux-node-labels {
local master=$1
local node_labels=""
if [[ "${KUBE_PROXY_DAEMONSET:-}" == "true" && "${master}" != "true" ]]; then
@ -568,6 +603,17 @@ function build-node-labels {
echo $node_labels
}
# Prints the comma-separated node labels for Windows nodes: WINDOWS_NODE_LABELS
# followed by WINDOWS_NON_MASTER_NODE_LABELS, skipping whichever is empty.
#
# Vars optionally read:
#   WINDOWS_NODE_LABELS
#   WINDOWS_NON_MASTER_NODE_LABELS
# Outputs:
#   the label string on stdout (an empty line when no labels are configured)
function build-windows-node-labels {
  local node_labels="${WINDOWS_NODE_LABELS:-}"
  if [[ -n "${WINDOWS_NON_MASTER_NODE_LABELS:-}" ]]; then
    node_labels="${node_labels:+${node_labels},}${WINDOWS_NON_MASTER_NODE_LABELS}"
  fi
  # Quote the expansion so the labels are emitted verbatim instead of being
  # word-split and glob-expanded against the current directory.
  printf '%s\n' "${node_labels}"
}
# yaml-map-string-stringarray converts the encoded structure to yaml format, and echoes the result
# under the provided name. If the encoded structure is empty, echoes nothing.
# 1: name to be output in yaml
@ -645,12 +691,26 @@ function yaml-map-string-string {
fi
}
# $1: if 'true', we're rendering flags for a master, else a node
function construct-kubelet-flags {
local master=$1
# Returns kubelet flags used on both Linux and Windows nodes.
#
# Vars optionally read:
# KUBELET_TEST_LOG_LEVEL (log-level flag; "--v=2" is used when unset or empty)
# KUBELET_TEST_ARGS
# ROTATE_CERTIFICATES
# MAX_PODS_PER_NODE
# Outputs:
# the assembled flag string on stdout
function construct-common-kubelet-flags {
local flags="${KUBELET_TEST_LOG_LEVEL:-"--v=2"} ${KUBELET_TEST_ARGS:-}"
flags+=" --allow-privileged=true"
flags+=" --cloud-provider=gce"
# TODO(mtaufen): ROTATE_CERTIFICATES seems unused; delete it?
# Any non-empty value of ROTATE_CERTIFICATES turns the flag on.
if [[ -n "${ROTATE_CERTIFICATES:-}" ]]; then
flags+=" --rotate-certificates=true"
fi
if [[ -n "${MAX_PODS_PER_NODE:-}" ]]; then
flags+=" --max-pods=${MAX_PODS_PER_NODE}"
fi
# The expansion is unquoted, so the shell word-splits it: runs of whitespace
# (e.g. from an empty KUBELET_TEST_ARGS) collapse to single spaces on output.
echo $flags
}
# Sets KUBELET_ARGS with the kubelet flags for Linux nodes.
# $1: if 'true', we're rendering flags for a master, else a node
function construct-linux-kubelet-flags {
local master="$1"
local flags="$(construct-common-kubelet-flags)"
flags+=" --allow-privileged=true"
# Keep in sync with CONTAINERIZED_MOUNTER_HOME in configure-helper.sh
flags+=" --experimental-mounter-path=/home/kubernetes/containerized_mounter/mounter"
flags+=" --experimental-check-node-capabilities-before-mount=true"
@ -695,37 +755,131 @@ function construct-kubelet-flags {
flags+=" --non-masquerade-cidr=${NON_MASQUERADE_CIDR}"
fi
flags+=" --volume-plugin-dir=${VOLUME_PLUGIN_DIR}"
local node_labels=$(build-node-labels ${master})
local node_labels="$(build-linux-node-labels ${master})"
if [[ -n "${node_labels:-}" ]]; then
flags+=" --node-labels=${node_labels}"
fi
if [[ -n "${NODE_TAINTS:-}" ]]; then
flags+=" --register-with-taints=${NODE_TAINTS}"
fi
# TODO(mtaufen): ROTATE_CERTIFICATES seems unused; delete it?
if [[ -n "${ROTATE_CERTIFICATES:-}" ]]; then
flags+=" --rotate-certificates=true"
fi
if [[ -n "${CONTAINER_RUNTIME:-}" ]]; then
flags+=" --container-runtime=${CONTAINER_RUNTIME}"
fi
if [[ -n "${CONTAINER_RUNTIME_ENDPOINT:-}" ]]; then
flags+=" --container-runtime-endpoint=${CONTAINER_RUNTIME_ENDPOINT}"
fi
if [[ -n "${MAX_PODS_PER_NODE:-}" ]]; then
flags+=" --max-pods=${MAX_PODS_PER_NODE}"
KUBELET_ARGS="${flags}"
}
# Sets KUBELET_ARGS with the kubelet flags for Windows nodes.
#
# The flags start from construct-common-kubelet-flags, then add the Windows
# node labels, the node taints, and the Windows-specific paths and settings.
#
# Vars assumed:
#   WINDOWS_KUBELET_CONFIG_FILE
#   WINDOWS_BOOTSTRAP_KUBECONFIG_FILE
#   WINDOWS_KUBECONFIG_FILE
#   WINDOWS_PKI_DIR
#   WINDOWS_CNI_DIR
#   WINDOWS_CNI_CONFIG_DIR
#   WINDOWS_MANIFESTS_DIR
#   WINDOWS_LOGS_DIR
# Vars optionally read:
#   NODE_KUBELET_TEST_ARGS
#   NODE_TAINTS
#   WINDOWS_NODE_TAINTS
# Vars set:
#   KUBELET_ARGS
function construct-windows-kubelet-flags {
  local flags="$(construct-common-kubelet-flags)"

  # Note: NODE_KUBELET_TEST_ARGS is empty in typical kube-up runs.
  flags+=" ${NODE_KUBELET_TEST_ARGS:-}"

  local node_labels="$(build-windows-node-labels)"
  if [[ -n "${node_labels:-}" ]]; then
    flags+=" --node-labels=${node_labels}"
  fi

  # Concatenate common and windows-only node taints and apply them.
  # --register-with-taints takes a comma-separated list of
  # <key>=<value>:<effect> entries, so the two sets are joined with ",".
  # Either set may be empty; whichever is non-empty is still applied.
  local node_taints="${NODE_TAINTS:-}"
  if [[ -n "${WINDOWS_NODE_TAINTS:-}" ]]; then
    node_taints="${node_taints:+${node_taints},}${WINDOWS_NODE_TAINTS}"
  fi
  if [[ -n "${node_taints}" ]]; then
    flags+=" --register-with-taints=${node_taints}"
  fi

  # Many of these flags were adapted from
  # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1.
  flags+=" --config=${WINDOWS_KUBELET_CONFIG_FILE}"

  # Path to a kubeconfig file that will be used to get client certificate for
  # kubelet. If the file specified by --kubeconfig does not exist, the bootstrap
  # kubeconfig is used to request a client certificate from the API server. On
  # success, a kubeconfig file referencing the generated client certificate and
  # key is written to the path specified by --kubeconfig. The client certificate
  # and key file will be stored in the directory pointed by --cert-dir.
  #
  # See also:
  # https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet-tls-bootstrapping/
  flags+=" --bootstrap-kubeconfig=${WINDOWS_BOOTSTRAP_KUBECONFIG_FILE}"
  flags+=" --kubeconfig=${WINDOWS_KUBECONFIG_FILE}"

  # The directory where the TLS certs are located.
  flags+=" --cert-dir=${WINDOWS_PKI_DIR}"

  flags+=" --network-plugin=cni"
  flags+=" --cni-bin-dir=${WINDOWS_CNI_DIR}"
  flags+=" --cni-conf-dir=${WINDOWS_CNI_CONFIG_DIR}"
  flags+=" --pod-manifest-path=${WINDOWS_MANIFESTS_DIR}"

  # Windows images are large and we don't have gcr mirrors yet. Allow longer
  # pull progress deadline.
  flags+=" --image-pull-progress-deadline=5m"
  flags+=" --enable-debugging-handlers=true"

  # Configure kubelet to run as a windows service.
  flags+=" --windows-service=true"

  # TODO(mtaufen): Configure logging for kubelet running as a service. I haven't
  # been able to figure out how to direct stdout/stderr into log files when
  # configuring it to run via sc.exe, so we just manually override logging
  # config here.
  flags+=" --log-file=${WINDOWS_LOGS_DIR}\kubelet.log"
  # klog sets this to true internally, so need to override to false so we
  # actually log to the file
  flags+=" --logtostderr=false"

  # Configure flags with explicit empty string values. We can't escape
  # double-quotes, because they still break sc.exe after expansion in the
  # binPath parameter, and single-quotes get parsed as characters instead of
  # string delimiters.
  flags+=" --resolv-conf="

  # Both --cgroups-per-qos and --enforce-node-allocatable should be disabled on
  # windows; the latter requires the former to be enabled to work.
  flags+=" --cgroups-per-qos=false --enforce-node-allocatable="

  # Turn off kernel memory cgroup notification.
  flags+=" --experimental-kernel-memcg-notification=false"

  KUBELET_ARGS="${flags}"
}
# $1: if 'true', we're rendering config for a master, else a node
function build-kubelet-config {
local master=$1
local file=$2
local master="$1"
local os="$2"
local file="$3"
rm -f "${file}"
{
print-common-kubelet-config
if [[ "${master}" == "true" ]]; then
print-master-kubelet-config
else
print-common-node-kubelet-config
if [[ "${os}" == "linux" ]]; then
print-linux-node-kubelet-config
elif [[ "${os}" == "windows" ]]; then
print-windows-node-kubelet-config
else
echo "Unknown OS ${os}" >&2
exit 1
fi
fi
} > "${file}"
}
# cat the Kubelet config yaml in common between masters, linux nodes, and
# windows nodes
function print-common-kubelet-config {
declare quoted_dns_server_ip
declare quoted_dns_domain
quoted_dns_server_ip=$(yaml-quote "${DNS_SERVER_IP}")
@ -740,54 +894,12 @@ cgroupRoot: /
clusterDNS:
- ${quoted_dns_server_ip}
clusterDomain: ${quoted_dns_domain}
staticPodPath: /etc/kubernetes/manifests
readOnlyPort: 10255
EOF
# --- begin master-specific config ---
if [[ "${master}" == "true" ]]; then
cat <<EOF
enableDebuggingHandlers: false
hairpinMode: none
authentication:
webhook:
enabled: false
anonymous:
enabled: true
authorization:
mode: AlwaysAllow
EOF
if [[ "${REGISTER_MASTER_KUBELET:-false}" == "false" ]]; then
# Note: Standalone mode is used by GKE
declare quoted_master_ip_range
quoted_master_ip_range=$(yaml-quote "${MASTER_IP_RANGE}")
cat <<EOF
podCidr: ${quoted_master_ip_range}
EOF
fi
# --- end master-specific config ---
else
# --- begin node-specific config ---
# Keep authentication.x509.clientCAFile in sync with CA_CERT_BUNDLE_PATH in configure-helper.sh
cat <<EOF
enableDebuggingHandlers: true
authentication:
x509:
clientCAFile: /etc/srv/kubernetes/pki/ca-certificates.crt
EOF
if [[ "${HAIRPIN_MODE:-}" == "promiscuous-bridge" ]] || \
[[ "${HAIRPIN_MODE:-}" == "hairpin-veth" ]] || \
[[ "${HAIRPIN_MODE:-}" == "none" ]]; then
declare quoted_hairpin_mode
quoted_hairpin_mode=$(yaml-quote "${HAIRPIN_MODE}")
cat <<EOF
hairpinMode: ${quoted_hairpin_mode}
EOF
fi
# --- end node-specific config ---
fi
# Note: ENABLE_MANIFEST_URL is used by GKE
# Note: ENABLE_MANIFEST_URL is used by GKE.
# TODO(mtaufen): remove this since it's not used in kubernetes/kubernetes nor
# kubernetes/test-infra.
if [[ "${ENABLE_MANIFEST_URL:-}" == "true" ]]; then
declare quoted_manifest_url
quoted_manifest_url=$(yaml-quote "${MANIFEST_URL}")
@ -804,7 +916,81 @@ EOF
if [[ -n "${FEATURE_GATES:-}" ]]; then
yaml-map-string-string 'featureGates' "${FEATURE_GATES}" false '='
fi
} > "${file}"
}
# cat the Kubelet config yaml for masters
#
# Vars optionally read:
# REGISTER_MASTER_KUBELET (treated as "false" when unset or empty)
# Vars assumed when REGISTER_MASTER_KUBELET is "false":
# MASTER_IP_RANGE
# Outputs:
# the master-specific kubelet config yaml on stdout
function print-master-kubelet-config {
cat <<EOF
enableDebuggingHandlers: false
hairpinMode: none
staticPodPath: /etc/kubernetes/manifests
authentication:
webhook:
enabled: false
anonymous:
enabled: true
authorization:
mode: AlwaysAllow
EOF
# podCidr is only emitted when the master kubelet is not registered.
if [[ "${REGISTER_MASTER_KUBELET:-false}" == "false" ]]; then
# Note: Standalone mode is used by GKE
declare quoted_master_ip_range
quoted_master_ip_range=$(yaml-quote "${MASTER_IP_RANGE}")
cat <<EOF
podCidr: ${quoted_master_ip_range}
EOF
fi
}
# Prints the kubelet config yaml shared by Linux and Windows nodes.
#
# Debugging handlers are always enabled. A hairpinMode entry is added only
# when HAIRPIN_MODE holds one of the recognized values; any other value
# (including unset) adds nothing.
#
# Vars optionally read:
#   HAIRPIN_MODE
# Outputs:
#   yaml fragment on stdout
function print-common-node-kubelet-config {
  printf '%s\n' "enableDebuggingHandlers: true"

  case "${HAIRPIN_MODE:-}" in
    promiscuous-bridge | hairpin-veth | none)
      local hairpin_yaml_value
      hairpin_yaml_value=$(yaml-quote "${HAIRPIN_MODE}")
      printf '%s\n' "hairpinMode: ${hairpin_yaml_value}"
      ;;
  esac
}
# cat the Kubelet config yaml for linux nodes
#
# Outputs:
# the static pod manifest path and the x509 client CA file setting, as yaml
# on stdout. Both paths are fixed strings; no variables are read.
function print-linux-node-kubelet-config {
# Keep authentication.x509.clientCAFile in sync with CA_CERT_BUNDLE_PATH in configure-helper.sh
cat <<EOF
staticPodPath: /etc/kubernetes/manifests
authentication:
x509:
clientCAFile: /etc/srv/kubernetes/pki/ca-certificates.crt
EOF
}
# cat the Kubelet config yaml for windows nodes
#
# Vars assumed:
# WINDOWS_PKI_DIR
# Outputs:
# the x509 client CA file setting, as yaml on stdout. No staticPodPath is
# emitted here.
function print-windows-node-kubelet-config {
# Notes:
# - We don't run any static pods on Windows nodes yet.
# TODO(mtaufen): Does it make any sense to set eviction thresholds for inodes
# on Windows?
# TODO(pjh, mtaufen): It may make sense to use a different hairpin mode on
# Windows. We're currently using hairpin-veth, but
# https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L121
# uses promiscuous-bridge.
# TODO(pjh, mtaufen): Does cgroupRoot make sense for Windows?
# Keep authentication.x509.clientCAFile in sync with CA_CERT_BUNDLE_PATH in
# k8s-node-setup.psm1.
# The path is wrapped in single quotes in the emitted yaml; the backslash
# before "ca-certificates.crt" is written out literally.
cat <<EOF
authentication:
x509:
clientCAFile: '${WINDOWS_PKI_DIR}\ca-certificates.crt'
EOF
}
function build-kube-master-certs {
@ -828,9 +1014,9 @@ EOF
}
# $1: if 'true', we're building a master yaml, else a node
function build-kube-env {
local master=$1
local file=$2
function build-linux-kube-env {
local master="$1"
local file="$2"
local server_binary_tar_url=$SERVER_BINARY_TAR_URL
local kube_manifests_tar_url="${KUBE_MANIFESTS_TAR_URL:-}"
@ -1187,7 +1373,7 @@ EOF
# TODO(kubernetes/autoscaler#718): AUTOSCALER_ENV_VARS is a hotfix for cluster autoscaler,
# which reads the kube-env to determine the shape of a node and was broken by #60020.
# This should be removed as soon as a more reliable source of information is available!
local node_labels=$(build-node-labels false)
local node_labels="$(build-linux-node-labels false)"
local node_taints="${NODE_TAINTS:-}"
local autoscaler_env_vars="node_labels=${node_labels};node_taints=${node_taints}"
cat >>$file <<EOF
@ -1207,6 +1393,29 @@ EOF
fi
}
# Writes the kube-env yaml for Windows nodes.
#
# The file is first produced by build-linux-kube-env (as a non-master node),
# then the Windows-specific entries are appended to it.
#
# $1: path of the kube-env file to write.
#
# Vars assumed:
#   NODE_BINARY_TAR_URL
#   NODE_BINARY_TAR_HASH
#   WINDOWS_K8S_DIR
#   WINDOWS_NODE_DIR
#   WINDOWS_LOGS_DIR
#   WINDOWS_CNI_DIR
#   WINDOWS_CNI_CONFIG_DIR
#   WINDOWS_MANIFESTS_DIR
#   WINDOWS_PKI_DIR
#   WINDOWS_KUBELET_CONFIG_FILE
#   WINDOWS_KUBECONFIG_FILE
#   WINDOWS_BOOTSTRAP_KUBECONFIG_FILE
#   WINDOWS_KUBEPROXY_KUBECONFIG_FILE
function build-windows-kube-env {
  local file="$1"
  # For now the Windows kube-env is a superset of the Linux kube-env.
  build-linux-kube-env false "${file}"

  # Every expansion is quoted so that paths and values containing spaces or
  # glob characters reach yaml-quote (and the redirection) as a single word.
  cat >>"${file}" <<EOF
NODE_BINARY_TAR_URL: $(yaml-quote "${NODE_BINARY_TAR_URL}")
NODE_BINARY_TAR_HASH: $(yaml-quote "${NODE_BINARY_TAR_HASH}")
K8S_DIR: $(yaml-quote "${WINDOWS_K8S_DIR}")
NODE_DIR: $(yaml-quote "${WINDOWS_NODE_DIR}")
LOGS_DIR: $(yaml-quote "${WINDOWS_LOGS_DIR}")
CNI_DIR: $(yaml-quote "${WINDOWS_CNI_DIR}")
CNI_CONFIG_DIR: $(yaml-quote "${WINDOWS_CNI_CONFIG_DIR}")
MANIFESTS_DIR: $(yaml-quote "${WINDOWS_MANIFESTS_DIR}")
PKI_DIR: $(yaml-quote "${WINDOWS_PKI_DIR}")
KUBELET_CONFIG_FILE: $(yaml-quote "${WINDOWS_KUBELET_CONFIG_FILE}")
KUBECONFIG_FILE: $(yaml-quote "${WINDOWS_KUBECONFIG_FILE}")
BOOTSTRAP_KUBECONFIG_FILE: $(yaml-quote "${WINDOWS_BOOTSTRAP_KUBECONFIG_FILE}")
KUBEPROXY_KUBECONFIG_FILE: $(yaml-quote "${WINDOWS_KUBEPROXY_KUBECONFIG_FILE}")
EOF
}
function sha1sum-file() {
if which sha1sum >/dev/null 2>&1; then
sha1sum "$1" | awk '{ print $1 }'
@ -1521,6 +1730,7 @@ for c in required:
if missing:
for c in missing:
print ("missing required gcloud component \"{0}\"".format(c))
print ("Try running `gcloud components install {0}`".format(c))
exit(1)
' """${version}"""
fi
@ -1670,19 +1880,23 @@ function validate-node-local-ssds-ext(){
# Robustly try to create an instance template.
# $1: The name of the instance template.
# $2: The scopes flag.
# $3: String of comma-separated metadata entries (must all be from a file).
# $3: String of comma-separated metadata-from-file entries.
# $4: String of comma-separated metadata (key=value) entries.
# $5: the node OS ("linux" or "windows").
function create-node-template() {
detect-project
detect-subnetworks
local template_name="$1"
local metadata_values="$4"
local os="$5"
# First, ensure the template doesn't exist.
# TODO(zmerlynn): To make this really robust, we need to parse the output and
# add retries. Just relying on a non-zero exit code doesn't
# distinguish an ephemeral failed call from a "not-exists".
if gcloud compute instance-templates describe "$template_name" --project "${PROJECT}" &>/dev/null; then
if gcloud compute instance-templates describe "${template_name}" --project "${PROJECT}" &>/dev/null; then
echo "Instance template ${1} already exists; deleting." >&2
if ! gcloud compute instance-templates delete "$template_name" --project "${PROJECT}" --quiet &>/dev/null; then
if ! gcloud compute instance-templates delete "${template_name}" --project "${PROJECT}" --quiet &>/dev/null; then
echo -e "${color_yellow}Failed to delete existing instance template${color_norm}" >&2
exit 2
fi
@ -1737,17 +1951,28 @@ function create-node-template() {
"${ENABLE_IP_ALIASES:-}" \
"${IP_ALIAS_SIZE:-}")
local node_image_flags=""
if [[ "${os}" == 'linux' ]]; then
node_image_flags="--image-project ${NODE_IMAGE_PROJECT} --image ${NODE_IMAGE}"
elif [[ "${os}" == 'windows' ]]; then
node_image_flags="--image-project ${WINDOWS_NODE_IMAGE_PROJECT} --image-family ${WINDOWS_NODE_IMAGE_FAMILY}"
else
echo "Unknown OS ${os}" >&2
exit 1
fi
local metadata_flag="${metadata_values:+--metadata ${metadata_values}}"
local attempt=1
while true; do
echo "Attempt ${attempt} to create ${1}" >&2
if ! ${gcloud} compute instance-templates create \
"$template_name" \
"${template_name}" \
--project "${PROJECT}" \
--machine-type "${NODE_SIZE}" \
--boot-disk-type "${NODE_DISK_TYPE}" \
--boot-disk-size "${NODE_DISK_SIZE}" \
--image-project="${NODE_IMAGE_PROJECT}" \
--image "${NODE_IMAGE}" \
${node_image_flags} \
--service-account "${NODE_SERVICE_ACCOUNT}" \
--tags "${NODE_TAG}" \
${accelerator_args} \
@ -1756,19 +1981,20 @@ function create-node-template() {
${network} \
${preemptible_minions} \
$2 \
--metadata-from-file $3 >&2; then
--metadata-from-file $3 \
${metadata_flag} >&2; then
if (( attempt > 5 )); then
echo -e "${color_red}Failed to create instance template $template_name ${color_norm}" >&2
echo -e "${color_red}Failed to create instance template ${template_name} ${color_norm}" >&2
exit 2
fi
echo -e "${color_yellow}Attempt ${attempt} failed to create instance template $template_name. Retrying.${color_norm}" >&2
echo -e "${color_yellow}Attempt ${attempt} failed to create instance template ${template_name}. Retrying.${color_norm}" >&2
attempt=$(($attempt+1))
sleep $(($attempt * 5))
# In case the previous attempt failed with something like a
# Backend Error and left the entry laying around, delete it
# before we try again.
gcloud compute instance-templates delete "$template_name" --project "${PROJECT}" &>/dev/null || true
gcloud compute instance-templates delete "${template_name}" --project "${PROJECT}" &>/dev/null || true
else
break
fi
@ -1799,7 +2025,9 @@ function kube-up() {
parse-master-env
create-subnetworks
detect-subnetworks
create-nodes
# Windows nodes take longer to boot and setup so create them first.
create-windows-nodes
create-linux-nodes
elif [[ ${KUBE_REPLICATE_EXISTING_MASTER:-} == "true" ]]; then
if [[ "${MASTER_OS_DISTRIBUTION}" != "gci" && "${MASTER_OS_DISTRIBUTION}" != "ubuntu" ]]; then
echo "Master replication supported only for gci and ubuntu"
@ -1822,7 +2050,9 @@ function kube-up() {
create-master
create-nodes-firewall
create-nodes-template
create-nodes
# Windows nodes take longer to boot and setup so create them first.
create-windows-nodes
create-linux-nodes
check-cluster
fi
}
@ -1897,6 +2127,17 @@ function create-network() {
--source-ranges "0.0.0.0/0" \
--allow "tcp:22" &
fi
# Open up TCP 3389 to allow RDP connections.
if [[ ${NUM_WINDOWS_NODES} -gt 0 ]]; then
if ! gcloud compute firewall-rules describe --project "${NETWORK_PROJECT}" "${NETWORK}-default-rdp" &>/dev/null; then
gcloud compute firewall-rules create "${NETWORK}-default-rdp" \
--project "${NETWORK_PROJECT}" \
--network "${NETWORK}" \
--source-ranges "0.0.0.0/0" \
--allow "tcp:3389" &
fi
fi
}
function expand-default-subnetwork() {
@ -2187,7 +2428,7 @@ function create-master() {
create-etcd-certs ${MASTER_NAME}
create-etcd-apiserver-certs "etcd-${MASTER_NAME}" ${MASTER_NAME}
if [[ "${NUM_NODES}" -ge "50" ]]; then
if [[ "$(get-num-nodes)" -ge "50" ]]; then
# We block on master creation for large clusters to avoid doing too much
# unnecessary work in case master start-up fails (like creation of nodes).
create-master-instance "${MASTER_RESERVED_IP}"
@ -2377,17 +2618,25 @@ function create-nodes-template() {
local scope_flags=$(get-scope-flags)
write-node-env
write-linux-node-env
write-windows-node-env
local template_name="${NODE_INSTANCE_PREFIX}-template"
create-node-instance-template $template_name
# NOTE: these template names and their format must match
# create-[linux,windows]-nodes() as well as get-template()!
# TODO(pjh): find a better way to manage these (get-template() is annoying).
local linux_template_name="${NODE_INSTANCE_PREFIX}-template"
local windows_template_name="${NODE_INSTANCE_PREFIX}-template-windows"
create-linux-node-instance-template $linux_template_name
create-windows-node-instance-template $windows_template_name "${scope_flags[*]}"
}
# Assumes:
# - MAX_INSTANCES_PER_MIG
# - NUM_NODES
# - NUM_WINDOWS_NODES
# exports:
# - NUM_MIGS
# - NUM_WINDOWS_MIGS
function set_num_migs() {
local defaulted_max_instances_per_mig=${MAX_INSTANCES_PER_MIG:-1000}
@ -2396,6 +2645,7 @@ function set_num_migs() {
defaulted_max_instances_per_mig=1000
fi
export NUM_MIGS=$(((${NUM_NODES} + ${defaulted_max_instances_per_mig} - 1) / ${defaulted_max_instances_per_mig}))
export NUM_WINDOWS_MIGS=$(((${NUM_WINDOWS_NODES} + ${defaulted_max_instances_per_mig} - 1) / ${defaulted_max_instances_per_mig}))
}
# Assumes:
@ -2404,7 +2654,7 @@ function set_num_migs() {
# - NUM_NODES
# - PROJECT
# - ZONE
function create-nodes() {
function create-linux-nodes() {
local template_name="${NODE_INSTANCE_PREFIX}-template"
if [[ -z "${HEAPSTER_MACHINE_TYPE:-}" ]]; then
@ -2434,7 +2684,7 @@ function create-nodes() {
--zone "${ZONE}" \
--base-instance-name "${group_name}" \
--size "${this_mig_size}" \
--template "$template_name" || true;
--template "${template_name}" || true;
gcloud compute instance-groups managed wait-until-stable \
"${group_name}" \
--zone "${ZONE}" \
@ -2444,6 +2694,44 @@ function create-nodes() {
wait
}
# Creates the managed instance groups (MIGs) that hold the Windows nodes,
# spreading NUM_WINDOWS_NODES as evenly as possible across NUM_WINDOWS_MIGS
# groups, and waits for each group to become stable before creating the next.
#
# Assumes:
# - NUM_WINDOWS_MIGS
# - NODE_INSTANCE_PREFIX
# - NUM_WINDOWS_NODES
# - PROJECT
# - ZONE
# - MIG_WAIT_UNTIL_STABLE_TIMEOUT
function create-windows-nodes() {
  local template_name="${NODE_INSTANCE_PREFIX}-template-windows"

  local -r nodes="${NUM_WINDOWS_NODES}"
  local instances_left=${nodes}
  # Keep the loop counter and the per-group size out of the global scope of
  # whichever script sources this file.
  local i
  local this_mig_size

  for ((i=1; i<=${NUM_WINDOWS_MIGS}; i++)); do
    local group_name="${NODE_INSTANCE_PREFIX}-windows-group-$i"
    if [[ $i == ${NUM_WINDOWS_MIGS} ]]; then
      # TODO: We don't add a suffix for the last group to keep backward compatibility when there's only one MIG.
      # We should change it at some point, but note #18545 when changing this.
      group_name="${NODE_INSTANCE_PREFIX}-windows-group"
    fi
    # Spread the remaining number of nodes evenly
    this_mig_size=$((${instances_left} / (${NUM_WINDOWS_MIGS}-${i}+1)))
    instances_left=$((instances_left-${this_mig_size}))

    # The exit status of both gcloud calls is ignored on purpose (|| true), so
    # a failure here does not stop the loop.
    gcloud compute instance-groups managed \
        create "${group_name}" \
        --project "${PROJECT}" \
        --zone "${ZONE}" \
        --base-instance-name "${group_name}" \
        --size "${this_mig_size}" \
        --template "${template_name}" || true;
    gcloud compute instance-groups managed wait-until-stable \
        "${group_name}" \
        --zone "${ZONE}" \
        --project "${PROJECT}" \
        --timeout "${MIG_WAIT_UNTIL_STABLE_TIMEOUT}" || true;
  done
}
# Assumes:
# - NODE_INSTANCE_PREFIX
# - PROJECT
@ -2486,7 +2774,7 @@ function create-heapster-node() {
--tags "${NODE_TAG}" \
${network} \
$(get-scope-flags) \
--metadata-from-file "$(get-node-instance-metadata)"
--metadata-from-file "$(get-linux-node-instance-metadata-from-file)"
}
# Assumes:
@ -2513,6 +2801,11 @@ function create-cluster-autoscaler-mig-config() {
echo "AUTOSCALER_MAX_NODES must be greater or equal ${NUM_MIGS}"
exit 2
fi
if [[ ${NUM_WINDOWS_MIGS} -gt 0 ]]; then
# TODO(pjh): implement Windows support in this function.
echo "Not implemented yet: autoscaler config for Windows MIGs"
exit 2
fi
# The code assumes that the migs were created with create-nodes
# function which tries to evenly spread nodes across the migs.

View File

@ -0,0 +1,2 @@
approvers:
- yujuhong

View File

@ -0,0 +1,187 @@
# Starting a Windows Kubernetes cluster on GCE using kube-up
## Bring up the cluster
Prerequisites: a Google Cloud Platform project.
### 0. Prepare your environment
Clone this repository under your `$GOPATH/src` directory on a Linux machine.
Then, optionally clean/prepare your environment using these commands:
```
# Remove files that interfere with get-kube / kube-up:
rm -rf ./kubernetes/; rm -f kubernetes.tar.gz; rm -f ~/.kube/config
# Set the default gcloud project for this shell. This is optional but convenient
# if you're working with multiple projects and don't want to repeatedly switch
# between gcloud config configurations.
export CLOUDSDK_CORE_PROJECT=<your_project_name>
```
### 1. Build Kubernetes
The most straightforward approach to build the required Kubernetes binaries is
to run `make release`. However, that builds binaries for all supported
platforms, and can be slow. You can speed up the process by following the
instructions below to only build the necessary binaries.
```
# Fetch the PR: https://github.com/pjh/kubernetes/pull/43
git remote add pjh https://github.com/pjh/kubernetes
git fetch pjh pull/43/head
# Get the commit hash and cherry-pick the commit to your current branch
BUILD_WIN_COMMIT=$(git ls-remote pjh | grep refs/pull/43/head | cut -f 1)
git cherry-pick $BUILD_WIN_COMMIT
# Build binaries for both Linux and Windows
make quick-release
```
### 2. Create a Kubernetes cluster
You can create a regular Kubernetes cluster or an end-to-end test cluster.
Please make sure you set the environment variables properly following the
instructions in the previous section.
First, set the following environment variables which are required for
controlling the number of Linux and Windows nodes in the cluster and for
enabling IP aliases (which are required for Windows pod routing):
```
export NUM_NODES=2 # number of Linux nodes
export NUM_WINDOWS_NODES=2
export KUBE_GCE_ENABLE_IP_ALIASES=true
```
If you wish to use `netd` as the CNI plugin for Linux nodes, set these
variables:
```
export KUBE_ENABLE_NETD=true
export KUBE_CUSTOM_NETD_YAML=$(curl -s \
https://raw.githubusercontent.com/GoogleCloudPlatform/netd/master/netd.yaml \
| sed -e 's/^/ /')
```
Now bring up a cluster using one of the following two methods:
#### 2.a Create a regular Kubernetes cluster
```
# Invoke kube-up.sh with these environment variables:
# PROJECT: text name of your GCP project.
# KUBERNETES_SKIP_CONFIRM: skips any kube-up prompts.
PROJECT=${CLOUDSDK_CORE_PROJECT} KUBERNETES_SKIP_CONFIRM=y ./cluster/kube-up.sh
```
To tear down the cluster, run:
```
PROJECT=${CLOUDSDK_CORE_PROJECT} KUBERNETES_SKIP_CONFIRM=y ./cluster/kube-down.sh
```
#### 2.b Create a Kubernetes end-to-end (E2E) test cluster
```
PROJECT=${CLOUDSDK_CORE_PROJECT} go run ./hack/e2e.go -- --up
```
This command, by default, tears down the existing E2E cluster and creates a new
one.
No matter what type of cluster you choose to create, the result should be a
Kubernetes cluster with one Linux master node, `NUM_NODES` Linux worker nodes
and `NUM_WINDOWS_NODES` Windows worker nodes.
## Validating the cluster
Invoke this script to run a smoke test that verifies that the cluster has been
brought up correctly:
```
cluster/gce/win1803/smoke-test.sh
```
## Running tests against the cluster
These steps are based on
[kubernetes-sigs/windows-testing](https://github.com/kubernetes-sigs/windows-testing).
* TODO(pjh): use patched `cluster/local/util.sh` from
https://github.com/pjh/kubernetes/blob/windows-up/cluster/local/util.sh.
* If necessary, run `alias kubectl=client/bin/kubectl`.
* Set the following environment variables (these values should make sense if
you built your cluster using the kube-up steps above):
```
export KUBE_HOME=$(pwd)
export KUBECONFIG=~/.kube/config
export KUBE_MASTER=local
export KUBE_MASTER_NAME=kubernetes-master
export KUBE_MASTER_IP=$(kubectl get node ${KUBE_MASTER_NAME} -o jsonpath='{.status.addresses[?(@.type=="ExternalIP")].address}')
export KUBE_MASTER_URL=https://${KUBE_MASTER_IP}
export KUBE_MASTER_PORT=443
```
* Download the list of Windows e2e tests:
```
curl https://raw.githubusercontent.com/e2e-win/e2e-win-prow-deployment/master/repo-list.txt -o ${KUBE_HOME}/repo-list.yaml
export KUBE_TEST_REPO_LIST=${KUBE_HOME}/repo-list.yaml
```
* Download and configure the list of tests to exclude:
```
curl https://raw.githubusercontent.com/e2e-win/e2e-win-prow-deployment/master/exclude_conformance_test.txt -o ${KUBE_HOME}/exclude_conformance_test.txt
export EXCLUDED_TESTS=$(cat exclude_conformance_test.txt |
tr -d '\r' | # remove Windows carriage returns
tr -s '\n' '|' | # coalesce newlines into |
tr -s ' ' '.' | # coalesce spaces into .
sed -e 's/[]\[()]/\\&/g' | # escape brackets and parentheses
sed -e 's/.$//g') # remove final | added by tr
```
* Taint the Linux nodes so that test pods will not land on them:
```
export LINUX_NODES=$(kubectl get nodes -l beta.kubernetes.io/os=linux,kubernetes.io/hostname!=${KUBE_MASTER_NAME} -o name)
export LINUX_NODE_COUNT=$(echo ${LINUX_NODES} | wc -w)
for node in $LINUX_NODES; do
kubectl taint node $node node-under-test=false:NoSchedule
done
```
* Build necessary test binaries:
```
make WHAT=test/e2e/e2e.test
```
* Run the tests with flags that point at the "local" (already-running) cluster
and that permit the `NoSchedule` Linux nodes:
```
export KUBETEST_ARGS="--ginkgo.noColor=true "\
"--report-dir=${KUBE_HOME}/e2e-reports "\
"--allowed-not-ready-nodes=${LINUX_NODE_COUNT} "\
"--ginkgo.dryRun=false "\
"--ginkgo.focus=\[Conformance\] "\
"--ginkgo.skip=${EXCLUDED_TESTS}"
go run ${KUBE_HOME}/hack/e2e.go -- --verbose-commands \
--ginkgo-parallel=4 \
--check-version-skew=false --test --provider=local \
--test_args="${KUBETEST_ARGS}" &> ${KUBE_HOME}/conformance.out
```
TODO: copy log files from Windows nodes using some command like:
```
scp -r -o PreferredAuthentications=keyboard-interactive,password \
-o PubkeyAuthentication=no \
user@kubernetes-minion-windows-group-mk0p:C:\\etc\\kubernetes\\logs \
kubetest-logs/
```

View File

@ -0,0 +1,90 @@
# Copyright 2019 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
<#
.SYNOPSIS
Library containing common variables and code used by other PowerShell modules
and scripts for configuring Windows nodes.
#>
# REDO_STEPS affects the behavior of a node that is rebooted after initial
# bringup. When true, on a reboot the scripts will redo steps that were
# determined to have already been completed once (e.g. to overwrite
# already-existing config files). When false the scripts will perform the
# minimum required steps to re-join this node to the cluster.
$REDO_STEPS = $false
# Export the variable so that it is visible to whatever imports this module.
Export-ModuleMember -Variable REDO_STEPS
# Prints $Message to the console with Write-Host. When the -Fatal switch is
# given, the script exits with status 1 once the message has been printed.
function Log-Output {
  param (
    [parameter(Mandatory=$true)] [string]$Message,
    [switch]$Fatal
  )
  Write-Host $Message
  if (-not $Fatal) {
    return
  }
  Exit 1
}
# Decides whether the caller should write (or overwrite) $Filename.
#
# Returns $true when the file does not exist yet, or when it exists and the
# global $REDO_STEPS variable is $true (a warning is logged in that case).
# Returns $false, after logging a "Skip" message, when the file exists and
# $REDO_STEPS is not set, meaning the caller must leave the file alone.
function ShouldWrite-File {
  param (
    [parameter(Mandatory=$true)] [string]$Filename
  )
  if (-not (Test-Path $Filename)) {
    return $true
  }
  if (-not $REDO_STEPS) {
    Log-Output "Skip: $Filename already exists, not overwriting it"
    return $false
  }
  Log-Output "Warning: $Filename already exists, will overwrite it"
  return $true
}
# Returns the GCE instance metadata value for $Key, with surrounding whitespace
# trimmed. If the request fails with a WebException (e.g. the key is not
# present in the instance metadata), returns $Default if it is set to a
# non-empty value; otherwise logs a message and returns $null.
function Get-InstanceMetadataValue {
  param (
    [parameter(Mandatory=$true)] [string]$Key,
    [parameter(Mandatory=$false)] [string]$Default
  )

  $url = ("http://metadata.google.internal/computeMetadata/v1/instance/" +
          "attributes/$Key")
  $client = New-Object Net.WebClient
  try {
    # The metadata server rejects requests that lack this header.
    $client.Headers.Add('Metadata-Flavor', 'Google')
    return ($client.DownloadString($url)).Trim()
  }
  catch [System.Net.WebException] {
    if ($Default) {
      return $Default
    }
    else {
      Log-Output "Failed to retrieve value for $Key."
      return $null
    }
  }
  finally {
    # WebClient is disposable; release it on every exit path.
    $client.Dispose()
  }
}
# Export all public functions, i.e. every function whose name matches the
# wildcard *-* (contains a hyphen, as in Verb-Noun):
Export-ModuleMember -Function *-*

View File

@ -0,0 +1,119 @@
# Copyright 2019 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
<#
.SYNOPSIS
Top-level script that runs on Windows nodes to join them to the K8s cluster.
#>
# Turn errors into terminating errors so that they are handled by the
# try/catch block at the bottom of this script.
$ErrorActionPreference = 'Stop'
# Turn on tracing to debug
# Set-PSDebug -Trace 1
# Update TLS setting to enable Github downloads and disable progress bar to
# increase download speed.
[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
$ProgressPreference = 'SilentlyContinue'
# Returns the GCE instance metadata value for $Key, with surrounding whitespace
# trimmed. If the request fails with a WebException (e.g. the key is not
# present in the instance metadata), returns $Default if it is set to a
# non-empty value; otherwise prints a message and returns $null.
function Get-InstanceMetadataValue {
  param (
    [parameter(Mandatory=$true)] [string]$Key,
    [parameter(Mandatory=$false)] [string]$Default
  )

  $url = ("http://metadata.google.internal/computeMetadata/v1/instance/" +
          "attributes/$Key")
  $client = New-Object Net.WebClient
  try {
    # The metadata server rejects requests that lack this header.
    $client.Headers.Add('Metadata-Flavor', 'Google')
    return ($client.DownloadString($url)).Trim()
  }
  catch [System.Net.WebException] {
    if ($Default) {
      return $Default
    }
    else {
      Write-Host "Failed to retrieve value for $Key."
      return $null
    }
  }
  finally {
    # WebClient is disposable; release it on every exit path.
    $client.Dispose()
  }
}
# Fetches the value of $MetadataKey, saves it to C:\$Filename and imports it as
# a PowerShell module.
#
# If C:\$Filename already exists and $REDO_STEPS is not set, the existing file
# is imported as-is and the metadata server is not contacted. Throws if the
# metadata value cannot be retrieved, so that an empty module file is never
# written (such a file would otherwise be kept and re-imported on later runs).
#
# Note: this function depends on common.psm1.
function FetchAndImport-ModuleFromMetadata {
  param (
    [parameter(Mandatory=$true)] [string]$MetadataKey,
    [parameter(Mandatory=$true)] [string]$Filename
  )

  $path = "C:\$Filename"
  if (Test-Path $path) {
    if (-not $REDO_STEPS) {
      Log-Output "Skip: C:\$Filename already exists, not overwriting"
      Import-Module -Force $path
      return
    }
    Log-Output "Warning: C:\$Filename already exists, will overwrite it."
  }

  # Only fetch once we know that the file is actually going to be written.
  $module = Get-InstanceMetadataValue $MetadataKey
  if (-not $module) {
    throw ("Failed to fetch module $Filename from instance metadata key " +
           "$MetadataKey")
  }
  New-Item -ItemType file -Force $path | Out-Null
  Set-Content $path $module
  Import-Module -Force $path
}
# Main body of the script: fetch and import the helper modules from instance
# metadata, then run the node setup steps in order. Any terminating error is
# reported by the catch block below, which exits with status 1.
try {
# Don't use FetchAndImport-ModuleFromMetadata for common.psm1 - the common
# module includes variables and functions that any other function may depend
# on.
$module = Get-InstanceMetadataValue 'common-psm1'
New-Item -ItemType file -Force C:\common.psm1 | Out-Null
Set-Content C:\common.psm1 $module
Import-Module -Force C:\common.psm1
# TODO(pjh): update the function to set $Filename automatically from the key,
# then put these calls into a loop over a list of XYZ-psm1 keys.
FetchAndImport-ModuleFromMetadata 'k8s-node-setup-psm1' 'k8s-node-setup.psm1'
# NOTE(review): the setup functions called below are defined neither in this
# script nor in common.psm1; presumably they come from k8s-node-setup.psm1 -
# confirm.
Set-PrerequisiteOptions
# NOTE(review): $kube_env is not read again in this script; presumably the
# setup functions below rely on it - confirm.
$kube_env = Fetch-KubeEnv
Set-EnvironmentVars
Create-Directories
Download-HelperScripts
Create-PauseImage
DownloadAndInstall-KubernetesBinaries
Create-NodePki
Create-KubeletKubeconfig
Create-KubeproxyKubeconfig
Set-PodCidr
Configure-HostNetworkingService
Configure-CniNetworking
Configure-Kubelet
Start-WorkerServices
Log-Output 'Waiting 15 seconds for node to join cluster.'
Start-Sleep 15
Verify-WorkerServices
}
catch {
# Report where the failure happened and why, then signal failure to the
# caller through the exit status.
Write-Host 'Exception caught in script:'
Write-Host $_.InvocationInfo.PositionMessage
Write-Host "Kubernetes Windows node setup failed: $($_.Exception.Message)"
exit 1
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,52 @@
#!/usr/bin/env bash
# Copyright 2019 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# A library of helper functions and constants for Windows nodes.
# Prints the comma-separated key=path list that is passed to gcloud's
# --metadata-from-file flag for Windows nodes, followed by
# NODE_EXTRA_METADATA.
#
# Vars assumed:
#   KUBE_TEMP
#   KUBE_ROOT
#   WINDOWS_NODE_OS_DISTRIBUTION
#   NODE_EXTRA_METADATA
function get-windows-node-instance-metadata-from-file {
  local -r scripts_dir="${KUBE_ROOT}/cluster/gce/${WINDOWS_NODE_OS_DISTRIBUTION}"
  local -a entries=(
    "cluster-name=${KUBE_TEMP}/cluster-name.txt"
    "kube-env=${KUBE_TEMP}/windows-node-kube-env.yaml"
    "kubelet-config=${KUBE_TEMP}/windows-node-kubelet-config.yaml"
    # To get startup script output run "gcloud compute instances
    # get-serial-port-output <instance>" from the location where you're
    # running kube-up.
    "windows-startup-script-ps1=${scripts_dir}/configure.ps1"
    "common-psm1=${scripts_dir}/common.psm1"
    "k8s-node-setup-psm1=${scripts_dir}/k8s-node-setup.psm1"
    "user-profile-psm1=${scripts_dir}/user-profile.psm1"
  )
  # Every entry is followed by a comma; the extra metadata goes last.
  local joined
  printf -v joined '%s,' "${entries[@]}"
  echo "${joined}${NODE_EXTRA_METADATA}"
}
# Prints the comma-separated key=value list that is passed to gcloud's
# --metadata flag for Windows nodes.
#
# Vars assumed:
#   KUBE_VERSION (optional, defaults to v1.13.2)
#   WINDOWS_NODE_OS_DISTRIBUTION
function get-windows-node-instance-metadata {
  local -a pairs=(
    "k8s-version=${KUBE_VERSION:-v1.13.2}"
    "serial-port-enable=1"
    # This enables logging the serial port output.
    # https://cloud.google.com/compute/docs/instances/viewing-serial-port-output
    "serial-port-logging-enable=true"
    "win-version=${WINDOWS_NODE_OS_DISTRIBUTION}"
  )
  # "${pairs[*]}" joins the elements with the first character of IFS.
  local IFS=','
  echo "${pairs[*]}"
}
# Creates the instance template for Windows nodes by delegating to
# create-node-template with the Windows metadata and the "windows" OS.
#
# $1: template name (required).
# $2: scopes flag.
function create-windows-node-instance-template {
  local -a template_args=(
    "$1"
    "$2"
    "$(get-windows-node-instance-metadata-from-file)"
    "$(get-windows-node-instance-metadata)"
    "windows"
  )
  create-node-template "${template_args[@]}"
}

672
cluster/gce/win1803/smoke-test.sh Executable file
View File

@ -0,0 +1,672 @@
#!/bin/bash
# Copyright 2019 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# A small smoke test to run against a just-deployed kube-up cluster with Windows
# nodes. Performs checks such as:
# 1) Verifying that all Windows nodes have status Ready.
# 2) Verifying that no system pods are attempting to run on Windows nodes.
# 3) Verifying pairwise connectivity between most of the following: Linux
# pods, Windows pods, K8s services, and the Internet.
# 4) Verifying that basic DNS resolution works in Windows pods.
#
# This script assumes that it is run from the root of the kubernetes repository
# and that kubectl is available on the PATH (see the kubectl variable below).
#
# TODOs:
# - Implement the node-to-pod checks.
# - Capture stdout for each command to a file and only print it when the test
# fails.
# - Move copy-pasted code into reusable functions.
# - Continue running all checks after one fails.
# - Test service connectivity by running a test pod with an http server and
# exposing it as a service (rather than curl-ing from existing system
# services that don't serve http requests).
# - Add test retries for transient errors, such as:
# "error: unable to upgrade connection: Authorization error
# (user=kube-apiserver, verb=create, resource=nodes, subresource=proxy)"
# Override this to use a different kubectl binary.
kubectl=kubectl
# Readiness budgets for the test deployments. The deploy_* functions subtract
# 10 from these after every 10-second sleep, so they are roughly in seconds.
linux_deployment_timeout=60
windows_deployment_timeout=240
# File that receives the output of the connectivity test commands; its
# contents are printed when a test fails.
output_file=/tmp/k8s-smoke-test.out
# Verifies that every Windows node reports the Ready condition as "True".
# Any other status ("False" or "Unknown") is treated as not Ready: the node
# list is printed and the script exits with status 1.
function check_windows_nodes_are_ready {
  # jsonpath is used to extract only the status of each node's Ready
  # condition, as a space-separated list.
  local statuses
  statuses=$(${kubectl} get nodes -l beta.kubernetes.io/os=windows \
    -o jsonpath='{.items[*].status.conditions[?(@.type=="Ready")].status}')

  local status
  for status in $statuses; do
    if [[ "$status" != "True" ]]; then
      echo "ERROR: some Windows node has status != Ready"
      echo "kubectl get nodes -l beta.kubernetes.io/os=windows"
      ${kubectl} get nodes -l beta.kubernetes.io/os=windows
      exit 1
    fi
  done
  echo "Verified that all Windows nodes have status Ready"
}
# Verifies that the wide listing of kube-system pods has no line that matches
# "Pending" or "windows". If such a line exists, prints the pod listing and
# exits with status 1.
function check_no_system_pods_on_windows_nodes {
  local offending_words
  offending_words=$(${kubectl} get pods --namespace kube-system -o wide \
    | grep -E "Pending|windows" | wc -w)

  if [[ $offending_words -eq 0 ]]; then
    echo "Verified that all system pods are running on Linux nodes"
    return 0
  fi

  echo "ERROR: there are kube-system pods trying to run on Windows nodes"
  echo "kubectl get pods --namespace kube-system -o wide"
  ${kubectl} get pods --namespace kube-system -o wide
  exit 1
}
linux_webserver_deployment=linux-nginx
linux_webserver_pod_label=nginx

# Creates a single-replica nginx Deployment restricted to Linux nodes and
# waits for its pods to become Ready, polling every 10 seconds until
# linux_deployment_timeout is used up.
#
# The manifest is written to ${linux_webserver_deployment}.yaml in the current
# directory. Exits with status 1 if the deployment cannot be created or if the
# pods do not become Ready in time (after calling cleanup_deployments).
function deploy_linux_webserver_pod {
  echo "Writing example deployment to $linux_webserver_deployment.yaml"
  cat <<EOF > "${linux_webserver_deployment}.yaml"
apiVersion: apps/v1
kind: Deployment
metadata:
  name: $linux_webserver_deployment
  labels:
    app: $linux_webserver_pod_label
spec:
  replicas: 1
  selector:
    matchLabels:
      app: $linux_webserver_pod_label
  template:
    metadata:
      labels:
        app: $linux_webserver_pod_label
    spec:
      containers:
      - name: nginx
        image: nginx:1.7.9
      nodeSelector:
        beta.kubernetes.io/os: linux
EOF

  if ! ${kubectl} create -f "${linux_webserver_deployment}.yaml"; then
    echo "kubectl create -f $linux_webserver_deployment.yaml failed"
    exit 1
  fi

  local remaining=$linux_deployment_timeout
  local statuses status all_ready
  while [[ $remaining -gt 0 ]]; do
    echo "Waiting for Linux $linux_webserver_pod_label pods to become Ready"
    statuses=$(${kubectl} get pods -l "app=${linux_webserver_pod_label}" \
      -o jsonpath='{.items[*].status.conditions[?(@.type=="Ready")].status}')
    # An empty list means that no pod reports a Ready condition yet (e.g. the
    # pods have not been created so far); that must not be taken for Ready.
    all_ready=false
    if [[ -n "${statuses}" ]]; then
      all_ready=true
      for status in ${statuses}; do
        if [[ "${status}" != "True" ]]; then
          all_ready=false
        fi
      done
    fi
    if [[ "${all_ready}" == "true" ]]; then
      break
    fi
    sleep 10
    remaining=$((remaining - 10))
  done

  # The loop only ends with time left over when it was left through "break".
  if [[ $remaining -gt 0 ]]; then
    echo "All $linux_webserver_pod_label pods became Ready"
  else
    echo "ERROR: Not all $linux_webserver_pod_label pods became Ready"
    echo "kubectl get pods -l app=$linux_webserver_pod_label"
    ${kubectl} get pods -l "app=${linux_webserver_pod_label}"
    cleanup_deployments
    exit 1
  fi
}
# Prints the name of the first pod that matches the Linux webserver label,
# i.e. the name of an arbitrary Linux webserver pod.
function get_linux_webserver_pod_name {
  local -a query=(
    $kubectl get pods -l app=$linux_webserver_pod_label
    -o jsonpath='{.items[0].metadata.name}'
  )
  "${query[@]}"
}
# Prints the pod IP of the first pod that matches the Linux webserver label,
# i.e. the IP address of an arbitrary Linux webserver pod.
function get_linux_webserver_pod_ip {
  local -a query=(
    $kubectl get pods -l app=$linux_webserver_pod_label
    -o jsonpath='{.items[0].status.podIP}'
  )
  "${query[@]}"
}
# Deletes the Linux webserver deployment.
function undeploy_linux_webserver_pod {
  local -a removal=(${kubectl} delete deployment $linux_webserver_deployment)
  "${removal[@]}"
}
linux_command_deployment=linux-ubuntu
linux_command_pod_label=ubuntu

# Creates a single-replica ubuntu Deployment (running "sleep") restricted to
# Linux nodes and waits for its pods to become Ready, polling every 10 seconds
# until linux_deployment_timeout is used up.
#
# The manifest is written to ${linux_command_deployment}.yaml in the current
# directory. Exits with status 1 if the deployment cannot be created or if the
# pods do not become Ready in time (after calling cleanup_deployments).
function deploy_linux_command_pod {
  echo "Writing example deployment to $linux_command_deployment.yaml"
  cat <<EOF > "${linux_command_deployment}.yaml"
apiVersion: apps/v1
kind: Deployment
metadata:
  name: $linux_command_deployment
  labels:
    app: $linux_command_pod_label
spec:
  replicas: 1
  selector:
    matchLabels:
      app: $linux_command_pod_label
  template:
    metadata:
      labels:
        app: $linux_command_pod_label
    spec:
      containers:
      - name: ubuntu
        image: ubuntu
        command: ["sleep", "123456"]
      nodeSelector:
        beta.kubernetes.io/os: linux
EOF

  if ! ${kubectl} create -f "${linux_command_deployment}.yaml"; then
    echo "kubectl create -f $linux_command_deployment.yaml failed"
    exit 1
  fi

  local remaining=$linux_deployment_timeout
  local statuses status all_ready
  while [[ $remaining -gt 0 ]]; do
    echo "Waiting for Linux $linux_command_pod_label pods to become Ready"
    statuses=$(${kubectl} get pods -l "app=${linux_command_pod_label}" \
      -o jsonpath='{.items[*].status.conditions[?(@.type=="Ready")].status}')
    # An empty list means that no pod reports a Ready condition yet (e.g. the
    # pods have not been created so far); that must not be taken for Ready.
    all_ready=false
    if [[ -n "${statuses}" ]]; then
      all_ready=true
      for status in ${statuses}; do
        if [[ "${status}" != "True" ]]; then
          all_ready=false
        fi
      done
    fi
    if [[ "${all_ready}" == "true" ]]; then
      break
    fi
    sleep 10
    remaining=$((remaining - 10))
  done

  # The loop only ends with time left over when it was left through "break".
  if [[ $remaining -gt 0 ]]; then
    echo "All $linux_command_pod_label pods became Ready"
  else
    echo "ERROR: Not all $linux_command_pod_label pods became Ready"
    echo "kubectl get pods -l app=$linux_command_pod_label"
    ${kubectl} get pods -l "app=${linux_command_pod_label}"
    cleanup_deployments
    exit 1
  fi
}
# Prints the name of the first pod that matches the Linux command label, i.e.
# the name of an arbitrary Linux command pod.
function get_linux_command_pod_name {
  local -a query=(
    $kubectl get pods -l app=$linux_command_pod_label
    -o jsonpath='{.items[0].metadata.name}'
  )
  "${query[@]}"
}
# Prints the pod IP of the first pod that matches the Linux command label,
# i.e. the IP address of an arbitrary Linux command pod.
function get_linux_command_pod_ip {
  local -a query=(
    $kubectl get pods -l app=$linux_command_pod_label
    -o jsonpath='{.items[0].status.podIP}'
  )
  "${query[@]}"
}
# Installs the test executables (ping, curl) in the Linux command pod by
# running apt-get inside it; the apt-get stdout is discarded.
# NOTE: this assumes that there is only one Linux "command pod".
# TODO(pjh): fix this.
function prepare_linux_command_pod {
  local pod_name
  pod_name="$(get_linux_command_pod_name)"

  echo "Installing test utilities in Linux command pod, may take a minute"
  # Common prefix for running a command inside the pod.
  local -a in_pod=($kubectl exec "$pod_name" --)
  "${in_pod[@]}" apt-get update > /dev/null
  "${in_pod[@]}" apt-get install -y iputils-ping curl > /dev/null
}
# Deletes the Linux command deployment.
function undeploy_linux_command_pod {
  local -a removal=(${kubectl} delete deployment $linux_command_deployment)
  "${removal[@]}"
}
windows_webserver_deployment=windows-nettest
windows_webserver_pod_label=nettest

# Creates a single-replica e2eteam/nettest Deployment restricted to Windows
# nodes and waits for its pods to become Ready, polling every 10 seconds until
# windows_deployment_timeout is used up.
#
# The manifest is written to ${windows_webserver_deployment}.yaml in the
# current directory. Exits with status 1 if the deployment cannot be created
# or if the pods do not become Ready in time (after calling
# cleanup_deployments).
function deploy_windows_webserver_pod {
  echo "Writing example deployment to $windows_webserver_deployment.yaml"
  cat <<EOF > "${windows_webserver_deployment}.yaml"
# You can run a pod with the e2eteam/nettest:1.0 image (which should listen on
# <podIP>:8080) and create another pod on a different node (linux would be
# easier) to curl the http server:
#   curl http://<pod_ip>:8080/read
apiVersion: apps/v1
kind: Deployment
metadata:
  name: $windows_webserver_deployment
  labels:
    app: $windows_webserver_pod_label
spec:
  replicas: 1
  selector:
    matchLabels:
      app: $windows_webserver_pod_label
  template:
    metadata:
      labels:
        app: $windows_webserver_pod_label
    spec:
      containers:
      - name: nettest
        image: e2eteam/nettest:1.0
      nodeSelector:
        beta.kubernetes.io/os: windows
      tolerations:
      - effect: NoSchedule
        key: node.kubernetes.io/os
        operator: Equal
        value: windows
EOF

  if ! ${kubectl} create -f "${windows_webserver_deployment}.yaml"; then
    echo "kubectl create -f $windows_webserver_deployment.yaml failed"
    exit 1
  fi

  local remaining=$windows_deployment_timeout
  local statuses status all_ready
  while [[ $remaining -gt 0 ]]; do
    echo "Waiting for Windows $windows_webserver_pod_label pods to become Ready"
    statuses=$(${kubectl} get pods -l "app=${windows_webserver_pod_label}" \
      -o jsonpath='{.items[*].status.conditions[?(@.type=="Ready")].status}')
    # An empty list means that no pod reports a Ready condition yet (e.g. the
    # pods have not been created so far); that must not be taken for Ready.
    all_ready=false
    if [[ -n "${statuses}" ]]; then
      all_ready=true
      for status in ${statuses}; do
        if [[ "${status}" != "True" ]]; then
          all_ready=false
        fi
      done
    fi
    if [[ "${all_ready}" == "true" ]]; then
      break
    fi
    sleep 10
    remaining=$((remaining - 10))
  done

  # The loop only ends with time left over when it was left through "break".
  if [[ $remaining -gt 0 ]]; then
    echo "All $windows_webserver_pod_label pods became Ready"
  else
    echo "ERROR: Not all $windows_webserver_pod_label pods became Ready"
    echo "kubectl get pods -l app=$windows_webserver_pod_label"
    ${kubectl} get pods -l "app=${windows_webserver_pod_label}"
    cleanup_deployments
    exit 1
  fi
}
# Prints the name of the first pod that matches the Windows webserver label.
function get_windows_webserver_pod_name {
  local -a query=(
    $kubectl get pods -l app=$windows_webserver_pod_label
    -o jsonpath='{.items[0].metadata.name}'
  )
  "${query[@]}"
}
# Prints the pod IP of the first pod that matches the Windows webserver label.
function get_windows_webserver_pod_ip {
  local -a query=(
    $kubectl get pods -l app=$windows_webserver_pod_label
    -o jsonpath='{.items[0].status.podIP}'
  )
  "${query[@]}"
}
# Deletes the Windows webserver deployment.
function undeploy_windows_webserver_pod {
  local -a removal=(${kubectl} delete deployment $windows_webserver_deployment)
  "${removal[@]}"
}
windows_command_deployment=windows-powershell
windows_command_pod_label=powershell

# Creates a single-replica e2eteam/nettest Deployment restricted to Windows
# nodes, to be used for running commands, and waits for its pods to become
# Ready, polling every 10 seconds until windows_deployment_timeout is used up.
#
# The manifest is written to ${windows_command_deployment}.yaml in the current
# directory. Exits with status 1 if the deployment cannot be created or if the
# pods do not become Ready in time (after calling cleanup_deployments).
function deploy_windows_command_pod {
  echo "Writing example deployment to $windows_command_deployment.yaml"
  cat <<EOF > "${windows_command_deployment}.yaml"
apiVersion: apps/v1
kind: Deployment
metadata:
  name: $windows_command_deployment
  labels:
    app: $windows_command_pod_label
spec:
  replicas: 1
  selector:
    matchLabels:
      app: $windows_command_pod_label
  template:
    metadata:
      labels:
        app: $windows_command_pod_label
    spec:
      containers:
      - name: nettest
        image: e2eteam/nettest:1.0
      nodeSelector:
        beta.kubernetes.io/os: windows
      tolerations:
      - effect: NoSchedule
        key: node.kubernetes.io/os
        operator: Equal
        value: windows
EOF

  if ! ${kubectl} create -f "${windows_command_deployment}.yaml"; then
    echo "kubectl create -f $windows_command_deployment.yaml failed"
    exit 1
  fi

  local remaining=$windows_deployment_timeout
  local statuses status all_ready
  while [[ $remaining -gt 0 ]]; do
    echo "Waiting for Windows $windows_command_pod_label pods to become Ready"
    statuses=$(${kubectl} get pods -l "app=${windows_command_pod_label}" \
      -o jsonpath='{.items[*].status.conditions[?(@.type=="Ready")].status}')
    # An empty list means that no pod reports a Ready condition yet (e.g. the
    # pods have not been created so far); that must not be taken for Ready.
    all_ready=false
    if [[ -n "${statuses}" ]]; then
      all_ready=true
      for status in ${statuses}; do
        if [[ "${status}" != "True" ]]; then
          all_ready=false
        fi
      done
    fi
    if [[ "${all_ready}" == "true" ]]; then
      break
    fi
    sleep 10
    remaining=$((remaining - 10))
  done

  # The loop only ends with time left over when it was left through "break".
  if [[ $remaining -gt 0 ]]; then
    echo "All $windows_command_pod_label pods became Ready"
  else
    echo "ERROR: Not all $windows_command_pod_label pods became Ready"
    echo "kubectl get pods -l app=$windows_command_pod_label"
    ${kubectl} get pods -l "app=${windows_command_pod_label}"
    cleanup_deployments
    exit 1
  fi
}
# Prints the name of the first pod that matches the Windows command label.
function get_windows_command_pod_name {
  local -a query=(
    $kubectl get pods -l app=$windows_command_pod_label
    -o jsonpath='{.items[0].metadata.name}'
  )
  "${query[@]}"
}
# Prints the pod IP of the first pod that matches the Windows command label.
function get_windows_command_pod_ip {
  local -a query=(
    $kubectl get pods -l app=$windows_command_pod_label
    -o jsonpath='{.items[0].status.podIP}'
  )
  "${query[@]}"
}
# Deletes the Windows command deployment.
function undeploy_windows_command_pod {
  local -a removal=(${kubectl} delete deployment $windows_command_deployment)
  "${removal[@]}"
}
# Placeholder for the Linux node to Linux pod check, which is not implemented
# yet; only prints a TODO marker with the function name.
function test_linux_node_to_linux_pod {
  printf '%s\n' "TODO: ${FUNCNAME[0]}"
}
# Placeholder for the Linux node to Windows pod check, which is not
# implemented yet; only prints a TODO marker with the function name.
function test_linux_node_to_windows_pod {
  printf '%s\n' "TODO: ${FUNCNAME[0]}"
}
# Curls the Linux webserver pod's IP from inside the Linux command pod.
#
# Vars assumed:
#   kubectl
#   output_file (receives stdout and stderr of the kubectl exec)
#
# On failure: cleans up all deployments, prints the captured output and exits
# the script with status 1.
function test_linux_pod_to_linux_pod {
  echo "TEST: ${FUNCNAME[0]}"
  local linux_command_pod
  linux_command_pod="$(get_linux_command_pod_name)"
  local linux_webserver_pod_ip
  linux_webserver_pod_ip="$(get_linux_webserver_pod_ip)"
  # output_file is quoted so a path containing whitespace does not turn the
  # redirection into an "ambiguous redirect" error.
  if ! ${kubectl} exec "${linux_command_pod}" -- curl -m 20 \
      "http://${linux_webserver_pod_ip}" &> "${output_file}"; then
    cleanup_deployments
    echo "Failing output: $(cat "${output_file}")"
    echo "FAILED: ${FUNCNAME[0]}"
    exit 1
  fi
}
# TODO(pjh): this test flakily fails on brand-new clusters, not sure why.
#   % Total % Received % Xferd Average Speed Time Time Time Current
#   Dload Upload Total Spent Left Speed
#   0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0
#   curl: (6) Could not resolve host:
#   command terminated with exit code 6
#
# Curls port 8080, path /read, on the Windows webserver pod's IP from inside
# the Linux command pod.
#
# Vars assumed:
#   kubectl
#   output_file (receives stdout and stderr of the kubectl exec)
#
# On failure: cleans up all deployments, prints the captured output and exits
# the script with status 1.
function test_linux_pod_to_windows_pod {
  echo "TEST: ${FUNCNAME[0]}"
  local linux_command_pod
  linux_command_pod="$(get_linux_command_pod_name)"
  local windows_webserver_pod_ip
  windows_webserver_pod_ip="$(get_windows_webserver_pod_ip)"
  # output_file is quoted so a path containing whitespace does not turn the
  # redirection into an "ambiguous redirect" error.
  if ! ${kubectl} exec "${linux_command_pod}" -- curl -m 20 \
      "http://${windows_webserver_pod_ip}:8080/read" &> "${output_file}"; then
    cleanup_deployments
    echo "Failing output: $(cat "${output_file}")"
    echo "FAILED: ${FUNCNAME[0]}"
    echo "This test seems to be flaky. TODO(pjh): investigate."
    exit 1
  fi
}
# Curls a well-known Internet IP (8.8.8.8, Google DNS) from inside the Linux
# command pod, so the check does not depend on DNS.
#
# Vars assumed:
#   kubectl
#   output_file (receives stdout and stderr of the kubectl exec)
#
# On failure: cleans up all deployments, prints the captured output and exits
# the script with status 1.
function test_linux_pod_to_internet {
  echo "TEST: ${FUNCNAME[0]}"
  local linux_command_pod
  linux_command_pod="$(get_linux_command_pod_name)"
  local internet_ip="8.8.8.8"  # Google DNS
  # This is expected to return 404 (not found).
  # Both stdout and stderr are captured, matching the other Linux pod tests,
  # so that curl's own error text ends up in the reported failing output.
  if ! ${kubectl} exec "${linux_command_pod}" -- curl -m 20 \
      "http://${internet_ip}" &> "${output_file}"; then
    cleanup_deployments
    echo "Failing output: $(cat "${output_file}")"
    echo "FAILED: ${FUNCNAME[0]}"
    exit 1
  fi
}
# Curls the cluster IP and TCP port of the kube-system "heapster" service from
# inside the Linux command pod.
#
# Vars assumed:
#   kubectl
#   output_file (receives stdout and stderr of the kubectl exec)
#
# On failure: cleans up all deployments, prints the captured output and exits
# the script with status 1.
function test_linux_pod_to_k8s_service {
  echo "TEST: ${FUNCNAME[0]}"
  local linux_command_pod
  linux_command_pod="$(get_linux_command_pod_name)"
  local service="heapster"
  local service_ip
  service_ip=$(${kubectl} get service --namespace kube-system "${service}" \
    -o jsonpath='{.spec.clusterIP}')
  local service_port
  service_port=$(${kubectl} get service --namespace kube-system "${service}" \
    -o jsonpath='{.spec.ports[?(@.protocol=="TCP")].port}')
  echo "curl-ing ${service} address from Linux pod: ${service_ip}:${service_port}"
  # curl-ing the heapster service results in an expected 404 response code. The
  # curl command does not set a failure return code in this case.
  if ! ${kubectl} exec "${linux_command_pod}" -- \
      curl -m 20 "http://${service_ip}:${service_port}" &> "${output_file}"; then
    cleanup_deployments
    echo "Failing output: $(cat "${output_file}")"
    echo "FAILED: ${FUNCNAME[0]}"
    exit 1
  fi
}
# Placeholder: Windows node -> Linux pod connectivity is not tested yet; only
# announces itself as a TODO.
function test_windows_node_to_linux_pod {
  printf 'TODO: %s\n' "${FUNCNAME[0]}"
}
# Placeholder: Windows node -> Windows pod connectivity is not tested yet; only
# announces itself as a TODO.
function test_windows_node_to_windows_pod {
  printf 'TODO: %s\n' "${FUNCNAME[0]}"
}
# TODO(pjh): this test failed for me once with
#   error: unable to upgrade connection: container not found ("nettest")
# Maybe the container crashed for some reason? Investigate if it happens more.
#
# TODO(pjh): another one-time failure:
#   error: unable to upgrade connection: Authorization error
#   (user=kube-apiserver, verb=create, resource=nodes, subresource=proxy)
#
# Runs a PowerShell web request against the Linux webserver pod's IP from
# inside the Windows command pod.
#
# Vars assumed:
#   kubectl
#   output_file (receives stdout of the kubectl exec)
#
# On failure: cleans up all deployments, prints the captured output and exits
# the script with status 1.
function test_windows_pod_to_linux_pod {
  echo "TEST: ${FUNCNAME[0]}"
  local windows_command_pod
  windows_command_pod="$(get_windows_command_pod_name)"
  local linux_webserver_pod_ip
  linux_webserver_pod_ip="$(get_linux_webserver_pod_ip)"
  # output_file is quoted so a path containing whitespace does not turn the
  # redirection into an "ambiguous redirect" error.
  if ! ${kubectl} exec "${windows_command_pod}" -- powershell.exe \
      "curl -UseBasicParsing http://${linux_webserver_pod_ip}" \
      > "${output_file}"; then
    cleanup_deployments
    echo "Failing output: $(cat "${output_file}")"
    echo "FAILED: ${FUNCNAME[0]}"
    exit 1
  fi
}
# Runs a PowerShell web request against port 8080, path /read, on the Windows
# webserver pod's IP from inside the Windows command pod.
#
# Vars assumed:
#   kubectl
#   output_file (receives stdout of the kubectl exec)
#
# On failure: cleans up all deployments, prints the captured output and exits
# the script with status 1.
function test_windows_pod_to_windows_pod {
  echo "TEST: ${FUNCNAME[0]}"
  local windows_command_pod
  windows_command_pod="$(get_windows_command_pod_name)"
  local windows_webserver_pod_ip
  windows_webserver_pod_ip="$(get_windows_webserver_pod_ip)"
  # output_file is quoted so a path containing whitespace does not turn the
  # redirection into an "ambiguous redirect" error.
  if ! ${kubectl} exec "${windows_command_pod}" -- powershell.exe \
      "curl -UseBasicParsing http://${windows_webserver_pod_ip}:8080/read" \
      > "${output_file}"; then
    cleanup_deployments
    echo "Failing output: $(cat "${output_file}")"
    echo "FAILED: ${FUNCNAME[0]}"
    exit 1
  fi
}
# Checks Internet connectivity from inside the Windows command pod by
# requesting http://8.8.8.8 and requiring a 404 status code.
#
# Vars assumed:
#   kubectl
#   output_file (receives stdout of the kubectl exec)
#
# On failure: cleans up all deployments, prints the captured output and exits
# the script with status 1.
function test_windows_pod_to_internet {
  echo "TEST: ${FUNCNAME[0]}"
  local windows_command_pod
  windows_command_pod="$(get_windows_command_pod_name)"
  local internet_ip="8.8.8.8"
  # This snippet tests Internet connectivity without depending on DNS by
  # attempting to curl Google's well-known DNS IP, 8.8.8.8. On success we expect
  # to get back a 404 status code; on failure the response object will have a
  # status code of 0 or some other HTTP code.
  #
  # In the PowerShell text below, a trailing backtick continues the line. The
  # Write-Host line deliberately has none, so that the newline separates it
  # from the following "exit 1" statement.
  if ! ${kubectl} exec "${windows_command_pod}" -- powershell.exe \
      "\$response = try { \`
        (curl -UseBasicParsing http://${internet_ip} \`
           -ErrorAction Stop).BaseResponse \`
      } catch [System.Net.WebException] { \`
         \$_.Exception.Response \`
      }; \`
      \$statusCodeInt = [int]\$response.StatusCode; \`
      if (\$statusCodeInt -eq 404) { \`
        exit 0 \`
      } else { \`
        Write-Host \"curl ${internet_ip} got unexpected status code \$statusCodeInt\"
        exit 1 \`
      }" > "${output_file}"; then
    cleanup_deployments
    echo "Failing output: $(cat "${output_file}")"
    echo "FAILED: ${FUNCNAME[0]}"
    exit 1
  fi
}
# Requests the cluster IP and TCP port of the kube-system "heapster" service
# from inside the Windows command pod and requires a 404 status code.
#
# Vars assumed:
#   kubectl
#   output_file (receives stdout of the kubectl exec)
#
# On failure: cleans up all deployments, prints the captured output and exits
# the script with status 1.
function test_windows_pod_to_k8s_service {
  echo "TEST: ${FUNCNAME[0]}"
  local windows_command_pod
  windows_command_pod="$(get_windows_command_pod_name)"
  local service="heapster"
  local service_ip
  service_ip=$(${kubectl} get service --namespace kube-system "${service}" \
    -o jsonpath='{.spec.clusterIP}')
  local service_port
  service_port=$(${kubectl} get service --namespace kube-system "${service}" \
    -o jsonpath='{.spec.ports[?(@.protocol=="TCP")].port}')
  local service_address="${service_ip}:${service_port}"
  echo "curl-ing ${service} address from Windows pod: ${service_address}"
  # Performing a web request to the heapster service results in an expected 404
  # response; this code snippet filters out the expected 404 from other status
  # codes that indicate failure.
  #
  # In the PowerShell text below, a trailing backtick continues the line. The
  # Write-Host line deliberately has none, so that the newline separates it
  # from the following "exit 1" statement.
  if ! ${kubectl} exec "${windows_command_pod}" -- powershell.exe \
      "\$response = try { \`
        (curl -UseBasicParsing http://${service_address} \`
           -ErrorAction Stop).BaseResponse \`
      } catch [System.Net.WebException] { \`
         \$_.Exception.Response \`
      }; \`
      \$statusCodeInt = [int]\$response.StatusCode; \`
      if (\$statusCodeInt -eq 404) { \`
        exit 0 \`
      } else { \`
        Write-Host \"curl ${service_address} got unexpected status code \$statusCodeInt\"
        exit 1 \`
      }" > "${output_file}"; then
    cleanup_deployments
    echo "Failing output: $(cat "${output_file}")"
    echo "FAILED: ${FUNCNAME[0]}"
    exit 1
  fi
}
# Resolves www.bing.com from inside the Windows command pod, using the cluster
# IP of the kube-system "kube-dns" service as the DNS server.
#
# Vars assumed:
#   kubectl
#   output_file (receives stdout of the kubectl exec)
#
# On failure: cleans up all deployments, prints the captured output and exits
# the script with status 1.
function test_kube_dns_in_windows_pod {
  echo "TEST: ${FUNCNAME[0]}"
  local windows_command_pod
  windows_command_pod="$(get_windows_command_pod_name)"
  local service="kube-dns"
  local service_ip
  service_ip=$(${kubectl} get service --namespace kube-system "${service}" \
    -o jsonpath='{.spec.clusterIP}')
  # output_file is quoted so a path containing whitespace does not turn the
  # redirection into an "ambiguous redirect" error.
  if ! ${kubectl} exec "${windows_command_pod}" -- powershell.exe \
      "Resolve-DnsName www.bing.com -server ${service_ip}" \
      > "${output_file}"; then
    cleanup_deployments
    echo "Failing output: $(cat "${output_file}")"
    echo "FAILED: ${FUNCNAME[0]}"
    exit 1
  fi
}
# Requests http://www.bing.com by name from inside the Windows command pod,
# relying on whatever DNS configuration the pod already has.
#
# Vars assumed:
#   kubectl
#   output_file (receives stdout of the kubectl exec)
#
# On failure: cleans up all deployments, prints the captured output and exits
# the script with status 1.
function test_dns_just_works_in_windows_pod {
  echo "TEST: ${FUNCNAME[0]}"
  local windows_command_pod
  windows_command_pod="$(get_windows_command_pod_name)"
  # output_file is quoted so a path containing whitespace does not turn the
  # redirection into an "ambiguous redirect" error.
  if ! ${kubectl} exec "${windows_command_pod}" -- powershell.exe \
      "curl -UseBasicParsing http://www.bing.com" > "${output_file}"; then
    cleanup_deployments
    echo "Failing output: $(cat "${output_file}")"
    echo "FAILED: ${FUNCNAME[0]}"
    exit 1
  fi
}
# Tears down every deployment the smoke test creates, in a fixed order: Linux
# webserver, Linux command pod, Windows webserver, Windows command pod.
function cleanup_deployments {
  local undeploy_fn
  for undeploy_fn in \
      undeploy_linux_webserver_pod \
      undeploy_linux_command_pod \
      undeploy_windows_webserver_pod \
      undeploy_windows_command_pod; do
    "${undeploy_fn}"
  done
}
# Script body: check cluster preconditions and bring up the test pods, run
# every connectivity test in order, then tear the pods down again.
setup_steps=(
  check_windows_nodes_are_ready
  check_no_system_pods_on_windows_nodes
  deploy_linux_webserver_pod
  deploy_linux_command_pod
  deploy_windows_webserver_pod
  deploy_windows_command_pod
  prepare_linux_command_pod
)
for setup_step in "${setup_steps[@]}"; do
  "${setup_step}"
done
echo ""

connectivity_tests=(
  test_linux_node_to_linux_pod
  test_linux_node_to_windows_pod
  test_linux_pod_to_linux_pod
  test_linux_pod_to_windows_pod
  test_linux_pod_to_k8s_service
  # Note: test_windows_node_to_k8s_service is not supported at this time.
  # https://docs.microsoft.com/en-us/virtualization/windowscontainers/kubernetes/common-problems#my-windows-node-cannot-access-my-services-using-the-service-ip
  test_windows_node_to_linux_pod
  test_windows_node_to_windows_pod
  test_windows_pod_to_linux_pod
  test_windows_pod_to_windows_pod
  test_windows_pod_to_internet
  test_windows_pod_to_k8s_service
  test_kube_dns_in_windows_pod
  test_dns_just_works_in_windows_pod
)
for connectivity_test in "${connectivity_tests[@]}"; do
  "${connectivity_test}"
done
echo ""

cleanup_deployments
echo "All tests passed!"
exit 0

View File

@ -0,0 +1,337 @@
<#
.Synopsis
Rough PS functions to create new user profiles
.DESCRIPTION
Call the Create-NewProfile function directly to create a new profile
.EXAMPLE
Create-NewProfile -Username 'testUser1' -Password 'testUser1'
.NOTES
Created by: Josh Rickard (@MS_dministrator) and Thom Schumacher (@driberif)
Forked by: @crshnbrn66, then @pjh (2018-11-08). See
https://gist.github.com/pjh/9753cd14400f4e3d4567f4553ba75f1d/revisions
Date: 24MAR2017
Location: https://gist.github.com/crshnbrn66/7e81bf20408c05ddb2b4fdf4498477d8
Contact: https://github.com/MSAdministrator
MSAdministrator.com
https://github.com/crshnbrn66
powershellposse.com
#>
# Creates a local user account on this computer through the ADSI WinNT
# provider, sets its password, ORs 0x10000 into its user flags and adds it to
# the local "Users" group.
function New-LocalUser
{
    [CmdletBinding()]
    [Alias()]
    [OutputType([int])]
    Param
    (
        # Name of the local account to create.
        [Parameter(Mandatory=$true,
                   ValueFromPipelineByPropertyName=$true,
                   Position=0)]
        $userName,
        # Password assigned to the new account.
        [string]
        $password
    )

    $computer = [ADSI]"WinNT://$env:COMPUTERNAME"

    # The account only exists once SetInfo() has committed it.
    $account = $computer.Create("user", $userName)
    $account.SetPassword($password)
    $account.SetInfo()

    # 0x10000 is ADS_UF_DONT_EXPIRE_PASSWD in the ADSI user-flag enumeration.
    $updatedFlags = $account.UserFlags.value -bor 0x10000
    $account.put("userflags", $updatedFlags)
    $account.SetInfo()

    $usersGroup = [ADSI]("WinNT://$env:COMPUTERNAME/Users")
    $usersGroup.PSBase.Invoke("Add", $account.PSBase.Path)
}
# Queues one native (P/Invoke) method for later code generation by appending a
# { Dll; Signature } record to the script-scoped $script:nativeMethods list.
function Register-NativeMethod
{
    [CmdletBinding()]
    [Alias()]
    [OutputType([int])]
    Param
    (
        # Name of the DLL that exports the method.
        [Parameter(Mandatory=$true,
                   ValueFromPipelineByPropertyName=$true,
                   Position=0)]
        [string]$dll,
        # C# signature of the method, without modifiers or trailing semicolon.
        [Parameter(Mandatory=$true,
                   ValueFromPipelineByPropertyName=$true,
                   Position=1)]
        [string]
        $methodSignature
    )

    $registration = [PSCustomObject]@{
        Dll = $dll
        Signature = $methodSignature
    }
    $script:nativeMethods += $registration
}
# Compiles, via Add-Type, a static C# class named $typeName that declares the
# kernel32.dll GetLastError() function. Nothing is compiled when a type with
# that name is already loaded. The function does not itself call
# GetLastError().
function Get-Win32LastError
{
[CmdletBinding()]
[Alias()]
[OutputType([int])]
# Name of the C# class to generate (default 'LastError').
Param($typeName = 'LastError')
if (-not ([System.Management.Automation.PSTypeName]$typeName).Type)
{
# The DllImport declaration text is emitted once per item piped in from
# $script:lasterror. NOTE(review): $script:lasterror is not assigned anywhere
# in the visible code - confirm what it is expected to hold.
$lasterrorCode = $script:lasterror | ForEach-Object{
'[DllImport("kernel32.dll", SetLastError = true)]
public static extern uint GetLastError();'
}
# The here-string below is C# source; $typeName and $lasterrorCode are
# expanded into it before compilation.
Add-Type @"
using System;
using System.Text;
using System.Runtime.InteropServices;
public static class $typeName {
$lasterrorCode
}
"@
}
}
# Compiles, via Add-Type, a static C# class named $typeName containing one
# DllImport declaration for every entry currently in $script:nativeMethods
# (entries are appended by Register-NativeMethod).
function Add-NativeMethods
{
[CmdletBinding()]
[Alias()]
[OutputType([int])]
# Name of the C# class to generate (default 'NativeMethods').
Param($typeName = 'NativeMethods')
# Build one "[DllImport(...)] public static extern <signature>;" fragment per
# registered method, using each entry's Dll and Signature properties.
$nativeMethodsCode = $script:nativeMethods | ForEach-Object { "
[DllImport(`"$($_.Dll)`")]
public static extern $($_.Signature);
" }
# The here-string below is C# source; $typeName and $nativeMethodsCode are
# expanded into it before compilation.
Add-Type @"
using System;
using System.Text;
using System.Runtime.InteropServices;
public static class $typeName {
$nativeMethodsCode
}
"@
}
# Main entry point: creates a local user with New-LocalUser and then asks
# userenv.dll's CreateProfile to create that user's profile. The value returned
# by CreateProfile is discarded.
function Create-NewProfile {
    [CmdletBinding()]
    [Alias()]
    [OutputType([int])]
    Param
    (
        # Name of the local user to create.
        [Parameter(Mandatory=$true,
                   ValueFromPipelineByPropertyName=$true,
                   Position=0)]
        [string]$UserName,
        # Password for the new user.
        [Parameter(Mandatory=$true,
                   ValueFromPipelineByPropertyName=$true,
                   Position=1)]
        [string]
        $Password
    )

    Write-Verbose "Creating local user $Username"
    try
    {
        New-LocalUser -username $UserName -password $Password
    }
    catch
    {
        Write-Error $_.Exception.Message
        # NOTE(review): "break" is used here outside of any loop; confirm the
        # effect on callers is the intended one before changing it.
        break
    }

    # Generate the P/Invoke wrapper class once per session; the registration
    # list is reset first so only CreateProfile ends up in the class.
    $nativeTypeName = 'UserEnvCP'
    $script:nativeMethods = @()
    if (-not ([System.Management.Automation.PSTypeName]$nativeTypeName).Type)
    {
        Register-NativeMethod "userenv.dll" "int CreateProfile([MarshalAs(UnmanagedType.LPWStr)] string pszUserSid,`
[MarshalAs(UnmanagedType.LPWStr)] string pszUserName,`
[Out][MarshalAs(UnmanagedType.LPWStr)] StringBuilder pszProfilePath, uint cchProfilePath)"
        Add-NativeMethods -typeName $nativeTypeName
    }

    # Translate the account name into its SID string for CreateProfile.
    $account = New-Object System.Security.Principal.NTAccount("$UserName")
    $accountSid = $account.Translate([System.Security.Principal.SecurityIdentifier])

    # Buffer that receives the profile path, and its capacity in characters.
    $profilePathBuffer = New-Object System.Text.StringBuilder(260)
    $profilePathCapacity = $profilePathBuffer.Capacity

    Write-Verbose "Creating user profile for $Username"
    try
    {
        [UserEnvCP]::CreateProfile($accountSid.Value, $Username, $profilePathBuffer, $profilePathCapacity) | Out-Null
    }
    catch
    {
        Write-Error $_.Exception.Message
        break
    }
}
# Creates the profile of an already-existing account by resolving the account
# to its SID and calling userenv.dll's CreateProfile. Writes a one-line status
# string describing the outcome to the pipeline.
function New-ProfileFromSID {
    [CmdletBinding()]
    [Alias()]
    [OutputType([int])]
    Param
    (
        # Name of the account whose profile should be created.
        [Parameter(Mandatory=$true,
                   ValueFromPipelineByPropertyName=$true,
                   Position=0)]
        [string]$UserName,
        # Domain used to qualify the account name; pass an empty string to
        # resolve $UserName without a domain.
        [string]$domain = 'PHCORP'
    )

    # Generate the P/Invoke wrapper class once per session; the registration
    # list is reset first so only CreateProfile ends up in the class.
    $methodname = 'UserEnvCP2'
    $script:nativeMethods = @();
    if (-not ([System.Management.Automation.PSTypeName]$methodname).Type)
    {
        Register-NativeMethod "userenv.dll" "int CreateProfile([MarshalAs(UnmanagedType.LPWStr)] string pszUserSid,`
[MarshalAs(UnmanagedType.LPWStr)] string pszUserName,`
[Out][MarshalAs(UnmanagedType.LPWStr)] StringBuilder pszProfilePath, uint cchProfilePath)";
        Add-NativeMethods -typeName $methodname;
    }

    # Buffer that receives the profile path, and its capacity in characters.
    $sb = new-object System.Text.StringBuilder(260);
    $pathLen = $sb.Capacity;

    Write-Verbose "Creating user profile for $Username";
    #$SID= ((get-aduser -id $UserName -ErrorAction Stop).sid.value)
    if($domain)
    {
        $objUser = New-Object System.Security.Principal.NTAccount($domain, $UserName)
    }
    else
    {
        $objUser = New-Object System.Security.Principal.NTAccount($UserName)
    }
    $strSID = $objUser.Translate([System.Security.Principal.SecurityIdentifier])
    $SID = $strSID.Value
    Write-Verbose "$UserName SID: $SID"

    try
    {
        $result = [UserEnvCP2]::CreateProfile($SID, $Username, $sb, $pathLen)
        if($result -eq '-2147024713')
        {
            # 0x800700B7
            $status = "$userName already exists"
            write-verbose "$username Creation Result: $result"
        }
        elseif($result -eq '-2147024809')
        {
            # 0x80070057. This branch previously assigned to a misspelled
            # variable, so no status was returned for this result.
            $status = "$username Not Found"
            write-verbose "$username creation result: $result"
        }
        elseif($result -eq 0)
        {
            $status = "$username Profile has been created"
            write-verbose "$username Creation Result: $result"
        }
        else
        {
            $status = "$UserName unknown return result: $result"
        }
    }
    catch
    {
        Write-Error $_.Exception.Message;
        break;
    }
    $status
}
# Deletes the profile of an account by resolving the account to its SID and
# calling userenv.dll's DeleteProfile through a generated C# helper. Writes the
# helper's result to the pipeline: 0 on success, otherwise the Win32 error code
# reported by GetLastError().
Function Remove-Profile {
    [CmdletBinding()]
    [Alias()]
    [OutputType([int])]
    Param
    (
        # Name of the account whose profile should be deleted.
        [Parameter(Mandatory=$true,
                   ValueFromPipelineByPropertyName=$true,
                   Position=0)]
        [string]$UserName,
        # Accepted for callers but not used by this function.
        [string]$ProfilePath,
        # Domain used to qualify the account name; pass an empty string to
        # resolve $UserName without a domain.
        [string]$domain = 'PHCORP'
    )

    $methodname = 'userenvDP'
    $script:nativeMethods = @();
    if (-not ([System.Management.Automation.PSTypeName]"$methodname.profile").Type)
    {
        # The namespace must be $methodname so that the generated type is
        # userenvDP.Profile, which is what the guard above and the call below
        # refer to. It previously expanded an undefined variable, producing C#
        # that does not compile.
        add-type @"
using System.Runtime.InteropServices;
namespace $methodname
{
    public static class UserEnv
    {
        [DllImport("userenv.dll", CharSet = CharSet.Unicode, ExactSpelling = false, SetLastError = true)]
        public static extern bool DeleteProfile(string sidString, string profilePath, string computerName);
        [DllImport("kernel32.dll")]
        public static extern uint GetLastError();
    }
    public static class Profile
    {
        public static uint Delete(string sidString)
        { //Profile path and computer name are optional
            if (!UserEnv.DeleteProfile(sidString, null, null))
            {
                return UserEnv.GetLastError();
            }
            return 0;
        }
    }
}
"@
    }

    #$SID= ((get-aduser -id $UserName -ErrorAction Stop).sid.value)
    if($domain)
    {
        $objUser = New-Object System.Security.Principal.NTAccount($domain, $UserName)
    }
    else
    {
        $objUser = New-Object System.Security.Principal.NTAccount($UserName)
    }
    $strSID = $objUser.Translate([System.Security.Principal.SecurityIdentifier])
    $SID = $strSID.Value
    Write-Verbose "$UserName SID: $SID"

    try
    {
        #http://stackoverflow.com/questions/31949002/c-sharp-delete-user-profile
        $result = [userenvDP.Profile]::Delete($SID)
    }
    catch
    {
        Write-Error $_.Exception.Message;
        break;
    }
    # Return the captured result; the variable returned here before was never
    # assigned, so callers received nothing.
    $result
}
# Only Create-NewProfile is exported from this module; the other functions
# stay private to it.
Export-ModuleMember -Function Create-NewProfile

View File

@ -50,9 +50,8 @@ function kubectl_retry() {
ALLOWED_NOTREADY_NODES="${ALLOWED_NOTREADY_NODES:-0}"
CLUSTER_READY_ADDITIONAL_TIME_SECONDS="${CLUSTER_READY_ADDITIONAL_TIME_SECONDS:-30}"
EXPECTED_NUM_NODES="${NUM_NODES}"
if [[ "${KUBERNETES_PROVIDER:-}" == "gce" ]]; then
EXPECTED_NUM_NODES="$(get-num-nodes)"
echo "Validating gce cluster, MULTIZONE=${MULTIZONE:-}"
# In multizone mode we need to add instances for all nodes in the region.
if [[ "${MULTIZONE:-}" == "true" ]]; then
@ -60,6 +59,8 @@ if [[ "${KUBERNETES_PROVIDER:-}" == "gce" ]]; then
--filter="name ~ '${NODE_INSTANCE_PREFIX}.*' AND zone:($(gcloud -q compute zones list --project="${PROJECT}" --filter=region=${REGION} --format=csv[no-heading]\(name\) | tr "\n" "," | sed "s/,$//"))" | wc -l)
echo "Computing number of nodes, NODE_INSTANCE_PREFIX=${NODE_INSTANCE_PREFIX}, REGION=${REGION}, EXPECTED_NUM_NODES=${EXPECTED_NUM_NODES}"
fi
else
EXPECTED_NUM_NODES="${NUM_NODES}"
fi
if [[ "${REGISTER_MASTER_KUBELET:-}" == "true" ]]; then