mirror of https://github.com/k3s-io/k3s
Add standalone npd on GCI.
parent
56afb95641
commit
d40c0a7099
|
@ -1,6 +1,6 @@
|
||||||
# Maintainers
|
# Maintainers
|
||||||
|
|
||||||
Lantao Liu <lantaol@google.com>
|
Random-Liu <lantaol@google.com>
|
||||||
|
|
||||||
|
|
||||||
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/cluster/addons/node-problem-detector/MAINTAINERS.md?pixel)]()
|
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/cluster/addons/node-problem-detector/MAINTAINERS.md?pixel)]()
|
||||||
|
|
|
@ -1,38 +0,0 @@
|
||||||
apiVersion: extensions/v1beta1
|
|
||||||
kind: DaemonSet
|
|
||||||
metadata:
|
|
||||||
name: node-problem-detector-v0.1
|
|
||||||
namespace: kube-system
|
|
||||||
labels:
|
|
||||||
k8s-app: node-problem-detector
|
|
||||||
version: v0.1
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
spec:
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
k8s-app: node-problem-detector
|
|
||||||
version: v0.1
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
spec:
|
|
||||||
hostNetwork: true
|
|
||||||
containers:
|
|
||||||
- name: node-problem-detector
|
|
||||||
image: gcr.io/google_containers/node-problem-detector:v0.1
|
|
||||||
securityContext:
|
|
||||||
privileged: true
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: "200m"
|
|
||||||
memory: "100Mi"
|
|
||||||
requests:
|
|
||||||
cpu: "20m"
|
|
||||||
memory: "20Mi"
|
|
||||||
volumeMounts:
|
|
||||||
- name: log
|
|
||||||
mountPath: /log
|
|
||||||
readOnly: true
|
|
||||||
volumes:
|
|
||||||
- name: log
|
|
||||||
hostPath:
|
|
||||||
path: /var/log/
|
|
|
@ -0,0 +1,77 @@
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ServiceAccount
|
||||||
|
metadata:
|
||||||
|
name: node-problem-detector
|
||||||
|
namespace: kube-system
|
||||||
|
labels:
|
||||||
|
kubernetes.io/cluster-service: "true"
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1alpha1
|
||||||
|
kind: ClusterRoleBinding
|
||||||
|
metadata:
|
||||||
|
name: npd-binding
|
||||||
|
labels:
|
||||||
|
kubernetes.io/cluster-service: "true"
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: ClusterRole
|
||||||
|
name: system:node-problem-detector
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: node-problem-detector
|
||||||
|
namespace: kube-system
|
||||||
|
---
|
||||||
|
apiVersion: extensions/v1beta1
|
||||||
|
kind: DaemonSet
|
||||||
|
metadata:
|
||||||
|
name: npd-v0.3.0-alpha.1
|
||||||
|
namespace: kube-system
|
||||||
|
labels:
|
||||||
|
k8s-app: node-problem-detector
|
||||||
|
version: v0.3.0-alpha.1
|
||||||
|
kubernetes.io/cluster-service: "true"
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
k8s-app: node-problem-detector
|
||||||
|
version: v0.3.0-alpha.1
|
||||||
|
kubernetes.io/cluster-service: "true"
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: node-problem-detector
|
||||||
|
image: gcr.io/google_containers/node-problem-detector:v0.3.0-alpha.1
|
||||||
|
command:
|
||||||
|
- /node-problem-detector
|
||||||
|
- --logtostderr
|
||||||
|
# Pass both configs to support both journald and syslog.
|
||||||
|
- --system-log-monitors=/config/kernel-monitor.json,/config/kernel-monitor-filelog.json
|
||||||
|
securityContext:
|
||||||
|
privileged: true
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: "200m"
|
||||||
|
memory: "100Mi"
|
||||||
|
requests:
|
||||||
|
cpu: "20m"
|
||||||
|
memory: "20Mi"
|
||||||
|
env:
|
||||||
|
- name: NODE_NAME
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: spec.nodeName
|
||||||
|
volumeMounts:
|
||||||
|
- name: log
|
||||||
|
mountPath: /var/log
|
||||||
|
readOnly: true
|
||||||
|
- name: localtime
|
||||||
|
mountPath: /etc/localtime
|
||||||
|
readOnly: true
|
||||||
|
volumes:
|
||||||
|
- name: log
|
||||||
|
hostPath:
|
||||||
|
path: /var/log/
|
||||||
|
- name: localtime
|
||||||
|
hostPath:
|
||||||
|
path: /etc/localtime
|
||||||
|
serviceAccountName: node-problem-detector
|
|
@ -0,0 +1,14 @@
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1alpha1
|
||||||
|
kind: ClusterRoleBinding
|
||||||
|
metadata:
|
||||||
|
name: npd-binding
|
||||||
|
labels:
|
||||||
|
kubernetes.io/cluster-service: "true"
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: ClusterRole
|
||||||
|
name: system:node-problem-detector
|
||||||
|
subjects:
|
||||||
|
- apiVersion: rbac/v1alpha1
|
||||||
|
kind: User
|
||||||
|
name: system:node-problem-detector
|
|
@ -627,7 +627,7 @@ DOCKER_REGISTRY_MIRROR_URL: $(yaml-quote ${DOCKER_REGISTRY_MIRROR_URL:-})
|
||||||
ENABLE_L7_LOADBALANCING: $(yaml-quote ${ENABLE_L7_LOADBALANCING:-none})
|
ENABLE_L7_LOADBALANCING: $(yaml-quote ${ENABLE_L7_LOADBALANCING:-none})
|
||||||
ENABLE_CLUSTER_LOGGING: $(yaml-quote ${ENABLE_CLUSTER_LOGGING:-false})
|
ENABLE_CLUSTER_LOGGING: $(yaml-quote ${ENABLE_CLUSTER_LOGGING:-false})
|
||||||
ENABLE_CLUSTER_UI: $(yaml-quote ${ENABLE_CLUSTER_UI:-false})
|
ENABLE_CLUSTER_UI: $(yaml-quote ${ENABLE_CLUSTER_UI:-false})
|
||||||
ENABLE_NODE_PROBLEM_DETECTOR: $(yaml-quote ${ENABLE_NODE_PROBLEM_DETECTOR:-false})
|
ENABLE_NODE_PROBLEM_DETECTOR: $(yaml-quote ${ENABLE_NODE_PROBLEM_DETECTOR:-none})
|
||||||
ENABLE_NODE_LOGGING: $(yaml-quote ${ENABLE_NODE_LOGGING:-false})
|
ENABLE_NODE_LOGGING: $(yaml-quote ${ENABLE_NODE_LOGGING:-false})
|
||||||
ENABLE_RESCHEDULER: $(yaml-quote ${ENABLE_RESCHEDULER:-false})
|
ENABLE_RESCHEDULER: $(yaml-quote ${ENABLE_RESCHEDULER:-false})
|
||||||
LOGGING_DESTINATION: $(yaml-quote ${LOGGING_DESTINATION:-})
|
LOGGING_DESTINATION: $(yaml-quote ${LOGGING_DESTINATION:-})
|
||||||
|
@ -641,6 +641,7 @@ DNS_DOMAIN: $(yaml-quote ${DNS_DOMAIN:-})
|
||||||
ENABLE_DNS_HORIZONTAL_AUTOSCALER: $(yaml-quote ${ENABLE_DNS_HORIZONTAL_AUTOSCALER:-false})
|
ENABLE_DNS_HORIZONTAL_AUTOSCALER: $(yaml-quote ${ENABLE_DNS_HORIZONTAL_AUTOSCALER:-false})
|
||||||
KUBELET_TOKEN: $(yaml-quote ${KUBELET_TOKEN:-})
|
KUBELET_TOKEN: $(yaml-quote ${KUBELET_TOKEN:-})
|
||||||
KUBE_PROXY_TOKEN: $(yaml-quote ${KUBE_PROXY_TOKEN:-})
|
KUBE_PROXY_TOKEN: $(yaml-quote ${KUBE_PROXY_TOKEN:-})
|
||||||
|
NODE_PROBLEM_DETECTOR_TOKEN: $(yaml-quote ${NODE_PROBLEM_DETECTOR_TOKEN:-})
|
||||||
ADMISSION_CONTROL: $(yaml-quote ${ADMISSION_CONTROL:-})
|
ADMISSION_CONTROL: $(yaml-quote ${ADMISSION_CONTROL:-})
|
||||||
MASTER_IP_RANGE: $(yaml-quote ${MASTER_IP_RANGE})
|
MASTER_IP_RANGE: $(yaml-quote ${MASTER_IP_RANGE})
|
||||||
RUNTIME_CONFIG: $(yaml-quote ${RUNTIME_CONFIG})
|
RUNTIME_CONFIG: $(yaml-quote ${RUNTIME_CONFIG})
|
||||||
|
@ -1048,6 +1049,7 @@ function parse-master-env() {
|
||||||
local master_env=$(get-master-env)
|
local master_env=$(get-master-env)
|
||||||
KUBELET_TOKEN=$(get-env-val "${master_env}" "KUBELET_TOKEN")
|
KUBELET_TOKEN=$(get-env-val "${master_env}" "KUBELET_TOKEN")
|
||||||
KUBE_PROXY_TOKEN=$(get-env-val "${master_env}" "KUBE_PROXY_TOKEN")
|
KUBE_PROXY_TOKEN=$(get-env-val "${master_env}" "KUBE_PROXY_TOKEN")
|
||||||
|
NODE_PROBLEM_DETECTOR_TOKEN=$(get-env-val "${master_env}" "NODE_PROBLEM_DETECTOR_TOKEN")
|
||||||
CA_CERT_BASE64=$(get-env-val "${master_env}" "CA_CERT")
|
CA_CERT_BASE64=$(get-env-val "${master_env}" "CA_CERT")
|
||||||
CA_KEY_BASE64=$(get-env-val "${master_env}" "CA_KEY")
|
CA_KEY_BASE64=$(get-env-val "${master_env}" "CA_KEY")
|
||||||
KUBEAPISERVER_CERT_BASE64=$(get-env-val "${master_env}" "KUBEAPISERVER_CERT")
|
KUBEAPISERVER_CERT_BASE64=$(get-env-val "${master_env}" "KUBEAPISERVER_CERT")
|
||||||
|
|
|
@ -143,7 +143,16 @@ CLUSTER_REGISTRY_DISK_TYPE_GCE="${CLUSTER_REGISTRY_DISK_TYPE_GCE:-pd-standard}"
|
||||||
ENABLE_CLUSTER_UI="${KUBE_ENABLE_CLUSTER_UI:-true}"
|
ENABLE_CLUSTER_UI="${KUBE_ENABLE_CLUSTER_UI:-true}"
|
||||||
|
|
||||||
# Optional: Install node problem detector.
|
# Optional: Install node problem detector.
|
||||||
ENABLE_NODE_PROBLEM_DETECTOR="${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-true}"
|
# none - Do not run node problem detector.
|
||||||
|
# daemonset - Run node problem detector as daemonset.
|
||||||
|
# standalone - Run node problem detector as standalone system daemon.
|
||||||
|
if [[ "${NODE_OS_DISTRIBUTION}" == "gci" ]]; then
|
||||||
|
# Enable standalone mode by default for gci.
|
||||||
|
# TODO: Consider upgrade test.
|
||||||
|
ENABLE_NODE_PROBLEM_DETECTOR="${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-standalone}"
|
||||||
|
else
|
||||||
|
ENABLE_NODE_PROBLEM_DETECTOR="${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-daemonset}"
|
||||||
|
fi
|
||||||
|
|
||||||
# Optional: Create autoscaler for cluster's nodes.
|
# Optional: Create autoscaler for cluster's nodes.
|
||||||
ENABLE_CLUSTER_AUTOSCALER="${KUBE_ENABLE_CLUSTER_AUTOSCALER:-false}"
|
ENABLE_CLUSTER_AUTOSCALER="${KUBE_ENABLE_CLUSTER_AUTOSCALER:-false}"
|
||||||
|
|
|
@ -168,7 +168,16 @@ CLUSTER_REGISTRY_DISK_TYPE_GCE="${CLUSTER_REGISTRY_DISK_TYPE_GCE:-pd-standard}"
|
||||||
ENABLE_CLUSTER_UI="${KUBE_ENABLE_CLUSTER_UI:-true}"
|
ENABLE_CLUSTER_UI="${KUBE_ENABLE_CLUSTER_UI:-true}"
|
||||||
|
|
||||||
# Optional: Install node problem detector.
|
# Optional: Install node problem detector.
|
||||||
ENABLE_NODE_PROBLEM_DETECTOR="${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-true}"
|
# none - Do not run node problem detector.
|
||||||
|
# daemonset - Run node problem detector as daemonset.
|
||||||
|
# standalone - Run node problem detector as standalone system daemon.
|
||||||
|
if [[ "${NODE_OS_DISTRIBUTION}" == "gci" ]]; then
|
||||||
|
# Enable standalone mode by default for gci.
|
||||||
|
# TODO: Consider upgrade test.
|
||||||
|
ENABLE_NODE_PROBLEM_DETECTOR="${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-standalone}"
|
||||||
|
else
|
||||||
|
ENABLE_NODE_PROBLEM_DETECTOR="${KUBE_ENABLE_NODE_PROBLEM_DETECTOR:-daemonset}"
|
||||||
|
fi
|
||||||
|
|
||||||
# Optional: Create autoscaler for cluster's nodes.
|
# Optional: Create autoscaler for cluster's nodes.
|
||||||
ENABLE_CLUSTER_AUTOSCALER="${KUBE_ENABLE_CLUSTER_AUTOSCALER:-false}"
|
ENABLE_CLUSTER_AUTOSCALER="${KUBE_ENABLE_CLUSTER_AUTOSCALER:-false}"
|
||||||
|
|
|
@ -1174,7 +1174,7 @@ function start-kube-addons {
|
||||||
if [[ "${ENABLE_CLUSTER_UI:-}" == "true" ]]; then
|
if [[ "${ENABLE_CLUSTER_UI:-}" == "true" ]]; then
|
||||||
setup-addon-manifests "addons" "dashboard"
|
setup-addon-manifests "addons" "dashboard"
|
||||||
fi
|
fi
|
||||||
if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "true" ]]; then
|
if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "daemonset" ]]; then
|
||||||
setup-addon-manifests "addons" "node-problem-detector"
|
setup-addon-manifests "addons" "node-problem-detector"
|
||||||
fi
|
fi
|
||||||
if echo "${ADMISSION_CONTROL:-}" | grep -q "LimitRanger"; then
|
if echo "${ADMISSION_CONTROL:-}" | grep -q "LimitRanger"; then
|
||||||
|
|
|
@ -242,6 +242,9 @@ function create-master-auth {
|
||||||
if [[ -n "${KUBE_PROXY_TOKEN:-}" ]]; then
|
if [[ -n "${KUBE_PROXY_TOKEN:-}" ]]; then
|
||||||
replace_prefixed_line "${known_tokens_csv}" "${KUBE_PROXY_TOKEN}," "system:kube-proxy,uid:kube_proxy"
|
replace_prefixed_line "${known_tokens_csv}" "${KUBE_PROXY_TOKEN}," "system:kube-proxy,uid:kube_proxy"
|
||||||
fi
|
fi
|
||||||
|
if [[ -n "${NODE_PROBLEM_DETECTOR_TOKEN:-}" ]]; then
|
||||||
|
replace_prefixed_line "${known_tokens_csv}" "${NODE_PROBLEM_DETECTOR_TOKEN}," "system:node-problem-detector,uid:node-problem-detector"
|
||||||
|
fi
|
||||||
local use_cloud_config="false"
|
local use_cloud_config="false"
|
||||||
cat <<EOF >/etc/gce.conf
|
cat <<EOF >/etc/gce.conf
|
||||||
[global]
|
[global]
|
||||||
|
@ -458,6 +461,29 @@ current-context: kube-scheduler
|
||||||
EOF
|
EOF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Writes the kubeconfig for the node problem detector to
# /var/lib/node-problem-detector/kubeconfig, creating the directory first.
#
# Variables expanded into the file (the heredoc delimiter is unquoted):
#   NODE_PROBLEM_DETECTOR_TOKEN - written as the token of the
#                                 "node-problem-detector" user.
#   CA_CERT                     - written as certificate-authority-data of the
#                                 "local" cluster.
#
# The "local" cluster entry contains no server address; the apiserver URL is
# supplied separately by start-node-problem-detector via --apiserver-override.
function create-node-problem-detector-kubeconfig {
|
||||||
|
echo "Creating node-problem-detector kubeconfig file"
|
||||||
|
# -p: do not fail if the directory already exists.
mkdir -p /var/lib/node-problem-detector
|
||||||
|
# Overwrites any existing kubeconfig at this path.
cat <<EOF >/var/lib/node-problem-detector/kubeconfig
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Config
|
||||||
|
users:
|
||||||
|
- name: node-problem-detector
|
||||||
|
user:
|
||||||
|
token: ${NODE_PROBLEM_DETECTOR_TOKEN}
|
||||||
|
clusters:
|
||||||
|
- name: local
|
||||||
|
cluster:
|
||||||
|
certificate-authority-data: ${CA_CERT}
|
||||||
|
contexts:
|
||||||
|
- context:
|
||||||
|
cluster: local
|
||||||
|
user: node-problem-detector
|
||||||
|
name: service-account-context
|
||||||
|
current-context: service-account-context
|
||||||
|
EOF
|
||||||
|
}
|
||||||
|
|
||||||
function create-master-etcd-auth {
|
function create-master-etcd-auth {
|
||||||
if [[ -n "${ETCD_CA_CERT:-}" && -n "${ETCD_PEER_KEY:-}" && -n "${ETCD_PEER_CERT:-}" ]]; then
|
if [[ -n "${ETCD_CA_CERT:-}" && -n "${ETCD_PEER_KEY:-}" && -n "${ETCD_PEER_CERT:-}" ]]; then
|
||||||
local -r auth_dir="/etc/srv/kubernetes"
|
local -r auth_dir="/etc/srv/kubernetes"
|
||||||
|
@ -660,6 +686,37 @@ EOF
|
||||||
systemctl start kubelet.service
|
systemctl start kubelet.service
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# This function assembles the node problem detector systemd service file and
|
||||||
|
# starts it using systemctl.
#
# Variables read:
#   KUBE_HOME              - base directory for the binary (bin/) and the
#                            kernel monitor config (node-problem-detector/config/).
#   KUBERNETES_MASTER_NAME - host used in the --apiserver-override URL.
#   NPD_TEST_LOG_LEVEL     - optional; replaces the default "--v=2" flag.
#   NPD_TEST_ARGS          - optional; extra flags appended after the log level.
#
# Files written:
#   /etc/systemd/system/node-problem-detector.service (overwritten).
|
||||||
|
function start-node-problem-detector {
|
||||||
|
echo "Start node problem detector"
|
||||||
|
local -r npd_bin="${KUBE_HOME}/bin/node-problem-detector"
|
||||||
|
local -r km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor.json"
|
||||||
|
echo "Using node problem detector binary at ${npd_bin}"
|
||||||
|
# Log level defaults to --v=2; both test variables may be unset.
local flags="${NPD_TEST_LOG_LEVEL:-"--v=2"} ${NPD_TEST_ARGS:-}"
|
||||||
|
flags+=" --logtostderr"
|
||||||
|
flags+=" --system-log-monitors=${km_config}"
|
||||||
|
# Disable in-cluster config and authenticate with the kubeconfig file at
# /var/lib/node-problem-detector/kubeconfig instead.
flags+=" --apiserver-override=https://${KUBERNETES_MASTER_NAME}?inClusterConfig=false&auth=/var/lib/node-problem-detector/kubeconfig"
|
||||||
|
|
||||||
|
# Write the systemd service file for node problem detector.
# The heredoc delimiter is unquoted, so ${npd_bin} and ${flags} are expanded
# when the unit file is written.
|
||||||
|
cat <<EOF >/etc/systemd/system/node-problem-detector.service
|
||||||
|
[Unit]
|
||||||
|
Description=Kubernetes node problem detector
|
||||||
|
Requires=network-online.target
|
||||||
|
After=network-online.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Restart=always
|
||||||
|
RestartSec=10
|
||||||
|
ExecStart=${npd_bin} ${flags}
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# Starts the unit immediately; this function does not run `systemctl enable`.
systemctl start node-problem-detector.service
|
||||||
|
}
|
||||||
|
|
||||||
# Create the log file and set its properties.
|
# Create the log file and set its properties.
|
||||||
#
|
#
|
||||||
# $1 is the file to create.
|
# $1 is the file to create.
|
||||||
|
@ -1249,9 +1306,13 @@ function start-kube-addons {
|
||||||
if [[ "${ENABLE_CLUSTER_UI:-}" == "true" ]]; then
|
if [[ "${ENABLE_CLUSTER_UI:-}" == "true" ]]; then
|
||||||
setup-addon-manifests "addons" "dashboard"
|
setup-addon-manifests "addons" "dashboard"
|
||||||
fi
|
fi
|
||||||
if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "true" ]]; then
|
if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "daemonset" ]]; then
|
||||||
setup-addon-manifests "addons" "node-problem-detector"
|
setup-addon-manifests "addons" "node-problem-detector"
|
||||||
fi
|
fi
|
||||||
|
if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" ]]; then
|
||||||
|
# Setup role binding for standalone node problem detector.
|
||||||
|
setup-addon-manifests "addons" "node-problem-detector/standalone"
|
||||||
|
fi
|
||||||
if echo "${ADMISSION_CONTROL:-}" | grep -q "LimitRanger"; then
|
if echo "${ADMISSION_CONTROL:-}" | grep -q "LimitRanger"; then
|
||||||
setup-addon-manifests "admission-controls" "limit-range"
|
setup-addon-manifests "admission-controls" "limit-range"
|
||||||
fi
|
fi
|
||||||
|
@ -1404,6 +1465,9 @@ if [[ "${KUBERNETES_MASTER:-}" == "true" ]]; then
|
||||||
else
|
else
|
||||||
create-kubelet-kubeconfig
|
create-kubelet-kubeconfig
|
||||||
create-kubeproxy-kubeconfig
|
create-kubeproxy-kubeconfig
|
||||||
|
if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" ]]; then
|
||||||
|
create-node-problem-detector-kubeconfig
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
override-kubectl
|
override-kubectl
|
||||||
|
@ -1434,6 +1498,9 @@ else
|
||||||
if [[ "${PREPULL_E2E_IMAGES:-}" == "true" ]]; then
|
if [[ "${PREPULL_E2E_IMAGES:-}" == "true" ]]; then
|
||||||
start-image-puller
|
start-image-puller
|
||||||
fi
|
fi
|
||||||
|
if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" ]]; then
|
||||||
|
start-node-problem-detector
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
reset-motd
|
reset-motd
|
||||||
echo "Done for the configuration for kubernetes"
|
echo "Done for the configuration for kubernetes"
|
||||||
|
|
|
@ -130,6 +130,22 @@ function install-gci-mounter-tools {
|
||||||
chmod a+x "${rkt_dst}/rkt"
|
chmod a+x "${rkt_dst}/rkt"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Install node problem detector binary.
#
# Fetches the pinned release tarball, unpacks it under
# ${KUBE_HOME}/node-problem-detector, moves everything from its bin/ directory
# into ${KUBE_HOME}/bin, and removes the tarball afterwards.
#
# Variables read:
#   KUBE_HOME - base directory for the download, the unpacked tree and bin/.
#
# NOTE(review): presumably download-or-bust checks the file against npd_sha1
# and saves it as ${KUBE_HOME}/${npd_tar} (the tar call below reads it from
# there) - confirm against that helper's definition.
|
||||||
|
function install-node-problem-detector {
|
||||||
|
# Version and checksum are pinned together; update both when bumping.
local -r npd_version="v0.3.0-alpha.1"
|
||||||
|
local -r npd_sha1="46f963fac14d92021c8b2a648a6cb0337c1bc833"
|
||||||
|
local -r npd_release_path="https://storage.googleapis.com/kubernetes-release"
|
||||||
|
local -r npd_tar="node-problem-detector-${npd_version}.tar.gz"
|
||||||
|
download-or-bust "${npd_sha1}" "${npd_release_path}/node-problem-detector/${npd_tar}"
|
||||||
|
local -r npd_dir="${KUBE_HOME}/node-problem-detector"
|
||||||
|
mkdir -p "${npd_dir}"
|
||||||
|
# --overwrite replaces files left by a previous extraction.
tar xzf "${KUBE_HOME}/${npd_tar}" -C "${npd_dir}" --overwrite
|
||||||
|
# Only bin/ is relocated; anything else unpacked stays under ${npd_dir}.
mv "${npd_dir}/bin"/* "${KUBE_HOME}/bin"
|
||||||
|
chmod a+x "${KUBE_HOME}/bin/node-problem-detector"
|
||||||
|
# rmdir only succeeds when bin/ is empty after the move above.
rmdir "${npd_dir}/bin"
|
||||||
|
rm -f "${KUBE_HOME}/${npd_tar}"
|
||||||
|
}
|
||||||
|
|
||||||
# Downloads kubernetes binaries and kube-system manifest tarball, unpacks them,
|
# Downloads kubernetes binaries and kube-system manifest tarball, unpacks them,
|
||||||
# and places them into suitable directories. Files are placed in /home/kubernetes.
|
# and places them into suitable directories. Files are placed in /home/kubernetes.
|
||||||
function install-kube-binary-config {
|
function install-kube-binary-config {
|
||||||
|
@ -153,6 +169,9 @@ function install-kube-binary-config {
|
||||||
cp "${src_dir}/"*.docker_tag "${dst_dir}"
|
cp "${src_dir}/"*.docker_tag "${dst_dir}"
|
||||||
if [[ "${KUBERNETES_MASTER:-}" == "false" ]]; then
|
if [[ "${KUBERNETES_MASTER:-}" == "false" ]]; then
|
||||||
cp "${src_dir}/kube-proxy.tar" "${dst_dir}"
|
cp "${src_dir}/kube-proxy.tar" "${dst_dir}"
|
||||||
|
if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" ]]; then
|
||||||
|
install-node-problem-detector
|
||||||
|
fi
|
||||||
else
|
else
|
||||||
cp "${src_dir}/kube-apiserver.tar" "${dst_dir}"
|
cp "${src_dir}/kube-apiserver.tar" "${dst_dir}"
|
||||||
cp "${src_dir}/kube-controller-manager.tar" "${dst_dir}"
|
cp "${src_dir}/kube-controller-manager.tar" "${dst_dir}"
|
||||||
|
|
|
@ -972,6 +972,9 @@ start_kube_addons() {
|
||||||
if [ "${ENABLE_CLUSTER_UI:-}" = "true" ]; then
|
if [ "${ENABLE_CLUSTER_UI:-}" = "true" ]; then
|
||||||
setup_addon_manifests "addons" "dashboard"
|
setup_addon_manifests "addons" "dashboard"
|
||||||
fi
|
fi
|
||||||
|
if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "daemonset" ]]; then
|
||||||
|
setup-addon-manifests "addons" "node-problem-detector"
|
||||||
|
fi
|
||||||
if echo "${ADMISSION_CONTROL:-}" | grep -q "LimitRanger"; then
|
if echo "${ADMISSION_CONTROL:-}" | grep -q "LimitRanger"; then
|
||||||
setup_addon_manifests "admission-controls" "limit-range"
|
setup_addon_manifests "admission-controls" "limit-range"
|
||||||
fi
|
fi
|
||||||
|
|
|
@ -172,6 +172,7 @@ function get-node-os() {
|
||||||
# Vars set:
|
# Vars set:
|
||||||
# KUBELET_TOKEN
|
# KUBELET_TOKEN
|
||||||
# KUBE_PROXY_TOKEN
|
# KUBE_PROXY_TOKEN
|
||||||
|
# NODE_PROBLEM_DETECTOR_TOKEN
|
||||||
# CA_CERT_BASE64
|
# CA_CERT_BASE64
|
||||||
# EXTRA_DOCKER_OPTS
|
# EXTRA_DOCKER_OPTS
|
||||||
# KUBELET_CERT_BASE64
|
# KUBELET_CERT_BASE64
|
||||||
|
@ -206,6 +207,7 @@ fi
|
||||||
# INSTANCE_GROUPS
|
# INSTANCE_GROUPS
|
||||||
# KUBELET_TOKEN
|
# KUBELET_TOKEN
|
||||||
# KUBE_PROXY_TOKEN
|
# KUBE_PROXY_TOKEN
|
||||||
|
# NODE_PROBLEM_DETECTOR_TOKEN
|
||||||
# CA_CERT_BASE64
|
# CA_CERT_BASE64
|
||||||
# EXTRA_DOCKER_OPTS
|
# EXTRA_DOCKER_OPTS
|
||||||
# KUBELET_CERT_BASE64
|
# KUBELET_CERT_BASE64
|
||||||
|
@ -228,6 +230,7 @@ function prepare-node-upgrade() {
|
||||||
local node_env=$(get-node-env)
|
local node_env=$(get-node-env)
|
||||||
KUBELET_TOKEN=$(get-env-val "${node_env}" "KUBELET_TOKEN")
|
KUBELET_TOKEN=$(get-env-val "${node_env}" "KUBELET_TOKEN")
|
||||||
KUBE_PROXY_TOKEN=$(get-env-val "${node_env}" "KUBE_PROXY_TOKEN")
|
KUBE_PROXY_TOKEN=$(get-env-val "${node_env}" "KUBE_PROXY_TOKEN")
|
||||||
|
NODE_PROBLEM_DETECTOR_TOKEN=$(get-env-val "${node_env}" "NODE_PROBLEM_DETECTOR_TOKEN")
|
||||||
CA_CERT_BASE64=$(get-env-val "${node_env}" "CA_CERT")
|
CA_CERT_BASE64=$(get-env-val "${node_env}" "CA_CERT")
|
||||||
EXTRA_DOCKER_OPTS=$(get-env-val "${node_env}" "EXTRA_DOCKER_OPTS")
|
EXTRA_DOCKER_OPTS=$(get-env-val "${node_env}" "EXTRA_DOCKER_OPTS")
|
||||||
KUBELET_CERT_BASE64=$(get-env-val "${node_env}" "KUBELET_CERT")
|
KUBELET_CERT_BASE64=$(get-env-val "${node_env}" "KUBELET_CERT")
|
||||||
|
|
|
@ -825,6 +825,9 @@ function create-master() {
|
||||||
# http://issue.k8s.io/3168
|
# http://issue.k8s.io/3168
|
||||||
KUBELET_TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/" | dd bs=32 count=1 2>/dev/null)
|
KUBELET_TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/" | dd bs=32 count=1 2>/dev/null)
|
||||||
KUBE_PROXY_TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/" | dd bs=32 count=1 2>/dev/null)
|
KUBE_PROXY_TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/" | dd bs=32 count=1 2>/dev/null)
|
||||||
|
if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" ]]; then
|
||||||
|
NODE_PROBLEM_DETECTOR_TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/" | dd bs=32 count=1 2>/dev/null)
|
||||||
|
fi
|
||||||
|
|
||||||
# Reserve the master's IP so that it can later be transferred to another VM
|
# Reserve the master's IP so that it can later be transferred to another VM
|
||||||
# without disrupting the kubelets.
|
# without disrupting the kubelets.
|
||||||
|
|
|
@ -161,10 +161,10 @@ addon-dir-create:
|
||||||
- file_mode: 644
|
- file_mode: 644
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
{% if pillar.get('enable_node_problem_detector', '').lower() == 'true' %}
|
{% if pillar.get('enable_node_problem_detector', '').lower() == 'daemonset' %}
|
||||||
/etc/kubernetes/addons/node-problem-detector/node-problem-detector.yaml:
|
/etc/kubernetes/addons/node-problem-detector/npd.yaml:
|
||||||
file.managed:
|
file.managed:
|
||||||
- source: salt://kube-addons/node-problem-detector/node-problem-detector.yaml
|
- source: salt://kube-addons/node-problem-detector/npd.yaml
|
||||||
- user: root
|
- user: root
|
||||||
- group: root
|
- group: root
|
||||||
- file_mode: 644
|
- file_mode: 644
|
||||||
|
|
|
@ -151,7 +151,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "hollow-node-problem-detector",
|
"name": "hollow-node-problem-detector",
|
||||||
"image": "gcr.io/google_containers/node-problem-detector:v0.3.0-alpha.0",
|
"image": "gcr.io/google_containers/node-problem-detector:v0.3.0-alpha.1",
|
||||||
"env": [
|
"env": [
|
||||||
{
|
{
|
||||||
"name": "NODE_NAME",
|
"name": "NODE_NAME",
|
||||||
|
@ -164,7 +164,7 @@
|
||||||
],
|
],
|
||||||
"command": [
|
"command": [
|
||||||
"/node-problem-detector",
|
"/node-problem-detector",
|
||||||
"--kernel-monitor=/config/kernel.monitor",
|
"--system-log-monitors=/config/kernel.monitor",
|
||||||
"--apiserver-override=https://{{master_ip}}:443?inClusterConfig=false&auth=/kubeconfig/npd.kubeconfig",
|
"--apiserver-override=https://{{master_ip}}:443?inClusterConfig=false&auth=/kubeconfig/npd.kubeconfig",
|
||||||
"--alsologtostderr",
|
"--alsologtostderr",
|
||||||
"1>>/var/logs/npd_$(NODE_NAME).log 2>&1"
|
"1>>/var/logs/npd_$(NODE_NAME).log 2>&1"
|
||||||
|
|
|
@ -1,7 +1,12 @@
|
||||||
{
|
{
|
||||||
|
"plugin": "filelog",
|
||||||
|
"pluginConfig": {
|
||||||
|
"timestamp": "dummy",
|
||||||
|
"message": "dummy",
|
||||||
|
"timestampFormat": "dummy"
|
||||||
|
},
|
||||||
"logPath": "/log/faillog",
|
"logPath": "/log/faillog",
|
||||||
"lookback": "10m",
|
"lookback": "10m",
|
||||||
"startPattern": "Initializing cgroup subsys cpuset",
|
|
||||||
"bufferSize": 10,
|
"bufferSize": 10,
|
||||||
"source": "kernel-monitor",
|
"source": "kernel-monitor",
|
||||||
"conditions": [],
|
"conditions": [],
|
||||||
|
|
Loading…
Reference in New Issue