Merge pull request #66485 from bmoyles0117/apply-latest-stackdriver-fixes

Automatic merge from submit-queue (batch tested with PRs 59030, 64666, 66251, 66485, 66813). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

A large set of improvements to the Stackdriver components.

**What this PR does / why we need it**:
This PR delivers a large set of improvements for both the Stackdriver Logging agent and the Stackdriver Metadata agent.

**Release note**:
```release-note
Metadata Agent Improvements
Bump metadata agent version to 0.2-0.0.21-1.
Expand the metadata agent's access to all API groups.
Remove metadata agent config maps in favor of command line flags.
Update the metadata agent's liveness probe to a new /healthz handler.

Logging Agent Improvements
Bump logging agent version to 0.2-1.5.33-1-k8s-1.
Appropriately set log severity for k8s_container.
Fix detect exceptions plugin to analyze message field instead of log field.
Fix detect exceptions plugin to analyze streams based on local resource id.
Disable the metadata agent for monitored resource construction in logging.
Disable timestamp adjustment in logs to optimize performance.
Reduce logging agent buffer chunk limit to 512k to optimize performance.
```
pull/8/head
Kubernetes Submit Queue 2018-08-06 12:21:08 -07:00 committed by GitHub
commit 51faf6ebdb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 47 additions and 73 deletions

View File

@ -98,6 +98,8 @@ data:
# instead of jsonPayload after extracting 'time', 'severity' and
# 'stream' from the record.
message ${record['log']}
# If 'severity' is not set, assume stderr is ERROR and stdout is INFO.
severity ${record['severity'] || if record['stream'] == 'stderr' then 'ERROR' else 'INFO' end}
</record>
tag ${if record['stream'] == 'stderr' then 'raw.stderr' else 'raw.stdout' end}
remove_keys stream,log
@ -109,7 +111,7 @@ data:
remove_tag_prefix raw
message message
stream stream
stream "logging.googleapis.com/local_resource_id"
multiline_flush_interval 5
max_bytes 500000
max_lines 1000
@ -408,9 +410,9 @@ data:
buffer_queue_full_action block
# Set the chunk limit conservatively to avoid exceeding the recommended
# chunk size of 5MB per write request.
buffer_chunk_limit 1M
buffer_chunk_limit 512k
# Cap the combined memory usage of this buffer and the one below to
# 1MiB/chunk * (6 + 2) chunks = 8 MiB
# 512KiB/chunk * (6 + 2) chunks = 4 MiB
buffer_queue_limit 6
# Never wait more than 5 seconds before flushing logs in the non-error case.
flush_interval 5s
@ -421,8 +423,9 @@ data:
# Use multiple threads for processing.
num_threads 2
use_grpc true
# Use Metadata Agent to get monitored resource.
enable_metadata_agent true
# Skip timestamp adjustment as this is in a controlled environment with
# known timestamp format. This helps with CPU usage.
adjust_invalid_timestamps false
</match>
# Attach local_resource_id for 'k8s_node' monitored resource.
@ -450,15 +453,16 @@ data:
buffer_type file
buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer
buffer_queue_full_action block
buffer_chunk_limit 1M
buffer_chunk_limit 512k
buffer_queue_limit 2
flush_interval 5s
max_retry_wait 30
disable_retry_limit
num_threads 2
use_grpc true
# Use Metadata Agent to get monitored resource.
enable_metadata_agent true
# Skip timestamp adjustment as this is in a controlled environment with
# known timestamp format. This helps with CPU usage.
adjust_invalid_timestamps false
</match>
metadata:
name: fluentd-gcp-config-v1.2.5

View File

@ -1,13 +1,13 @@
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
name: fluentd-gcp-v3.0.0
name: fluentd-gcp-{{ fluentd_gcp_yaml_version }}
namespace: kube-system
labels:
k8s-app: fluentd-gcp
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
version: v3.0.0
version: {{ fluentd_gcp_yaml_version }}
spec:
updateStrategy:
type: RollingUpdate
@ -16,7 +16,7 @@ spec:
labels:
k8s-app: fluentd-gcp
kubernetes.io/cluster-service: "true"
version: v3.0.0
version: {{ fluentd_gcp_yaml_version }}
# This annotation ensures that fluentd does not get evicted if the node
# supports critical pod annotation based priority scheme.
# Note that this does not guarantee admission on the nodes (#40573).

View File

@ -7,9 +7,7 @@ metadata:
addonmanager.kubernetes.io/mode: Reconcile
rules:
- apiGroups:
- ""
- "apps"
- "extensions"
- "*"
resources:
- "*"
verbs:

View File

@ -7,22 +7,6 @@ metadata:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
---
apiVersion: v1
kind: ConfigMap
metadata:
name: metadata-agent-config
namespace: kube-system
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
data:
node_level.conf: |-
KubernetesUseWatch: true
KubernetesClusterLevelMetadata: false
cluster_level.conf: |-
KubernetesUseWatch: true
KubernetesClusterLevelMetadata: true
---
kind: DaemonSet
apiVersion: extensions/v1beta1
metadata:
@ -45,27 +29,22 @@ spec:
spec:
serviceAccountName: metadata-agent
containers:
- image: gcr.io/stackdriver-agents/stackdriver-metadata-agent:0.2-0.0.19-1
- image: gcr.io/stackdriver-agents/stackdriver-metadata-agent:0.2-0.0.21-1
imagePullPolicy: IfNotPresent
name: metadata-agent
livenessProbe:
exec:
command:
- /bin/bash
- -c
- |
if [[ -f /var/run/metadata-agent/health/unhealthy ]]; then
exit 1;
fi
periodSeconds: 10
httpGet:
path: /healthz
port: 8000
initialDelaySeconds: 30
periodSeconds: 60
timeoutSeconds: 5
failureThreshold: 1
successThreshold: 1
volumeMounts:
- name: metadata-agent-config-volume
mountPath: /etc/config
command:
- /opt/stackdriver/metadata/sbin/metadatad
- --config-file=/etc/config/node_level.conf
args:
- -o KubernetesUseWatch=true
- -o KubernetesClusterLevelMetadata=false
- -o MetadataReporterPurgeDeleted=true
ports:
- containerPort: 8000
hostPort: 8799
@ -78,10 +57,6 @@ spec:
restartPolicy: Always
schedulerName: default-scheduler
terminationGracePeriodSeconds: 30
volumes:
- name: metadata-agent-config-volume
configMap:
name: metadata-agent-config
updateStrategy:
rollingUpdate:
maxUnavailable: 1
@ -110,27 +85,22 @@ spec:
spec:
serviceAccountName: metadata-agent
containers:
- image: gcr.io/stackdriver-agents/stackdriver-metadata-agent:0.2-0.0.19-1
- image: gcr.io/stackdriver-agents/stackdriver-metadata-agent:0.2-0.0.21-1
imagePullPolicy: IfNotPresent
name: metadata-agent
livenessProbe:
exec:
command:
- /bin/bash
- -c
- |
if [[ -f /var/run/metadata-agent/health/unhealthy ]]; then
exit 1;
fi
periodSeconds: 10
httpGet:
path: /healthz
port: 8000
initialDelaySeconds: 30
periodSeconds: 60
timeoutSeconds: 5
failureThreshold: 1
successThreshold: 1
volumeMounts:
- name: metadata-agent-config-volume
mountPath: /etc/config
command:
- /opt/stackdriver/metadata/sbin/metadatad
- --config-file=/etc/config/cluster_level.conf
args:
- -o KubernetesUseWatch=true
- -o KubernetesClusterLevelMetadata=true
- -o MetadataReporterPurgeDeleted=true
ports:
- containerPort: 8000
protocol: TCP
@ -142,10 +112,6 @@ spec:
restartPolicy: Always
schedulerName: default-scheduler
terminationGracePeriodSeconds: 30
volumes:
- name: metadata-agent-config-volume
configMap:
name: metadata-agent-config
strategy:
rollingUpdate:
maxUnavailable: 1

View File

@ -404,6 +404,8 @@ if [[ -n "${LOGROTATE_MAX_SIZE:-}" ]]; then
fi
# Fluentd requirements
# YAML exists to trigger a configuration refresh when changes are made.
FLUENTD_GCP_YAML_VERSION="v3.1.0"
FLUENTD_GCP_VERSION="${FLUENTD_GCP_VERSION:-0.2-1.5.30-1-k8s}"
FLUENTD_GCP_MEMORY_LIMIT="${FLUENTD_GCP_MEMORY_LIMIT:-}"
FLUENTD_GCP_CPU_REQUEST="${FLUENTD_GCP_CPU_REQUEST:-}"
@ -422,7 +424,7 @@ CUSTOM_KUBE_DASHBOARD_BANNER="${CUSTOM_KUBE_DASHBOARD_BANNER:-}"
LOGGING_STACKDRIVER_RESOURCE_TYPES="${LOGGING_STACKDRIVER_RESOURCE_TYPES:-old}"
# Adding to PROVIDER_VARS, since this is GCP-specific.
PROVIDER_VARS="${PROVIDER_VARS:-} FLUENTD_GCP_VERSION FLUENTD_GCP_MEMORY_LIMIT FLUENTD_GCP_CPU_REQUEST FLUENTD_GCP_MEMORY_REQUEST HEAPSTER_GCP_BASE_MEMORY HEAPSTER_GCP_MEMORY_PER_NODE HEAPSTER_GCP_BASE_CPU HEAPSTER_GCP_CPU_PER_NODE CUSTOM_KUBE_DASHBOARD_BANNER LOGGING_STACKDRIVER_RESOURCE_TYPES"
PROVIDER_VARS="${PROVIDER_VARS:-} FLUENTD_GCP_YAML_VERSION FLUENTD_GCP_VERSION FLUENTD_GCP_MEMORY_LIMIT FLUENTD_GCP_CPU_REQUEST FLUENTD_GCP_MEMORY_REQUEST HEAPSTER_GCP_BASE_MEMORY HEAPSTER_GCP_MEMORY_PER_NODE HEAPSTER_GCP_BASE_CPU HEAPSTER_GCP_CPU_PER_NODE CUSTOM_KUBE_DASHBOARD_BANNER LOGGING_STACKDRIVER_RESOURCE_TYPES"
# Fluentd configuration for node-journal
ENABLE_NODE_JOURNAL="${ENABLE_NODE_JOURNAL:-false}"

View File

@ -420,6 +420,8 @@ if [[ -n "${LOGROTATE_MAX_SIZE:-}" ]]; then
fi
# Fluentd requirements
# YAML exists to trigger a configuration refresh when changes are made.
FLUENTD_GCP_YAML_VERSION="v3.1.0"
FLUENTD_GCP_VERSION="${FLUENTD_GCP_VERSION:-0.2-1.5.30-1-k8s}"
FLUENTD_GCP_MEMORY_LIMIT="${FLUENTD_GCP_MEMORY_LIMIT:-}"
FLUENTD_GCP_CPU_REQUEST="${FLUENTD_GCP_CPU_REQUEST:-}"
@ -438,7 +440,7 @@ CUSTOM_KUBE_DASHBOARD_BANNER="${CUSTOM_KUBE_DASHBOARD_BANNER:-}"
LOGGING_STACKDRIVER_RESOURCE_TYPES="${LOGGING_STACKDRIVER_RESOURCE_TYPES:-old}"
# Adding to PROVIDER_VARS, since this is GCP-specific.
PROVIDER_VARS="${PROVIDER_VARS:-} FLUENTD_GCP_VERSION FLUENTD_GCP_MEMORY_LIMIT FLUENTD_GCP_CPU_REQUEST FLUENTD_GCP_MEMORY_REQUEST HEAPSTER_GCP_BASE_MEMORY HEAPSTER_GCP_MEMORY_PER_NODE HEAPSTER_GCP_BASE_CPU HEAPSTER_GCP_CPU_PER_NODE CUSTOM_KUBE_DASHBOARD_BANNER LOGGING_STACKDRIVER_RESOURCE_TYPES"
PROVIDER_VARS="${PROVIDER_VARS:-} FLUENTD_GCP_YAML_VERSION FLUENTD_GCP_VERSION FLUENTD_GCP_MEMORY_LIMIT FLUENTD_GCP_CPU_REQUEST FLUENTD_GCP_MEMORY_REQUEST HEAPSTER_GCP_BASE_MEMORY HEAPSTER_GCP_MEMORY_PER_NODE HEAPSTER_GCP_BASE_CPU HEAPSTER_GCP_CPU_PER_NODE CUSTOM_KUBE_DASHBOARD_BANNER LOGGING_STACKDRIVER_RESOURCE_TYPES"
# Fluentd configuration for node-journal
ENABLE_NODE_JOURNAL="${ENABLE_NODE_JOURNAL:-false}"

View File

@ -2237,7 +2237,9 @@ function setup-fluentd {
fluentd_gcp_configmap_name="fluentd-gcp-config-old"
fi
sed -i -e "s@{{ fluentd_gcp_configmap_name }}@${fluentd_gcp_configmap_name}@g" "${fluentd_gcp_yaml}"
fluentd_gcp_version="${FLUENTD_GCP_VERSION:-0.2-1.5.30-1-k8s}"
fluentd_gcp_yaml_version="${FLUENTD_GCP_YAML_VERSION:-v3.1.0}"
sed -i -e "s@{{ fluentd_gcp_yaml_version }}@${fluentd_gcp_yaml_version}@g" "${fluentd_gcp_yaml}"
fluentd_gcp_version="${FLUENTD_GCP_VERSION:-0.3-1.5.34-1-k8s-1}"
sed -i -e "s@{{ fluentd_gcp_version }}@${fluentd_gcp_version}@g" "${fluentd_gcp_yaml}"
update-prometheus-to-sd-parameters ${fluentd_gcp_yaml}
start-fluentd-resource-update ${fluentd_gcp_yaml}