mirror of https://github.com/k3s-io/k3s
Merge pull request #66485 from bmoyles0117/apply-latest-stackdriver-fixes
Automatic merge from submit-queue (batch tested with PRs 59030, 64666, 66251, 66485, 66813).

If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

A large set of improvements to the Stackdriver components.

**What this PR does / why we need it**:
This PR delivers a large set of improvements for both the Stackdriver Logging agent and the Stackdriver Metadata agent.

**Release note**:
```release-note
Metadata Agent Improvements
  - Bump metadata agent version to 0.2-0.0.21-1.
  - Expand the metadata agent's access to all API groups.
  - Remove metadata agent config maps in favor of command line flags.
  - Update the metadata agent's liveness probe to a new /healthz handler.
Logging Agent Improvements
  - Bump logging agent version to 0.2-1.5.33-1-k8s-1.
  - Appropriately set log severity for k8s_container.
  - Fix detect exceptions plugin to analyze message field instead of log field.
  - Fix detect exceptions plugin to analyze streams based on local resource id.
  - Disable the metadata agent for monitored resource construction in logging.
  - Disable timestamp adjustment in logs to optimize performance.
  - Reduce logging agent buffer chunk limit to 512k to optimize performance.
```

pull/8/head
commit
51faf6ebdb
|
@ -98,6 +98,8 @@ data:
|
|||
# instead of jsonPayload after extracting 'time', 'severity' and
|
||||
# 'stream' from the record.
|
||||
message ${record['log']}
|
||||
# If 'severity' is not set, assume stderr is ERROR and stdout is INFO.
|
||||
severity ${record['severity'] || if record['stream'] == 'stderr' then 'ERROR' else 'INFO' end}
|
||||
</record>
|
||||
tag ${if record['stream'] == 'stderr' then 'raw.stderr' else 'raw.stdout' end}
|
||||
remove_keys stream,log
|
||||
|
@ -109,7 +111,7 @@ data:
|
|||
|
||||
remove_tag_prefix raw
|
||||
message message
|
||||
stream stream
|
||||
stream "logging.googleapis.com/local_resource_id"
|
||||
multiline_flush_interval 5
|
||||
max_bytes 500000
|
||||
max_lines 1000
|
||||
|
@ -408,9 +410,9 @@ data:
|
|||
buffer_queue_full_action block
|
||||
# Set the chunk limit conservatively to avoid exceeding the recommended
|
||||
# chunk size of 5MB per write request.
|
||||
buffer_chunk_limit 1M
|
||||
buffer_chunk_limit 512k
|
||||
# Cap the combined memory usage of this buffer and the one below to
|
||||
# 1MiB/chunk * (6 + 2) chunks = 8 MiB
|
||||
# 512KiB/chunk * (6 + 2) chunks = 4 MiB
|
||||
buffer_queue_limit 6
|
||||
# Never wait more than 5 seconds before flushing logs in the non-error case.
|
||||
flush_interval 5s
|
||||
|
@ -421,8 +423,9 @@ data:
|
|||
# Use multiple threads for processing.
|
||||
num_threads 2
|
||||
use_grpc true
|
||||
# Use Metadata Agent to get monitored resource.
|
||||
enable_metadata_agent true
|
||||
# Skip timestamp adjustment as this is in a controlled environment with
|
||||
# known timestamp format. This helps with CPU usage.
|
||||
adjust_invalid_timestamps false
|
||||
</match>
|
||||
|
||||
# Attach local_resource_id for 'k8s_node' monitored resource.
|
||||
|
@ -450,15 +453,16 @@ data:
|
|||
buffer_type file
|
||||
buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer
|
||||
buffer_queue_full_action block
|
||||
buffer_chunk_limit 1M
|
||||
buffer_chunk_limit 512k
|
||||
buffer_queue_limit 2
|
||||
flush_interval 5s
|
||||
max_retry_wait 30
|
||||
disable_retry_limit
|
||||
num_threads 2
|
||||
use_grpc true
|
||||
# Use Metadata Agent to get monitored resource.
|
||||
enable_metadata_agent true
|
||||
# Skip timestamp adjustment as this is in a controlled environment with
|
||||
# known timestamp format. This helps with CPU usage.
|
||||
adjust_invalid_timestamps false
|
||||
</match>
|
||||
metadata:
|
||||
name: fluentd-gcp-config-v1.2.5
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
apiVersion: extensions/v1beta1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: fluentd-gcp-v3.0.0
|
||||
name: fluentd-gcp-{{ fluentd_gcp_yaml_version }}
|
||||
namespace: kube-system
|
||||
labels:
|
||||
k8s-app: fluentd-gcp
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
version: v3.0.0
|
||||
version: {{ fluentd_gcp_yaml_version }}
|
||||
spec:
|
||||
updateStrategy:
|
||||
type: RollingUpdate
|
||||
|
@ -16,7 +16,7 @@ spec:
|
|||
labels:
|
||||
k8s-app: fluentd-gcp
|
||||
kubernetes.io/cluster-service: "true"
|
||||
version: v3.0.0
|
||||
version: {{ fluentd_gcp_yaml_version }}
|
||||
# This annotation ensures that fluentd does not get evicted if the node
|
||||
# supports critical pod annotation based priority scheme.
|
||||
# Note that this does not guarantee admission on the nodes (#40573).
|
||||
|
|
|
@ -7,9 +7,7 @@ metadata:
|
|||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
- "apps"
|
||||
- "extensions"
|
||||
- "*"
|
||||
resources:
|
||||
- "*"
|
||||
verbs:
|
||||
|
|
|
@ -7,22 +7,6 @@ metadata:
|
|||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: metadata-agent-config
|
||||
namespace: kube-system
|
||||
labels:
|
||||
kubernetes.io/cluster-service: "true"
|
||||
addonmanager.kubernetes.io/mode: Reconcile
|
||||
data:
|
||||
node_level.conf: |-
|
||||
KubernetesUseWatch: true
|
||||
KubernetesClusterLevelMetadata: false
|
||||
cluster_level.conf: |-
|
||||
KubernetesUseWatch: true
|
||||
KubernetesClusterLevelMetadata: true
|
||||
---
|
||||
kind: DaemonSet
|
||||
apiVersion: extensions/v1beta1
|
||||
metadata:
|
||||
|
@ -45,27 +29,22 @@ spec:
|
|||
spec:
|
||||
serviceAccountName: metadata-agent
|
||||
containers:
|
||||
- image: gcr.io/stackdriver-agents/stackdriver-metadata-agent:0.2-0.0.19-1
|
||||
- image: gcr.io/stackdriver-agents/stackdriver-metadata-agent:0.2-0.0.21-1
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: metadata-agent
|
||||
livenessProbe:
|
||||
exec:
|
||||
command:
|
||||
- /bin/bash
|
||||
- -c
|
||||
- |
|
||||
if [[ -f /var/run/metadata-agent/health/unhealthy ]]; then
|
||||
exit 1;
|
||||
fi
|
||||
periodSeconds: 10
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 8000
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 60
|
||||
timeoutSeconds: 5
|
||||
failureThreshold: 1
|
||||
successThreshold: 1
|
||||
volumeMounts:
|
||||
- name: metadata-agent-config-volume
|
||||
mountPath: /etc/config
|
||||
command:
|
||||
- /opt/stackdriver/metadata/sbin/metadatad
|
||||
- --config-file=/etc/config/node_level.conf
|
||||
args:
|
||||
- -o KubernetesUseWatch=true
|
||||
- -o KubernetesClusterLevelMetadata=false
|
||||
- -o MetadataReporterPurgeDeleted=true
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
hostPort: 8799
|
||||
|
@ -78,10 +57,6 @@ spec:
|
|||
restartPolicy: Always
|
||||
schedulerName: default-scheduler
|
||||
terminationGracePeriodSeconds: 30
|
||||
volumes:
|
||||
- name: metadata-agent-config-volume
|
||||
configMap:
|
||||
name: metadata-agent-config
|
||||
updateStrategy:
|
||||
rollingUpdate:
|
||||
maxUnavailable: 1
|
||||
|
@ -110,27 +85,22 @@ spec:
|
|||
spec:
|
||||
serviceAccountName: metadata-agent
|
||||
containers:
|
||||
- image: gcr.io/stackdriver-agents/stackdriver-metadata-agent:0.2-0.0.19-1
|
||||
- image: gcr.io/stackdriver-agents/stackdriver-metadata-agent:0.2-0.0.21-1
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: metadata-agent
|
||||
livenessProbe:
|
||||
exec:
|
||||
command:
|
||||
- /bin/bash
|
||||
- -c
|
||||
- |
|
||||
if [[ -f /var/run/metadata-agent/health/unhealthy ]]; then
|
||||
exit 1;
|
||||
fi
|
||||
periodSeconds: 10
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 8000
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 60
|
||||
timeoutSeconds: 5
|
||||
failureThreshold: 1
|
||||
successThreshold: 1
|
||||
volumeMounts:
|
||||
- name: metadata-agent-config-volume
|
||||
mountPath: /etc/config
|
||||
command:
|
||||
- /opt/stackdriver/metadata/sbin/metadatad
|
||||
- --config-file=/etc/config/cluster_level.conf
|
||||
args:
|
||||
- -o KubernetesUseWatch=true
|
||||
- -o KubernetesClusterLevelMetadata=true
|
||||
- -o MetadataReporterPurgeDeleted=true
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
protocol: TCP
|
||||
|
@ -142,10 +112,6 @@ spec:
|
|||
restartPolicy: Always
|
||||
schedulerName: default-scheduler
|
||||
terminationGracePeriodSeconds: 30
|
||||
volumes:
|
||||
- name: metadata-agent-config-volume
|
||||
configMap:
|
||||
name: metadata-agent-config
|
||||
strategy:
|
||||
rollingUpdate:
|
||||
maxUnavailable: 1
|
||||
|
|
|
@ -404,6 +404,8 @@ if [[ -n "${LOGROTATE_MAX_SIZE:-}" ]]; then
|
|||
fi
|
||||
|
||||
# Fluentd requirements
|
||||
# The YAML version is templated into the fluentd-gcp DaemonSet name and version label; bump it to trigger a configuration refresh when the YAML changes.
|
||||
FLUENTD_GCP_YAML_VERSION="v3.1.0"
|
||||
FLUENTD_GCP_VERSION="${FLUENTD_GCP_VERSION:-0.2-1.5.30-1-k8s}"
|
||||
FLUENTD_GCP_MEMORY_LIMIT="${FLUENTD_GCP_MEMORY_LIMIT:-}"
|
||||
FLUENTD_GCP_CPU_REQUEST="${FLUENTD_GCP_CPU_REQUEST:-}"
|
||||
|
@ -422,7 +424,7 @@ CUSTOM_KUBE_DASHBOARD_BANNER="${CUSTOM_KUBE_DASHBOARD_BANNER:-}"
|
|||
LOGGING_STACKDRIVER_RESOURCE_TYPES="${LOGGING_STACKDRIVER_RESOURCE_TYPES:-old}"
|
||||
|
||||
# Adding to PROVIDER_VARS, since this is GCP-specific.
|
||||
PROVIDER_VARS="${PROVIDER_VARS:-} FLUENTD_GCP_VERSION FLUENTD_GCP_MEMORY_LIMIT FLUENTD_GCP_CPU_REQUEST FLUENTD_GCP_MEMORY_REQUEST HEAPSTER_GCP_BASE_MEMORY HEAPSTER_GCP_MEMORY_PER_NODE HEAPSTER_GCP_BASE_CPU HEAPSTER_GCP_CPU_PER_NODE CUSTOM_KUBE_DASHBOARD_BANNER LOGGING_STACKDRIVER_RESOURCE_TYPES"
|
||||
PROVIDER_VARS="${PROVIDER_VARS:-} FLUENTD_GCP_YAML_VERSION FLUENTD_GCP_VERSION FLUENTD_GCP_MEMORY_LIMIT FLUENTD_GCP_CPU_REQUEST FLUENTD_GCP_MEMORY_REQUEST HEAPSTER_GCP_BASE_MEMORY HEAPSTER_GCP_MEMORY_PER_NODE HEAPSTER_GCP_BASE_CPU HEAPSTER_GCP_CPU_PER_NODE CUSTOM_KUBE_DASHBOARD_BANNER LOGGING_STACKDRIVER_RESOURCE_TYPES"
|
||||
|
||||
# Fluentd configuration for node-journal
|
||||
ENABLE_NODE_JOURNAL="${ENABLE_NODE_JOURNAL:-false}"
|
||||
|
|
|
@ -420,6 +420,8 @@ if [[ -n "${LOGROTATE_MAX_SIZE:-}" ]]; then
|
|||
fi
|
||||
|
||||
# Fluentd requirements
|
||||
# The YAML version is templated into the fluentd-gcp DaemonSet name and version label; bump it to trigger a configuration refresh when the YAML changes.
|
||||
FLUENTD_GCP_YAML_VERSION="v3.1.0"
|
||||
FLUENTD_GCP_VERSION="${FLUENTD_GCP_VERSION:-0.2-1.5.30-1-k8s}"
|
||||
FLUENTD_GCP_MEMORY_LIMIT="${FLUENTD_GCP_MEMORY_LIMIT:-}"
|
||||
FLUENTD_GCP_CPU_REQUEST="${FLUENTD_GCP_CPU_REQUEST:-}"
|
||||
|
@ -438,7 +440,7 @@ CUSTOM_KUBE_DASHBOARD_BANNER="${CUSTOM_KUBE_DASHBOARD_BANNER:-}"
|
|||
LOGGING_STACKDRIVER_RESOURCE_TYPES="${LOGGING_STACKDRIVER_RESOURCE_TYPES:-old}"
|
||||
|
||||
# Adding to PROVIDER_VARS, since this is GCP-specific.
|
||||
PROVIDER_VARS="${PROVIDER_VARS:-} FLUENTD_GCP_VERSION FLUENTD_GCP_MEMORY_LIMIT FLUENTD_GCP_CPU_REQUEST FLUENTD_GCP_MEMORY_REQUEST HEAPSTER_GCP_BASE_MEMORY HEAPSTER_GCP_MEMORY_PER_NODE HEAPSTER_GCP_BASE_CPU HEAPSTER_GCP_CPU_PER_NODE CUSTOM_KUBE_DASHBOARD_BANNER LOGGING_STACKDRIVER_RESOURCE_TYPES"
|
||||
PROVIDER_VARS="${PROVIDER_VARS:-} FLUENTD_GCP_YAML_VERSION FLUENTD_GCP_VERSION FLUENTD_GCP_MEMORY_LIMIT FLUENTD_GCP_CPU_REQUEST FLUENTD_GCP_MEMORY_REQUEST HEAPSTER_GCP_BASE_MEMORY HEAPSTER_GCP_MEMORY_PER_NODE HEAPSTER_GCP_BASE_CPU HEAPSTER_GCP_CPU_PER_NODE CUSTOM_KUBE_DASHBOARD_BANNER LOGGING_STACKDRIVER_RESOURCE_TYPES"
|
||||
|
||||
# Fluentd configuration for node-journal
|
||||
ENABLE_NODE_JOURNAL="${ENABLE_NODE_JOURNAL:-false}"
|
||||
|
|
|
@ -2237,7 +2237,9 @@ function setup-fluentd {
|
|||
fluentd_gcp_configmap_name="fluentd-gcp-config-old"
|
||||
fi
|
||||
sed -i -e "s@{{ fluentd_gcp_configmap_name }}@${fluentd_gcp_configmap_name}@g" "${fluentd_gcp_yaml}"
|
||||
fluentd_gcp_version="${FLUENTD_GCP_VERSION:-0.2-1.5.30-1-k8s}"
|
||||
fluentd_gcp_yaml_version="${FLUENTD_GCP_YAML_VERSION:-v3.1.0}"
|
||||
sed -i -e "s@{{ fluentd_gcp_yaml_version }}@${fluentd_gcp_yaml_version}@g" "${fluentd_gcp_yaml}"
|
||||
fluentd_gcp_version="${FLUENTD_GCP_VERSION:-0.3-1.5.34-1-k8s-1}"
|
||||
sed -i -e "s@{{ fluentd_gcp_version }}@${fluentd_gcp_version}@g" "${fluentd_gcp_yaml}"
|
||||
update-prometheus-to-sd-parameters ${fluentd_gcp_yaml}
|
||||
start-fluentd-resource-update ${fluentd_gcp_yaml}
|
||||
|
|
Loading…
Reference in New Issue