mirror of https://github.com/k3s-io/k3s
Merge pull request #48812 from crassirostris/change-fluentd-monitoring
Automatic merge from submit-queue (batch tested with PRs 48812, 48276). Change fluentd-gcp monitoring to use metrics exposed by SD plugin. Following https://github.com/GoogleCloudPlatform/fluent-plugin-google-cloud/pull/135, make fluentd-gcp expose metrics in the Prometheus registry and use them instead of counting records in the pipeline. /cc @piosz @igorpeshansky ```release-note Fluentd-gcp DaemonSet exposes a different set of metrics. ``` pull/6/head
commit
455e44b616
|
@ -70,27 +70,14 @@ data:
|
||||||
|
|
||||||
# Detect exceptions in the log output and forward them as one log entry.
|
# Detect exceptions in the log output and forward them as one log entry.
|
||||||
<match raw.kubernetes.**>
|
<match raw.kubernetes.**>
|
||||||
@type copy
|
@type detect_exceptions
|
||||||
|
|
||||||
<store>
|
remove_tag_prefix raw
|
||||||
@type prometheus
|
message log
|
||||||
|
stream stream
|
||||||
<metric>
|
multiline_flush_interval 5
|
||||||
type counter
|
max_bytes 500000
|
||||||
name logging_line_count
|
max_lines 1000
|
||||||
desc Total number of lines generated by application containers
|
|
||||||
</metric>
|
|
||||||
</store>
|
|
||||||
<store>
|
|
||||||
@type detect_exceptions
|
|
||||||
|
|
||||||
remove_tag_prefix raw
|
|
||||||
message log
|
|
||||||
stream stream
|
|
||||||
multiline_flush_interval 5
|
|
||||||
max_bytes 500000
|
|
||||||
max_lines 1000
|
|
||||||
</store>
|
|
||||||
</match>
|
</match>
|
||||||
system.input.conf: |-
|
system.input.conf: |-
|
||||||
# Example:
|
# Example:
|
||||||
|
@ -342,77 +329,50 @@ data:
|
||||||
# compute.googleapis.com service rather than container.googleapis.com to keep
|
# compute.googleapis.com service rather than container.googleapis.com to keep
|
||||||
# them separate since most users don't care about the node logs.
|
# them separate since most users don't care about the node logs.
|
||||||
<match kubernetes.**>
|
<match kubernetes.**>
|
||||||
@type copy
|
@type google_cloud
|
||||||
|
|
||||||
<store>
|
# Collect metrics in Prometheus registry about plugin activity.
|
||||||
@type google_cloud
|
enable_monitoring true
|
||||||
|
monitoring_type prometheus
|
||||||
# Set the buffer type to file to improve the reliability and reduce the memory consumption
|
# Set the buffer type to file to improve the reliability and reduce the memory consumption
|
||||||
buffer_type file
|
buffer_type file
|
||||||
buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
|
buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
|
||||||
# Set queue_full action to block because we want to pause gracefully
|
# Set queue_full action to block because we want to pause gracefully
|
||||||
# in case of the off-the-limits load instead of throwing an exception
|
# in case of the off-the-limits load instead of throwing an exception
|
||||||
buffer_queue_full_action block
|
buffer_queue_full_action block
|
||||||
# Set the chunk limit conservatively to avoid exceeding the GCL limit
|
# Set the chunk limit conservatively to avoid exceeding the GCL limit
|
||||||
# of 10MiB per write request.
|
# of 10MiB per write request.
|
||||||
buffer_chunk_limit 2M
|
buffer_chunk_limit 2M
|
||||||
# Cap the combined memory usage of this buffer and the one below to
|
# Cap the combined memory usage of this buffer and the one below to
|
||||||
# 2MiB/chunk * (6 + 2) chunks = 16 MiB
|
# 2MiB/chunk * (6 + 2) chunks = 16 MiB
|
||||||
buffer_queue_limit 6
|
buffer_queue_limit 6
|
||||||
# Never wait more than 5 seconds before flushing logs in the non-error case.
|
# Never wait more than 5 seconds before flushing logs in the non-error case.
|
||||||
flush_interval 5s
|
flush_interval 5s
|
||||||
# Never wait longer than 30 seconds between retries.
|
# Never wait longer than 30 seconds between retries.
|
||||||
max_retry_wait 30
|
max_retry_wait 30
|
||||||
# Disable the limit on the number of retries (retry forever).
|
# Disable the limit on the number of retries (retry forever).
|
||||||
disable_retry_limit
|
disable_retry_limit
|
||||||
# Use multiple threads for processing.
|
# Use multiple threads for processing.
|
||||||
num_threads 2
|
num_threads 2
|
||||||
</store>
|
|
||||||
<store>
|
|
||||||
@type prometheus
|
|
||||||
|
|
||||||
<metric>
|
|
||||||
type counter
|
|
||||||
name logging_entry_count
|
|
||||||
desc Total number of log entries generated by either application containers or system components
|
|
||||||
<labels>
|
|
||||||
component container
|
|
||||||
</labels>
|
|
||||||
</metric>
|
|
||||||
</store>
|
|
||||||
</match>
|
</match>
|
||||||
|
|
||||||
# Keep a smaller buffer here since these logs are less important than the user's
|
# Keep a smaller buffer here since these logs are less important than the user's
|
||||||
# container logs.
|
# container logs.
|
||||||
<match **>
|
<match **>
|
||||||
@type copy
|
@type google_cloud
|
||||||
|
|
||||||
<store>
|
enable_monitoring true
|
||||||
@type google_cloud
|
monitoring_type prometheus
|
||||||
|
detect_subservice false
|
||||||
detect_subservice false
|
buffer_type file
|
||||||
buffer_type file
|
buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer
|
||||||
buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer
|
buffer_queue_full_action block
|
||||||
buffer_queue_full_action block
|
buffer_chunk_limit 2M
|
||||||
buffer_chunk_limit 2M
|
buffer_queue_limit 2
|
||||||
buffer_queue_limit 2
|
flush_interval 5s
|
||||||
flush_interval 5s
|
max_retry_wait 30
|
||||||
max_retry_wait 30
|
disable_retry_limit
|
||||||
disable_retry_limit
|
num_threads 2
|
||||||
num_threads 2
|
|
||||||
</store>
|
|
||||||
<store>
|
|
||||||
@type prometheus
|
|
||||||
|
|
||||||
<metric>
|
|
||||||
type counter
|
|
||||||
name logging_entry_count
|
|
||||||
desc Total number of log entries generated by either application containers or system components
|
|
||||||
<labels>
|
|
||||||
component system
|
|
||||||
</labels>
|
|
||||||
</metric>
|
|
||||||
</store>
|
|
||||||
</match>
|
</match>
|
||||||
metadata:
|
metadata:
|
||||||
name: fluentd-gcp-config-v1.1
|
name: fluentd-gcp-config-v1.1
|
||||||
|
|
|
@ -27,7 +27,7 @@ spec:
|
||||||
hostNetwork: true
|
hostNetwork: true
|
||||||
containers:
|
containers:
|
||||||
- name: fluentd-gcp
|
- name: fluentd-gcp
|
||||||
image: gcr.io/google-containers/fluentd-gcp:2.0.7
|
image: gcr.io/google-containers/fluentd-gcp:2.0.8
|
||||||
# If fluentd consumes its own logs, the following situation may happen:
|
# If fluentd consumes its own logs, the following situation may happen:
|
||||||
# fluentd fails to send a chunk to the server => writes it to the log =>
|
# fluentd fails to send a chunk to the server => writes it to the log =>
|
||||||
# tries to send this message to the server => fails to send a chunk and so on.
|
# tries to send this message to the server => fails to send a chunk and so on.
|
||||||
|
@ -90,13 +90,13 @@ spec:
|
||||||
exit 1;
|
exit 1;
|
||||||
fi;
|
fi;
|
||||||
- name: prometheus-to-sd-exporter
|
- name: prometheus-to-sd-exporter
|
||||||
image: gcr.io/google-containers/prometheus-to-sd:v0.1.0
|
image: gcr.io/google-containers/prometheus-to-sd:v0.1.3
|
||||||
command:
|
command:
|
||||||
- /monitor
|
- /monitor
|
||||||
- --component=fluentd
|
- --component=fluentd
|
||||||
- --target-port=31337
|
- --target-port=31337
|
||||||
- --stackdriver-prefix=container.googleapis.com/internal/addons
|
- --stackdriver-prefix=container.googleapis.com/internal/addons
|
||||||
- --whitelisted-metrics=logging_line_count,logging_entry_count
|
- --whitelisted-metrics=stackdriver_successful_requests_count,stackdriver_failed_requests_count,stackdriver_ingested_entries_count,stackdriver_dropped_entries_count
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: ssl-certs
|
- name: ssl-certs
|
||||||
mountPath: /etc/ssl/certs
|
mountPath: /etc/ssl/certs
|
||||||
|
|
Loading…
Reference in New Issue