mirror of https://github.com/k3s-io/k3s
Merge pull request #48812 from crassirostris/change-fluentd-monitoring
Automatic merge from submit-queue (batch tested with PRs 48812, 48276)

Change fluentd-gcp monitoring to use metrics exposed by SD plugin

Following https://github.com/GoogleCloudPlatform/fluent-plugin-google-cloud/pull/135, make fluentd-gcp expose metrics in the Prometheus registry and use them instead of counting records in the pipeline.

/cc @piosz @igorpeshansky

```release-note
Fluentd-gcp DaemonSet exposes a different set of metrics.
```

pull/6/head
commit
455e44b616
|
@ -70,27 +70,14 @@ data:
|
|||
|
||||
# Detect exceptions in the log output and forward them as one log entry.
|
||||
<match raw.kubernetes.**>
|
||||
@type copy
|
||||
@type detect_exceptions
|
||||
|
||||
<store>
|
||||
@type prometheus
|
||||
|
||||
<metric>
|
||||
type counter
|
||||
name logging_line_count
|
||||
desc Total number of lines generated by application containers
|
||||
</metric>
|
||||
</store>
|
||||
<store>
|
||||
@type detect_exceptions
|
||||
|
||||
remove_tag_prefix raw
|
||||
message log
|
||||
stream stream
|
||||
multiline_flush_interval 5
|
||||
max_bytes 500000
|
||||
max_lines 1000
|
||||
</store>
|
||||
remove_tag_prefix raw
|
||||
message log
|
||||
stream stream
|
||||
multiline_flush_interval 5
|
||||
max_bytes 500000
|
||||
max_lines 1000
|
||||
</match>
|
||||
system.input.conf: |-
|
||||
# Example:
|
||||
|
@ -342,77 +329,50 @@ data:
|
|||
# compute.googleapis.com service rather than container.googleapis.com to keep
|
||||
# them separate since most users don't care about the node logs.
|
||||
<match kubernetes.**>
|
||||
@type copy
|
||||
@type google_cloud
|
||||
|
||||
<store>
|
||||
@type google_cloud
|
||||
|
||||
# Set the buffer type to file to improve the reliability and reduce the memory consumption
|
||||
buffer_type file
|
||||
buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
|
||||
# Set queue_full action to block because we want to pause gracefully
|
||||
# in case of the off-the-limits load instead of throwing an exception
|
||||
buffer_queue_full_action block
|
||||
# Set the chunk limit conservatively to avoid exceeding the GCL limit
|
||||
# of 10MiB per write request.
|
||||
buffer_chunk_limit 2M
|
||||
# Cap the combined memory usage of this buffer and the one below to
|
||||
# 2MiB/chunk * (6 + 2) chunks = 16 MiB
|
||||
buffer_queue_limit 6
|
||||
# Never wait more than 5 seconds before flushing logs in the non-error case.
|
||||
flush_interval 5s
|
||||
# Never wait longer than 30 seconds between retries.
|
||||
max_retry_wait 30
|
||||
# Disable the limit on the number of retries (retry forever).
|
||||
disable_retry_limit
|
||||
# Use multiple threads for processing.
|
||||
num_threads 2
|
||||
</store>
|
||||
<store>
|
||||
@type prometheus
|
||||
|
||||
<metric>
|
||||
type counter
|
||||
name logging_entry_count
|
||||
desc Total number of log entries generated by either application containers or system components
|
||||
<labels>
|
||||
component container
|
||||
</labels>
|
||||
</metric>
|
||||
</store>
|
||||
# Collect metrics in Prometheus registry about plugin activity.
|
||||
enable_monitoring true
|
||||
monitoring_type prometheus
|
||||
# Set the buffer type to file to improve the reliability and reduce the memory consumption
|
||||
buffer_type file
|
||||
buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
|
||||
# Set queue_full action to block because we want to pause gracefully
|
||||
# in case of the off-the-limits load instead of throwing an exception
|
||||
buffer_queue_full_action block
|
||||
# Set the chunk limit conservatively to avoid exceeding the GCL limit
|
||||
# of 10MiB per write request.
|
||||
buffer_chunk_limit 2M
|
||||
# Cap the combined memory usage of this buffer and the one below to
|
||||
# 2MiB/chunk * (6 + 2) chunks = 16 MiB
|
||||
buffer_queue_limit 6
|
||||
# Never wait more than 5 seconds before flushing logs in the non-error case.
|
||||
flush_interval 5s
|
||||
# Never wait longer than 30 seconds between retries.
|
||||
max_retry_wait 30
|
||||
# Disable the limit on the number of retries (retry forever).
|
||||
disable_retry_limit
|
||||
# Use multiple threads for processing.
|
||||
num_threads 2
|
||||
</match>
|
||||
|
||||
# Keep a smaller buffer here since these logs are less important than the user's
|
||||
# container logs.
|
||||
<match **>
|
||||
@type copy
|
||||
@type google_cloud
|
||||
|
||||
<store>
|
||||
@type google_cloud
|
||||
|
||||
detect_subservice false
|
||||
buffer_type file
|
||||
buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer
|
||||
buffer_queue_full_action block
|
||||
buffer_chunk_limit 2M
|
||||
buffer_queue_limit 2
|
||||
flush_interval 5s
|
||||
max_retry_wait 30
|
||||
disable_retry_limit
|
||||
num_threads 2
|
||||
</store>
|
||||
<store>
|
||||
@type prometheus
|
||||
|
||||
<metric>
|
||||
type counter
|
||||
name logging_entry_count
|
||||
desc Total number of log entries generated by either application containers or system components
|
||||
<labels>
|
||||
component system
|
||||
</labels>
|
||||
</metric>
|
||||
</store>
|
||||
enable_monitoring true
|
||||
monitoring_type prometheus
|
||||
detect_subservice false
|
||||
buffer_type file
|
||||
buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer
|
||||
buffer_queue_full_action block
|
||||
buffer_chunk_limit 2M
|
||||
buffer_queue_limit 2
|
||||
flush_interval 5s
|
||||
max_retry_wait 30
|
||||
disable_retry_limit
|
||||
num_threads 2
|
||||
</match>
|
||||
metadata:
|
||||
name: fluentd-gcp-config-v1.1
|
||||
|
|
|
@ -27,7 +27,7 @@ spec:
|
|||
hostNetwork: true
|
||||
containers:
|
||||
- name: fluentd-gcp
|
||||
image: gcr.io/google-containers/fluentd-gcp:2.0.7
|
||||
image: gcr.io/google-containers/fluentd-gcp:2.0.8
|
||||
# If fluentd consumes its own logs, the following situation may happen:
|
||||
# fluentd fails to send a chunk to the server => writes it to the log =>
|
||||
# tries to send this message to the server => fails to send a chunk and so on.
|
||||
|
@ -90,13 +90,13 @@ spec:
|
|||
exit 1;
|
||||
fi;
|
||||
- name: prometheus-to-sd-exporter
|
||||
image: gcr.io/google-containers/prometheus-to-sd:v0.1.0
|
||||
image: gcr.io/google-containers/prometheus-to-sd:v0.1.3
|
||||
command:
|
||||
- /monitor
|
||||
- --component=fluentd
|
||||
- --target-port=31337
|
||||
- --stackdriver-prefix=container.googleapis.com/internal/addons
|
||||
- --whitelisted-metrics=logging_line_count,logging_entry_count
|
||||
- --whitelisted-metrics=stackdriver_successful_requests_count,stackdriver_failed_requests_count,stackdriver_ingested_entries_count,stackdriver_dropped_entries_count
|
||||
volumeMounts:
|
||||
- name: ssl-certs
|
||||
mountPath: /etc/ssl/certs
|
||||
|
|
Loading…
Reference in New Issue