Merge pull request #48812 from crassirostris/change-fluentd-monitoring

Automatic merge from submit-queue (batch tested with PRs 48812, 48276)

Change fluentd-gcp monitoring to use metrics exposed by the Stackdriver (SD) plugin

Following https://github.com/GoogleCloudPlatform/fluent-plugin-google-cloud/pull/135, make fluentd-gcp expose metrics in the Prometheus registry and use those metrics for monitoring instead of counting records in the pipeline.

/cc @piosz @igorpeshansky

```release-note
The fluentd-gcp DaemonSet exposes a different set of metrics.
```
pull/6/head
Kubernetes Submit Queue 2017-07-14 04:43:42 -07:00 committed by GitHub
commit 455e44b616
2 changed files with 47 additions and 87 deletions

View File

@ -70,27 +70,14 @@ data:
# Detect exceptions in the log output and forward them as one log entry. # Detect exceptions in the log output and forward them as one log entry.
<match raw.kubernetes.**> <match raw.kubernetes.**>
@type copy @type detect_exceptions
<store> remove_tag_prefix raw
@type prometheus message log
stream stream
<metric> multiline_flush_interval 5
type counter max_bytes 500000
name logging_line_count max_lines 1000
desc Total number of lines generated by application containers
</metric>
</store>
<store>
@type detect_exceptions
remove_tag_prefix raw
message log
stream stream
multiline_flush_interval 5
max_bytes 500000
max_lines 1000
</store>
</match> </match>
system.input.conf: |- system.input.conf: |-
# Example: # Example:
@ -342,77 +329,50 @@ data:
# compute.googleapis.com service rather than container.googleapis.com to keep # compute.googleapis.com service rather than container.googleapis.com to keep
# them separate since most users don't care about the node logs. # them separate since most users don't care about the node logs.
<match kubernetes.**> <match kubernetes.**>
@type copy @type google_cloud
<store> # Collect metrics in Prometheus registry about plugin activity.
@type google_cloud enable_monitoring true
monitoring_type prometheus
# Set the buffer type to file to improve the reliability and reduce the memory consumption # Set the buffer type to file to improve the reliability and reduce the memory consumption
buffer_type file buffer_type file
buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
# Set queue_full action to block because we want to pause gracefully # Set queue_full action to block because we want to pause gracefully
# in case of the off-the-limits load instead of throwing an exception # in case of the off-the-limits load instead of throwing an exception
buffer_queue_full_action block buffer_queue_full_action block
# Set the chunk limit conservatively to avoid exceeding the GCL limit # Set the chunk limit conservatively to avoid exceeding the GCL limit
# of 10MiB per write request. # of 10MiB per write request.
buffer_chunk_limit 2M buffer_chunk_limit 2M
# Cap the combined memory usage of this buffer and the one below to # Cap the combined memory usage of this buffer and the one below to
# 2MiB/chunk * (6 + 2) chunks = 16 MiB # 2MiB/chunk * (6 + 2) chunks = 16 MiB
buffer_queue_limit 6 buffer_queue_limit 6
# Never wait more than 5 seconds before flushing logs in the non-error case. # Never wait more than 5 seconds before flushing logs in the non-error case.
flush_interval 5s flush_interval 5s
# Never wait longer than 30 seconds between retries. # Never wait longer than 30 seconds between retries.
max_retry_wait 30 max_retry_wait 30
# Disable the limit on the number of retries (retry forever). # Disable the limit on the number of retries (retry forever).
disable_retry_limit disable_retry_limit
# Use multiple threads for processing. # Use multiple threads for processing.
num_threads 2 num_threads 2
</store>
<store>
@type prometheus
<metric>
type counter
name logging_entry_count
desc Total number of log entries generated by either application containers or system components
<labels>
component container
</labels>
</metric>
</store>
</match> </match>
# Keep a smaller buffer here since these logs are less important than the user's # Keep a smaller buffer here since these logs are less important than the user's
# container logs. # container logs.
<match **> <match **>
@type copy @type google_cloud
<store> enable_monitoring true
@type google_cloud monitoring_type prometheus
detect_subservice false
detect_subservice false buffer_type file
buffer_type file buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer
buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer buffer_queue_full_action block
buffer_queue_full_action block buffer_chunk_limit 2M
buffer_chunk_limit 2M buffer_queue_limit 2
buffer_queue_limit 2 flush_interval 5s
flush_interval 5s max_retry_wait 30
max_retry_wait 30 disable_retry_limit
disable_retry_limit num_threads 2
num_threads 2
</store>
<store>
@type prometheus
<metric>
type counter
name logging_entry_count
desc Total number of log entries generated by either application containers or system components
<labels>
component system
</labels>
</metric>
</store>
</match> </match>
metadata: metadata:
name: fluentd-gcp-config-v1.1 name: fluentd-gcp-config-v1.1

View File

@ -27,7 +27,7 @@ spec:
hostNetwork: true hostNetwork: true
containers: containers:
- name: fluentd-gcp - name: fluentd-gcp
image: gcr.io/google-containers/fluentd-gcp:2.0.7 image: gcr.io/google-containers/fluentd-gcp:2.0.8
# If fluentd consumes its own logs, the following situation may happen: # If fluentd consumes its own logs, the following situation may happen:
# fluentd fails to send a chunk to the server => writes it to the log => # fluentd fails to send a chunk to the server => writes it to the log =>
# tries to send this message to the server => fails to send a chunk and so on. # tries to send this message to the server => fails to send a chunk and so on.
@ -90,13 +90,13 @@ spec:
exit 1; exit 1;
fi; fi;
- name: prometheus-to-sd-exporter - name: prometheus-to-sd-exporter
image: gcr.io/google-containers/prometheus-to-sd:v0.1.0 image: gcr.io/google-containers/prometheus-to-sd:v0.1.3
command: command:
- /monitor - /monitor
- --component=fluentd - --component=fluentd
- --target-port=31337 - --target-port=31337
- --stackdriver-prefix=container.googleapis.com/internal/addons - --stackdriver-prefix=container.googleapis.com/internal/addons
- --whitelisted-metrics=logging_line_count,logging_entry_count - --whitelisted-metrics=stackdriver_successful_requests_count,stackdriver_failed_requests_count,stackdriver_ingested_entries_count,stackdriver_dropped_entries_count
volumeMounts: volumeMounts:
- name: ssl-certs - name: ssl-certs
mountPath: /etc/ssl/certs mountPath: /etc/ssl/certs