Merge pull request #20185 from a-robinson/fluent

Fluentd improvements to lessen likelihood of buffers filling up and hanging
pull/6/head
Alex Robinson 2016-01-26 17:02:08 -08:00
commit f99cc645bb
7 changed files with 28 additions and 18 deletions

View File

@@ -1,7 +1,7 @@
.PHONY: build push
IMAGE = fluentd-elasticsearch
TAG = 1.12
TAG = 1.13
build:
docker build -t gcr.io/google_containers/$(IMAGE):$(TAG) .

View File

@@ -100,6 +100,11 @@
# problem yet to be solved as secrets are not usable in static pods which the fluentd
# pod must be until a per-node controller is available in Kubernetes.
# Do not directly collect fluentd's own logs to avoid infinite loops.
<match fluent.**>
type null
</match>
<source>
type tail
path /var/log/containers/*.log
@@ -186,12 +191,12 @@
port 9200
logstash_format true
# Set the chunk limit the same as for fluentd-gcp.
buffer_chunk_limit 512K
# Cap buffer memory usage to 512KB/chunk * 128 chunks = 65 MB
buffer_queue_limit 128
buffer_chunk_limit 2M
# Cap buffer memory usage to 2MiB/chunk * 32 chunks = 64 MiB
buffer_queue_limit 32
flush_interval 5s
# Never wait longer than 30 seconds between retries.
max_retry_wait 300
max_retry_wait 30
# Disable the limit on the number of retries (retry forever).
disable_retry_limit
</match>

View File

@@ -14,7 +14,7 @@
.PHONY: kbuild kpush
TAG = 1.14
TAG = 1.15
# Rules for building the test image for deployment to Dockerhub with user kubernetes.

View File

@@ -42,6 +42,11 @@
# the name of the Kubernetes container regardless of how many times the
# Kubernetes pod has been restarted (resulting in several Docker container IDs).
# Do not directly collect fluentd's own logs to avoid infinite loops.
<match fluent.**>
type null
</match>
<source>
type tail
format json
@@ -130,15 +135,15 @@
<match kubernetes.**>
type google_cloud
# Set the chunk limit conservatively to avoid exceeding the GCL limit
# of 2MB per write request.
buffer_chunk_limit 512K
# of 10MiB per write request.
buffer_chunk_limit 2M
# Cap the combined memory usage of this buffer and the one below to
# 512KB/chunk * (96 + 32) chunks = 65 MB
buffer_queue_limit 96
# 2MiB/chunk * (24 + 8) chunks = 64 MiB
buffer_queue_limit 24
# Never wait more than 5 seconds before flushing logs in the non-error case.
flush_interval 5s
# Never wait longer than 5 minutes between retries.
max_retry_wait 300
# Never wait longer than 30 seconds between retries.
max_retry_wait 30
# Disable the limit on the number of retries (retry forever).
disable_retry_limit
</match>
@@ -148,9 +153,9 @@
<match **>
type google_cloud
detect_subservice false
buffer_chunk_limit 512K
buffer_queue_limit 32
buffer_chunk_limit 2M
buffer_queue_limit 8
flush_interval 5s
max_retry_wait 300
max_retry_wait 30
disable_retry_limit
</match>

View File

@@ -8,7 +8,7 @@ metadata:
spec:
containers:
- name: fluentd-elasticsearch
image: gcr.io/google_containers/fluentd-elasticsearch:1.12
image: gcr.io/google_containers/fluentd-elasticsearch:1.13
resources:
limits:
cpu: 100m

View File

@@ -8,7 +8,7 @@ metadata:
spec:
containers:
- name: fluentd-cloud-logging
image: gcr.io/google_containers/fluentd-gcp:1.14
image: gcr.io/google_containers/fluentd-gcp:1.15
resources:
limits:
cpu: 100m

View File

@@ -172,7 +172,7 @@ metadata:
spec:
containers:
- name: fluentd-cloud-logging
image: gcr.io/google_containers/fluentd-gcp:1.14
image: gcr.io/google_containers/fluentd-gcp:1.15
resources:
limits:
cpu: 100m