Merge pull request #59103 from Random-Liu/upload-container-runtime-log

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Upload container runtime log to sd/es.

I've verified this in my environment. My stackdriver has an extra `container-runtime` entry for node log, and it collects container runtime daemon log correctly.

@yujuhong @feiskyer @crassirostris @piosz 
@kubernetes/sig-node-pr-reviews @kubernetes/sig-instrumentation-pr-reviews 
Signed-off-by: Lantao Liu <lantaol@google.com>

**Release note**:

```release-note
Container runtime daemon (e.g. dockerd) logs in GCE cluster will be uploaded to stackdriver and elasticsearch with tag `container-runtime`
```
pull/6/head
Kubernetes Submit Queue 2018-02-14 03:33:21 -08:00 committed by GitHub
commit bc9c6df31d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 46 additions and 8 deletions

View File

@ -1,7 +1,7 @@
kind: ConfigMap
apiVersion: v1
metadata:
name: fluentd-es-config-v0.1.3
name: fluentd-es-config-v0.1.4
namespace: kube-system
labels:
addonmanager.kubernetes.io/mode: Reconcile
@ -160,6 +160,7 @@ data:
# Examples:
# time="2016-02-04T06:51:03.053580605Z" level=info msg="GET /containers/json"
# time="2016-02-04T07:53:57.505612354Z" level=error msg="HTTP Error" err="No such image: -f" statusCode=404
# TODO(random-liu): Remove this after cri container runtime rolls out.
<source>
@id docker.log
@type tail
@ -307,6 +308,7 @@ data:
</source>
# Logs from systemd-journal for interesting services.
# TODO(random-liu): Remove this after cri container runtime rolls out.
<source>
@id journald-docker
@type systemd
@ -319,6 +321,18 @@ data:
tag docker
</source>
<source>
@id journald-container-runtime
@type systemd
filters [{ "_SYSTEMD_UNIT": "{{ container_runtime }}.service" }]
<storage>
@type local
persistent true
</storage>
read_from_head true
tag container-runtime
</source>
<source>
@id journald-kubelet
@type systemd

View File

@ -113,4 +113,4 @@ spec:
path: /usr/lib64
- name: config-volume
configMap:
name: fluentd-es-config-v0.1.3
name: fluentd-es-config-v0.1.4

View File

@ -102,6 +102,7 @@ data:
# Examples:
# time="2016-02-04T06:51:03.053580605Z" level=info msg="GET /containers/json"
# time="2016-02-04T07:53:57.505612354Z" level=error msg="HTTP Error" err="No such image: -f" statusCode=404
# TODO(random-liu): Remove this after cri container runtime rolls out.
<source>
type tail
format /^time="(?<time>[^)]*)" level=(?<severity>[^ ]*) msg="(?<message>[^"]*)"( err="(?<error>[^"]*)")?( statusCode=($<status_code>\d+))?/
@ -239,6 +240,8 @@ data:
</source>
# Logs from systemd-journal for interesting services.
# TODO(random-liu): Keep this for compatibility, remove this after
# cri container runtime rolls out.
<source>
type systemd
filters [{ "_SYSTEMD_UNIT": "docker.service" }]
@ -247,6 +250,14 @@ data:
tag docker
</source>
<source>
type systemd
filters [{ "_SYSTEMD_UNIT": "{{ container_runtime }}.service" }]
pos_file /var/log/gcp-journald-container-runtime.pos
read_from_head true
tag container-runtime
</source>
<source>
type systemd
filters [{ "_SYSTEMD_UNIT": "kubelet.service" }]
@ -387,7 +398,7 @@ data:
num_threads 2
</match>
metadata:
name: fluentd-gcp-config-v1.2.3
name: fluentd-gcp-config-v1.2.4
namespace: kube-system
labels:
addonmanager.kubernetes.io/mode: Reconcile

View File

@ -123,4 +123,4 @@ spec:
path: /usr/lib64
- name: config-volume
configMap:
name: fluentd-gcp-config-v1.2.3
name: fluentd-gcp-config-v1.2.4

View File

@ -562,6 +562,7 @@ ENABLE_PROMETHEUS_TO_SD: $(yaml-quote ${ENABLE_PROMETHEUS_TO_SD:-false})
ENABLE_POD_PRIORITY: $(yaml-quote ${ENABLE_POD_PRIORITY:-})
CONTAINER_RUNTIME: $(yaml-quote ${CONTAINER_RUNTIME:-})
CONTAINER_RUNTIME_ENDPOINT: $(yaml-quote ${CONTAINER_RUNTIME_ENDPOINT:-})
CONTAINER_RUNTIME_NAME: $(yaml-quote ${CONTAINER_RUNTIME_NAME:-})
NODE_LOCAL_SSDS_EXT: $(yaml-quote ${NODE_LOCAL_SSDS_EXT:-})
LOAD_IMAGE_COMMAND: $(yaml-quote ${LOAD_IMAGE_COMMAND:-})
EOF

View File

@ -82,7 +82,8 @@ NODE_IMAGE_PROJECT=${KUBE_GCE_NODE_PROJECT:-cos-cloud}
NODE_SERVICE_ACCOUNT=${KUBE_GCE_NODE_SERVICE_ACCOUNT:-default}
CONTAINER_RUNTIME=${KUBE_CONTAINER_RUNTIME:-docker}
CONTAINER_RUNTIME_ENDPOINT=${KUBE_CONTAINER_RUNTIME_ENDPOINT:-}
LOAD_IMAGE_COMMAND=${KUBE_LOAD_IMAGE_COMMAND:-docker load -i}
CONTAINER_RUNTIME_NAME=${KUBE_CONTAINER_RUNTIME_NAME:-}
LOAD_IMAGE_COMMAND=${KUBE_LOAD_IMAGE_COMMAND:-}
RKT_VERSION=${KUBE_RKT_VERSION:-1.23.0}
RKT_STAGE1_IMAGE=${KUBE_RKT_STAGE1_IMAGE:-coreos.com/rkt/stage1-coreos}
# MASTER_EXTRA_METADATA is the extra instance metadata on master instance separated by commas.

View File

@ -80,7 +80,8 @@ NODE_IMAGE_PROJECT=${KUBE_GCE_NODE_PROJECT:-cos-cloud}
NODE_SERVICE_ACCOUNT=${KUBE_GCE_NODE_SERVICE_ACCOUNT:-default}
CONTAINER_RUNTIME=${KUBE_CONTAINER_RUNTIME:-docker}
CONTAINER_RUNTIME_ENDPOINT=${KUBE_CONTAINER_RUNTIME_ENDPOINT:-}
LOAD_IMAGE_COMMAND=${KUBE_LOAD_IMAGE_COMMAND:-docker load -i}
CONTAINER_RUNTIME_NAME=${KUBE_CONTAINER_RUNTIME_NAME:-}
LOAD_IMAGE_COMMAND=${KUBE_LOAD_IMAGE_COMMAND:-}
GCI_DOCKER_VERSION=${KUBE_GCI_DOCKER_VERSION:-}
RKT_VERSION=${KUBE_RKT_VERSION:-1.23.0}
RKT_STAGE1_IMAGE=${KUBE_RKT_STAGE1_IMAGE:-coreos.com/rkt/stage1-coreos}

View File

@ -2028,6 +2028,12 @@ function start-fluentd-resource-update {
wait-for-apiserver-and-update-fluentd ${fluentd_gcp_yaml} &
}
# Update {{ container-runtime }} with actual container runtime name.
function update-container-runtime {
local -r configmap_yaml="$1"
sed -i -e "s@{{ *container_runtime *}}@${CONTAINER_RUNTIME_NAME:-docker}@g" "${configmap_yaml}"
}
# Updates parameters in yaml file for prometheus-to-sd configuration, or
# removes component if it is disabled.
function update-prometheus-to-sd-parameters {
@ -2180,15 +2186,19 @@ EOF
[[ "${LOGGING_DESTINATION:-}" == "elasticsearch" ]] && \
[[ "${ENABLE_CLUSTER_LOGGING:-}" == "true" ]]; then
setup-addon-manifests "addons" "fluentd-elasticsearch"
local -r fluentd_es_configmap_yaml="${dst_dir}/fluentd-elasticsearch/fluentd-es-configmap.yaml"
update-container-runtime ${fluentd_es_configmap_yaml}
fi
if [[ "${ENABLE_NODE_LOGGING:-}" == "true" ]] && \
[[ "${LOGGING_DESTINATION:-}" == "gcp" ]]; then
setup-addon-manifests "addons" "fluentd-gcp"
local -r event_exporter_yaml="${dst_dir}/fluentd-gcp/event-exporter.yaml"
local -r fluentd_gcp_yaml="${dst_dir}/fluentd-gcp/fluentd-gcp-ds.yaml"
local -r fluentd_gcp_configmap_yaml="${dst_dir}/fluentd-gcp/fluentd-gcp-configmap.yaml"
update-prometheus-to-sd-parameters ${event_exporter_yaml}
update-prometheus-to-sd-parameters ${fluentd_gcp_yaml}
start-fluentd-resource-update ${fluentd_gcp_yaml}
update-container-runtime ${fluentd_gcp_configmap_yaml}
fi
if [[ "${ENABLE_CLUSTER_UI:-}" == "true" ]]; then
setup-addon-manifests "addons" "dashboard"

View File

@ -178,9 +178,9 @@ var _ = instrumentation.SIGDescribe("Cluster level logging implemented by Stackd
framework.ExpectNoError(err)
})
ginkgo.By("Waiting for some docker logs to be ingested from each node", func() {
ginkgo.By("Waiting for some container runtime logs to be ingested from each node", func() {
nodeIds := utils.GetNodeIds(f.ClientSet)
log := fmt.Sprintf("projects/%s/logs/docker", framework.TestContext.CloudConfig.ProjectID)
log := fmt.Sprintf("projects/%s/logs/container-runtime", framework.TestContext.CloudConfig.ProjectID)
c := utils.NewLogChecker(p, utils.UntilFirstEntryFromLog(log), utils.JustTimeout, nodeIds...)
err := utils.WaitForLogs(c, ingestionInterval, ingestionTimeout)
framework.ExpectNoError(err)