node_exporter/text_collector_examples/mellanox_hca_temp

60 lines
1.7 KiB
Plaintext
Raw Normal View History

#!/bin/bash
set -eu
# Script to read Mellanox HCA temperature using the Mellanox mget_temp_ext tool
# Copyright 2018 The Prometheus Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author: Jan Phillip Greimann <jan.greimann@cloud.ionos.com>
# check if root
if [ "$EUID" -ne 0 ]; then
echo "${0##*/}: Please run as root!" >&2
exit 1
fi
# check if programs are installed
if ! command -v mget_temp_ext >/dev/null 2>&1; then
echo "${0##*/}: mget_temp_ext is not installed. Aborting." >&2
exit 1
fi
cat <<EOF
# HELP node_infiniband_hca_temp_celsius Celsius temperature of Mellanox InfiniBand HCA.
# TYPE node_infiniband_hca_temp_celsius gauge
EOF
# run for each found Mellanox device
for dev in /sys/class/infiniband/*; do
if test ! -d "$dev"; then
continue
fi
device="${dev##*/}"
# get temperature
if temperature="$(mget_temp_ext -d "${device}")"; then
# output
echo "node_infiniband_hca_temp_celsius{hca_device=\"${device}\"} ${temperature//[[:space:]]/}"
else
echo "${0##*/}: Failed to get temperature from InfiniBand HCA '${device}'!" >&2
fi
done
# if device is empty, no device was found
if [ -z "${device-}" ]; then
echo "${0##*/}: No InfiniBand HCA device found!" >&2
exit 1
fi