@ -18,8 +18,13 @@ package collector
import (
import (
"fmt"
"fmt"
"path"
"strings"
"syscall"
dennwc "github.com/dennwc/btrfs"
"github.com/go-kit/log"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/procfs/btrfs"
"github.com/prometheus/procfs/btrfs"
)
)
@ -52,19 +57,163 @@ func NewBtrfsCollector(logger log.Logger) (Collector, error) {
func ( c * btrfsCollector ) Update ( ch chan <- prometheus . Metric ) error {
func ( c * btrfsCollector ) Update ( ch chan <- prometheus . Metric ) error {
stats , err := c . fs . Stats ( )
stats , err := c . fs . Stats ( )
if err != nil {
if err != nil {
return fmt . Errorf ( "failed to retrieve Btrfs stats: %w" , err )
return fmt . Errorf ( "failed to retrieve Btrfs stats from procfs: %w" , err )
}
ioctlStatsMap , err := c . getIoctlStats ( )
if err != nil {
level . Debug ( c . logger ) . Log (
"msg" , "Error querying btrfs device stats with ioctl" ,
"err" , err )
ioctlStatsMap = make ( map [ string ] * btrfsIoctlFsStats )
}
}
for _ , s := range stats {
for _ , s := range stats {
c . updateBtrfsStats ( ch , s )
// match up procfs and ioctl info by filesystem UUID (without dashes)
var fsUUID = strings . Replace ( s . UUID , "-" , "" , - 1 )
ioctlStats := ioctlStatsMap [ fsUUID ]
c . updateBtrfsStats ( ch , s , ioctlStats )
}
}
return nil
return nil
}
}
type btrfsIoctlFsDevStats struct {
path string
uuid string
bytesUsed uint64
totalBytes uint64
// The error stats below match the following upstream lists:
// https://github.com/dennwc/btrfs/blob/b3db0b2dedac3bf580f412034d77e0bf4b420167/btrfs.go#L132-L140
// https://github.com/torvalds/linux/blob/70d605cbeecb408dd884b1f0cd3963eeeaac144c/include/uapi/linux/btrfs.h#L680-L692
writeErrs uint64
readErrs uint64
flushErrs uint64
corruptionErrs uint64
generationErrs uint64
}
type btrfsIoctlFsStats struct {
uuid string
devices [ ] btrfsIoctlFsDevStats
}
func ( c * btrfsCollector ) getIoctlStats ( ) ( map [ string ] * btrfsIoctlFsStats , error ) {
// Instead of introducing more ioctl calls to scan for all btrfs
// filesytems re-use our mount point utils to find known mounts
mountsList , err := mountPointDetails ( c . logger )
if err != nil {
return nil , err
}
// track devices we have successfully scanned, by device path
devicesDone := make ( map [ string ] struct { } )
// filesystems scann results by UUID
fsStats := make ( map [ string ] * btrfsIoctlFsStats )
for _ , mount := range mountsList {
if mount . fsType != "btrfs" {
continue
}
if _ , found := devicesDone [ mount . device ] ; found {
// We already found this filesystem by another mount point
continue
}
fs , err := dennwc . Open ( mount . mountPoint , true )
if err != nil {
// failed to open this mount point, maybe we didn't have permission
// maybe we'll find another mount point for this FS later
level . Debug ( c . logger ) . Log (
"msg" , "Error inspecting btrfs mountpoint" ,
"mountPoint" , mount . mountPoint ,
"err" , err )
continue
}
fsInfo , err := fs . Info ( )
if err != nil {
// Failed to get the FS info for some reason,
// perhaps it'll work with a different mount point
level . Debug ( c . logger ) . Log (
"msg" , "Error querying btrfs filesystem" ,
"mountPoint" , mount . mountPoint ,
"err" , err )
continue
}
fsID := fsInfo . FSID . String ( )
if _ , found := fsStats [ fsID ] ; found {
// We already found this filesystem by another mount point
continue
}
deviceStats , err := c . getIoctlDeviceStats ( fs , & fsInfo )
if err != nil {
level . Debug ( c . logger ) . Log (
"msg" , "Error querying btrfs device stats" ,
"mountPoint" , mount . mountPoint ,
"err" , err )
continue
}
devicesDone [ mount . device ] = struct { } { }
fsStats [ fsID ] = & btrfsIoctlFsStats {
uuid : fsID ,
devices : deviceStats ,
}
}
return fsStats , nil
}
func ( c * btrfsCollector ) getIoctlDeviceStats ( fs * dennwc . FS , fsInfo * dennwc . Info ) ( [ ] btrfsIoctlFsDevStats , error ) {
devices := make ( [ ] btrfsIoctlFsDevStats , 0 , fsInfo . NumDevices )
for i := uint64 ( 0 ) ; i <= fsInfo . MaxID ; i ++ {
deviceInfo , err := fs . GetDevInfo ( i )
if err != nil {
if errno , ok := err . ( syscall . Errno ) ; ok && errno == syscall . ENODEV {
// device IDs do not consistently start at 0, nor are ranges contiguous, so we expect this
continue
}
return nil , err
}
deviceStats , err := fs . GetDevStats ( i )
if err != nil {
return nil , err
}
devices = append ( devices , btrfsIoctlFsDevStats {
path : deviceInfo . Path ,
uuid : deviceInfo . UUID . String ( ) ,
bytesUsed : deviceInfo . BytesUsed ,
totalBytes : deviceInfo . TotalBytes ,
writeErrs : deviceStats . WriteErrs ,
readErrs : deviceStats . ReadErrs ,
flushErrs : deviceStats . FlushErrs ,
corruptionErrs : deviceStats . CorruptionErrs ,
generationErrs : deviceStats . GenerationErrs ,
} )
if uint64 ( len ( devices ) ) == fsInfo . NumDevices {
break
}
}
return devices , nil
}
// btrfsMetric represents a single Btrfs metric that is converted into a Prometheus Metric.
// btrfsMetric represents a single Btrfs metric that is converted into a Prometheus Metric.
type btrfsMetric struct {
type btrfsMetric struct {
name string
name string
metricType prometheus . ValueType
desc string
desc string
value float64
value float64
extraLabel [ ] string
extraLabel [ ] string
@ -72,14 +221,14 @@ type btrfsMetric struct {
}
}
// updateBtrfsStats collects statistics for one bcache ID.
// updateBtrfsStats collects statistics for one bcache ID.
func ( c * btrfsCollector ) updateBtrfsStats ( ch chan <- prometheus . Metric , s * btrfs . Stats ) {
func ( c * btrfsCollector ) updateBtrfsStats ( ch chan <- prometheus . Metric , s * btrfs . Stats , ioctlStats * btrfsIoctlFsStats ) {
const subsystem = "btrfs"
const subsystem = "btrfs"
// Basic information about the filesystem.
// Basic information about the filesystem.
devLabels := [ ] string { "uuid" }
devLabels := [ ] string { "uuid" }
// Retrieve the metrics.
// Retrieve the metrics.
metrics := c . getMetrics ( s )
metrics := c . getMetrics ( s , ioctlStats )
// Convert all gathered metrics to Prometheus Metrics and add to channel.
// Convert all gathered metrics to Prometheus Metrics and add to channel.
for _ , m := range metrics {
for _ , m := range metrics {
@ -99,7 +248,7 @@ func (c *btrfsCollector) updateBtrfsStats(ch chan<- prometheus.Metric, s *btrfs.
ch <- prometheus . MustNewConstMetric (
ch <- prometheus . MustNewConstMetric (
desc ,
desc ,
prometheus. GaugeValu e,
m. metricTyp e,
m . value ,
m . value ,
labelValues ... ,
labelValues ... ,
)
)
@ -107,37 +256,103 @@ func (c *btrfsCollector) updateBtrfsStats(ch chan<- prometheus.Metric, s *btrfs.
}
}
// getMetrics returns metrics for the given Btrfs statistics.
// getMetrics returns metrics for the given Btrfs statistics.
func ( c * btrfsCollector ) getMetrics ( s * btrfs . Stats ) [ ] btrfsMetric {
func ( c * btrfsCollector ) getMetrics ( s * btrfs . Stats , ioctlStats * btrfsIoctlFsStats ) [ ] btrfsMetric {
metrics := [ ] btrfsMetric {
metrics := [ ] btrfsMetric {
{
{
name : "info" ,
name : "info" ,
desc : "Filesystem information" ,
desc : "Filesystem information" ,
value : 1 ,
value : 1 ,
metricType : prometheus . GaugeValue ,
extraLabel : [ ] string { "label" } ,
extraLabel : [ ] string { "label" } ,
extraLabelValue : [ ] string { s . Label } ,
extraLabelValue : [ ] string { s . Label } ,
} ,
} ,
{
{
name : "global_rsv_size_bytes" ,
name : "global_rsv_size_bytes" ,
desc : "Size of global reserve." ,
desc : "Size of global reserve." ,
metricType : prometheus . GaugeValue ,
value : float64 ( s . Allocation . GlobalRsvSize ) ,
value : float64 ( s . Allocation . GlobalRsvSize ) ,
} ,
} ,
}
}
// Information about data, metadata and system data.
metrics = append ( metrics , c . getAllocationStats ( "data" , s . Allocation . Data ) ... )
metrics = append ( metrics , c . getAllocationStats ( "metadata" , s . Allocation . Metadata ) ... )
metrics = append ( metrics , c . getAllocationStats ( "system" , s . Allocation . System ) ... )
// Information about devices.
// Information about devices.
if ioctlStats == nil {
for n , dev := range s . Devices {
for n , dev := range s . Devices {
metrics = append ( metrics , btrfsMetric {
metrics = append ( metrics , btrfsMetric {
name : "device_size_bytes" ,
name : "device_size_bytes" ,
desc : "Size of a device that is part of the filesystem." ,
desc : "Size of a device that is part of the filesystem." ,
metricType : prometheus . GaugeValue ,
value : float64 ( dev . Size ) ,
value : float64 ( dev . Size ) ,
extraLabel : [ ] string { "device" } ,
extraLabel : [ ] string { "device" } ,
extraLabelValue : [ ] string { n } ,
extraLabelValue : [ ] string { n } ,
} )
} )
}
}
return metrics
}
// Information about data, metadata and system data.
for _ , dev := range ioctlStats . devices {
metrics = append ( metrics , c . getAllocationStats ( "data" , s . Allocation . Data ) ... )
// trim the path prefix from the device name so the value should match
metrics = append ( metrics , c . getAllocationStats ( "metadata" , s . Allocation . Metadata ) ... )
// the value used in the fallback branch above.
metrics = append ( metrics , c . getAllocationStats ( "system" , s . Allocation . System ) ... )
// e.g. /dev/sda -> sda, /rootfs/dev/md1 -> md1
_ , device := path . Split ( dev . path )
extraLabels := [ ] string { "device" , "btrfs_dev_uuid" }
extraLabelValues := [ ] string { device , dev . uuid }
metrics = append ( metrics ,
btrfsMetric {
name : "device_size_bytes" ,
desc : "Size of a device that is part of the filesystem." ,
metricType : prometheus . GaugeValue ,
value : float64 ( dev . totalBytes ) ,
extraLabel : extraLabels ,
extraLabelValue : extraLabelValues ,
} ,
// A bytes available metric is probably more useful than a
// bytes used metric, because large numbers of bytes will
// suffer from floating point representation issues
// and we probably care more about the number when it's low anyway
btrfsMetric {
name : "device_unused_bytes" ,
desc : "Unused bytes unused on a device that is part of the filesystem." ,
metricType : prometheus . GaugeValue ,
value : float64 ( dev . totalBytes - dev . bytesUsed ) ,
extraLabel : extraLabels ,
extraLabelValue : extraLabelValues ,
} )
errorLabels := append ( [ ] string { "type" } , extraLabels ... )
values := [ ] uint64 {
dev . writeErrs ,
dev . readErrs ,
dev . flushErrs ,
dev . corruptionErrs ,
dev . generationErrs ,
}
btrfsErrorTypeNames := [ ] string {
"write" ,
"read" ,
"flush" ,
"corruption" ,
"generation" ,
}
for i , errorType := range btrfsErrorTypeNames {
metrics = append ( metrics ,
btrfsMetric {
name : "device_errors_total" ,
desc : "Errors reported for the device" ,
metricType : prometheus . CounterValue ,
value : float64 ( values [ i ] ) ,
extraLabel : errorLabels ,
extraLabelValue : append ( [ ] string { errorType } , extraLabelValues ... ) ,
} )
}
}
return metrics
return metrics
}
}
@ -148,6 +363,7 @@ func (c *btrfsCollector) getAllocationStats(a string, s *btrfs.AllocationStats)
{
{
name : "reserved_bytes" ,
name : "reserved_bytes" ,
desc : "Amount of space reserved for a data type" ,
desc : "Amount of space reserved for a data type" ,
metricType : prometheus . GaugeValue ,
value : float64 ( s . ReservedBytes ) ,
value : float64 ( s . ReservedBytes ) ,
extraLabel : [ ] string { "block_group_type" } ,
extraLabel : [ ] string { "block_group_type" } ,
extraLabelValue : [ ] string { a } ,
extraLabelValue : [ ] string { a } ,
@ -168,6 +384,7 @@ func (c *btrfsCollector) getLayoutStats(a, l string, s *btrfs.LayoutUsage) []btr
{
{
name : "used_bytes" ,
name : "used_bytes" ,
desc : "Amount of used space by a layout/data type" ,
desc : "Amount of used space by a layout/data type" ,
metricType : prometheus . GaugeValue ,
value : float64 ( s . UsedBytes ) ,
value : float64 ( s . UsedBytes ) ,
extraLabel : [ ] string { "block_group_type" , "mode" } ,
extraLabel : [ ] string { "block_group_type" , "mode" } ,
extraLabelValue : [ ] string { a , l } ,
extraLabelValue : [ ] string { a , l } ,
@ -175,6 +392,7 @@ func (c *btrfsCollector) getLayoutStats(a, l string, s *btrfs.LayoutUsage) []btr
{
{
name : "size_bytes" ,
name : "size_bytes" ,
desc : "Amount of space allocated for a layout/data type" ,
desc : "Amount of space allocated for a layout/data type" ,
metricType : prometheus . GaugeValue ,
value : float64 ( s . TotalBytes ) ,
value : float64 ( s . TotalBytes ) ,
extraLabel : [ ] string { "block_group_type" , "mode" } ,
extraLabel : [ ] string { "block_group_type" , "mode" } ,
extraLabelValue : [ ] string { a , l } ,
extraLabelValue : [ ] string { a , l } ,
@ -182,6 +400,7 @@ func (c *btrfsCollector) getLayoutStats(a, l string, s *btrfs.LayoutUsage) []btr
{
{
name : "allocation_ratio" ,
name : "allocation_ratio" ,
desc : "Data allocation ratio for a layout/data type" ,
desc : "Data allocation ratio for a layout/data type" ,
metricType : prometheus . GaugeValue ,
value : s . Ratio ,
value : s . Ratio ,
extraLabel : [ ] string { "block_group_type" , "mode" } ,
extraLabel : [ ] string { "block_group_type" , "mode" } ,
extraLabelValue : [ ] string { a , l } ,
extraLabelValue : [ ] string { a , l } ,