prometheusmetricshost-metricsmachine-metricsnode-metricsprocfsprometheus-exportersystem-informationsystem-metrics
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
160 lines
4.6 KiB
160 lines
4.6 KiB
// Copyright 2015 The Prometheus Authors |
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
|
// you may not use this file except in compliance with the License. |
|
// You may obtain a copy of the License at |
|
// |
|
// http://www.apache.org/licenses/LICENSE-2.0 |
|
// |
|
// Unless required by applicable law or agreed to in writing, software |
|
// distributed under the License is distributed on an "AS IS" BASIS, |
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
// See the License for the specific language governing permissions and |
|
// limitations under the License. |
|
|
|
// +build !nofilesystem |
|
|
|
package collector |
|
|
|
import ( |
|
"bufio" |
|
"os" |
|
"strings" |
|
"sync" |
|
"syscall" |
|
"time" |
|
|
|
"github.com/prometheus/common/log" |
|
) |
|
|
|
// Constants used by the filesystem collector.
const (
	// defIgnoredMountPoints is a regular expression matching mount points
	// under /dev, /proc, /sys and /var/lib/docker/.
	// NOTE(review): presumably the default for the collector's
	// ignoredMountPointsPattern; the wiring is not in this part of the file.
	defIgnoredMountPoints = "^/(dev|proc|sys|var/lib/docker/.+)($|/)"

	// defIgnoredFSTypes is a regular expression matching virtual and
	// pseudo filesystem types.
	// NOTE(review): presumably the default for the collector's
	// ignoredFSTypesPattern; the wiring is not in this part of the file.
	defIgnoredFSTypes = "^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$"

	// readOnly is the ST_RDONLY flag bit.
	readOnly = 0x1

	// mountTimeout is how long a statfs call may take before the watched
	// mount point is marked as stuck.
	mountTimeout = 30 * time.Second
)
|
|
|
// Shared bookkeeping for mount points whose statfs call timed out.
var (
	// stuckMounts is the set of mount points currently considered stuck,
	// keyed by mount point path. Guarded by stuckMountsMtx.
	stuckMounts = map[string]struct{}{}

	// stuckMountsMtx serialises every access to stuckMounts.
	stuckMountsMtx = new(sync.Mutex)
)
|
|
|
// GetStats returns filesystem stats. |
|
func (c *filesystemCollector) GetStats() ([]filesystemStats, error) { |
|
mps, err := mountPointDetails() |
|
if err != nil { |
|
return nil, err |
|
} |
|
stats := []filesystemStats{} |
|
for _, labels := range mps { |
|
if c.ignoredMountPointsPattern.MatchString(labels.mountPoint) { |
|
log.Debugf("Ignoring mount point: %s", labels.mountPoint) |
|
continue |
|
} |
|
if c.ignoredFSTypesPattern.MatchString(labels.fsType) { |
|
log.Debugf("Ignoring fs type: %s", labels.fsType) |
|
continue |
|
} |
|
stuckMountsMtx.Lock() |
|
if _, ok := stuckMounts[labels.mountPoint]; ok { |
|
stats = append(stats, filesystemStats{ |
|
labels: labels, |
|
deviceError: 1, |
|
}) |
|
log.Debugf("Mount point %q is in an unresponsive state", labels.mountPoint) |
|
stuckMountsMtx.Unlock() |
|
continue |
|
} |
|
stuckMountsMtx.Unlock() |
|
|
|
// The success channel is used do tell the "watcher" that the stat |
|
// finished successfully. The channel is closed on success. |
|
success := make(chan struct{}) |
|
go stuckMountWatcher(labels.mountPoint, success) |
|
|
|
buf := new(syscall.Statfs_t) |
|
err = syscall.Statfs(labels.mountPoint, buf) |
|
|
|
stuckMountsMtx.Lock() |
|
close(success) |
|
// If the mount has been marked as stuck, unmark it and log it's recovery. |
|
if _, ok := stuckMounts[labels.mountPoint]; ok { |
|
log.Debugf("Mount point %q has recovered, monitoring will resume", labels.mountPoint) |
|
delete(stuckMounts, labels.mountPoint) |
|
} |
|
stuckMountsMtx.Unlock() |
|
|
|
if err != nil { |
|
stats = append(stats, filesystemStats{ |
|
labels: labels, |
|
deviceError: 1, |
|
}) |
|
log.Debugf("Error on statfs() system call for %q: %s", labels.mountPoint, err) |
|
continue |
|
} |
|
|
|
var ro float64 |
|
for _, option := range strings.Split(labels.options, ",") { |
|
if option == "ro" { |
|
ro = 1 |
|
break |
|
} |
|
} |
|
|
|
stats = append(stats, filesystemStats{ |
|
labels: labels, |
|
size: float64(buf.Blocks) * float64(buf.Bsize), |
|
free: float64(buf.Bfree) * float64(buf.Bsize), |
|
avail: float64(buf.Bavail) * float64(buf.Bsize), |
|
files: float64(buf.Files), |
|
filesFree: float64(buf.Ffree), |
|
ro: ro, |
|
}) |
|
} |
|
return stats, nil |
|
} |
|
|
|
// stuckMountWatcher listens on the given success channel and if the channel closes |
|
// then the watcher does nothing. If instead the timeout is reached, the |
|
// mount point that is being watched is marked as stuck. |
|
func stuckMountWatcher(mountPoint string, success chan struct{}) { |
|
select { |
|
case <-success: |
|
// Success |
|
case <-time.After(mountTimeout): |
|
// Timed out, mark mount as stuck |
|
stuckMountsMtx.Lock() |
|
select { |
|
case <-success: |
|
// Success came in just after the timeout was reached, don't label the mount as stuck |
|
default: |
|
log.Debugf("Mount point %q timed out, it is being labeled as stuck and will not be monitored", mountPoint) |
|
stuckMounts[mountPoint] = struct{}{} |
|
} |
|
stuckMountsMtx.Unlock() |
|
} |
|
} |
|
|
|
func mountPointDetails() ([]filesystemLabels, error) { |
|
file, err := os.Open(procFilePath("mounts")) |
|
if err != nil { |
|
return nil, err |
|
} |
|
defer file.Close() |
|
|
|
filesystems := []filesystemLabels{} |
|
scanner := bufio.NewScanner(file) |
|
for scanner.Scan() { |
|
parts := strings.Fields(scanner.Text()) |
|
|
|
// Ensure we handle the translation of \040 and \011 |
|
// as per fstab(5). |
|
parts[1] = strings.Replace(parts[1], "\\040", " ", -1) |
|
parts[1] = strings.Replace(parts[1], "\\011", "\t", -1) |
|
|
|
filesystems = append(filesystems, filesystemLabels{ |
|
device: parts[0], |
|
mountPoint: parts[1], |
|
fsType: parts[2], |
|
options: parts[3], |
|
}) |
|
} |
|
return filesystems, scanner.Err() |
|
}
|
|
|