Browse Source

filesystem: fix mountTimeout not working issue (#2903)

Signed-off-by: DongWei <jiangxuege@hotmail.com>
pull/2928/head
DongWei 9 months ago committed by GitHub
parent
commit
9f1f791ac2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 22
      collector/filesystem_linux.go

22
collector/filesystem_linux.go

@ -122,16 +122,8 @@ func (c *filesystemCollector) processStat(labels filesystemLabels) filesystemSta
buf := new(unix.Statfs_t) buf := new(unix.Statfs_t)
err := unix.Statfs(rootfsFilePath(labels.mountPoint), buf) err := unix.Statfs(rootfsFilePath(labels.mountPoint), buf)
stuckMountsMtx.Lock()
close(success) close(success)
// If the mount has been marked as stuck, unmark it and log it's recovery.
if _, ok := stuckMounts[labels.mountPoint]; ok {
level.Debug(c.logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", labels.mountPoint)
delete(stuckMounts, labels.mountPoint)
}
stuckMountsMtx.Unlock()
if err != nil { if err != nil {
level.Debug(c.logger).Log("msg", "Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err) level.Debug(c.logger).Log("msg", "Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err)
return filesystemStats{ return filesystemStats{
@ -161,17 +153,29 @@ func stuckMountWatcher(mountPoint string, success chan struct{}, logger log.Logg
select { select {
case <-success: case <-success:
// Success // Success
// If the mount has been marked as stuck, unmark it and log it's recovery.
stuckMountsMtx.Lock()
defer stuckMountsMtx.Unlock()
if _, ok := stuckMounts[mountPoint]; ok {
level.Debug(logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", mountPoint)
delete(stuckMounts, mountPoint)
}
case <-mountCheckTimer.C: case <-mountCheckTimer.C:
// Timed out, mark mount as stuck // Timed out, mark mount as stuck
stuckMountsMtx.Lock() stuckMountsMtx.Lock()
defer stuckMountsMtx.Unlock()
select { select {
case <-success: case <-success:
// Success came in just after the timeout was reached, don't label the mount as stuck // Success came in just after the timeout was reached, don't label the mount as stuck
// If the mount has been marked as stuck, unmark it and log it's recovery.
if _, ok := stuckMounts[mountPoint]; ok {
level.Debug(logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", mountPoint)
delete(stuckMounts, mountPoint)
}
default: default:
level.Debug(logger).Log("msg", "Mount point timed out, it is being labeled as stuck and will not be monitored", "mountpoint", mountPoint) level.Debug(logger).Log("msg", "Mount point timed out, it is being labeled as stuck and will not be monitored", "mountpoint", mountPoint)
stuckMounts[mountPoint] = struct{}{} stuckMounts[mountPoint] = struct{}{}
} }
stuckMountsMtx.Unlock()
} }
} }

Loading…
Cancel
Save