|
|
@ -122,16 +122,8 @@ func (c *filesystemCollector) processStat(labels filesystemLabels) filesystemSta |
|
|
|
|
|
|
|
|
|
|
|
buf := new(unix.Statfs_t) |
|
|
|
buf := new(unix.Statfs_t) |
|
|
|
err := unix.Statfs(rootfsFilePath(labels.mountPoint), buf) |
|
|
|
err := unix.Statfs(rootfsFilePath(labels.mountPoint), buf) |
|
|
|
stuckMountsMtx.Lock() |
|
|
|
|
|
|
|
close(success) |
|
|
|
close(success) |
|
|
|
|
|
|
|
|
|
|
|
// If the mount has been marked as stuck, unmark it and log its recovery.
|
|
|
|
|
|
|
|
if _, ok := stuckMounts[labels.mountPoint]; ok { |
|
|
|
|
|
|
|
level.Debug(c.logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", labels.mountPoint) |
|
|
|
|
|
|
|
delete(stuckMounts, labels.mountPoint) |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
stuckMountsMtx.Unlock() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if err != nil { |
|
|
|
if err != nil { |
|
|
|
level.Debug(c.logger).Log("msg", "Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err) |
|
|
|
level.Debug(c.logger).Log("msg", "Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err) |
|
|
|
return filesystemStats{ |
|
|
|
return filesystemStats{ |
|
|
@ -161,17 +153,29 @@ func stuckMountWatcher(mountPoint string, success chan struct{}, logger log.Logg |
|
|
|
select { |
|
|
|
select { |
|
|
|
case <-success: |
|
|
|
case <-success: |
|
|
|
// Success
|
|
|
|
// Success
|
|
|
|
|
|
|
|
// If the mount has been marked as stuck, unmark it and log its recovery.
|
|
|
|
|
|
|
|
stuckMountsMtx.Lock() |
|
|
|
|
|
|
|
defer stuckMountsMtx.Unlock() |
|
|
|
|
|
|
|
if _, ok := stuckMounts[mountPoint]; ok { |
|
|
|
|
|
|
|
level.Debug(logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", mountPoint) |
|
|
|
|
|
|
|
delete(stuckMounts, mountPoint) |
|
|
|
|
|
|
|
} |
|
|
|
case <-mountCheckTimer.C: |
|
|
|
case <-mountCheckTimer.C: |
|
|
|
// Timed out, mark mount as stuck
|
|
|
|
// Timed out, mark mount as stuck
|
|
|
|
stuckMountsMtx.Lock() |
|
|
|
stuckMountsMtx.Lock() |
|
|
|
|
|
|
|
defer stuckMountsMtx.Unlock() |
|
|
|
select { |
|
|
|
select { |
|
|
|
case <-success: |
|
|
|
case <-success: |
|
|
|
// Success came in just after the timeout was reached, don't label the mount as stuck
|
|
|
|
// Success came in just after the timeout was reached, don't label the mount as stuck
|
|
|
|
|
|
|
|
// If the mount has been marked as stuck, unmark it and log its recovery.
|
|
|
|
|
|
|
|
if _, ok := stuckMounts[mountPoint]; ok { |
|
|
|
|
|
|
|
level.Debug(logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", mountPoint) |
|
|
|
|
|
|
|
delete(stuckMounts, mountPoint) |
|
|
|
|
|
|
|
} |
|
|
|
default: |
|
|
|
default: |
|
|
|
level.Debug(logger).Log("msg", "Mount point timed out, it is being labeled as stuck and will not be monitored", "mountpoint", mountPoint) |
|
|
|
level.Debug(logger).Log("msg", "Mount point timed out, it is being labeled as stuck and will not be monitored", "mountpoint", mountPoint) |
|
|
|
stuckMounts[mountPoint] = struct{}{} |
|
|
|
stuckMounts[mountPoint] = struct{}{} |
|
|
|
} |
|
|
|
} |
|
|
|
stuckMountsMtx.Unlock() |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|