k3s/vendor/github.com/google/cadvisor/fs/fs.go

763 lines
22 KiB
Go

// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// +build linux
// Provides Filesystem Stats
package fs
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path"
"path/filepath"
"regexp"
"strconv"
"strings"
"syscall"
"github.com/docker/docker/pkg/mount"
"github.com/google/cadvisor/devicemapper"
"github.com/google/cadvisor/utils"
dockerutil "github.com/google/cadvisor/utils/docker"
zfs "github.com/mistifyio/go-zfs"
"k8s.io/klog"
)
// Labels tag a device with the role it was auto-detected to play. They are
// the keys of RealFsInfo.labels.
const (
// LabelSystemRoot is assigned to the device mounted at "/".
LabelSystemRoot = "root"
// LabelDockerImages is assigned to the device chosen for docker image storage.
LabelDockerImages = "docker-images"
// LabelCrioImages is assigned to the device chosen for CRI-O image storage.
LabelCrioImages = "crio-images"
)
const (
// The block size in bytes. Block counts taken from stat are multiplied by
// this value when directory usage is summed.
statBlockSize uint64 = 512
// The maximum number of `disk usage` tasks that can be running at once.
// This is the capacity of the token pool.
maxConcurrentOps = 20
)
// A pool of tokens restricting the number of disk usage tasks that run
// concurrently. A task takes a token with claimToken before it starts and
// hands it back with releaseToken when it is done.
var pool = make(chan struct{}, maxConcurrentOps)
// init fills the token pool to capacity so that up to maxConcurrentOps
// callers can claim a token without blocking.
func init() {
	for remaining := maxConcurrentOps; remaining > 0; remaining-- {
		pool <- struct{}{}
	}
}
// claimToken takes one token out of the pool, blocking until a token is
// available.
func claimToken() {
<-pool
}
// releaseToken puts one token into the pool. init also uses it to fill the
// pool initially.
func releaseToken() {
pool <- struct{}{}
}
// partition describes one tracked filesystem or storage pool.
type partition struct {
// mountpoint is where the filesystem is mounted. It is left empty for the
// docker devicemapper entry.
mountpoint string
// major and minor are the device numbers.
major uint
minor uint
// fsType is the filesystem type from the mount table, or the devicemapper
// type name for the docker devicemapper entry.
fsType string
// blockSize is only filled in for the docker devicemapper entry, where it
// carries the data block size parsed from `dmsetup table`.
blockSize uint
}
// RealFsInfo holds the partition, label and mount tables built once by
// NewFsInfo and answers filesystem queries from them.
type RealFsInfo struct {
// Map from block device path to partition information.
// The key is not always a device path: tmpfs entries are keyed by their
// mountpoint, overlay entries by "<source>_<major>-<minor>", and the docker
// devicemapper entry by its pool name.
partitions map[string]partition
// Map from label to block device path.
// Labels are intent-specific tags that are auto-detected.
labels map[string]string
// Map from mountpoint to mount information.
mounts map[string]*mount.Info
// devicemapper client
dmsetup devicemapper.DmsetupClient
// fsUUIDToDeviceName is a map from the filesystem UUID to its device name.
fsUUIDToDeviceName map[string]string
}
// NewFsInfo reads the current mount table and builds a RealFsInfo from it:
// the partition table, the mountpoint index, the filesystem-UUID index and
// the docker-images, crio-images and root labels.
//
// It fails only when the mount table cannot be read. A failure to build the
// UUID index is logged and otherwise ignored.
func NewFsInfo(context Context) (FsInfo, error) {
	mountList, err := mount.GetMounts(nil)
	if err != nil {
		return nil, err
	}

	// UUID is not always available across different OS distributions, so an
	// error here is only reported, never returned.
	uuidToDevice, err := getFsUUIDToDeviceNameMap()
	if err != nil {
		klog.Warningf("Failed to get disk UUID mapping, getting disk info by uuid will not work: %v", err)
	}

	// Avoid devicemapper container mounts - these are tracked by the ThinPoolWatcher
	dmMountPrefix := fmt.Sprintf("%s/devicemapper/mnt", context.Docker.Root)

	info := &RealFsInfo{
		partitions:         processMounts(mountList, []string{dmMountPrefix}),
		labels:             map[string]string{},
		mounts:             map[string]*mount.Info{},
		dmsetup:            devicemapper.NewDmsetupClient(),
		fsUUIDToDeviceName: uuidToDevice,
	}
	for i := range mountList {
		entry := mountList[i]
		info.mounts[entry.Mountpoint] = entry
	}

	// The image labels must be added before the partitions are logged below,
	// because adding the docker label may insert a devicemapper "partition".
	info.addDockerImagesLabel(context, mountList)
	info.addCrioImagesLabel(context, mountList)

	klog.V(1).Infof("Filesystem UUIDs: %+v", info.fsUUIDToDeviceName)
	klog.V(1).Infof("Filesystem partitions: %+v", info.partitions)

	info.addSystemRootLabel(mountList)
	return info, nil
}
// getFsUUIDToDeviceNameMap creates the filesystem uuid to device name map
// using the information in /dev/disk/by-uuid. If the directory does not exist,
// this function will return an empty map.
//
// Every directory entry is expected to be a symlink to a device node. Entries
// that cannot be read as a symlink are logged and skipped. A relative link
// target is resolved against the by-uuid directory; an absolute target is
// used as it is.
func getFsUUIDToDeviceNameMap() (map[string]string, error) {
	const dir = "/dev/disk/by-uuid"

	if _, err := os.Stat(dir); os.IsNotExist(err) {
		return make(map[string]string), nil
	}

	files, err := ioutil.ReadDir(dir)
	if err != nil {
		return nil, err
	}

	fsUUIDToDeviceName := make(map[string]string, len(files))
	for _, file := range files {
		link := filepath.Join(dir, file.Name())
		target, err := os.Readlink(link)
		if err != nil {
			klog.Warningf("Failed to resolve symlink for %q: %v", link, err)
			continue
		}
		// Joining an absolute target onto dir would produce a path below
		// /dev/disk/by-uuid instead of the device the link points at.
		if !filepath.IsAbs(target) {
			target = filepath.Join(dir, target)
		}
		device, err := filepath.Abs(target)
		if err != nil {
			return nil, fmt.Errorf("failed to resolve the absolute path of %q: %v", target, err)
		}
		fsUUIDToDeviceName[file.Name()] = device
	}
	return fsUUIDToDeviceName, nil
}
// processMounts turns the mount table into the partition table, keyed by the
// mount source.
//
// Only ext*, btrfs, overlay, tmpfs, xfs and zfs mounts are kept. A mount whose
// source is already present is dropped as a bind mount unless it is tmpfs,
// and so is any mount whose mountpoint starts with one of
// excludedMountpointPrefixes.
//
// The Source (and for btrfs the Major/Minor) of the passed mount entries is
// rewritten in place for tmpfs, btrfs and overlay mounts.
func processMounts(mounts []*mount.Info, excludedMountpointPrefixes []string) map[string]partition {
	// all ext systems are checked through prefix.
	knownTypes := map[string]bool{
		"btrfs":   true,
		"overlay": true,
		"tmpfs":   true,
		"xfs":     true,
		"zfs":     true,
	}
	isSupported := func(fsType string) bool {
		return strings.HasPrefix(fsType, "ext") || knownTypes[fsType]
	}
	isExcluded := func(mountpoint string) bool {
		for i := range excludedMountpointPrefixes {
			if strings.HasPrefix(mountpoint, excludedMountpointPrefixes[i]) {
				return true
			}
		}
		return false
	}

	result := make(map[string]partition, 0)
	for _, m := range mounts {
		if !isSupported(m.Fstype) {
			continue
		}
		// Avoid bind mounts, exclude tmpfs.
		if _, seen := result[m.Source]; seen && m.Fstype != "tmpfs" {
			continue
		}
		if isExcluded(m.Mountpoint) {
			continue
		}

		switch m.Fstype {
		case "tmpfs":
			// tmpfs has no distinguishing device, so the mountpoint is used
			// in its place.
			m.Source = m.Mountpoint
		case "btrfs":
			// btrfs fix: /proc/self/mountinfo reports wrong Major and Minor
			// ids for btrfs, so they are taken from a stat of the mount point.
			if m.Major == 0 && strings.HasPrefix(m.Source, "/dev/") {
				if major, minor, err := getBtrfsMajorMinorIds(m); err != nil {
					klog.Warningf("%s", err)
				} else {
					m.Major = major
					m.Minor = minor
				}
			}
		case "overlay":
			// overlay fix: make the source unique for every overlay mount by
			// appending the mount's major and minor ids.
			m.Source = fmt.Sprintf("%s_%d-%d", m.Source, m.Major, m.Minor)
		}

		result[m.Source] = partition{
			fsType:     m.Fstype,
			mountpoint: m.Mountpoint,
			major:      uint(m.Major),
			minor:      uint(m.Minor),
		}
	}
	return result
}
// getDockerDeviceMapperInfo returns the devicemapper device and its
// "partition" when docker uses devicemapper as its storage driver. When a
// loopback data file is in use it returns neither information nor an error,
// because reporting should then be based on the actual partition where the
// loopback file resides instead of the loopback file itself.
func (self *RealFsInfo) getDockerDeviceMapperInfo(context DockerContext) (string, *partition, error) {
	if context.Driver != DeviceMapper.String() {
		return "", nil, nil
	}
	if loopFile := context.DriverStatus[dockerutil.DriverStatusDataLoopFile]; loopFile != "" {
		return "", nil, nil
	}

	device, devMajor, devMinor, dataBlockSize, err := dockerDMDevice(context.DriverStatus, self.dmsetup)
	if err != nil {
		return "", nil, err
	}

	dmPartition := &partition{
		fsType:    DeviceMapper.String(),
		major:     devMajor,
		minor:     devMinor,
		blockSize: dataBlockSize,
	}
	return device, dmPartition, nil
}
// addSystemRootLabel records the first mount whose mountpoint is "/" as a
// partition and points the root label at its source. Nothing changes when no
// such mount exists.
func (self *RealFsInfo) addSystemRootLabel(mounts []*mount.Info) {
	for i := range mounts {
		root := mounts[i]
		if root.Mountpoint != "/" {
			continue
		}
		self.partitions[root.Source] = partition{
			fsType:     root.Fstype,
			mountpoint: root.Mountpoint,
			major:      uint(root.Major),
			minor:      uint(root.Minor),
		}
		self.labels[LabelSystemRoot] = root.Source
		return
	}
}
// addDockerImagesLabel determines which device backs docker image storage.
// A devicemapper device reported by getDockerDeviceMapperInfo is preferred;
// otherwise the device is picked by matching mountpoints against the
// candidate docker image paths.
func (self *RealFsInfo) addDockerImagesLabel(context Context, mounts []*mount.Info) {
	device, dmPartition, err := self.getDockerDeviceMapperInfo(context.Docker)
	if err != nil {
		klog.Warningf("Could not get Docker devicemapper device: %v", err)
	}

	if device == "" || dmPartition == nil {
		self.updateContainerImagesPath(LabelDockerImages, mounts, getDockerImagePaths(context))
		return
	}

	self.partitions[device] = *dmPartition
	self.labels[LabelDockerImages] = device
}
// addCrioImagesLabel determines which device backs CRI-O image storage. It
// does nothing when no CRI-O root is configured. The candidate mountpoints
// are "/", the overlay-images and overlay2-images directories under the
// CRI-O root, and the root itself together with each of its ancestors.
func (self *RealFsInfo) addCrioImagesLabel(context Context, mounts []*mount.Info) {
	root := context.Crio.Root
	if root == "" {
		return
	}

	candidates := map[string]struct{}{"/": {}}
	candidates[path.Join(root, "overlay-images")] = struct{}{}
	candidates[path.Join(root, "overlay2-images")] = struct{}{}
	for ancestor := root; ancestor != "/" && ancestor != "."; ancestor = filepath.Dir(ancestor) {
		candidates[ancestor] = struct{}{}
	}

	self.updateContainerImagesPath(LabelCrioImages, mounts, candidates)
}
// getDockerImagePaths builds the set of mountpoints that could hold docker
// images: "/", one directory per supported graph driver under the docker
// root, and the docker root itself together with each of its ancestors.
// Every supported graph driver directory is included for now; this could be
// narrowed by using more of the context from `docker info`.
func getDockerImagePaths(context Context) map[string]struct{} {
	// TODO(rjnagal): Detect docker root and graphdriver directories from docker info.
	root := context.Docker.Root
	graphDrivers := [...]string{"devicemapper", "btrfs", "aufs", "overlay", "overlay2", "zfs"}

	candidates := map[string]struct{}{"/": {}}
	for i := range graphDrivers {
		candidates[path.Join(root, graphDrivers[i])] = struct{}{}
	}
	for ancestor := root; ancestor != "/" && ancestor != "."; ancestor = filepath.Dir(ancestor) {
		candidates[ancestor] = struct{}{}
	}
	return candidates
}
// updateContainerImagesPath looks for mounts whose mountpoint is one of
// containerImagePaths. Among the matches the one with the longest mountpoint
// wins (the first such mount on ties); it is recorded as a partition and the
// label is pointed at its source. Nothing changes when no mount matches.
func (self *RealFsInfo) updateContainerImagesPath(label string, mounts []*mount.Info, containerImagePaths map[string]struct{}) {
	var best *mount.Info
	for _, candidate := range mounts {
		if _, wanted := containerImagePaths[candidate.Mountpoint]; !wanted {
			continue
		}
		if best != nil && len(best.Mountpoint) >= len(candidate.Mountpoint) {
			continue
		}
		best = candidate
	}
	if best == nil {
		return
	}

	self.partitions[best.Source] = partition{
		fsType:     best.Fstype,
		mountpoint: best.Mountpoint,
		major:      uint(best.Major),
		minor:      uint(best.Minor),
	}
	self.labels[label] = best.Source
}
// GetDeviceForLabel returns the device recorded for label, or an error when
// the label has not been assigned to any device.
func (self *RealFsInfo) GetDeviceForLabel(label string) (string, error) {
	if device, known := self.labels[label]; known {
		return device, nil
	}
	return "", fmt.Errorf("non-existent label %q", label)
}
// GetLabelsForDevice returns every label that points at device. The result
// is an empty, non-nil slice when there is none, and the error is always nil.
func (self *RealFsInfo) GetLabelsForDevice(device string) ([]string, error) {
	matched := make([]string, 0)
	for label := range self.labels {
		if self.labels[label] != device {
			continue
		}
		matched = append(matched, label)
	}
	return matched, nil
}
// GetMountpointForDevice returns the mountpoint of the partition recorded
// for dev, or an error when dev is not in the partition table.
func (self *RealFsInfo) GetMountpointForDevice(dev string) (string, error) {
	if entry, known := self.partitions[dev]; known {
		return entry.mountpoint, nil
	}
	return "", fmt.Errorf("no partition info for device %q", dev)
}
// GetFsInfoForPath returns usage statistics for the known partitions whose
// mountpoint is a key of mountSet. A nil mountSet selects every partition.
//
// Capacity figures come from getDMStats for devicemapper partitions, from
// getZfstats for ZFS partitions when /dev/zfs exists, and from statfs on the
// mountpoint in every other case. A partition whose statistics cannot be
// read is logged and left out of the result rather than failing the call.
// The only error returned is a failure to read /proc/diskstats.
func (self *RealFsInfo) GetFsInfoForPath(mountSet map[string]struct{}) ([]Fs, error) {
	filesystems := make([]Fs, 0)
	deviceSet := make(map[string]struct{})
	diskStatsMap, err := getDiskStatsMap("/proc/diskstats")
	if err != nil {
		return nil, err
	}
	for device, partition := range self.partitions {
		_, hasMount := mountSet[partition.mountpoint]
		_, hasDevice := deviceSet[device]
		if mountSet != nil && (!hasMount || hasDevice) {
			continue
		}

		var (
			err error
			fs  Fs
		)
		switch partition.fsType {
		case DeviceMapper.String():
			fs.Capacity, fs.Free, fs.Available, err = getDMStats(device, partition.blockSize)
			klog.V(5).Infof("got devicemapper fs capacity stats: capacity: %v free: %v available: %v:", fs.Capacity, fs.Free, fs.Available)
			fs.Type = DeviceMapper
		case ZFS.String():
			// os.Stat returns a nil error when the path exists. os.IsExist
			// only recognises "already exists" errors, which Stat never
			// produces, so the presence check has to compare against nil.
			if _, statErr := os.Stat("/dev/zfs"); statErr == nil {
				fs.Capacity, fs.Free, fs.Available, err = getZfstats(device)
				fs.Type = ZFS
				break
			}
			// if /dev/zfs is not present default to VFS
			fallthrough
		default:
			var inodes, inodesFree uint64
			if utils.FileExists(partition.mountpoint) {
				fs.Capacity, fs.Free, fs.Available, inodes, inodesFree, err = getVfsStats(partition.mountpoint)
				fs.Inodes = &inodes
				fs.InodesFree = &inodesFree
				fs.Type = VFS
			} else {
				// err stays nil here, so the entry is still reported below,
				// with an unset type and zero capacity figures.
				klog.V(4).Infof("unable to determine file system type, partition mountpoint does not exist: %v", partition.mountpoint)
			}
		}
		if err != nil {
			klog.V(4).Infof("Stat fs failed. Error: %v", err)
			continue
		}

		deviceSet[device] = struct{}{}
		fs.DeviceInfo = DeviceInfo{
			Device: device,
			Major:  partition.major,
			Minor:  partition.minor,
		}
		fs.DiskStats = diskStatsMap[device]
		filesystems = append(filesystems, fs)
	}
	return filesystems, nil
}
// partitionRegex matches the device names whose /proc/diskstats rows are
// collected: sd*, vd* and xvd* disks with an optional partition number
// (for example sda, vdb1, xvdc2) and device-mapper nodes such as dm-0.
var partitionRegex = regexp.MustCompile(`^(?:(?:s|v|xv)d[a-z]+\d*|dm-\d+)$`)
// getDiskStatsMap parses a file in /proc/diskstats format and returns the I/O
// counters of every device whose name matches partitionRegex, keyed by
// "/dev/<name>".
//
// A missing file is not an error: a warning is logged and an empty map is
// returned. Lines with fewer than three fields (blank lines, for instance)
// are skipped. A matching line with fewer than 11 counters, a counter that
// is not an unsigned integer, or a read error while scanning makes the call
// fail.
func getDiskStatsMap(diskStatsFile string) (map[string]DiskStats, error) {
	diskStatsMap := make(map[string]DiskStats)
	file, err := os.Open(diskStatsFile)
	if err != nil {
		if os.IsNotExist(err) {
			klog.Warningf("Not collecting filesystem statistics because file %q was not found", diskStatsFile)
			return diskStatsMap, nil
		}
		return nil, err
	}
	defer file.Close()

	const (
		nameField  = 2  // index of the device name column
		statsStart = 3  // index of the first counter column
		statsCount = 11 // number of counters stored in DiskStats
	)

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		// 8 50 sdd2 40 0 280 223 7 0 22 108 0 330 330
		words := strings.Fields(scanner.Text())
		// Guard the index: a short or empty line has no device name column.
		if len(words) <= nameField || !partitionRegex.MatchString(words[nameField]) {
			continue
		}
		deviceName := path.Join("/dev", words[nameField])

		counters := words[statsStart:]
		if len(counters) < statsCount {
			return nil, fmt.Errorf("could not parse all 11 columns of /proc/diskstats")
		}
		// Every counter column present is parsed, including any beyond the
		// 11 that are stored, so a malformed extra column is still reported.
		stats := make([]uint64, len(counters))
		for i, counter := range counters {
			stats[i], err = strconv.ParseUint(counter, 10, 64)
			if err != nil {
				return nil, err
			}
		}

		diskStatsMap[deviceName] = DiskStats{
			ReadsCompleted:  stats[0],
			ReadsMerged:     stats[1],
			SectorsRead:     stats[2],
			ReadTime:        stats[3],
			WritesCompleted: stats[4],
			WritesMerged:    stats[5],
			SectorsWritten:  stats[6],
			WriteTime:       stats[7],
			IoInProgress:    stats[8],
			IoTime:          stats[9],
			WeightedIoTime:  stats[10],
		}
	}
	// Scan returns false on a read error as well as at end of file.
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return diskStatsMap, nil
}
// GetGlobalFsInfo returns filesystem statistics for every known partition.
// It calls GetFsInfoForPath with a nil mount set, which disables filtering.
func (self *RealFsInfo) GetGlobalFsInfo() ([]Fs, error) {
return self.GetFsInfoForPath(nil)
}
// major extracts the major device number from devNumber: the 12 bits that
// start at bit 8.
func major(devNumber uint64) uint {
	const majorMask = 0xfff
	shifted := devNumber >> 8
	return uint(shifted & majorMask)
}
// minor extracts the minor device number from devNumber: the low 8 bits
// combined with the 12 bits that start at bit 20, which land in bits 8-19 of
// the result.
func minor(devNumber uint64) uint {
	lowBits := devNumber & 0xff
	highBits := (devNumber >> 12) & 0xfff00
	return uint(highBits | lowBits)
}
// GetDeviceInfoByFsUUID resolves a filesystem UUID to its device name and
// device numbers. It returns ErrNoSuchDevice when the UUID is unknown, and a
// descriptive error when the device it maps to is not in the partition table.
func (self *RealFsInfo) GetDeviceInfoByFsUUID(uuid string) (*DeviceInfo, error) {
	deviceName, known := self.fsUUIDToDeviceName[uuid]
	if !known {
		return nil, ErrNoSuchDevice
	}
	if entry, tracked := self.partitions[deviceName]; tracked {
		return &DeviceInfo{deviceName, entry.major, entry.minor}, nil
	}
	return nil, fmt.Errorf("cannot find device %q in partitions", deviceName)
}
// GetDirFsDevice returns the device that holds dir.
//
// dir is stat'ed and its device numbers are looked up in the partition table.
// When nothing matches, the mount covering dir is searched for by walking up
// its parent directories; if that mount is a btrfs mount that reported a
// zero major number, the device numbers are taken from a stat of the mount
// instead. An error is returned when dir cannot be stat'ed or no device is
// found.
func (self *RealFsInfo) GetDirFsDevice(dir string) (*DeviceInfo, error) {
	buf := new(syscall.Stat_t)
	if err := syscall.Stat(dir, buf); err != nil {
		return nil, fmt.Errorf("stat failed on %s with error: %s", dir, err)
	}

	devMajor := major(buf.Dev)
	devMinor := minor(buf.Dev)
	for device, partition := range self.partitions {
		if partition.major == devMajor && partition.minor == devMinor {
			return &DeviceInfo{device, devMajor, devMinor}, nil
		}
	}

	mnt, found := self.mounts[dir]
	// try the parent dir if not found until we reach the root dir
	// this is an issue on btrfs systems where the directory is not
	// the subvolume
	for !found {
		pathdir, _ := filepath.Split(dir)
		// break when we reach root. An empty parent means dir was a relative
		// path that has run out of components; without this check the loop
		// would keep splitting "" forever.
		if pathdir == "/" || pathdir == "" {
			break
		}
		// trim "/" from the new parent path otherwise the next possible
		// filepath.Split in the loop will not split the string any further
		dir = strings.TrimSuffix(pathdir, "/")
		mnt, found = self.mounts[dir]
	}

	if found && mnt.Fstype == "btrfs" && mnt.Major == 0 && strings.HasPrefix(mnt.Source, "/dev/") {
		btrfsMajor, btrfsMinor, err := getBtrfsMajorMinorIds(mnt)
		if err != nil {
			klog.Warningf("%s", err)
		} else {
			return &DeviceInfo{mnt.Source, uint(btrfsMajor), uint(btrfsMinor)}, nil
		}
	}
	return nil, fmt.Errorf("could not find device with major: %d, minor: %d in cached partitions map", devMajor, devMinor)
}
// GetDirUsage walks dir and returns the bytes and inodes used beneath it.
//
// Only entries on the same device as dir are counted, and an inode with more
// than one link is counted once. Entries that disappear during the walk are
// ignored. Any other error aborts the walk and is returned together with the
// usage gathered up to that point.
func GetDirUsage(dir string) (UsageInfo, error) {
	var usage UsageInfo

	if dir == "" {
		return usage, fmt.Errorf("invalid directory")
	}

	rootInfo, err := os.Stat(dir)
	if err != nil {
		return usage, fmt.Errorf("could not stat %q to get inode usage: %v", dir, err)
	}

	rootStat, ok := rootInfo.Sys().(*syscall.Stat_t)
	if !ok {
		return usage, fmt.Errorf("unsuported fileinfo for getting inode usage of %q", dir)
	}

	rootDevId := rootStat.Dev

	// dedupedInode stores inodes that could be duplicates (nlink > 1)
	dedupedInodes := make(map[uint64]struct{})

	err = filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
		if os.IsNotExist(err) {
			// expected if files appear/vanish
			return nil
		}
		if err != nil {
			return fmt.Errorf("unable to count inodes for part of dir %s: %s", dir, err)
		}

		// according to the docs, Sys can be nil
		if info.Sys() == nil {
			return fmt.Errorf("fileinfo Sys is nil")
		}

		s, ok := info.Sys().(*syscall.Stat_t)
		if !ok {
			return fmt.Errorf("unsupported fileinfo; could not convert to stat_t")
		}

		if s.Dev != rootDevId {
			// don't descend into directories on other devices
			if info.IsDir() {
				return filepath.SkipDir
			}
			// Returning SkipDir for a non-directory would make Walk skip the
			// remaining entries of the containing directory, so a file on
			// another device is only left uncounted.
			return nil
		}

		if s.Nlink > 1 {
			if _, seen := dedupedInodes[s.Ino]; seen {
				// Dedupe things that could be hardlinks
				return nil
			}
			dedupedInodes[s.Ino] = struct{}{}
		}
		usage.Bytes += uint64(s.Blocks) * statBlockSize
		usage.Inodes++
		return nil
	})

	// Report a failed walk to the caller instead of discarding it.
	return usage, err
}
// GetDirUsage returns the disk usage of dir by calling the package-level
// GetDirUsage. A token is claimed from the pool first and released when the
// call returns, so the call waits while no token is available.
func (self *RealFsInfo) GetDirUsage(dir string) (UsageInfo, error) {
claimToken()
defer releaseToken()
return GetDirUsage(dir)
}
// getVfsStats runs statfs on path and returns the filesystem's total, free
// and available sizes in bytes followed by its total and free inode counts.
// Sizes are block counts multiplied by the fragment size. All results are
// zero when statfs fails.
func getVfsStats(path string) (total uint64, free uint64, avail uint64, inodes uint64, inodesFree uint64, err error) {
	var st syscall.Statfs_t
	err = syscall.Statfs(path, &st)
	if err != nil {
		return 0, 0, 0, 0, 0, err
	}

	fragmentSize := uint64(st.Frsize)
	return fragmentSize * st.Blocks,
		fragmentSize * st.Bfree,
		fragmentSize * st.Bavail,
		uint64(st.Files),
		uint64(st.Ffree),
		nil
}
// dockerDMDevice looks up the docker thin pool named in driverStatus and
// returns the pool name together with the major number, minor number and
// data block size parsed from its `dmsetup table` output.
//
// Devicemapper thin provisioning is detailed at
// https://www.kernel.org/doc/Documentation/device-mapper/thin-provisioning.txt
func dockerDMDevice(driverStatus map[string]string, dmsetup devicemapper.DmsetupClient) (string, uint, uint, uint, error) {
	// A missing key yields the empty string, so one comparison covers both
	// the absent and the empty pool name.
	poolName := driverStatus[dockerutil.DriverStatusPoolName]
	if poolName == "" {
		return "", 0, 0, 0, fmt.Errorf("Could not get dm pool name")
	}

	table, err := dmsetup.Table(poolName)
	if err != nil {
		return "", 0, 0, 0, err
	}

	devMajor, devMinor, blockSize, err := parseDMTable(string(table))
	if err != nil {
		return "", 0, 0, 0, err
	}
	return poolName, devMajor, devMinor, blockSize, nil
}
// parseDMTable parses a single line of `dmsetup table` output and returns the
// major device, minor device, block size, and an error.
//
// Colons are treated as field separators, so each "major:minor" pair becomes
// two fields. The line must then have at least 8 whitespace-separated fields;
// fields 5, 6 and 7 (zero-based) are read as the major number, minor number
// and data block size. An error is returned for a shorter line or when one of
// those fields is not an unsigned 32-bit integer.
func parseDMTable(dmTable string) (uint, uint, uint, error) {
	dmTable = strings.Replace(dmTable, ":", " ", -1)
	dmFields := strings.Fields(dmTable)

	if len(dmFields) < 8 {
		// The input is `dmsetup table` output, not `dmsetup status` output.
		return 0, 0, 0, fmt.Errorf("Invalid dmsetup table output: %s", dmTable)
	}

	major, err := strconv.ParseUint(dmFields[5], 10, 32)
	if err != nil {
		return 0, 0, 0, err
	}
	minor, err := strconv.ParseUint(dmFields[6], 10, 32)
	if err != nil {
		return 0, 0, 0, err
	}
	dataBlkSize, err := strconv.ParseUint(dmFields[7], 10, 32)
	if err != nil {
		return 0, 0, 0, err
	}

	return uint(major), uint(minor), uint(dataBlkSize), nil
}
// getDMStats runs `dmsetup status` for poolName and returns the pool's total,
// free and available sizes in bytes. Free and available are the same value.
// The used and total block counts from the status output are multiplied by
// dataBlkSize times 512 to get bytes.
func getDMStats(poolName string, dataBlkSize uint) (uint64, uint64, uint64, error) {
	status, err := exec.Command("dmsetup", "status", poolName).Output()
	if err != nil {
		return 0, 0, 0, err
	}

	usedBlocks, totalBlocks, err := parseDMStatus(string(status))
	if err != nil {
		return 0, 0, 0, err
	}

	bytesPerBlock := 512 * uint64(dataBlkSize)
	totalBytes := totalBlocks * bytesPerBlock
	freeBytes := totalBytes - usedBlocks*bytesPerBlock
	return totalBytes, freeBytes, freeBytes, nil
}
// parseDMStatus parses one line of `dmsetup status` output and returns the
// used and total block counts.
//
// Slashes are treated as field separators, so each "used/total" pair becomes
// two fields. The line must then have at least 8 whitespace-separated fields;
// fields 6 and 7 (zero-based) are read as the used and total counts.
func parseDMStatus(dmStatus string) (uint64, uint64, error) {
	normalized := strings.Replace(dmStatus, "/", " ", -1)
	fields := strings.Fields(normalized)
	if len(fields) < 8 {
		return 0, 0, fmt.Errorf("Invalid dmsetup status output: %s", normalized)
	}

	used, usedErr := strconv.ParseUint(fields[6], 10, 64)
	if usedErr != nil {
		return 0, 0, usedErr
	}
	total, totalErr := strconv.ParseUint(fields[7], 10, 64)
	if totalErr != nil {
		return 0, 0, totalErr
	}
	return used, total, nil
}
// getZfstats looks up the ZFS dataset named poolName using zfsutils and
// returns its total, free and available sizes. The total is the sum of the
// dataset's Used, Avail and Usedbydataset values; free and available are both
// its Avail value.
func getZfstats(poolName string) (uint64, uint64, uint64, error) {
	ds, err := zfs.GetDataset(poolName)
	if err != nil {
		return 0, 0, 0, err
	}

	capacity := ds.Used + ds.Avail + ds.Usedbydataset
	available := ds.Avail
	return capacity, available, available, nil
}
// byteCounter is a minimal io.Writer that discards its input and only keeps
// a running total of how many bytes were written to it.
type byteCounter struct {
	bytesWritten uint64
}

// Write adds len(p) to the running total. It always reports the whole of p
// as written and never fails.
func (b *byteCounter) Write(p []byte) (int, error) {
	n := len(p)
	b.bytesWritten += uint64(n)
	return n, nil
}
// getBtrfsMajorMinorIds returns the major and minor device ids for a btrfs
// mount.
//
// btrfs fix: /proc/self/mountinfo reports wrong Major and Minor ids for
// btrfs, so they are obtained from a stat of the mount point instead. The
// mount source must be a block device; otherwise an error is returned.
func getBtrfsMajorMinorIds(mount *mount.Info) (int, int, error) {
	st := new(syscall.Stat_t)
	if err := syscall.Stat(mount.Source, st); err != nil {
		return 0, 0, fmt.Errorf("stat failed on %s with error: %s", mount.Source, err)
	}

	klog.V(4).Infof("btrfs mount %#v", mount)

	if st.Mode&syscall.S_IFMT != syscall.S_IFBLK {
		return 0, 0, fmt.Errorf("%s is not a block device", mount.Source)
	}

	// The same buffer is reused: from here on it describes the mount point.
	if err := syscall.Stat(mount.Mountpoint, st); err != nil {
		return 0, 0, fmt.Errorf("stat failed on %s with error: %s", mount.Mountpoint, err)
	}

	devMajor, devMinor := int(major(st.Dev)), int(minor(st.Dev))
	klog.V(4).Infof("btrfs dev major:minor %d:%d\n", devMajor, devMinor)
	klog.V(4).Infof("btrfs rdev major:minor %d:%d\n", int(major(st.Rdev)), int(minor(st.Rdev)))
	return devMajor, devMinor, nil
}