Refactor NFS client collector (#816)

* Update vendor github.com/prometheus/procfs/...

* Refactor NFS collector

Use new procfs library to parse NFS client stats.

* Ignore a missing nfs proc file.

* Refactor with reflection to walk the structs.
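
The reflection-based walk mentioned in the last bullet amounts to something like the minimal sketch below. It is a simplified stand-alone sketch, not the collector itself: v2Stats is a hypothetical stand-in for procfs's NFS stats structs, and it prints samples instead of sending Prometheus counters. The actual collector in the diff below performs the same field walk but emits node_nfs_procedures_total via prometheus.MustNewConstMetric.

package main

import (
	"fmt"
	"reflect"
	"strings"
)

// v2Stats is a hypothetical stand-in for a procfs NFS stats struct:
// one uint64 counter per NFSv2 procedure, named after the procedure.
type v2Stats struct {
	Null    uint64
	GetAttr uint64
	Read    uint64
	Write   uint64
}

// walkStats walks the exported fields with reflection and reports one
// counter per field, labelled with the NFS version and the lowercased
// field name: the same shape the refactored collector exposes.
func walkStats(stats interface{}, version string) {
	v := reflect.ValueOf(stats).Elem()
	for i := 0; i < v.NumField(); i++ {
		name := strings.ToLower(v.Type().Field(i).Name)
		fmt.Printf("node_nfs_procedures_total{version=%q,procedure=%q} %d\n",
			version, name, v.Field(i).Uint())
	}
}

func main() {
	// Illustrative values only.
	walkStats(&v2Stats{Null: 1, GetAttr: 42, Read: 7, Write: 3}, "2")
}

Because the lowercased field name becomes a label value, the real code also renames a handful of fields (wrcache to writecache, openconfirm to open_confirm, and so on) so the exposed procedure labels stay identical to the ones the old regex parser produced.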
Ben Kochie 2018-02-15 13:40:38 +01:00 committed by GitHub
parent 52c031890e
commit 01bd99fb1a
5 changed files with 202 additions and 156 deletions


@@ -14,98 +14,30 @@
package collector
import (
"errors"
"io/ioutil"
"fmt"
"os"
"regexp"
"strconv"
"reflect"
"strings"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
"github.com/prometheus/procfs"
"github.com/prometheus/procfs/nfs"
)
var (
netLineRE = regexp.MustCompile(`^net \d+ (\d+) (\d+) (\d+)$`)
rpcLineRE = regexp.MustCompile(`^rpc (\d+) (\d+) (\d+)$`)
procLineRE = regexp.MustCompile(`^proc(\d+) \d+ (\d+( \d+)*)$`)
nfsProcedures = map[string][]string{
"2": {
"null", "getattr", "setattr", "root", "lookup",
"readlink", "read", "writecache", "write", "create",
"remove", "rename", "link", "symlink", "mkdir",
"rmdir", "readdir", "statfs",
},
"3": {
"null", "getattr", "setattr", "lookup", "access",
"readlink", "read", "write", "create", "mkdir",
"symlink", "mknod", "remove", "rmdir", "rename",
"link", "readdir", "readdirplus", "fsstat", "fsinfo",
"pathconf", "commit",
},
"4": {
"null", "read", "write", "commit", "open",
"open_confirm", "open_noattr", "open_downgrade",
"close", "setattr", "fsinfo", "renew", "setclientid",
"setclientid_confirm", "lock", "lockt", "locku",
"access", "getattr", "lookup", "lookup_root", "remove",
"rename", "link", "symlink", "create", "pathconf",
"statfs", "readlink", "readdir", "server_caps",
"delegreturn", "getacl", "setacl", "fs_locations",
"release_lockowner", "secinfo", "fsid_present",
"exchange_id", "create_session", "destroy_session",
"sequence", "get_lease_time", "reclaim_complete",
"layoutget", "getdeviceinfo", "layoutcommit",
"layoutreturn", "secinfo_no_name", "test_stateid",
"free_stateid", "getdevicelist",
"bind_conn_to_session", "destroy_clientid", "seek",
"allocate", "deallocate", "layoutstats", "clone",
"copy",
},
}
nfsNetReadsDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "nfs", "net_reads_total"),
"Number of reads at the network layer.",
[]string{"protocol"},
nil,
)
nfsNetConnectionsDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "nfs", "net_connections_total"),
"Number of connections at the network layer.",
[]string{"protocol"},
nil,
)
nfsRPCOperationsDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "nfs", "rpc_operations_total"),
"Number of RPCs performed.",
nil,
nil,
)
nfsRPCRetransmissionsDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "nfs", "rpc_retransmissions_total"),
"Number of RPC transmissions performed.",
nil,
nil,
)
nfsRPCAuthenticationRefreshesDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "nfs", "rpc_authentication_refreshes_total"),
"Number of RPC authentication refreshes performed.",
nil,
nil,
)
nfsProceduresDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "nfs", "procedures_total"),
"Number of NFS procedures invoked.",
[]string{"version", "procedure"},
nil,
)
const (
nfsSubsystem = "nfs"
)
type nfsCollector struct{}
type nfsCollector struct {
fs procfs.FS
nfsNetReadsDesc *prometheus.Desc
nfsNetConnectionsDesc *prometheus.Desc
nfsRPCOperationsDesc *prometheus.Desc
nfsRPCRetransmissionsDesc *prometheus.Desc
nfsRPCAuthenticationRefreshesDesc *prometheus.Desc
nfsProceduresDesc *prometheus.Desc
}
func init() {
registerCollector("nfs", defaultDisabled, NewNfsCollector)
@@ -113,65 +45,170 @@ func init() {
// NewNfsCollector returns a new Collector exposing NFS statistics.
func NewNfsCollector() (Collector, error) {
return &nfsCollector{}, nil
fs, err := procfs.NewFS(*procPath)
if err != nil {
return nil, fmt.Errorf("failed to open procfs: %v", err)
}
return &nfsCollector{
fs: fs,
nfsNetReadsDesc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, nfsSubsystem, "net_reads_total"),
"Number of reads at the network layer.",
[]string{"protocol"},
nil,
),
nfsNetConnectionsDesc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, nfsSubsystem, "net_connections_total"),
"Number of connections at the network layer.",
[]string{"protocol"},
nil,
),
nfsRPCOperationsDesc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, nfsSubsystem, "rpc_operations_total"),
"Number of RPCs performed.",
nil,
nil,
),
nfsRPCRetransmissionsDesc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, nfsSubsystem, "rpc_retransmissions_total"),
"Number of RPC transmissions performed.",
nil,
nil,
),
nfsRPCAuthenticationRefreshesDesc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, nfsSubsystem, "rpc_authentication_refreshes_total"),
"Number of RPC authentication refreshes performed.",
nil,
nil,
),
nfsProceduresDesc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "nfs", "procedures_total"),
"Number of NFS procedures invoked.",
[]string{"version", "procedure"},
nil,
),
}, nil
}
func (c *nfsCollector) Update(ch chan<- prometheus.Metric) error {
statsFile := procFilePath("net/rpc/nfs")
content, err := ioutil.ReadFile(statsFile)
stats, err := c.fs.NFSClientRPCStats()
if err != nil {
if os.IsNotExist(err) {
log.Debugf("Not collecting NFS statistics, as %q does not exist", statsFile)
log.Debugf("Not collecting NFS metrics: %s", err)
return nil
}
return err
return fmt.Errorf("failed to retrieve nfs stats: %v", err)
}
for _, line := range strings.Split(string(content), "\n") {
if fields := netLineRE.FindStringSubmatch(line); fields != nil {
value, _ := strconv.ParseFloat(fields[1], 64)
ch <- prometheus.MustNewConstMetric(
nfsNetReadsDesc, prometheus.CounterValue,
value, "udp")
c.updateNFSNetworkStats(ch, &stats.Network)
c.updateNFSClientRPCStats(ch, &stats.ClientRPC)
c.updateNFSRequestsv2Stats(ch, &stats.V2Stats)
c.updateNFSRequestsv3Stats(ch, &stats.V3Stats)
c.updateNFSRequestsv4Stats(ch, &stats.ClientV4Stats)
value, _ = strconv.ParseFloat(fields[2], 64)
ch <- prometheus.MustNewConstMetric(
nfsNetReadsDesc, prometheus.CounterValue,
value, "tcp")
value, _ = strconv.ParseFloat(fields[3], 64)
ch <- prometheus.MustNewConstMetric(
nfsNetConnectionsDesc, prometheus.CounterValue,
value, "tcp")
} else if fields := rpcLineRE.FindStringSubmatch(line); fields != nil {
value, _ := strconv.ParseFloat(fields[1], 64)
ch <- prometheus.MustNewConstMetric(
nfsRPCOperationsDesc,
prometheus.CounterValue, value)
value, _ = strconv.ParseFloat(fields[2], 64)
ch <- prometheus.MustNewConstMetric(
nfsRPCRetransmissionsDesc,
prometheus.CounterValue, value)
value, _ = strconv.ParseFloat(fields[3], 64)
ch <- prometheus.MustNewConstMetric(
nfsRPCAuthenticationRefreshesDesc,
prometheus.CounterValue, value)
} else if fields := procLineRE.FindStringSubmatch(line); fields != nil {
version := fields[1]
for procedure, count := range strings.Split(fields[2], " ") {
value, _ := strconv.ParseFloat(count, 64)
ch <- prometheus.MustNewConstMetric(
nfsProceduresDesc,
prometheus.CounterValue,
value,
version,
nfsProcedures[version][procedure])
}
} else if line != "" {
return errors.New("Failed to parse line: " + line)
}
}
return nil
}
// updateNFSNetworkStats collects statistics for network packets/connections.
func (c *nfsCollector) updateNFSNetworkStats(ch chan<- prometheus.Metric, s *nfs.Network) {
ch <- prometheus.MustNewConstMetric(c.nfsNetReadsDesc, prometheus.CounterValue,
float64(s.UDPCount), "udp")
ch <- prometheus.MustNewConstMetric(c.nfsNetReadsDesc, prometheus.CounterValue,
float64(s.TCPCount), "tcp")
ch <- prometheus.MustNewConstMetric(c.nfsNetConnectionsDesc, prometheus.CounterValue,
float64(s.TCPConnect), "tcp")
}
// updateNFSClientRPCStats collects statistics for NFS client RPCs.
func (c *nfsCollector) updateNFSClientRPCStats(ch chan<- prometheus.Metric, s *nfs.ClientRPC) {
ch <- prometheus.MustNewConstMetric(c.nfsRPCOperationsDesc, prometheus.CounterValue,
float64(s.RPCCount))
ch <- prometheus.MustNewConstMetric(c.nfsRPCRetransmissionsDesc, prometheus.CounterValue,
float64(s.Retransmissions))
ch <- prometheus.MustNewConstMetric(c.nfsRPCAuthenticationRefreshesDesc, prometheus.CounterValue,
float64(s.AuthRefreshes))
}
// updateNFSRequestsv2Stats collects statistics for NFSv2 requests.
func (c *nfsCollector) updateNFSRequestsv2Stats(ch chan<- prometheus.Metric, s *nfs.V2Stats) {
const proto = "2"
v := reflect.ValueOf(s).Elem()
for i := 0; i < v.NumField(); i++ {
field := v.Field(i)
name := strings.ToLower(v.Type().Field(i).Name)
switch name {
case "wrcache":
name = "writecache"
case "fsstat":
name = "statfs"
}
ch <- prometheus.MustNewConstMetric(c.nfsProceduresDesc, prometheus.CounterValue,
float64(field.Uint()), proto, name)
}
}
// updateNFSRequestsv3Stats collects statistics for NFSv3 requests.
func (c *nfsCollector) updateNFSRequestsv3Stats(ch chan<- prometheus.Metric, s *nfs.V3Stats) {
const proto = "3"
v := reflect.ValueOf(s).Elem()
for i := 0; i < v.NumField(); i++ {
field := v.Field(i)
name := strings.ToLower(v.Type().Field(i).Name)
ch <- prometheus.MustNewConstMetric(c.nfsProceduresDesc, prometheus.CounterValue,
float64(field.Uint()), proto, name)
}
}
// updateNFSRequestsv4Stats collects statistics for NFSv4 requests.
func (c *nfsCollector) updateNFSRequestsv4Stats(ch chan<- prometheus.Metric, s *nfs.ClientV4Stats) {
const proto = "4"
v := reflect.ValueOf(s).Elem()
for i := 0; i < v.NumField(); i++ {
field := v.Field(i)
name := strings.ToLower(v.Type().Field(i).Name)
switch name {
case "openconfirm":
name = "open_confirm"
case "opendowngrade":
name = "open_downgrade"
case "opennoattr":
name = "open_noattr"
case "setclientidconfirm":
name = "setclientid_confirm"
case "lookuproot":
name = "lookup_root"
case "servercaps":
name = "server_caps"
case "fslocations":
name = "fs_locations"
case "releaselockowner":
name = "release_lockowner"
case "fsidpresent":
name = "fsid_present"
case "exchangeid":
name = "exchange_id"
case "createsession":
name = "create_session"
case "destroysession":
name = "destroy_session"
case "getleasetime":
name = "get_lease_time"
case "reclaimcomplete":
name = "reclaim_complete"
// TODO: Enable these metrics
case "secinfononame", "teststateid", "freestateid", "getdevicelist", "bindconntosession", "destroyclientid", "seek", "allocate", "deallocate", "layoutstats", "clone":
continue
}
ch <- prometheus.MustNewConstMetric(c.nfsProceduresDesc, prometheus.CounterValue,
float64(field.Uint()), proto, name)
}
}
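
For reference, both the regex parser removed above and the procfs-based ParseClientRPCStats path changed in the hunks below read the same /proc/net/rpc/nfs file. Its layout looks roughly like the following (the counts are purely illustrative and the proc4 line is truncated; the first number on each procN line is how many values follow, one per procedure):

net 18628 0 18628 6
rpc 4329785 0 4338291
proc2 18 2 69 0 0 4410 0 0 0 0 0 0 0 0 0 0 0 99 2
proc3 22 1 4084749 29200 94754 32580 186 47747 7981 8639 0 6356 0 6962 0 7958 0 0 241 4 4 2 39
proc4 61 1 0 0 0 ...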


@@ -46,8 +46,8 @@ func (fs FS) XFSStats() (*xfs.Stats, error) {
return xfs.ParseStats(f)
}
// NFSdClientRPCStats retrieves NFS daemon RPC statistics.
func (fs FS) NFSdClientRPCStats() (*nfs.ClientRPCStats, error) {
// NFSClientRPCStats retrieves NFS client RPC statistics.
func (fs FS) NFSClientRPCStats() (*nfs.ClientRPCStats, error) {
f, err := os.Open(fs.Path("net/rpc/nfs"))
if err != nil {
return nil, err


@@ -178,8 +178,17 @@ func parseV3Stats(v []uint64) (V3Stats, error) {
func parseClientV4Stats(v []uint64) (ClientV4Stats, error) {
values := int(v[0])
if len(v[1:]) != values || values < 59 {
return ClientV4Stats{}, fmt.Errorf("invalid V4Stats line %q", v)
if len(v[1:]) != values {
return ClientV4Stats{}, fmt.Errorf("invalid ClientV4Stats line %q", v)
}
// This function currently supports mapping 59 NFS v4 client stats. Older
// kernels may emit fewer stats, so we must detect this and pad out the
// values to match the expected slice size.
if values < 59 {
newValues := make([]uint64, 60)
copy(newValues, v)
v = newValues
}
return ClientV4Stats{


@@ -32,12 +32,12 @@ func ParseClientRPCStats(r io.Reader) (*ClientRPCStats, error) {
parts := strings.Fields(scanner.Text())
// require at least <key> <value>
if len(parts) < 2 {
return nil, fmt.Errorf("invalid NFSd metric line %q", line)
return nil, fmt.Errorf("invalid NFS metric line %q", line)
}
values, err := util.ParseUint64s(parts[1:])
if err != nil {
return nil, fmt.Errorf("error parsing NFSd metric line: %s", err)
return nil, fmt.Errorf("error parsing NFS metric line: %s", err)
}
switch metricLine := parts[0]; metricLine {
@@ -52,15 +52,15 @@ func ParseClientRPCStats(r io.Reader) (*ClientRPCStats, error) {
case "proc4":
stats.ClientV4Stats, err = parseClientV4Stats(values)
default:
return nil, fmt.Errorf("unknown NFSd metric line %q", metricLine)
return nil, fmt.Errorf("unknown NFS metric line %q", metricLine)
}
if err != nil {
return nil, fmt.Errorf("errors parsing NFSd metric line: %s", err)
return nil, fmt.Errorf("errors parsing NFS metric line: %s", err)
}
}
if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("error scanning NFSd file: %s", err)
return nil, fmt.Errorf("error scanning NFS file: %s", err)
}
return stats, nil

vendor/vendor.json

@@ -161,40 +161,40 @@
"revisionTime": "2018-01-10T21:49:58Z"
},
{
"checksumSHA1": "lolK0h7LSVERIX8zLyVQ/+7wEyA=",
"checksumSHA1": "Qvc01kv3ttHV2P2+J68g9ioU4Qs=",
"path": "github.com/prometheus/procfs",
"revision": "cb4147076ac75738c9a7d279075a253c0cc5acbd",
"revisionTime": "2018-01-25T13:30:57Z"
"revision": "282c8707aa210456a825798969cc27edda34992a",
"revisionTime": "2018-02-12T14:59:26Z"
},
{
"checksumSHA1": "O64FotgWPYIpl3m2gvTEPIem+xg=",
"path": "github.com/prometheus/procfs/bcache",
"revision": "cb4147076ac75738c9a7d279075a253c0cc5acbd",
"revisionTime": "2018-01-25T13:30:57Z"
"revision": "282c8707aa210456a825798969cc27edda34992a",
"revisionTime": "2018-02-12T14:59:26Z"
},
{
"checksumSHA1": "lv9rIcjbVEGo8AT1UCUZXhXrfQc=",
"path": "github.com/prometheus/procfs/internal/util",
"revision": "cb4147076ac75738c9a7d279075a253c0cc5acbd",
"revisionTime": "2018-01-25T13:30:57Z"
"revision": "282c8707aa210456a825798969cc27edda34992a",
"revisionTime": "2018-02-12T14:59:26Z"
},
{
"checksumSHA1": "BXJH5h2ri8SU5qC6kkDvTIGCky4=",
"checksumSHA1": "EekY1iRG9JY74mDD0jsbFCWbAFs=",
"path": "github.com/prometheus/procfs/nfs",
"revision": "cb4147076ac75738c9a7d279075a253c0cc5acbd",
"revisionTime": "2018-01-25T13:30:57Z"
"revision": "282c8707aa210456a825798969cc27edda34992a",
"revisionTime": "2018-02-12T14:59:26Z"
},
{
"checksumSHA1": "wMhQkA/xQw3Q8eI+PIAjFmS94Qo=",
"path": "github.com/prometheus/procfs/sysfs",
"revision": "cb4147076ac75738c9a7d279075a253c0cc5acbd",
"revisionTime": "2018-01-25T13:30:57Z"
"revision": "282c8707aa210456a825798969cc27edda34992a",
"revisionTime": "2018-02-12T14:59:26Z"
},
{
"checksumSHA1": "yItvTQLUVqm/ArLEbvEhqG0T5a0=",
"path": "github.com/prometheus/procfs/xfs",
"revision": "cb4147076ac75738c9a7d279075a253c0cc5acbd",
"revisionTime": "2018-01-25T13:30:57Z"
"revision": "282c8707aa210456a825798969cc27edda34992a",
"revisionTime": "2018-02-12T14:59:26Z"
},
{
"checksumSHA1": "ySaT8G3I3y4MmnoXOYAAX0rC+p8=",