You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
171 lines
6.3 KiB
171 lines
6.3 KiB
// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
//go:build !nontp
|
|
// +build !nontp
|
|
|
|
package collector
|
|
|
|
import (
|
|
"fmt"
|
|
"net"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/beevik/ntp"
|
|
"github.com/go-kit/log"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"gopkg.in/alecthomas/kingpin.v2"
|
|
)
|
|
|
|
const (
	// ntpSubsystem is the subsystem component of every metric name built
	// by this collector.
	ntpSubsystem = "ntp"

	// hour24 is a fixed 24-hour span. `time` does not export `Day` as
	// Day != 24h because of DST.
	hour24 = 24 * time.Hour
)
|
|
|
|
var (
|
|
ntpServer = kingpin.Flag("collector.ntp.server", "NTP server to use for ntp collector").Default("127.0.0.1").String()
|
|
ntpProtocolVersion = kingpin.Flag("collector.ntp.protocol-version", "NTP protocol version").Default("4").Int()
|
|
ntpServerIsLocal = kingpin.Flag("collector.ntp.server-is-local", "Certify that collector.ntp.server address is not a public ntp server").Default("false").Bool()
|
|
ntpIPTTL = kingpin.Flag("collector.ntp.ip-ttl", "IP TTL to use while sending NTP query").Default("1").Int()
|
|
// 3.46608s ~ 1.5s + PHI * (1 << maxPoll), where 1.5s is MAXDIST from ntp.org, it is 1.0 in RFC5905
|
|
// max-distance option is used as-is without phi*(1<<poll)
|
|
ntpMaxDistance = kingpin.Flag("collector.ntp.max-distance", "Max accumulated distance to the root").Default("3.46608s").Duration()
|
|
ntpOffsetTolerance = kingpin.Flag("collector.ntp.local-offset-tolerance", "Offset between local clock and local ntpd time to tolerate").Default("1ms").Duration()
|
|
|
|
leapMidnight time.Time
|
|
leapMidnightMutex = &sync.Mutex{}
|
|
)
|
|
|
|
type ntpCollector struct {
|
|
stratum, leap, rtt, offset, reftime, rootDelay, rootDispersion, sanity typedDesc
|
|
logger log.Logger
|
|
}
|
|
|
|
func init() {
|
|
registerCollector("ntp", defaultDisabled, NewNtpCollector)
|
|
}
|
|
|
|
// NewNtpCollector returns a new Collector exposing sanity of local NTP server.
|
|
// Default definition of "local" is:
|
|
// - collector.ntp.server address is a loopback address (or collector.ntp.server-is-mine flag is turned on)
|
|
// - the server is reachable with outgoin IP_TTL = 1
|
|
func NewNtpCollector(logger log.Logger) (Collector, error) {
|
|
ipaddr := net.ParseIP(*ntpServer)
|
|
if !*ntpServerIsLocal && (ipaddr == nil || !ipaddr.IsLoopback()) {
|
|
return nil, fmt.Errorf("only IP address of local NTP server is valid for --collector.ntp.server")
|
|
}
|
|
|
|
if *ntpProtocolVersion < 2 || *ntpProtocolVersion > 4 {
|
|
return nil, fmt.Errorf("invalid NTP protocol version %d; must be 2, 3, or 4", *ntpProtocolVersion)
|
|
}
|
|
|
|
if *ntpOffsetTolerance < 0 {
|
|
return nil, fmt.Errorf("offset tolerance must be non-negative")
|
|
}
|
|
|
|
return &ntpCollector{
|
|
stratum: typedDesc{prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, ntpSubsystem, "stratum"),
|
|
"NTPD stratum.",
|
|
nil, nil,
|
|
), prometheus.GaugeValue},
|
|
leap: typedDesc{prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, ntpSubsystem, "leap"),
|
|
"NTPD leap second indicator, 2 bits.",
|
|
nil, nil,
|
|
), prometheus.GaugeValue},
|
|
rtt: typedDesc{prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, ntpSubsystem, "rtt_seconds"),
|
|
"RTT to NTPD.",
|
|
nil, nil,
|
|
), prometheus.GaugeValue},
|
|
offset: typedDesc{prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, ntpSubsystem, "offset_seconds"),
|
|
"ClockOffset between NTP and local clock.",
|
|
nil, nil,
|
|
), prometheus.GaugeValue},
|
|
reftime: typedDesc{prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, ntpSubsystem, "reference_timestamp_seconds"),
|
|
"NTPD ReferenceTime, UNIX timestamp.",
|
|
nil, nil,
|
|
), prometheus.GaugeValue},
|
|
rootDelay: typedDesc{prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, ntpSubsystem, "root_delay_seconds"),
|
|
"NTPD RootDelay.",
|
|
nil, nil,
|
|
), prometheus.GaugeValue},
|
|
rootDispersion: typedDesc{prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, ntpSubsystem, "root_dispersion_seconds"),
|
|
"NTPD RootDispersion.",
|
|
nil, nil,
|
|
), prometheus.GaugeValue},
|
|
sanity: typedDesc{prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, ntpSubsystem, "sanity"),
|
|
"NTPD sanity according to RFC5905 heuristics and configured limits.",
|
|
nil, nil,
|
|
), prometheus.GaugeValue},
|
|
logger: logger,
|
|
}, nil
|
|
}
|
|
|
|
func (c *ntpCollector) Update(ch chan<- prometheus.Metric) error {
|
|
resp, err := ntp.QueryWithOptions(*ntpServer, ntp.QueryOptions{
|
|
Version: *ntpProtocolVersion,
|
|
TTL: *ntpIPTTL,
|
|
Timeout: time.Second, // default `ntpdate` timeout
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("couldn't get SNTP reply: %w", err)
|
|
}
|
|
|
|
ch <- c.stratum.mustNewConstMetric(float64(resp.Stratum))
|
|
ch <- c.leap.mustNewConstMetric(float64(resp.Leap))
|
|
ch <- c.rtt.mustNewConstMetric(resp.RTT.Seconds())
|
|
ch <- c.offset.mustNewConstMetric(resp.ClockOffset.Seconds())
|
|
if resp.ReferenceTime.Unix() > 0 {
|
|
// Go Zero is 0001-01-01 00:00:00 UTC
|
|
// NTP Zero is 1900-01-01 00:00:00 UTC
|
|
// UNIX Zero is 1970-01-01 00:00:00 UTC
|
|
// so let's keep ALL ancient `reftime` values as zero
|
|
ch <- c.reftime.mustNewConstMetric(float64(resp.ReferenceTime.UnixNano()) / 1e9)
|
|
} else {
|
|
ch <- c.reftime.mustNewConstMetric(0)
|
|
}
|
|
ch <- c.rootDelay.mustNewConstMetric(resp.RootDelay.Seconds())
|
|
ch <- c.rootDispersion.mustNewConstMetric(resp.RootDispersion.Seconds())
|
|
|
|
// Here is SNTP packet sanity check that is exposed to move burden of
|
|
// configuration from node_exporter user to the developer.
|
|
|
|
maxerr := *ntpOffsetTolerance
|
|
leapMidnightMutex.Lock()
|
|
if resp.Leap == ntp.LeapAddSecond || resp.Leap == ntp.LeapDelSecond {
|
|
// state of leapMidnight is cached as leap flag is dropped right after midnight
|
|
leapMidnight = resp.Time.Truncate(hour24).Add(hour24)
|
|
}
|
|
if leapMidnight.Add(-hour24).Before(resp.Time) && resp.Time.Before(leapMidnight.Add(hour24)) {
|
|
// tolerate leap smearing
|
|
maxerr += time.Second
|
|
}
|
|
leapMidnightMutex.Unlock()
|
|
|
|
if resp.Validate() == nil && resp.RootDistance <= *ntpMaxDistance && resp.MinError <= maxerr {
|
|
ch <- c.sanity.mustNewConstMetric(1)
|
|
} else {
|
|
ch <- c.sanity.mustNewConstMetric(0)
|
|
}
|
|
|
|
return nil
|
|
}
|