prometheusmetricshost-metricsmachine-metricsnode-metricsprocfsprometheus-exportersystem-informationsystem-metrics
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
169 lines
6.3 KiB
169 lines
6.3 KiB
// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
// +build !nontp |
|
|
|
package collector |
|
|
|
import ( |
|
"fmt" |
|
"net" |
|
"sync" |
|
"time" |
|
|
|
"github.com/beevik/ntp" |
|
"github.com/go-kit/kit/log" |
|
"github.com/prometheus/client_golang/prometheus" |
|
"gopkg.in/alecthomas/kingpin.v2" |
|
) |
|
|
|
// hour24 is a fixed 24-hour span.
// `time` does not export `Day` as Day != 24h because of DST.
const hour24 = 24 * time.Hour

// ntpSubsystem is the subsystem part of the fully-qualified names of the
// metrics built by this collector.
const ntpSubsystem = "ntp"
|
|
|
var ( |
|
ntpServer = kingpin.Flag("collector.ntp.server", "NTP server to use for ntp collector").Default("127.0.0.1").String() |
|
ntpProtocolVersion = kingpin.Flag("collector.ntp.protocol-version", "NTP protocol version").Default("4").Int() |
|
ntpServerIsLocal = kingpin.Flag("collector.ntp.server-is-local", "Certify that collector.ntp.server address is not a public ntp server").Default("false").Bool() |
|
ntpIPTTL = kingpin.Flag("collector.ntp.ip-ttl", "IP TTL to use while sending NTP query").Default("1").Int() |
|
// 3.46608s ~ 1.5s + PHI * (1 << maxPoll), where 1.5s is MAXDIST from ntp.org, it is 1.0 in RFC5905 |
|
// max-distance option is used as-is without phi*(1<<poll) |
|
ntpMaxDistance = kingpin.Flag("collector.ntp.max-distance", "Max accumulated distance to the root").Default("3.46608s").Duration() |
|
ntpOffsetTolerance = kingpin.Flag("collector.ntp.local-offset-tolerance", "Offset between local clock and local ntpd time to tolerate").Default("1ms").Duration() |
|
|
|
leapMidnight time.Time |
|
leapMidnightMutex = &sync.Mutex{} |
|
) |
|
|
|
type ntpCollector struct { |
|
stratum, leap, rtt, offset, reftime, rootDelay, rootDispersion, sanity typedDesc |
|
logger log.Logger |
|
} |
|
|
|
func init() { |
|
registerCollector("ntp", defaultDisabled, NewNtpCollector) |
|
} |
|
|
|
// NewNtpCollector returns a new Collector exposing sanity of local NTP server. |
|
// Default definition of "local" is: |
|
// - collector.ntp.server address is a loopback address (or collector.ntp.server-is-mine flag is turned on) |
|
// - the server is reachable with outgoin IP_TTL = 1 |
|
func NewNtpCollector(logger log.Logger) (Collector, error) { |
|
ipaddr := net.ParseIP(*ntpServer) |
|
if !*ntpServerIsLocal && (ipaddr == nil || !ipaddr.IsLoopback()) { |
|
return nil, fmt.Errorf("only IP address of local NTP server is valid for --collector.ntp.server") |
|
} |
|
|
|
if *ntpProtocolVersion < 2 || *ntpProtocolVersion > 4 { |
|
return nil, fmt.Errorf("invalid NTP protocol version %d; must be 2, 3, or 4", *ntpProtocolVersion) |
|
} |
|
|
|
if *ntpOffsetTolerance < 0 { |
|
return nil, fmt.Errorf("offset tolerance must be non-negative") |
|
} |
|
|
|
return &ntpCollector{ |
|
stratum: typedDesc{prometheus.NewDesc( |
|
prometheus.BuildFQName(namespace, ntpSubsystem, "stratum"), |
|
"NTPD stratum.", |
|
nil, nil, |
|
), prometheus.GaugeValue}, |
|
leap: typedDesc{prometheus.NewDesc( |
|
prometheus.BuildFQName(namespace, ntpSubsystem, "leap"), |
|
"NTPD leap second indicator, 2 bits.", |
|
nil, nil, |
|
), prometheus.GaugeValue}, |
|
rtt: typedDesc{prometheus.NewDesc( |
|
prometheus.BuildFQName(namespace, ntpSubsystem, "rtt_seconds"), |
|
"RTT to NTPD.", |
|
nil, nil, |
|
), prometheus.GaugeValue}, |
|
offset: typedDesc{prometheus.NewDesc( |
|
prometheus.BuildFQName(namespace, ntpSubsystem, "offset_seconds"), |
|
"ClockOffset between NTP and local clock.", |
|
nil, nil, |
|
), prometheus.GaugeValue}, |
|
reftime: typedDesc{prometheus.NewDesc( |
|
prometheus.BuildFQName(namespace, ntpSubsystem, "reference_timestamp_seconds"), |
|
"NTPD ReferenceTime, UNIX timestamp.", |
|
nil, nil, |
|
), prometheus.GaugeValue}, |
|
rootDelay: typedDesc{prometheus.NewDesc( |
|
prometheus.BuildFQName(namespace, ntpSubsystem, "root_delay_seconds"), |
|
"NTPD RootDelay.", |
|
nil, nil, |
|
), prometheus.GaugeValue}, |
|
rootDispersion: typedDesc{prometheus.NewDesc( |
|
prometheus.BuildFQName(namespace, ntpSubsystem, "root_dispersion_seconds"), |
|
"NTPD RootDispersion.", |
|
nil, nil, |
|
), prometheus.GaugeValue}, |
|
sanity: typedDesc{prometheus.NewDesc( |
|
prometheus.BuildFQName(namespace, ntpSubsystem, "sanity"), |
|
"NTPD sanity according to RFC5905 heuristics and configured limits.", |
|
nil, nil, |
|
), prometheus.GaugeValue}, |
|
logger: logger, |
|
}, nil |
|
} |
|
|
|
func (c *ntpCollector) Update(ch chan<- prometheus.Metric) error { |
|
resp, err := ntp.QueryWithOptions(*ntpServer, ntp.QueryOptions{ |
|
Version: *ntpProtocolVersion, |
|
TTL: *ntpIPTTL, |
|
Timeout: time.Second, // default `ntpdate` timeout |
|
}) |
|
if err != nil { |
|
return fmt.Errorf("couldn't get SNTP reply: %w", err) |
|
} |
|
|
|
ch <- c.stratum.mustNewConstMetric(float64(resp.Stratum)) |
|
ch <- c.leap.mustNewConstMetric(float64(resp.Leap)) |
|
ch <- c.rtt.mustNewConstMetric(resp.RTT.Seconds()) |
|
ch <- c.offset.mustNewConstMetric(resp.ClockOffset.Seconds()) |
|
if resp.ReferenceTime.Unix() > 0 { |
|
// Go Zero is 0001-01-01 00:00:00 UTC |
|
// NTP Zero is 1900-01-01 00:00:00 UTC |
|
// UNIX Zero is 1970-01-01 00:00:00 UTC |
|
// so let's keep ALL ancient `reftime` values as zero |
|
ch <- c.reftime.mustNewConstMetric(float64(resp.ReferenceTime.UnixNano()) / 1e9) |
|
} else { |
|
ch <- c.reftime.mustNewConstMetric(0) |
|
} |
|
ch <- c.rootDelay.mustNewConstMetric(resp.RootDelay.Seconds()) |
|
ch <- c.rootDispersion.mustNewConstMetric(resp.RootDispersion.Seconds()) |
|
|
|
// Here is SNTP packet sanity check that is exposed to move burden of |
|
// configuration from node_exporter user to the developer. |
|
|
|
maxerr := *ntpOffsetTolerance |
|
leapMidnightMutex.Lock() |
|
if resp.Leap == ntp.LeapAddSecond || resp.Leap == ntp.LeapDelSecond { |
|
// state of leapMidnight is cached as leap flag is dropped right after midnight |
|
leapMidnight = resp.Time.Truncate(hour24).Add(hour24) |
|
} |
|
if leapMidnight.Add(-hour24).Before(resp.Time) && resp.Time.Before(leapMidnight.Add(hour24)) { |
|
// tolerate leap smearing |
|
maxerr += time.Second |
|
} |
|
leapMidnightMutex.Unlock() |
|
|
|
if resp.Validate() == nil && resp.RootDistance <= *ntpMaxDistance && resp.MinError <= maxerr { |
|
ch <- c.sanity.mustNewConstMetric(1) |
|
} else { |
|
ch <- c.sanity.mustNewConstMetric(0) |
|
} |
|
|
|
return nil |
|
}
|
|
|