mirror of https://github.com/k3s-io/k3s
315 lines
10 KiB
Go
315 lines
10 KiB
Go
// Copyright 2015 The etcd Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package etcdserver
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"path/filepath"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
|
|
"go.etcd.io/etcd/pkg/netutil"
|
|
"go.etcd.io/etcd/pkg/transport"
|
|
"go.etcd.io/etcd/pkg/types"
|
|
|
|
bolt "go.etcd.io/bbolt"
|
|
"go.uber.org/zap"
|
|
"go.uber.org/zap/zapcore"
|
|
)
|
|
|
|
// ServerConfig holds the configuration of etcd as taken from the command line or discovery.
|
|
type ServerConfig struct {
|
|
Name string
|
|
DiscoveryURL string
|
|
DiscoveryProxy string
|
|
ClientURLs types.URLs
|
|
PeerURLs types.URLs
|
|
DataDir string
|
|
// DedicatedWALDir config will make the etcd to write the WAL to the WALDir
|
|
// rather than the dataDir/member/wal.
|
|
DedicatedWALDir string
|
|
|
|
SnapshotCount uint64
|
|
|
|
// SnapshotCatchUpEntries is the number of entries for a slow follower
|
|
// to catch-up after compacting the raft storage entries.
|
|
// We expect the follower has a millisecond level latency with the leader.
|
|
// The max throughput is around 10K. Keep a 5K entries is enough for helping
|
|
// follower to catch up.
|
|
// WARNING: only change this for tests. Always use "DefaultSnapshotCatchUpEntries"
|
|
SnapshotCatchUpEntries uint64
|
|
|
|
MaxSnapFiles uint
|
|
MaxWALFiles uint
|
|
|
|
// BackendBatchInterval is the maximum time before commit the backend transaction.
|
|
BackendBatchInterval time.Duration
|
|
// BackendBatchLimit is the maximum operations before commit the backend transaction.
|
|
BackendBatchLimit int
|
|
|
|
// BackendFreelistType is the type of the backend boltdb freelist.
|
|
BackendFreelistType bolt.FreelistType
|
|
|
|
InitialPeerURLsMap types.URLsMap
|
|
InitialClusterToken string
|
|
NewCluster bool
|
|
PeerTLSInfo transport.TLSInfo
|
|
|
|
CORS map[string]struct{}
|
|
|
|
// HostWhitelist lists acceptable hostnames from client requests.
|
|
// If server is insecure (no TLS), server only accepts requests
|
|
// whose Host header value exists in this white list.
|
|
HostWhitelist map[string]struct{}
|
|
|
|
TickMs uint
|
|
ElectionTicks int
|
|
|
|
// InitialElectionTickAdvance is true, then local member fast-forwards
|
|
// election ticks to speed up "initial" leader election trigger. This
|
|
// benefits the case of larger election ticks. For instance, cross
|
|
// datacenter deployment may require longer election timeout of 10-second.
|
|
// If true, local node does not need wait up to 10-second. Instead,
|
|
// forwards its election ticks to 8-second, and have only 2-second left
|
|
// before leader election.
|
|
//
|
|
// Major assumptions are that:
|
|
// - cluster has no active leader thus advancing ticks enables faster
|
|
// leader election, or
|
|
// - cluster already has an established leader, and rejoining follower
|
|
// is likely to receive heartbeats from the leader after tick advance
|
|
// and before election timeout.
|
|
//
|
|
// However, when network from leader to rejoining follower is congested,
|
|
// and the follower does not receive leader heartbeat within left election
|
|
// ticks, disruptive election has to happen thus affecting cluster
|
|
// availabilities.
|
|
//
|
|
// Disabling this would slow down initial bootstrap process for cross
|
|
// datacenter deployments. Make your own tradeoffs by configuring
|
|
// --initial-election-tick-advance at the cost of slow initial bootstrap.
|
|
//
|
|
// If single-node, it advances ticks regardless.
|
|
//
|
|
// See https://github.com/etcd-io/etcd/issues/9333 for more detail.
|
|
InitialElectionTickAdvance bool
|
|
|
|
BootstrapTimeout time.Duration
|
|
|
|
AutoCompactionRetention time.Duration
|
|
AutoCompactionMode string
|
|
CompactionBatchLimit int
|
|
QuotaBackendBytes int64
|
|
MaxTxnOps uint
|
|
|
|
// MaxRequestBytes is the maximum request size to send over raft.
|
|
MaxRequestBytes uint
|
|
|
|
StrictReconfigCheck bool
|
|
|
|
// ClientCertAuthEnabled is true when cert has been signed by the client CA.
|
|
ClientCertAuthEnabled bool
|
|
|
|
AuthToken string
|
|
BcryptCost uint
|
|
TokenTTL uint
|
|
|
|
// InitialCorruptCheck is true to check data corruption on boot
|
|
// before serving any peer/client traffic.
|
|
InitialCorruptCheck bool
|
|
CorruptCheckTime time.Duration
|
|
|
|
// PreVote is true to enable Raft Pre-Vote.
|
|
PreVote bool
|
|
|
|
// Logger logs server-side operations.
|
|
// If not nil, it disables "capnslog" and uses the given logger.
|
|
Logger *zap.Logger
|
|
|
|
// LoggerConfig is server logger configuration for Raft logger.
|
|
// Must be either: "LoggerConfig != nil" or "LoggerCore != nil && LoggerWriteSyncer != nil".
|
|
LoggerConfig *zap.Config
|
|
// LoggerCore is "zapcore.Core" for raft logger.
|
|
// Must be either: "LoggerConfig != nil" or "LoggerCore != nil && LoggerWriteSyncer != nil".
|
|
LoggerCore zapcore.Core
|
|
LoggerWriteSyncer zapcore.WriteSyncer
|
|
|
|
Debug bool
|
|
|
|
ForceNewCluster bool
|
|
|
|
// EnableLeaseCheckpoint enables primary lessor to persist lease remainingTTL to prevent indefinite auto-renewal of long lived leases.
|
|
EnableLeaseCheckpoint bool
|
|
// LeaseCheckpointInterval time.Duration is the wait duration between lease checkpoints.
|
|
LeaseCheckpointInterval time.Duration
|
|
|
|
EnableGRPCGateway bool
|
|
|
|
WatchProgressNotifyInterval time.Duration
|
|
|
|
// UnsafeNoFsync disables all uses of fsync.
|
|
// Setting this is unsafe and will cause data loss.
|
|
UnsafeNoFsync bool `json:"unsafe-no-fsync"`
|
|
}
|
|
|
|
// VerifyBootstrap sanity-checks the initial config for bootstrap case
|
|
// and returns an error for things that should never happen.
|
|
func (c *ServerConfig) VerifyBootstrap() error {
|
|
if err := c.hasLocalMember(); err != nil {
|
|
return err
|
|
}
|
|
if err := c.advertiseMatchesCluster(); err != nil {
|
|
return err
|
|
}
|
|
if checkDuplicateURL(c.InitialPeerURLsMap) {
|
|
return fmt.Errorf("initial cluster %s has duplicate url", c.InitialPeerURLsMap)
|
|
}
|
|
if c.InitialPeerURLsMap.String() == "" && c.DiscoveryURL == "" {
|
|
return fmt.Errorf("initial cluster unset and no discovery URL found")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// VerifyJoinExisting sanity-checks the initial config for join existing cluster
|
|
// case and returns an error for things that should never happen.
|
|
func (c *ServerConfig) VerifyJoinExisting() error {
|
|
// The member has announced its peer urls to the cluster before starting; no need to
|
|
// set the configuration again.
|
|
if err := c.hasLocalMember(); err != nil {
|
|
return err
|
|
}
|
|
if checkDuplicateURL(c.InitialPeerURLsMap) {
|
|
return fmt.Errorf("initial cluster %s has duplicate url", c.InitialPeerURLsMap)
|
|
}
|
|
if c.DiscoveryURL != "" {
|
|
return fmt.Errorf("discovery URL should not be set when joining existing initial cluster")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// hasLocalMember checks that the cluster at least contains the local server.
|
|
func (c *ServerConfig) hasLocalMember() error {
|
|
if urls := c.InitialPeerURLsMap[c.Name]; urls == nil {
|
|
return fmt.Errorf("couldn't find local name %q in the initial cluster configuration", c.Name)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// advertiseMatchesCluster confirms peer URLs match those in the cluster peer list.
|
|
func (c *ServerConfig) advertiseMatchesCluster() error {
|
|
urls, apurls := c.InitialPeerURLsMap[c.Name], c.PeerURLs.StringSlice()
|
|
urls.Sort()
|
|
sort.Strings(apurls)
|
|
ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
|
|
defer cancel()
|
|
ok, err := netutil.URLStringsEqual(ctx, c.Logger, apurls, urls.StringSlice())
|
|
if ok {
|
|
return nil
|
|
}
|
|
|
|
initMap, apMap := make(map[string]struct{}), make(map[string]struct{})
|
|
for _, url := range c.PeerURLs {
|
|
apMap[url.String()] = struct{}{}
|
|
}
|
|
for _, url := range c.InitialPeerURLsMap[c.Name] {
|
|
initMap[url.String()] = struct{}{}
|
|
}
|
|
|
|
missing := []string{}
|
|
for url := range initMap {
|
|
if _, ok := apMap[url]; !ok {
|
|
missing = append(missing, url)
|
|
}
|
|
}
|
|
if len(missing) > 0 {
|
|
for i := range missing {
|
|
missing[i] = c.Name + "=" + missing[i]
|
|
}
|
|
mstr := strings.Join(missing, ",")
|
|
apStr := strings.Join(apurls, ",")
|
|
return fmt.Errorf("--initial-cluster has %s but missing from --initial-advertise-peer-urls=%s (%v)", mstr, apStr, err)
|
|
}
|
|
|
|
for url := range apMap {
|
|
if _, ok := initMap[url]; !ok {
|
|
missing = append(missing, url)
|
|
}
|
|
}
|
|
if len(missing) > 0 {
|
|
mstr := strings.Join(missing, ",")
|
|
umap := types.URLsMap(map[string]types.URLs{c.Name: c.PeerURLs})
|
|
return fmt.Errorf("--initial-advertise-peer-urls has %s but missing from --initial-cluster=%s", mstr, umap.String())
|
|
}
|
|
|
|
// resolved URLs from "--initial-advertise-peer-urls" and "--initial-cluster" did not match or failed
|
|
apStr := strings.Join(apurls, ",")
|
|
umap := types.URLsMap(map[string]types.URLs{c.Name: c.PeerURLs})
|
|
return fmt.Errorf("failed to resolve %s to match --initial-cluster=%s (%v)", apStr, umap.String(), err)
|
|
}
|
|
|
|
// MemberDir returns the path of the "member" directory under DataDir.
func (c *ServerConfig) MemberDir() string {
	return filepath.Join(c.DataDir, "member")
}
|
|
|
|
func (c *ServerConfig) WALDir() string {
|
|
if c.DedicatedWALDir != "" {
|
|
return c.DedicatedWALDir
|
|
}
|
|
return filepath.Join(c.MemberDir(), "wal")
|
|
}
|
|
|
|
// SnapDir returns the path of the "snap" directory under MemberDir.
func (c *ServerConfig) SnapDir() string {
	return filepath.Join(c.MemberDir(), "snap")
}
|
|
|
|
// ShouldDiscover reports whether a discovery URL has been configured.
func (c *ServerConfig) ShouldDiscover() bool {
	return len(c.DiscoveryURL) > 0
}
|
|
|
|
// ReqTimeout returns timeout for request to finish.
|
|
func (c *ServerConfig) ReqTimeout() time.Duration {
|
|
// 5s for queue waiting, computation and disk IO delay
|
|
// + 2 * election timeout for possible leader election
|
|
return 5*time.Second + 2*time.Duration(c.ElectionTicks*int(c.TickMs))*time.Millisecond
|
|
}
|
|
|
|
func (c *ServerConfig) electionTimeout() time.Duration {
|
|
return time.Duration(c.ElectionTicks*int(c.TickMs)) * time.Millisecond
|
|
}
|
|
|
|
func (c *ServerConfig) peerDialTimeout() time.Duration {
|
|
// 1s for queue wait and election timeout
|
|
return time.Second + time.Duration(c.ElectionTicks*int(c.TickMs))*time.Millisecond
|
|
}
|
|
|
|
func checkDuplicateURL(urlsmap types.URLsMap) bool {
|
|
um := make(map[string]bool)
|
|
for _, urls := range urlsmap {
|
|
for _, url := range urls {
|
|
u := url.String()
|
|
if um[u] {
|
|
return true
|
|
}
|
|
um[u] = true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (c *ServerConfig) bootstrapTimeout() time.Duration {
|
|
if c.BootstrapTimeout != 0 {
|
|
return c.BootstrapTimeout
|
|
}
|
|
return time.Second
|
|
}
|
|
|
|
// backendPath returns the path of the "db" file inside SnapDir.
func (c *ServerConfig) backendPath() string {
	return filepath.Join(c.SnapDir(), "db")
}
|