mirror of https://github.com/hashicorp/consul
resolve circleci config conflicts
commit
ef6b80bab2
|
@ -77,16 +77,9 @@ jobs:
|
|||
- run: mkdir -p $TEST_RESULTS_DIR
|
||||
- run: sudo apt-get update && sudo apt-get install -y rsyslog
|
||||
- run: sudo service rsyslog start
|
||||
# Use CircleCI test splitting by classname. Since there are no classes in go,
|
||||
# we fake it by taking everything after github.com/hashicorp/consul/ and setting
|
||||
# it as the classname.
|
||||
|
||||
# This loop writes go test results to <reportname>.xml per go package
|
||||
- run: |
|
||||
for pkg in $(go list ./... | grep -v github.com/hashicorp/consul/agent/proxyprocess |circleci tests split --split-by=timings --timings-type=classname | tr '\n' ' '); do
|
||||
reportname=$(echo $pkg | cut -d '/' -f3- | sed "s#/#_#g")
|
||||
gotestsum --format=short-verbose --junitfile $TEST_RESULTS_DIR/$reportname.xml -- -tags=$GOTAGS $pkg
|
||||
done
|
||||
PACKAGE_NAMES=$(go list ./... | grep -v github.com/hashicorp/consul/agent/proxyprocess | circleci tests split --split-by=timings --timings-type=classname)
|
||||
gotestsum --format=short-verbose --junitfile $TEST_RESULTS_DIR/gotestsum-report.xml -- -tags=$GOTAGS -p 3 $PACKAGE_NAMES
|
||||
|
||||
- store_test_results:
|
||||
path: /tmp/test-results
|
||||
|
@ -108,18 +101,11 @@ jobs:
|
|||
- attach_workspace:
|
||||
at: /go/bin
|
||||
- run: mkdir -p $TEST_RESULTS_DIR
|
||||
# Use CircleCI test splitting by classname. Since there are no classes in go,
|
||||
# we fake it by taking everything after github.com/hashicorp/consul/ and setting
|
||||
# it as the classname.
|
||||
|
||||
# This loop writes go test results to <reportname>.xml per go package
|
||||
- run:
|
||||
working_directory: api
|
||||
command: |
|
||||
for pkg in $(go list ./... | circleci tests split --split-by=timings --timings-type=classname | tr '\n' ' '); do
|
||||
reportname=$(echo $pkg | cut -d '/' -f3- | sed "s#/#_#g")
|
||||
gotestsum --format=short-verbose --junitfile $TEST_RESULTS_DIR/$reportname.xml -- -tags=$GOTAGS $pkg
|
||||
done
|
||||
PACKAGE_NAMES=$(go list ./... | circleci tests split --split-by=timings --timings-type=classname)
|
||||
gotestsum --format=short-verbose --junitfile $TEST_RESULTS_DIR/gotestsum-report.xml -- -tags=$GOTAGS $PACKAGE_NAMES
|
||||
|
||||
- store_test_results:
|
||||
path: /tmp/test-results
|
||||
|
@ -476,7 +462,25 @@ jobs:
|
|||
git_merge_branch="ci/master-merge-$(date +%Y%m%d%H%M%S)"
|
||||
git checkout -b "${git_merge_branch}"
|
||||
latest_oss_commit="$(git rev-parse origin/master)"
|
||||
git merge -m "Merge Consul OSS branch 'master' at commit ${latest_oss_commit}" "${latest_oss_commit}"
|
||||
|
||||
if ! errors=$(git merge -m "Merge Consul OSS branch 'master' at commit ${latest_oss_commit}" "${latest_oss_commit}"); then
|
||||
printf "oss/master merge into ${CIRCLE_BRANCH} failed because git was unable to auto-merge!\n${errors}"
|
||||
curl -X POST -H 'Content-type: application/json' \
|
||||
--data \
|
||||
"{ \
|
||||
\"attachments\": [ \
|
||||
{ \
|
||||
\"fallback\": \"master merge into ${CIRCLE_BRANCH} failed because git was unable to auto-merge!\", \
|
||||
\"text\": \"Nightly *master* merge into *${CIRCLE_BRANCH}* failed!\n\nBuild Log: ${CIRCLE_BUILD_URL}\n\nGit was unable to auto-merge due to possible merge conflict.\n\n*Errors:*\n${errors}\", \
|
||||
\"footer\": \"${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}\", \
|
||||
\"ts\": \"$(date +%s)\", \
|
||||
\"color\": \"danger\" \
|
||||
} \
|
||||
] \
|
||||
}" ${CONSUL_SLACK_WEBHOOK_URL}
|
||||
exit 1
|
||||
fi
|
||||
|
||||
git push origin "${git_merge_branch}"
|
||||
sleep 15 # Wait for merge branch to start CircleCI pipeline
|
||||
|
||||
|
@ -568,28 +572,26 @@ workflows:
|
|||
branches:
|
||||
only:
|
||||
- release/1-6
|
||||
build-distros:
|
||||
go-tests:
|
||||
jobs:
|
||||
- lint-consul-retry
|
||||
- go-fmt-and-vet:
|
||||
requires:
|
||||
- lint-consul-retry
|
||||
- build-386: &require-go-fmt-vet
|
||||
requires:
|
||||
- go-fmt-and-vet
|
||||
- build-amd64: *require-go-fmt-vet
|
||||
- build-arm-arm64: *require-go-fmt-vet
|
||||
test-integrations:
|
||||
jobs:
|
||||
- dev-build
|
||||
- dev-build:
|
||||
requires:
|
||||
- lint-consul-retry
|
||||
- go-fmt-and-vet
|
||||
- go-test: &go-test
|
||||
requires:
|
||||
- dev-build
|
||||
filters:
|
||||
branches:
|
||||
ignore:
|
||||
- /^pull\/.*$/ # only run go tests on non forks
|
||||
- go-test-api: *go-test
|
||||
build-distros:
|
||||
jobs:
|
||||
- build-386
|
||||
- build-amd64
|
||||
- build-arm-arm64
|
||||
test-integrations:
|
||||
jobs:
|
||||
- dev-build
|
||||
- dev-upload-s3:
|
||||
requires:
|
||||
- dev-build
|
||||
|
|
23
.travis.yml
23
.travis.yml
|
@ -1,23 +0,0 @@
|
|||
language: go
|
||||
|
||||
go:
|
||||
# Please keep this in-sync with the go version we build against in
|
||||
# build-support/docker/Build-Go.dockerfile.
|
||||
- "1.12.1"
|
||||
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- release/1-6
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- env: GOTEST_PKGS="./api"
|
||||
- env: GOTEST_PKGS="./agent"
|
||||
- env: GOTEST_PKGS="./agent/consul"
|
||||
- env: GOTEST_PKGS_EXCLUDE="./api|./agent|./agent/consul"
|
||||
|
||||
script:
|
||||
- make test-ci
|
||||
|
||||
sudo: false
|
|
@ -6,6 +6,8 @@ FEATURES:
|
|||
|
||||
IMPROVEMENTS:
|
||||
|
||||
* raft: allow trailing logs to be configured as an escape hatch for extreme load that prevents followers catching up with leader [[GH-6186](https://github.com/hashicorp/consul/pull/6186)]
|
||||
* agent: added configurable limit for log files to be rotated [[GH-5831](https://github.com/hashicorp/consul/pull/5831)]
|
||||
* agent: health checks: change long timeout behavior to use the user-configured `timeout` value [[GH-6094](https://github.com/hashicorp/consul/pull/6094)]
|
||||
* api: Update filtering language to include substring and regular expression matching on string values [[GH-6190](https://github.com/hashicorp/consul/pull/6190)]
|
||||
* api: Display allowed HTTP CIDR information nicely [[GH-6029](https://github.com/hashicorp/consul/pull/6029)]
|
||||
|
|
|
@ -1162,6 +1162,9 @@ func (a *Agent) consulConfig() (*consul.Config, error) {
|
|||
if a.config.RaftSnapshotInterval != 0 {
|
||||
base.RaftConfig.SnapshotInterval = a.config.RaftSnapshotInterval
|
||||
}
|
||||
if a.config.RaftTrailingLogs != 0 {
|
||||
base.RaftConfig.TrailingLogs = uint64(a.config.RaftTrailingLogs)
|
||||
}
|
||||
if a.config.ACLMasterToken != "" {
|
||||
base.ACLMasterToken = a.config.ACLMasterToken
|
||||
}
|
||||
|
|
|
@ -141,7 +141,7 @@ func (s *HTTPServer) AgentReload(resp http.ResponseWriter, req *http.Request) (i
|
|||
}
|
||||
|
||||
// Trigger the reload
|
||||
errCh := make(chan error, 0)
|
||||
errCh := make(chan error)
|
||||
select {
|
||||
case <-s.agent.shutdownCh:
|
||||
return nil, fmt.Errorf("Agent was shutdown before reload could be completed")
|
||||
|
|
|
@ -564,7 +564,7 @@ func TestAgent_Service(t *testing.T) {
|
|||
}
|
||||
start := time.Now()
|
||||
obj, err := a.srv.AgentService(resp, req)
|
||||
elapsed := time.Now().Sub(start)
|
||||
elapsed := time.Since(start)
|
||||
|
||||
if tt.wantErr != "" {
|
||||
require.Error(err)
|
||||
|
@ -5350,7 +5350,7 @@ func TestAgentConnectProxyConfig_Blocking(t *testing.T) {
|
|||
}
|
||||
start := time.Now()
|
||||
obj, err := a.srv.AgentConnectProxyConfig(resp, req)
|
||||
elapsed := time.Now().Sub(start)
|
||||
elapsed := time.Since(start)
|
||||
|
||||
if tt.wantErr {
|
||||
require.Error(err)
|
||||
|
|
|
@ -3925,7 +3925,7 @@ func TestAgent_ReloadConfigTLSConfigFailure(t *testing.T) {
|
|||
require.Len(t, tlsConf.RootCAs.Subjects(), 1)
|
||||
}
|
||||
|
||||
func TestAgent_consulConfig(t *testing.T) {
|
||||
func TestAgent_consulConfig_AutoEncryptAllowTLS(t *testing.T) {
|
||||
t.Parallel()
|
||||
dataDir := testutil.TempDir(t, "agent") // we manage the data dir
|
||||
defer os.RemoveAll(dataDir)
|
||||
|
@ -3941,3 +3941,13 @@ func TestAgent_consulConfig(t *testing.T) {
|
|||
defer a.Shutdown()
|
||||
require.True(t, a.consulConfig().AutoEncryptAllowTLS)
|
||||
}
|
||||
|
||||
// TestAgent_consulConfig_RaftTrailingLogs starts a test agent whose HCL
// configuration sets raft_trailing_logs and asserts that the value is
// surfaced by consulConfig() as RaftConfig.TrailingLogs (compared as uint64).
func TestAgent_consulConfig_RaftTrailingLogs(t *testing.T) {
|
||||
t.Parallel()
|
||||
hcl := `
|
||||
raft_trailing_logs = 812345
|
||||
`
|
||||
a := NewTestAgent(t, t.Name(), hcl)
|
||||
defer a.Shutdown()
|
||||
require.Equal(t, uint64(812345), a.consulConfig().RaftConfig.TrailingLogs)
|
||||
}
|
||||
|
|
|
@ -703,7 +703,7 @@ func (c *Cache) runExpiryLoop() {
|
|||
c.entriesLock.RLock()
|
||||
if len(c.entriesExpiryHeap.Entries) > 0 {
|
||||
entry = c.entriesExpiryHeap.Entries[0]
|
||||
expiryTimer = time.NewTimer(entry.Expires.Sub(time.Now()))
|
||||
expiryTimer = time.NewTimer(time.Until(entry.Expires))
|
||||
expiryCh = expiryTimer.C
|
||||
}
|
||||
c.entriesLock.RUnlock()
|
||||
|
|
|
@ -189,7 +189,7 @@ func (s *HTTPServer) CatalogServices(resp http.ResponseWriter, req *http.Request
|
|||
|
||||
// Use empty map instead of nil
|
||||
if out.Services == nil {
|
||||
out.Services = make(structs.Services, 0)
|
||||
out.Services = make(structs.Services)
|
||||
}
|
||||
metrics.IncrCounterWithLabels([]string{"client", "api", "success", "catalog_services"}, 1,
|
||||
[]metrics.Label{{Name: "node", Value: s.nodeName()}})
|
||||
|
|
|
@ -862,6 +862,7 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
|
|||
RaftProtocol: b.intVal(c.RaftProtocol),
|
||||
RaftSnapshotThreshold: b.intVal(c.RaftSnapshotThreshold),
|
||||
RaftSnapshotInterval: b.durationVal("raft_snapshot_interval", c.RaftSnapshotInterval),
|
||||
RaftTrailingLogs: b.intVal(c.RaftTrailingLogs),
|
||||
ReconnectTimeoutLAN: b.durationVal("reconnect_timeout", c.ReconnectTimeoutLAN),
|
||||
ReconnectTimeoutWAN: b.durationVal("reconnect_timeout_wan", c.ReconnectTimeoutWAN),
|
||||
RejoinAfterLeave: b.boolVal(c.RejoinAfterLeave),
|
||||
|
|
|
@ -241,6 +241,7 @@ type Config struct {
|
|||
RaftProtocol *int `json:"raft_protocol,omitempty" hcl:"raft_protocol" mapstructure:"raft_protocol"`
|
||||
RaftSnapshotThreshold *int `json:"raft_snapshot_threshold,omitempty" hcl:"raft_snapshot_threshold" mapstructure:"raft_snapshot_threshold"`
|
||||
RaftSnapshotInterval *string `json:"raft_snapshot_interval,omitempty" hcl:"raft_snapshot_interval" mapstructure:"raft_snapshot_interval"`
|
||||
RaftTrailingLogs *int `json:"raft_trailing_logs,omitempty" hcl:"raft_trailing_logs" mapstructure:"raft_trailing_logs"`
|
||||
ReconnectTimeoutLAN *string `json:"reconnect_timeout,omitempty" hcl:"reconnect_timeout" mapstructure:"reconnect_timeout"`
|
||||
ReconnectTimeoutWAN *string `json:"reconnect_timeout_wan,omitempty" hcl:"reconnect_timeout_wan" mapstructure:"reconnect_timeout_wan"`
|
||||
RejoinAfterLeave *bool `json:"rejoin_after_leave,omitempty" hcl:"rejoin_after_leave" mapstructure:"rejoin_after_leave"`
|
||||
|
|
|
@ -965,6 +965,22 @@ type RuntimeConfig struct {
|
|||
// hcl: raft_snapshot_threshold = int
|
||||
RaftSnapshotInterval time.Duration
|
||||
|
||||
// RaftTrailingLogs sets the number of log entries that will be left in the
|
||||
// log store after a snapshot. This must be large enough that a follower can
|
||||
// transfer and restore an entire snapshot of the state before this many new
|
||||
// entries have been appended. In vast majority of cases the default is plenty
|
||||
// but if there is a sustained high write throughput coupled with a huge
|
||||
// multi-gigabyte snapshot setting this higher may be necessary to allow
|
||||
// followers time to reload from snapshot without becoming unhealthy. If it's
|
||||
// too low then followers are unable to ever recover from a restart and will
|
||||
// enter a loop of constantly downloading full snapshots and never catching
|
||||
// up. If you need to change this you should reconsider your usage of Consul
|
||||
// as it is not designed to store multiple-gigabyte data sets with high write
|
||||
// throughput. Defaults to 10000.
|
||||
//
|
||||
// hcl: raft_trailing_logs = int
|
||||
RaftTrailingLogs int
|
||||
|
||||
// ReconnectTimeoutLAN specifies the amount of time to wait to reconnect with
|
||||
// another agent before deciding it's permanently gone. This can be used to
|
||||
// control the time it takes to reap failed nodes from the cluster.
|
||||
|
|
|
@ -3767,6 +3767,7 @@ func TestFullConfig(t *testing.T) {
|
|||
"raft_protocol": 19016,
|
||||
"raft_snapshot_threshold": 16384,
|
||||
"raft_snapshot_interval": "30s",
|
||||
"raft_trailing_logs": 83749,
|
||||
"reconnect_timeout": "23739s",
|
||||
"reconnect_timeout_wan": "26694s",
|
||||
"recursors": [ "63.38.39.58", "92.49.18.18" ],
|
||||
|
@ -4371,6 +4372,7 @@ func TestFullConfig(t *testing.T) {
|
|||
raft_protocol = 19016
|
||||
raft_snapshot_threshold = 16384
|
||||
raft_snapshot_interval = "30s"
|
||||
raft_trailing_logs = 83749
|
||||
reconnect_timeout = "23739s"
|
||||
reconnect_timeout_wan = "26694s"
|
||||
recursors = [ "63.38.39.58", "92.49.18.18" ]
|
||||
|
@ -5043,6 +5045,7 @@ func TestFullConfig(t *testing.T) {
|
|||
RaftProtocol: 19016,
|
||||
RaftSnapshotThreshold: 16384,
|
||||
RaftSnapshotInterval: 30 * time.Second,
|
||||
RaftTrailingLogs: 83749,
|
||||
ReconnectTimeoutLAN: 23739 * time.Second,
|
||||
ReconnectTimeoutWAN: 26694 * time.Second,
|
||||
RejoinAfterLeave: true,
|
||||
|
@ -5901,6 +5904,7 @@ func TestSanitize(t *testing.T) {
|
|||
"RaftProtocol": 0,
|
||||
"RaftSnapshotInterval": "0s",
|
||||
"RaftSnapshotThreshold": 0,
|
||||
"RaftTrailingLogs": 0,
|
||||
"ReconnectTimeoutLAN": "0s",
|
||||
"ReconnectTimeoutWAN": "0s",
|
||||
"RejoinAfterLeave": false,
|
||||
|
|
|
@ -178,7 +178,7 @@ func TestConsulCAProvider_SignLeaf(t *testing.T) {
|
|||
require.Equal(parsed.SerialNumber.Uint64(), uint64(2))
|
||||
|
||||
// Ensure the cert is valid now and expires within the correct limit.
|
||||
require.True(parsed.NotAfter.Sub(time.Now()) < 3*24*time.Hour)
|
||||
require.True(time.Until(parsed.NotAfter) < 3*24*time.Hour)
|
||||
require.True(parsed.NotBefore.Before(time.Now()))
|
||||
}
|
||||
|
||||
|
|
|
@ -186,7 +186,7 @@ func TestVaultCAProvider_SignLeaf(t *testing.T) {
|
|||
require.NotEqual(firstSerial, parsed.SerialNumber.Uint64())
|
||||
|
||||
// Ensure the cert is valid now and expires within the correct limit.
|
||||
require.True(parsed.NotAfter.Sub(time.Now()) < time.Hour)
|
||||
require.True(time.Until(parsed.NotAfter) < time.Hour)
|
||||
require.True(parsed.NotBefore.Before(time.Now()))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -316,7 +316,7 @@ func (r *aclRoleReplicator) FetchUpdated(srv *Server, updates []string) (int, er
|
|||
delete(keep, role.ID)
|
||||
}
|
||||
missing := make([]string, 0, len(keep))
|
||||
for id, _ := range keep {
|
||||
for id := range keep {
|
||||
missing = append(missing, id)
|
||||
}
|
||||
return 0, fmt.Errorf("role replication trying to replicated uncached roles with IDs: %v", missing)
|
||||
|
|
|
@ -596,11 +596,7 @@ key "zip" {
|
|||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
actualKeys = []string{}
|
||||
|
||||
for _, key := range keyList.Keys {
|
||||
actualKeys = append(actualKeys, key)
|
||||
}
|
||||
actualKeys = keyList.Keys
|
||||
|
||||
verify.Values(t, "", actualKeys, expectedKeys)
|
||||
|
||||
|
|
|
@ -1192,7 +1192,7 @@ func (s *Server) pruneCARoots() error {
|
|||
|
||||
var newRoots structs.CARoots
|
||||
for _, r := range roots {
|
||||
if !r.Active && !r.RotatedOutAt.IsZero() && time.Now().Sub(r.RotatedOutAt) > common.LeafCertTTL*2 {
|
||||
if !r.Active && !r.RotatedOutAt.IsZero() && time.Since(r.RotatedOutAt) > common.LeafCertTTL*2 {
|
||||
s.logger.Printf("[INFO] connect: pruning old unused root CA (ID: %s)", r.ID)
|
||||
continue
|
||||
}
|
||||
|
|
|
@ -51,8 +51,7 @@ func (sl *ServerLookup) ServerAddr(id raft.ServerID) (raft.ServerAddress, error)
|
|||
func (sl *ServerLookup) Server(addr raft.ServerAddress) *metadata.Server {
|
||||
sl.lock.RLock()
|
||||
defer sl.lock.RUnlock()
|
||||
svr, _ := sl.addressToServer[addr]
|
||||
return svr
|
||||
return sl.addressToServer[addr]
|
||||
}
|
||||
|
||||
func (sl *ServerLookup) Servers() []*metadata.Server {
|
||||
|
|
|
@ -183,6 +183,10 @@ func newServer(c *Config) (*Server, error) {
|
|||
oldNotify()
|
||||
}
|
||||
}
|
||||
// Restore old notify to guard against re-closing `up` on a retry
|
||||
defer func() {
|
||||
c.NotifyListen = oldNotify
|
||||
}()
|
||||
|
||||
// start server
|
||||
w := c.LogOutput
|
||||
|
@ -820,7 +824,6 @@ func TestServer_BadExpect(t *testing.T) {
|
|||
type fakeGlobalResp struct{}
|
||||
|
||||
func (r *fakeGlobalResp) Add(interface{}) {
|
||||
return
|
||||
}
|
||||
|
||||
func (r *fakeGlobalResp) New() interface{} {
|
||||
|
|
|
@ -3824,11 +3824,11 @@ func stripIrrelevantTokenFields(token *structs.ACLToken) *structs.ACLToken {
|
|||
// When comparing the tokens disregard the policy link names. This
|
||||
// data is not cleanly updated in a variety of scenarios and should not
|
||||
// be relied upon.
|
||||
for i, _ := range tokenCopy.Policies {
|
||||
for i := range tokenCopy.Policies {
|
||||
tokenCopy.Policies[i].Name = ""
|
||||
}
|
||||
// Also do the same for Role links.
|
||||
for i, _ := range tokenCopy.Roles {
|
||||
for i := range tokenCopy.Roles {
|
||||
tokenCopy.Roles[i].Name = ""
|
||||
}
|
||||
// The raft indexes won't match either because the requester will not
|
||||
|
|
|
@ -293,7 +293,7 @@ func (s *HTTPServer) handler(enableDebug bool) http.Handler {
|
|||
mux.HandleFunc("/", s.Index)
|
||||
for pattern, fn := range endpoints {
|
||||
thisFn := fn
|
||||
methods, _ := allowedMethods[pattern]
|
||||
methods := allowedMethods[pattern]
|
||||
bound := func(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
||||
return thisFn(s, resp, req)
|
||||
}
|
||||
|
|
|
@ -342,8 +342,6 @@ func (m *Manager) RebalanceServers() {
|
|||
// continue to use the existing connection until the next
|
||||
// rebalance occurs.
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// reconcileServerList returns true when the first server in serverList
|
||||
|
|
|
@ -60,7 +60,7 @@ func TestUiIndex(t *testing.T) {
|
|||
// Verify the body
|
||||
out := bytes.NewBuffer(nil)
|
||||
io.Copy(out, resp.Body)
|
||||
if string(out.Bytes()) != "test" {
|
||||
if out.String() != "test" {
|
||||
t.Fatalf("bad: %s", out.Bytes())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -302,7 +302,7 @@ func (c *cmd) captureStatic() error {
|
|||
var errors error
|
||||
|
||||
// Collect the named outputs here
|
||||
outputs := make(map[string]interface{}, 0)
|
||||
outputs := make(map[string]interface{})
|
||||
|
||||
// Capture host information
|
||||
if c.configuredTarget("host") {
|
||||
|
|
|
@ -225,7 +225,7 @@ func (c *TelemetryConfig) MergeDefaults(defaults *TelemetryConfig) {
|
|||
continue
|
||||
}
|
||||
case reflect.Bool:
|
||||
if f.Bool() != false {
|
||||
if f.Bool() {
|
||||
continue
|
||||
}
|
||||
default:
|
||||
|
|
|
@ -110,7 +110,7 @@ func dedup(a []string) string {
|
|||
delete(m, s)
|
||||
}
|
||||
}
|
||||
return string(b.Bytes())
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func run(r Retryer, t Failer, f func(r *R)) {
|
||||
|
|
|
@ -407,21 +407,6 @@ will exit with an error at startup.
|
|||
[Raft Protocol Version Compatibility](/docs/upgrade-specific.html#raft-protocol-version-compatibility)
|
||||
for more details.
|
||||
|
||||
* <a name="_raft_snapshot_threshold"></a><a href="#_raft_snapshot_threshold">`-raft-snapshot-threshold`</a> - This controls the
|
||||
minimum number of raft commit entries between snapshots that are saved to disk. This is a low-level parameter that should
|
||||
rarely need to be changed. Very busy clusters experiencing excessive disk IO may increase this value to reduce disk IO, and minimize
|
||||
the chances of all servers taking snapshots at the same time. Increasing this trades off disk IO for disk space since the log will
|
||||
grow much larger and the space in the raft.db file can't be reclaimed till the next snapshot. Servers may take longer to recover from
|
||||
crashes or failover if this is increased significantly as more logs will need to be replayed. In Consul 1.1.0 and later this
|
||||
defaults to 16384, and in prior versions it was set to 8192.
|
||||
|
||||
* <a name="_raft_snapshot_interval"></a><a href="#_raft_snapshot_interval">`-raft-snapshot-interval`</a> - This controls how often servers
|
||||
check if they need to save a snapshot to disk. This is a low-level parameter that should rarely need to be changed. Very busy clusters
|
||||
experiencing excessive disk IO may increase this value to reduce disk IO, and minimize the chances of all servers taking snapshots at the same time.
|
||||
Increasing this trades off disk IO for disk space since the log will grow much larger and the space in the raft.db file can't be reclaimed
|
||||
till the next snapshot. Servers may take longer to recover from crashes or failover if this is increased significantly as more logs
|
||||
will need to be replayed. In Consul 1.1.0 and later this defaults to `30s`, and in prior versions it was set to `5s`.
|
||||
|
||||
* <a name="_recursor"></a><a href="#_recursor">`-recursor`</a> - Specifies the address of an upstream DNS
|
||||
server. This option may be provided multiple times, and is functionally
|
||||
equivalent to the [`recursors` configuration option](#recursors).
|
||||
|
@ -1431,11 +1416,46 @@ default will automatically work with some tooling.
|
|||
* <a name="raft_protocol"></a><a href="#raft_protocol">`raft_protocol`</a> Equivalent to the
|
||||
[`-raft-protocol` command-line flag](#_raft_protocol).
|
||||
|
||||
* <a name="raft_snapshot_threshold"></a><a href="#raft_snapshot_threshold">`raft_snapshot_threshold`</a> Equivalent to the
|
||||
[`-raft-snapshot-threshold` command-line flag](#_raft_snapshot_threshold).
|
||||
<!-- Note the extra _ anchors are here because we used to erroneously list these as
|
||||
command line flags even though they are not actually defined as valid flags and can
|
||||
only be set in config file. Duplicating the anchor preserves any existing external links
|
||||
to the old fragment -->
|
||||
* <a name="raft_snapshot_threshold"></a><a name="_raft_snapshot_threshold"></a>
|
||||
<a href="#raft_snapshot_threshold">`raft_snapshot_threshold`</a> This controls
|
||||
the minimum number of raft commit entries between snapshots that are saved to
|
||||
disk. This is a low-level parameter that should rarely need to be changed.
|
||||
Very busy clusters experiencing excessive disk IO may increase this value to
|
||||
reduce disk IO, and minimize the chances of all servers taking snapshots at
|
||||
the same time. Increasing this trades off disk IO for disk space since the log
|
||||
will grow much larger and the space in the raft.db file can't be reclaimed
|
||||
till the next snapshot. Servers may take longer to recover from crashes or
|
||||
failover if this is increased significantly as more logs will need to be
|
||||
replayed. In Consul 1.1.0 and later this defaults to 16384, and in prior
|
||||
versions it was set to 8192.
|
||||
|
||||
* <a name="raft_snapshot_interval"></a><a href="#raft_snapshot_interval">`raft_snapshot_interval`</a> Equivalent to the
|
||||
[`-raft-snapshot-interval` command-line flag](#_raft_snapshot_interval).
|
||||
* <a name="raft_snapshot_interval"></a><a name="_raft_snapshot_interval"></a> <a
|
||||
href="#raft_snapshot_interval">`raft_snapshot_interval`</a> This controls how
|
||||
often servers check if they need to save a snapshot to disk. This is a
|
||||
low-level parameter that should rarely need to be changed. Very busy clusters
|
||||
experiencing excessive disk IO may increase this value to reduce disk IO, and
|
||||
minimize the chances of all servers taking snapshots at the same time.
|
||||
Increasing this trades off disk IO for disk space since the log will grow much
|
||||
larger and the space in the raft.db file can't be reclaimed till the next
|
||||
snapshot. Servers may take longer to recover from crashes or failover if this
|
||||
is increased significantly as more logs will need to be replayed. In Consul
|
||||
1.1.0 and later this defaults to `30s`, and in prior versions it was set to
|
||||
`5s`.
|
||||
|
||||
* <a name="raft_trailing_logs"></a><a
|
||||
href="#raft_trailing_logs">`raft_trailing_logs`</a> - This controls how many
|
||||
log entries are left in the log store on disk after a snapshot is made. This
|
||||
should only be adjusted when followers cannot catch up to the leader due to a
|
||||
very large snapshot size and high write throughput causing log truncation
|
||||
before a snapshot can be fully installed. If you need to use this to recover
|
||||
a cluster, consider reducing write throughput or the amount of data stored on
|
||||
Consul as it is likely under a load it is not designed to handle. The default
|
||||
value is 10000 which is suitable for all normal workloads. Added in Consul
|
||||
1.5.3.
|
||||
|
||||
* <a name="reap"></a><a href="#reap">`reap`</a> This controls Consul's automatic reaping of child processes,
|
||||
which is useful if Consul is running as PID 1 in a Docker container. If this isn't specified, then Consul will
|
||||
|
|
|
@ -0,0 +1,286 @@
|
|||
---
|
||||
name: "Consul-Kubernetes Deployment Guide"
|
||||
content_length: 14
|
||||
id: kubernetes-production-deploy
|
||||
layout: content_layout
|
||||
products_used:
|
||||
- Consul
|
||||
description: This guide covers the necessary steps to install and configure a new Consul cluster on Kubernetes.
|
||||
level: Advanced
|
||||
---
|
||||
|
||||
|
||||
This guide covers the necessary steps to install and configure a new Consul
|
||||
cluster on Kubernetes, as defined in the [Consul Reference Architecture
|
||||
guide](/consul/day-1-operations/kubernetes-reference#consul-datacenter-deployed-in-kubernetes).
|
||||
By the end of this guide, you will be able to identify the installation
|
||||
prerequisites, customize the Helm chart to fit your environment requirements,
|
||||
and interact with your new Consul cluster.
|
||||
|
||||
~> You should have the following configured before starting this guide: Helm
|
||||
installed and configured locally, tiller running in the Kubernetes cluster, and
|
||||
the Kubernetes CLI configured.
|
||||
|
||||
## Configure Kubernetes Permissions to Deploy Consul
|
||||
|
||||
Before deploying Consul, you will need to create a new Kubernetes service
|
||||
account with the correct permissions and to authenticate it on the command
|
||||
line. You will need Kubernetes operator permissions to create and modify
|
||||
policies, deploy services, access the Kubernetes dashboard, create secrets, and
|
||||
create RBAC objects. You can find documentation for RBAC and service accounts
|
||||
for the following cloud providers.
|
||||
|
||||
- [AKS](https://docs.microsoft.com/en-us/azure/aks/kubernetes-service-principal)
|
||||
- [EKS](https://docs.aws.amazon.com/eks/latest/userguide/install-aws-iam-authenticator.html)
|
||||
- [GCP](https://console.cloud.google.com/iam-admin/serviceaccounts)
|
||||
|
||||
Note, Consul can be deployed on any properly configured Kubernetes cluster in
|
||||
the cloud or on premises.
|
||||
|
||||
Once you have a service account, you will also need to add a permission to
|
||||
deploy the helm chart. This is done with the `clusterrolebinding` method.
|
||||
|
||||
```sh
|
||||
$ kubectl create clusterrolebinding kubernetes-dashboard -n kube-system --clusterrole=cluster-admin --serviceaccount=kube-system:kubernetes-dashboard
|
||||
```
|
||||
|
||||
Finally, you may need to create Kubernetes secrets to store Consul data. You
|
||||
can reference these secrets in the customized Helm chart values file.
|
||||
|
||||
- If you have purchased Enterprise Consul, the enterprise license file should be
|
||||
used with the official image, `hashicorp/consul-enterprise:1.5.0-ent`.
|
||||
|
||||
- Enable
|
||||
[encryption](https://www.consul.io/docs/agent/encryption.html#gossip-encryption) to secure gossip traffic within the Consul cluster.
|
||||
|
||||
|
||||
~> Note, depending on your environment, the previous secrets may not be
|
||||
necessary.
|
||||
|
||||
## Configure Helm Chart
|
||||
|
||||
Now that you have prepared your Kubernetes cluster, you can customize the Helm
|
||||
chart. First, you will need to download the latest official Helm chart.
|
||||
|
||||
```sh
|
||||
$ git clone https://github.com/hashicorp/consul-helm.git
|
||||
```
|
||||
|
||||
The `consul-helm` directory will contain a `values.yaml` file with example
|
||||
parameters. You can update this file to customize your Consul deployment. Below
|
||||
we detail some of the parameters you should customize and provide an example
|
||||
file, however you should consider your particular production needs when
|
||||
configuring your chart.
|
||||
|
||||
### Global Values
|
||||
|
||||
The global values will affect all the other parameters in the chart.
|
||||
|
||||
To enable all of the Consul components in the Helm chart, set `enabled` to
|
||||
`true`. This means servers, clients, Consul DNS, and the Consul UI will be
|
||||
installed with their defaults. You should also set the following global
|
||||
parameters based on your specific environment requirements.
|
||||
|
||||
- `image` is the name and tag of the Consul Docker image.
|
||||
- `imagek8s` is the name and tag of the Docker image for the consul-k8s binary.
|
||||
- `datacenter` the name of your Consul datacenter.
|
||||
- `domain` the domain Consul uses for DNS queries.
|
||||
|
||||
For security, set the `bootstrapACLs` parameter to true. This will enable
|
||||
Kubernetes to initially set up Consul's [ACL
|
||||
system](https://www.consul.io/docs/acl/acl-system.html).
|
||||
|
||||
Read the Consul Helm chart documentation to review all the [global
|
||||
parameters](https://www.consul.io/docs/platform/k8s/helm.html#v-global).
|
||||
|
||||
### Consul UI
|
||||
|
||||
To enable the Consul web UI, update the `ui` section of your values file and set
|
||||
`enabled` to `true`.
|
||||
|
||||
Note, you can also set up a [loadbalancer
|
||||
resource](https://github.com/hashicorp/demo-consul-101/tree/master/k8s#implement-load-balancer)
|
||||
or other service type in Kubernetes to make it easier to access the UI.
|
||||
|
||||
### Consul Servers
|
||||
|
||||
For production deployments, you will need to deploy [3 or 5 Consul
|
||||
servers](https://www.consul.io/docs/internals/consensus.html#deployment-table)
|
||||
for quorum and failure tolerance. For most deployments, 3 servers are adequate.
|
||||
|
||||
In the server section set both `replicas` and `bootstrapExpect` to 3. This will
|
||||
deploy three servers and cause Consul to wait to perform leader election until
|
||||
all three are healthy. The `resources` will depend on your environment; in the
|
||||
example at the end of the guide, the resources are set for a large environment.
|
||||
|
||||
#### Affinity
|
||||
|
||||
To ensure the Consul servers are placed on different Kubernetes nodes, you will
|
||||
need to configure affinity. Otherwise, the failure of one Kubernetes node could
|
||||
cause the loss of multiple Consul servers, and result in quorum loss. By
|
||||
default, the example `values.yaml` has affinity configured correctly.
|
||||
|
||||
#### Enterprise License
|
||||
|
||||
If you have an [Enterprise
|
||||
license](https://www.hashicorp.com/products/consul/enterprise) you should
|
||||
reference the Kubernetes secret in the `enterpriseLicense` parameter.
|
||||
|
||||
Read the Consul Helm chart documentation to review all the [server
|
||||
parameters](https://www.consul.io/docs/platform/k8s/helm.html#v-server).
|
||||
|
||||
### Consul Clients
|
||||
|
||||
A Consul client is deployed on every Kubernetes node, so you do not need to
|
||||
specify the number of clients for your deployments. You will need to specify
|
||||
resources and enable gRPC. The resources in the example at the end of this guide
|
||||
should be
|
||||
sufficient for most production scenarios since Consul clients are designed for
|
||||
horizontal scalability. Enabling `grpc` enables the gRPC listener on port 8502
|
||||
and exposes it to the host. It is required to use Consul Connect.
|
||||
|
||||
Read the Consul Helm chart documentation to review all the [client
|
||||
parameters](https://www.consul.io/docs/platform/k8s/helm.html#v-client).
|
||||
|
||||
### Consul Connect Injection Security
|
||||
|
||||
Even though you enabled Consul server communication over Connect in the server section, you will also
|
||||
need to enable `connectInject` by setting `enabled` to `true`. In the
|
||||
`connectInject` section you will also configure security features. Enabling the
|
||||
`default` parameter will allow the injector to automatically inject the Connect
|
||||
sidecar into all pods. If you would prefer to manually annotate which pods to inject, you
|
||||
can set this to false. Setting the `aclBindingRuleSelector` parameter to
|
||||
`serviceaccount.name!=default` ensures that new services do not all receive the
|
||||
same token if you are only using a default service account. This setting is
|
||||
only necessary if you have enabled ACLs in the global section.
|
||||
|
||||
Read more about the [Connect Inject
|
||||
parameters](https://www.consul.io/docs/platform/k8s/helm.html#v-connectinject).
|
||||
|
||||
## Complete Example
|
||||
|
||||
Your finished values file should resemble the following example. For more
|
||||
complete descriptions of all the available parameters see the `values.yaml`
|
||||
file provided with the Helm chart and the [reference
|
||||
documentation](https://www.consul.io/docs/platform/k8s/helm.html).
|
||||
|
||||
```yaml
|
||||
# Configure global settings in this section.
|
||||
global:
|
||||
# Enable all the components within this chart by default.
|
||||
enabled: true
|
||||
# Specify the Consul and consul-k8s images to use
|
||||
image: "consul:1.5.0"
|
||||
imagek8s: "hashicorp/consul-k8s:0.8.1"
|
||||
domain: consul
|
||||
datacenter: primarydc
|
||||
# Bootstrap ACLs within Consul. This is highly recommended.
|
||||
bootstrapACLs: true
|
||||
# Gossip encryption
|
||||
gossipEncryption: |
|
||||
secretName: "encrypt-key"
|
||||
secretKey: "key"
|
||||
# Configure your Consul servers in this section.
|
||||
server:
|
||||
enabled: true
|
||||
connect: true
|
||||
# Specify three servers that wait till all are healthy to bootstrap the Consul cluster.
|
||||
replicas: 3
|
||||
bootstrapExpect: 3
|
||||
# Specify the resources that servers request for placement. These values will serve a large environment.
|
||||
resources: |
|
||||
requests:
|
||||
memory: "32Gi"
|
||||
cpu: "4"
|
||||
disk: "50Gi"
|
||||
limits:
|
||||
memory: "32Gi"
|
||||
cpu: "4"
|
||||
disk: "50Gi"
|
||||
# If using Enterprise, reference the Kubernetes secret that holds your license here
|
||||
enterpriseLicense:
|
||||
secretName: "consul-license"
|
||||
secretKey: "key"
|
||||
# Prevent Consul servers from co-location on Kubernetes nodes.
|
||||
affinity: |
|
||||
podAntiAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
- labelSelector:
|
||||
matchLabels:
|
||||
app: {{ template "consul.name" . }}
|
||||
release: "{{ .Release.Name }}"
|
||||
component: server
|
||||
topologyKey: kubernetes.io/hostname
|
||||
# Configure Consul clients in this section
|
||||
client:
|
||||
enabled: true
|
||||
# Specify the resources that clients request for deployment.
|
||||
resources: |
|
||||
requests:
|
||||
memory: "8Gi"
|
||||
cpu: "2"
|
||||
disk: "15Gi"
|
||||
limits:
|
||||
memory: "8Gi"
|
||||
cpu: "2"
|
||||
disk: "15Gi"
|
||||
grpc: true
|
||||
# Enable and configure the Consul UI.
|
||||
ui:
|
||||
enabled: true
|
||||
# Configure security for Consul Connect pod injection
|
||||
connectInject:
|
||||
enabled: true
|
||||
default: true
|
||||
namespaceSelector: "my-namespace"
|
||||
aclBindingRuleSelector: "serviceaccount.name!=default"
|
||||
```
|
||||
## Deploy Consul
|
||||
|
||||
Now that you have customized the `values.yaml` file, you can deploy Consul with
|
||||
Helm. This should only take a few minutes. The Consul pods should appear in the
|
||||
Kubernetes dashboard immediately and you can monitor the deployment process
|
||||
there.
|
||||
|
||||
```sh
|
||||
$ helm install ./consul-helm -f values.yaml
|
||||
```
|
||||
|
||||
To check the deployment process on the command line you can use `kubectl`.
|
||||
|
||||
```sh
|
||||
$ kubectl get pods
|
||||
```
|
||||
|
||||
## Summary
|
||||
|
||||
In this guide, you configured Consul, using the Helm chart, for a production
|
||||
environment. This involved ensuring that your cluster had a properly
|
||||
distributed server cluster, specifying enough resources for your agents,
|
||||
securing the cluster with ACLs and gossip encryption, and enabling other Consul
|
||||
functionality including Connect and the Consul UI.
|
||||
|
||||
Now you can interact with your Consul cluster through the UI or CLI.
|
||||
|
||||
If you exposed the UI using a load balancer it will be available at the
|
||||
`LoadBalancer Ingress` IP address and `Port` that is output from the following
|
||||
command. Note, you will need to replace `consul-server` with the server name
|
||||
from your cluster.
|
||||
|
||||
```sh
|
||||
$ kubectl describe services consul-server
|
||||
```
|
||||
|
||||
To access the Consul CLI, open a terminal session using the Kubernetes CLI.
|
||||
|
||||
```sh
|
||||
$ kubectl exec <pod name> -it /bin/ash
|
||||
```
|
||||
|
||||
To learn more about how to interact with your Consul cluster or use it for
|
||||
service discovery, configuration or segmentation, try one of Learn’s
|
||||
[Operations or Development tracks](/consul/#advanced). Follow the [Security and
|
||||
Networking track](/consul/?track=security-networking#security-networking) to
|
||||
learn more about securing your Consul cluster.
|
||||
|
||||
|
Loading…
Reference in New Issue