package leafcert

import (
	"context"
	"crypto/tls"
	"crypto/x509"
	"encoding/pem"
	"fmt"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/stretchr/testify/require"

	"github.com/hashicorp/consul/acl"
	"github.com/hashicorp/consul/agent/cache"
	"github.com/hashicorp/consul/agent/connect"
	"github.com/hashicorp/consul/agent/consul"
	"github.com/hashicorp/consul/agent/structs"
	"github.com/hashicorp/consul/sdk/testutil"
	"github.com/hashicorp/consul/sdk/testutil/retry"
)
|
|
|
// Test that after an initial signing, new CA roots (new ID) will |
|
// trigger a blocking query to execute. |
|
func TestManager_changingRoots(t *testing.T) { |
|
if testing.Short() { |
|
t.Skip("too slow for testing.Short") |
|
} |
|
|
|
t.Parallel() |
|
|
|
m, signer := testManager(t, nil) |
|
|
|
caRoot := signer.UpdateCA(t, nil) |
|
|
|
// We'll reuse the fetch options and request |
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", Service: "web", |
|
MinQueryIndex: 0, MaxQueryTime: 10 * time.Second, |
|
} |
|
|
|
// First fetch should return immediately |
|
getCh := testAsyncGet(t, m, req) |
|
var idx uint64 |
|
select { |
|
case <-time.After(100 * time.Millisecond): |
|
t.Fatal("shouldn't block waiting for fetch") |
|
case result := <-getCh: |
|
require.NoError(t, result.Err) |
|
require.NotNil(t, result.Value) |
|
requireLeafValidUnderCA(t, result.Value, caRoot) |
|
require.True(t, result.Index > 0) |
|
|
|
idx = result.Index |
|
} |
|
|
|
// Second fetch should block with set index |
|
req.MinQueryIndex = idx |
|
getCh = testAsyncGet(t, m, req) |
|
select { |
|
case result := <-getCh: |
|
t.Fatalf("should not return: %#v", result) |
|
case <-time.After(100 * time.Millisecond): |
|
} |
|
|
|
// Let's send in new roots, which should trigger the sign req. We need to take |
|
// care to set the new root as active |
|
caRoot2 := signer.UpdateCA(t, nil) |
|
select { |
|
case <-time.After(100 * time.Millisecond): |
|
t.Fatal("shouldn't block waiting for fetch") |
|
case result := <-getCh: |
|
require.NoError(t, result.Err) |
|
require.NotNil(t, result.Value) |
|
require.True(t, result.Index > idx) |
|
requireLeafValidUnderCA(t, result.Value, caRoot2) |
|
} |
|
|
|
// Third fetch should block |
|
getCh = testAsyncGet(t, m, req) |
|
select { |
|
case result := <-getCh: |
|
t.Fatalf("should not return: %#v", result) |
|
case <-time.After(100 * time.Millisecond): |
|
} |
|
} |
|
|
|
// Tests that if the root change jitter is longer than the time left on the |
|
// timeout, we return normally but then still renew the cert on a subsequent |
|
// call. |
|
func TestManager_changingRootsJitterBetweenCalls(t *testing.T) { |
|
t.Parallel() |
|
|
|
const TestOverrideCAChangeInitialDelay = 100 * time.Millisecond |
|
|
|
m, signer := testManager(t, func(cfg *Config) { |
|
// Override the root-change delay so we will timeout first. We can't set it to |
|
// a crazy high value otherwise we'll have to wait that long in the test to |
|
// see if it actually happens on subsequent calls. We instead reduce the |
|
// timeout in FetchOptions to be much shorter than this. |
|
cfg.TestOverrideCAChangeInitialDelay = TestOverrideCAChangeInitialDelay |
|
}) |
|
|
|
caRoot := signer.UpdateCA(t, nil) |
|
|
|
// We'll reuse the fetch options and request. Timeout must be much shorter
// than the initial root delay. A 35ms timeout means that if we deliver the
// root change during the first blocking call, we should need to block fully
// for a few more calls before the cert is renewed. We pick a timeout that is
// not an exact multiple of the 100ms delay above to reduce the chance that
// timing works out in a way that makes it hard to tell a timeout from an
// early return due to a cert renewal.
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", Service: "web", |
|
MinQueryIndex: 0, MaxQueryTime: 35 * time.Millisecond, |
|
} |
|
|
|
// First fetch should return immediately |
|
getCh := testAsyncGet(t, m, req) |
|
var ( |
|
idx uint64 |
|
issued *structs.IssuedCert |
|
) |
|
select { |
|
case <-time.After(100 * time.Millisecond): |
|
t.Fatal("shouldn't block waiting for fetch") |
|
case result := <-getCh: |
|
require.NoError(t, result.Err) |
|
require.NotNil(t, result.Value) |
|
require.True(t, result.Index > 0) |
|
requireLeafValidUnderCA(t, result.Value, caRoot) |
|
idx = result.Index |
|
issued = result.Value |
|
} |
|
|
|
// Let's send in new roots, which should eventually trigger the sign req. We |
|
// need to take care to set the new root as active. Note that this is |
|
// implicitly testing that root updates that happen in between leaf blocking |
|
// queries are still noticed too. At this point no leaf blocking query is |
|
// running so the root watch should be stopped. By pushing this update, the |
|
// next blocking query will _immediately_ see the new root which means it |
|
// needs to correctly notice that it is not the same one that generated the |
|
// current cert and start the rotation. This is good, just not obvious that |
|
// the behavior is actually well tested here when it is. |
|
caRoot2 := signer.UpdateCA(t, nil) |
|
earliestRootDelivery := time.Now() |
|
|
|
// Some number of fetches (likely 2-4) should time out after the short
// MaxQueryTime, and after the 100ms delay has elapsed in total we should see
// the new cert. Since this is all very timing dependent, we don't hard code
// exact numbers here and instead loop for plenty of time, doing as many calls
// as it takes, and just assert on the time taken and that each call either
// blocks and returns the cached cert, or returns the new one.
|
req.MinQueryIndex = idx |
|
var shouldExpireAfter time.Time |
|
i := 1
rootsDelivered := false
for !rootsDelivered {
|
start := time.Now() |
|
getCh = testAsyncGet(t, m, req) |
|
select { |
|
case result := <-getCh: |
|
require.NoError(t, result.Err) |
|
timeTaken := time.Since(start) |
|
|
|
// There are two options: either it blocked waiting for the delay after the
// rotation, or it returned the new CA cert before the timeout was done. To be
// more robust against timing, we take the returned value as the decider for
// which case it is, and assert that the timing matches our expected bounds
// rather than vice versa.
|
|
|
if result.Index > idx { |
|
// Got a new cert |
|
require.NotEqual(t, issued, result.Value) |
|
require.NotNil(t, result.Value) |
|
requireLeafValidUnderCA(t, result.Value, caRoot2) |
|
// Should not have been delivered before the delay |
|
require.True(t, time.Since(earliestRootDelivery) > TestOverrideCAChangeInitialDelay) |
|
// All good. We are done! |
|
rootsDelivered = true |
|
} else { |
|
// Should be the cached cert |
|
require.Equal(t, issued, result.Value) |
|
require.Equal(t, idx, result.Index) |
|
requireLeafValidUnderCA(t, result.Value, caRoot) |
|
// Sanity check we blocked for the whole timeout |
|
require.Truef(t, timeTaken > req.MaxQueryTime, |
|
"should block for at least %s, returned after %s", |
|
req.MaxQueryTime, timeTaken) |
|
// Sanity check that the forceExpireAfter state was set correctly
shouldExpireAfter = testObserveLeafCert(m, req, func(cd *certData) time.Time {
|
return cd.state.forceExpireAfter |
|
}) |
|
require.True(t, shouldExpireAfter.After(time.Now())) |
|
require.True(t, shouldExpireAfter.Before(time.Now().Add(TestOverrideCAChangeInitialDelay))) |
|
} |
|
case <-time.After(50 * time.Millisecond): |
|
t.Fatalf("request %d blocked too long", i) |
|
} |
|
i++ |
|
|
|
// Sanity check that we've not gone way beyond the deadline without a |
|
// new cert. We give some leeway to make it less brittle. |
|
require.Falsef(t, time.Now().After(shouldExpireAfter.Add(100*time.Millisecond)), |
|
"waited extra 100ms and delayed CA rotate renew didn't happen") |
|
} |
|
} |
|
|
|
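// testObserveLeafCert locks the Manager's certData entry for the given request
// and returns whatever the callback reads from it. It exists so tests can peek
// at internal per-cert state (such as forceExpireAfter) without racing the
// Manager's own goroutines.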
func testObserveLeafCert[T any](m *Manager, req *ConnectCALeafRequest, cb func(*certData) T) T { |
|
key := req.Key() |
|
|
|
cd := m.getCertData(key) |
|
|
|
cd.lock.Lock() |
|
defer cd.lock.Unlock() |
|
|
|
return cb(cd) |
|
} |
|
|
|
// Tests that if the root changes in between blocking calls we still pick it up. |
|
func TestManager_changingRootsBetweenBlockingCalls(t *testing.T) { |
|
t.Parallel() |
|
|
|
m, signer := testManager(t, nil) |
|
|
|
caRoot := signer.UpdateCA(t, nil) |
|
|
|
// We'll reuse the fetch options and request. A short timeout is important
// since we wait the full timeout before changing roots.
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", Service: "web", |
|
MinQueryIndex: 0, MaxQueryTime: 35 * time.Millisecond, |
|
} |
|
|
|
// First fetch should return immediately |
|
getCh := testAsyncGet(t, m, req) |
|
var ( |
|
idx uint64 |
|
issued *structs.IssuedCert |
|
) |
|
select { |
|
case <-time.After(100 * time.Millisecond): |
|
t.Fatal("shouldn't block waiting for fetch") |
|
case result := <-getCh: |
|
require.NoError(t, result.Err) |
|
require.NotNil(t, result.Value) |
|
requireLeafValidUnderCA(t, result.Value, caRoot) |
|
require.True(t, result.Index > 0) |
|
idx = result.Index |
|
issued = result.Value |
|
} |
|
|
|
// Next fetch should block for the full timeout |
|
start := time.Now() |
|
getCh = testAsyncGet(t, m, req) |
|
select { |
|
case <-time.After(100 * time.Millisecond): |
|
t.Fatal("shouldn't block for too long waiting for fetch") |
|
case result := <-getCh: |
|
require.NoError(t, result.Err) |
|
require.Equal(t, issued, result.Value) |
|
// Still the initial cached result |
|
require.Equal(t, idx, result.Index) |
|
// Sanity check that it waited |
|
require.True(t, time.Since(start) > req.MaxQueryTime) |
|
} |
|
|
|
// No active requests, simulate root change now |
|
caRoot2 := signer.UpdateCA(t, nil) |
|
earliestRootDelivery := time.Now() |
|
|
|
// We should get the new cert immediately on the next fetch (since the test
// overrides the root change jitter to be tiny, no delay is expected).
|
getCh = testAsyncGet(t, m, req) |
|
select { |
|
case <-time.After(100 * time.Millisecond): |
|
t.Fatal("shouldn't block too long waiting for fetch") |
|
case result := <-getCh: |
|
require.NoError(t, result.Err) |
|
require.NotEqual(t, issued, result.Value) |
|
requireLeafValidUnderCA(t, result.Value, caRoot2) |
|
require.True(t, result.Index > idx) |
|
// Sanity check that we didn't wait too long |
|
require.True(t, time.Since(earliestRootDelivery) < req.MaxQueryTime) |
|
} |
|
} |
|
|
|
func TestManager_CSRRateLimiting(t *testing.T) { |
|
if testing.Short() { |
|
t.Skip("too slow for testing.Short") |
|
} |
|
|
|
t.Parallel() |
|
|
|
m, signer := testManager(t, func(cfg *Config) { |
|
// Each jitter window will be only 100 ms long to make testing quick but |
|
// highly likely not to fail based on scheduling issues. |
|
cfg.TestOverrideCAChangeInitialDelay = 100 * time.Millisecond |
|
}) |
|
|
|
signer.UpdateCA(t, nil) |
|
|
|
signer.SetSignCallErrors( |
|
// First call returns a rate limit error. This is important as it checks
// behavior when the cache is empty and we have to return a nil Value but need
// to save state to do the right thing on retry.
|
consul.ErrRateLimited, // inc |
|
// Then succeed on second call |
|
nil, |
|
// Then be rate limited again on several further calls |
|
consul.ErrRateLimited, // inc |
|
consul.ErrRateLimited, // inc |
|
// Then fine after that |
|
) |
|
|
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", |
|
Service: "web", |
|
EnterpriseMeta: *acl.DefaultEnterpriseMeta(), |
|
} |
|
|
|
// First fetch should return rate limit error directly - client is expected to |
|
// backoff itself. |
|
getCh := testAsyncGet(t, m, req) |
|
select { |
|
case <-time.After(200 * time.Millisecond): |
|
t.Fatal("shouldn't block longer than one jitter window for success") |
|
case result := <-getCh: |
|
require.Error(t, result.Err) |
|
require.Equal(t, consul.ErrRateLimited.Error(), result.Err.Error()) |
|
} |
|
|
|
// Second call should return correct cert immediately. |
|
getCh = testAsyncGet(t, m, req) |
|
var ( |
|
idx uint64 |
|
issued *structs.IssuedCert |
|
) |
|
select { |
|
case <-time.After(100 * time.Millisecond): |
|
t.Fatal("shouldn't block waiting for fetch") |
|
case result := <-getCh: |
|
require.NoError(t, result.Err) |
|
require.NotNil(t, result.Value) |
|
require.True(t, result.Index > 0) |
|
idx = result.Index |
|
issued = result.Value |
|
} |
|
|
|
// Send in new roots, which should trigger the next sign req. We need to take |
|
// care to set the new root as active |
|
signer.UpdateCA(t, nil) |
|
earliestRootDelivery := time.Now() |
|
|
|
// Sanity check state |
|
require.Equal(t, uint64(1), signer.GetSignCallErrorCount()) |
|
|
|
// After root rotation jitter has been waited out, a new CSR will |
|
// be attempted but will fail and return the previous cached result with no |
|
// error since we will try again soon. |
|
getCh = testAsyncGet(t, m, req) |
|
select { |
|
case <-time.After(200 * time.Millisecond): |
|
t.Fatal("shouldn't block too long waiting for fetch") |
|
case result := <-getCh: |
|
// We should block for _at least_ one jitter period since we set that to |
|
// 100ms and in test override mode we always pick the max jitter not a |
|
// random amount. |
|
require.True(t, time.Since(earliestRootDelivery) > 100*time.Millisecond) |
|
require.Equal(t, uint64(2), signer.GetSignCallErrorCount()) |
|
|
|
require.NoError(t, result.Err) |
|
require.Equal(t, issued, result.Value) |
|
// Index is unchanged since this should still be the original cached result as
// we failed to get a new cert.
|
require.Equal(t, idx, result.Index) |
|
} |
|
|
|
// Root rotation state is now only captured in the opts.LastResult.State so a |
|
// subsequent call should also wait for 100ms and then attempt to generate a |
|
// new cert since we failed last time. |
|
getCh = testAsyncGet(t, m, req) |
|
select { |
|
case <-time.After(200 * time.Millisecond): |
|
t.Fatal("shouldn't block too long waiting for fetch") |
|
case result := <-getCh: |
|
// We should block for _at least_ two jitter periods now. |
|
require.True(t, time.Since(earliestRootDelivery) > 200*time.Millisecond) |
|
require.Equal(t, uint64(3), signer.GetSignCallErrorCount()) |
|
|
|
require.NoError(t, result.Err) |
|
require.Equal(t, issued, result.Value) |
|
// Index is unchanged since this should still be the original cached result as
// we failed to get a new cert.
|
require.Equal(t, idx, result.Index) |
|
} |
|
|
|
// Now we've had two rate limit failures and seen root rotation state work |
|
// across both the blocking request that observed the rotation and the |
|
// subsequent one. The next request should wait out the rest of the backoff |
|
// and then actually fetch a new cert at last! |
|
getCh = testAsyncGet(t, m, req) |
|
select { |
|
case <-time.After(200 * time.Millisecond): |
|
t.Fatal("shouldn't block too long waiting for fetch") |
|
case result := <-getCh: |
|
// We should block for _at least_ three jitter periods now. |
|
require.True(t, time.Since(earliestRootDelivery) > 300*time.Millisecond) |
|
require.Equal(t, uint64(3), signer.GetSignCallErrorCount()) |
|
|
|
require.NoError(t, result.Err) |
|
require.NotEqual(t, issued, result.Value) |
|
// Index should have advanced past the cached cert's index now that the CA
// change finally produced a new cert.
|
require.True(t, result.Index > idx) |
|
} |
|
} |
|
|
|
// This test runs multiple concurrent callers watching different leaf certs and |
|
// tries to ensure that the background root watch activity behaves correctly. |
|
func TestManager_watchRootsDedupingMultipleCallers(t *testing.T) { |
|
if testing.Short() { |
|
t.Skip("too slow for testing.Short") |
|
} |
|
|
|
t.Parallel() |
|
|
|
m, signer := testManager(t, nil) |
|
|
|
caRoot := signer.UpdateCA(t, nil) |
|
|
|
// n is the number of clients we'll run |
|
n := 3 |
|
|
|
// setupDoneCh/testDoneCh are used for coordinating clients such that each has
// its initial cert delivered and is blocking before the root changes. They are
// not wait groups since we want to be able to time out the main test goroutine
// if one of the clients gets stuck; instead they are buffered chans.
|
setupDoneCh := make(chan error, n) |
|
testDoneCh := make(chan error, n) |
|
// rootsUpdatedCh is used to coordinate clients so they know when they should
// expect to see the leaf renewed after the root change.
|
rootsUpdatedCh := make(chan struct{}) |
|
|
|
// Create a function that models a single client. It should go through the |
|
// steps of getting an initial cert and then watching for changes until root |
|
// updates. |
|
client := func(i int) { |
|
// We'll reuse the fetch options and request |
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", Service: fmt.Sprintf("web-%d", i), |
|
MinQueryIndex: 0, MaxQueryTime: 10 * time.Second, |
|
} |
|
|
|
// First fetch should return immediately |
|
getCh := testAsyncGet(t, m, req) |
|
var idx uint64 |
|
select { |
|
case <-time.After(100 * time.Millisecond): |
|
setupDoneCh <- fmt.Errorf("shouldn't block waiting for fetch") |
|
return |
|
case result := <-getCh: |
|
require.NoError(t, result.Err) |
|
idx = result.Index |
|
} |
|
|
|
// Second fetch should block with set index |
|
req.MinQueryIndex = idx |
|
getCh = testAsyncGet(t, m, req) |
|
select { |
|
case result := <-getCh: |
|
setupDoneCh <- fmt.Errorf("should not return: %#v", result) |
|
return |
|
case <-time.After(100 * time.Millisecond): |
|
} |
|
|
|
// We're done with setup and the blocking call is still blocking in |
|
// background. |
|
setupDoneCh <- nil |
|
|
|
// Wait until all others are also done and the roots change, in case there are
// stragglers delaying the root update.
|
select { |
|
case <-rootsUpdatedCh: |
|
case <-time.After(200 * time.Millisecond): |
|
testDoneCh <- fmt.Errorf("waited too long for root update") |
|
return |
|
} |
|
|
|
// Now we should see root update within a short period |
|
select { |
|
case <-time.After(100 * time.Millisecond): |
|
testDoneCh <- fmt.Errorf("shouldn't block waiting for fetch") |
|
return |
|
case result := <-getCh: |
|
require.NoError(t, result.Err) |
|
if req.MinQueryIndex == result.Value.CreateIndex { |
|
testDoneCh <- fmt.Errorf("index must be different") |
|
return |
|
} |
|
} |
|
|
|
testDoneCh <- nil |
|
} |
|
|
|
// Sanity check the roots watcher is not running yet |
|
assertRootsWatchCounts(t, m, 0, 0) |
|
|
|
for i := 0; i < n; i++ { |
|
go client(i) |
|
} |
|
|
|
timeoutCh := time.After(200 * time.Millisecond) |
|
|
|
for i := 0; i < n; i++ { |
|
select { |
|
case <-timeoutCh: |
|
t.Fatal("timed out waiting for clients") |
|
case err := <-setupDoneCh: |
|
if err != nil { |
|
t.Fatal(err)
|
} |
|
} |
|
} |
|
|
|
// Should be 3 clients running now, so the roots watcher should have started |
|
// once and not stopped. |
|
assertRootsWatchCounts(t, m, 1, 0) |
|
|
|
caRootCopy := caRoot.Clone() |
|
caRootCopy.Active = false |
|
|
|
// Now we deliver the root update |
|
_ = signer.UpdateCA(t, nil) |
|
// And notify clients |
|
close(rootsUpdatedCh) |
|
|
|
timeoutCh = time.After(200 * time.Millisecond) |
|
for i := 0; i < n; i++ { |
|
select { |
|
case <-timeoutCh: |
|
t.Fatalf("timed out waiting for %d of %d clients to renew after root change", n-i, n) |
|
case err := <-testDoneCh: |
|
if err != nil { |
|
t.Fatal(err)
|
} |
|
} |
|
} |
|
|
|
// All active requests have returned the new cert so the rootsWatcher should |
|
// have stopped. This is timing dependent though so retry a few times |
|
retry.RunWith(retry.ThreeTimes(), t, func(r *retry.R) { |
|
assertRootsWatchCounts(r, m, 1, 1) |
|
}) |
|
} |
|
|
|
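// assertRootsWatchCounts asserts how many times the Manager's background roots
// watcher has been started and stopped, using the test-only atomic counters on
// the rootWatcher.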
func assertRootsWatchCounts(t require.TestingT, m *Manager, wantStarts, wantStops int) { |
|
if tt, ok := t.(*testing.T); ok { |
|
tt.Helper() |
|
} |
|
starts := atomic.LoadUint32(&m.rootWatcher.testStartCount) |
|
stops := atomic.LoadUint32(&m.rootWatcher.testStopCount) |
|
require.Equal(t, wantStarts, int(starts)) |
|
require.Equal(t, wantStops, int(stops)) |
|
} |
|
|
|
// Test that after an initial signing, an expiringLeaf will trigger a |
|
// blocking query to resign. |
|
func TestManager_expiringLeaf(t *testing.T) { |
|
if testing.Short() { |
|
t.Skip("too slow for testing.Short") |
|
} |
|
|
|
t.Parallel() |
|
|
|
m, signer := testManager(t, nil) |
|
|
|
caRoot := signer.UpdateCA(t, nil) |
|
|
|
signer.SetSignCallErrors( |
|
// First call returns expired cert to prime cache with an expired one. |
|
ReplyWithExpiredCert, |
|
) |
|
|
|
// We'll reuse the fetch options and request |
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", Service: "web", |
|
MinQueryIndex: 0, MaxQueryTime: 10 * time.Second, |
|
} |
|
|
|
// First fetch should return immediately |
|
getCh := testAsyncGet(t, m, req) |
|
var ( |
|
idx uint64 |
|
issued *structs.IssuedCert |
|
) |
|
select { |
|
case <-time.After(100 * time.Millisecond): |
|
t.Fatal("shouldn't block waiting for fetch") |
|
case result := <-getCh: |
|
require.NoError(t, result.Err) |
|
require.NotNil(t, result.Value) |
|
require.True(t, result.Index > 0) |
|
idx = result.Index |
|
issued = result.Value |
|
} |
|
|
|
// Second fetch should return immediately despite there being |
|
// no updated CA roots, because we issued an expired cert. |
|
getCh = testAsyncGet(t, m, req) |
|
select { |
|
case <-time.After(100 * time.Millisecond): |
|
t.Fatal("shouldn't block waiting for fetch") |
|
case result := <-getCh: |
|
require.NoError(t, result.Err) |
|
require.NotEqual(t, issued, result.Value) |
|
require.True(t, result.Index > idx) |
|
requireLeafValidUnderCA(t, result.Value, caRoot) |
|
idx = result.Index |
|
} |
|
|
|
// Third fetch should block since the cert is not expiring and |
|
// we also didn't update CA certs. |
|
req.MinQueryIndex = idx |
|
getCh = testAsyncGet(t, m, req) |
|
select { |
|
case result := <-getCh: |
|
t.Fatalf("should not return: %#v", result) |
|
case <-time.After(100 * time.Millisecond): |
|
} |
|
} |
|
|
|
func TestManager_DNSSANForService(t *testing.T) { |
|
t.Parallel() |
|
|
|
m, signer := testManager(t, nil) |
|
|
|
_ = signer.UpdateCA(t, nil) |
|
|
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", |
|
Service: "web", |
|
DNSSAN: []string{"test.example.com"}, |
|
} |
|
|
|
_, _, err := m.Get(context.Background(), req) |
|
require.NoError(t, err) |
|
|
|
caReq := signer.GetCapture(0) |
|
require.NotNil(t, caReq) |
|
|
|
pemBlock, _ := pem.Decode([]byte(caReq.CSR)) |
|
csr, err := x509.ParseCertificateRequest(pemBlock.Bytes) |
|
require.NoError(t, err) |
|
require.Equal(t, csr.DNSNames, []string{"test.example.com"}) |
|
} |
|
|
|
func TestManager_workflow_good(t *testing.T) { |
|
if testing.Short() { |
|
t.Skip("too slow for testing.Short") |
|
} |
|
|
|
ctx, cancel := context.WithCancel(context.Background()) |
|
t.Cleanup(cancel) |
|
|
|
const TestOverrideCAChangeInitialDelay = 1 * time.Nanosecond |
|
|
|
m, signer := testManager(t, func(cfg *Config) { |
|
cfg.TestOverrideCAChangeInitialDelay = TestOverrideCAChangeInitialDelay |
|
}) |
|
|
|
ca1 := signer.UpdateCA(t, nil) |
|
|
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", |
|
Service: "test", |
|
EnterpriseMeta: *acl.DefaultEnterpriseMeta(), |
|
} |
|
|
|
// List |
|
issued, meta, err := m.Get(ctx, req) |
|
require.NoError(t, err) |
|
require.False(t, meta.Hit) |
|
require.NotNil(t, issued) |
|
|
|
// Verify that the cert is signed by the CA |
|
requireLeafValidUnderCA(t, issued, ca1) |
|
|
|
// Verify blocking index |
|
require.True(t, issued.ModifyIndex > 0) |
|
require.Equal(t, issued.ModifyIndex, meta.Index) |
|
|
|
index := meta.Index |
|
|
|
// Fetch it again |
|
testutil.RunStep(t, "test you get a cache hit on another read", func(t *testing.T) { |
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", |
|
Service: "test", |
|
EnterpriseMeta: *acl.DefaultEnterpriseMeta(), |
|
} |
|
issued2, _, err := m.Get(ctx, req) |
|
require.NoError(t, err) |
|
require.NotNil(t, issued2) |
|
require.Equal(t, issued, issued2) |
|
}) |
|
|
|
type reply struct { |
|
cert *structs.IssuedCert |
|
meta cache.ResultMeta |
|
err error |
|
} |
|
|
|
replyCh := make(chan *reply, 1) |
|
go func() { |
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", |
|
Service: "test", |
|
EnterpriseMeta: *acl.DefaultEnterpriseMeta(), |
|
MinQueryIndex: index, |
|
} |
|
|
|
issued2, meta2, err := m.Get(ctx, req) |
|
|
|
replyCh <- &reply{issued2, meta2, err} |
|
}() |
|
|
|
// Set a new CA |
|
ca2 := signer.UpdateCA(t, nil) |
|
|
|
// Issue a blocking query to ensure that the cert gets updated appropriately |
|
testutil.RunStep(t, "test blocking queries update leaf cert", func(t *testing.T) { |
|
var got *reply |
|
select { |
|
case got = <-replyCh: |
|
case <-time.After(500 * time.Millisecond): |
|
t.Fatal("blocking query did not wake up during rotation") |
|
} |
|
|
|
issued2, meta2, err := got.cert, got.meta, got.err |
|
require.NoError(t, err) |
|
require.NotNil(t, issued2) |
|
|
|
require.NotEqual(t, issued.CertPEM, issued2.CertPEM) |
|
require.NotEqual(t, issued.PrivateKeyPEM, issued2.PrivateKeyPEM) |
|
|
|
// Verify that the cert is signed by the new CA |
|
requireLeafValidUnderCA(t, issued2, ca2) |
|
|
|
// Should not be a cache hit! The data was updated in response to the blocking |
|
// query being made. |
|
require.False(t, meta2.Hit) |
|
}) |
|
|
|
testutil.RunStep(t, "test non-blocking queries update leaf cert", func(t *testing.T) { |
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", |
|
Service: "test", |
|
EnterpriseMeta: *acl.DefaultEnterpriseMeta(), |
|
} |
|
|
|
issued, _, err := m.Get(ctx, req) |
|
require.NoError(t, err) |
|
require.NotNil(t, issued) |
|
|
|
// Verify that the cert is signed by the CA |
|
requireLeafValidUnderCA(t, issued, ca2) |
|
|
|
// Issue a non blocking query to ensure that the cert gets updated appropriately |
|
{ |
|
// Set a new CA |
|
ca3 := signer.UpdateCA(t, nil) |
|
|
|
retry.Run(t, func(r *retry.R) { |
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", |
|
Service: "test", |
|
EnterpriseMeta: *acl.DefaultEnterpriseMeta(), |
|
} |
|
|
|
issued2, meta2, err := m.Get(ctx, req) |
|
require.NoError(r, err) |
|
require.NotNil(r, issued2) |
|
|
|
requireLeafValidUnderCA(r, issued2, ca3) |
|
|
|
// Should not be a cache hit! |
|
require.False(r, meta2.Hit) |
|
|
|
require.NotEqual(r, issued.CertPEM, issued2.CertPEM) |
|
require.NotEqual(r, issued.PrivateKeyPEM, issued2.PrivateKeyPEM) |
|
|
|
// Verify that the cert is signed by the new CA |
|
requireLeafValidUnderCA(r, issued2, ca3) |
|
}) |
|
} |
|
}) |
|
} |
|
|
|
// Test we can request a leaf cert for a service and witness correct caching, |
|
// blocking, and update semantics. |
|
// |
|
// This test was originally a client agent test in
// agent.TestAgentConnectCALeafCert_goodNotLocal and was cloned here to keep
// its more complex coverage, but the specific naming of the parent test is
// irrelevant here since there's no notion of the catalog at all at this layer.
|
func TestManager_workflow_goodNotLocal(t *testing.T) { |
|
if testing.Short() { |
|
t.Skip("too slow for testing.Short") |
|
} |
|
|
|
ctx, cancel := context.WithCancel(context.Background()) |
|
t.Cleanup(cancel) |
|
|
|
const TestOverrideCAChangeInitialDelay = 1 * time.Nanosecond |
|
|
|
m, signer := testManager(t, func(cfg *Config) { |
|
cfg.TestOverrideCAChangeInitialDelay = TestOverrideCAChangeInitialDelay |
|
}) |
|
|
|
ca1 := signer.UpdateCA(t, nil) |
|
|
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", |
|
Service: "test", |
|
EnterpriseMeta: *acl.DefaultEnterpriseMeta(), |
|
} |
|
|
|
// List |
|
issued, meta, err := m.Get(ctx, req) |
|
require.NoError(t, err) |
|
require.False(t, meta.Hit) |
|
require.NotNil(t, issued) |
|
|
|
// Verify that the cert is signed by the CA |
|
requireLeafValidUnderCA(t, issued, ca1) |
|
|
|
// Verify blocking index |
|
require.True(t, issued.ModifyIndex > 0) |
|
require.Equal(t, issued.ModifyIndex, meta.Index) |
|
|
|
// Fetch it again |
|
testutil.RunStep(t, "test you get a cache hit on another read", func(t *testing.T) { |
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", |
|
Service: "test", |
|
EnterpriseMeta: *acl.DefaultEnterpriseMeta(), |
|
} |
|
issued2, _, err := m.Get(ctx, req) |
|
require.NoError(t, err) |
|
require.NotNil(t, issued2) |
|
require.Equal(t, issued, issued2) |
|
}) |
|
|
|
// Test Blocking - see https://github.com/hashicorp/consul/issues/4462 |
|
testutil.RunStep(t, "test blocking issue 4462", func(t *testing.T) { |
|
// Fetch it again |
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", |
|
Service: "test", |
|
EnterpriseMeta: *acl.DefaultEnterpriseMeta(), |
|
MinQueryIndex: issued.ModifyIndex, |
|
MaxQueryTime: 125 * time.Millisecond, |
|
} |
|
var ( |
|
respCh = make(chan *structs.IssuedCert) |
|
errCh = make(chan error, 1) |
|
) |
|
go func() { |
|
issued2, _, err := m.Get(ctx, req) |
|
if err != nil { |
|
errCh <- err |
|
} else { |
|
respCh <- issued2 |
|
} |
|
}() |
|
|
|
select { |
|
case <-time.After(500 * time.Millisecond): |
|
require.FailNow(t, "Shouldn't block for this long - not respecting wait parameter in the query") |
|
|
|
case err := <-errCh: |
|
require.NoError(t, err) |
|
case <-respCh: |
|
} |
|
}) |
|
|
|
testutil.RunStep(t, "test that caching is updated in the background", func(t *testing.T) { |
|
// Set a new CA |
|
ca := signer.UpdateCA(t, nil) |
|
|
|
retry.Run(t, func(r *retry.R) { |
|
// Try and sign again (note no index/wait arg since cache should update in |
|
// background even if we aren't actively blocking) |
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", |
|
Service: "test", |
|
EnterpriseMeta: *acl.DefaultEnterpriseMeta(), |
|
} |
|
|
|
issued2, _, err := m.Get(ctx, req) |
|
require.NoError(r, err) |
|
|
|
if issued.CertPEM == issued2.CertPEM { |
|
r.Fatalf("leaf has not updated") |
|
} |
|
|
|
// Got a new leaf. Sanity check it's a whole new key as well as different |
|
// cert. |
|
if issued.PrivateKeyPEM == issued2.PrivateKeyPEM { |
|
r.Fatalf("new leaf has same private key as before") |
|
} |
|
|
|
// Verify that the cert is signed by the new CA |
|
requireLeafValidUnderCA(r, issued2, ca) |
|
|
|
require.NotEqual(r, issued, issued2) |
|
}) |
|
}) |
|
} |
|
|
|
func TestManager_workflow_nonBlockingQuery_after_blockingQuery_shouldNotBlock(t *testing.T) { |
|
// see: https://github.com/hashicorp/consul/issues/12048 |
|
|
|
if testing.Short() { |
|
t.Skip("too slow for testing.Short") |
|
} |
|
|
|
t.Parallel() |
|
|
|
ctx, cancel := context.WithCancel(context.Background()) |
|
t.Cleanup(cancel) |
|
|
|
m, signer := testManager(t, nil) |
|
|
|
_ = signer.UpdateCA(t, nil) |
|
|
|
var ( |
|
serialNumber string |
|
index uint64 |
|
issued *structs.IssuedCert |
|
) |
|
testutil.RunStep(t, "do initial non-blocking query", func(t *testing.T) { |
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", |
|
Service: "test", |
|
EnterpriseMeta: *acl.DefaultEnterpriseMeta(), |
|
} |
|
issued1, meta, err := m.Get(ctx, req) |
|
require.NoError(t, err) |
|
|
|
serialNumber = issued1.SerialNumber |
|
|
|
require.False(t, meta.Hit, "for the leaf cert cache type these are always MISS") |
|
index = meta.Index |
|
issued = issued1 |
|
}) |
|
|
|
go func() { |
|
// launch goroutine for blocking query |
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", |
|
Service: "test", |
|
EnterpriseMeta: *acl.DefaultEnterpriseMeta(), |
|
MinQueryIndex: index, |
|
} |
|
_, _, _ = m.Get(ctx, req) |
|
}() |
|
|
|
// We just need to ensure that the above blocking query is in-flight before |
|
// the next step, so do a little sleep. |
|
time.Sleep(50 * time.Millisecond) |
|
|
|
// The initial non-blocking query populated the leaf cert cache entry |
|
// implicitly. The agent cache doesn't prune entries very often at all, so |
|
// in between both of these steps the data should still be there, causing |
|
// this to be a HIT that completes in less than 10m (the default inner leaf |
|
// cert blocking query timeout). |
|
testutil.RunStep(t, "do a non-blocking query that should not block", func(t *testing.T) { |
|
req := &ConnectCALeafRequest{ |
|
Datacenter: "dc1", |
|
Service: "test", |
|
EnterpriseMeta: *acl.DefaultEnterpriseMeta(), |
|
} |
|
issued2, meta2, err := m.Get(ctx, req) |
|
require.NoError(t, err) |
|
|
|
require.True(t, meta2.Hit) |
|
|
|
// If this is actually returning a cached result, the serial number |
|
// should be unchanged. |
|
require.Equal(t, serialNumber, issued2.SerialNumber) |
|
|
|
require.Equal(t, issued, issued2) |
|
}) |
|
} |
|
|
|
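// requireLeafValidUnderCA asserts that the issued leaf certificate chains up to
// the given CA root and that the returned private key matches the leaf cert.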
func requireLeafValidUnderCA(t require.TestingT, issued *structs.IssuedCert, ca *structs.CARoot) { |
|
require.NotNil(t, issued) |
|
require.NotNil(t, ca) |
|
|
|
leaf, intermediates, err := connect.ParseLeafCerts(issued.CertPEM) |
|
require.NoError(t, err) |
|
|
|
roots := x509.NewCertPool() |
|
require.True(t, roots.AppendCertsFromPEM([]byte(ca.RootCert))) |
|
|
|
_, err = leaf.Verify(x509.VerifyOptions{ |
|
Roots: roots, |
|
Intermediates: intermediates, |
|
}) |
|
require.NoError(t, err) |
|
|
|
// Verify the private key matches the cert. tls.X509KeyPair does this for us!
|
_, err = tls.X509KeyPair([]byte(issued.CertPEM), []byte(issued.PrivateKeyPEM)) |
|
require.NoError(t, err) |
|
} |
|
|
|
// testManager returns a *Manager that is pre-configured to use a mock RPC |
|
// implementation that can sign certs, and an in-memory CA roots reader that |
|
// interacts well with it. |
|
func testManager(t *testing.T, mut func(*Config)) (*Manager, *testSigner) { |
|
signer := newTestSigner(t, nil, nil) |
|
|
|
deps := Deps{ |
|
Logger: testutil.Logger(t), |
|
RootsReader: signer.RootsReader, |
|
CertSigner: signer, |
|
Config: Config{ |
|
// Override the root-change spread so we don't have to wait up to 20 seconds
// to see root changes work. Specific tests that need the default behavior can
// override this again. Note it's not 0, since 0 means "use the default", but
// 1 microsecond is effectively the same.
|
TestOverrideCAChangeInitialDelay: 1 * time.Microsecond, |
|
}, |
|
} |
|
if mut != nil { |
|
mut(&deps.Config) |
|
} |
|
|
|
m := NewManager(deps) |
|
t.Cleanup(m.Stop) |
|
|
|
return m, signer |
|
} |
|
|
|
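// testRootsReader is an in-memory RootsReader for tests. Set installs a new
// set of roots and closes the previous watcher channel, which causes any
// goroutine spawned by Notify to deliver a single update event carrying the
// new roots and index.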
type testRootsReader struct { |
|
mu sync.Mutex |
|
index uint64 |
|
roots *structs.IndexedCARoots |
|
watcher chan struct{} |
|
} |
|
|
|
func newTestRootsReader(t *testing.T) *testRootsReader { |
|
r := &testRootsReader{ |
|
watcher: make(chan struct{}), |
|
} |
|
t.Cleanup(func() { |
|
r.mu.Lock() |
|
watcher := r.watcher |
|
r.mu.Unlock() |
|
close(watcher) |
|
}) |
|
return r |
|
} |
|
|
|
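// Compile-time assertion that testRootsReader implements RootsReader.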
var _ RootsReader = (*testRootsReader)(nil) |
|
|
|
func (r *testRootsReader) Set(roots *structs.IndexedCARoots) { |
|
r.mu.Lock() |
|
oldWatcher := r.watcher |
|
r.watcher = make(chan struct{}) |
|
r.roots = roots |
|
if roots == nil { |
|
r.index = 1 |
|
} else { |
|
r.index = roots.Index |
|
} |
|
r.mu.Unlock() |
|
|
|
close(oldWatcher) |
|
} |
|
|
|
func (r *testRootsReader) Get() (*structs.IndexedCARoots, error) { |
|
r.mu.Lock() |
|
defer r.mu.Unlock() |
|
return r.roots, nil |
|
} |
|
|
|
func (r *testRootsReader) Notify(ctx context.Context, correlationID string, ch chan<- cache.UpdateEvent) error { |
|
r.mu.Lock() |
|
watcher := r.watcher |
|
r.mu.Unlock() |
|
|
|
go func() { |
|
<-watcher |
|
|
|
r.mu.Lock() |
|
defer r.mu.Unlock() |
|
|
|
ch <- cache.UpdateEvent{ |
|
CorrelationID: correlationID, |
|
Result: r.roots, |
|
Meta: cache.ResultMeta{Index: r.index}, |
|
Err: nil, |
|
} |
|
}() |
|
return nil |
|
} |
|
|
|
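// testGetResult captures the outcome of a single Manager.testGet call so it
// can be delivered over a channel by testAsyncGet.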
type testGetResult struct { |
|
Index uint64 |
|
Value *structs.IssuedCert |
|
Err error |
|
} |
|
|
|
// testAsyncGet returns a channel that returns the result of the testGet call. |
|
// |
|
// This is useful for testing timing and concurrency with testGet calls. |
|
func testAsyncGet(t *testing.T, m *Manager, req *ConnectCALeafRequest) <-chan testGetResult { |
|
ch := make(chan testGetResult) |
|
go func() { |
|
index, cert, err := m.testGet(req) |
|
if err != nil { |
|
ch <- testGetResult{Err: err} |
|
return |
|
} |
|
|
|
ch <- testGetResult{Index: index, Value: cert} |
|
}() |
|
return ch |
|
}
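
// testGetWithTimeout is a small convenience sketch (not used by the tests
// above) showing one way testAsyncGet can be wrapped when a caller just wants
// a result or a hard failure after a deadline. It only uses helpers already
// defined in this file; the name and the exact timeout handling are
// illustrative rather than part of the real test suite.
func testGetWithTimeout(t *testing.T, m *Manager, req *ConnectCALeafRequest, timeout time.Duration) testGetResult {
	t.Helper()
	select {
	case result := <-testAsyncGet(t, m, req):
		// Whatever testGet produced: an index/cert pair or an error.
		return result
	case <-time.After(timeout):
		t.Fatalf("Get did not return within %s", timeout)
		return testGetResult{}
	}
}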