mirror of https://github.com/hashicorp/consul
184 lines
5.2 KiB
Go
184 lines
5.2 KiB
Go
// Copyright (c) HashiCorp, Inc.
|
|
// SPDX-License-Identifier: BUSL-1.1
|
|
|
|
package sprawl
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"net/http"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/hashicorp/consul/api"
|
|
"github.com/hashicorp/go-hclog"
|
|
|
|
"github.com/hashicorp/consul/testing/deployer/topology"
|
|
)
|
|
|
|
// TODO: this is definitely a grpc resolver/balancer issue to look into
|
|
const grpcWeirdError = `transport: Error while dialing failed to find Consul server for global address`
|
|
|
|
func isWeirdGRPCError(err error) bool {
|
|
if err == nil {
|
|
return false
|
|
}
|
|
return strings.Contains(err.Error(), grpcWeirdError)
|
|
}
|
|
|
|
func (s *Sprawl) initPeerings() error {
|
|
// TODO: wait until services are healthy? wait until mesh gateways work?
|
|
// if err := s.generator.Generate(tfgen.StepPeering); err != nil {
|
|
// return fmt.Errorf("generator[peering]: %w", err)
|
|
// }
|
|
|
|
var (
|
|
logger = s.logger.Named("peering")
|
|
_ = logger
|
|
)
|
|
|
|
for _, peering := range s.topology.Peerings {
|
|
dialingCluster, ok := s.topology.Clusters[peering.Dialing.Name]
|
|
if !ok {
|
|
return fmt.Errorf("peering references dialing cluster that does not exist: %s", peering.String())
|
|
}
|
|
acceptingCluster, ok := s.topology.Clusters[peering.Accepting.Name]
|
|
if !ok {
|
|
return fmt.Errorf("peering references accepting cluster that does not exist: %s", peering.String())
|
|
}
|
|
|
|
var (
|
|
dialingClient = s.clients[dialingCluster.Name]
|
|
acceptingClient = s.clients[acceptingCluster.Name]
|
|
)
|
|
|
|
// TODO: allow for use of ServerExternalAddresses
|
|
|
|
req1 := api.PeeringGenerateTokenRequest{
|
|
PeerName: peering.Accepting.PeerName,
|
|
}
|
|
if acceptingCluster.Enterprise {
|
|
req1.Partition = peering.Accepting.Partition
|
|
}
|
|
|
|
GENTOKEN:
|
|
resp, _, err := acceptingClient.Peerings().GenerateToken(context.Background(), req1, nil)
|
|
if err != nil {
|
|
if isWeirdGRPCError(err) {
|
|
time.Sleep(50 * time.Millisecond)
|
|
goto GENTOKEN
|
|
}
|
|
return fmt.Errorf("error generating peering token for %q: %w", peering.String(), err)
|
|
}
|
|
|
|
peeringToken := resp.PeeringToken
|
|
logger.Debug("generated peering token", "peering", peering.String())
|
|
|
|
req2 := api.PeeringEstablishRequest{
|
|
PeerName: peering.Dialing.PeerName,
|
|
PeeringToken: peeringToken,
|
|
}
|
|
if dialingCluster.Enterprise {
|
|
req2.Partition = peering.Dialing.Partition
|
|
}
|
|
|
|
logger.Info("registering peering with token", "peering", peering.String())
|
|
ESTABLISH:
|
|
_, _, err = dialingClient.Peerings().Establish(context.Background(), req2, nil)
|
|
if err != nil {
|
|
if isWeirdGRPCError(err) {
|
|
time.Sleep(50 * time.Millisecond)
|
|
goto ESTABLISH
|
|
}
|
|
// Establish and friends return an api.StatusError value, not pointer
|
|
// not sure if this is weird
|
|
var asStatusError api.StatusError
|
|
if errors.As(err, &asStatusError) && asStatusError.Code == http.StatusGatewayTimeout {
|
|
time.Sleep(50 * time.Millisecond)
|
|
goto ESTABLISH
|
|
}
|
|
return fmt.Errorf("error establishing peering with token for %q: %#v", peering.String(), err)
|
|
}
|
|
|
|
logger.Info("peering registered", "peering", peering.String())
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (s *Sprawl) waitForPeeringEstablishment() error {
|
|
var (
|
|
logger = s.logger.Named("peering")
|
|
)
|
|
logger.Info("awaiting peering establishment")
|
|
startTimeTotal := time.Now()
|
|
|
|
for _, peering := range s.topology.Peerings {
|
|
dialingCluster, ok := s.topology.Clusters[peering.Dialing.Name]
|
|
if !ok {
|
|
return fmt.Errorf("peering references dialing cluster that does not exist: %s", peering.String())
|
|
}
|
|
acceptingCluster, ok := s.topology.Clusters[peering.Accepting.Name]
|
|
if !ok {
|
|
return fmt.Errorf("peering references accepting cluster that does not exist: %s", peering.String())
|
|
}
|
|
|
|
var (
|
|
dialingClient = s.clients[dialingCluster.Name]
|
|
acceptingClient = s.clients[acceptingCluster.Name]
|
|
|
|
dialingLogger = logger.With(
|
|
"cluster", dialingCluster.Name,
|
|
"peering", peering.String(),
|
|
)
|
|
acceptingLogger = logger.With(
|
|
"cluster", acceptingCluster.Name,
|
|
"peering", peering.String(),
|
|
)
|
|
)
|
|
|
|
s.checkPeeringDirection(dialingLogger, dialingClient, peering.Dialing, dialingCluster.Enterprise)
|
|
s.checkPeeringDirection(acceptingLogger, acceptingClient, peering.Accepting, acceptingCluster.Enterprise)
|
|
}
|
|
logger.Info("peering established", "dur", time.Since(startTimeTotal).Round(time.Second))
|
|
return nil
|
|
}
|
|
|
|
func (s *Sprawl) checkPeeringDirection(logger hclog.Logger, client *api.Client, pc topology.PeerCluster, enterprise bool) {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
startTime := time.Now()
|
|
|
|
for {
|
|
opts := &api.QueryOptions{}
|
|
logger2 := logger.With("dur", time.Since(startTime).Round(time.Second))
|
|
if enterprise {
|
|
opts.Partition = pc.Partition
|
|
}
|
|
res, _, err := client.Peerings().Read(ctx, pc.PeerName, opts)
|
|
if isWeirdGRPCError(err) {
|
|
time.Sleep(50 * time.Millisecond)
|
|
continue
|
|
}
|
|
if err != nil {
|
|
logger2.Debug("error looking up peering", "error", err)
|
|
time.Sleep(100 * time.Millisecond)
|
|
continue
|
|
}
|
|
if res == nil {
|
|
logger2.Debug("peering not found")
|
|
time.Sleep(100 * time.Millisecond)
|
|
continue
|
|
}
|
|
|
|
if res.State == api.PeeringStateActive {
|
|
break
|
|
}
|
|
logger2.Debug("peering not active yet", "state", res.State)
|
|
time.Sleep(500 * time.Millisecond)
|
|
}
|
|
logger.Debug("peering is active", "dur", time.Since(startTime).Round(time.Second))
|
|
}
|