// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package resource
import (
"context"
"errors"
"strings"
"time"
"github.com/oklog/ulid/v2"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/internal/resource"
"github.com/hashicorp/consul/internal/storage"
"github.com/hashicorp/consul/lib/retry"
"github.com/hashicorp/consul/proto-public/pbresource"
)
// errUseWriteStatus is returned when the user attempts to modify the resource
// status using the Write endpoint.
//
// We only allow modifications to the status using the WriteStatus endpoint
// because:
//
// - Setting statuses should only be done by controllers and requires different
// permissions.
//
// - Status-only updates shouldn't increment the resource generation.
//
// While we could accomplish both in the Write handler, there's seldom need to
// update the resource body and status at the same time, so it makes more sense
// to keep them separate.
var errUseWriteStatus = status.Error(codes.InvalidArgument, "resource.status can only be set using the WriteStatus endpoint")
2023-04-06 09:40:04 +00:00
func (s *Server) Write(ctx context.Context, req *pbresource.WriteRequest) (*pbresource.WriteResponse, error) {
reg, err := s.validateWriteRequest(req)
2023-04-06 09:40:04 +00:00
if err != nil {
return nil, err
}
v1EntMeta := v2TenancyToV1EntMeta(req.Resource.Id.Tenancy)
authz, authzContext, err := s.getAuthorizer(tokenFromContext(ctx), v1EntMeta)
if err != nil {
return nil, err
}
v1EntMetaToV2Tenancy(reg, v1EntMeta, req.Resource.Id.Tenancy)
// ACL check comes before tenancy existence checks to not leak tenancy "existence".
err = reg.ACLs.Write(authz, authzContext, req.Resource)
switch {
case acl.IsErrPermissionDenied(err):
return nil, status.Error(codes.PermissionDenied, err.Error())
case err != nil:
return nil, status.Errorf(codes.Internal, "failed write acl: %v", err)
}
2023-04-06 09:40:04 +00:00
// Check the user sent the correct type of data.
if !req.Resource.Data.MessageIs(reg.Proto) {
got := strings.TrimPrefix(req.Resource.Data.TypeUrl, "type.googleapis.com/")
return nil, status.Errorf(
codes.InvalidArgument,
"resource.data is of wrong type (expected=%q, got=%q)",
reg.Proto.ProtoReflect().Descriptor().FullName(),
got,
)
}
// Check V1 tenancy exists for the V2 resource
if err = v1TenancyExists(reg, s.V1TenancyBridge, req.Resource.Id.Tenancy, codes.InvalidArgument); err != nil {
return nil, err
}
// Check V1 tenancy not marked for deletion.
if err = v1TenancyMarkedForDeletion(reg, s.V1TenancyBridge, req.Resource.Id.Tenancy); err != nil {
return nil, err
}
if err = reg.Mutate(req.Resource); err != nil {
return nil, status.Errorf(codes.Internal, "failed mutate hook: %v", err.Error())
}
if err = reg.Validate(req.Resource); err != nil {
return nil, status.Error(codes.InvalidArgument, err.Error())
}
2023-04-06 09:40:04 +00:00
// At the storage backend layer, all writes are CAS operations.
//
// This makes it possible to *safely* do things like keeping the Uid stable
// across writes, carrying statuses over, and passing the current version of
// the resource to hooks, without restricting ourselves to only using the more
// feature-rich storage systems that support "patch" updates etc. natively.
//
// Although CAS semantics are useful for machine users like controllers, human
// users generally don't need them. If the user is performing a non-CAS write,
// we read the current version, and automatically retry if the CAS write fails.
var result *pbresource.Resource
err = s.retryCAS(ctx, req.Resource.Version, func() error {
input := clone(req.Resource)
// We read with EventualConsistency here because:
//
// - In the common case, individual resources are written infrequently, and
// when using the Raft backend followers are generally within a few hundred
// milliseconds of the leader, so the first read will probably return the
// current version.
//
// - StrongConsistency is expensive. In the Raft backend, it involves a round
// of heartbeats to verify cluster leadership (in addition to the write's
// log replication).
//
// - CAS failures will be retried by retryCAS anyway. So the read-modify-write
// cycle should eventually succeed.
var mismatchError storage.GroupVersionMismatchError
2023-04-06 09:40:04 +00:00
existing, err := s.Backend.Read(ctx, storage.EventualConsistency, input.Id)
switch {
// Create path.
case errors.Is(err, storage.ErrNotFound):
input.Id.Uid = ulid.Make().String()
// Prevent setting statuses in this endpoint.
if len(input.Status) != 0 {
return errUseWriteStatus
}
2023-04-06 09:40:04 +00:00
// Generally, we expect resources with owners to be created by controllers,
// and they should provide the Uid. In cases where no Uid is given (e.g. the
// owner is specified in the resource HCL) we'll look up whatever the current
// Uid is and use that.
//
// An important note on consistency:
//
// We read the owner with StrongConsistency here to reduce the likelihood of
// creating a resource pointing to the wrong "incarnation" of the owner in
// cases where the owner is deleted and re-created in quick succession.
//
// That said, there is still a chance that the owner has been deleted by the
// time we write this resource. This is not a relational database and we do
// not support ACID transactions or real foreign key constraints.
if input.Owner != nil && input.Owner.Uid == "" {
owner, err := s.Backend.Read(ctx, storage.StrongConsistency, input.Owner)
switch {
case errors.Is(err, storage.ErrNotFound):
return status.Error(codes.InvalidArgument, "resource.owner does not exist")
case err != nil:
return status.Errorf(codes.Internal, "failed to resolve owner: %v", err)
}
input.Owner = owner.Id
}
// TODO(spatel): Revisit owner<->resource tenancy rules post-1.16
2023-04-06 09:40:04 +00:00
// Update path.
case err == nil || errors.As(err, &mismatchError):
// Allow writes that update GroupVersion.
if mismatchError.Stored != nil {
existing = mismatchError.Stored
}
2023-04-06 09:40:04 +00:00
// Use the stored ID because it includes the Uid.
//
// Generally, users won't provide the Uid but controllers will, because
// controllers need to operate on a specific "incarnation" of a resource
// as opposed to an older/newer resource with the same name, whereas users
// just want to update the current resource.
input.Id = existing.Id
// User is doing a non-CAS write, use the current version.
if input.Version == "" {
input.Version = existing.Version
}
// Check the stored version matches the user-given version.
//
// Although CAS operations are implemented "for real" at the storage backend
// layer, we must check the version here too to prevent a scenario where:
//
// - Current resource version is `v2`
// - User passes version `v2`
// - Read returns stale version `v1`
// - We carry `v1`'s statuses over (effectively overwriting `v2`'s statuses)
// - CAS operation succeeds anyway because user-given version is current
if input.Version != existing.Version {
return storage.ErrCASFailure
}
// Fill in an empty Owner UID with the existing owner's UID. If other parts
// of the owner ID like the type or name have changed then the subsequent
// EqualID call will still error as you are not allowed to change the owner.
// This is a small UX nicety to repeatedly "apply" a resource that should
// have an owner without having to care about the current owners incarnation.
if input.Owner != nil && existing.Owner != nil && input.Owner.Uid == "" {
input.Owner.Uid = existing.Owner.Uid
}
// Owner can only be set on creation. Enforce immutability.
if !resource.EqualID(input.Owner, existing.Owner) {
return status.Errorf(codes.InvalidArgument, "owner cannot be changed")
}
// Carry over status and prevent updates
if input.Status == nil {
input.Status = existing.Status
} else if !resource.EqualStatusMap(input.Status, existing.Status) {
return errUseWriteStatus
}
2023-04-06 09:40:04 +00:00
default:
return err
}
input.Generation = ulid.Make().String()
result, err = s.Backend.WriteCAS(ctx, input)
return err
})
switch {
case errors.Is(err, storage.ErrCASFailure):
return nil, status.Error(codes.Aborted, err.Error())
case errors.Is(err, storage.ErrWrongUid):
return nil, status.Error(codes.FailedPrecondition, err.Error())
case isGRPCStatusError(err):
return nil, err
case err != nil:
return nil, status.Errorf(codes.Internal, "failed to write resource: %v", err.Error())
2023-04-06 09:40:04 +00:00
}
return &pbresource.WriteResponse{Resource: result}, nil
}
// retryCAS runs the given operation, retrying it with exponential backoff when
// it fails with storage.ErrCASFailure and the user didn't provide a version.
// This is intended to hide failures when the user isn't intentionally
// performing a CAS operation (all writes are, by design, CAS operations at the
// storage backend layer).
//
// When vsn is non-empty the caller asked for CAS semantics, so cas is run
// exactly once and its result is returned as-is.
//
// Otherwise cas is run at most maxAttempts times. The loop stops as soon as cas
// returns anything other than a CAS failure (including nil), when the attempts
// are exhausted, or when backoff.Wait returns an error. In every case the
// result of the most recent cas call is returned.
func (s *Server) retryCAS(ctx context.Context, vsn string, cas func() error) error {
	if vsn != "" {
		return cas()
	}

	const maxAttempts = 5

	// These parameters are fairly arbitrary, so if you find better ones then go
	// ahead and swap them out! In general, we want to wait long enough to smooth
	// over small amounts of storage replication lag, but not so long that we make
	// matters worse by holding onto load.
	backoff := &retry.Waiter{
		MinWait: 50 * time.Millisecond,
		MaxWait: 1 * time.Second,
		Jitter:  retry.NewJitter(50),
		Factor:  75 * time.Millisecond,
	}

	var err error
	for i := 1; i <= maxAttempts; i++ {
		if err = cas(); !errors.Is(err, storage.ErrCASFailure) {
			break
		}

		// Out of attempts: return the CAS failure right away instead of
		// backing off (and logging a retry) for an attempt that will never
		// be made.
		if i == maxAttempts {
			break
		}

		// NOTE(review): Wait presumably fails only when ctx is done; in that
		// case we give up and return the last CAS failure — confirm against
		// retry.Waiter.
		if backoff.Wait(ctx) != nil {
			break
		}
		s.Logger.Trace("retrying failed CAS operation", "failure_count", i)
	}
	return err
}
func (s *Server) validateWriteRequest(req *pbresource.WriteRequest) (*resource.Registration, error) {
2023-04-06 09:40:04 +00:00
var field string
switch {
case req.Resource == nil:
field = "resource"
case req.Resource.Id == nil:
field = "resource.id"
case req.Resource.Data == nil:
field = "resource.data"
}
if field != "" {
return nil, status.Errorf(codes.InvalidArgument, "%s is required", field)
2023-04-06 09:40:04 +00:00
}
if err := validateId(req.Resource.Id, "resource.id"); err != nil {
return nil, err
}
if req.Resource.Owner != nil {
if err := validateId(req.Resource.Owner, "resource.owner"); err != nil {
return nil, err
}
}
// Check type exists.
reg, err := s.resolveType(req.Resource.Id.Type)
if err != nil {
return nil, err
}
// Check scope
if reg.Scope == resource.ScopePartition && req.Resource.Id.Tenancy.Namespace != "" {
return nil, status.Errorf(
codes.InvalidArgument,
"partition scoped resource %s cannot have a namespace. got: %s",
resource.ToGVK(req.Resource.Id.Type),
req.Resource.Id.Tenancy.Namespace,
)
}
return reg, nil
2023-04-06 09:40:04 +00:00
}