mirror of https://github.com/hashicorp/consul
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
332 lines
7.9 KiB
332 lines
7.9 KiB
// Copyright (c) HashiCorp, Inc. |
|
// SPDX-License-Identifier: BUSL-1.1 |
|
|
|
package agent |
|
|
|
import ( |
|
"context" |
|
"encoding/json" |
|
"fmt" |
|
"os" |
|
osexec "os/exec" |
|
"path" |
|
"strconv" |
|
"sync" |
|
"syscall" |
|
"time" |
|
|
|
"github.com/hashicorp/consul/agent/exec" |
|
"github.com/hashicorp/consul/agent/structs" |
|
"github.com/hashicorp/consul/api" |
|
) |
|
|
|
const ( |
|
// remoteExecFileName is the name of the file we append to |
|
// the path, e.g. _rexec/session_id/job |
|
remoteExecFileName = "job" |
|
|
|
// rExecAck is the suffix added to an ack path |
|
remoteExecAckSuffix = "ack" |
|
|
|
// remoteExecAck is the suffix added to an exit code |
|
remoteExecExitSuffix = "exit" |
|
|
|
// remoteExecOutputDivider is used to namespace the output |
|
remoteExecOutputDivider = "out" |
|
|
|
// remoteExecOutputSize is the size we chunk output too |
|
remoteExecOutputSize = 4 * 1024 |
|
|
|
// remoteExecOutputDeadline is how long we wait before uploading |
|
// less than the chunk size |
|
remoteExecOutputDeadline = 500 * time.Millisecond |
|
) |
|
|
|
// remoteExecEvent is used as the payload of the user event to transmit |
|
// what we need to know about the event |
|
type remoteExecEvent struct { |
|
Prefix string |
|
Session string |
|
} |
|
|
|
// remoteExecSpec is used as the specification of the remote exec. |
|
// It is stored in the KV store |
|
type remoteExecSpec struct { |
|
Command string |
|
Args []string |
|
Script []byte |
|
Wait time.Duration |
|
} |
|
|
|
type rexecWriter struct { |
|
BufCh chan []byte |
|
BufSize int |
|
BufIdle time.Duration |
|
CancelCh chan struct{} |
|
|
|
buf []byte |
|
bufLen int |
|
bufLock sync.Mutex |
|
flush *time.Timer |
|
} |
|
|
|
func (r *rexecWriter) Write(b []byte) (int, error) { |
|
r.bufLock.Lock() |
|
defer r.bufLock.Unlock() |
|
if r.flush != nil { |
|
r.flush.Stop() |
|
r.flush = nil |
|
} |
|
inpLen := len(b) |
|
if r.buf == nil { |
|
r.buf = make([]byte, r.BufSize) |
|
} |
|
|
|
COPY: |
|
remain := len(r.buf) - r.bufLen |
|
if remain > len(b) { |
|
copy(r.buf[r.bufLen:], b) |
|
r.bufLen += len(b) |
|
} else { |
|
copy(r.buf[r.bufLen:], b[:remain]) |
|
b = b[remain:] |
|
r.bufLen += remain |
|
r.bufLock.Unlock() |
|
r.Flush() |
|
r.bufLock.Lock() |
|
goto COPY |
|
} |
|
|
|
r.flush = time.AfterFunc(r.BufIdle, r.Flush) |
|
return inpLen, nil |
|
} |
|
|
|
func (r *rexecWriter) Flush() { |
|
r.bufLock.Lock() |
|
defer r.bufLock.Unlock() |
|
if r.flush != nil { |
|
r.flush.Stop() |
|
r.flush = nil |
|
} |
|
if r.bufLen == 0 { |
|
return |
|
} |
|
select { |
|
case r.BufCh <- r.buf[:r.bufLen]: |
|
r.buf = make([]byte, r.BufSize) |
|
r.bufLen = 0 |
|
case <-r.CancelCh: |
|
r.bufLen = 0 |
|
} |
|
} |
|
|
|
// handleRemoteExec is invoked when a new remote exec request is received |
|
func (a *Agent) handleRemoteExec(msg *UserEvent) { |
|
a.logger.Debug("received remote exec event", "id", msg.ID) |
|
// Decode the event payload |
|
var event remoteExecEvent |
|
if err := json.Unmarshal(msg.Payload, &event); err != nil { |
|
a.logger.Error("failed to decode remote exec event", "error", err) |
|
return |
|
} |
|
|
|
// Read the job specification |
|
var spec remoteExecSpec |
|
if !a.remoteExecGetSpec(&event, &spec) { |
|
return |
|
} |
|
|
|
// Write the acknowledgement |
|
if !a.remoteExecWriteAck(&event) { |
|
return |
|
} |
|
|
|
// Ensure we write out an exit code |
|
exitCode := 0 |
|
defer a.remoteExecWriteExitCode(&event, &exitCode) |
|
|
|
// Check if this is a script, we may need to spill to disk |
|
var script string |
|
if len(spec.Script) != 0 { |
|
tmpFile, err := os.CreateTemp("", "rexec") |
|
if err != nil { |
|
a.logger.Debug("failed to make tmp file", "error", err) |
|
exitCode = 255 |
|
return |
|
} |
|
defer os.Remove(tmpFile.Name()) |
|
os.Chmod(tmpFile.Name(), 0750) |
|
tmpFile.Write(spec.Script) |
|
tmpFile.Close() |
|
script = tmpFile.Name() |
|
} else { |
|
script = spec.Command |
|
} |
|
|
|
// Create the exec.Cmd |
|
a.logger.Info("remote exec script", "script", script) |
|
var cmd *osexec.Cmd |
|
var err error |
|
if len(spec.Args) > 0 { |
|
cmd, err = exec.Subprocess(spec.Args) |
|
} else { |
|
cmd, err = exec.Script(script) |
|
} |
|
if err != nil { |
|
a.logger.Debug("failed to start remote exec", "error", err) |
|
exitCode = 255 |
|
return |
|
} |
|
|
|
// Setup the output streaming |
|
writer := &rexecWriter{ |
|
BufCh: make(chan []byte, 16), |
|
BufSize: remoteExecOutputSize, |
|
BufIdle: remoteExecOutputDeadline, |
|
CancelCh: make(chan struct{}), |
|
} |
|
cmd.Stdout = writer |
|
cmd.Stderr = writer |
|
|
|
// Start execution |
|
if err := cmd.Start(); err != nil { |
|
a.logger.Debug("failed to start remote exec", "error", err) |
|
exitCode = 255 |
|
return |
|
} |
|
|
|
// Wait for the process to exit |
|
exitCh := make(chan int, 1) |
|
go func() { |
|
err := cmd.Wait() |
|
writer.Flush() |
|
close(writer.BufCh) |
|
if err == nil { |
|
exitCh <- 0 |
|
return |
|
} |
|
|
|
// Try to determine the exit code |
|
if exitErr, ok := err.(*osexec.ExitError); ok { |
|
if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { |
|
exitCh <- status.ExitStatus() |
|
return |
|
} |
|
} |
|
exitCh <- 1 |
|
}() |
|
|
|
// Wait until we are complete, uploading as we go |
|
WAIT: |
|
for num := 0; ; num++ { |
|
select { |
|
case out := <-writer.BufCh: |
|
if out == nil { |
|
break WAIT |
|
} |
|
if !a.remoteExecWriteOutput(&event, num, out) { |
|
close(writer.CancelCh) |
|
exitCode = 255 |
|
return |
|
} |
|
case <-time.After(spec.Wait): |
|
// Acts like a heartbeat, since there is no output |
|
if !a.remoteExecWriteOutput(&event, num, nil) { |
|
close(writer.CancelCh) |
|
exitCode = 255 |
|
return |
|
} |
|
} |
|
} |
|
|
|
// Get the exit code |
|
exitCode = <-exitCh |
|
} |
|
|
|
// remoteExecGetSpec is used to get the exec specification. |
|
// Returns if execution should continue |
|
func (a *Agent) remoteExecGetSpec(event *remoteExecEvent, spec *remoteExecSpec) bool { |
|
get := structs.KeyRequest{ |
|
Datacenter: a.config.Datacenter, |
|
Key: path.Join(event.Prefix, event.Session, remoteExecFileName), |
|
QueryOptions: structs.QueryOptions{ |
|
AllowStale: true, // Stale read for scale! Retry on failure. |
|
}, |
|
} |
|
get.Token = a.tokens.AgentToken() |
|
var out structs.IndexedDirEntries |
|
QUERY: |
|
if err := a.RPC(context.Background(), "KVS.Get", &get, &out); err != nil { |
|
a.logger.Error("failed to get remote exec job", "error", err) |
|
return false |
|
} |
|
if len(out.Entries) == 0 { |
|
// If the initial read was stale and had no data, retry as a consistent read |
|
if get.QueryOptions.AllowStale { |
|
a.logger.Debug("trying consistent fetch of remote exec job spec") |
|
get.QueryOptions.AllowStale = false |
|
goto QUERY |
|
} else { |
|
a.logger.Debug("remote exec aborted, job spec missing") |
|
return false |
|
} |
|
} |
|
if err := json.Unmarshal(out.Entries[0].Value, &spec); err != nil { |
|
a.logger.Error("failed to decode remote exec spec", "error", err) |
|
return false |
|
} |
|
return true |
|
} |
|
|
|
// remoteExecWriteAck is used to write an ack. Returns if execution should |
|
// continue. |
|
func (a *Agent) remoteExecWriteAck(event *remoteExecEvent) bool { |
|
if err := a.remoteExecWriteKey(event, remoteExecAckSuffix, nil); err != nil { |
|
a.logger.Error("failed to ack remote exec job", "error", err) |
|
return false |
|
} |
|
return true |
|
} |
|
|
|
// remoteExecWriteOutput is used to write output |
|
func (a *Agent) remoteExecWriteOutput(event *remoteExecEvent, num int, output []byte) bool { |
|
suffix := path.Join(remoteExecOutputDivider, fmt.Sprintf("%05x", num)) |
|
if err := a.remoteExecWriteKey(event, suffix, output); err != nil { |
|
a.logger.Error("failed to write output for remote exec job", "error", err) |
|
return false |
|
} |
|
return true |
|
} |
|
|
|
// remoteExecWriteExitCode is used to write an exit code |
|
func (a *Agent) remoteExecWriteExitCode(event *remoteExecEvent, exitCode *int) bool { |
|
val := []byte(strconv.FormatInt(int64(*exitCode), 10)) |
|
if err := a.remoteExecWriteKey(event, remoteExecExitSuffix, val); err != nil { |
|
a.logger.Error("failed to write exit code for remote exec job", "error", err) |
|
return false |
|
} |
|
return true |
|
} |
|
|
|
// remoteExecWriteKey is used to write an output key for a remote exec job |
|
func (a *Agent) remoteExecWriteKey(event *remoteExecEvent, suffix string, val []byte) error { |
|
key := path.Join(event.Prefix, event.Session, a.config.NodeName, suffix) |
|
write := structs.KVSRequest{ |
|
Datacenter: a.config.Datacenter, |
|
Op: api.KVLock, |
|
DirEnt: structs.DirEntry{ |
|
Key: key, |
|
Value: val, |
|
Session: event.Session, |
|
}, |
|
} |
|
write.Token = a.tokens.AgentToken() |
|
var success bool |
|
if err := a.RPC(context.Background(), "KVS.Apply", &write, &success); err != nil { |
|
return err |
|
} |
|
if !success { |
|
return fmt.Errorf("write failed") |
|
} |
|
return nil |
|
}
|
|
|