mirror of https://github.com/k3s-io/k3s
450 lines
14 KiB
Go
450 lines
14 KiB
Go
|
// Copyright 2019 The Bazel Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style
|
||
|
// license that can be found in the LICENSE file.
|
||
|
|
||
|
package starlark
|
||
|
|
||
|
// This file defines a simple execution-time profiler for Starlark.
|
||
|
// It measures the wall time spent executing Starlark code, and emits a
|
||
|
// gzipped protocol message in pprof format (github.com/google/pprof).
|
||
|
//
|
||
|
// When profiling is enabled, the interpreter calls the profiler to
|
||
|
// indicate the start and end of each "span" or time interval. A leaf
|
||
|
// function (whether Go or Starlark) has a single span. A function that
|
||
|
// calls another function has spans for each interval in which it is the
|
||
|
// top of the stack. (A LOAD instruction also ends a span.)
|
||
|
//
|
||
|
// At the start of a span, the interpreter records the current time in
|
||
|
// the thread's topmost frame. At the end of the span, it obtains the
|
||
|
// time again and subtracts the span start time. The difference is added
|
||
|
// to an accumulator variable in the thread. If the accumulator exceeds
|
||
|
// some fixed quantum (10ms, say), the profiler records the current call
|
||
|
// stack and sends it to the profiler goroutine, along with the number
|
||
|
// of quanta, which are subtracted. For example, if the accumulator
|
||
|
// holds 3ms and then a completed span adds 25ms to it, its value is 28ms,
|
||
|
// which exceeeds 10ms. The profiler records a stack with the value 20ms
|
||
|
// (2 quanta), and the accumulator is left with 8ms.
|
||
|
//
|
||
|
// The profiler goroutine converts the stacks into the pprof format and
|
||
|
// emits a gzip-compressed protocol message to the designated output
|
||
|
// file. We use a hand-written streaming proto encoder to avoid
|
||
|
// dependencies on pprof and proto, and to avoid the need to
|
||
|
// materialize the profile data structure in memory.
|
||
|
//
|
||
|
// A limitation of this profiler is that it measures wall time, which
|
||
|
// does not necessarily correspond to CPU time. A CPU profiler requires
|
||
|
// that only running (not runnable) threads are sampled; this is
|
||
|
// commonly achieved by having the kernel deliver a (PROF) signal to an
|
||
|
// arbitrary running thread, through setitimer(2). The CPU profiler in the
|
||
|
// Go runtime uses this mechanism, but it is not possible for a Go
|
||
|
// application to register a SIGPROF handler, nor is it possible for a
|
||
|
// Go handler for some other signal to read the stack pointer of
|
||
|
// the interrupted thread.
|
||
|
//
|
||
|
// Two caveats:
|
||
|
// (1) it is tempting to send the leaf Frame directly to the profiler
|
||
|
// goroutine instead of making a copy of the stack, since a Frame is a
|
||
|
// spaghetti stack--a linked list. However, as soon as execution
|
||
|
// resumes, the stack's Frame.pc values may be mutated, so Frames are
|
||
|
// not safe to share with the asynchronous profiler goroutine.
|
||
|
// (2) it is tempting to use Callables as keys in a map when tabulating
|
||
|
// the pprof protocols's Function entities. However, we cannot assume
|
||
|
// that Callables are valid map keys, and furthermore we must not
|
||
|
// pin function values in memory indefinitely as this may cause lambda
|
||
|
// values to keep their free variables live much longer than necessary.
|
||
|
|
||
|
// TODO(adonovan):
|
||
|
// - make Start/Stop fully thread-safe.
|
||
|
// - fix the pc hack.
|
||
|
// - experiment with other values of quantum.
|
||
|
|
||
|
import (
|
||
|
"bufio"
|
||
|
"bytes"
|
||
|
"compress/gzip"
|
||
|
"encoding/binary"
|
||
|
"fmt"
|
||
|
"io"
|
||
|
"log"
|
||
|
"reflect"
|
||
|
"sync/atomic"
|
||
|
"time"
|
||
|
"unsafe"
|
||
|
|
||
|
"go.starlark.net/syntax"
|
||
|
)
|
||
|
|
||
|
// StartProfile enables time profiling of all Starlark threads,
|
||
|
// and writes a profile in pprof format to w.
|
||
|
// It must be followed by a call to StopProfiler to stop
|
||
|
// the profiler and finalize the profile.
|
||
|
//
|
||
|
// StartProfile returns an error if profiling was already enabled.
|
||
|
//
|
||
|
// StartProfile must not be called concurrently with Starlark execution.
|
||
|
func StartProfile(w io.Writer) error {
|
||
|
if !atomic.CompareAndSwapUint32(&profiler.on, 0, 1) {
|
||
|
return fmt.Errorf("profiler already running")
|
||
|
}
|
||
|
|
||
|
// TODO(adonovan): make the API fully concurrency-safe.
|
||
|
// The main challenge is racy reads/writes of profiler.events,
|
||
|
// and of send/close races on the channel it refers to.
|
||
|
// It's easy to solve them with a mutex but harder to do
|
||
|
// it efficiently.
|
||
|
|
||
|
profiler.events = make(chan *profEvent, 1)
|
||
|
profiler.done = make(chan error)
|
||
|
|
||
|
go profile(w)
|
||
|
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
// StopProfiler stops the profiler started by a prior call to
|
||
|
// StartProfile and finalizes the profile. It returns an error if the
|
||
|
// profile could not be completed.
|
||
|
//
|
||
|
// StopProfiler must not be called concurrently with Starlark execution.
|
||
|
func StopProfile() error {
|
||
|
// Terminate the profiler goroutine and get its result.
|
||
|
close(profiler.events)
|
||
|
err := <-profiler.done
|
||
|
|
||
|
profiler.done = nil
|
||
|
profiler.events = nil
|
||
|
atomic.StoreUint32(&profiler.on, 0)
|
||
|
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
// globals
|
||
|
var profiler struct {
|
||
|
on uint32 // nonzero => profiler running
|
||
|
events chan *profEvent // profile events from interpreter threads
|
||
|
done chan error // indicates profiler goroutine is ready
|
||
|
}
|
||
|
|
||
|
func (thread *Thread) beginProfSpan() {
|
||
|
if profiler.events == nil {
|
||
|
return // profiling not enabled
|
||
|
}
|
||
|
|
||
|
thread.frameAt(0).spanStart = nanotime()
|
||
|
}
|
||
|
|
||
|
// TODO(adonovan): experiment with smaller values,
|
||
|
// which trade space and time for greater precision.
|
||
|
const quantum = 10 * time.Millisecond
|
||
|
|
||
|
func (thread *Thread) endProfSpan() {
|
||
|
if profiler.events == nil {
|
||
|
return // profiling not enabled
|
||
|
}
|
||
|
|
||
|
// Add the span to the thread's accumulator.
|
||
|
thread.proftime += time.Duration(nanotime() - thread.frameAt(0).spanStart)
|
||
|
if thread.proftime < quantum {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// Only record complete quanta.
|
||
|
n := thread.proftime / quantum
|
||
|
thread.proftime -= n * quantum
|
||
|
|
||
|
// Copy the stack.
|
||
|
// (We can't save thread.frame because its pc will change.)
|
||
|
ev := &profEvent{
|
||
|
thread: thread,
|
||
|
time: n * quantum,
|
||
|
}
|
||
|
ev.stack = ev.stackSpace[:0]
|
||
|
for i := range thread.stack {
|
||
|
fr := thread.frameAt(i)
|
||
|
ev.stack = append(ev.stack, profFrame{
|
||
|
pos: fr.Position(),
|
||
|
fn: fr.Callable(),
|
||
|
pc: fr.pc,
|
||
|
})
|
||
|
}
|
||
|
|
||
|
profiler.events <- ev
|
||
|
}
|
||
|
|
||
|
type profEvent struct {
|
||
|
thread *Thread // currently unused
|
||
|
time time.Duration
|
||
|
stack []profFrame
|
||
|
stackSpace [8]profFrame // initial space for stack
|
||
|
}
|
||
|
|
||
|
type profFrame struct {
|
||
|
fn Callable // don't hold this live for too long (prevents GC of lambdas)
|
||
|
pc uint32 // program counter (Starlark frames only)
|
||
|
pos syntax.Position // position of pc within this frame
|
||
|
}
|
||
|
|
||
|
// profile is the profiler goroutine.
|
||
|
// It runs until StopProfiler is called.
|
||
|
func profile(w io.Writer) {
|
||
|
// Field numbers from pprof protocol.
|
||
|
// See https://github.com/google/pprof/blob/master/proto/profile.proto
|
||
|
const (
|
||
|
Profile_sample_type = 1 // repeated ValueType
|
||
|
Profile_sample = 2 // repeated Sample
|
||
|
Profile_mapping = 3 // repeated Mapping
|
||
|
Profile_location = 4 // repeated Location
|
||
|
Profile_function = 5 // repeated Function
|
||
|
Profile_string_table = 6 // repeated string
|
||
|
Profile_time_nanos = 9 // int64
|
||
|
Profile_duration_nanos = 10 // int64
|
||
|
Profile_period_type = 11 // ValueType
|
||
|
Profile_period = 12 // int64
|
||
|
|
||
|
ValueType_type = 1 // int64
|
||
|
ValueType_unit = 2 // int64
|
||
|
|
||
|
Sample_location_id = 1 // repeated uint64
|
||
|
Sample_value = 2 // repeated int64
|
||
|
Sample_label = 3 // repeated Label
|
||
|
|
||
|
Label_key = 1 // int64
|
||
|
Label_str = 2 // int64
|
||
|
Label_num = 3 // int64
|
||
|
Label_num_unit = 4 // int64
|
||
|
|
||
|
Location_id = 1 // uint64
|
||
|
Location_mapping_id = 2 // uint64
|
||
|
Location_address = 3 // uint64
|
||
|
Location_line = 4 // repeated Line
|
||
|
|
||
|
Line_function_id = 1 // uint64
|
||
|
Line_line = 2 // int64
|
||
|
|
||
|
Function_id = 1 // uint64
|
||
|
Function_name = 2 // int64
|
||
|
Function_system_name = 3 // int64
|
||
|
Function_filename = 4 // int64
|
||
|
Function_start_line = 5 // int64
|
||
|
)
|
||
|
|
||
|
bufw := bufio.NewWriter(w) // write file in 4KB (not 240B flate-sized) chunks
|
||
|
gz := gzip.NewWriter(bufw)
|
||
|
enc := protoEncoder{w: gz}
|
||
|
|
||
|
// strings
|
||
|
stringIndex := make(map[string]int64)
|
||
|
str := func(s string) int64 {
|
||
|
i, ok := stringIndex[s]
|
||
|
if !ok {
|
||
|
i = int64(len(stringIndex))
|
||
|
enc.string(Profile_string_table, s)
|
||
|
stringIndex[s] = i
|
||
|
}
|
||
|
return i
|
||
|
}
|
||
|
str("") // entry 0
|
||
|
|
||
|
// functions
|
||
|
//
|
||
|
// function returns the ID of a Callable for use in Line.FunctionId.
|
||
|
// The ID is the same as the function's logical address,
|
||
|
// which is supplied by the caller to avoid the need to recompute it.
|
||
|
functionId := make(map[uintptr]uint64)
|
||
|
function := func(fn Callable, addr uintptr) uint64 {
|
||
|
id, ok := functionId[addr]
|
||
|
if !ok {
|
||
|
id = uint64(addr)
|
||
|
|
||
|
var pos syntax.Position
|
||
|
if fn, ok := fn.(callableWithPosition); ok {
|
||
|
pos = fn.Position()
|
||
|
}
|
||
|
|
||
|
name := fn.Name()
|
||
|
if name == "<toplevel>" {
|
||
|
name = pos.Filename()
|
||
|
}
|
||
|
|
||
|
nameIndex := str(name)
|
||
|
|
||
|
fun := new(bytes.Buffer)
|
||
|
funenc := protoEncoder{w: fun}
|
||
|
funenc.uint(Function_id, id)
|
||
|
funenc.int(Function_name, nameIndex)
|
||
|
funenc.int(Function_system_name, nameIndex)
|
||
|
funenc.int(Function_filename, str(pos.Filename()))
|
||
|
funenc.int(Function_start_line, int64(pos.Line))
|
||
|
enc.bytes(Profile_function, fun.Bytes())
|
||
|
|
||
|
functionId[addr] = id
|
||
|
}
|
||
|
return id
|
||
|
}
|
||
|
|
||
|
// locations
|
||
|
//
|
||
|
// location returns the ID of the location denoted by fr.
|
||
|
// For Starlark frames, this is the Frame pc.
|
||
|
locationId := make(map[uintptr]uint64)
|
||
|
location := func(fr profFrame) uint64 {
|
||
|
fnAddr := profFuncAddr(fr.fn)
|
||
|
|
||
|
// For Starlark functions, the frame position
|
||
|
// represents the current PC value.
|
||
|
// Mix it into the low bits of the address.
|
||
|
// This is super hacky and may result in collisions
|
||
|
// in large functions or if functions are numerous.
|
||
|
// TODO(adonovan): fix: try making this cleaner by treating
|
||
|
// each bytecode segment as a Profile.Mapping.
|
||
|
pcAddr := fnAddr
|
||
|
if _, ok := fr.fn.(*Function); ok {
|
||
|
pcAddr = (pcAddr << 16) ^ uintptr(fr.pc)
|
||
|
}
|
||
|
|
||
|
id, ok := locationId[pcAddr]
|
||
|
if !ok {
|
||
|
id = uint64(pcAddr)
|
||
|
|
||
|
line := new(bytes.Buffer)
|
||
|
lineenc := protoEncoder{w: line}
|
||
|
lineenc.uint(Line_function_id, function(fr.fn, fnAddr))
|
||
|
lineenc.int(Line_line, int64(fr.pos.Line))
|
||
|
loc := new(bytes.Buffer)
|
||
|
locenc := protoEncoder{w: loc}
|
||
|
locenc.uint(Location_id, id)
|
||
|
locenc.uint(Location_address, uint64(pcAddr))
|
||
|
locenc.bytes(Location_line, line.Bytes())
|
||
|
enc.bytes(Profile_location, loc.Bytes())
|
||
|
|
||
|
locationId[pcAddr] = id
|
||
|
}
|
||
|
return id
|
||
|
}
|
||
|
|
||
|
wallNanos := new(bytes.Buffer)
|
||
|
wnenc := protoEncoder{w: wallNanos}
|
||
|
wnenc.int(ValueType_type, str("wall"))
|
||
|
wnenc.int(ValueType_unit, str("nanoseconds"))
|
||
|
|
||
|
// informational fields of Profile
|
||
|
enc.bytes(Profile_sample_type, wallNanos.Bytes())
|
||
|
enc.int(Profile_period, quantum.Nanoseconds()) // magnitude of sampling period
|
||
|
enc.bytes(Profile_period_type, wallNanos.Bytes()) // dimension and unit of period
|
||
|
enc.int(Profile_time_nanos, time.Now().UnixNano()) // start (real) time of profile
|
||
|
|
||
|
startNano := nanotime()
|
||
|
|
||
|
// Read profile events from the channel
|
||
|
// until it is closed by StopProfiler.
|
||
|
for e := range profiler.events {
|
||
|
sample := new(bytes.Buffer)
|
||
|
sampleenc := protoEncoder{w: sample}
|
||
|
sampleenc.int(Sample_value, e.time.Nanoseconds()) // wall nanoseconds
|
||
|
for _, fr := range e.stack {
|
||
|
sampleenc.uint(Sample_location_id, location(fr))
|
||
|
}
|
||
|
enc.bytes(Profile_sample, sample.Bytes())
|
||
|
}
|
||
|
|
||
|
endNano := nanotime()
|
||
|
enc.int(Profile_duration_nanos, endNano-startNano)
|
||
|
|
||
|
err := gz.Close() // Close reports any prior write error
|
||
|
if flushErr := bufw.Flush(); err == nil {
|
||
|
err = flushErr
|
||
|
}
|
||
|
profiler.done <- err
|
||
|
}
|
||
|
|
||
|
// nanotime returns the time in nanoseconds since epoch.
|
||
|
// It is implemented by runtime.nanotime using the linkname hack;
|
||
|
// runtime.nanotime is defined for all OSs/ARCHS and uses the
|
||
|
// monotonic system clock, which there is no portable way to access.
|
||
|
// Should that function ever go away, these alternatives exist:
|
||
|
//
|
||
|
// // POSIX only. REALTIME not MONOTONIC. 17ns.
|
||
|
// var tv syscall.Timeval
|
||
|
// syscall.Gettimeofday(&tv) // can't fail
|
||
|
// return tv.Nano()
|
||
|
//
|
||
|
// // Portable. REALTIME not MONOTONIC. 46ns.
|
||
|
// return time.Now().Nanoseconds()
|
||
|
//
|
||
|
// // POSIX only. Adds a dependency.
|
||
|
// import "golang.org/x/sys/unix"
|
||
|
// var ts unix.Timespec
|
||
|
// unix.ClockGettime(CLOCK_MONOTONIC, &ts) // can't fail
|
||
|
// return unix.TimespecToNsec(ts)
|
||
|
//
|
||
|
//go:linkname nanotime runtime.nanotime
|
||
|
func nanotime() int64
|
||
|
|
||
|
// profFuncAddr returns the canonical "address"
|
||
|
// of a Callable for use by the profiler.
|
||
|
func profFuncAddr(fn Callable) uintptr {
|
||
|
switch fn := fn.(type) {
|
||
|
case *Builtin:
|
||
|
return reflect.ValueOf(fn.fn).Pointer()
|
||
|
case *Function:
|
||
|
return uintptr(unsafe.Pointer(fn.funcode))
|
||
|
}
|
||
|
|
||
|
// User-defined callable types are typically of
|
||
|
// of kind pointer-to-struct. Handle them specially.
|
||
|
if v := reflect.ValueOf(fn); v.Type().Kind() == reflect.Ptr {
|
||
|
return v.Pointer()
|
||
|
}
|
||
|
|
||
|
// Address zero is reserved by the protocol.
|
||
|
// Use 1 for callables we don't recognize.
|
||
|
log.Printf("Starlark profiler: no address for Callable %T", fn)
|
||
|
return 1
|
||
|
}
|
||
|
|
||
|
// We encode the protocol message by hand to avoid making
|
||
|
// the interpreter depend on both github.com/google/pprof
|
||
|
// and github.com/golang/protobuf.
|
||
|
//
|
||
|
// This also avoids the need to materialize a protocol message object
|
||
|
// tree of unbounded size and serialize it all at the end.
|
||
|
// The pprof format appears to have been designed to
|
||
|
// permit streaming implementations such as this one.
|
||
|
//
|
||
|
// See https://developers.google.com/protocol-buffers/docs/encoding.
|
||
|
type protoEncoder struct {
|
||
|
w io.Writer // *bytes.Buffer or *gzip.Writer
|
||
|
tmp [binary.MaxVarintLen64]byte
|
||
|
}
|
||
|
|
||
|
func (e *protoEncoder) uvarint(x uint64) {
|
||
|
n := binary.PutUvarint(e.tmp[:], x)
|
||
|
e.w.Write(e.tmp[:n])
|
||
|
}
|
||
|
|
||
|
func (e *protoEncoder) tag(field, wire uint) {
|
||
|
e.uvarint(uint64(field<<3 | wire))
|
||
|
}
|
||
|
|
||
|
func (e *protoEncoder) string(field uint, s string) {
|
||
|
e.tag(field, 2) // length-delimited
|
||
|
e.uvarint(uint64(len(s)))
|
||
|
io.WriteString(e.w, s)
|
||
|
}
|
||
|
|
||
|
func (e *protoEncoder) bytes(field uint, b []byte) {
|
||
|
e.tag(field, 2) // length-delimited
|
||
|
e.uvarint(uint64(len(b)))
|
||
|
e.w.Write(b)
|
||
|
}
|
||
|
|
||
|
func (e *protoEncoder) uint(field uint, x uint64) {
|
||
|
e.tag(field, 0) // varint
|
||
|
e.uvarint(x)
|
||
|
}
|
||
|
|
||
|
func (e *protoEncoder) int(field uint, x int64) {
|
||
|
e.tag(field, 0) // varint
|
||
|
e.uvarint(uint64(x))
|
||
|
}
|