package compile

// This file defines functions to read and write a compile.Program to a file.
//
// It is the client's responsibility to avoid version skew between the
// compiler used to produce a file and the interpreter that consumes it.
// The version number is provided as a constant.
// Incompatible protocol changes should also increment the version number.
//
// Encoding
//
// Program:
//	"!sky"		[4]byte		# magic number
//	str		uint32le	# offset of <strings> section
//	version		varint		# must match Version
//	filename	string
//	numloads	varint
//	loads		[]Ident
//	numnames	varint
//	names		[]string
//	numconsts	varint
//	consts		[]Constant
//	numglobals	varint
//	globals		[]Ident
//	toplevel	Funcode
//	numfuncs	varint
//	funcs		[]Funcode
//	<strings>	[]byte		# concatenation of all referenced strings
//	EOF
//
// Funcode:
//	id		Ident
//	doc		string
//	code		[]byte
//	pclinetablen	varint
//	pclinetab	[]varint
//	numlocals	varint
//	locals		[]Ident
//	numcells	varint
//	cells		[]int
//	numfreevars	varint
//	freevars	[]Ident
//	maxstack	varint
//	numparams	varint
//	numkwonlyparams	varint
//	hasvarargs	varint (0 or 1)
//	haskwargs	varint (0 or 1)
//
// Ident:
//	name		string
//	line, col	varint
//
// Constant:				# type	data
//	type		varint		# 0=string	string
//	data		...		# 1=int		varint
//					# 2=float	uvarint (bits as uint64)
//					# 3=bigint	string (decimal ASCII text)
//
// The encoding starts with a four-byte magic number.
// The next four bytes are a little-endian uint32
// that provides the offset of the string section
// at the end of the file, which contains the ordered
// concatenation of all strings referenced by the
// program. This design permits the decoder to read
// the first and second parts of the file into different
// memory allocations: the first (the encoded program)
// is transient, but the second (the strings) persists
// for the life of the Program.
//
// Within the encoded program, all strings are referred
// to by their length.
As the encoder and decoder process // the entire file sequentially, they are in lock step, // so the start offset of each string is implicit. // // Program.Code is represented as a []byte slice to permit // modification when breakpoints are set. All other strings // are represented as strings. They all (unsafely) share the // same backing byte slice. // // Aside from the str field, all integers are encoded as varints. import ( "encoding/binary" "fmt" "math" "math/big" debugpkg "runtime/debug" "unsafe" "go.starlark.net/syntax" ) const magic = "!sky" // Encode encodes a compiled Starlark program. func (prog *Program) Encode() []byte { var e encoder e.p = append(e.p, magic...) e.p = append(e.p, "????"...) // string data offset; filled in later e.int(Version) e.string(prog.Toplevel.Pos.Filename()) e.bindings(prog.Loads) e.int(len(prog.Names)) for _, name := range prog.Names { e.string(name) } e.int(len(prog.Constants)) for _, c := range prog.Constants { switch c := c.(type) { case string: e.int(0) e.string(c) case int64: e.int(1) e.int64(c) case float64: e.int(2) e.uint64(math.Float64bits(c)) case *big.Int: e.int(3) e.string(c.Text(10)) } } e.bindings(prog.Globals) e.function(prog.Toplevel) e.int(len(prog.Functions)) for _, fn := range prog.Functions { e.function(fn) } // Patch in the offset of the string data section. binary.LittleEndian.PutUint32(e.p[4:8], uint32(len(e.p))) return append(e.p, e.s...) } type encoder struct { p []byte // encoded program s []byte // strings tmp [binary.MaxVarintLen64]byte } func (e *encoder) int(x int) { e.int64(int64(x)) } func (e *encoder) int64(x int64) { n := binary.PutVarint(e.tmp[:], x) e.p = append(e.p, e.tmp[:n]...) } func (e *encoder) uint64(x uint64) { n := binary.PutUvarint(e.tmp[:], x) e.p = append(e.p, e.tmp[:n]...) } func (e *encoder) string(s string) { e.int(len(s)) e.s = append(e.s, s...) } func (e *encoder) bytes(b []byte) { e.int(len(b)) e.s = append(e.s, b...) 
} func (e *encoder) binding(bind Binding) { e.string(bind.Name) e.int(int(bind.Pos.Line)) e.int(int(bind.Pos.Col)) } func (e *encoder) bindings(binds []Binding) { e.int(len(binds)) for _, bind := range binds { e.binding(bind) } } func (e *encoder) function(fn *Funcode) { e.binding(Binding{fn.Name, fn.Pos}) e.string(fn.Doc) e.bytes(fn.Code) e.int(len(fn.pclinetab)) for _, x := range fn.pclinetab { e.int64(int64(x)) } e.bindings(fn.Locals) e.int(len(fn.Cells)) for _, index := range fn.Cells { e.int(index) } e.bindings(fn.Freevars) e.int(fn.MaxStack) e.int(fn.NumParams) e.int(fn.NumKwonlyParams) e.int(b2i(fn.HasVarargs)) e.int(b2i(fn.HasKwargs)) } func b2i(b bool) int { if b { return 1 } else { return 0 } } // DecodeProgram decodes a compiled Starlark program from data. func DecodeProgram(data []byte) (_ *Program, err error) { if len(data) < len(magic) { return nil, fmt.Errorf("not a compiled module: no magic number") } if got := string(data[:4]); got != magic { return nil, fmt.Errorf("not a compiled module: got magic number %q, want %q", got, magic) } defer func() { if x := recover(); x != nil { debugpkg.PrintStack() err = fmt.Errorf("internal error while decoding program: %v", x) } }() offset := binary.LittleEndian.Uint32(data[4:8]) d := decoder{ p: data[8:offset], s: append([]byte(nil), data[offset:]...), // allocate a copy, which will persist } if v := d.int(); v != Version { return nil, fmt.Errorf("version mismatch: read %d, want %d", v, Version) } filename := d.string() d.filename = &filename loads := d.bindings() names := make([]string, d.int()) for i := range names { names[i] = d.string() } // constants constants := make([]interface{}, d.int()) for i := range constants { var c interface{} switch d.int() { case 0: c = d.string() case 1: c = d.int64() case 2: c = math.Float64frombits(d.uint64()) case 3: c, _ = new(big.Int).SetString(d.string(), 10) } constants[i] = c } globals := d.bindings() toplevel := d.function() funcs := make([]*Funcode, d.int()) for i := 
range funcs { funcs[i] = d.function() } prog := &Program{ Loads: loads, Names: names, Constants: constants, Globals: globals, Functions: funcs, Toplevel: toplevel, } toplevel.Prog = prog for _, f := range funcs { f.Prog = prog } if len(d.p)+len(d.s) > 0 { return nil, fmt.Errorf("internal error: unconsumed data during decoding") } return prog, nil } type decoder struct { p []byte // encoded program s []byte // strings filename *string // (indirect to avoid keeping decoder live) } func (d *decoder) int() int { return int(d.int64()) } func (d *decoder) int64() int64 { x, len := binary.Varint(d.p[:]) d.p = d.p[len:] return x } func (d *decoder) uint64() uint64 { x, len := binary.Uvarint(d.p[:]) d.p = d.p[len:] return x } func (d *decoder) string() (s string) { if slice := d.bytes(); len(slice) > 0 { // Avoid a memory allocation for each string // by unsafely aliasing slice. type string struct { data *byte len int } ptr := (*string)(unsafe.Pointer(&s)) ptr.data = &slice[0] ptr.len = len(slice) } return s } func (d *decoder) bytes() []byte { len := d.int() r := d.s[:len:len] d.s = d.s[len:] return r } func (d *decoder) binding() Binding { name := d.string() line := int32(d.int()) col := int32(d.int()) return Binding{Name: name, Pos: syntax.MakePosition(d.filename, line, col)} } func (d *decoder) bindings() []Binding { bindings := make([]Binding, d.int()) for i := range bindings { bindings[i] = d.binding() } return bindings } func (d *decoder) ints() []int { ints := make([]int, d.int()) for i := range ints { ints[i] = d.int() } return ints } func (d *decoder) bool() bool { return d.int() != 0 } func (d *decoder) function() *Funcode { id := d.binding() doc := d.string() code := d.bytes() pclinetab := make([]uint16, d.int()) for i := range pclinetab { pclinetab[i] = uint16(d.int()) } locals := d.bindings() cells := d.ints() freevars := d.bindings() maxStack := d.int() numParams := d.int() numKwonlyParams := d.int() hasVarargs := d.int() != 0 hasKwargs := d.int() != 0 return 
&Funcode{ // Prog is filled in later. Pos: id.Pos, Name: id.Name, Doc: doc, Code: code, pclinetab: pclinetab, Locals: locals, Cells: cells, Freevars: freevars, MaxStack: maxStack, NumParams: numParams, NumKwonlyParams: numKwonlyParams, HasVarargs: hasVarargs, HasKwargs: hasKwargs, } }