mirror of https://github.com/k3s-io/k3s
Merge pull request #1742 from dweomer/runc/replace-rc10
runc: bump to rc10pull/1832/head v1.18.2+k3s1
commit
698e444a03
2
go.mod
2
go.mod
|
@ -95,7 +95,7 @@ require (
|
|||
github.com/lxc/lxd v0.0.0-20191108214106-60ea15630455
|
||||
github.com/mattn/go-sqlite3 v1.13.0
|
||||
github.com/natefinch/lumberjack v2.0.0+incompatible
|
||||
github.com/opencontainers/runc v1.0.0-rc9
|
||||
github.com/opencontainers/runc v1.0.0-rc10
|
||||
github.com/opencontainers/selinux v1.3.1-0.20190929122143-5215b1806f52
|
||||
github.com/pkg/errors v0.9.1
|
||||
github.com/rakelkar/gonetsh v0.0.0-20190719023240-501daadcadf8 // indirect
|
||||
|
|
4
go.sum
4
go.sum
|
@ -102,6 +102,7 @@ github.com/chai2010/gettext-go v0.0.0-20160711120539-c6fed771bfd5/go.mod h1:/iP1
|
|||
github.com/checkpoint-restore/go-criu v0.0.0-20181120144056-17b0214f6c48 h1:AQMF0Xixllgf29MKlx/TGEhRk7bEDX5kxz8Ui8lOvEs=
|
||||
github.com/checkpoint-restore/go-criu v0.0.0-20181120144056-17b0214f6c48/go.mod h1:TrMrLQfeENAPYPRsJuq3jsqdlRh3lvi6trTZJG8+tho=
|
||||
github.com/cheekybits/genny v0.0.0-20170328200008-9127e812e1e9/go.mod h1:+tQajlRqAUrPI7DOSpB0XAqZYtQakVtB7wXkRAgjxjQ=
|
||||
github.com/cilium/ebpf v0.0.0-20191025125908-95b36a581eed h1:/UgmF+cZTm9kp4uJ122y/9cVhczNJCgAgAeH2FfzPeg=
|
||||
github.com/cilium/ebpf v0.0.0-20191025125908-95b36a581eed/go.mod h1:MA5e5Lr8slmEg9bt0VpxxWqJlO4iwu3FBdHUzV7wQVg=
|
||||
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
|
||||
github.com/clusterhq/flocker-go v0.0.0-20160920122132-2b8b7259d313/go.mod h1:P1wt9Z3DP8O6W3rvwCt0REIlshg1InHImaLW0t3ObY0=
|
||||
|
@ -590,9 +591,8 @@ github.com/opencontainers/go-digest v1.0.0-rc1/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQ
|
|||
github.com/opencontainers/image-spec v1.0.1 h1:JMemWkRwHx4Zj+fVxWoMCFm/8sYGGrUVojFA6h/TRcI=
|
||||
github.com/opencontainers/image-spec v1.0.1/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
|
||||
github.com/opencontainers/runc v0.0.0-20190115041553-12f6a991201f/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U=
|
||||
github.com/opencontainers/runc v1.0.0-rc10 h1:AbmCEuSZXVflng0/cboQkpdEOeBsPMjz6tmq4Pv8MZw=
|
||||
github.com/opencontainers/runc v1.0.0-rc10/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U=
|
||||
github.com/opencontainers/runc v1.0.0-rc9 h1:/k06BMULKF5hidyoZymkoDCzdJzltZpz/UU4LguQVtc=
|
||||
github.com/opencontainers/runc v1.0.0-rc9/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U=
|
||||
github.com/opencontainers/runtime-spec v0.0.0-20180911193056-5684b8af48c1 h1:AcjCvgprf9I23wEYTHuyuHcuuQAp4hK5l+u1FUXgVaM=
|
||||
github.com/opencontainers/runtime-spec v0.0.0-20180911193056-5684b8af48c1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
github.com/opencontainers/runtime-tools v0.0.0-20181011054405-1d69bd0f9c39/go.mod h1:r3f7wjNzSs2extwzU3Y+6pKfobzPh+kKFJ3ofN+3nfs=
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
# Binaries for programs and plugins
|
||||
*.exe
|
||||
*.exe~
|
||||
*.dll
|
||||
*.so
|
||||
*.dylib
|
||||
|
||||
# Test binary, build with `go test -c`
|
||||
*.test
|
||||
|
||||
# Output of the go coverage tool, specifically when used with LiteIDE
|
||||
*.out
|
|
@ -0,0 +1,46 @@
|
|||
# Contributor Covenant Code of Conduct
|
||||
|
||||
## Our Pledge
|
||||
|
||||
In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
|
||||
|
||||
## Our Standards
|
||||
|
||||
Examples of behavior that contributes to creating a positive environment include:
|
||||
|
||||
* Using welcoming and inclusive language
|
||||
* Being respectful of differing viewpoints and experiences
|
||||
* Gracefully accepting constructive criticism
|
||||
* Focusing on what is best for the community
|
||||
* Showing empathy towards other community members
|
||||
|
||||
Examples of unacceptable behavior by participants include:
|
||||
|
||||
* The use of sexualized language or imagery and unwelcome sexual attention or advances
|
||||
* Trolling, insulting/derogatory comments, and personal or political attacks
|
||||
* Public or private harassment
|
||||
* Publishing others' private information, such as a physical or electronic address, without explicit permission
|
||||
* Other conduct which could reasonably be considered inappropriate in a professional setting
|
||||
|
||||
## Our Responsibilities
|
||||
|
||||
Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
|
||||
|
||||
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
|
||||
|
||||
## Scope
|
||||
|
||||
This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
|
||||
|
||||
## Enforcement
|
||||
|
||||
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at nathanjsweet at gmail dot com or i at lmb dot io. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
|
||||
|
||||
Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
|
||||
|
||||
## Attribution
|
||||
|
||||
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
|
||||
|
||||
[homepage]: http://contributor-covenant.org
|
||||
[version]: http://contributor-covenant.org/version/1/4/
|
|
@ -0,0 +1,23 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2017 Nathan Sweet
|
||||
Copyright (c) 2018, 2019 Cloudflare
|
||||
Copyright (c) 2019 Authors of Cilium
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
|
@ -0,0 +1,183 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// CollectionABI describes the interface of an eBPF collection.
|
||||
type CollectionABI struct {
|
||||
Maps map[string]*MapABI
|
||||
Programs map[string]*ProgramABI
|
||||
}
|
||||
|
||||
// CheckSpec verifies that all maps and programs mentioned
|
||||
// in the ABI are present in the spec.
|
||||
func (abi *CollectionABI) CheckSpec(cs *CollectionSpec) error {
|
||||
for name := range abi.Maps {
|
||||
if cs.Maps[name] == nil {
|
||||
return errors.Errorf("missing map %s", name)
|
||||
}
|
||||
}
|
||||
|
||||
for name := range abi.Programs {
|
||||
if cs.Programs[name] == nil {
|
||||
return errors.Errorf("missing program %s", name)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check verifies that all items in a collection conform to this ABI.
|
||||
func (abi *CollectionABI) Check(coll *Collection) error {
|
||||
for name, mapABI := range abi.Maps {
|
||||
m := coll.Maps[name]
|
||||
if m == nil {
|
||||
return errors.Errorf("missing map %s", name)
|
||||
}
|
||||
if err := mapABI.Check(m); err != nil {
|
||||
return errors.Wrapf(err, "map %s", name)
|
||||
}
|
||||
}
|
||||
|
||||
for name, progABI := range abi.Programs {
|
||||
p := coll.Programs[name]
|
||||
if p == nil {
|
||||
return errors.Errorf("missing program %s", name)
|
||||
}
|
||||
if err := progABI.Check(p); err != nil {
|
||||
return errors.Wrapf(err, "program %s", name)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// MapABI describes a Map.
|
||||
//
|
||||
// Use it to assert that a Map matches what your code expects.
|
||||
type MapABI struct {
|
||||
Type MapType
|
||||
KeySize uint32
|
||||
ValueSize uint32
|
||||
MaxEntries uint32
|
||||
InnerMap *MapABI
|
||||
}
|
||||
|
||||
func newMapABIFromSpec(spec *MapSpec) *MapABI {
|
||||
var inner *MapABI
|
||||
if spec.InnerMap != nil {
|
||||
inner = newMapABIFromSpec(spec.InnerMap)
|
||||
}
|
||||
|
||||
return &MapABI{
|
||||
spec.Type,
|
||||
spec.KeySize,
|
||||
spec.ValueSize,
|
||||
spec.MaxEntries,
|
||||
inner,
|
||||
}
|
||||
}
|
||||
|
||||
func newMapABIFromFd(fd *bpfFD) (*MapABI, error) {
|
||||
info, err := bpfGetMapInfoByFD(fd)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
mapType := MapType(info.mapType)
|
||||
if mapType == ArrayOfMaps || mapType == HashOfMaps {
|
||||
return nil, errors.New("can't get map info for nested maps")
|
||||
}
|
||||
|
||||
return &MapABI{
|
||||
mapType,
|
||||
info.keySize,
|
||||
info.valueSize,
|
||||
info.maxEntries,
|
||||
nil,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Check verifies that a Map conforms to the ABI.
|
||||
//
|
||||
// Members of ABI which have the zero value of their type are not checked.
|
||||
func (abi *MapABI) Check(m *Map) error {
|
||||
return abi.check(&m.abi)
|
||||
}
|
||||
|
||||
func (abi *MapABI) check(other *MapABI) error {
|
||||
if abi.Type != UnspecifiedMap && other.Type != abi.Type {
|
||||
return errors.Errorf("expected map type %s, have %s", abi.Type, other.Type)
|
||||
}
|
||||
if err := checkUint32("key size", abi.KeySize, other.KeySize); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := checkUint32("value size", abi.ValueSize, other.ValueSize); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := checkUint32("max entries", abi.MaxEntries, other.MaxEntries); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if abi.InnerMap == nil {
|
||||
if abi.Type == ArrayOfMaps || abi.Type == HashOfMaps {
|
||||
return errors.New("missing inner map ABI")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
if other.InnerMap == nil {
|
||||
return errors.New("missing inner map")
|
||||
}
|
||||
|
||||
return errors.Wrap(abi.InnerMap.check(other.InnerMap), "inner map")
|
||||
}
|
||||
|
||||
// ProgramABI describes a Program.
|
||||
//
|
||||
// Use it to assert that a Program matches what your code expects.
|
||||
type ProgramABI struct {
|
||||
Type ProgramType
|
||||
}
|
||||
|
||||
func newProgramABIFromSpec(spec *ProgramSpec) *ProgramABI {
|
||||
return &ProgramABI{
|
||||
spec.Type,
|
||||
}
|
||||
}
|
||||
|
||||
func newProgramABIFromFd(fd *bpfFD) (*ProgramABI, error) {
|
||||
info, err := bpfGetProgInfoByFD(fd)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newProgramABIFromInfo(info), nil
|
||||
}
|
||||
|
||||
func newProgramABIFromInfo(info *bpfProgInfo) *ProgramABI {
|
||||
return &ProgramABI{
|
||||
Type: ProgramType(info.progType),
|
||||
}
|
||||
}
|
||||
|
||||
// Check verifies that a Program conforms to the ABI.
|
||||
//
|
||||
// Members which have the zero value of their type
|
||||
// are not checked.
|
||||
func (abi *ProgramABI) Check(prog *Program) error {
|
||||
if abi.Type != UnspecifiedProgram && prog.abi.Type != abi.Type {
|
||||
return errors.Errorf("expected program type %s, have %s", abi.Type, prog.abi.Type)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkUint32(name string, want, have uint32) error {
|
||||
if want != 0 && have != want {
|
||||
return errors.Errorf("expected %s to be %d, have %d", name, want, have)
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,149 @@
|
|||
package asm
|
||||
|
||||
//go:generate stringer -output alu_string.go -type=Source,Endianness,ALUOp
|
||||
|
||||
// Source of ALU / ALU64 / Branch operations
|
||||
//
|
||||
// msb lsb
|
||||
// +----+-+---+
|
||||
// |op |S|cls|
|
||||
// +----+-+---+
|
||||
type Source uint8
|
||||
|
||||
const sourceMask OpCode = 0x08
|
||||
|
||||
// Source bitmask
|
||||
const (
|
||||
// InvalidSource is returned by getters when invoked
|
||||
// on non ALU / branch OpCodes.
|
||||
InvalidSource Source = 0xff
|
||||
// ImmSource src is from constant
|
||||
ImmSource Source = 0x00
|
||||
// RegSource src is from register
|
||||
RegSource Source = 0x08
|
||||
)
|
||||
|
||||
// The Endianness of a byte swap instruction.
|
||||
type Endianness uint8
|
||||
|
||||
const endianMask = sourceMask
|
||||
|
||||
// Endian flags
|
||||
const (
|
||||
InvalidEndian Endianness = 0xff
|
||||
// Convert to little endian
|
||||
LE Endianness = 0x00
|
||||
// Convert to big endian
|
||||
BE Endianness = 0x08
|
||||
)
|
||||
|
||||
// ALUOp are ALU / ALU64 operations
|
||||
//
|
||||
// msb lsb
|
||||
// +----+-+---+
|
||||
// |OP |s|cls|
|
||||
// +----+-+---+
|
||||
type ALUOp uint8
|
||||
|
||||
const aluMask OpCode = 0xf0
|
||||
|
||||
const (
|
||||
// InvalidALUOp is returned by getters when invoked
|
||||
// on non ALU OpCodes
|
||||
InvalidALUOp ALUOp = 0xff
|
||||
// Add - addition
|
||||
Add ALUOp = 0x00
|
||||
// Sub - subtraction
|
||||
Sub ALUOp = 0x10
|
||||
// Mul - multiplication
|
||||
Mul ALUOp = 0x20
|
||||
// Div - division
|
||||
Div ALUOp = 0x30
|
||||
// Or - bitwise or
|
||||
Or ALUOp = 0x40
|
||||
// And - bitwise and
|
||||
And ALUOp = 0x50
|
||||
// LSh - bitwise shift left
|
||||
LSh ALUOp = 0x60
|
||||
// RSh - bitwise shift right
|
||||
RSh ALUOp = 0x70
|
||||
// Neg - sign/unsign signing bit
|
||||
Neg ALUOp = 0x80
|
||||
// Mod - modulo
|
||||
Mod ALUOp = 0x90
|
||||
// Xor - bitwise xor
|
||||
Xor ALUOp = 0xa0
|
||||
// Mov - move value from one place to another
|
||||
Mov ALUOp = 0xb0
|
||||
// ArSh - arithmatic shift
|
||||
ArSh ALUOp = 0xc0
|
||||
// Swap - endian conversions
|
||||
Swap ALUOp = 0xd0
|
||||
)
|
||||
|
||||
// HostTo converts from host to another endianness.
|
||||
func HostTo(endian Endianness, dst Register, size Size) Instruction {
|
||||
var imm int64
|
||||
switch size {
|
||||
case Half:
|
||||
imm = 16
|
||||
case Word:
|
||||
imm = 32
|
||||
case DWord:
|
||||
imm = 64
|
||||
default:
|
||||
return Instruction{OpCode: InvalidOpCode}
|
||||
}
|
||||
|
||||
return Instruction{
|
||||
OpCode: OpCode(ALUClass).SetALUOp(Swap).SetSource(Source(endian)),
|
||||
Dst: dst,
|
||||
Constant: imm,
|
||||
}
|
||||
}
|
||||
|
||||
// Op returns the OpCode for an ALU operation with a given source.
|
||||
func (op ALUOp) Op(source Source) OpCode {
|
||||
return OpCode(ALU64Class).SetALUOp(op).SetSource(source)
|
||||
}
|
||||
|
||||
// Reg emits `dst (op) src`.
|
||||
func (op ALUOp) Reg(dst, src Register) Instruction {
|
||||
return Instruction{
|
||||
OpCode: op.Op(RegSource),
|
||||
Dst: dst,
|
||||
Src: src,
|
||||
}
|
||||
}
|
||||
|
||||
// Imm emits `dst (op) value`.
|
||||
func (op ALUOp) Imm(dst Register, value int32) Instruction {
|
||||
return Instruction{
|
||||
OpCode: op.Op(ImmSource),
|
||||
Dst: dst,
|
||||
Constant: int64(value),
|
||||
}
|
||||
}
|
||||
|
||||
// Op32 returns the OpCode for a 32-bit ALU operation with a given source.
|
||||
func (op ALUOp) Op32(source Source) OpCode {
|
||||
return OpCode(ALUClass).SetALUOp(op).SetSource(source)
|
||||
}
|
||||
|
||||
// Reg32 emits `dst (op) src`, zeroing the upper 32 bit of dst.
|
||||
func (op ALUOp) Reg32(dst, src Register) Instruction {
|
||||
return Instruction{
|
||||
OpCode: op.Op32(RegSource),
|
||||
Dst: dst,
|
||||
Src: src,
|
||||
}
|
||||
}
|
||||
|
||||
// Imm32 emits `dst (op) value`, zeroing the upper 32 bit of dst.
|
||||
func (op ALUOp) Imm32(dst Register, value int32) Instruction {
|
||||
return Instruction{
|
||||
OpCode: op.Op32(ImmSource),
|
||||
Dst: dst,
|
||||
Constant: int64(value),
|
||||
}
|
||||
}
|
|
@ -0,0 +1,107 @@
|
|||
// Code generated by "stringer -output alu_string.go -type=Source,Endianness,ALUOp"; DO NOT EDIT.
|
||||
|
||||
package asm
|
||||
|
||||
import "strconv"
|
||||
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[InvalidSource-255]
|
||||
_ = x[ImmSource-0]
|
||||
_ = x[RegSource-8]
|
||||
}
|
||||
|
||||
const (
|
||||
_Source_name_0 = "ImmSource"
|
||||
_Source_name_1 = "RegSource"
|
||||
_Source_name_2 = "InvalidSource"
|
||||
)
|
||||
|
||||
func (i Source) String() string {
|
||||
switch {
|
||||
case i == 0:
|
||||
return _Source_name_0
|
||||
case i == 8:
|
||||
return _Source_name_1
|
||||
case i == 255:
|
||||
return _Source_name_2
|
||||
default:
|
||||
return "Source(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
}
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[InvalidEndian-255]
|
||||
_ = x[LE-0]
|
||||
_ = x[BE-8]
|
||||
}
|
||||
|
||||
const (
|
||||
_Endianness_name_0 = "LE"
|
||||
_Endianness_name_1 = "BE"
|
||||
_Endianness_name_2 = "InvalidEndian"
|
||||
)
|
||||
|
||||
func (i Endianness) String() string {
|
||||
switch {
|
||||
case i == 0:
|
||||
return _Endianness_name_0
|
||||
case i == 8:
|
||||
return _Endianness_name_1
|
||||
case i == 255:
|
||||
return _Endianness_name_2
|
||||
default:
|
||||
return "Endianness(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
}
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[InvalidALUOp-255]
|
||||
_ = x[Add-0]
|
||||
_ = x[Sub-16]
|
||||
_ = x[Mul-32]
|
||||
_ = x[Div-48]
|
||||
_ = x[Or-64]
|
||||
_ = x[And-80]
|
||||
_ = x[LSh-96]
|
||||
_ = x[RSh-112]
|
||||
_ = x[Neg-128]
|
||||
_ = x[Mod-144]
|
||||
_ = x[Xor-160]
|
||||
_ = x[Mov-176]
|
||||
_ = x[ArSh-192]
|
||||
_ = x[Swap-208]
|
||||
}
|
||||
|
||||
const _ALUOp_name = "AddSubMulDivOrAndLShRShNegModXorMovArShSwapInvalidALUOp"
|
||||
|
||||
var _ALUOp_map = map[ALUOp]string{
|
||||
0: _ALUOp_name[0:3],
|
||||
16: _ALUOp_name[3:6],
|
||||
32: _ALUOp_name[6:9],
|
||||
48: _ALUOp_name[9:12],
|
||||
64: _ALUOp_name[12:14],
|
||||
80: _ALUOp_name[14:17],
|
||||
96: _ALUOp_name[17:20],
|
||||
112: _ALUOp_name[20:23],
|
||||
128: _ALUOp_name[23:26],
|
||||
144: _ALUOp_name[26:29],
|
||||
160: _ALUOp_name[29:32],
|
||||
176: _ALUOp_name[32:35],
|
||||
192: _ALUOp_name[35:39],
|
||||
208: _ALUOp_name[39:43],
|
||||
255: _ALUOp_name[43:55],
|
||||
}
|
||||
|
||||
func (i ALUOp) String() string {
|
||||
if str, ok := _ALUOp_map[i]; ok {
|
||||
return str
|
||||
}
|
||||
return "ALUOp(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
|
@ -0,0 +1,2 @@
|
|||
// Package asm is an assembler for eBPF bytecode.
|
||||
package asm
|
|
@ -0,0 +1,143 @@
|
|||
package asm
|
||||
|
||||
//go:generate stringer -output func_string.go -type=BuiltinFunc
|
||||
|
||||
// BuiltinFunc is a built-in eBPF function.
|
||||
type BuiltinFunc int32
|
||||
|
||||
// eBPF built-in functions
|
||||
//
|
||||
// You can renegerate this list using the following gawk script:
|
||||
//
|
||||
// /FN\(.+\),/ {
|
||||
// match($1, /\((.+)\)/, r)
|
||||
// split(r[1], p, "_")
|
||||
// printf "Fn"
|
||||
// for (i in p) {
|
||||
// printf "%s%s", toupper(substr(p[i], 1, 1)), substr(p[i], 2)
|
||||
// }
|
||||
// print ""
|
||||
// }
|
||||
//
|
||||
// The script expects include/uapi/linux/bpf.h as it's input.
|
||||
const (
|
||||
FnUnspec BuiltinFunc = iota
|
||||
FnMapLookupElem
|
||||
FnMapUpdateElem
|
||||
FnMapDeleteElem
|
||||
FnProbeRead
|
||||
FnKtimeGetNs
|
||||
FnTracePrintk
|
||||
FnGetPrandomU32
|
||||
FnGetSmpProcessorId
|
||||
FnSkbStoreBytes
|
||||
FnL3CsumReplace
|
||||
FnL4CsumReplace
|
||||
FnTailCall
|
||||
FnCloneRedirect
|
||||
FnGetCurrentPidTgid
|
||||
FnGetCurrentUidGid
|
||||
FnGetCurrentComm
|
||||
FnGetCgroupClassid
|
||||
FnSkbVlanPush
|
||||
FnSkbVlanPop
|
||||
FnSkbGetTunnelKey
|
||||
FnSkbSetTunnelKey
|
||||
FnPerfEventRead
|
||||
FnRedirect
|
||||
FnGetRouteRealm
|
||||
FnPerfEventOutput
|
||||
FnSkbLoadBytes
|
||||
FnGetStackid
|
||||
FnCsumDiff
|
||||
FnSkbGetTunnelOpt
|
||||
FnSkbSetTunnelOpt
|
||||
FnSkbChangeProto
|
||||
FnSkbChangeType
|
||||
FnSkbUnderCgroup
|
||||
FnGetHashRecalc
|
||||
FnGetCurrentTask
|
||||
FnProbeWriteUser
|
||||
FnCurrentTaskUnderCgroup
|
||||
FnSkbChangeTail
|
||||
FnSkbPullData
|
||||
FnCsumUpdate
|
||||
FnSetHashInvalid
|
||||
FnGetNumaNodeId
|
||||
FnSkbChangeHead
|
||||
FnXdpAdjustHead
|
||||
FnProbeReadStr
|
||||
FnGetSocketCookie
|
||||
FnGetSocketUid
|
||||
FnSetHash
|
||||
FnSetsockopt
|
||||
FnSkbAdjustRoom
|
||||
FnRedirectMap
|
||||
FnSkRedirectMap
|
||||
FnSockMapUpdate
|
||||
FnXdpAdjustMeta
|
||||
FnPerfEventReadValue
|
||||
FnPerfProgReadValue
|
||||
FnGetsockopt
|
||||
FnOverrideReturn
|
||||
FnSockOpsCbFlagsSet
|
||||
FnMsgRedirectMap
|
||||
FnMsgApplyBytes
|
||||
FnMsgCorkBytes
|
||||
FnMsgPullData
|
||||
FnBind
|
||||
FnXdpAdjustTail
|
||||
FnSkbGetXfrmState
|
||||
FnGetStack
|
||||
FnSkbLoadBytesRelative
|
||||
FnFibLookup
|
||||
FnSockHashUpdate
|
||||
FnMsgRedirectHash
|
||||
FnSkRedirectHash
|
||||
FnLwtPushEncap
|
||||
FnLwtSeg6StoreBytes
|
||||
FnLwtSeg6AdjustSrh
|
||||
FnLwtSeg6Action
|
||||
FnRcRepeat
|
||||
FnRcKeydown
|
||||
FnSkbCgroupId
|
||||
FnGetCurrentCgroupId
|
||||
FnGetLocalStorage
|
||||
FnSkSelectReuseport
|
||||
FnSkbAncestorCgroupId
|
||||
FnSkLookupTcp
|
||||
FnSkLookupUdp
|
||||
FnSkRelease
|
||||
FnMapPushElem
|
||||
FnMapPopElem
|
||||
FnMapPeekElem
|
||||
FnMsgPushData
|
||||
FnMsgPopData
|
||||
FnRcPointerRel
|
||||
FnSpinLock
|
||||
FnSpinUnlock
|
||||
FnSkFullsock
|
||||
FnTcpSock
|
||||
FnSkbEcnSetCe
|
||||
FnGetListenerSock
|
||||
FnSkcLookupTcp
|
||||
FnTcpCheckSyncookie
|
||||
FnSysctlGetName
|
||||
FnSysctlGetCurrentValue
|
||||
FnSysctlGetNewValue
|
||||
FnSysctlSetNewValue
|
||||
FnStrtol
|
||||
FnStrtoul
|
||||
FnSkStorageGet
|
||||
FnSkStorageDelete
|
||||
FnSendSignal
|
||||
FnTcpGenSyncookie
|
||||
)
|
||||
|
||||
// Call emits a function call.
|
||||
func (fn BuiltinFunc) Call() Instruction {
|
||||
return Instruction{
|
||||
OpCode: OpCode(JumpClass).SetJumpOp(Call),
|
||||
Constant: int64(fn),
|
||||
}
|
||||
}
|
|
@ -0,0 +1,133 @@
|
|||
// Code generated by "stringer -output func_string.go -type=BuiltinFunc"; DO NOT EDIT.
|
||||
|
||||
package asm
|
||||
|
||||
import "strconv"
|
||||
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[FnUnspec-0]
|
||||
_ = x[FnMapLookupElem-1]
|
||||
_ = x[FnMapUpdateElem-2]
|
||||
_ = x[FnMapDeleteElem-3]
|
||||
_ = x[FnProbeRead-4]
|
||||
_ = x[FnKtimeGetNs-5]
|
||||
_ = x[FnTracePrintk-6]
|
||||
_ = x[FnGetPrandomU32-7]
|
||||
_ = x[FnGetSmpProcessorId-8]
|
||||
_ = x[FnSkbStoreBytes-9]
|
||||
_ = x[FnL3CsumReplace-10]
|
||||
_ = x[FnL4CsumReplace-11]
|
||||
_ = x[FnTailCall-12]
|
||||
_ = x[FnCloneRedirect-13]
|
||||
_ = x[FnGetCurrentPidTgid-14]
|
||||
_ = x[FnGetCurrentUidGid-15]
|
||||
_ = x[FnGetCurrentComm-16]
|
||||
_ = x[FnGetCgroupClassid-17]
|
||||
_ = x[FnSkbVlanPush-18]
|
||||
_ = x[FnSkbVlanPop-19]
|
||||
_ = x[FnSkbGetTunnelKey-20]
|
||||
_ = x[FnSkbSetTunnelKey-21]
|
||||
_ = x[FnPerfEventRead-22]
|
||||
_ = x[FnRedirect-23]
|
||||
_ = x[FnGetRouteRealm-24]
|
||||
_ = x[FnPerfEventOutput-25]
|
||||
_ = x[FnSkbLoadBytes-26]
|
||||
_ = x[FnGetStackid-27]
|
||||
_ = x[FnCsumDiff-28]
|
||||
_ = x[FnSkbGetTunnelOpt-29]
|
||||
_ = x[FnSkbSetTunnelOpt-30]
|
||||
_ = x[FnSkbChangeProto-31]
|
||||
_ = x[FnSkbChangeType-32]
|
||||
_ = x[FnSkbUnderCgroup-33]
|
||||
_ = x[FnGetHashRecalc-34]
|
||||
_ = x[FnGetCurrentTask-35]
|
||||
_ = x[FnProbeWriteUser-36]
|
||||
_ = x[FnCurrentTaskUnderCgroup-37]
|
||||
_ = x[FnSkbChangeTail-38]
|
||||
_ = x[FnSkbPullData-39]
|
||||
_ = x[FnCsumUpdate-40]
|
||||
_ = x[FnSetHashInvalid-41]
|
||||
_ = x[FnGetNumaNodeId-42]
|
||||
_ = x[FnSkbChangeHead-43]
|
||||
_ = x[FnXdpAdjustHead-44]
|
||||
_ = x[FnProbeReadStr-45]
|
||||
_ = x[FnGetSocketCookie-46]
|
||||
_ = x[FnGetSocketUid-47]
|
||||
_ = x[FnSetHash-48]
|
||||
_ = x[FnSetsockopt-49]
|
||||
_ = x[FnSkbAdjustRoom-50]
|
||||
_ = x[FnRedirectMap-51]
|
||||
_ = x[FnSkRedirectMap-52]
|
||||
_ = x[FnSockMapUpdate-53]
|
||||
_ = x[FnXdpAdjustMeta-54]
|
||||
_ = x[FnPerfEventReadValue-55]
|
||||
_ = x[FnPerfProgReadValue-56]
|
||||
_ = x[FnGetsockopt-57]
|
||||
_ = x[FnOverrideReturn-58]
|
||||
_ = x[FnSockOpsCbFlagsSet-59]
|
||||
_ = x[FnMsgRedirectMap-60]
|
||||
_ = x[FnMsgApplyBytes-61]
|
||||
_ = x[FnMsgCorkBytes-62]
|
||||
_ = x[FnMsgPullData-63]
|
||||
_ = x[FnBind-64]
|
||||
_ = x[FnXdpAdjustTail-65]
|
||||
_ = x[FnSkbGetXfrmState-66]
|
||||
_ = x[FnGetStack-67]
|
||||
_ = x[FnSkbLoadBytesRelative-68]
|
||||
_ = x[FnFibLookup-69]
|
||||
_ = x[FnSockHashUpdate-70]
|
||||
_ = x[FnMsgRedirectHash-71]
|
||||
_ = x[FnSkRedirectHash-72]
|
||||
_ = x[FnLwtPushEncap-73]
|
||||
_ = x[FnLwtSeg6StoreBytes-74]
|
||||
_ = x[FnLwtSeg6AdjustSrh-75]
|
||||
_ = x[FnLwtSeg6Action-76]
|
||||
_ = x[FnRcRepeat-77]
|
||||
_ = x[FnRcKeydown-78]
|
||||
_ = x[FnSkbCgroupId-79]
|
||||
_ = x[FnGetCurrentCgroupId-80]
|
||||
_ = x[FnGetLocalStorage-81]
|
||||
_ = x[FnSkSelectReuseport-82]
|
||||
_ = x[FnSkbAncestorCgroupId-83]
|
||||
_ = x[FnSkLookupTcp-84]
|
||||
_ = x[FnSkLookupUdp-85]
|
||||
_ = x[FnSkRelease-86]
|
||||
_ = x[FnMapPushElem-87]
|
||||
_ = x[FnMapPopElem-88]
|
||||
_ = x[FnMapPeekElem-89]
|
||||
_ = x[FnMsgPushData-90]
|
||||
_ = x[FnMsgPopData-91]
|
||||
_ = x[FnRcPointerRel-92]
|
||||
_ = x[FnSpinLock-93]
|
||||
_ = x[FnSpinUnlock-94]
|
||||
_ = x[FnSkFullsock-95]
|
||||
_ = x[FnTcpSock-96]
|
||||
_ = x[FnSkbEcnSetCe-97]
|
||||
_ = x[FnGetListenerSock-98]
|
||||
_ = x[FnSkcLookupTcp-99]
|
||||
_ = x[FnTcpCheckSyncookie-100]
|
||||
_ = x[FnSysctlGetName-101]
|
||||
_ = x[FnSysctlGetCurrentValue-102]
|
||||
_ = x[FnSysctlGetNewValue-103]
|
||||
_ = x[FnSysctlSetNewValue-104]
|
||||
_ = x[FnStrtol-105]
|
||||
_ = x[FnStrtoul-106]
|
||||
_ = x[FnSkStorageGet-107]
|
||||
_ = x[FnSkStorageDelete-108]
|
||||
_ = x[FnSendSignal-109]
|
||||
_ = x[FnTcpGenSyncookie-110]
|
||||
}
|
||||
|
||||
const _BuiltinFunc_name = "FnUnspecFnMapLookupElemFnMapUpdateElemFnMapDeleteElemFnProbeReadFnKtimeGetNsFnTracePrintkFnGetPrandomU32FnGetSmpProcessorIdFnSkbStoreBytesFnL3CsumReplaceFnL4CsumReplaceFnTailCallFnCloneRedirectFnGetCurrentPidTgidFnGetCurrentUidGidFnGetCurrentCommFnGetCgroupClassidFnSkbVlanPushFnSkbVlanPopFnSkbGetTunnelKeyFnSkbSetTunnelKeyFnPerfEventReadFnRedirectFnGetRouteRealmFnPerfEventOutputFnSkbLoadBytesFnGetStackidFnCsumDiffFnSkbGetTunnelOptFnSkbSetTunnelOptFnSkbChangeProtoFnSkbChangeTypeFnSkbUnderCgroupFnGetHashRecalcFnGetCurrentTaskFnProbeWriteUserFnCurrentTaskUnderCgroupFnSkbChangeTailFnSkbPullDataFnCsumUpdateFnSetHashInvalidFnGetNumaNodeIdFnSkbChangeHeadFnXdpAdjustHeadFnProbeReadStrFnGetSocketCookieFnGetSocketUidFnSetHashFnSetsockoptFnSkbAdjustRoomFnRedirectMapFnSkRedirectMapFnSockMapUpdateFnXdpAdjustMetaFnPerfEventReadValueFnPerfProgReadValueFnGetsockoptFnOverrideReturnFnSockOpsCbFlagsSetFnMsgRedirectMapFnMsgApplyBytesFnMsgCorkBytesFnMsgPullDataFnBindFnXdpAdjustTailFnSkbGetXfrmStateFnGetStackFnSkbLoadBytesRelativeFnFibLookupFnSockHashUpdateFnMsgRedirectHashFnSkRedirectHashFnLwtPushEncapFnLwtSeg6StoreBytesFnLwtSeg6AdjustSrhFnLwtSeg6ActionFnRcRepeatFnRcKeydownFnSkbCgroupIdFnGetCurrentCgroupIdFnGetLocalStorageFnSkSelectReuseportFnSkbAncestorCgroupIdFnSkLookupTcpFnSkLookupUdpFnSkReleaseFnMapPushElemFnMapPopElemFnMapPeekElemFnMsgPushDataFnMsgPopDataFnRcPointerRelFnSpinLockFnSpinUnlockFnSkFullsockFnTcpSockFnSkbEcnSetCeFnGetListenerSockFnSkcLookupTcpFnTcpCheckSyncookieFnSysctlGetNameFnSysctlGetCurrentValueFnSysctlGetNewValueFnSysctlSetNewValueFnStrtolFnStrtoulFnSkStorageGetFnSkStorageDeleteFnSendSignalFnTcpGenSyncookie"
|
||||
|
||||
var _BuiltinFunc_index = [...]uint16{0, 8, 23, 38, 53, 64, 76, 89, 104, 123, 138, 153, 168, 178, 193, 212, 230, 246, 264, 277, 289, 306, 323, 338, 348, 363, 380, 394, 406, 416, 433, 450, 466, 481, 497, 512, 528, 544, 568, 583, 596, 608, 624, 639, 654, 669, 683, 700, 714, 723, 735, 750, 763, 778, 793, 808, 828, 847, 859, 875, 894, 910, 925, 939, 952, 958, 973, 990, 1000, 1022, 1033, 1049, 1066, 1082, 1096, 1115, 1133, 1148, 1158, 1169, 1182, 1202, 1219, 1238, 1259, 1272, 1285, 1296, 1309, 1321, 1334, 1347, 1359, 1373, 1383, 1395, 1407, 1416, 1429, 1446, 1460, 1479, 1494, 1517, 1536, 1555, 1563, 1572, 1586, 1603, 1615, 1632}
|
||||
|
||||
func (i BuiltinFunc) String() string {
|
||||
if i < 0 || i >= BuiltinFunc(len(_BuiltinFunc_index)-1) {
|
||||
return "BuiltinFunc(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
return _BuiltinFunc_name[_BuiltinFunc_index[i]:_BuiltinFunc_index[i+1]]
|
||||
}
|
|
@ -0,0 +1,416 @@
|
|||
package asm
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"strings"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// InstructionSize is the size of a BPF instruction in bytes
|
||||
const InstructionSize = 8
|
||||
|
||||
// Instruction is a single eBPF instruction.
|
||||
type Instruction struct {
|
||||
OpCode OpCode
|
||||
Dst Register
|
||||
Src Register
|
||||
Offset int16
|
||||
Constant int64
|
||||
Reference string
|
||||
Symbol string
|
||||
}
|
||||
|
||||
// Sym creates a symbol.
|
||||
func (ins Instruction) Sym(name string) Instruction {
|
||||
ins.Symbol = name
|
||||
return ins
|
||||
}
|
||||
|
||||
// Unmarshal decodes a BPF instruction.
|
||||
func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder) (uint64, error) {
|
||||
var bi bpfInstruction
|
||||
err := binary.Read(r, bo, &bi)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
ins.OpCode = bi.OpCode
|
||||
ins.Dst = bi.Registers.Dst()
|
||||
ins.Src = bi.Registers.Src()
|
||||
ins.Offset = bi.Offset
|
||||
ins.Constant = int64(bi.Constant)
|
||||
|
||||
if !bi.OpCode.isDWordLoad() {
|
||||
return InstructionSize, nil
|
||||
}
|
||||
|
||||
var bi2 bpfInstruction
|
||||
if err := binary.Read(r, bo, &bi2); err != nil {
|
||||
// No Wrap, to avoid io.EOF clash
|
||||
return 0, errors.New("64bit immediate is missing second half")
|
||||
}
|
||||
if bi2.OpCode != 0 || bi2.Offset != 0 || bi2.Registers != 0 {
|
||||
return 0, errors.New("64bit immediate has non-zero fields")
|
||||
}
|
||||
ins.Constant = int64(uint64(uint32(bi2.Constant))<<32 | uint64(uint32(bi.Constant)))
|
||||
|
||||
return 2 * InstructionSize, nil
|
||||
}
|
||||
|
||||
// Marshal encodes a BPF instruction.
|
||||
func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error) {
|
||||
if ins.OpCode == InvalidOpCode {
|
||||
return 0, errors.New("invalid opcode")
|
||||
}
|
||||
|
||||
isDWordLoad := ins.OpCode.isDWordLoad()
|
||||
|
||||
cons := int32(ins.Constant)
|
||||
if isDWordLoad {
|
||||
// Encode least significant 32bit first for 64bit operations.
|
||||
cons = int32(uint32(ins.Constant))
|
||||
}
|
||||
|
||||
bpfi := bpfInstruction{
|
||||
ins.OpCode,
|
||||
newBPFRegisters(ins.Dst, ins.Src),
|
||||
ins.Offset,
|
||||
cons,
|
||||
}
|
||||
|
||||
if err := binary.Write(w, bo, &bpfi); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if !isDWordLoad {
|
||||
return InstructionSize, nil
|
||||
}
|
||||
|
||||
bpfi = bpfInstruction{
|
||||
Constant: int32(ins.Constant >> 32),
|
||||
}
|
||||
|
||||
if err := binary.Write(w, bo, &bpfi); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return 2 * InstructionSize, nil
|
||||
}
|
||||
|
||||
// RewriteMapPtr changes an instruction to use a new map fd.
|
||||
//
|
||||
// Returns an error if the fd is invalid, or the instruction
|
||||
// is incorrect.
|
||||
func (ins *Instruction) RewriteMapPtr(fd int) error {
|
||||
if !ins.OpCode.isDWordLoad() {
|
||||
return errors.Errorf("%s is not a 64 bit load", ins.OpCode)
|
||||
}
|
||||
|
||||
if fd < 0 {
|
||||
return errors.New("invalid fd")
|
||||
}
|
||||
|
||||
ins.Src = R1
|
||||
ins.Constant = int64(fd)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Format implements fmt.Formatter.
|
||||
func (ins Instruction) Format(f fmt.State, c rune) {
|
||||
if c != 'v' {
|
||||
fmt.Fprintf(f, "{UNRECOGNIZED: %c}", c)
|
||||
return
|
||||
}
|
||||
|
||||
op := ins.OpCode
|
||||
|
||||
if op == InvalidOpCode {
|
||||
fmt.Fprint(f, "INVALID")
|
||||
return
|
||||
}
|
||||
|
||||
// Omit trailing space for Exit
|
||||
if op.JumpOp() == Exit {
|
||||
fmt.Fprint(f, op)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Fprintf(f, "%v ", op)
|
||||
switch cls := op.Class(); cls {
|
||||
case LdClass, LdXClass, StClass, StXClass:
|
||||
switch op.Mode() {
|
||||
case ImmMode:
|
||||
fmt.Fprintf(f, "dst: %s imm: %d", ins.Dst, ins.Constant)
|
||||
case AbsMode:
|
||||
fmt.Fprintf(f, "imm: %d", ins.Constant)
|
||||
case IndMode:
|
||||
fmt.Fprintf(f, "dst: %s src: %s imm: %d", ins.Dst, ins.Src, ins.Constant)
|
||||
case MemMode:
|
||||
fmt.Fprintf(f, "dst: %s src: %s off: %d imm: %d", ins.Dst, ins.Src, ins.Offset, ins.Constant)
|
||||
case XAddMode:
|
||||
fmt.Fprintf(f, "dst: %s src: %s", ins.Dst, ins.Src)
|
||||
}
|
||||
|
||||
case ALU64Class, ALUClass:
|
||||
fmt.Fprintf(f, "dst: %s ", ins.Dst)
|
||||
if op.ALUOp() == Swap || op.Source() == ImmSource {
|
||||
fmt.Fprintf(f, "imm: %d", ins.Constant)
|
||||
} else {
|
||||
fmt.Fprintf(f, "src: %s", ins.Src)
|
||||
}
|
||||
|
||||
case JumpClass:
|
||||
switch jop := op.JumpOp(); jop {
|
||||
case Call:
|
||||
if ins.Src == R1 {
|
||||
// bpf-to-bpf call
|
||||
fmt.Fprint(f, ins.Constant)
|
||||
} else {
|
||||
fmt.Fprint(f, BuiltinFunc(ins.Constant))
|
||||
}
|
||||
|
||||
default:
|
||||
fmt.Fprintf(f, "dst: %s off: %d ", ins.Dst, ins.Offset)
|
||||
if op.Source() == ImmSource {
|
||||
fmt.Fprintf(f, "imm: %d", ins.Constant)
|
||||
} else {
|
||||
fmt.Fprintf(f, "src: %s", ins.Src)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ins.Reference != "" {
|
||||
fmt.Fprintf(f, " <%s>", ins.Reference)
|
||||
}
|
||||
}
|
||||
|
||||
// Instructions is an eBPF program.
|
||||
type Instructions []Instruction
|
||||
|
||||
func (insns Instructions) String() string {
|
||||
return fmt.Sprint(insns)
|
||||
}
|
||||
|
||||
// RewriteMapPtr rewrites all loads of a specific map pointer to a new fd.
|
||||
//
|
||||
// Returns an error if the symbol isn't used, see IsUnreferencedSymbol.
|
||||
func (insns Instructions) RewriteMapPtr(symbol string, fd int) error {
|
||||
if symbol == "" {
|
||||
return errors.New("empty symbol")
|
||||
}
|
||||
|
||||
found := false
|
||||
for i := range insns {
|
||||
ins := &insns[i]
|
||||
if ins.Reference != symbol {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := ins.RewriteMapPtr(fd); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
found = true
|
||||
}
|
||||
|
||||
if !found {
|
||||
return &unreferencedSymbolError{symbol}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// SymbolOffsets returns the set of symbols and their offset in
|
||||
// the instructions.
|
||||
func (insns Instructions) SymbolOffsets() (map[string]int, error) {
|
||||
offsets := make(map[string]int)
|
||||
|
||||
for i, ins := range insns {
|
||||
if ins.Symbol == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok := offsets[ins.Symbol]; ok {
|
||||
return nil, errors.Errorf("duplicate symbol %s", ins.Symbol)
|
||||
}
|
||||
|
||||
offsets[ins.Symbol] = i
|
||||
}
|
||||
|
||||
return offsets, nil
|
||||
}
|
||||
|
||||
// ReferenceOffsets returns the set of references and their offset in
|
||||
// the instructions.
|
||||
func (insns Instructions) ReferenceOffsets() map[string][]int {
|
||||
offsets := make(map[string][]int)
|
||||
|
||||
for i, ins := range insns {
|
||||
if ins.Reference == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
offsets[ins.Reference] = append(offsets[ins.Reference], i)
|
||||
}
|
||||
|
||||
return offsets
|
||||
}
|
||||
|
||||
func (insns Instructions) marshalledOffsets() (map[string]int, error) {
|
||||
symbols := make(map[string]int)
|
||||
|
||||
marshalledPos := 0
|
||||
for _, ins := range insns {
|
||||
currentPos := marshalledPos
|
||||
marshalledPos += ins.OpCode.marshalledInstructions()
|
||||
|
||||
if ins.Symbol == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok := symbols[ins.Symbol]; ok {
|
||||
return nil, errors.Errorf("duplicate symbol %s", ins.Symbol)
|
||||
}
|
||||
|
||||
symbols[ins.Symbol] = currentPos
|
||||
}
|
||||
|
||||
return symbols, nil
|
||||
}
|
||||
|
||||
// Format implements fmt.Formatter.
|
||||
//
|
||||
// You can control indentation of symbols by
|
||||
// specifying a width. Setting a precision controls the indentation of
|
||||
// instructions.
|
||||
// The default character is a tab, which can be overriden by specifying
|
||||
// the ' ' space flag.
|
||||
func (insns Instructions) Format(f fmt.State, c rune) {
|
||||
if c != 's' && c != 'v' {
|
||||
fmt.Fprintf(f, "{UNKNOWN FORMAT '%c'}", c)
|
||||
return
|
||||
}
|
||||
|
||||
// Precision is better in this case, because it allows
|
||||
// specifying 0 padding easily.
|
||||
padding, ok := f.Precision()
|
||||
if !ok {
|
||||
padding = 1
|
||||
}
|
||||
|
||||
indent := strings.Repeat("\t", padding)
|
||||
if f.Flag(' ') {
|
||||
indent = strings.Repeat(" ", padding)
|
||||
}
|
||||
|
||||
symPadding, ok := f.Width()
|
||||
if !ok {
|
||||
symPadding = padding - 1
|
||||
}
|
||||
if symPadding < 0 {
|
||||
symPadding = 0
|
||||
}
|
||||
|
||||
symIndent := strings.Repeat("\t", symPadding)
|
||||
if f.Flag(' ') {
|
||||
symIndent = strings.Repeat(" ", symPadding)
|
||||
}
|
||||
|
||||
// Figure out how many digits we need to represent the highest
|
||||
// offset.
|
||||
highestOffset := 0
|
||||
for _, ins := range insns {
|
||||
highestOffset += ins.OpCode.marshalledInstructions()
|
||||
}
|
||||
offsetWidth := int(math.Ceil(math.Log10(float64(highestOffset))))
|
||||
|
||||
offset := 0
|
||||
for _, ins := range insns {
|
||||
if ins.Symbol != "" {
|
||||
fmt.Fprintf(f, "%s%s:\n", symIndent, ins.Symbol)
|
||||
}
|
||||
fmt.Fprintf(f, "%s%*d: %v\n", indent, offsetWidth, offset, ins)
|
||||
offset += ins.OpCode.marshalledInstructions()
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Marshal encodes a BPF program into the kernel format.
|
||||
func (insns Instructions) Marshal(w io.Writer, bo binary.ByteOrder) error {
|
||||
absoluteOffsets, err := insns.marshalledOffsets()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
num := 0
|
||||
for i, ins := range insns {
|
||||
switch {
|
||||
case ins.OpCode.JumpOp() == Call && ins.Constant == -1:
|
||||
// Rewrite bpf to bpf call
|
||||
offset, ok := absoluteOffsets[ins.Reference]
|
||||
if !ok {
|
||||
return errors.Errorf("instruction %d: reference to missing symbol %s", i, ins.Reference)
|
||||
}
|
||||
|
||||
ins.Constant = int64(offset - num - 1)
|
||||
|
||||
case ins.OpCode.Class() == JumpClass && ins.Offset == -1:
|
||||
// Rewrite jump to label
|
||||
offset, ok := absoluteOffsets[ins.Reference]
|
||||
if !ok {
|
||||
return errors.Errorf("instruction %d: reference to missing symbol %s", i, ins.Reference)
|
||||
}
|
||||
|
||||
ins.Offset = int16(offset - num - 1)
|
||||
}
|
||||
|
||||
n, err := ins.Marshal(w, bo)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "instruction %d", i)
|
||||
}
|
||||
|
||||
num += int(n / InstructionSize)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type bpfInstruction struct {
|
||||
OpCode OpCode
|
||||
Registers bpfRegisters
|
||||
Offset int16
|
||||
Constant int32
|
||||
}
|
||||
|
||||
type bpfRegisters uint8
|
||||
|
||||
func newBPFRegisters(dst, src Register) bpfRegisters {
|
||||
return bpfRegisters((src << 4) | (dst & 0xF))
|
||||
}
|
||||
|
||||
func (r bpfRegisters) Dst() Register {
|
||||
return Register(r & 0xF)
|
||||
}
|
||||
|
||||
func (r bpfRegisters) Src() Register {
|
||||
return Register(r >> 4)
|
||||
}
|
||||
|
||||
type unreferencedSymbolError struct {
|
||||
symbol string
|
||||
}
|
||||
|
||||
func (use *unreferencedSymbolError) Error() string {
|
||||
return fmt.Sprintf("unreferenced symbol %s", use.symbol)
|
||||
}
|
||||
|
||||
// IsUnreferencedSymbol returns true if err was caused by
|
||||
// an unreferenced symbol.
|
||||
func IsUnreferencedSymbol(err error) bool {
|
||||
_, ok := err.(*unreferencedSymbolError)
|
||||
return ok
|
||||
}
|
|
@ -0,0 +1,109 @@
|
|||
package asm
|
||||
|
||||
//go:generate stringer -output jump_string.go -type=JumpOp
|
||||
|
||||
// JumpOp affect control flow.
|
||||
//
|
||||
// msb lsb
|
||||
// +----+-+---+
|
||||
// |OP |s|cls|
|
||||
// +----+-+---+
|
||||
type JumpOp uint8
|
||||
|
||||
const jumpMask OpCode = aluMask
|
||||
|
||||
const (
|
||||
// InvalidJumpOp is returned by getters when invoked
|
||||
// on non branch OpCodes
|
||||
InvalidJumpOp JumpOp = 0xff
|
||||
// Ja jumps by offset unconditionally
|
||||
Ja JumpOp = 0x00
|
||||
// JEq jumps by offset if r == imm
|
||||
JEq JumpOp = 0x10
|
||||
// JGT jumps by offset if r > imm
|
||||
JGT JumpOp = 0x20
|
||||
// JGE jumps by offset if r >= imm
|
||||
JGE JumpOp = 0x30
|
||||
// JSet jumps by offset if r & imm
|
||||
JSet JumpOp = 0x40
|
||||
// JNE jumps by offset if r != imm
|
||||
JNE JumpOp = 0x50
|
||||
// JSGT jumps by offset if signed r > signed imm
|
||||
JSGT JumpOp = 0x60
|
||||
// JSGE jumps by offset if signed r >= signed imm
|
||||
JSGE JumpOp = 0x70
|
||||
// Call builtin or user defined function from imm
|
||||
Call JumpOp = 0x80
|
||||
// Exit ends execution, with value in r0
|
||||
Exit JumpOp = 0x90
|
||||
// JLT jumps by offset if r < imm
|
||||
JLT JumpOp = 0xa0
|
||||
// JLE jumps by offset if r <= imm
|
||||
JLE JumpOp = 0xb0
|
||||
// JSLT jumps by offset if signed r < signed imm
|
||||
JSLT JumpOp = 0xc0
|
||||
// JSLE jumps by offset if signed r <= signed imm
|
||||
JSLE JumpOp = 0xd0
|
||||
)
|
||||
|
||||
// Return emits an exit instruction.
|
||||
//
|
||||
// Requires a return value in R0.
|
||||
func Return() Instruction {
|
||||
return Instruction{
|
||||
OpCode: OpCode(JumpClass).SetJumpOp(Exit),
|
||||
}
|
||||
}
|
||||
|
||||
// Op returns the OpCode for a given jump source.
|
||||
func (op JumpOp) Op(source Source) OpCode {
|
||||
return OpCode(JumpClass).SetJumpOp(op).SetSource(source)
|
||||
}
|
||||
|
||||
// Imm compares dst to value, and adjusts PC by offset if the condition is fulfilled.
|
||||
func (op JumpOp) Imm(dst Register, value int32, label string) Instruction {
|
||||
if op == Exit || op == Call || op == Ja {
|
||||
return Instruction{OpCode: InvalidOpCode}
|
||||
}
|
||||
|
||||
return Instruction{
|
||||
OpCode: OpCode(JumpClass).SetJumpOp(op).SetSource(ImmSource),
|
||||
Dst: dst,
|
||||
Offset: -1,
|
||||
Constant: int64(value),
|
||||
Reference: label,
|
||||
}
|
||||
}
|
||||
|
||||
// Reg compares dst to src, and adjusts PC by offset if the condition is fulfilled.
|
||||
func (op JumpOp) Reg(dst, src Register, label string) Instruction {
|
||||
if op == Exit || op == Call || op == Ja {
|
||||
return Instruction{OpCode: InvalidOpCode}
|
||||
}
|
||||
|
||||
return Instruction{
|
||||
OpCode: OpCode(JumpClass).SetJumpOp(op).SetSource(RegSource),
|
||||
Dst: dst,
|
||||
Src: src,
|
||||
Offset: -1,
|
||||
Reference: label,
|
||||
}
|
||||
}
|
||||
|
||||
// Label adjusts PC to the address of the label.
|
||||
func (op JumpOp) Label(label string) Instruction {
|
||||
if op == Call {
|
||||
return Instruction{
|
||||
OpCode: OpCode(JumpClass).SetJumpOp(Call),
|
||||
Src: R1,
|
||||
Constant: -1,
|
||||
Reference: label,
|
||||
}
|
||||
}
|
||||
|
||||
return Instruction{
|
||||
OpCode: OpCode(JumpClass).SetJumpOp(op),
|
||||
Offset: -1,
|
||||
Reference: label,
|
||||
}
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
// Code generated by "stringer -output jump_string.go -type=JumpOp"; DO NOT EDIT.
|
||||
|
||||
package asm
|
||||
|
||||
import "strconv"
|
||||
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[InvalidJumpOp-255]
|
||||
_ = x[Ja-0]
|
||||
_ = x[JEq-16]
|
||||
_ = x[JGT-32]
|
||||
_ = x[JGE-48]
|
||||
_ = x[JSet-64]
|
||||
_ = x[JNE-80]
|
||||
_ = x[JSGT-96]
|
||||
_ = x[JSGE-112]
|
||||
_ = x[Call-128]
|
||||
_ = x[Exit-144]
|
||||
_ = x[JLT-160]
|
||||
_ = x[JLE-176]
|
||||
_ = x[JSLT-192]
|
||||
_ = x[JSLE-208]
|
||||
}
|
||||
|
||||
const _JumpOp_name = "JaJEqJGTJGEJSetJNEJSGTJSGECallExitJLTJLEJSLTJSLEInvalidJumpOp"
|
||||
|
||||
var _JumpOp_map = map[JumpOp]string{
|
||||
0: _JumpOp_name[0:2],
|
||||
16: _JumpOp_name[2:5],
|
||||
32: _JumpOp_name[5:8],
|
||||
48: _JumpOp_name[8:11],
|
||||
64: _JumpOp_name[11:15],
|
||||
80: _JumpOp_name[15:18],
|
||||
96: _JumpOp_name[18:22],
|
||||
112: _JumpOp_name[22:26],
|
||||
128: _JumpOp_name[26:30],
|
||||
144: _JumpOp_name[30:34],
|
||||
160: _JumpOp_name[34:37],
|
||||
176: _JumpOp_name[37:40],
|
||||
192: _JumpOp_name[40:44],
|
||||
208: _JumpOp_name[44:48],
|
||||
255: _JumpOp_name[48:61],
|
||||
}
|
||||
|
||||
func (i JumpOp) String() string {
|
||||
if str, ok := _JumpOp_map[i]; ok {
|
||||
return str
|
||||
}
|
||||
return "JumpOp(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
|
@ -0,0 +1,189 @@
|
|||
package asm
|
||||
|
||||
//go:generate stringer -output load_store_string.go -type=Mode,Size
|
||||
|
||||
// Mode for load and store operations
|
||||
//
|
||||
// msb lsb
|
||||
// +---+--+---+
|
||||
// |MDE|sz|cls|
|
||||
// +---+--+---+
|
||||
type Mode uint8
|
||||
|
||||
const modeMask OpCode = 0xe0
|
||||
|
||||
const (
|
||||
// InvalidMode is returned by getters when invoked
|
||||
// on non load / store OpCodes
|
||||
InvalidMode Mode = 0xff
|
||||
// ImmMode - immediate value
|
||||
ImmMode Mode = 0x00
|
||||
// AbsMode - immediate value + offset
|
||||
AbsMode Mode = 0x20
|
||||
// IndMode - indirect (imm+src)
|
||||
IndMode Mode = 0x40
|
||||
// MemMode - load from memory
|
||||
MemMode Mode = 0x60
|
||||
// XAddMode - add atomically across processors.
|
||||
XAddMode Mode = 0xc0
|
||||
)
|
||||
|
||||
// Size of load and store operations
|
||||
//
|
||||
// msb lsb
|
||||
// +---+--+---+
|
||||
// |mde|SZ|cls|
|
||||
// +---+--+---+
|
||||
type Size uint8
|
||||
|
||||
const sizeMask OpCode = 0x18
|
||||
|
||||
const (
|
||||
// InvalidSize is returned by getters when invoked
|
||||
// on non load / store OpCodes
|
||||
InvalidSize Size = 0xff
|
||||
// DWord - double word; 64 bits
|
||||
DWord Size = 0x18
|
||||
// Word - word; 32 bits
|
||||
Word Size = 0x00
|
||||
// Half - half-word; 16 bits
|
||||
Half Size = 0x08
|
||||
// Byte - byte; 8 bits
|
||||
Byte Size = 0x10
|
||||
)
|
||||
|
||||
// Sizeof returns the size in bytes.
|
||||
func (s Size) Sizeof() int {
|
||||
switch s {
|
||||
case DWord:
|
||||
return 8
|
||||
case Word:
|
||||
return 4
|
||||
case Half:
|
||||
return 2
|
||||
case Byte:
|
||||
return 1
|
||||
default:
|
||||
return -1
|
||||
}
|
||||
}
|
||||
|
||||
// LoadMemOp returns the OpCode to load a value of given size from memory.
|
||||
func LoadMemOp(size Size) OpCode {
|
||||
return OpCode(LdXClass).SetMode(MemMode).SetSize(size)
|
||||
}
|
||||
|
||||
// LoadMem emits `dst = *(size *)(src + offset)`.
|
||||
func LoadMem(dst, src Register, offset int16, size Size) Instruction {
|
||||
return Instruction{
|
||||
OpCode: LoadMemOp(size),
|
||||
Dst: dst,
|
||||
Src: src,
|
||||
Offset: offset,
|
||||
}
|
||||
}
|
||||
|
||||
// LoadImmOp returns the OpCode to load an immediate of given size.
|
||||
//
|
||||
// As of kernel 4.20, only DWord size is accepted.
|
||||
func LoadImmOp(size Size) OpCode {
|
||||
return OpCode(LdClass).SetMode(ImmMode).SetSize(size)
|
||||
}
|
||||
|
||||
// LoadImm emits `dst = (size)value`.
|
||||
//
|
||||
// As of kernel 4.20, only DWord size is accepted.
|
||||
func LoadImm(dst Register, value int64, size Size) Instruction {
|
||||
return Instruction{
|
||||
OpCode: LoadImmOp(size),
|
||||
Dst: dst,
|
||||
Constant: value,
|
||||
}
|
||||
}
|
||||
|
||||
// LoadMapPtr stores a pointer to a map in dst.
|
||||
func LoadMapPtr(dst Register, fd int) Instruction {
|
||||
if fd < 0 {
|
||||
return Instruction{OpCode: InvalidOpCode}
|
||||
}
|
||||
|
||||
return Instruction{
|
||||
OpCode: LoadImmOp(DWord),
|
||||
Dst: dst,
|
||||
Src: R1,
|
||||
Constant: int64(fd),
|
||||
}
|
||||
}
|
||||
|
||||
// LoadIndOp returns the OpCode for loading a value of given size from an sk_buff.
|
||||
func LoadIndOp(size Size) OpCode {
|
||||
return OpCode(LdClass).SetMode(IndMode).SetSize(size)
|
||||
}
|
||||
|
||||
// LoadInd emits `dst = ntoh(*(size *)(((sk_buff *)R6)->data + src + offset))`.
|
||||
func LoadInd(dst, src Register, offset int32, size Size) Instruction {
|
||||
return Instruction{
|
||||
OpCode: LoadIndOp(size),
|
||||
Dst: dst,
|
||||
Src: src,
|
||||
Constant: int64(offset),
|
||||
}
|
||||
}
|
||||
|
||||
// LoadAbsOp returns the OpCode for loading a value of given size from an sk_buff.
|
||||
func LoadAbsOp(size Size) OpCode {
|
||||
return OpCode(LdClass).SetMode(AbsMode).SetSize(size)
|
||||
}
|
||||
|
||||
// LoadAbs emits `r0 = ntoh(*(size *)(((sk_buff *)R6)->data + offset))`.
|
||||
func LoadAbs(offset int32, size Size) Instruction {
|
||||
return Instruction{
|
||||
OpCode: LoadAbsOp(size),
|
||||
Dst: R0,
|
||||
Constant: int64(offset),
|
||||
}
|
||||
}
|
||||
|
||||
// StoreMemOp returns the OpCode for storing a register of given size in memory.
|
||||
func StoreMemOp(size Size) OpCode {
|
||||
return OpCode(StXClass).SetMode(MemMode).SetSize(size)
|
||||
}
|
||||
|
||||
// StoreMem emits `*(size *)(dst + offset) = src`
|
||||
func StoreMem(dst Register, offset int16, src Register, size Size) Instruction {
|
||||
return Instruction{
|
||||
OpCode: StoreMemOp(size),
|
||||
Dst: dst,
|
||||
Src: src,
|
||||
Offset: offset,
|
||||
}
|
||||
}
|
||||
|
||||
// StoreImmOp returns the OpCode for storing an immediate of given size in memory.
|
||||
func StoreImmOp(size Size) OpCode {
|
||||
return OpCode(StClass).SetMode(MemMode).SetSize(size)
|
||||
}
|
||||
|
||||
// StoreImm emits `*(size *)(dst + offset) = value`.
|
||||
func StoreImm(dst Register, offset int16, value int64, size Size) Instruction {
|
||||
return Instruction{
|
||||
OpCode: StoreImmOp(size),
|
||||
Dst: dst,
|
||||
Offset: offset,
|
||||
Constant: value,
|
||||
}
|
||||
}
|
||||
|
||||
// StoreXAddOp returns the OpCode to atomically add a register to a value in memory.
|
||||
func StoreXAddOp(size Size) OpCode {
|
||||
return OpCode(StXClass).SetMode(XAddMode).SetSize(size)
|
||||
}
|
||||
|
||||
// StoreXAdd atomically adds src to *dst.
|
||||
func StoreXAdd(dst, src Register, size Size) Instruction {
|
||||
return Instruction{
|
||||
OpCode: StoreXAddOp(size),
|
||||
Dst: dst,
|
||||
Src: src,
|
||||
}
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
// Code generated by "stringer -output load_store_string.go -type=Mode,Size"; DO NOT EDIT.
|
||||
|
||||
package asm
|
||||
|
||||
import "strconv"
|
||||
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[InvalidMode-255]
|
||||
_ = x[ImmMode-0]
|
||||
_ = x[AbsMode-32]
|
||||
_ = x[IndMode-64]
|
||||
_ = x[MemMode-96]
|
||||
_ = x[XAddMode-192]
|
||||
}
|
||||
|
||||
const (
|
||||
_Mode_name_0 = "ImmMode"
|
||||
_Mode_name_1 = "AbsMode"
|
||||
_Mode_name_2 = "IndMode"
|
||||
_Mode_name_3 = "MemMode"
|
||||
_Mode_name_4 = "XAddMode"
|
||||
_Mode_name_5 = "InvalidMode"
|
||||
)
|
||||
|
||||
func (i Mode) String() string {
|
||||
switch {
|
||||
case i == 0:
|
||||
return _Mode_name_0
|
||||
case i == 32:
|
||||
return _Mode_name_1
|
||||
case i == 64:
|
||||
return _Mode_name_2
|
||||
case i == 96:
|
||||
return _Mode_name_3
|
||||
case i == 192:
|
||||
return _Mode_name_4
|
||||
case i == 255:
|
||||
return _Mode_name_5
|
||||
default:
|
||||
return "Mode(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
}
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[InvalidSize-255]
|
||||
_ = x[DWord-24]
|
||||
_ = x[Word-0]
|
||||
_ = x[Half-8]
|
||||
_ = x[Byte-16]
|
||||
}
|
||||
|
||||
const (
|
||||
_Size_name_0 = "Word"
|
||||
_Size_name_1 = "Half"
|
||||
_Size_name_2 = "Byte"
|
||||
_Size_name_3 = "DWord"
|
||||
_Size_name_4 = "InvalidSize"
|
||||
)
|
||||
|
||||
func (i Size) String() string {
|
||||
switch {
|
||||
case i == 0:
|
||||
return _Size_name_0
|
||||
case i == 8:
|
||||
return _Size_name_1
|
||||
case i == 16:
|
||||
return _Size_name_2
|
||||
case i == 24:
|
||||
return _Size_name_3
|
||||
case i == 255:
|
||||
return _Size_name_4
|
||||
default:
|
||||
return "Size(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,237 @@
|
|||
package asm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
//go:generate stringer -output opcode_string.go -type=Class
|
||||
|
||||
type encoding int
|
||||
|
||||
const (
|
||||
unknownEncoding encoding = iota
|
||||
loadOrStore
|
||||
jumpOrALU
|
||||
)
|
||||
|
||||
// Class of operations
|
||||
//
|
||||
// msb lsb
|
||||
// +---+--+---+
|
||||
// | ?? |CLS|
|
||||
// +---+--+---+
|
||||
type Class uint8
|
||||
|
||||
const classMask OpCode = 0x07
|
||||
|
||||
const (
|
||||
// LdClass load memory
|
||||
LdClass Class = 0x00
|
||||
// LdXClass load memory from constant
|
||||
LdXClass Class = 0x01
|
||||
// StClass load register from memory
|
||||
StClass Class = 0x02
|
||||
// StXClass load register from constant
|
||||
StXClass Class = 0x03
|
||||
// ALUClass arithmetic operators
|
||||
ALUClass Class = 0x04
|
||||
// JumpClass jump operators
|
||||
JumpClass Class = 0x05
|
||||
// ALU64Class arithmetic in 64 bit mode
|
||||
ALU64Class Class = 0x07
|
||||
)
|
||||
|
||||
func (cls Class) encoding() encoding {
|
||||
switch cls {
|
||||
case LdClass, LdXClass, StClass, StXClass:
|
||||
return loadOrStore
|
||||
case ALU64Class, ALUClass, JumpClass:
|
||||
return jumpOrALU
|
||||
default:
|
||||
return unknownEncoding
|
||||
}
|
||||
}
|
||||
|
||||
// OpCode is a packed eBPF opcode.
|
||||
//
|
||||
// Its encoding is defined by a Class value:
|
||||
//
|
||||
// msb lsb
|
||||
// +----+-+---+
|
||||
// | ???? |CLS|
|
||||
// +----+-+---+
|
||||
type OpCode uint8
|
||||
|
||||
// InvalidOpCode is returned by setters on OpCode
|
||||
const InvalidOpCode OpCode = 0xff
|
||||
|
||||
// marshalledInstructions returns the number of BPF instructions required
|
||||
// to encode this opcode.
|
||||
func (op OpCode) marshalledInstructions() int {
|
||||
if op == LoadImmOp(DWord) {
|
||||
return 2
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
||||
func (op OpCode) isDWordLoad() bool {
|
||||
return op == LoadImmOp(DWord)
|
||||
}
|
||||
|
||||
// Class returns the class of operation.
|
||||
func (op OpCode) Class() Class {
|
||||
return Class(op & classMask)
|
||||
}
|
||||
|
||||
// Mode returns the mode for load and store operations.
|
||||
func (op OpCode) Mode() Mode {
|
||||
if op.Class().encoding() != loadOrStore {
|
||||
return InvalidMode
|
||||
}
|
||||
return Mode(op & modeMask)
|
||||
}
|
||||
|
||||
// Size returns the size for load and store operations.
|
||||
func (op OpCode) Size() Size {
|
||||
if op.Class().encoding() != loadOrStore {
|
||||
return InvalidSize
|
||||
}
|
||||
return Size(op & sizeMask)
|
||||
}
|
||||
|
||||
// Source returns the source for branch and ALU operations.
|
||||
func (op OpCode) Source() Source {
|
||||
if op.Class().encoding() != jumpOrALU || op.ALUOp() == Swap {
|
||||
return InvalidSource
|
||||
}
|
||||
return Source(op & sourceMask)
|
||||
}
|
||||
|
||||
// ALUOp returns the ALUOp.
|
||||
func (op OpCode) ALUOp() ALUOp {
|
||||
if op.Class().encoding() != jumpOrALU {
|
||||
return InvalidALUOp
|
||||
}
|
||||
return ALUOp(op & aluMask)
|
||||
}
|
||||
|
||||
// Endianness returns the Endianness for a byte swap instruction.
|
||||
func (op OpCode) Endianness() Endianness {
|
||||
if op.ALUOp() != Swap {
|
||||
return InvalidEndian
|
||||
}
|
||||
return Endianness(op & endianMask)
|
||||
}
|
||||
|
||||
// JumpOp returns the JumpOp.
|
||||
func (op OpCode) JumpOp() JumpOp {
|
||||
if op.Class().encoding() != jumpOrALU {
|
||||
return InvalidJumpOp
|
||||
}
|
||||
return JumpOp(op & jumpMask)
|
||||
}
|
||||
|
||||
// SetMode sets the mode on load and store operations.
|
||||
//
|
||||
// Returns InvalidOpCode if op is of the wrong class.
|
||||
func (op OpCode) SetMode(mode Mode) OpCode {
|
||||
if op.Class().encoding() != loadOrStore || !valid(OpCode(mode), modeMask) {
|
||||
return InvalidOpCode
|
||||
}
|
||||
return (op & ^modeMask) | OpCode(mode)
|
||||
}
|
||||
|
||||
// SetSize sets the size on load and store operations.
|
||||
//
|
||||
// Returns InvalidOpCode if op is of the wrong class.
|
||||
func (op OpCode) SetSize(size Size) OpCode {
|
||||
if op.Class().encoding() != loadOrStore || !valid(OpCode(size), sizeMask) {
|
||||
return InvalidOpCode
|
||||
}
|
||||
return (op & ^sizeMask) | OpCode(size)
|
||||
}
|
||||
|
||||
// SetSource sets the source on jump and ALU operations.
|
||||
//
|
||||
// Returns InvalidOpCode if op is of the wrong class.
|
||||
func (op OpCode) SetSource(source Source) OpCode {
|
||||
if op.Class().encoding() != jumpOrALU || !valid(OpCode(source), sourceMask) {
|
||||
return InvalidOpCode
|
||||
}
|
||||
return (op & ^sourceMask) | OpCode(source)
|
||||
}
|
||||
|
||||
// SetALUOp sets the ALUOp on ALU operations.
|
||||
//
|
||||
// Returns InvalidOpCode if op is of the wrong class.
|
||||
func (op OpCode) SetALUOp(alu ALUOp) OpCode {
|
||||
class := op.Class()
|
||||
if (class != ALUClass && class != ALU64Class) || !valid(OpCode(alu), aluMask) {
|
||||
return InvalidOpCode
|
||||
}
|
||||
return (op & ^aluMask) | OpCode(alu)
|
||||
}
|
||||
|
||||
// SetJumpOp sets the JumpOp on jump operations.
|
||||
//
|
||||
// Returns InvalidOpCode if op is of the wrong class.
|
||||
func (op OpCode) SetJumpOp(jump JumpOp) OpCode {
|
||||
if op.Class() != JumpClass || !valid(OpCode(jump), jumpMask) {
|
||||
return InvalidOpCode
|
||||
}
|
||||
return (op & ^jumpMask) | OpCode(jump)
|
||||
}
|
||||
|
||||
func (op OpCode) String() string {
|
||||
var f strings.Builder
|
||||
|
||||
switch class := op.Class(); class {
|
||||
case LdClass, LdXClass, StClass, StXClass:
|
||||
f.WriteString(strings.TrimSuffix(class.String(), "Class"))
|
||||
|
||||
mode := op.Mode()
|
||||
f.WriteString(strings.TrimSuffix(mode.String(), "Mode"))
|
||||
|
||||
switch op.Size() {
|
||||
case DWord:
|
||||
f.WriteString("DW")
|
||||
case Word:
|
||||
f.WriteString("W")
|
||||
case Half:
|
||||
f.WriteString("H")
|
||||
case Byte:
|
||||
f.WriteString("B")
|
||||
}
|
||||
|
||||
case ALU64Class, ALUClass:
|
||||
f.WriteString(op.ALUOp().String())
|
||||
|
||||
if op.ALUOp() == Swap {
|
||||
// Width for Endian is controlled by Constant
|
||||
f.WriteString(op.Endianness().String())
|
||||
} else {
|
||||
if class == ALUClass {
|
||||
f.WriteString("32")
|
||||
}
|
||||
|
||||
f.WriteString(strings.TrimSuffix(op.Source().String(), "Source"))
|
||||
}
|
||||
|
||||
case JumpClass:
|
||||
f.WriteString(op.JumpOp().String())
|
||||
if jop := op.JumpOp(); jop != Exit && jop != Call {
|
||||
f.WriteString(strings.TrimSuffix(op.Source().String(), "Source"))
|
||||
}
|
||||
|
||||
default:
|
||||
fmt.Fprintf(&f, "%#x", op)
|
||||
}
|
||||
|
||||
return f.String()
|
||||
}
|
||||
|
||||
// valid returns true if all bits in value are covered by mask.
|
||||
func valid(value, mask OpCode) bool {
|
||||
return value & ^mask == 0
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
// Code generated by "stringer -output opcode_string.go -type=Class"; DO NOT EDIT.
|
||||
|
||||
package asm
|
||||
|
||||
import "strconv"
|
||||
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[LdClass-0]
|
||||
_ = x[LdXClass-1]
|
||||
_ = x[StClass-2]
|
||||
_ = x[StXClass-3]
|
||||
_ = x[ALUClass-4]
|
||||
_ = x[JumpClass-5]
|
||||
_ = x[ALU64Class-7]
|
||||
}
|
||||
|
||||
const (
|
||||
_Class_name_0 = "LdClassLdXClassStClassStXClassALUClassJumpClass"
|
||||
_Class_name_1 = "ALU64Class"
|
||||
)
|
||||
|
||||
var (
|
||||
_Class_index_0 = [...]uint8{0, 7, 15, 22, 30, 38, 47}
|
||||
)
|
||||
|
||||
func (i Class) String() string {
|
||||
switch {
|
||||
case 0 <= i && i <= 5:
|
||||
return _Class_name_0[_Class_index_0[i]:_Class_index_0[i+1]]
|
||||
case i == 7:
|
||||
return _Class_name_1
|
||||
default:
|
||||
return "Class(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
package asm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Register is the source or destination of most operations.
|
||||
type Register uint8
|
||||
|
||||
// R0 contains return values.
|
||||
const R0 Register = 0
|
||||
|
||||
// Registers for function arguments.
|
||||
const (
|
||||
R1 Register = R0 + 1 + iota
|
||||
R2
|
||||
R3
|
||||
R4
|
||||
R5
|
||||
)
|
||||
|
||||
// Callee saved registers preserved by function calls.
|
||||
const (
|
||||
R6 Register = R5 + 1 + iota
|
||||
R7
|
||||
R8
|
||||
R9
|
||||
)
|
||||
|
||||
// Read-only frame pointer to access stack.
|
||||
const (
|
||||
R10 Register = R9 + 1
|
||||
RFP = R10
|
||||
)
|
||||
|
||||
func (r Register) String() string {
|
||||
v := uint8(r)
|
||||
if v == 10 {
|
||||
return "rfp"
|
||||
}
|
||||
return fmt.Sprintf("r%d", v)
|
||||
}
|
|
@ -0,0 +1,148 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// CollectionOptions control loading a collection into the kernel.
|
||||
type CollectionOptions struct {
|
||||
Programs ProgramOptions
|
||||
}
|
||||
|
||||
// CollectionSpec describes a collection.
|
||||
type CollectionSpec struct {
|
||||
Maps map[string]*MapSpec
|
||||
Programs map[string]*ProgramSpec
|
||||
}
|
||||
|
||||
// Copy returns a recursive copy of the spec.
|
||||
func (cs *CollectionSpec) Copy() *CollectionSpec {
|
||||
if cs == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
cpy := CollectionSpec{
|
||||
Maps: make(map[string]*MapSpec, len(cs.Maps)),
|
||||
Programs: make(map[string]*ProgramSpec, len(cs.Programs)),
|
||||
}
|
||||
|
||||
for name, spec := range cs.Maps {
|
||||
cpy.Maps[name] = spec.Copy()
|
||||
}
|
||||
|
||||
for name, spec := range cs.Programs {
|
||||
cpy.Programs[name] = spec.Copy()
|
||||
}
|
||||
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// Collection is a collection of Programs and Maps associated
|
||||
// with their symbols
|
||||
type Collection struct {
|
||||
Programs map[string]*Program
|
||||
Maps map[string]*Map
|
||||
}
|
||||
|
||||
// NewCollection creates a Collection from a specification.
|
||||
//
|
||||
// Only maps referenced by at least one of the programs are initialized.
|
||||
func NewCollection(spec *CollectionSpec) (*Collection, error) {
|
||||
return NewCollectionWithOptions(spec, CollectionOptions{})
|
||||
}
|
||||
|
||||
// NewCollectionWithOptions creates a Collection from a specification.
|
||||
//
|
||||
// Only maps referenced by at least one of the programs are initialized.
|
||||
func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (*Collection, error) {
|
||||
maps := make(map[string]*Map)
|
||||
for mapName, mapSpec := range spec.Maps {
|
||||
m, err := NewMap(mapSpec)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "map %s", mapName)
|
||||
}
|
||||
maps[mapName] = m
|
||||
}
|
||||
|
||||
progs := make(map[string]*Program)
|
||||
for progName, origProgSpec := range spec.Programs {
|
||||
progSpec := origProgSpec.Copy()
|
||||
|
||||
// Rewrite any reference to a valid map.
|
||||
for i := range progSpec.Instructions {
|
||||
var (
|
||||
ins = &progSpec.Instructions[i]
|
||||
m = maps[ins.Reference]
|
||||
)
|
||||
|
||||
if ins.Reference == "" || m == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if ins.Src == asm.R1 {
|
||||
// Don't overwrite maps already rewritten, users can
|
||||
// rewrite programs in the spec themselves
|
||||
continue
|
||||
}
|
||||
|
||||
if err := ins.RewriteMapPtr(m.FD()); err != nil {
|
||||
return nil, errors.Wrapf(err, "progam %s: map %s", progName, ins.Reference)
|
||||
}
|
||||
}
|
||||
|
||||
prog, err := NewProgramWithOptions(progSpec, opts.Programs)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "program %s", progName)
|
||||
}
|
||||
progs[progName] = prog
|
||||
}
|
||||
|
||||
return &Collection{
|
||||
progs,
|
||||
maps,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// LoadCollection parses an object file and converts it to a collection.
|
||||
func LoadCollection(file string) (*Collection, error) {
|
||||
spec, err := LoadCollectionSpec(file)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return NewCollection(spec)
|
||||
}
|
||||
|
||||
// Close frees all maps and programs associated with the collection.
|
||||
//
|
||||
// The collection mustn't be used afterwards.
|
||||
func (coll *Collection) Close() {
|
||||
for _, prog := range coll.Programs {
|
||||
prog.Close()
|
||||
}
|
||||
for _, m := range coll.Maps {
|
||||
m.Close()
|
||||
}
|
||||
}
|
||||
|
||||
// DetachMap removes the named map from the Collection.
|
||||
//
|
||||
// This means that a later call to Close() will not affect this map.
|
||||
//
|
||||
// Returns nil if no map of that name exists.
|
||||
func (coll *Collection) DetachMap(name string) *Map {
|
||||
m := coll.Maps[name]
|
||||
delete(coll.Maps, name)
|
||||
return m
|
||||
}
|
||||
|
||||
// DetachProgram removes the named program from the Collection.
|
||||
//
|
||||
// This means that a later call to Close() will not affect this program.
|
||||
//
|
||||
// Returns nil if no program of that name exists.
|
||||
func (coll *Collection) DetachProgram(name string) *Program {
|
||||
p := coll.Programs[name]
|
||||
delete(coll.Programs, name)
|
||||
return p
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
// Package ebpf is a toolkit for working with eBPF programs.
|
||||
//
|
||||
// eBPF programs are small snippets of code which are executed directly
|
||||
// in a VM in the Linux kernel, which makes them very fast and flexible.
|
||||
// Many Linux subsystems now accept eBPF programs. This makes it possible
|
||||
// to implement highly application specific logic inside the kernel,
|
||||
// without having to modify the actual kernel itself.
|
||||
//
|
||||
// This package is designed for long-running processes which
|
||||
// want to use eBPF to implement part of their application logic. It has no
|
||||
// run-time dependencies outside of the library and the Linux kernel itself.
|
||||
// eBPF code should be compiled ahead of time using clang, and shipped with
|
||||
// your application as any other resource.
|
||||
//
|
||||
// This package doesn't include code required to attach eBPF to Linux
|
||||
// subsystems, since this varies per subsystem.
|
||||
package ebpf
|
|
@ -0,0 +1,392 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"debug/elf"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/cilium/ebpf/asm"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
type elfCode struct {
|
||||
*elf.File
|
||||
symbols []elf.Symbol
|
||||
symbolsPerSection map[elf.SectionIndex]map[uint64]string
|
||||
}
|
||||
|
||||
// LoadCollectionSpec parses an ELF file into a CollectionSpec.
|
||||
func LoadCollectionSpec(file string) (*CollectionSpec, error) {
|
||||
f, err := os.Open(file)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
spec, err := LoadCollectionSpecFromReader(f)
|
||||
return spec, errors.Wrapf(err, "file %s", file)
|
||||
}
|
||||
|
||||
// LoadCollectionSpecFromReader parses an ELF file into a CollectionSpec.
|
||||
func LoadCollectionSpecFromReader(code io.ReaderAt) (*CollectionSpec, error) {
|
||||
f, err := elf.NewFile(code)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
symbols, err := f.Symbols()
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "load symbols")
|
||||
}
|
||||
|
||||
ec := &elfCode{f, symbols, symbolsPerSection(symbols)}
|
||||
|
||||
var licenseSection, versionSection *elf.Section
|
||||
progSections := make(map[elf.SectionIndex]*elf.Section)
|
||||
relSections := make(map[elf.SectionIndex]*elf.Section)
|
||||
mapSections := make(map[elf.SectionIndex]*elf.Section)
|
||||
for i, sec := range ec.Sections {
|
||||
switch {
|
||||
case strings.HasPrefix(sec.Name, "license"):
|
||||
licenseSection = sec
|
||||
case strings.HasPrefix(sec.Name, "version"):
|
||||
versionSection = sec
|
||||
case strings.HasPrefix(sec.Name, "maps"):
|
||||
mapSections[elf.SectionIndex(i)] = sec
|
||||
case sec.Type == elf.SHT_REL:
|
||||
if int(sec.Info) >= len(ec.Sections) {
|
||||
return nil, errors.Errorf("found relocation section %v for missing section %v", i, sec.Info)
|
||||
}
|
||||
|
||||
// Store relocations under the section index of the target
|
||||
idx := elf.SectionIndex(sec.Info)
|
||||
if relSections[idx] != nil {
|
||||
return nil, errors.Errorf("section %d has multiple relocation sections", idx)
|
||||
}
|
||||
relSections[idx] = sec
|
||||
case sec.Type == elf.SHT_PROGBITS && (sec.Flags&elf.SHF_EXECINSTR) != 0 && sec.Size > 0:
|
||||
progSections[elf.SectionIndex(i)] = sec
|
||||
}
|
||||
}
|
||||
|
||||
license, err := loadLicense(licenseSection)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "load license")
|
||||
}
|
||||
|
||||
version, err := loadVersion(versionSection, ec.ByteOrder)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "load version")
|
||||
}
|
||||
|
||||
maps, err := ec.loadMaps(mapSections)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "load maps")
|
||||
}
|
||||
|
||||
progs, libs, err := ec.loadPrograms(progSections, relSections, license, version)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "load programs")
|
||||
}
|
||||
|
||||
if len(libs) > 0 {
|
||||
for name, prog := range progs {
|
||||
prog.Instructions, err = link(prog.Instructions, libs...)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "program %s", name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return &CollectionSpec{maps, progs}, nil
|
||||
}
|
||||
|
||||
func loadLicense(sec *elf.Section) (string, error) {
|
||||
if sec == nil {
|
||||
return "", errors.Errorf("missing license section")
|
||||
}
|
||||
data, err := sec.Data()
|
||||
if err != nil {
|
||||
return "", errors.Wrapf(err, "section %s", sec.Name)
|
||||
}
|
||||
return string(bytes.TrimRight(data, "\000")), nil
|
||||
}
|
||||
|
||||
func loadVersion(sec *elf.Section, bo binary.ByteOrder) (uint32, error) {
|
||||
if sec == nil {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
var version uint32
|
||||
err := binary.Read(sec.Open(), bo, &version)
|
||||
return version, errors.Wrapf(err, "section %s", sec.Name)
|
||||
}
|
||||
|
||||
func (ec *elfCode) loadPrograms(progSections, relSections map[elf.SectionIndex]*elf.Section, license string, version uint32) (map[string]*ProgramSpec, []asm.Instructions, error) {
|
||||
var (
|
||||
progs = make(map[string]*ProgramSpec)
|
||||
libs []asm.Instructions
|
||||
)
|
||||
for idx, prog := range progSections {
|
||||
syms := ec.symbolsPerSection[idx]
|
||||
if len(syms) == 0 {
|
||||
return nil, nil, errors.Errorf("section %v: missing symbols", prog.Name)
|
||||
}
|
||||
|
||||
funcSym := syms[0]
|
||||
if funcSym == "" {
|
||||
return nil, nil, errors.Errorf("section %v: no label at start", prog.Name)
|
||||
}
|
||||
|
||||
rels, err := ec.loadRelocations(relSections[idx])
|
||||
if err != nil {
|
||||
return nil, nil, errors.Wrapf(err, "program %s: can't load relocations", funcSym)
|
||||
}
|
||||
|
||||
insns, err := ec.loadInstructions(prog, syms, rels)
|
||||
if err != nil {
|
||||
return nil, nil, errors.Wrapf(err, "program %s: can't unmarshal instructions", funcSym)
|
||||
}
|
||||
|
||||
if progType, attachType := getProgType(prog.Name); progType == UnspecifiedProgram {
|
||||
// There is no single name we can use for "library" sections,
|
||||
// since they may contain multiple functions. We'll decode the
|
||||
// labels they contain later on, and then link sections that way.
|
||||
libs = append(libs, insns)
|
||||
} else {
|
||||
progs[funcSym] = &ProgramSpec{
|
||||
Name: funcSym,
|
||||
Type: progType,
|
||||
AttachType: attachType,
|
||||
License: license,
|
||||
KernelVersion: version,
|
||||
Instructions: insns,
|
||||
}
|
||||
}
|
||||
}
|
||||
return progs, libs, nil
|
||||
}
|
||||
|
||||
func (ec *elfCode) loadInstructions(section *elf.Section, symbols, relocations map[uint64]string) (asm.Instructions, error) {
|
||||
var (
|
||||
r = section.Open()
|
||||
insns asm.Instructions
|
||||
ins asm.Instruction
|
||||
offset uint64
|
||||
)
|
||||
for {
|
||||
n, err := ins.Unmarshal(r, ec.ByteOrder)
|
||||
if err == io.EOF {
|
||||
return insns, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "offset %d", offset)
|
||||
}
|
||||
|
||||
ins.Symbol = symbols[offset]
|
||||
ins.Reference = relocations[offset]
|
||||
|
||||
insns = append(insns, ins)
|
||||
offset += n
|
||||
}
|
||||
}
|
||||
|
||||
func (ec *elfCode) loadMaps(mapSections map[elf.SectionIndex]*elf.Section) (map[string]*MapSpec, error) {
|
||||
var (
|
||||
maps = make(map[string]*MapSpec)
|
||||
b = make([]byte, 1)
|
||||
)
|
||||
for idx, sec := range mapSections {
|
||||
syms := ec.symbolsPerSection[idx]
|
||||
if len(syms) == 0 {
|
||||
return nil, errors.Errorf("section %v: no symbols", sec.Name)
|
||||
}
|
||||
|
||||
if sec.Size%uint64(len(syms)) != 0 {
|
||||
return nil, errors.Errorf("section %v: map descriptors are not of equal size", sec.Name)
|
||||
}
|
||||
|
||||
var (
|
||||
r = sec.Open()
|
||||
size = sec.Size / uint64(len(syms))
|
||||
)
|
||||
for i, offset := 0, uint64(0); i < len(syms); i, offset = i+1, offset+size {
|
||||
mapSym := syms[offset]
|
||||
if mapSym == "" {
|
||||
fmt.Println(syms)
|
||||
return nil, errors.Errorf("section %s: missing symbol for map at offset %d", sec.Name, offset)
|
||||
}
|
||||
|
||||
if maps[mapSym] != nil {
|
||||
return nil, errors.Errorf("section %v: map %v already exists", sec.Name, mapSym)
|
||||
}
|
||||
|
||||
lr := io.LimitReader(r, int64(size))
|
||||
|
||||
var spec MapSpec
|
||||
switch {
|
||||
case binary.Read(lr, ec.ByteOrder, &spec.Type) != nil:
|
||||
return nil, errors.Errorf("map %v: missing type", mapSym)
|
||||
case binary.Read(lr, ec.ByteOrder, &spec.KeySize) != nil:
|
||||
return nil, errors.Errorf("map %v: missing key size", mapSym)
|
||||
case binary.Read(lr, ec.ByteOrder, &spec.ValueSize) != nil:
|
||||
return nil, errors.Errorf("map %v: missing value size", mapSym)
|
||||
case binary.Read(lr, ec.ByteOrder, &spec.MaxEntries) != nil:
|
||||
return nil, errors.Errorf("map %v: missing max entries", mapSym)
|
||||
case binary.Read(lr, ec.ByteOrder, &spec.Flags) != nil:
|
||||
return nil, errors.Errorf("map %v: missing flags", mapSym)
|
||||
}
|
||||
|
||||
for {
|
||||
_, err := lr.Read(b)
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if b[0] != 0 {
|
||||
return nil, errors.Errorf("map %v: unknown and non-zero fields in definition", mapSym)
|
||||
}
|
||||
}
|
||||
|
||||
maps[mapSym] = &spec
|
||||
}
|
||||
}
|
||||
return maps, nil
|
||||
}
|
||||
|
||||
func getProgType(v string) (ProgramType, AttachType) {
|
||||
types := map[string]ProgramType{
|
||||
// From https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/libbpf.c#n3568
|
||||
"socket": SocketFilter,
|
||||
"seccomp": SocketFilter,
|
||||
"kprobe/": Kprobe,
|
||||
"kretprobe/": Kprobe,
|
||||
"tracepoint/": TracePoint,
|
||||
"xdp": XDP,
|
||||
"perf_event": PerfEvent,
|
||||
"sockops": SockOps,
|
||||
"sk_skb": SkSKB,
|
||||
"sk_msg": SkMsg,
|
||||
"lirc_mode2": LircMode2,
|
||||
"flow_dissector": FlowDissector,
|
||||
|
||||
"cgroup_skb/": CGroupSKB,
|
||||
"cgroup/dev": CGroupDevice,
|
||||
"cgroup/skb": CGroupSKB,
|
||||
"cgroup/sock": CGroupSock,
|
||||
"cgroup/post_bind": CGroupSock,
|
||||
"cgroup/bind": CGroupSockAddr,
|
||||
"cgroup/connect": CGroupSockAddr,
|
||||
"cgroup/sendmsg": CGroupSockAddr,
|
||||
"cgroup/recvmsg": CGroupSockAddr,
|
||||
"cgroup/sysctl": CGroupSysctl,
|
||||
"cgroup/getsockopt": CGroupSockopt,
|
||||
"cgroup/setsockopt": CGroupSockopt,
|
||||
"classifier": SchedCLS,
|
||||
"action": SchedACT,
|
||||
}
|
||||
attachTypes := map[string]AttachType{
|
||||
"cgroup_skb/ingress": AttachCGroupInetIngress,
|
||||
"cgroup_skb/egress": AttachCGroupInetEgress,
|
||||
"cgroup/sock": AttachCGroupInetSockCreate,
|
||||
"cgroup/post_bind4": AttachCGroupInet4PostBind,
|
||||
"cgroup/post_bind6": AttachCGroupInet6PostBind,
|
||||
"cgroup/dev": AttachCGroupDevice,
|
||||
"sockops": AttachCGroupSockOps,
|
||||
"sk_skb/stream_parser": AttachSkSKBStreamParser,
|
||||
"sk_skb/stream_verdict": AttachSkSKBStreamVerdict,
|
||||
"sk_msg": AttachSkSKBStreamVerdict,
|
||||
"lirc_mode2": AttachLircMode2,
|
||||
"flow_dissector": AttachFlowDissector,
|
||||
"cgroup/bind4": AttachCGroupInet4Bind,
|
||||
"cgroup/bind6": AttachCGroupInet6Bind,
|
||||
"cgroup/connect4": AttachCGroupInet4Connect,
|
||||
"cgroup/connect6": AttachCGroupInet6Connect,
|
||||
"cgroup/sendmsg4": AttachCGroupUDP4Sendmsg,
|
||||
"cgroup/sendmsg6": AttachCGroupUDP6Sendmsg,
|
||||
"cgroup/recvmsg4": AttachCGroupUDP4Recvmsg,
|
||||
"cgroup/recvmsg6": AttachCGroupUDP6Recvmsg,
|
||||
"cgroup/sysctl": AttachCGroupSysctl,
|
||||
"cgroup/getsockopt": AttachCGroupGetsockopt,
|
||||
"cgroup/setsockopt": AttachCGroupSetsockopt,
|
||||
}
|
||||
attachType := AttachNone
|
||||
for k, t := range attachTypes {
|
||||
if strings.HasPrefix(v, k) {
|
||||
attachType = t
|
||||
}
|
||||
}
|
||||
|
||||
for k, t := range types {
|
||||
if strings.HasPrefix(v, k) {
|
||||
return t, attachType
|
||||
}
|
||||
}
|
||||
return UnspecifiedProgram, AttachNone
|
||||
}
|
||||
|
||||
func (ec *elfCode) loadRelocations(sec *elf.Section) (map[uint64]string, error) {
|
||||
rels := make(map[uint64]string)
|
||||
if sec == nil {
|
||||
return rels, nil
|
||||
}
|
||||
|
||||
if sec.Entsize < 16 {
|
||||
return nil, errors.New("rels are less than 16 bytes")
|
||||
}
|
||||
|
||||
r := sec.Open()
|
||||
for off := uint64(0); off < sec.Size; off += sec.Entsize {
|
||||
ent := io.LimitReader(r, int64(sec.Entsize))
|
||||
|
||||
var rel elf.Rel64
|
||||
if binary.Read(ent, ec.ByteOrder, &rel) != nil {
|
||||
return nil, errors.Errorf("can't parse relocation at offset %v", off)
|
||||
}
|
||||
|
||||
symNo := int(elf.R_SYM64(rel.Info) - 1)
|
||||
if symNo >= len(ec.symbols) {
|
||||
return nil, errors.Errorf("relocation at offset %d: symbol %v doesnt exist", off, symNo)
|
||||
}
|
||||
|
||||
rels[rel.Off] = ec.symbols[symNo].Name
|
||||
}
|
||||
return rels, nil
|
||||
}
|
||||
|
||||
func symbolsPerSection(symbols []elf.Symbol) map[elf.SectionIndex]map[uint64]string {
|
||||
result := make(map[elf.SectionIndex]map[uint64]string)
|
||||
for i, sym := range symbols {
|
||||
switch elf.ST_TYPE(sym.Info) {
|
||||
case elf.STT_NOTYPE:
|
||||
// Older versions of LLVM doesn't tag
|
||||
// symbols correctly.
|
||||
break
|
||||
case elf.STT_OBJECT:
|
||||
break
|
||||
case elf.STT_FUNC:
|
||||
break
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
if sym.Name == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
idx := sym.Section
|
||||
if _, ok := result[idx]; !ok {
|
||||
result[idx] = make(map[uint64]string)
|
||||
}
|
||||
result[idx][sym.Value] = symbols[i].Name
|
||||
}
|
||||
return result
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
type featureTest struct {
|
||||
Fn func() bool
|
||||
|
||||
once sync.Once
|
||||
result bool
|
||||
}
|
||||
|
||||
func (ft *featureTest) Result() bool {
|
||||
ft.once.Do(func() {
|
||||
ft.result = ft.Fn()
|
||||
})
|
||||
return ft.result
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
module github.com/cilium/ebpf
|
||||
|
||||
go 1.12
|
||||
|
||||
require (
|
||||
github.com/pkg/errors v0.8.1
|
||||
golang.org/x/sys v0.0.0-20191022100944-742c48ecaeb7
|
||||
)
|
|
@ -0,0 +1,4 @@
|
|||
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
|
||||
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
golang.org/x/sys v0.0.0-20191022100944-742c48ecaeb7 h1:HmbHVPwrPEKPGLAcHSrMe6+hqSUlvZU0rab6x5EXfGU=
|
||||
golang.org/x/sys v0.0.0-20191022100944-742c48ecaeb7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
|
@ -0,0 +1,64 @@
|
|||
package internal
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
var sysCPU struct {
|
||||
once sync.Once
|
||||
err error
|
||||
num int
|
||||
}
|
||||
|
||||
// PossibleCPUs returns the max number of CPUs a system may possibly have
|
||||
// Logical CPU numbers must be of the form 0-n
|
||||
func PossibleCPUs() (int, error) {
|
||||
sysCPU.once.Do(func() {
|
||||
sysCPU.num, sysCPU.err = parseCPUs("/sys/devices/system/cpu/possible")
|
||||
})
|
||||
|
||||
return sysCPU.num, sysCPU.err
|
||||
}
|
||||
|
||||
var onlineCPU struct {
|
||||
once sync.Once
|
||||
err error
|
||||
num int
|
||||
}
|
||||
|
||||
// OnlineCPUs returns the number of currently online CPUs
|
||||
// Logical CPU numbers must be of the form 0-n
|
||||
func OnlineCPUs() (int, error) {
|
||||
onlineCPU.once.Do(func() {
|
||||
onlineCPU.num, onlineCPU.err = parseCPUs("/sys/devices/system/cpu/online")
|
||||
})
|
||||
|
||||
return onlineCPU.num, onlineCPU.err
|
||||
}
|
||||
|
||||
// parseCPUs parses the number of cpus from sysfs,
|
||||
// in the format of "/sys/devices/system/cpu/{possible,online,..}.
|
||||
// Logical CPU numbers must be of the form 0-n
|
||||
func parseCPUs(path string) (int, error) {
|
||||
file, err := os.Open(path)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
var low, high int
|
||||
n, _ := fmt.Fscanf(file, "%d-%d", &low, &high)
|
||||
if n < 1 || low != 0 {
|
||||
return 0, errors.Wrapf(err, "%s has unknown format", path)
|
||||
}
|
||||
if n == 1 {
|
||||
high = low
|
||||
}
|
||||
|
||||
// cpus is 0 indexed
|
||||
return high + 1, nil
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
package internal
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
|
||||
// depending on the host's endianness.
|
||||
var NativeEndian binary.ByteOrder
|
||||
|
||||
func init() {
|
||||
if isBigEndian() {
|
||||
NativeEndian = binary.BigEndian
|
||||
} else {
|
||||
NativeEndian = binary.LittleEndian
|
||||
}
|
||||
}
|
||||
|
||||
func isBigEndian() (ret bool) {
|
||||
i := int(0x1)
|
||||
bs := (*[int(unsafe.Sizeof(i))]byte)(unsafe.Pointer(&i))
|
||||
return bs[0] == 0
|
||||
}
|
|
@ -0,0 +1,118 @@
|
|||
// +build linux
|
||||
|
||||
package unix
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
|
||||
linux "golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
ENOENT = linux.ENOENT
|
||||
EAGAIN = linux.EAGAIN
|
||||
ENOSPC = linux.ENOSPC
|
||||
EINVAL = linux.EINVAL
|
||||
EPOLLIN = linux.EPOLLIN
|
||||
BPF_OBJ_NAME_LEN = linux.BPF_OBJ_NAME_LEN
|
||||
BPF_TAG_SIZE = linux.BPF_TAG_SIZE
|
||||
SYS_BPF = linux.SYS_BPF
|
||||
F_DUPFD_CLOEXEC = linux.F_DUPFD_CLOEXEC
|
||||
EPOLL_CTL_ADD = linux.EPOLL_CTL_ADD
|
||||
EPOLL_CLOEXEC = linux.EPOLL_CLOEXEC
|
||||
O_CLOEXEC = linux.O_CLOEXEC
|
||||
O_NONBLOCK = linux.O_NONBLOCK
|
||||
PROT_READ = linux.PROT_READ
|
||||
PROT_WRITE = linux.PROT_WRITE
|
||||
MAP_SHARED = linux.MAP_SHARED
|
||||
PERF_TYPE_SOFTWARE = linux.PERF_TYPE_SOFTWARE
|
||||
PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT
|
||||
PerfBitWatermark = linux.PerfBitWatermark
|
||||
PERF_SAMPLE_RAW = linux.PERF_SAMPLE_RAW
|
||||
PERF_FLAG_FD_CLOEXEC = linux.PERF_FLAG_FD_CLOEXEC
|
||||
)
|
||||
|
||||
// Statfs_t is a wrapper
|
||||
type Statfs_t = linux.Statfs_t
|
||||
|
||||
// Rlimit is a wrapper
|
||||
type Rlimit = linux.Rlimit
|
||||
|
||||
// Setrlimit is a wrapper
|
||||
func Setrlimit(resource int, rlim *Rlimit) (err error) {
|
||||
return linux.Setrlimit(resource, rlim)
|
||||
}
|
||||
|
||||
// Syscall is a wrapper
|
||||
func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
|
||||
return linux.Syscall(trap, a1, a2, a3)
|
||||
}
|
||||
|
||||
// FcntlInt is a wrapper
|
||||
func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
|
||||
return linux.FcntlInt(fd, cmd, arg)
|
||||
}
|
||||
|
||||
// Statfs is a wrapper
|
||||
func Statfs(path string, buf *Statfs_t) (err error) {
|
||||
return linux.Statfs(path, buf)
|
||||
}
|
||||
|
||||
// Close is a wrapper
|
||||
func Close(fd int) (err error) {
|
||||
return linux.Close(fd)
|
||||
}
|
||||
|
||||
// EpollEvent is a wrapper
|
||||
type EpollEvent = linux.EpollEvent
|
||||
|
||||
// EpollWait is a wrapper
|
||||
func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
|
||||
return linux.EpollWait(epfd, events, msec)
|
||||
}
|
||||
|
||||
// EpollCtl is a wrapper
|
||||
func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
|
||||
return linux.EpollCtl(epfd, op, fd, event)
|
||||
}
|
||||
|
||||
// Eventfd is a wrapper
|
||||
func Eventfd(initval uint, flags int) (fd int, err error) {
|
||||
return linux.Eventfd(initval, flags)
|
||||
}
|
||||
|
||||
// Write is a wrapper
|
||||
func Write(fd int, p []byte) (n int, err error) {
|
||||
return linux.Write(fd, p)
|
||||
}
|
||||
|
||||
// EpollCreate1 is a wrapper
|
||||
func EpollCreate1(flag int) (fd int, err error) {
|
||||
return linux.EpollCreate1(flag)
|
||||
}
|
||||
|
||||
// PerfEventMmapPage is a wrapper
|
||||
type PerfEventMmapPage linux.PerfEventMmapPage
|
||||
|
||||
// SetNonblock is a wrapper
|
||||
func SetNonblock(fd int, nonblocking bool) (err error) {
|
||||
return linux.SetNonblock(fd, nonblocking)
|
||||
}
|
||||
|
||||
// Mmap is a wrapper
|
||||
func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
|
||||
return linux.Mmap(fd, offset, length, prot, flags)
|
||||
}
|
||||
|
||||
// Munmap is a wrapper
|
||||
func Munmap(b []byte) (err error) {
|
||||
return linux.Munmap(b)
|
||||
}
|
||||
|
||||
// PerfEventAttr is a wrapper
|
||||
type PerfEventAttr = linux.PerfEventAttr
|
||||
|
||||
// PerfEventOpen is a wrapper
|
||||
func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
|
||||
return linux.PerfEventOpen(attr, pid, cpu, groupFd, flags)
|
||||
}
|
|
@ -0,0 +1,183 @@
|
|||
// +build !linux
|
||||
|
||||
package unix
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
var errNonLinux = fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
|
||||
|
||||
const (
|
||||
ENOENT = syscall.ENOENT
|
||||
EAGAIN = syscall.EAGAIN
|
||||
ENOSPC = syscall.ENOSPC
|
||||
EINVAL = syscall.EINVAL
|
||||
BPF_OBJ_NAME_LEN = 0x10
|
||||
BPF_TAG_SIZE = 0x8
|
||||
SYS_BPF = 321
|
||||
F_DUPFD_CLOEXEC = 0x406
|
||||
EPOLLIN = 0x1
|
||||
EPOLL_CTL_ADD = 0x1
|
||||
EPOLL_CLOEXEC = 0x80000
|
||||
O_CLOEXEC = 0x80000
|
||||
O_NONBLOCK = 0x800
|
||||
PROT_READ = 0x1
|
||||
PROT_WRITE = 0x2
|
||||
MAP_SHARED = 0x1
|
||||
PERF_TYPE_SOFTWARE = 0x1
|
||||
PERF_COUNT_SW_BPF_OUTPUT = 0xa
|
||||
PerfBitWatermark = 0x4000
|
||||
PERF_SAMPLE_RAW = 0x400
|
||||
PERF_FLAG_FD_CLOEXEC = 0x8
|
||||
)
|
||||
|
||||
// Statfs_t is a wrapper
|
||||
type Statfs_t struct {
|
||||
Type int64
|
||||
Bsize int64
|
||||
Blocks uint64
|
||||
Bfree uint64
|
||||
Bavail uint64
|
||||
Files uint64
|
||||
Ffree uint64
|
||||
Fsid [2]int32
|
||||
Namelen int64
|
||||
Frsize int64
|
||||
Flags int64
|
||||
Spare [4]int64
|
||||
}
|
||||
|
||||
// Rlimit is a wrapper
|
||||
type Rlimit struct {
|
||||
Cur uint64
|
||||
Max uint64
|
||||
}
|
||||
|
||||
// Setrlimit is a wrapper
|
||||
func Setrlimit(resource int, rlim *Rlimit) (err error) {
|
||||
return errNonLinux
|
||||
}
|
||||
|
||||
// Syscall is a wrapper
|
||||
func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
|
||||
return 0, 0, syscall.Errno(1)
|
||||
}
|
||||
|
||||
// FcntlInt is a wrapper
|
||||
func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
|
||||
return -1, errNonLinux
|
||||
}
|
||||
|
||||
// Statfs is a wrapper
|
||||
func Statfs(path string, buf *Statfs_t) error {
|
||||
return errNonLinux
|
||||
}
|
||||
|
||||
// Close is a wrapper
|
||||
func Close(fd int) (err error) {
|
||||
return errNonLinux
|
||||
}
|
||||
|
||||
// EpollEvent is a wrapper
|
||||
type EpollEvent struct {
|
||||
Events uint32
|
||||
Fd int32
|
||||
Pad int32
|
||||
}
|
||||
|
||||
// EpollWait is a wrapper
|
||||
func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
|
||||
return 0, errNonLinux
|
||||
}
|
||||
|
||||
// EpollCtl is a wrapper
|
||||
func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
|
||||
return errNonLinux
|
||||
}
|
||||
|
||||
// Eventfd is a wrapper
|
||||
func Eventfd(initval uint, flags int) (fd int, err error) {
|
||||
return 0, errNonLinux
|
||||
}
|
||||
|
||||
// Write is a wrapper
|
||||
func Write(fd int, p []byte) (n int, err error) {
|
||||
return 0, errNonLinux
|
||||
}
|
||||
|
||||
// EpollCreate1 is a wrapper
|
||||
func EpollCreate1(flag int) (fd int, err error) {
|
||||
return 0, errNonLinux
|
||||
}
|
||||
|
||||
// PerfEventMmapPage is a wrapper
|
||||
type PerfEventMmapPage struct {
|
||||
Version uint32
|
||||
Compat_version uint32
|
||||
Lock uint32
|
||||
Index uint32
|
||||
Offset int64
|
||||
Time_enabled uint64
|
||||
Time_running uint64
|
||||
Capabilities uint64
|
||||
Pmc_width uint16
|
||||
Time_shift uint16
|
||||
Time_mult uint32
|
||||
Time_offset uint64
|
||||
Time_zero uint64
|
||||
Size uint32
|
||||
|
||||
Data_head uint64
|
||||
Data_tail uint64
|
||||
Data_offset uint64
|
||||
Data_size uint64
|
||||
Aux_head uint64
|
||||
Aux_tail uint64
|
||||
Aux_offset uint64
|
||||
Aux_size uint64
|
||||
}
|
||||
|
||||
// SetNonblock is a wrapper
|
||||
func SetNonblock(fd int, nonblocking bool) (err error) {
|
||||
return errNonLinux
|
||||
}
|
||||
|
||||
// Mmap is a wrapper
|
||||
func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
|
||||
return []byte{}, errNonLinux
|
||||
}
|
||||
|
||||
// Munmap is a wrapper
|
||||
func Munmap(b []byte) (err error) {
|
||||
return errNonLinux
|
||||
}
|
||||
|
||||
// PerfEventAttr is a wrapper
|
||||
type PerfEventAttr struct {
|
||||
Type uint32
|
||||
Size uint32
|
||||
Config uint64
|
||||
Sample uint64
|
||||
Sample_type uint64
|
||||
Read_format uint64
|
||||
Bits uint64
|
||||
Wakeup uint32
|
||||
Bp_type uint32
|
||||
Ext1 uint64
|
||||
Ext2 uint64
|
||||
Branch_sample_type uint64
|
||||
Sample_regs_user uint64
|
||||
Sample_stack_user uint32
|
||||
Clockid int32
|
||||
Sample_regs_intr uint64
|
||||
Aux_watermark uint32
|
||||
Sample_max_stack uint16
|
||||
}
|
||||
|
||||
// PerfEventOpen is a wrapper
|
||||
func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
|
||||
return 0, errNonLinux
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"github.com/cilium/ebpf/asm"
|
||||
)
|
||||
|
||||
// link resolves bpf-to-bpf calls.
|
||||
//
|
||||
// Each section may contain multiple functions / labels, and is only linked
|
||||
// if the program being edited references one of these functions.
|
||||
//
|
||||
// Sections must not require linking themselves.
|
||||
func link(insns asm.Instructions, sections ...asm.Instructions) (asm.Instructions, error) {
|
||||
for _, section := range sections {
|
||||
var err error
|
||||
insns, err = linkSection(insns, section)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return insns, nil
|
||||
}
|
||||
|
||||
func linkSection(insns, section asm.Instructions) (asm.Instructions, error) {
|
||||
// A map of symbols to the libraries which contain them.
|
||||
symbols, err := section.SymbolOffsets()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, ins := range insns {
|
||||
if ins.Reference == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if ins.OpCode.JumpOp() != asm.Call || ins.Src != asm.R1 {
|
||||
continue
|
||||
}
|
||||
|
||||
if ins.Constant != -1 {
|
||||
// This is already a valid call, no need to link again.
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok := symbols[ins.Reference]; !ok {
|
||||
// Symbol isn't available in this section
|
||||
continue
|
||||
}
|
||||
|
||||
// At this point we know that at least one function in the
|
||||
// library is called from insns. Merge the two sections.
|
||||
// The rewrite of ins.Constant happens in asm.Instruction.Marshal.
|
||||
return append(insns, section...), nil
|
||||
}
|
||||
|
||||
// None of the functions in the section are called. Do nothing.
|
||||
return insns, nil
|
||||
}
|
|
@ -0,0 +1,595 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unsafe"
|
||||
|
||||
"github.com/cilium/ebpf/internal"
|
||||
"github.com/cilium/ebpf/internal/unix"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// MapSpec defines a Map.
|
||||
type MapSpec struct {
|
||||
// Name is passed to the kernel as a debug aid. Must only contain
|
||||
// alpha numeric and '_' characters.
|
||||
Name string
|
||||
Type MapType
|
||||
KeySize uint32
|
||||
ValueSize uint32
|
||||
MaxEntries uint32
|
||||
Flags uint32
|
||||
// InnerMap is used as a template for ArrayOfMaps and HashOfMaps
|
||||
InnerMap *MapSpec
|
||||
}
|
||||
|
||||
func (ms *MapSpec) String() string {
|
||||
return fmt.Sprintf("%s(keySize=%d, valueSize=%d, maxEntries=%d, flags=%d)", ms.Type, ms.KeySize, ms.ValueSize, ms.MaxEntries, ms.Flags)
|
||||
}
|
||||
|
||||
// Copy returns a copy of the spec.
|
||||
func (ms *MapSpec) Copy() *MapSpec {
|
||||
if ms == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
cpy := *ms
|
||||
cpy.InnerMap = ms.InnerMap.Copy()
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// Map represents a Map file descriptor.
|
||||
//
|
||||
// It is not safe to close a map which is used by other goroutines.
|
||||
//
|
||||
// Methods which take interface{} arguments by default encode
|
||||
// them using binary.Read/Write in the machine's native endianness.
|
||||
//
|
||||
// Implement encoding.BinaryMarshaler or encoding.BinaryUnmarshaler
|
||||
// if you require custom encoding.
|
||||
type Map struct {
|
||||
fd *bpfFD
|
||||
abi MapABI
|
||||
// Per CPU maps return values larger than the size in the spec
|
||||
fullValueSize int
|
||||
}
|
||||
|
||||
// NewMapFromFD creates a map from a raw fd.
|
||||
//
|
||||
// You should not use fd after calling this function.
|
||||
func NewMapFromFD(fd int) (*Map, error) {
|
||||
if fd < 0 {
|
||||
return nil, errors.New("invalid fd")
|
||||
}
|
||||
bpfFd := newBPFFD(uint32(fd))
|
||||
|
||||
abi, err := newMapABIFromFd(bpfFd)
|
||||
if err != nil {
|
||||
bpfFd.forget()
|
||||
return nil, err
|
||||
}
|
||||
return newMap(bpfFd, abi)
|
||||
}
|
||||
|
||||
// NewMap creates a new Map.
|
||||
//
|
||||
// Creating a map for the first time will perform feature detection
|
||||
// by creating small, temporary maps.
|
||||
func NewMap(spec *MapSpec) (*Map, error) {
|
||||
if spec.Type != ArrayOfMaps && spec.Type != HashOfMaps {
|
||||
return createMap(spec, nil)
|
||||
}
|
||||
|
||||
if spec.InnerMap == nil {
|
||||
return nil, errors.Errorf("%s requires InnerMap", spec.Type)
|
||||
}
|
||||
|
||||
template, err := createMap(spec.InnerMap, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer template.Close()
|
||||
|
||||
return createMap(spec, template.fd)
|
||||
}
|
||||
|
||||
func createMap(spec *MapSpec, inner *bpfFD) (*Map, error) {
|
||||
spec = spec.Copy()
|
||||
|
||||
switch spec.Type {
|
||||
case ArrayOfMaps:
|
||||
fallthrough
|
||||
case HashOfMaps:
|
||||
if spec.ValueSize != 0 && spec.ValueSize != 4 {
|
||||
return nil, errors.Errorf("ValueSize must be zero or four for map of map")
|
||||
}
|
||||
spec.ValueSize = 4
|
||||
|
||||
case PerfEventArray:
|
||||
if spec.KeySize != 0 {
|
||||
return nil, errors.Errorf("KeySize must be zero for perf event array")
|
||||
}
|
||||
if spec.ValueSize != 0 {
|
||||
return nil, errors.Errorf("ValueSize must be zero for perf event array")
|
||||
}
|
||||
if spec.MaxEntries == 0 {
|
||||
n, err := internal.OnlineCPUs()
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "perf event array")
|
||||
}
|
||||
spec.MaxEntries = uint32(n)
|
||||
}
|
||||
|
||||
spec.KeySize = 4
|
||||
spec.ValueSize = 4
|
||||
}
|
||||
|
||||
attr := bpfMapCreateAttr{
|
||||
mapType: spec.Type,
|
||||
keySize: spec.KeySize,
|
||||
valueSize: spec.ValueSize,
|
||||
maxEntries: spec.MaxEntries,
|
||||
flags: spec.Flags,
|
||||
}
|
||||
|
||||
if inner != nil {
|
||||
var err error
|
||||
attr.innerMapFd, err = inner.value()
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "map create")
|
||||
}
|
||||
}
|
||||
|
||||
name, err := newBPFObjName(spec.Name)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "map create")
|
||||
}
|
||||
|
||||
if haveObjName.Result() {
|
||||
attr.mapName = name
|
||||
}
|
||||
|
||||
fd, err := bpfMapCreate(&attr)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "map create")
|
||||
}
|
||||
|
||||
return newMap(fd, newMapABIFromSpec(spec))
|
||||
}
|
||||
|
||||
func newMap(fd *bpfFD, abi *MapABI) (*Map, error) {
|
||||
m := &Map{
|
||||
fd,
|
||||
*abi,
|
||||
int(abi.ValueSize),
|
||||
}
|
||||
|
||||
if !abi.Type.hasPerCPUValue() {
|
||||
return m, nil
|
||||
}
|
||||
|
||||
possibleCPUs, err := internal.PossibleCPUs()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
m.fullValueSize = align(int(abi.ValueSize), 8) * possibleCPUs
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func (m *Map) String() string {
|
||||
return fmt.Sprintf("%s#%d", m.abi.Type, m.fd)
|
||||
}
|
||||
|
||||
// ABI gets the ABI of the Map
|
||||
func (m *Map) ABI() MapABI {
|
||||
return m.abi
|
||||
}
|
||||
|
||||
// Lookup retrieves a value from a Map.
|
||||
//
|
||||
// Calls Close() on valueOut if it is of type **Map or **Program,
|
||||
// and *valueOut is not nil.
|
||||
//
|
||||
// Returns an error if the key doesn't exist, see IsNotExist.
|
||||
func (m *Map) Lookup(key, valueOut interface{}) error {
|
||||
valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize)
|
||||
|
||||
if err := m.lookup(key, valuePtr); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if valueBytes == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if m.abi.Type.hasPerCPUValue() {
|
||||
return unmarshalPerCPUValue(valueOut, int(m.abi.ValueSize), valueBytes)
|
||||
}
|
||||
|
||||
switch value := valueOut.(type) {
|
||||
case **Map:
|
||||
m, err := unmarshalMap(valueBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
(*value).Close()
|
||||
*value = m
|
||||
return nil
|
||||
case *Map:
|
||||
return errors.Errorf("can't unmarshal into %T, need %T", value, (**Map)(nil))
|
||||
case Map:
|
||||
return errors.Errorf("can't unmarshal into %T, need %T", value, (**Map)(nil))
|
||||
|
||||
case **Program:
|
||||
p, err := unmarshalProgram(valueBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
(*value).Close()
|
||||
*value = p
|
||||
return nil
|
||||
case *Program:
|
||||
return errors.Errorf("can't unmarshal into %T, need %T", value, (**Program)(nil))
|
||||
case Program:
|
||||
return errors.Errorf("can't unmarshal into %T, need %T", value, (**Program)(nil))
|
||||
|
||||
default:
|
||||
return unmarshalBytes(valueOut, valueBytes)
|
||||
}
|
||||
}
|
||||
|
||||
// LookupBytes gets a value from Map.
|
||||
//
|
||||
// Returns a nil value if a key doesn't exist.
|
||||
func (m *Map) LookupBytes(key interface{}) ([]byte, error) {
|
||||
valueBytes := make([]byte, m.fullValueSize)
|
||||
valuePtr := newPtr(unsafe.Pointer(&valueBytes[0]))
|
||||
|
||||
err := m.lookup(key, valuePtr)
|
||||
if IsNotExist(err) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return valueBytes, err
|
||||
}
|
||||
|
||||
func (m *Map) lookup(key interface{}, valueOut syscallPtr) error {
|
||||
keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
|
||||
if err != nil {
|
||||
return errors.WithMessage(err, "can't marshal key")
|
||||
}
|
||||
|
||||
err = bpfMapLookupElem(m.fd, keyPtr, valueOut)
|
||||
return errors.WithMessage(err, "lookup failed")
|
||||
}
|
||||
|
||||
// MapUpdateFlags controls the behaviour of the Map.Update call.
|
||||
//
|
||||
// The exact semantics depend on the specific MapType.
|
||||
type MapUpdateFlags uint64
|
||||
|
||||
const (
|
||||
// UpdateAny creates a new element or update an existing one.
|
||||
UpdateAny MapUpdateFlags = iota
|
||||
// UpdateNoExist creates a new element.
|
||||
UpdateNoExist MapUpdateFlags = 1 << (iota - 1)
|
||||
// UpdateExist updates an existing element.
|
||||
UpdateExist
|
||||
)
|
||||
|
||||
// Put replaces or creates a value in map.
|
||||
//
|
||||
// It is equivalent to calling Update with UpdateAny.
|
||||
func (m *Map) Put(key, value interface{}) error {
|
||||
return m.Update(key, value, UpdateAny)
|
||||
}
|
||||
|
||||
// Update changes the value of a key.
|
||||
func (m *Map) Update(key, value interface{}, flags MapUpdateFlags) error {
|
||||
keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
|
||||
if err != nil {
|
||||
return errors.WithMessage(err, "can't marshal key")
|
||||
}
|
||||
|
||||
var valuePtr syscallPtr
|
||||
if m.abi.Type.hasPerCPUValue() {
|
||||
valuePtr, err = marshalPerCPUValue(value, int(m.abi.ValueSize))
|
||||
} else {
|
||||
valuePtr, err = marshalPtr(value, int(m.abi.ValueSize))
|
||||
}
|
||||
if err != nil {
|
||||
return errors.WithMessage(err, "can't marshal value")
|
||||
}
|
||||
|
||||
return bpfMapUpdateElem(m.fd, keyPtr, valuePtr, uint64(flags))
|
||||
}
|
||||
|
||||
// Delete removes a value.
|
||||
//
|
||||
// Returns an error if the key does not exist, see IsNotExist.
|
||||
func (m *Map) Delete(key interface{}) error {
|
||||
keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
|
||||
if err != nil {
|
||||
return errors.WithMessage(err, "can't marshal key")
|
||||
}
|
||||
|
||||
err = bpfMapDeleteElem(m.fd, keyPtr)
|
||||
return errors.WithMessage(err, "can't delete key")
|
||||
}
|
||||
|
||||
// NextKey finds the key following an initial key.
|
||||
//
|
||||
// See NextKeyBytes for details.
|
||||
func (m *Map) NextKey(key, nextKeyOut interface{}) error {
|
||||
nextKeyPtr, nextKeyBytes := makeBuffer(nextKeyOut, int(m.abi.KeySize))
|
||||
|
||||
if err := m.nextKey(key, nextKeyPtr); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if nextKeyBytes == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := unmarshalBytes(nextKeyOut, nextKeyBytes)
|
||||
return errors.WithMessage(err, "can't unmarshal next key")
|
||||
}
|
||||
|
||||
// NextKeyBytes returns the key following an initial key as a byte slice.
|
||||
//
|
||||
// Passing nil will return the first key.
|
||||
//
|
||||
// Use Iterate if you want to traverse all entries in the map.
|
||||
func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) {
|
||||
nextKey := make([]byte, m.abi.KeySize)
|
||||
nextKeyPtr := newPtr(unsafe.Pointer(&nextKey[0]))
|
||||
|
||||
err := m.nextKey(key, nextKeyPtr)
|
||||
if IsNotExist(err) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return nextKey, err
|
||||
}
|
||||
|
||||
func (m *Map) nextKey(key interface{}, nextKeyOut syscallPtr) error {
|
||||
var (
|
||||
keyPtr syscallPtr
|
||||
err error
|
||||
)
|
||||
|
||||
if key != nil {
|
||||
keyPtr, err = marshalPtr(key, int(m.abi.KeySize))
|
||||
if err != nil {
|
||||
return errors.WithMessage(err, "can't marshal key")
|
||||
}
|
||||
}
|
||||
|
||||
err = bpfMapGetNextKey(m.fd, keyPtr, nextKeyOut)
|
||||
return errors.WithMessage(err, "can't get next key")
|
||||
}
|
||||
|
||||
// Iterate traverses a map.
|
||||
//
|
||||
// It's safe to create multiple iterators at the same time.
|
||||
//
|
||||
// It's not possible to guarantee that all keys in a map will be
|
||||
// returned if there are concurrent modifications to the map.
|
||||
func (m *Map) Iterate() *MapIterator {
|
||||
return newMapIterator(m)
|
||||
}
|
||||
|
||||
// Close removes a Map
|
||||
func (m *Map) Close() error {
|
||||
if m == nil {
|
||||
// This makes it easier to clean up when iterating maps
|
||||
// of maps / programs.
|
||||
return nil
|
||||
}
|
||||
|
||||
return m.fd.close()
|
||||
}
|
||||
|
||||
// FD gets the file descriptor of the Map.
|
||||
//
|
||||
// Calling this function is invalid after Close has been called.
|
||||
func (m *Map) FD() int {
|
||||
fd, err := m.fd.value()
|
||||
if err != nil {
|
||||
// Best effort: -1 is the number most likely to be an
|
||||
// invalid file descriptor.
|
||||
return -1
|
||||
}
|
||||
|
||||
return int(fd)
|
||||
}
|
||||
|
||||
// Clone creates a duplicate of the Map.
|
||||
//
|
||||
// Closing the duplicate does not affect the original, and vice versa.
|
||||
// Changes made to the map are reflected by both instances however.
|
||||
//
|
||||
// Cloning a nil Map returns nil.
|
||||
func (m *Map) Clone() (*Map, error) {
|
||||
if m == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
dup, err := m.fd.dup()
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "can't clone map")
|
||||
}
|
||||
|
||||
return newMap(dup, &m.abi)
|
||||
}
|
||||
|
||||
// Pin persists the map past the lifetime of the process that created it.
|
||||
//
|
||||
// This requires bpffs to be mounted above fileName. See http://cilium.readthedocs.io/en/doc-1.0/kubernetes/install/#mounting-the-bpf-fs-optional
|
||||
func (m *Map) Pin(fileName string) error {
|
||||
return bpfPinObject(fileName, m.fd)
|
||||
}
|
||||
|
||||
// LoadPinnedMap load a Map from a BPF file.
|
||||
//
|
||||
// Requires at least Linux 4.13, and is not compatible with
|
||||
// nested maps. Use LoadPinnedMapExplicit in these situations.
|
||||
func LoadPinnedMap(fileName string) (*Map, error) {
|
||||
fd, err := bpfGetObject(fileName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
abi, err := newMapABIFromFd(fd)
|
||||
if err != nil {
|
||||
_ = fd.close()
|
||||
return nil, err
|
||||
}
|
||||
return newMap(fd, abi)
|
||||
}
|
||||
|
||||
// LoadPinnedMapExplicit loads a map with explicit parameters.
|
||||
func LoadPinnedMapExplicit(fileName string, abi *MapABI) (*Map, error) {
|
||||
fd, err := bpfGetObject(fileName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return newMap(fd, abi)
|
||||
}
|
||||
|
||||
func unmarshalMap(buf []byte) (*Map, error) {
|
||||
if len(buf) != 4 {
|
||||
return nil, errors.New("map id requires 4 byte value")
|
||||
}
|
||||
|
||||
// Looking up an entry in a nested map or prog array returns an id,
|
||||
// not an fd.
|
||||
id := internal.NativeEndian.Uint32(buf)
|
||||
fd, err := bpfGetMapFDByID(id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
abi, err := newMapABIFromFd(fd)
|
||||
if err != nil {
|
||||
_ = fd.close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newMap(fd, abi)
|
||||
}
|
||||
|
||||
// MarshalBinary implements BinaryMarshaler.
|
||||
func (m *Map) MarshalBinary() ([]byte, error) {
|
||||
fd, err := m.fd.value()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
buf := make([]byte, 4)
|
||||
internal.NativeEndian.PutUint32(buf, fd)
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
// MapIterator iterates a Map.
|
||||
//
|
||||
// See Map.Iterate.
|
||||
type MapIterator struct {
|
||||
target *Map
|
||||
prevKey interface{}
|
||||
prevBytes []byte
|
||||
count, maxEntries uint32
|
||||
done bool
|
||||
err error
|
||||
}
|
||||
|
||||
func newMapIterator(target *Map) *MapIterator {
|
||||
return &MapIterator{
|
||||
target: target,
|
||||
maxEntries: target.abi.MaxEntries,
|
||||
prevBytes: make([]byte, int(target.abi.KeySize)),
|
||||
}
|
||||
}
|
||||
|
||||
var errIterationAborted = errors.New("iteration aborted")
|
||||
|
||||
// Next decodes the next key and value.
|
||||
//
|
||||
// Iterating a hash map from which keys are being deleted is not
|
||||
// safe. You may see the same key multiple times. Iteration may
|
||||
// also abort with an error, see IsIterationAborted.
|
||||
//
|
||||
// Returns false if there are no more entries. You must check
|
||||
// the result of Err afterwards.
|
||||
//
|
||||
// See Map.Get for further caveats around valueOut.
|
||||
func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool {
|
||||
if mi.err != nil || mi.done {
|
||||
return false
|
||||
}
|
||||
|
||||
for ; mi.count < mi.maxEntries; mi.count++ {
|
||||
var nextBytes []byte
|
||||
nextBytes, mi.err = mi.target.NextKeyBytes(mi.prevKey)
|
||||
if mi.err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if nextBytes == nil {
|
||||
mi.done = true
|
||||
return false
|
||||
}
|
||||
|
||||
// The user can get access to nextBytes since unmarshalBytes
|
||||
// does not copy when unmarshaling into a []byte.
|
||||
// Make a copy to prevent accidental corruption of
|
||||
// iterator state.
|
||||
copy(mi.prevBytes, nextBytes)
|
||||
mi.prevKey = mi.prevBytes
|
||||
|
||||
mi.err = mi.target.Lookup(nextBytes, valueOut)
|
||||
if IsNotExist(mi.err) {
|
||||
// Even though the key should be valid, we couldn't look up
|
||||
// its value. If we're iterating a hash map this is probably
|
||||
// because a concurrent delete removed the value before we
|
||||
// could get it. This means that the next call to NextKeyBytes
|
||||
// is very likely to restart iteration.
|
||||
// If we're iterating one of the fd maps like
|
||||
// ProgramArray it means that a given slot doesn't have
|
||||
// a valid fd associated. It's OK to continue to the next slot.
|
||||
continue
|
||||
}
|
||||
if mi.err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
mi.err = unmarshalBytes(keyOut, nextBytes)
|
||||
return mi.err == nil
|
||||
}
|
||||
|
||||
mi.err = errIterationAborted
|
||||
return false
|
||||
}
|
||||
|
||||
// Err returns any encountered error.
|
||||
//
|
||||
// The method must be called after Next returns nil.
|
||||
func (mi *MapIterator) Err() error {
|
||||
return mi.err
|
||||
}
|
||||
|
||||
// IsNotExist returns true if the error indicates that a
|
||||
// key doesn't exist.
|
||||
func IsNotExist(err error) bool {
|
||||
return errors.Cause(err) == unix.ENOENT
|
||||
}
|
||||
|
||||
// IsIterationAborted returns true if the iteration was aborted.
|
||||
//
|
||||
// This occurs when keys are deleted from a hash map during iteration.
|
||||
func IsIterationAborted(err error) bool {
|
||||
return errors.Cause(err) == errIterationAborted
|
||||
}
|
|
@ -0,0 +1,192 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding"
|
||||
"encoding/binary"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"unsafe"
|
||||
|
||||
"github.com/cilium/ebpf/internal"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
func marshalPtr(data interface{}, length int) (syscallPtr, error) {
|
||||
if ptr, ok := data.(unsafe.Pointer); ok {
|
||||
return newPtr(ptr), nil
|
||||
}
|
||||
|
||||
buf, err := marshalBytes(data, length)
|
||||
if err != nil {
|
||||
return syscallPtr{}, err
|
||||
}
|
||||
|
||||
return newPtr(unsafe.Pointer(&buf[0])), nil
|
||||
}
|
||||
|
||||
func marshalBytes(data interface{}, length int) (buf []byte, err error) {
|
||||
switch value := data.(type) {
|
||||
case encoding.BinaryMarshaler:
|
||||
buf, err = value.MarshalBinary()
|
||||
case string:
|
||||
buf = []byte(value)
|
||||
case []byte:
|
||||
buf = value
|
||||
case unsafe.Pointer:
|
||||
err = errors.New("can't marshal from unsafe.Pointer")
|
||||
default:
|
||||
var wr bytes.Buffer
|
||||
err = binary.Write(&wr, internal.NativeEndian, value)
|
||||
err = errors.Wrapf(err, "encoding %T", value)
|
||||
buf = wr.Bytes()
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(buf) != length {
|
||||
return nil, errors.Errorf("%T doesn't marshal to %d bytes", data, length)
|
||||
}
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
func makeBuffer(dst interface{}, length int) (syscallPtr, []byte) {
|
||||
if ptr, ok := dst.(unsafe.Pointer); ok {
|
||||
return newPtr(ptr), nil
|
||||
}
|
||||
|
||||
buf := make([]byte, length)
|
||||
return newPtr(unsafe.Pointer(&buf[0])), buf
|
||||
}
|
||||
|
||||
func unmarshalBytes(data interface{}, buf []byte) error {
|
||||
switch value := data.(type) {
|
||||
case unsafe.Pointer:
|
||||
sh := &reflect.SliceHeader{
|
||||
Data: uintptr(value),
|
||||
Len: len(buf),
|
||||
Cap: len(buf),
|
||||
}
|
||||
|
||||
dst := *(*[]byte)(unsafe.Pointer(sh))
|
||||
copy(dst, buf)
|
||||
runtime.KeepAlive(value)
|
||||
return nil
|
||||
case encoding.BinaryUnmarshaler:
|
||||
return value.UnmarshalBinary(buf)
|
||||
case *string:
|
||||
*value = string(buf)
|
||||
return nil
|
||||
case *[]byte:
|
||||
*value = buf
|
||||
return nil
|
||||
case string:
|
||||
return errors.New("require pointer to string")
|
||||
case []byte:
|
||||
return errors.New("require pointer to []byte")
|
||||
default:
|
||||
rd := bytes.NewReader(buf)
|
||||
err := binary.Read(rd, internal.NativeEndian, value)
|
||||
return errors.Wrapf(err, "decoding %T", value)
|
||||
}
|
||||
}
|
||||
|
||||
// marshalPerCPUValue encodes a slice containing one value per
|
||||
// possible CPU into a buffer of bytes.
|
||||
//
|
||||
// Values are initialized to zero if the slice has less elements than CPUs.
|
||||
//
|
||||
// slice must have a type like []elementType.
|
||||
func marshalPerCPUValue(slice interface{}, elemLength int) (syscallPtr, error) {
|
||||
sliceType := reflect.TypeOf(slice)
|
||||
if sliceType.Kind() != reflect.Slice {
|
||||
return syscallPtr{}, errors.New("per-CPU value requires slice")
|
||||
}
|
||||
|
||||
possibleCPUs, err := internal.PossibleCPUs()
|
||||
if err != nil {
|
||||
return syscallPtr{}, err
|
||||
}
|
||||
|
||||
sliceValue := reflect.ValueOf(slice)
|
||||
sliceLen := sliceValue.Len()
|
||||
if sliceLen > possibleCPUs {
|
||||
return syscallPtr{}, errors.Errorf("per-CPU value exceeds number of CPUs")
|
||||
}
|
||||
|
||||
alignedElemLength := align(elemLength, 8)
|
||||
buf := make([]byte, alignedElemLength*possibleCPUs)
|
||||
|
||||
for i := 0; i < sliceLen; i++ {
|
||||
elem := sliceValue.Index(i).Interface()
|
||||
elemBytes, err := marshalBytes(elem, elemLength)
|
||||
if err != nil {
|
||||
return syscallPtr{}, err
|
||||
}
|
||||
|
||||
offset := i * alignedElemLength
|
||||
copy(buf[offset:offset+elemLength], elemBytes)
|
||||
}
|
||||
|
||||
return newPtr(unsafe.Pointer(&buf[0])), nil
|
||||
}
|
||||
|
||||
// unmarshalPerCPUValue decodes a buffer into a slice containing one value per
|
||||
// possible CPU.
|
||||
//
|
||||
// valueOut must have a type like *[]elementType
|
||||
func unmarshalPerCPUValue(slicePtr interface{}, elemLength int, buf []byte) error {
|
||||
slicePtrType := reflect.TypeOf(slicePtr)
|
||||
if slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice {
|
||||
return errors.Errorf("per-cpu value requires pointer to slice")
|
||||
}
|
||||
|
||||
possibleCPUs, err := internal.PossibleCPUs()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sliceType := slicePtrType.Elem()
|
||||
slice := reflect.MakeSlice(sliceType, possibleCPUs, possibleCPUs)
|
||||
|
||||
sliceElemType := sliceType.Elem()
|
||||
sliceElemIsPointer := sliceElemType.Kind() == reflect.Ptr
|
||||
if sliceElemIsPointer {
|
||||
sliceElemType = sliceElemType.Elem()
|
||||
}
|
||||
|
||||
step := len(buf) / possibleCPUs
|
||||
if step < elemLength {
|
||||
return errors.Errorf("per-cpu element length is larger than available data")
|
||||
}
|
||||
for i := 0; i < possibleCPUs; i++ {
|
||||
var elem interface{}
|
||||
if sliceElemIsPointer {
|
||||
newElem := reflect.New(sliceElemType)
|
||||
slice.Index(i).Set(newElem)
|
||||
elem = newElem.Interface()
|
||||
} else {
|
||||
elem = slice.Index(i).Addr().Interface()
|
||||
}
|
||||
|
||||
// Make a copy, since unmarshal can hold on to itemBytes
|
||||
elemBytes := make([]byte, elemLength)
|
||||
copy(elemBytes, buf[:elemLength])
|
||||
|
||||
err := unmarshalBytes(elem, elemBytes)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "cpu %d", i)
|
||||
}
|
||||
|
||||
buf = buf[step:]
|
||||
}
|
||||
|
||||
reflect.ValueOf(slicePtr).Elem().Set(slice)
|
||||
return nil
|
||||
}
|
||||
|
||||
func align(n, alignment int) int {
|
||||
return (int(n) + alignment - 1) / alignment * alignment
|
||||
}
|
|
@ -0,0 +1,523 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"math"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/cilium/ebpf/internal"
|
||||
"github.com/cilium/ebpf/internal/unix"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
var (
|
||||
errNotSupported = errors.New("ebpf: not supported by kernel")
|
||||
)
|
||||
|
||||
const (
|
||||
// Number of bytes to pad the output buffer for BPF_PROG_TEST_RUN.
|
||||
// This is currently the maximum of spare space allocated for SKB
|
||||
// and XDP programs, and equal to XDP_PACKET_HEADROOM + NET_IP_ALIGN.
|
||||
outputPad = 256 + 2
|
||||
)
|
||||
|
||||
// DefaultVerifierLogSize is the default number of bytes allocated for the
|
||||
// verifier log.
|
||||
const DefaultVerifierLogSize = 64 * 1024
|
||||
|
||||
// ProgramOptions control loading a program into the kernel.
|
||||
type ProgramOptions struct {
|
||||
// Controls the detail emitted by the kernel verifier. Set to non-zero
|
||||
// to enable logging.
|
||||
LogLevel uint32
|
||||
// Controls the output buffer size for the verifier. Defaults to
|
||||
// DefaultVerifierLogSize.
|
||||
LogSize int
|
||||
}
|
||||
|
||||
// ProgramSpec defines a Program
|
||||
type ProgramSpec struct {
|
||||
// Name is passed to the kernel as a debug aid. Must only contain
|
||||
// alpha numeric and '_' characters.
|
||||
Name string
|
||||
Type ProgramType
|
||||
AttachType AttachType
|
||||
Instructions asm.Instructions
|
||||
License string
|
||||
KernelVersion uint32
|
||||
}
|
||||
|
||||
// Copy returns a copy of the spec.
|
||||
func (ps *ProgramSpec) Copy() *ProgramSpec {
|
||||
if ps == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
cpy := *ps
|
||||
cpy.Instructions = make(asm.Instructions, len(ps.Instructions))
|
||||
copy(cpy.Instructions, ps.Instructions)
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// Program represents BPF program loaded into the kernel.
|
||||
//
|
||||
// It is not safe to close a Program which is used by other goroutines.
|
||||
type Program struct {
|
||||
// Contains the output of the kernel verifier if enabled,
|
||||
// otherwise it is empty.
|
||||
VerifierLog string
|
||||
|
||||
fd *bpfFD
|
||||
name string
|
||||
abi ProgramABI
|
||||
}
|
||||
|
||||
// NewProgram creates a new Program.
|
||||
//
|
||||
// Loading a program for the first time will perform
|
||||
// feature detection by loading small, temporary programs.
|
||||
func NewProgram(spec *ProgramSpec) (*Program, error) {
|
||||
return NewProgramWithOptions(spec, ProgramOptions{})
|
||||
}
|
||||
|
||||
// NewProgramWithOptions creates a new Program.
|
||||
//
|
||||
// Loading a program for the first time will perform
|
||||
// feature detection by loading small, temporary programs.
|
||||
func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) {
|
||||
attr, err := convertProgramSpec(spec, haveObjName.Result())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
logSize := DefaultVerifierLogSize
|
||||
if opts.LogSize > 0 {
|
||||
logSize = opts.LogSize
|
||||
}
|
||||
|
||||
var logBuf []byte
|
||||
if opts.LogLevel > 0 {
|
||||
logBuf = make([]byte, logSize)
|
||||
attr.logLevel = opts.LogLevel
|
||||
attr.logSize = uint32(len(logBuf))
|
||||
attr.logBuf = newPtr(unsafe.Pointer(&logBuf[0]))
|
||||
}
|
||||
|
||||
fd, err := bpfProgLoad(attr)
|
||||
if err == nil {
|
||||
prog := newProgram(fd, spec.Name, &ProgramABI{spec.Type})
|
||||
prog.VerifierLog = convertCString(logBuf)
|
||||
return prog, nil
|
||||
}
|
||||
|
||||
truncated := errors.Cause(err) == unix.ENOSPC
|
||||
if opts.LogLevel == 0 {
|
||||
// Re-run with the verifier enabled to get better error messages.
|
||||
logBuf = make([]byte, logSize)
|
||||
attr.logLevel = 1
|
||||
attr.logSize = uint32(len(logBuf))
|
||||
attr.logBuf = newPtr(unsafe.Pointer(&logBuf[0]))
|
||||
|
||||
_, nerr := bpfProgLoad(attr)
|
||||
truncated = errors.Cause(nerr) == unix.ENOSPC
|
||||
}
|
||||
|
||||
logs := convertCString(logBuf)
|
||||
if truncated {
|
||||
logs += "\n(truncated...)"
|
||||
}
|
||||
|
||||
return nil, &loadError{err, logs}
|
||||
}
|
||||
|
||||
// NewProgramFromFD creates a program from a raw fd.
|
||||
//
|
||||
// You should not use fd after calling this function.
|
||||
func NewProgramFromFD(fd int) (*Program, error) {
|
||||
if fd < 0 {
|
||||
return nil, errors.New("invalid fd")
|
||||
}
|
||||
bpfFd := newBPFFD(uint32(fd))
|
||||
|
||||
info, err := bpfGetProgInfoByFD(bpfFd)
|
||||
if err != nil {
|
||||
bpfFd.forget()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var name string
|
||||
if bpfName := convertCString(info.name[:]); bpfName != "" {
|
||||
name = bpfName
|
||||
} else {
|
||||
name = convertCString(info.tag[:])
|
||||
}
|
||||
|
||||
return newProgram(bpfFd, name, newProgramABIFromInfo(info)), nil
|
||||
}
|
||||
|
||||
func newProgram(fd *bpfFD, name string, abi *ProgramABI) *Program {
|
||||
return &Program{
|
||||
name: name,
|
||||
fd: fd,
|
||||
abi: *abi,
|
||||
}
|
||||
}
|
||||
|
||||
func convertProgramSpec(spec *ProgramSpec, includeName bool) (*bpfProgLoadAttr, error) {
|
||||
if len(spec.Instructions) == 0 {
|
||||
return nil, errors.New("Instructions cannot be empty")
|
||||
}
|
||||
|
||||
if len(spec.License) == 0 {
|
||||
return nil, errors.New("License cannot be empty")
|
||||
}
|
||||
|
||||
buf := bytes.NewBuffer(make([]byte, 0, len(spec.Instructions)*asm.InstructionSize))
|
||||
err := spec.Instructions.Marshal(buf, internal.NativeEndian)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
bytecode := buf.Bytes()
|
||||
insCount := uint32(len(bytecode) / asm.InstructionSize)
|
||||
lic := []byte(spec.License)
|
||||
attr := &bpfProgLoadAttr{
|
||||
progType: spec.Type,
|
||||
expectedAttachType: spec.AttachType,
|
||||
insCount: insCount,
|
||||
instructions: newPtr(unsafe.Pointer(&bytecode[0])),
|
||||
license: newPtr(unsafe.Pointer(&lic[0])),
|
||||
}
|
||||
|
||||
name, err := newBPFObjName(spec.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if includeName {
|
||||
attr.progName = name
|
||||
}
|
||||
|
||||
return attr, nil
|
||||
}
|
||||
|
||||
func (p *Program) String() string {
|
||||
if p.name != "" {
|
||||
return fmt.Sprintf("%s(%s)#%s", p.abi.Type, p.name, p.fd)
|
||||
}
|
||||
return fmt.Sprintf("%s#%s", p.abi.Type, p.fd)
|
||||
}
|
||||
|
||||
// ABI gets the ABI of the Program
|
||||
func (p *Program) ABI() ProgramABI {
|
||||
return p.abi
|
||||
}
|
||||
|
||||
// FD gets the file descriptor of the Program.
|
||||
//
|
||||
// It is invalid to call this function after Close has been called.
|
||||
func (p *Program) FD() int {
|
||||
fd, err := p.fd.value()
|
||||
if err != nil {
|
||||
// Best effort: -1 is the number most likely to be an
|
||||
// invalid file descriptor.
|
||||
return -1
|
||||
}
|
||||
|
||||
return int(fd)
|
||||
}
|
||||
|
||||
// Clone creates a duplicate of the Program.
|
||||
//
|
||||
// Closing the duplicate does not affect the original, and vice versa.
|
||||
//
|
||||
// Cloning a nil Program returns nil.
|
||||
func (p *Program) Clone() (*Program, error) {
|
||||
if p == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
dup, err := p.fd.dup()
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "can't clone program")
|
||||
}
|
||||
|
||||
return newProgram(dup, p.name, &p.abi), nil
|
||||
}
|
||||
|
||||
// Pin persists the Program past the lifetime of the process that created it
|
||||
//
|
||||
// This requires bpffs to be mounted above fileName. See http://cilium.readthedocs.io/en/doc-1.0/kubernetes/install/#mounting-the-bpf-fs-optional
|
||||
func (p *Program) Pin(fileName string) error {
|
||||
return errors.Wrap(bpfPinObject(fileName, p.fd), "can't pin program")
|
||||
}
|
||||
|
||||
// Close unloads the program from the kernel.
|
||||
func (p *Program) Close() error {
|
||||
if p == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return p.fd.close()
|
||||
}
|
||||
|
||||
// Test runs the Program in the kernel with the given input and returns the
|
||||
// value returned by the eBPF program. outLen may be zero.
|
||||
//
|
||||
// Note: the kernel expects at least 14 bytes input for an ethernet header for
|
||||
// XDP and SKB programs.
|
||||
//
|
||||
// This function requires at least Linux 4.12.
|
||||
func (p *Program) Test(in []byte) (uint32, []byte, error) {
|
||||
ret, out, _, err := p.testRun(in, 1)
|
||||
return ret, out, err
|
||||
}
|
||||
|
||||
// Benchmark runs the Program with the given input for a number of times
|
||||
// and returns the time taken per iteration.
|
||||
//
|
||||
// The returned value is the return value of the last execution of
|
||||
// the program.
|
||||
//
|
||||
// This function requires at least Linux 4.12.
|
||||
func (p *Program) Benchmark(in []byte, repeat int) (uint32, time.Duration, error) {
|
||||
ret, _, total, err := p.testRun(in, repeat)
|
||||
return ret, total, err
|
||||
}
|
||||
|
||||
var noProgTestRun = featureTest{
|
||||
Fn: func() bool {
|
||||
prog, err := NewProgram(&ProgramSpec{
|
||||
Type: SocketFilter,
|
||||
Instructions: asm.Instructions{
|
||||
asm.LoadImm(asm.R0, 0, asm.DWord),
|
||||
asm.Return(),
|
||||
},
|
||||
License: "MIT",
|
||||
})
|
||||
if err != nil {
|
||||
// This may be because we lack sufficient permissions, etc.
|
||||
return false
|
||||
}
|
||||
defer prog.Close()
|
||||
|
||||
fd, err := prog.fd.value()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// Programs require at least 14 bytes input
|
||||
in := make([]byte, 14)
|
||||
attr := bpfProgTestRunAttr{
|
||||
fd: fd,
|
||||
dataSizeIn: uint32(len(in)),
|
||||
dataIn: newPtr(unsafe.Pointer(&in[0])),
|
||||
}
|
||||
|
||||
_, err = bpfCall(_ProgTestRun, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
return errors.Cause(err) == unix.EINVAL
|
||||
},
|
||||
}
|
||||
|
||||
func (p *Program) testRun(in []byte, repeat int) (uint32, []byte, time.Duration, error) {
|
||||
if uint(repeat) > math.MaxUint32 {
|
||||
return 0, nil, 0, fmt.Errorf("repeat is too high")
|
||||
}
|
||||
|
||||
if len(in) == 0 {
|
||||
return 0, nil, 0, fmt.Errorf("missing input")
|
||||
}
|
||||
|
||||
if uint(len(in)) > math.MaxUint32 {
|
||||
return 0, nil, 0, fmt.Errorf("input is too long")
|
||||
}
|
||||
|
||||
if noProgTestRun.Result() {
|
||||
return 0, nil, 0, errNotSupported
|
||||
}
|
||||
|
||||
// Older kernels ignore the dataSizeOut argument when copying to user space.
|
||||
// Combined with things like bpf_xdp_adjust_head() we don't really know what the final
|
||||
// size will be. Hence we allocate an output buffer which we hope will always be large
|
||||
// enough, and panic if the kernel wrote past the end of the allocation.
|
||||
// See https://patchwork.ozlabs.org/cover/1006822/
|
||||
out := make([]byte, len(in)+outputPad)
|
||||
|
||||
fd, err := p.fd.value()
|
||||
if err != nil {
|
||||
return 0, nil, 0, err
|
||||
}
|
||||
|
||||
attr := bpfProgTestRunAttr{
|
||||
fd: fd,
|
||||
dataSizeIn: uint32(len(in)),
|
||||
dataSizeOut: uint32(len(out)),
|
||||
dataIn: newPtr(unsafe.Pointer(&in[0])),
|
||||
dataOut: newPtr(unsafe.Pointer(&out[0])),
|
||||
repeat: uint32(repeat),
|
||||
}
|
||||
|
||||
_, err = bpfCall(_ProgTestRun, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
if err != nil {
|
||||
return 0, nil, 0, errors.Wrap(err, "can't run test")
|
||||
}
|
||||
|
||||
if int(attr.dataSizeOut) > cap(out) {
|
||||
// Houston, we have a problem. The program created more data than we allocated,
|
||||
// and the kernel wrote past the end of our buffer.
|
||||
panic("kernel wrote past end of output buffer")
|
||||
}
|
||||
out = out[:int(attr.dataSizeOut)]
|
||||
|
||||
total := time.Duration(attr.duration) * time.Nanosecond
|
||||
return attr.retval, out, total, nil
|
||||
}
|
||||
|
||||
func unmarshalProgram(buf []byte) (*Program, error) {
|
||||
if len(buf) != 4 {
|
||||
return nil, errors.New("program id requires 4 byte value")
|
||||
}
|
||||
|
||||
// Looking up an entry in a nested map or prog array returns an id,
|
||||
// not an fd.
|
||||
id := internal.NativeEndian.Uint32(buf)
|
||||
fd, err := bpfGetProgramFDByID(id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
abi, err := newProgramABIFromFd(fd)
|
||||
if err != nil {
|
||||
_ = fd.close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newProgram(fd, "", abi), nil
|
||||
}
|
||||
|
||||
// MarshalBinary implements BinaryMarshaler.
|
||||
func (p *Program) MarshalBinary() ([]byte, error) {
|
||||
value, err := p.fd.value()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
buf := make([]byte, 4)
|
||||
internal.NativeEndian.PutUint32(buf, value)
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
// Attach a Program to a container object fd
|
||||
func (p *Program) Attach(fd int, typ AttachType, flags AttachFlags) error {
|
||||
if fd < 0 {
|
||||
return errors.New("invalid fd")
|
||||
}
|
||||
|
||||
pfd, err := p.fd.value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
attr := bpfProgAlterAttr{
|
||||
targetFd: uint32(fd),
|
||||
attachBpfFd: pfd,
|
||||
attachType: uint32(typ),
|
||||
attachFlags: uint32(flags),
|
||||
}
|
||||
|
||||
return bpfProgAlter(_ProgAttach, &attr)
|
||||
}
|
||||
|
||||
// Detach a Program from a container object fd
|
||||
func (p *Program) Detach(fd int, typ AttachType, flags AttachFlags) error {
|
||||
if fd < 0 {
|
||||
return errors.New("invalid fd")
|
||||
}
|
||||
|
||||
pfd, err := p.fd.value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
attr := bpfProgAlterAttr{
|
||||
targetFd: uint32(fd),
|
||||
attachBpfFd: pfd,
|
||||
attachType: uint32(typ),
|
||||
attachFlags: uint32(flags),
|
||||
}
|
||||
|
||||
return bpfProgAlter(_ProgDetach, &attr)
|
||||
}
|
||||
|
||||
// LoadPinnedProgram loads a Program from a BPF file.
|
||||
//
|
||||
// Requires at least Linux 4.13, use LoadPinnedProgramExplicit on
|
||||
// earlier versions.
|
||||
func LoadPinnedProgram(fileName string) (*Program, error) {
|
||||
fd, err := bpfGetObject(fileName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
abi, err := newProgramABIFromFd(fd)
|
||||
if err != nil {
|
||||
_ = fd.close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newProgram(fd, filepath.Base(fileName), abi), nil
|
||||
}
|
||||
|
||||
// LoadPinnedProgramExplicit loads a program with explicit parameters.
|
||||
func LoadPinnedProgramExplicit(fileName string, abi *ProgramABI) (*Program, error) {
|
||||
fd, err := bpfGetObject(fileName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newProgram(fd, filepath.Base(fileName), abi), nil
|
||||
}
|
||||
|
||||
// SanitizeName replaces all invalid characters in name.
|
||||
//
|
||||
// Use this to automatically generate valid names for maps and
|
||||
// programs at run time.
|
||||
//
|
||||
// Passing a negative value for replacement will delete characters
|
||||
// instead of replacing them.
|
||||
func SanitizeName(name string, replacement rune) string {
|
||||
return strings.Map(func(char rune) rune {
|
||||
if invalidBPFObjNameChar(char) {
|
||||
return replacement
|
||||
}
|
||||
return char
|
||||
}, name)
|
||||
}
|
||||
|
||||
type loadError struct {
|
||||
cause error
|
||||
verifierLog string
|
||||
}
|
||||
|
||||
func (le *loadError) Error() string {
|
||||
if le.verifierLog == "" {
|
||||
return fmt.Sprintf("failed to load program: %s", le.cause)
|
||||
}
|
||||
return fmt.Sprintf("failed to load program: %s: %s", le.cause, le.verifierLog)
|
||||
}
|
||||
|
||||
func (le *loadError) Cause() error {
|
||||
return le.cause
|
||||
}
|
||||
|
||||
// IsNotSupported returns true if an error occurred because
|
||||
// the kernel does not have support for a specific feature.
|
||||
func IsNotSupported(err error) bool {
|
||||
return errors.Cause(err) == errNotSupported
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
// +build armbe mips mips64p32
|
||||
|
||||
package ebpf
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// ptr wraps an unsafe.Pointer to be 64bit to
|
||||
// conform to the syscall specification.
|
||||
type syscallPtr struct {
|
||||
pad uint32
|
||||
ptr unsafe.Pointer
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
// +build 386 amd64p32 arm mipsle mips64p32le
|
||||
|
||||
package ebpf
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// ptr wraps an unsafe.Pointer to be 64bit to
|
||||
// conform to the syscall specification.
|
||||
type syscallPtr struct {
|
||||
ptr unsafe.Pointer
|
||||
pad uint32
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
// +build !386,!amd64p32,!arm,!mipsle,!mips64p32le
|
||||
// +build !armbe,!mips,!mips64p32
|
||||
|
||||
package ebpf
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// ptr wraps an unsafe.Pointer to be 64bit to
|
||||
// conform to the syscall specification.
|
||||
type syscallPtr struct {
|
||||
ptr unsafe.Pointer
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
eBPF
|
||||
-------
|
||||
[![](https://godoc.org/github.com/cilium/ebpf?status.svg)](https://godoc.org/github.com/cilium/ebpf)
|
||||
|
||||
eBPF is a pure Go library that provides utilities for loading, compiling, and debugging eBPF programs. It has minimal external dependencies and is intended to be used in long running processes.
|
||||
|
||||
[ebpf/asm](https://godoc.org/github.com/cilium/ebpf/asm) contains a basic assembler.
|
||||
|
||||
The library is maintained by [Cloudflare](https://www.cloudflare.com) and [Cilium](https://www.cilium.io). Feel free to [join](https://cilium.herokuapp.com/) the [libbpf-go](https://cilium.slack.com/messages/libbpf-go) channel on Slack.
|
||||
|
||||
## Current status
|
||||
|
||||
The package is production ready, but **the API is explicitly unstable
|
||||
right now**. Expect to update your code if you want to follow along.
|
||||
|
||||
## Useful resources
|
||||
|
||||
* [Cilium eBPF documentation](https://cilium.readthedocs.io/en/latest/bpf/#bpf-guide) (recommended)
|
||||
* [Linux documentation on BPF](http://elixir.free-electrons.com/linux/latest/source/Documentation/networking/filter.txt)
|
||||
* [eBPF features by Linux version](https://github.com/iovisor/bcc/blob/master/docs/kernel-versions.md)
|
|
@ -0,0 +1,67 @@
|
|||
#!/bin/bash
|
||||
# Test the current package under a different kernel.
|
||||
# Requires virtme and qemu to be installed.
|
||||
|
||||
set -eu
|
||||
set -o pipefail
|
||||
|
||||
if [[ "${1:-}" = "--in-vm" ]]; then
|
||||
shift
|
||||
|
||||
readonly home="$(mktemp --directory)"
|
||||
|
||||
mount -t bpf bpf /sys/fs/bpf
|
||||
export CGO_ENABLED=0
|
||||
export HOME="$home"
|
||||
|
||||
echo Running tests...
|
||||
/usr/local/bin/go test -mod=vendor -coverprofile="$1/coverage.txt" -covermode=atomic -v ./...
|
||||
touch "$1/success"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Force Go modules, so that vendoring and building are easier.
|
||||
export GO111MODULE=on
|
||||
|
||||
# Pull all dependencies, so that we can run tests without the
|
||||
# vm having network access.
|
||||
go mod vendor
|
||||
|
||||
# Use sudo if /dev/kvm isn't accessible by the current user.
|
||||
sudo=""
|
||||
if [[ ! -r /dev/kvm || ! -w /dev/kvm ]]; then
|
||||
sudo="sudo"
|
||||
fi
|
||||
readonly sudo
|
||||
|
||||
readonly kernel_version="${1:-}"
|
||||
if [[ -z "${kernel_version}" ]]; then
|
||||
echo "Expecting kernel version as first argument"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
readonly kernel="linux-${kernel_version}.bz"
|
||||
readonly output="$(mktemp -d)"
|
||||
readonly tmp_dir="$(mktemp -d)"
|
||||
|
||||
test -e "${tmp_dir}/${kernel}" || {
|
||||
echo Fetching ${kernel}
|
||||
curl --fail -L "https://github.com/newtools/ci-kernels/blob/master/${kernel}?raw=true" -o "${tmp_dir}/${kernel}"
|
||||
}
|
||||
|
||||
echo Testing on ${kernel_version}
|
||||
$sudo virtme-run --kimg "${tmp_dir}/${kernel}" --memory 256M --pwd --rwdir=/run/output="${output}" --script-sh "$(realpath "$0") --in-vm /run/output"
|
||||
|
||||
if [[ ! -e "${output}/success" ]]; then
|
||||
echo "Test failed on ${kernel_version}"
|
||||
exit 1
|
||||
else
|
||||
echo "Test successful on ${kernel_version}"
|
||||
if [[ -v CODECOV_TOKEN ]]; then
|
||||
curl --fail -s https://codecov.io/bash > "${tmp_dir}/codecov.sh"
|
||||
chmod +x "${tmp_dir}/codecov.sh"
|
||||
"${tmp_dir}/codecov.sh" -f "${output}/coverage.txt"
|
||||
fi
|
||||
fi
|
||||
|
||||
$sudo rm -r "${output}"
|
|
@ -0,0 +1,420 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
"github.com/cilium/ebpf/internal/unix"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
var errClosedFd = errors.New("use of closed file descriptor")
|
||||
|
||||
type bpfFD struct {
|
||||
raw int64
|
||||
}
|
||||
|
||||
func newBPFFD(value uint32) *bpfFD {
|
||||
fd := &bpfFD{int64(value)}
|
||||
runtime.SetFinalizer(fd, (*bpfFD).close)
|
||||
return fd
|
||||
}
|
||||
|
||||
func (fd *bpfFD) String() string {
|
||||
return strconv.FormatInt(fd.raw, 10)
|
||||
}
|
||||
|
||||
func (fd *bpfFD) value() (uint32, error) {
|
||||
if fd.raw < 0 {
|
||||
return 0, errClosedFd
|
||||
}
|
||||
|
||||
return uint32(fd.raw), nil
|
||||
}
|
||||
|
||||
func (fd *bpfFD) close() error {
|
||||
if fd.raw < 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
value := int(fd.raw)
|
||||
fd.raw = -1
|
||||
|
||||
fd.forget()
|
||||
return unix.Close(value)
|
||||
}
|
||||
|
||||
func (fd *bpfFD) forget() {
|
||||
runtime.SetFinalizer(fd, nil)
|
||||
}
|
||||
|
||||
func (fd *bpfFD) dup() (*bpfFD, error) {
|
||||
if fd.raw < 0 {
|
||||
return nil, errClosedFd
|
||||
}
|
||||
|
||||
dup, err := unix.FcntlInt(uintptr(fd.raw), unix.F_DUPFD_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "can't dup fd")
|
||||
}
|
||||
|
||||
return newBPFFD(uint32(dup)), nil
|
||||
}
|
||||
|
||||
// bpfObjName is a null-terminated string made up of
|
||||
// 'A-Za-z0-9_' characters.
|
||||
type bpfObjName [unix.BPF_OBJ_NAME_LEN]byte
|
||||
|
||||
// newBPFObjName truncates the result if it is too long.
|
||||
func newBPFObjName(name string) (bpfObjName, error) {
|
||||
idx := strings.IndexFunc(name, invalidBPFObjNameChar)
|
||||
if idx != -1 {
|
||||
return bpfObjName{}, errors.Errorf("invalid character '%c' in name '%s'", name[idx], name)
|
||||
}
|
||||
|
||||
var result bpfObjName
|
||||
copy(result[:unix.BPF_OBJ_NAME_LEN-1], name)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func invalidBPFObjNameChar(char rune) bool {
|
||||
switch {
|
||||
case char >= 'A' && char <= 'Z':
|
||||
fallthrough
|
||||
case char >= 'a' && char <= 'z':
|
||||
fallthrough
|
||||
case char >= '0' && char <= '9':
|
||||
fallthrough
|
||||
case char == '_':
|
||||
return false
|
||||
default:
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
type bpfMapCreateAttr struct {
|
||||
mapType MapType
|
||||
keySize uint32
|
||||
valueSize uint32
|
||||
maxEntries uint32
|
||||
flags uint32
|
||||
innerMapFd uint32 // since 4.12 56f668dfe00d
|
||||
numaNode uint32 // since 4.14 96eabe7a40aa
|
||||
mapName bpfObjName // since 4.15 ad5b177bd73f
|
||||
}
|
||||
|
||||
type bpfMapOpAttr struct {
|
||||
mapFd uint32
|
||||
padding uint32
|
||||
key syscallPtr
|
||||
value syscallPtr
|
||||
flags uint64
|
||||
}
|
||||
|
||||
type bpfMapInfo struct {
|
||||
mapType uint32
|
||||
id uint32
|
||||
keySize uint32
|
||||
valueSize uint32
|
||||
maxEntries uint32
|
||||
flags uint32
|
||||
mapName bpfObjName // since 4.15 ad5b177bd73f
|
||||
}
|
||||
|
||||
type bpfPinObjAttr struct {
|
||||
fileName syscallPtr
|
||||
fd uint32
|
||||
padding uint32
|
||||
}
|
||||
|
||||
type bpfProgLoadAttr struct {
|
||||
progType ProgramType
|
||||
insCount uint32
|
||||
instructions syscallPtr
|
||||
license syscallPtr
|
||||
logLevel uint32
|
||||
logSize uint32
|
||||
logBuf syscallPtr
|
||||
kernelVersion uint32 // since 4.1 2541517c32be
|
||||
progFlags uint32 // since 4.11 e07b98d9bffe
|
||||
progName bpfObjName // since 4.15 067cae47771c
|
||||
progIfIndex uint32 // since 4.15 1f6f4cb7ba21
|
||||
expectedAttachType AttachType // since 4.17 5e43f899b03a
|
||||
}
|
||||
|
||||
type bpfProgInfo struct {
|
||||
progType uint32
|
||||
id uint32
|
||||
tag [unix.BPF_TAG_SIZE]byte
|
||||
jitedLen uint32
|
||||
xlatedLen uint32
|
||||
jited syscallPtr
|
||||
xlated syscallPtr
|
||||
loadTime uint64 // since 4.15 cb4d2b3f03d8
|
||||
createdByUID uint32
|
||||
nrMapIDs uint32
|
||||
mapIds syscallPtr
|
||||
name bpfObjName
|
||||
}
|
||||
|
||||
type bpfProgTestRunAttr struct {
|
||||
fd uint32
|
||||
retval uint32
|
||||
dataSizeIn uint32
|
||||
dataSizeOut uint32
|
||||
dataIn syscallPtr
|
||||
dataOut syscallPtr
|
||||
repeat uint32
|
||||
duration uint32
|
||||
}
|
||||
|
||||
type bpfProgAlterAttr struct {
|
||||
targetFd uint32
|
||||
attachBpfFd uint32
|
||||
attachType uint32
|
||||
attachFlags uint32
|
||||
}
|
||||
|
||||
type bpfObjGetInfoByFDAttr struct {
|
||||
fd uint32
|
||||
infoLen uint32
|
||||
info syscallPtr // May be either bpfMapInfo or bpfProgInfo
|
||||
}
|
||||
|
||||
type bpfGetFDByIDAttr struct {
|
||||
id uint32
|
||||
next uint32
|
||||
}
|
||||
|
||||
func newPtr(ptr unsafe.Pointer) syscallPtr {
|
||||
return syscallPtr{ptr: ptr}
|
||||
}
|
||||
|
||||
func bpfProgLoad(attr *bpfProgLoadAttr) (*bpfFD, error) {
|
||||
for {
|
||||
fd, err := bpfCall(_ProgLoad, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
|
||||
// As of ~4.20 the verifier can be interrupted by a signal,
|
||||
// and returns EAGAIN in that case.
|
||||
if err == unix.EAGAIN {
|
||||
continue
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newBPFFD(uint32(fd)), nil
|
||||
}
|
||||
}
|
||||
|
||||
func bpfProgAlter(cmd int, attr *bpfProgAlterAttr) error {
|
||||
_, err := bpfCall(cmd, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
|
||||
return err
|
||||
}
|
||||
|
||||
func bpfMapCreate(attr *bpfMapCreateAttr) (*bpfFD, error) {
|
||||
fd, err := bpfCall(_MapCreate, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newBPFFD(uint32(fd)), nil
|
||||
}
|
||||
|
||||
func bpfMapLookupElem(m *bpfFD, key, valueOut syscallPtr) error {
|
||||
fd, err := m.value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
attr := bpfMapOpAttr{
|
||||
mapFd: fd,
|
||||
key: key,
|
||||
value: valueOut,
|
||||
}
|
||||
_, err = bpfCall(_MapLookupElem, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
return err
|
||||
}
|
||||
|
||||
func bpfMapUpdateElem(m *bpfFD, key, valueOut syscallPtr, flags uint64) error {
|
||||
fd, err := m.value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
attr := bpfMapOpAttr{
|
||||
mapFd: fd,
|
||||
key: key,
|
||||
value: valueOut,
|
||||
flags: flags,
|
||||
}
|
||||
_, err = bpfCall(_MapUpdateElem, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
return err
|
||||
}
|
||||
|
||||
func bpfMapDeleteElem(m *bpfFD, key syscallPtr) error {
|
||||
fd, err := m.value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
attr := bpfMapOpAttr{
|
||||
mapFd: fd,
|
||||
key: key,
|
||||
}
|
||||
_, err = bpfCall(_MapDeleteElem, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
return err
|
||||
}
|
||||
|
||||
func bpfMapGetNextKey(m *bpfFD, key, nextKeyOut syscallPtr) error {
|
||||
fd, err := m.value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
attr := bpfMapOpAttr{
|
||||
mapFd: fd,
|
||||
key: key,
|
||||
value: nextKeyOut,
|
||||
}
|
||||
_, err = bpfCall(_MapGetNextKey, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
return err
|
||||
}
|
||||
|
||||
const bpfFSType = 0xcafe4a11
|
||||
|
||||
func bpfPinObject(fileName string, fd *bpfFD) error {
|
||||
dirName := filepath.Dir(fileName)
|
||||
var statfs unix.Statfs_t
|
||||
if err := unix.Statfs(dirName, &statfs); err != nil {
|
||||
return err
|
||||
}
|
||||
if uint64(statfs.Type) != bpfFSType {
|
||||
return errors.Errorf("%s is not on a bpf filesystem", fileName)
|
||||
}
|
||||
|
||||
value, err := fd.value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = bpfCall(_ObjPin, unsafe.Pointer(&bpfPinObjAttr{
|
||||
fileName: newPtr(unsafe.Pointer(&[]byte(fileName)[0])),
|
||||
fd: value,
|
||||
}), 16)
|
||||
return errors.Wrapf(err, "pin object %s", fileName)
|
||||
}
|
||||
|
||||
func bpfGetObject(fileName string) (*bpfFD, error) {
|
||||
ptr, err := bpfCall(_ObjGet, unsafe.Pointer(&bpfPinObjAttr{
|
||||
fileName: newPtr(unsafe.Pointer(&[]byte(fileName)[0])),
|
||||
}), 16)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "get object %s", fileName)
|
||||
}
|
||||
return newBPFFD(uint32(ptr)), nil
|
||||
}
|
||||
|
||||
func bpfGetObjectInfoByFD(fd *bpfFD, info unsafe.Pointer, size uintptr) error {
|
||||
value, err := fd.value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// available from 4.13
|
||||
attr := bpfObjGetInfoByFDAttr{
|
||||
fd: value,
|
||||
infoLen: uint32(size),
|
||||
info: newPtr(info),
|
||||
}
|
||||
_, err = bpfCall(_ObjGetInfoByFD, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
return errors.Wrapf(err, "fd %d", value)
|
||||
}
|
||||
|
||||
func bpfGetProgInfoByFD(fd *bpfFD) (*bpfProgInfo, error) {
|
||||
var info bpfProgInfo
|
||||
err := bpfGetObjectInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info))
|
||||
return &info, errors.Wrap(err, "can't get program info")
|
||||
}
|
||||
|
||||
func bpfGetMapInfoByFD(fd *bpfFD) (*bpfMapInfo, error) {
|
||||
var info bpfMapInfo
|
||||
err := bpfGetObjectInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info))
|
||||
return &info, errors.Wrap(err, "can't get map info:")
|
||||
}
|
||||
|
||||
var haveObjName = featureTest{
|
||||
Fn: func() bool {
|
||||
name, err := newBPFObjName("feature_test")
|
||||
if err != nil {
|
||||
// This really is a fatal error, but it should be caught
|
||||
// by the unit tests not working.
|
||||
return false
|
||||
}
|
||||
|
||||
attr := bpfMapCreateAttr{
|
||||
mapType: Array,
|
||||
keySize: 4,
|
||||
valueSize: 4,
|
||||
maxEntries: 1,
|
||||
mapName: name,
|
||||
}
|
||||
|
||||
fd, err := bpfMapCreate(&attr)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
_ = fd.close()
|
||||
return true
|
||||
},
|
||||
}
|
||||
|
||||
func bpfGetMapFDByID(id uint32) (*bpfFD, error) {
|
||||
// available from 4.13
|
||||
attr := bpfGetFDByIDAttr{
|
||||
id: id,
|
||||
}
|
||||
ptr, err := bpfCall(_MapGetFDByID, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "can't get fd for map id %d", id)
|
||||
}
|
||||
return newBPFFD(uint32(ptr)), nil
|
||||
}
|
||||
|
||||
func bpfGetProgramFDByID(id uint32) (*bpfFD, error) {
|
||||
// available from 4.13
|
||||
attr := bpfGetFDByIDAttr{
|
||||
id: id,
|
||||
}
|
||||
ptr, err := bpfCall(_ProgGetFDByID, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "can't get fd for program id %d", id)
|
||||
}
|
||||
return newBPFFD(uint32(ptr)), nil
|
||||
}
|
||||
|
||||
func bpfCall(cmd int, attr unsafe.Pointer, size uintptr) (uintptr, error) {
|
||||
r1, _, errNo := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size)
|
||||
runtime.KeepAlive(attr)
|
||||
|
||||
var err error
|
||||
if errNo != 0 {
|
||||
err = errNo
|
||||
}
|
||||
|
||||
return r1, err
|
||||
}
|
||||
|
||||
func convertCString(in []byte) string {
|
||||
inLen := bytes.IndexByte(in, 0)
|
||||
if inLen == -1 {
|
||||
return ""
|
||||
}
|
||||
return string(in[:inLen])
|
||||
}
|
|
@ -0,0 +1,189 @@
|
|||
package ebpf
|
||||
|
||||
//go:generate stringer -output types_string.go -type=MapType,ProgramType
|
||||
|
||||
// MapType indicates the type map structure
|
||||
// that will be initialized in the kernel.
|
||||
type MapType uint32
|
||||
|
||||
// All the various map types that can be created
|
||||
const (
|
||||
UnspecifiedMap MapType = iota
|
||||
// Hash is a hash map
|
||||
Hash
|
||||
// Array is an array map
|
||||
Array
|
||||
// ProgramArray - A program array map is a special kind of array map whose map
|
||||
// values contain only file descriptors referring to other eBPF
|
||||
// programs. Thus, both the key_size and value_size must be
|
||||
// exactly four bytes. This map is used in conjunction with the
|
||||
// TailCall helper.
|
||||
ProgramArray
|
||||
// PerfEventArray - A perf event array is used in conjunction with PerfEventRead
|
||||
// and PerfEventOutput calls, to read the raw bpf_perf_data from the registers.
|
||||
PerfEventArray
|
||||
// PerCPUHash - This data structure is useful for people who have high performance
|
||||
// network needs and can reconcile adds at the end of some cycle, so that
|
||||
// hashes can be lock free without the use of XAdd, which can be costly.
|
||||
PerCPUHash
|
||||
// PerCPUArray - This data structure is useful for people who have high performance
|
||||
// network needs and can reconcile adds at the end of some cycle, so that
|
||||
// hashes can be lock free without the use of XAdd, which can be costly.
|
||||
// Each CPU gets a copy of this hash, the contents of all of which can be reconciled
|
||||
// later.
|
||||
PerCPUArray
|
||||
// StackTrace - This holds whole user and kernel stack traces, it can be retrieved with
|
||||
// GetStackID
|
||||
StackTrace
|
||||
// CGroupArray - This is a very niche structure used to help SKBInCGroup determine
|
||||
// if an skb is from a socket belonging to a specific cgroup
|
||||
CGroupArray
|
||||
// LRUHash - This allows you to create a small hash structure that will purge the
|
||||
// least recently used items rather than thow an error when you run out of memory
|
||||
LRUHash
|
||||
// LRUCPUHash - This is NOT like PerCPUHash, this structure is shared among the CPUs,
|
||||
// it has more to do with including the CPU id with the LRU calculation so that if a
|
||||
// particular CPU is using a value over-and-over again, then it will be saved, but if
|
||||
// a value is being retrieved a lot but sparsely across CPUs it is not as important, basically
|
||||
// giving weight to CPU locality over overall usage.
|
||||
LRUCPUHash
|
||||
// LPMTrie - This is an implementation of Longest-Prefix-Match Trie structure. It is useful,
|
||||
// for storing things like IP addresses which can be bit masked allowing for keys of differing
|
||||
// values to refer to the same reference based on their masks. See wikipedia for more details.
|
||||
LPMTrie
|
||||
// ArrayOfMaps - Each item in the array is another map. The inner map mustn't be a map of maps
|
||||
// itself.
|
||||
ArrayOfMaps
|
||||
// HashOfMaps - Each item in the hash map is another map. The inner map mustn't be a map of maps
|
||||
// itself.
|
||||
HashOfMaps
|
||||
)
|
||||
|
||||
// hasPerCPUValue returns true if the Map stores a value per CPU.
|
||||
func (mt MapType) hasPerCPUValue() bool {
|
||||
if mt == PerCPUHash || mt == PerCPUArray {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
const (
|
||||
_MapCreate = iota
|
||||
_MapLookupElem
|
||||
_MapUpdateElem
|
||||
_MapDeleteElem
|
||||
_MapGetNextKey
|
||||
_ProgLoad
|
||||
_ObjPin
|
||||
_ObjGet
|
||||
_ProgAttach
|
||||
_ProgDetach
|
||||
_ProgTestRun
|
||||
_ProgGetNextID
|
||||
_MapGetNextID
|
||||
_ProgGetFDByID
|
||||
_MapGetFDByID
|
||||
_ObjGetInfoByFD
|
||||
)
|
||||
|
||||
const (
|
||||
_Any = iota
|
||||
_NoExist
|
||||
_Exist
|
||||
)
|
||||
|
||||
// ProgramType of the eBPF program
|
||||
type ProgramType uint32
|
||||
|
||||
// eBPF program types
|
||||
const (
|
||||
// Unrecognized program type
|
||||
UnspecifiedProgram ProgramType = iota
|
||||
// SocketFilter socket or seccomp filter
|
||||
SocketFilter
|
||||
// Kprobe program
|
||||
Kprobe
|
||||
// SchedCLS traffic control shaper
|
||||
SchedCLS
|
||||
// SchedACT routing control shaper
|
||||
SchedACT
|
||||
// TracePoint program
|
||||
TracePoint
|
||||
// XDP program
|
||||
XDP
|
||||
// PerfEvent program
|
||||
PerfEvent
|
||||
// CGroupSKB program
|
||||
CGroupSKB
|
||||
// CGroupSock program
|
||||
CGroupSock
|
||||
// LWTIn program
|
||||
LWTIn
|
||||
// LWTOut program
|
||||
LWTOut
|
||||
// LWTXmit program
|
||||
LWTXmit
|
||||
// SockOps program
|
||||
SockOps
|
||||
// SkSKB program
|
||||
SkSKB
|
||||
// CGroupDevice program
|
||||
CGroupDevice
|
||||
// SkMsg program
|
||||
SkMsg
|
||||
// RawTracepoint program
|
||||
RawTracepoint
|
||||
// CGroupSockAddr program
|
||||
CGroupSockAddr
|
||||
// LWTSeg6Local program
|
||||
LWTSeg6Local
|
||||
// LircMode2 program
|
||||
LircMode2
|
||||
// SkReuseport program
|
||||
SkReuseport
|
||||
// FlowDissector program
|
||||
FlowDissector
|
||||
// CGroupSysctl program
|
||||
CGroupSysctl
|
||||
// RawTracepointWritable program
|
||||
RawTracepointWritable
|
||||
// CGroupSockopt program
|
||||
CGroupSockopt
|
||||
)
|
||||
|
||||
// AttachType of the eBPF program, needed to differentiate allowed context accesses in
|
||||
// some newer program types like CGroupSockAddr. Should be set to AttachNone if not required.
|
||||
// Will cause invalid argument (EINVAL) at program load time if set incorrectly.
|
||||
type AttachType uint32
|
||||
|
||||
// AttachNone is an alias for AttachCGroupInetIngress for readability reasons
|
||||
const AttachNone AttachType = 0
|
||||
|
||||
const (
|
||||
AttachCGroupInetIngress AttachType = iota
|
||||
AttachCGroupInetEgress
|
||||
AttachCGroupInetSockCreate
|
||||
AttachCGroupSockOps
|
||||
AttachSkSKBStreamParser
|
||||
AttachSkSKBStreamVerdict
|
||||
AttachCGroupDevice
|
||||
AttachSkMsgVerdict
|
||||
AttachCGroupInet4Bind
|
||||
AttachCGroupInet6Bind
|
||||
AttachCGroupInet4Connect
|
||||
AttachCGroupInet6Connect
|
||||
AttachCGroupInet4PostBind
|
||||
AttachCGroupInet6PostBind
|
||||
AttachCGroupUDP4Sendmsg
|
||||
AttachCGroupUDP6Sendmsg
|
||||
AttachLircMode2
|
||||
AttachFlowDissector
|
||||
AttachCGroupSysctl
|
||||
AttachCGroupUDP4Recvmsg
|
||||
AttachCGroupUDP6Recvmsg
|
||||
AttachCGroupGetsockopt
|
||||
AttachCGroupSetsockopt
|
||||
)
|
||||
|
||||
// AttachFlags of the eBPF program used in BPF_PROG_ATTACH command
|
||||
type AttachFlags uint32
|
|
@ -0,0 +1,78 @@
|
|||
// Code generated by "stringer -output types_string.go -type=MapType,ProgramType"; DO NOT EDIT.
|
||||
|
||||
package ebpf
|
||||
|
||||
import "strconv"
|
||||
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[UnspecifiedMap-0]
|
||||
_ = x[Hash-1]
|
||||
_ = x[Array-2]
|
||||
_ = x[ProgramArray-3]
|
||||
_ = x[PerfEventArray-4]
|
||||
_ = x[PerCPUHash-5]
|
||||
_ = x[PerCPUArray-6]
|
||||
_ = x[StackTrace-7]
|
||||
_ = x[CGroupArray-8]
|
||||
_ = x[LRUHash-9]
|
||||
_ = x[LRUCPUHash-10]
|
||||
_ = x[LPMTrie-11]
|
||||
_ = x[ArrayOfMaps-12]
|
||||
_ = x[HashOfMaps-13]
|
||||
}
|
||||
|
||||
const _MapType_name = "UnspecifiedMapHashArrayProgramArrayPerfEventArrayPerCPUHashPerCPUArrayStackTraceCGroupArrayLRUHashLRUCPUHashLPMTrieArrayOfMapsHashOfMaps"
|
||||
|
||||
var _MapType_index = [...]uint8{0, 14, 18, 23, 35, 49, 59, 70, 80, 91, 98, 108, 115, 126, 136}
|
||||
|
||||
func (i MapType) String() string {
|
||||
if i >= MapType(len(_MapType_index)-1) {
|
||||
return "MapType(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
return _MapType_name[_MapType_index[i]:_MapType_index[i+1]]
|
||||
}
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[UnspecifiedProgram-0]
|
||||
_ = x[SocketFilter-1]
|
||||
_ = x[Kprobe-2]
|
||||
_ = x[SchedCLS-3]
|
||||
_ = x[SchedACT-4]
|
||||
_ = x[TracePoint-5]
|
||||
_ = x[XDP-6]
|
||||
_ = x[PerfEvent-7]
|
||||
_ = x[CGroupSKB-8]
|
||||
_ = x[CGroupSock-9]
|
||||
_ = x[LWTIn-10]
|
||||
_ = x[LWTOut-11]
|
||||
_ = x[LWTXmit-12]
|
||||
_ = x[SockOps-13]
|
||||
_ = x[SkSKB-14]
|
||||
_ = x[CGroupDevice-15]
|
||||
_ = x[SkMsg-16]
|
||||
_ = x[RawTracepoint-17]
|
||||
_ = x[CGroupSockAddr-18]
|
||||
_ = x[LWTSeg6Local-19]
|
||||
_ = x[LircMode2-20]
|
||||
_ = x[SkReuseport-21]
|
||||
_ = x[FlowDissector-22]
|
||||
_ = x[CGroupSysctl-23]
|
||||
_ = x[RawTracepointWritable-24]
|
||||
_ = x[CGroupSockopt-25]
|
||||
}
|
||||
|
||||
const _ProgramType_name = "UnspecifiedProgramSocketFilterKprobeSchedCLSSchedACTTracePointXDPPerfEventCGroupSKBCGroupSockLWTInLWTOutLWTXmitSockOpsSkSKBCGroupDeviceSkMsgRawTracepointCGroupSockAddrLWTSeg6LocalLircMode2SkReuseportFlowDissectorCGroupSysctlRawTracepointWritableCGroupSockopt"
|
||||
|
||||
var _ProgramType_index = [...]uint16{0, 18, 30, 36, 44, 52, 62, 65, 74, 83, 93, 98, 104, 111, 118, 123, 135, 140, 153, 167, 179, 188, 199, 212, 224, 245, 258}
|
||||
|
||||
func (i ProgramType) String() string {
|
||||
if i >= ProgramType(len(_ProgramType_index)-1) {
|
||||
return "ProgramType(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
return _ProgramType_name[_ProgramType_index[i]:_ProgramType_index[i+1]]
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
dist: xenial
|
||||
dist: bionic
|
||||
language: go
|
||||
go:
|
||||
- 1.11.x
|
||||
|
@ -13,6 +13,20 @@ matrix:
|
|||
script:
|
||||
- make BUILDTAGS="${BUILDTAGS}" all
|
||||
- sudo PATH="$PATH" make localintegration RUNC_USE_SYSTEMD=1
|
||||
- go: 1.12.x
|
||||
env:
|
||||
- VIRTUALBOX_VERSION=6.0
|
||||
- VAGRANT_VERSION=2.2.6
|
||||
- FEDORA_VERSION=31
|
||||
before_install:
|
||||
- cat /proc/cpuinfo
|
||||
- wget -q https://www.virtualbox.org/download/oracle_vbox_2016.asc -O- | sudo apt-key add - && sudo sh -c "echo deb https://download.virtualbox.org/virtualbox/debian $(lsb_release -cs) contrib >> /etc/apt/sources.list" && sudo apt-get update && sudo apt-get install -yq build-essential gcc make linux-headers-$(uname -r) virtualbox-${VIRTUALBOX_VERSION} && sudo usermod -aG vboxusers $(whoami)
|
||||
- wget https://releases.hashicorp.com/vagrant/${VAGRANT_VERSION}/vagrant_${VAGRANT_VERSION}_$(uname -m).deb && sudo dpkg -i vagrant_${VAGRANT_VERSION}_$(uname -m).deb
|
||||
- vagrant init bento/fedora-${FEDORA_VERSION} && vagrant up && mkdir -p ~/.ssh && vagrant ssh-config >> ~/.ssh/config
|
||||
- ssh default sudo dnf install -y podman
|
||||
script:
|
||||
- ssh default sudo podman build -t test /vagrant
|
||||
- ssh default sudo podman run --privileged --cgroupns=private test make localunittest
|
||||
allow_failures:
|
||||
- go: tip
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
test unittest integration \
|
||||
cross localcross
|
||||
|
||||
CONTAINER_ENGINE := docker
|
||||
GO := go
|
||||
|
||||
SOURCES := $(shell find . 2>&1 | grep -E '.*\.(c|h|go)$$')
|
||||
|
@ -40,14 +41,14 @@ contrib/cmd/recvtty/recvtty: $(SOURCES)
|
|||
$(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty
|
||||
|
||||
static: $(SOURCES)
|
||||
CGO_ENABLED=1 $(GO) build $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo osusergo static_build" -installsuffix netgo -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -o runc .
|
||||
CGO_ENABLED=1 $(GO) build $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo osusergo static_build" -installsuffix netgo -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty
|
||||
CGO_ENABLED=1 $(GO) build $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo osusergo" -installsuffix netgo -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -o runc .
|
||||
CGO_ENABLED=1 $(GO) build $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo osusergo" -installsuffix netgo -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty
|
||||
|
||||
release:
|
||||
script/release.sh -r release/$(VERSION) -v $(VERSION)
|
||||
|
||||
dbuild: runcimage
|
||||
docker run ${DOCKER_RUN_PROXY} --rm -v $(CURDIR):/go/src/$(PROJECT) --privileged $(RUNC_IMAGE) make clean all
|
||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} --rm -v $(CURDIR):/go/src/$(PROJECT) --privileged $(RUNC_IMAGE) make clean all
|
||||
|
||||
lint:
|
||||
$(GO) vet $(allpackages)
|
||||
|
@ -57,7 +58,7 @@ man:
|
|||
man/md2man-all.sh
|
||||
|
||||
runcimage:
|
||||
docker build ${DOCKER_BUILD_PROXY} -t $(RUNC_IMAGE) .
|
||||
$(CONTAINER_ENGINE) build ${CONTAINER_ENGINE_BUILD_FLAGS} -t $(RUNC_IMAGE) .
|
||||
|
||||
test:
|
||||
make unittest integration rootlessintegration
|
||||
|
@ -66,25 +67,25 @@ localtest:
|
|||
make localunittest localintegration localrootlessintegration
|
||||
|
||||
unittest: runcimage
|
||||
docker run ${DOCKER_RUN_PROXY} -t --privileged --rm -v /lib/modules:/lib/modules:ro -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localunittest TESTFLAGS=${TESTFLAGS}
|
||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -t --privileged --rm -v /lib/modules:/lib/modules:ro -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localunittest TESTFLAGS=${TESTFLAGS}
|
||||
|
||||
localunittest: all
|
||||
$(GO) test -timeout 3m -tags "$(BUILDTAGS)" ${TESTFLAGS} -v $(allpackages)
|
||||
|
||||
integration: runcimage
|
||||
docker run ${DOCKER_RUN_PROXY} -t --privileged --rm -v /lib/modules:/lib/modules:ro -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localintegration TESTPATH=${TESTPATH}
|
||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -t --privileged --rm -v /lib/modules:/lib/modules:ro -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localintegration TESTPATH=${TESTPATH}
|
||||
|
||||
localintegration: all
|
||||
bats -t tests/integration${TESTPATH}
|
||||
|
||||
rootlessintegration: runcimage
|
||||
docker run ${DOCKER_RUN_PROXY} -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localrootlessintegration
|
||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localrootlessintegration
|
||||
|
||||
localrootlessintegration: all
|
||||
tests/rootless.sh
|
||||
|
||||
shell: runcimage
|
||||
docker run ${DOCKER_RUN_PROXY} -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) bash
|
||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) bash
|
||||
|
||||
install:
|
||||
install -D -m0755 runc $(BINDIR)/runc
|
||||
|
@ -119,7 +120,7 @@ validate:
|
|||
ci: validate test release
|
||||
|
||||
cross: runcimage
|
||||
docker run ${DOCKER_RUN_PROXY} -e BUILDTAGS="$(BUILDTAGS)" --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localcross
|
||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -e BUILDTAGS="$(BUILDTAGS)" --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localcross
|
||||
|
||||
localcross:
|
||||
CGO_ENABLED=1 GOARCH=arm GOARM=6 CC=arm-linux-gnueabi-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-armel .
|
||||
|
|
|
@ -16,9 +16,13 @@ This means that `runc` 1.0.0 should implement the 1.0 version of the specificati
|
|||
|
||||
You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page.
|
||||
|
||||
Currently, the following features are not considered to be production-ready:
|
||||
|
||||
* Support for cgroup v2
|
||||
|
||||
## Security
|
||||
|
||||
Reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/)
|
||||
The reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/).
|
||||
|
||||
## Building
|
||||
|
||||
|
@ -229,7 +233,14 @@ runc delete mycontainerid
|
|||
This allows higher level systems to augment the containers creation logic with setup of various settings after the container is created and/or before it is deleted. For example, the container's network stack is commonly set up after `create` but before `start`.
|
||||
|
||||
#### Rootless containers
|
||||
`runc` has the ability to run containers without root privileges. This is called `rootless`. You need to pass some parameters to `runc` in order to run rootless containers. See below and compare with the previous version. Run the following commands as an ordinary user:
|
||||
`runc` has the ability to run containers without root privileges. This is called `rootless`. You need to pass some parameters to `runc` in order to run rootless containers. See below and compare with the previous version.
|
||||
|
||||
**Note:** In order to use this feature, "User Namespaces" must be compiled and enabled in your kernel. There are various ways to do this depending on your distribution:
|
||||
- Confirm `CONFIG_USER_NS=y` is set in your kernel configuration (normally found in `/proc/config.gz`)
|
||||
- Arch/Debian: `echo 1 > /proc/sys/kernel/unprivileged_userns_clone`
|
||||
- RHEL/CentOS 7: `echo 28633 > /proc/sys/user/max_user_namespaces`
|
||||
|
||||
Run the following commands as an ordinary user:
|
||||
```bash
|
||||
# Same as the first example
|
||||
mkdir ~/mycontainer
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
# Security
|
||||
|
||||
The reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/).
|
|
@ -1 +1 @@
|
|||
1.0.0-rc9
|
||||
1.0.0-rc10
|
||||
|
|
|
@ -12,125 +12,12 @@ import (
|
|||
"github.com/opencontainers/runc/libcontainer"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
"github.com/opencontainers/runc/types"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli"
|
||||
)
|
||||
|
||||
// event struct for encoding the event data to json.
|
||||
type event struct {
|
||||
Type string `json:"type"`
|
||||
ID string `json:"id"`
|
||||
Data interface{} `json:"data,omitempty"`
|
||||
}
|
||||
|
||||
// stats is the runc specific stats structure for stability when encoding and decoding stats.
|
||||
type stats struct {
|
||||
CPU cpu `json:"cpu"`
|
||||
Memory memory `json:"memory"`
|
||||
Pids pids `json:"pids"`
|
||||
Blkio blkio `json:"blkio"`
|
||||
Hugetlb map[string]hugetlb `json:"hugetlb"`
|
||||
IntelRdt intelRdt `json:"intel_rdt"`
|
||||
}
|
||||
|
||||
type hugetlb struct {
|
||||
Usage uint64 `json:"usage,omitempty"`
|
||||
Max uint64 `json:"max,omitempty"`
|
||||
Failcnt uint64 `json:"failcnt"`
|
||||
}
|
||||
|
||||
type blkioEntry struct {
|
||||
Major uint64 `json:"major,omitempty"`
|
||||
Minor uint64 `json:"minor,omitempty"`
|
||||
Op string `json:"op,omitempty"`
|
||||
Value uint64 `json:"value,omitempty"`
|
||||
}
|
||||
|
||||
type blkio struct {
|
||||
IoServiceBytesRecursive []blkioEntry `json:"ioServiceBytesRecursive,omitempty"`
|
||||
IoServicedRecursive []blkioEntry `json:"ioServicedRecursive,omitempty"`
|
||||
IoQueuedRecursive []blkioEntry `json:"ioQueueRecursive,omitempty"`
|
||||
IoServiceTimeRecursive []blkioEntry `json:"ioServiceTimeRecursive,omitempty"`
|
||||
IoWaitTimeRecursive []blkioEntry `json:"ioWaitTimeRecursive,omitempty"`
|
||||
IoMergedRecursive []blkioEntry `json:"ioMergedRecursive,omitempty"`
|
||||
IoTimeRecursive []blkioEntry `json:"ioTimeRecursive,omitempty"`
|
||||
SectorsRecursive []blkioEntry `json:"sectorsRecursive,omitempty"`
|
||||
}
|
||||
|
||||
type pids struct {
|
||||
Current uint64 `json:"current,omitempty"`
|
||||
Limit uint64 `json:"limit,omitempty"`
|
||||
}
|
||||
|
||||
type throttling struct {
|
||||
Periods uint64 `json:"periods,omitempty"`
|
||||
ThrottledPeriods uint64 `json:"throttledPeriods,omitempty"`
|
||||
ThrottledTime uint64 `json:"throttledTime,omitempty"`
|
||||
}
|
||||
|
||||
type cpuUsage struct {
|
||||
// Units: nanoseconds.
|
||||
Total uint64 `json:"total,omitempty"`
|
||||
Percpu []uint64 `json:"percpu,omitempty"`
|
||||
Kernel uint64 `json:"kernel"`
|
||||
User uint64 `json:"user"`
|
||||
}
|
||||
|
||||
type cpu struct {
|
||||
Usage cpuUsage `json:"usage,omitempty"`
|
||||
Throttling throttling `json:"throttling,omitempty"`
|
||||
}
|
||||
|
||||
type memoryEntry struct {
|
||||
Limit uint64 `json:"limit"`
|
||||
Usage uint64 `json:"usage,omitempty"`
|
||||
Max uint64 `json:"max,omitempty"`
|
||||
Failcnt uint64 `json:"failcnt"`
|
||||
}
|
||||
|
||||
type memory struct {
|
||||
Cache uint64 `json:"cache,omitempty"`
|
||||
Usage memoryEntry `json:"usage,omitempty"`
|
||||
Swap memoryEntry `json:"swap,omitempty"`
|
||||
Kernel memoryEntry `json:"kernel,omitempty"`
|
||||
KernelTCP memoryEntry `json:"kernelTCP,omitempty"`
|
||||
Raw map[string]uint64 `json:"raw,omitempty"`
|
||||
}
|
||||
|
||||
type l3CacheInfo struct {
|
||||
CbmMask string `json:"cbm_mask,omitempty"`
|
||||
MinCbmBits uint64 `json:"min_cbm_bits,omitempty"`
|
||||
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||
}
|
||||
|
||||
type memBwInfo struct {
|
||||
BandwidthGran uint64 `json:"bandwidth_gran,omitempty"`
|
||||
DelayLinear uint64 `json:"delay_linear,omitempty"`
|
||||
MinBandwidth uint64 `json:"min_bandwidth,omitempty"`
|
||||
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||
}
|
||||
|
||||
type intelRdt struct {
|
||||
// The read-only L3 cache information
|
||||
L3CacheInfo *l3CacheInfo `json:"l3_cache_info,omitempty"`
|
||||
|
||||
// The read-only L3 cache schema in root
|
||||
L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"`
|
||||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||
|
||||
// The read-only memory bandwidth information
|
||||
MemBwInfo *memBwInfo `json:"mem_bw_info,omitempty"`
|
||||
|
||||
// The read-only memory bandwidth schema in root
|
||||
MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"`
|
||||
|
||||
// The memory bandwidth schema in 'container_id' group
|
||||
MemBwSchema string `json:"mem_bw_schema,omitempty"`
|
||||
}
|
||||
|
||||
var eventsCommand = cli.Command{
|
||||
Name: "events",
|
||||
Usage: "display container events such as OOM notifications, cpu, memory, and IO usage statistics",
|
||||
|
@ -164,7 +51,7 @@ information is displayed once every 5 seconds.`,
|
|||
}
|
||||
var (
|
||||
stats = make(chan *libcontainer.Stats, 1)
|
||||
events = make(chan *event, 1024)
|
||||
events = make(chan *types.Event, 1024)
|
||||
group = &sync.WaitGroup{}
|
||||
)
|
||||
group.Add(1)
|
||||
|
@ -182,7 +69,7 @@ information is displayed once every 5 seconds.`,
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
events <- &event{Type: "stats", ID: container.ID(), Data: convertLibcontainerStats(s)}
|
||||
events <- &types.Event{Type: "stats", ID: container.ID(), Data: convertLibcontainerStats(s)}
|
||||
close(events)
|
||||
group.Wait()
|
||||
return nil
|
||||
|
@ -208,12 +95,12 @@ information is displayed once every 5 seconds.`,
|
|||
// this means an oom event was received, if it is !ok then
|
||||
// the channel was closed because the container stopped and
|
||||
// the cgroups no longer exist.
|
||||
events <- &event{Type: "oom", ID: container.ID()}
|
||||
events <- &types.Event{Type: "oom", ID: container.ID()}
|
||||
} else {
|
||||
n = nil
|
||||
}
|
||||
case s := <-stats:
|
||||
events <- &event{Type: "stats", ID: container.ID(), Data: convertLibcontainerStats(s)}
|
||||
events <- &types.Event{Type: "stats", ID: container.ID(), Data: convertLibcontainerStats(s)}
|
||||
}
|
||||
if n == nil {
|
||||
close(events)
|
||||
|
@ -225,12 +112,12 @@ information is displayed once every 5 seconds.`,
|
|||
},
|
||||
}
|
||||
|
||||
func convertLibcontainerStats(ls *libcontainer.Stats) *stats {
|
||||
func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
|
||||
cg := ls.CgroupStats
|
||||
if cg == nil {
|
||||
return nil
|
||||
}
|
||||
var s stats
|
||||
var s types.Stats
|
||||
s.Pids.Current = cg.PidsStats.Current
|
||||
s.Pids.Limit = cg.PidsStats.Limit
|
||||
|
||||
|
@ -258,7 +145,7 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *stats {
|
|||
s.Blkio.IoTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoTimeRecursive)
|
||||
s.Blkio.SectorsRecursive = convertBlkioEntry(cg.BlkioStats.SectorsRecursive)
|
||||
|
||||
s.Hugetlb = make(map[string]hugetlb)
|
||||
s.Hugetlb = make(map[string]types.Hugetlb)
|
||||
for k, v := range cg.HugetlbStats {
|
||||
s.Hugetlb[k] = convertHugtlb(v)
|
||||
}
|
||||
|
@ -276,19 +163,20 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *stats {
|
|||
}
|
||||
}
|
||||
|
||||
s.NetworkInterfaces = ls.Interfaces
|
||||
return &s
|
||||
}
|
||||
|
||||
func convertHugtlb(c cgroups.HugetlbStats) hugetlb {
|
||||
return hugetlb{
|
||||
func convertHugtlb(c cgroups.HugetlbStats) types.Hugetlb {
|
||||
return types.Hugetlb{
|
||||
Usage: c.Usage,
|
||||
Max: c.MaxUsage,
|
||||
Failcnt: c.Failcnt,
|
||||
}
|
||||
}
|
||||
|
||||
func convertMemoryEntry(c cgroups.MemoryData) memoryEntry {
|
||||
return memoryEntry{
|
||||
func convertMemoryEntry(c cgroups.MemoryData) types.MemoryEntry {
|
||||
return types.MemoryEntry{
|
||||
Limit: c.Limit,
|
||||
Usage: c.Usage,
|
||||
Max: c.MaxUsage,
|
||||
|
@ -296,10 +184,10 @@ func convertMemoryEntry(c cgroups.MemoryData) memoryEntry {
|
|||
}
|
||||
}
|
||||
|
||||
func convertBlkioEntry(c []cgroups.BlkioStatEntry) []blkioEntry {
|
||||
var out []blkioEntry
|
||||
func convertBlkioEntry(c []cgroups.BlkioStatEntry) []types.BlkioEntry {
|
||||
var out []types.BlkioEntry
|
||||
for _, e := range c {
|
||||
out = append(out, blkioEntry{
|
||||
out = append(out, types.BlkioEntry{
|
||||
Major: e.Major,
|
||||
Minor: e.Minor,
|
||||
Op: e.Op,
|
||||
|
@ -309,16 +197,16 @@ func convertBlkioEntry(c []cgroups.BlkioStatEntry) []blkioEntry {
|
|||
return out
|
||||
}
|
||||
|
||||
func convertL3CacheInfo(i *intelrdt.L3CacheInfo) *l3CacheInfo {
|
||||
return &l3CacheInfo{
|
||||
func convertL3CacheInfo(i *intelrdt.L3CacheInfo) *types.L3CacheInfo {
|
||||
return &types.L3CacheInfo{
|
||||
CbmMask: i.CbmMask,
|
||||
MinCbmBits: i.MinCbmBits,
|
||||
NumClosids: i.NumClosids,
|
||||
}
|
||||
}
|
||||
|
||||
func convertMemBwInfo(i *intelrdt.MemBwInfo) *memBwInfo {
|
||||
return &memBwInfo{
|
||||
func convertMemBwInfo(i *intelrdt.MemBwInfo) *types.MemBwInfo {
|
||||
return &types.MemBwInfo{
|
||||
BandwidthGran: i.BandwidthGran,
|
||||
DelayLinear: i.DelayLinear,
|
||||
MinBandwidth: i.MinBandwidth,
|
||||
|
|
|
@ -37,8 +37,18 @@ type Manager interface {
|
|||
// restore the object later.
|
||||
GetPaths() map[string]string
|
||||
|
||||
// GetUnifiedPath returns the unified path when running in unified mode.
|
||||
// The value corresponds to the all values of GetPaths() map.
|
||||
//
|
||||
// GetUnifiedPath returns error when running in hybrid mode as well as
|
||||
// in legacy mode.
|
||||
GetUnifiedPath() (string, error)
|
||||
|
||||
// Sets the cgroup as configured.
|
||||
Set(container *configs.Config) error
|
||||
|
||||
// Gets the cgroup as configured.
|
||||
GetCgroups() (*configs.Cgroup, error)
|
||||
}
|
||||
|
||||
type NotFoundError struct {
|
||||
|
|
180
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
generated
vendored
Normal file
180
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
generated
vendored
Normal file
|
@ -0,0 +1,180 @@
|
|||
// Package devicefilter containes eBPF device filter program
|
||||
//
|
||||
// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c
|
||||
//
|
||||
// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano)
|
||||
// agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397
|
||||
package devicefilter
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
// license string format is same as kernel MODULE_LICENSE macro
|
||||
license = "Apache"
|
||||
)
|
||||
|
||||
// DeviceFilter returns eBPF device filter program and its license string
|
||||
func DeviceFilter(devices []*configs.Device) (asm.Instructions, string, error) {
|
||||
p := &program{}
|
||||
p.init()
|
||||
for i := len(devices) - 1; i >= 0; i-- {
|
||||
if err := p.appendDevice(devices[i]); err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
}
|
||||
insts, err := p.finalize()
|
||||
return insts, license, err
|
||||
}
|
||||
|
||||
type program struct {
|
||||
insts asm.Instructions
|
||||
hasWildCard bool
|
||||
blockID int
|
||||
}
|
||||
|
||||
func (p *program) init() {
|
||||
// struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423
|
||||
/*
|
||||
u32 access_type
|
||||
u32 major
|
||||
u32 minor
|
||||
*/
|
||||
// R2 <- type (lower 16 bit of u32 access_type at R1[0])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R2, asm.R1, 0, asm.Half))
|
||||
|
||||
// R3 <- access (upper 16 bit of u32 access_type at R1[0])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R3, asm.R1, 0, asm.Word),
|
||||
// RSh: bitwise shift right
|
||||
asm.RSh.Imm32(asm.R3, 16))
|
||||
|
||||
// R4 <- major (u32 major at R1[4])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R4, asm.R1, 4, asm.Word))
|
||||
|
||||
// R5 <- minor (u32 minor at R1[8])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
|
||||
}
|
||||
|
||||
// appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element.
|
||||
func (p *program) appendDevice(dev *configs.Device) error {
|
||||
if p.blockID < 0 {
|
||||
return errors.New("the program is finalized")
|
||||
}
|
||||
if p.hasWildCard {
|
||||
// All entries after wildcard entry are ignored
|
||||
return nil
|
||||
}
|
||||
|
||||
bpfType := int32(-1)
|
||||
hasType := true
|
||||
switch dev.Type {
|
||||
case 'c':
|
||||
bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
|
||||
case 'b':
|
||||
bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
|
||||
case 'a':
|
||||
hasType = false
|
||||
default:
|
||||
// if not specified in OCI json, typ is set to DeviceTypeAll
|
||||
return errors.Errorf("invalid DeviceType %q", string(dev.Type))
|
||||
}
|
||||
if dev.Major > math.MaxUint32 {
|
||||
return errors.Errorf("invalid major %d", dev.Major)
|
||||
}
|
||||
if dev.Minor > math.MaxUint32 {
|
||||
return errors.Errorf("invalid minor %d", dev.Major)
|
||||
}
|
||||
hasMajor := dev.Major >= 0 // if not specified in OCI json, major is set to -1
|
||||
hasMinor := dev.Minor >= 0
|
||||
bpfAccess := int32(0)
|
||||
for _, r := range dev.Permissions {
|
||||
switch r {
|
||||
case 'r':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_READ
|
||||
case 'w':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_WRITE
|
||||
case 'm':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
|
||||
default:
|
||||
return errors.Errorf("unknown device access %v", r)
|
||||
}
|
||||
}
|
||||
// If the access is rwm, skip the check.
|
||||
hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD)
|
||||
|
||||
blockSym := fmt.Sprintf("block-%d", p.blockID)
|
||||
nextBlockSym := fmt.Sprintf("block-%d", p.blockID+1)
|
||||
prevBlockLastIdx := len(p.insts) - 1
|
||||
if hasType {
|
||||
p.insts = append(p.insts,
|
||||
// if (R2 != bpfType) goto next
|
||||
asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasAccess {
|
||||
p.insts = append(p.insts,
|
||||
// if (R3 & bpfAccess == 0 /* use R1 as a temp var */) goto next
|
||||
asm.Mov.Reg32(asm.R1, asm.R3),
|
||||
asm.And.Imm32(asm.R1, bpfAccess),
|
||||
asm.JEq.Imm(asm.R1, 0, nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasMajor {
|
||||
p.insts = append(p.insts,
|
||||
// if (R4 != major) goto next
|
||||
asm.JNE.Imm(asm.R4, int32(dev.Major), nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasMinor {
|
||||
p.insts = append(p.insts,
|
||||
// if (R5 != minor) goto next
|
||||
asm.JNE.Imm(asm.R5, int32(dev.Minor), nextBlockSym),
|
||||
)
|
||||
}
|
||||
if !hasType && !hasAccess && !hasMajor && !hasMinor {
|
||||
p.hasWildCard = true
|
||||
}
|
||||
p.insts = append(p.insts, acceptBlock(dev.Allow)...)
|
||||
// set blockSym to the first instruction we added in this iteration
|
||||
p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
|
||||
p.blockID++
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *program) finalize() (asm.Instructions, error) {
|
||||
if p.hasWildCard {
|
||||
// acceptBlock with asm.Return() is already inserted
|
||||
return p.insts, nil
|
||||
}
|
||||
blockSym := fmt.Sprintf("block-%d", p.blockID)
|
||||
p.insts = append(p.insts,
|
||||
// R0 <- 0
|
||||
asm.Mov.Imm32(asm.R0, 0).Sym(blockSym),
|
||||
asm.Return(),
|
||||
)
|
||||
p.blockID = -1
|
||||
return p.insts, nil
|
||||
}
|
||||
|
||||
func acceptBlock(accept bool) asm.Instructions {
|
||||
v := int32(0)
|
||||
if accept {
|
||||
v = 1
|
||||
}
|
||||
return []asm.Instruction{
|
||||
// R0 <- v
|
||||
asm.Mov.Imm32(asm.R0, v),
|
||||
asm.Return(),
|
||||
}
|
||||
}
|
45
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf.go
generated
vendored
Normal file
45
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf.go
generated
vendored
Normal file
|
@ -0,0 +1,45 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"github.com/cilium/ebpf"
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
|
||||
//
|
||||
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
|
||||
//
|
||||
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
|
||||
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD int) (func() error, error) {
|
||||
nilCloser := func() error {
|
||||
return nil
|
||||
}
|
||||
// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
|
||||
// This limit is not inherited into the container.
|
||||
memlockLimit := &unix.Rlimit{
|
||||
Cur: unix.RLIM_INFINITY,
|
||||
Max: unix.RLIM_INFINITY,
|
||||
}
|
||||
_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)
|
||||
spec := &ebpf.ProgramSpec{
|
||||
Type: ebpf.CGroupDevice,
|
||||
Instructions: insts,
|
||||
License: license,
|
||||
}
|
||||
prog, err := ebpf.NewProgram(spec)
|
||||
if err != nil {
|
||||
return nilCloser, err
|
||||
}
|
||||
if err := prog.Attach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
|
||||
return nilCloser, errors.Wrap(err, "failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
|
||||
}
|
||||
closer := func() error {
|
||||
if err := prog.Detach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
|
||||
return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return closer, nil
|
||||
}
|
|
@ -5,7 +5,6 @@ package fs
|
|||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
@ -33,14 +32,6 @@ var (
|
|||
&FreezerGroup{},
|
||||
&NameGroup{GroupName: "name=systemd", Join: true},
|
||||
}
|
||||
subsystemsUnified = subsystemSet{
|
||||
&CpusetGroupV2{},
|
||||
&FreezerGroupV2{},
|
||||
&CpuGroupV2{},
|
||||
&MemoryGroupV2{},
|
||||
&IOGroupV2{},
|
||||
&PidsGroupV2{},
|
||||
}
|
||||
HugePageSizes, _ = cgroups.GetHugePageSize()
|
||||
)
|
||||
|
||||
|
@ -138,9 +129,6 @@ func isIgnorableError(rootless bool, err error) bool {
|
|||
}
|
||||
|
||||
func (m *Manager) getSubsystems() subsystemSet {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return subsystemsUnified
|
||||
}
|
||||
return subsystemsLegacy
|
||||
}
|
||||
|
||||
|
@ -224,6 +212,10 @@ func (m *Manager) GetPaths() map[string]string {
|
|||
return paths
|
||||
}
|
||||
|
||||
func (m *Manager) GetUnifiedPath() (string, error) {
|
||||
return "", errors.New("unified path is only supported when running in unified mode")
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
@ -377,23 +369,6 @@ func (raw *cgroupData) join(subsystem string) (string, error) {
|
|||
return path, nil
|
||||
}
|
||||
|
||||
func writeFile(dir, file, data string) error {
|
||||
// Normally dir should not be empty, one case is that cgroup subsystem
|
||||
// is not mounted, we will get empty dir, and we want it fail here.
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s", file)
|
||||
}
|
||||
if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700); err != nil {
|
||||
return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func readFile(dir, file string) (string, error) {
|
||||
data, err := ioutil.ReadFile(filepath.Join(dir, file))
|
||||
return string(data), err
|
||||
}
|
||||
|
||||
func removePath(p string, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -430,3 +405,7 @@ func CheckCpushares(path string, c uint64) error {
|
|||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.Cgroups, nil
|
||||
}
|
||||
|
|
|
@ -11,6 +11,7 @@ import (
|
|||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
|
@ -31,41 +32,41 @@ func (s *BlkioGroup) Apply(d *cgroupData) error {
|
|||
|
||||
func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.BlkioWeight != 0 {
|
||||
if err := writeFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.BlkioLeafWeight != 0 {
|
||||
if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioLeafWeight), 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioLeafWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, wd := range cgroup.Resources.BlkioWeightDevice {
|
||||
if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
|
||||
if err := fscommon.WriteFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
||||
if err := fscommon.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice {
|
||||
if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice {
|
||||
if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice {
|
||||
if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice {
|
||||
if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,6 +9,7 @@ import (
|
|||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
|
@ -51,12 +52,12 @@ func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error
|
|||
|
||||
func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuRtPeriod != 0 {
|
||||
if err := writeFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuRtRuntime != 0 {
|
||||
if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -65,17 +66,17 @@ func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
|
|||
|
||||
func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuShares != 0 {
|
||||
if err := writeFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuPeriod != 0 {
|
||||
if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuQuota != 0 {
|
||||
if err := writeFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.Resources.CpuQuota, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.Resources.CpuQuota, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -98,7 +99,7 @@ func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
|
|||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
t, v, err := getCgroupParamKeyValue(sc.Text())
|
||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -1,92 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type CpuGroupV2 struct {
|
||||
}
|
||||
|
||||
func (s *CpuGroupV2) Name() string {
|
||||
return "cpu"
|
||||
}
|
||||
|
||||
func (s *CpuGroupV2) Apply(d *cgroupData) error {
|
||||
// We always want to join the cpu group, to allow fair cpu scheduling
|
||||
// on a container basis
|
||||
path, err := d.path("cpu")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return s.ApplyDir(path, d.config, d.pid)
|
||||
}
|
||||
|
||||
func (s *CpuGroupV2) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error {
|
||||
// This might happen if we have no cpu cgroup mounted.
|
||||
// Just do nothing and don't fail.
|
||||
if path == "" {
|
||||
return nil
|
||||
}
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return cgroups.WriteCgroupProc(path, pid)
|
||||
}
|
||||
|
||||
func (s *CpuGroupV2) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuWeight != 0 {
|
||||
if err := writeFile(path, "cpu.weight", strconv.FormatUint(cgroup.Resources.CpuWeight, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.CpuMax != "" {
|
||||
if err := writeFile(path, "cpu.max", cgroup.Resources.CpuMax); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpuGroupV2) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("cpu"))
|
||||
}
|
||||
|
||||
func (s *CpuGroupV2) GetStats(path string, stats *cgroups.Stats) error {
|
||||
f, err := os.Open(filepath.Join(path, "cpu.stat"))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
t, v, err := getCgroupParamKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
switch t {
|
||||
case "usage_usec":
|
||||
stats.CpuStats.CpuUsage.TotalUsage = v * 1000
|
||||
|
||||
case "user_usec":
|
||||
stats.CpuStats.CpuUsage.UsageInUsermode = v * 1000
|
||||
|
||||
case "system_usec":
|
||||
stats.CpuStats.CpuUsage.UsageInKernelmode = v * 1000
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -10,6 +10,7 @@ import (
|
|||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
@ -51,7 +52,7 @@ func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
|
|||
return err
|
||||
}
|
||||
|
||||
totalUsage, err := getCgroupParamUint(path, "cpuacct.usage")
|
||||
totalUsage, err := fscommon.GetCgroupParamUint(path, "cpuacct.usage")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -85,8 +86,8 @@ func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
|
|||
return 0, 0, err
|
||||
}
|
||||
fields := strings.Fields(string(data))
|
||||
if len(fields) != 4 {
|
||||
return 0, 0, fmt.Errorf("failure - %s is expected to have 4 fields", filepath.Join(path, cgroupCpuacctStat))
|
||||
if len(fields) < 4 {
|
||||
return 0, 0, fmt.Errorf("failure - %s is expected to have at least 4 fields", filepath.Join(path, cgroupCpuacctStat))
|
||||
}
|
||||
if fields[0] != userField {
|
||||
return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField)
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
"path/filepath"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
@ -31,12 +32,12 @@ func (s *CpusetGroup) Apply(d *cgroupData) error {
|
|||
|
||||
func (s *CpusetGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpusetCpus != "" {
|
||||
if err := writeFile(path, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil {
|
||||
if err := fscommon.WriteFile(path, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpusetMems != "" {
|
||||
if err := writeFile(path, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil {
|
||||
if err := fscommon.WriteFile(path, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -135,12 +136,12 @@ func (s *CpusetGroup) copyIfNeeded(current, parent string) error {
|
|||
}
|
||||
|
||||
if s.isEmpty(currentCpus) {
|
||||
if err := writeFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
|
||||
if err := fscommon.WriteFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if s.isEmpty(currentMems) {
|
||||
if err := writeFile(current, "cpuset.mems", string(parentMems)); err != nil {
|
||||
if err := fscommon.WriteFile(current, "cpuset.mems", string(parentMems)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,159 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
type CpusetGroupV2 struct {
|
||||
}
|
||||
|
||||
func (s *CpusetGroupV2) Name() string {
|
||||
return "cpuset"
|
||||
}
|
||||
|
||||
func (s *CpusetGroupV2) Apply(d *cgroupData) error {
|
||||
dir, err := d.path("cpuset")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return s.ApplyDir(dir, d.config, d.pid)
|
||||
}
|
||||
|
||||
func (s *CpusetGroupV2) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpusetCpus != "" {
|
||||
if err := writeFile(path, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpusetMems != "" {
|
||||
if err := writeFile(path, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroupV2) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("cpuset"))
|
||||
}
|
||||
|
||||
func (s *CpusetGroupV2) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroupV2) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error {
|
||||
// This might happen if we have no cpuset cgroup mounted.
|
||||
// Just do nothing and don't fail.
|
||||
if dir == "" {
|
||||
return nil
|
||||
}
|
||||
mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo)))
|
||||
// 'ensureParent' start with parent because we don't want to
|
||||
// explicitly inherit from parent, it could conflict with
|
||||
// 'cpuset.cpu_exclusive'.
|
||||
if err := s.ensureParent(filepath.Dir(dir), root); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(dir, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
// We didn't inherit cpuset configs from parent, but we have
|
||||
// to ensure cpuset configs are set before moving task into the
|
||||
// cgroup.
|
||||
// The logic is, if user specified cpuset configs, use these
|
||||
// specified configs, otherwise, inherit from parent. This makes
|
||||
// cpuset configs work correctly with 'cpuset.cpu_exclusive', and
|
||||
// keep backward compatibility.
|
||||
if err := s.ensureCpusAndMems(dir, cgroup); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// because we are not using d.join we need to place the pid into the procs file
|
||||
// unlike the other subsystems
|
||||
return cgroups.WriteCgroupProc(dir, pid)
|
||||
}
|
||||
|
||||
func (s *CpusetGroupV2) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) {
|
||||
if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus.effective")); err != nil {
|
||||
return
|
||||
}
|
||||
if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems.effective")); err != nil {
|
||||
return
|
||||
}
|
||||
return cpus, mems, nil
|
||||
}
|
||||
|
||||
// ensureParent makes sure that the parent directory of current is created
|
||||
// and populated with the proper cpus and mems files copied from
|
||||
// it's parent.
|
||||
func (s *CpusetGroupV2) ensureParent(current, root string) error {
|
||||
parent := filepath.Dir(current)
|
||||
if libcontainerUtils.CleanPath(parent) == root {
|
||||
return nil
|
||||
}
|
||||
// Avoid infinite recursion.
|
||||
if parent == current {
|
||||
return fmt.Errorf("cpuset: cgroup parent path outside cgroup root")
|
||||
}
|
||||
if err := s.ensureParent(parent, root); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(current, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return s.copyIfNeeded(current, parent)
|
||||
}
|
||||
|
||||
// copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent
|
||||
// directory to the current directory if the file's contents are 0
|
||||
func (s *CpusetGroupV2) copyIfNeeded(current, parent string) error {
|
||||
var (
|
||||
err error
|
||||
currentCpus, currentMems []byte
|
||||
parentCpus, parentMems []byte
|
||||
)
|
||||
|
||||
if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil {
|
||||
return err
|
||||
}
|
||||
if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if s.isEmpty(currentCpus) {
|
||||
if err := writeFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if s.isEmpty(currentMems) {
|
||||
if err := writeFile(current, "cpuset.mems", string(parentMems)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroupV2) isEmpty(b []byte) bool {
|
||||
return len(bytes.Trim(b, "\n")) == 0
|
||||
}
|
||||
|
||||
func (s *CpusetGroupV2) ensureCpusAndMems(path string, cgroup *configs.Cgroup) error {
|
||||
if err := s.Set(path, cgroup); err != nil {
|
||||
return err
|
||||
}
|
||||
return s.copyIfNeeded(path, filepath.Dir(path))
|
||||
}
|
|
@ -4,6 +4,7 @@ package fs
|
|||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
@ -37,7 +38,7 @@ func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
if dev.Allow {
|
||||
file = "devices.allow"
|
||||
}
|
||||
if err := writeFile(path, file, dev.CgroupString()); err != nil {
|
||||
if err := fscommon.WriteFile(path, file, dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -45,25 +46,25 @@ func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
}
|
||||
if cgroup.Resources.AllowAllDevices != nil {
|
||||
if *cgroup.Resources.AllowAllDevices == false {
|
||||
if err := writeFile(path, "devices.deny", "a"); err != nil {
|
||||
if err := fscommon.WriteFile(path, "devices.deny", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, dev := range cgroup.Resources.AllowedDevices {
|
||||
if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
|
||||
if err := fscommon.WriteFile(path, "devices.allow", dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := writeFile(path, "devices.allow", "a"); err != nil {
|
||||
if err := fscommon.WriteFile(path, "devices.allow", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, dev := range cgroup.Resources.DeniedDevices {
|
||||
if err := writeFile(path, "devices.deny", dev.CgroupString()); err != nil {
|
||||
if err := fscommon.WriteFile(path, "devices.deny", dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
|
@ -34,11 +35,11 @@ func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
// state, let's write again this state, hoping it's going to be properly
|
||||
// set this time. Otherwise, this loop could run infinitely, waiting for
|
||||
// a state change that would never happen.
|
||||
if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
state, err := readFile(path, "freezer.state")
|
||||
state, err := fscommon.ReadFile(path, "freezer.state")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -1,74 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type FreezerGroupV2 struct {
|
||||
}
|
||||
|
||||
func (s *FreezerGroupV2) Name() string {
|
||||
return "freezer"
|
||||
}
|
||||
|
||||
func (s *FreezerGroupV2) Apply(d *cgroupData) error {
|
||||
_, err := d.join("freezer")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *FreezerGroupV2) Set(path string, cgroup *configs.Cgroup) error {
|
||||
var desiredState string
|
||||
filename := "cgroup.freeze"
|
||||
if cgroup.Resources.Freezer == configs.Frozen {
|
||||
desiredState = "1"
|
||||
} else {
|
||||
desiredState = "0"
|
||||
}
|
||||
|
||||
switch cgroup.Resources.Freezer {
|
||||
case configs.Frozen, configs.Thawed:
|
||||
for {
|
||||
// In case this loop does not exit because it doesn't get the expected
|
||||
// state, let's write again this state, hoping it's going to be properly
|
||||
// set this time. Otherwise, this loop could run infinitely, waiting for
|
||||
// a state change that would never happen.
|
||||
if err := writeFile(path, filename, desiredState); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
state, err := readFile(path, filename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if strings.TrimSpace(state) == desiredState {
|
||||
break
|
||||
}
|
||||
|
||||
time.Sleep(1 * time.Millisecond)
|
||||
}
|
||||
case configs.Undefined:
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Resources.Freezer))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *FreezerGroupV2) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("freezer"))
|
||||
}
|
||||
|
||||
func (s *FreezerGroupV2) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
|
@ -8,6 +8,7 @@ import (
|
|||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
|
@ -28,7 +29,7 @@ func (s *HugetlbGroup) Apply(d *cgroupData) error {
|
|||
|
||||
func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
for _, hugetlb := range cgroup.Resources.HugetlbLimit {
|
||||
if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -44,21 +45,21 @@ func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
|
|||
hugetlbStats := cgroups.HugetlbStats{}
|
||||
for _, pageSize := range HugePageSizes {
|
||||
usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".")
|
||||
value, err := getCgroupParamUint(path, usage)
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s - %v", usage, err)
|
||||
}
|
||||
hugetlbStats.Usage = value
|
||||
|
||||
maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".")
|
||||
value, err = getCgroupParamUint(path, maxUsage)
|
||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s - %v", maxUsage, err)
|
||||
}
|
||||
hugetlbStats.MaxUsage = value
|
||||
|
||||
failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".")
|
||||
value, err = getCgroupParamUint(path, failcnt)
|
||||
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s - %v", failcnt, err)
|
||||
}
|
||||
|
|
|
@ -1,191 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type IOGroupV2 struct {
|
||||
}
|
||||
|
||||
func (s *IOGroupV2) Name() string {
|
||||
return "blkio"
|
||||
}
|
||||
|
||||
func (s *IOGroupV2) Apply(d *cgroupData) error {
|
||||
_, err := d.join("blkio")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *IOGroupV2) Set(path string, cgroup *configs.Cgroup) error {
|
||||
cgroupsv2 := cgroups.IsCgroup2UnifiedMode()
|
||||
|
||||
if cgroup.Resources.BlkioWeight != 0 {
|
||||
filename := "blkio.weight"
|
||||
if cgroupsv2 {
|
||||
filename = "io.bfq.weight"
|
||||
}
|
||||
if err := writeFile(path, filename, strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.BlkioLeafWeight != 0 {
|
||||
if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioLeafWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, wd := range cgroup.Resources.BlkioWeightDevice {
|
||||
if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice {
|
||||
if cgroupsv2 {
|
||||
if err := writeFile(path, "io.max", td.StringName("rbps")); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice {
|
||||
if cgroupsv2 {
|
||||
if err := writeFile(path, "io.max", td.StringName("wbps")); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice {
|
||||
if cgroupsv2 {
|
||||
if err := writeFile(path, "io.max", td.StringName("riops")); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice {
|
||||
if cgroupsv2 {
|
||||
if err := writeFile(path, "io.max", td.StringName("wiops")); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *IOGroupV2) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("blkio"))
|
||||
}
|
||||
|
||||
func readCgroup2MapFile(path string, name string) (map[string][]string, error) {
|
||||
ret := map[string][]string{}
|
||||
p := filepath.Join("/sys/fs/cgroup", path, name)
|
||||
f, err := os.Open(p)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return ret, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
parts := strings.Fields(line)
|
||||
if len(parts) < 2 {
|
||||
continue
|
||||
}
|
||||
ret[parts[0]] = parts[1:]
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (s *IOGroupV2) getCgroupV2Stats(path string, stats *cgroups.Stats) error {
|
||||
// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
|
||||
var ioServiceBytesRecursive []cgroups.BlkioStatEntry
|
||||
values, err := readCgroup2MapFile(path, "io.stat")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for k, v := range values {
|
||||
d := strings.Split(k, ":")
|
||||
if len(d) != 2 {
|
||||
continue
|
||||
}
|
||||
minor, err := strconv.ParseUint(d[0], 10, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
major, err := strconv.ParseUint(d[1], 10, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, item := range v {
|
||||
d := strings.Split(item, "=")
|
||||
if len(d) != 2 {
|
||||
continue
|
||||
}
|
||||
op := d[0]
|
||||
|
||||
// Accommodate the cgroup v1 naming
|
||||
switch op {
|
||||
case "rbytes":
|
||||
op = "read"
|
||||
case "wbytes":
|
||||
op = "write"
|
||||
}
|
||||
|
||||
value, err := strconv.ParseUint(d[1], 10, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
entry := cgroups.BlkioStatEntry{
|
||||
Op: op,
|
||||
Major: major,
|
||||
Minor: minor,
|
||||
Value: value,
|
||||
}
|
||||
ioServiceBytesRecursive = append(ioServiceBytesRecursive, entry)
|
||||
}
|
||||
}
|
||||
stats.BlkioStats = cgroups.BlkioStats{IoServiceBytesRecursive: ioServiceBytesRecursive}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *IOGroupV2) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return s.getCgroupV2Stats(path, stats)
|
||||
}
|
|
@ -11,6 +11,7 @@ import (
|
|||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
|
@ -84,28 +85,28 @@ func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
|
|||
// for memory and swap memory, so it won't fail because the new
|
||||
// value and the old value don't fit kernel's validation.
|
||||
if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
|
||||
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if cgroup.Resources.Memory != 0 {
|
||||
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.MemorySwap != 0 {
|
||||
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -126,25 +127,25 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
}
|
||||
|
||||
if cgroup.Resources.MemoryReservation != 0 {
|
||||
if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.KernelMemoryTCP != 0 {
|
||||
if err := writeFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.OomKillDisable {
|
||||
if err := writeFile(path, "memory.oom_control", "1"); err != nil {
|
||||
if err := fscommon.WriteFile(path, "memory.oom_control", "1"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
|
||||
return nil
|
||||
} else if *cgroup.Resources.MemorySwappiness <= 100 {
|
||||
if err := writeFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
|
@ -171,7 +172,7 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
|||
|
||||
sc := bufio.NewScanner(statsFile)
|
||||
for sc.Scan() {
|
||||
t, v, err := getCgroupParamKeyValue(sc.Text())
|
||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err)
|
||||
}
|
||||
|
@ -201,7 +202,7 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
|||
stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
|
||||
|
||||
useHierarchy := strings.Join([]string{"memory", "use_hierarchy"}, ".")
|
||||
value, err := getCgroupParamUint(path, useHierarchy)
|
||||
value, err := fscommon.GetCgroupParamUint(path, useHierarchy)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -233,7 +234,7 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
|||
failcnt := strings.Join([]string{moduleName, "failcnt"}, ".")
|
||||
limit := strings.Join([]string{moduleName, "limit_in_bytes"}, ".")
|
||||
|
||||
value, err := getCgroupParamUint(path, usage)
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
|
@ -241,7 +242,7 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
|||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err)
|
||||
}
|
||||
memoryData.Usage = value
|
||||
value, err = getCgroupParamUint(path, maxUsage)
|
||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
|
@ -249,7 +250,7 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
|||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err)
|
||||
}
|
||||
memoryData.MaxUsage = value
|
||||
value, err = getCgroupParamUint(path, failcnt)
|
||||
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
|
@ -257,7 +258,7 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
|||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err)
|
||||
}
|
||||
memoryData.Failcnt = value
|
||||
value, err = getCgroupParamUint(path, limit)
|
||||
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
|
|
|
@ -1,164 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type MemoryGroupV2 struct {
|
||||
}
|
||||
|
||||
func (s *MemoryGroupV2) Name() string {
|
||||
return "memory"
|
||||
}
|
||||
|
||||
func (s *MemoryGroupV2) Apply(d *cgroupData) (err error) {
|
||||
path, err := d.path("memory")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
} else if path == "" {
|
||||
return nil
|
||||
}
|
||||
if memoryAssigned(d.config) {
|
||||
if _, err := os.Stat(path); os.IsNotExist(err) {
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
// Only enable kernel memory accouting when this cgroup
|
||||
// is created by libcontainer, otherwise we might get
|
||||
// error when people use `cgroupsPath` to join an existed
|
||||
// cgroup whose kernel memory is not initialized.
|
||||
if err := EnableKernelMemoryAccounting(path); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
os.RemoveAll(path)
|
||||
}
|
||||
}()
|
||||
|
||||
// We need to join memory cgroup after set memory limits, because
|
||||
// kmem.limit_in_bytes can only be set when the cgroup is empty.
|
||||
_, err = d.join("memory")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setMemoryAndSwapCgroups(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.MemorySwap != 0 {
|
||||
if err := writeFile(path, "memory.swap.max", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.Memory != 0 {
|
||||
if err := writeFile(path, "memory.max", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroupV2) Set(path string, cgroup *configs.Cgroup) error {
|
||||
|
||||
if err := setMemoryAndSwapCgroups(path, cgroup); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if cgroup.Resources.KernelMemory != 0 {
|
||||
if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.MemoryReservation != 0 {
|
||||
if err := writeFile(path, "memory.high", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroupV2) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("memory"))
|
||||
}
|
||||
|
||||
func (s *MemoryGroupV2) GetStats(path string, stats *cgroups.Stats) error {
|
||||
// Set stats from memory.stat.
|
||||
statsFile, err := os.Open(filepath.Join(path, "memory.stat"))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
defer statsFile.Close()
|
||||
|
||||
sc := bufio.NewScanner(statsFile)
|
||||
for sc.Scan() {
|
||||
t, v, err := getCgroupParamKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err)
|
||||
}
|
||||
stats.MemoryStats.Stats[t] = v
|
||||
}
|
||||
stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"]
|
||||
|
||||
memoryUsage, err := getMemoryDataV2(path, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.Usage = memoryUsage
|
||||
swapUsage, err := getMemoryDataV2(path, "swap")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.SwapUsage = swapUsage
|
||||
|
||||
stats.MemoryStats.UseHierarchy = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
|
||||
memoryData := cgroups.MemoryData{}
|
||||
|
||||
moduleName := "memory"
|
||||
if name != "" {
|
||||
moduleName = strings.Join([]string{"memory", name}, ".")
|
||||
}
|
||||
usage := strings.Join([]string{moduleName, "current"}, ".")
|
||||
limit := strings.Join([]string{moduleName, "max"}, ".")
|
||||
|
||||
value, err := getCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err)
|
||||
}
|
||||
memoryData.Usage = value
|
||||
|
||||
value, err = getCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err)
|
||||
}
|
||||
memoryData.Limit = value
|
||||
|
||||
return memoryData, nil
|
||||
}
|
|
@ -6,6 +6,7 @@ import (
|
|||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
|
@ -26,7 +27,7 @@ func (s *NetClsGroup) Apply(d *cgroupData) error {
|
|||
|
||||
func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.NetClsClassid != 0 {
|
||||
if err := writeFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@ package fs
|
|||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
|
@ -24,7 +25,7 @@ func (s *NetPrioGroup) Apply(d *cgroupData) error {
|
|||
|
||||
func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
for _, prioMap := range cgroup.Resources.NetPrioIfpriomap {
|
||||
if err := writeFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
|
||||
if err := fscommon.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ import (
|
|||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
|
@ -35,7 +36,7 @@ func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10)
|
||||
}
|
||||
|
||||
if err := writeFile(path, "pids.max", limit); err != nil {
|
||||
if err := fscommon.WriteFile(path, "pids.max", limit); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -48,12 +49,12 @@ func (s *PidsGroup) Remove(d *cgroupData) error {
|
|||
}
|
||||
|
||||
func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
current, err := getCgroupParamUint(path, "pids.current")
|
||||
current, err := fscommon.GetCgroupParamUint(path, "pids.current")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.current - %s", err)
|
||||
}
|
||||
|
||||
maxString, err := getCgroupParamString(path, "pids.max")
|
||||
maxString, err := fscommon.GetCgroupParamString(path, "pids.max")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.max - %s", err)
|
||||
}
|
||||
|
@ -61,7 +62,7 @@ func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
|||
// Default if pids.max == "max" is 0 -- which represents "no limit".
|
||||
var max uint64
|
||||
if maxString != "max" {
|
||||
max, err = parseUint(maxString, 10, 64)
|
||||
max, err = fscommon.ParseUint(maxString, 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max"))
|
||||
}
|
||||
|
|
|
@ -1,107 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type PidsGroupV2 struct {
|
||||
}
|
||||
|
||||
func (s *PidsGroupV2) Name() string {
|
||||
return "pids"
|
||||
}
|
||||
|
||||
func (s *PidsGroupV2) Apply(d *cgroupData) error {
|
||||
_, err := d.join("pids")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PidsGroupV2) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.PidsLimit != 0 {
|
||||
// "max" is the fallback value.
|
||||
limit := "max"
|
||||
|
||||
if cgroup.Resources.PidsLimit > 0 {
|
||||
limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10)
|
||||
}
|
||||
|
||||
if err := writeFile(path, "pids.max", limit); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PidsGroupV2) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("pids"))
|
||||
}
|
||||
|
||||
func isNOTSUP(err error) bool {
|
||||
switch err := err.(type) {
|
||||
case *os.PathError:
|
||||
return err.Err == unix.ENOTSUP
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func (s *PidsGroupV2) GetStats(path string, stats *cgroups.Stats) error {
|
||||
current, err := getCgroupParamUint(path, "pids.current")
|
||||
if os.IsNotExist(err) {
|
||||
// if the controller is not enabled, let's read the list
|
||||
// PIDs (or threads if cgroup.threads is enabled)
|
||||
contents, err := ioutil.ReadFile(filepath.Join(path, "cgroup.procs"))
|
||||
if err != nil && isNOTSUP(err) {
|
||||
contents, err = ioutil.ReadFile(filepath.Join(path, "cgroup.threads"))
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
pids := make(map[string]string)
|
||||
for _, i := range strings.Split(string(contents), "\n") {
|
||||
if i != "" {
|
||||
pids[i] = i
|
||||
}
|
||||
}
|
||||
stats.PidsStats.Current = uint64(len(pids))
|
||||
stats.PidsStats.Limit = 0
|
||||
return nil
|
||||
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.current - %s", err)
|
||||
}
|
||||
|
||||
maxString, err := getCgroupParamString(path, "pids.max")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.max - %s", err)
|
||||
}
|
||||
|
||||
// Default if pids.max == "max" is 0 -- which represents "no limit".
|
||||
var max uint64
|
||||
if maxString != "max" {
|
||||
max, err = parseUint(maxString, 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max"))
|
||||
}
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = current
|
||||
stats.PidsStats.Limit = max
|
||||
return nil
|
||||
}
|
56
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
generated
vendored
Normal file
56
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
generated
vendored
Normal file
|
@ -0,0 +1,56 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func setCpu(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuWeight != 0 {
|
||||
if err := fscommon.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(cgroup.Resources.CpuWeight, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.CpuMax != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "cpu.max", cgroup.Resources.CpuMax); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
func statCpu(dirPath string, stats *cgroups.Stats) error {
|
||||
f, err := os.Open(filepath.Join(dirPath, "cpu.stat"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
switch t {
|
||||
case "usage_usec":
|
||||
stats.CpuStats.CpuUsage.TotalUsage = v * 1000
|
||||
|
||||
case "user_usec":
|
||||
stats.CpuStats.CpuUsage.UsageInUsermode = v * 1000
|
||||
|
||||
case "system_usec":
|
||||
stats.CpuStats.CpuUsage.UsageInKernelmode = v * 1000
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
22
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
generated
vendored
Normal file
22
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
generated
vendored
Normal file
|
@ -0,0 +1,22 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func setCpuset(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpusetCpus != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpusetMems != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
99
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
generated
vendored
Normal file
99
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
generated
vendored
Normal file
|
@ -0,0 +1,99 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
const UnifiedMountpoint = "/sys/fs/cgroup"
|
||||
|
||||
func defaultDirPath(c *configs.Cgroup) (string, error) {
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return "", errors.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c)
|
||||
}
|
||||
if len(c.Paths) != 0 {
|
||||
// never set by specconv
|
||||
return "", errors.Errorf("cgroup: Paths is unsupported, use Path, got %+v", c)
|
||||
}
|
||||
|
||||
// XXX: Do not remove this code. Path safety is important! -- cyphar
|
||||
cgPath := libcontainerUtils.CleanPath(c.Path)
|
||||
cgParent := libcontainerUtils.CleanPath(c.Parent)
|
||||
cgName := libcontainerUtils.CleanPath(c.Name)
|
||||
|
||||
ownCgroup, err := parseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return _defaultDirPath(UnifiedMountpoint, cgPath, cgParent, cgName, ownCgroup)
|
||||
}
|
||||
|
||||
func _defaultDirPath(root, cgPath, cgParent, cgName, ownCgroup string) (string, error) {
|
||||
if (cgName != "" || cgParent != "") && cgPath != "" {
|
||||
return "", errors.New("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
innerPath := cgPath
|
||||
if innerPath == "" {
|
||||
innerPath = filepath.Join(cgParent, cgName)
|
||||
}
|
||||
if filepath.IsAbs(innerPath) {
|
||||
return filepath.Join(root, innerPath), nil
|
||||
}
|
||||
return filepath.Join(root, ownCgroup, innerPath), nil
|
||||
}
|
||||
|
||||
// parseCgroupFile parses /proc/PID/cgroup file and return string
|
||||
func parseCgroupFile(path string) (string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
return parseCgroupFromReader(f)
|
||||
}
|
||||
|
||||
func parseCgroupFromReader(r io.Reader) (string, error) {
|
||||
var (
|
||||
s = bufio.NewScanner(r)
|
||||
)
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
var (
|
||||
text = s.Text()
|
||||
parts = strings.SplitN(text, ":", 3)
|
||||
)
|
||||
if len(parts) < 3 {
|
||||
return "", errors.Errorf("invalid cgroup entry: %q", text)
|
||||
}
|
||||
// text is like "0::/user.slice/user-1001.slice/session-1.scope"
|
||||
if parts[0] == "0" && parts[1] == "" {
|
||||
return parts[2], nil
|
||||
}
|
||||
}
|
||||
return "", errors.New("cgroup path not found")
|
||||
}
|
73
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go
generated
vendored
Normal file
73
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go
generated
vendored
Normal file
|
@ -0,0 +1,73 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/ebpf"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func isRWM(cgroupPermissions string) bool {
|
||||
r := false
|
||||
w := false
|
||||
m := false
|
||||
for _, rn := range cgroupPermissions {
|
||||
switch rn {
|
||||
case 'r':
|
||||
r = true
|
||||
case 'w':
|
||||
w = true
|
||||
case 'm':
|
||||
m = true
|
||||
}
|
||||
}
|
||||
return r && w && m
|
||||
}
|
||||
|
||||
// the logic is from crun
|
||||
// https://github.com/containers/crun/blob/0.10.2/src/libcrun/cgroup.c#L1644-L1652
|
||||
func canSkipEBPFError(cgroup *configs.Cgroup) bool {
|
||||
for _, dev := range cgroup.Resources.Devices {
|
||||
if dev.Allow || !isRWM(dev.Permissions) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func setDevices(dirPath string, cgroup *configs.Cgroup) error {
|
||||
devices := cgroup.Devices
|
||||
if allowAllDevices := cgroup.Resources.AllowAllDevices; allowAllDevices != nil {
|
||||
// never set by OCI specconv, but *allowAllDevices=false is still used by the integration test
|
||||
if *allowAllDevices == true {
|
||||
return errors.New("libcontainer AllowAllDevices is not supported, use Devices")
|
||||
}
|
||||
for _, ad := range cgroup.Resources.AllowedDevices {
|
||||
d := *ad
|
||||
d.Allow = true
|
||||
devices = append(devices, &d)
|
||||
}
|
||||
}
|
||||
if len(cgroup.Resources.DeniedDevices) != 0 {
|
||||
// never set by OCI specconv
|
||||
return errors.New("libcontainer DeniedDevices is not supported, use Devices")
|
||||
}
|
||||
insts, license, err := devicefilter.DeviceFilter(devices)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0600)
|
||||
if err != nil {
|
||||
return errors.Errorf("cannot get dir FD for %s", dirPath)
|
||||
}
|
||||
defer unix.Close(dirFD)
|
||||
if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
|
||||
if !canSkipEBPFError(cgroup) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
53
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
generated
vendored
Normal file
53
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
generated
vendored
Normal file
|
@ -0,0 +1,53 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
func setFreezer(dirPath string, state configs.FreezerState) error {
|
||||
var desired int
|
||||
switch state {
|
||||
case configs.Undefined:
|
||||
return nil
|
||||
case configs.Frozen:
|
||||
desired = 1
|
||||
case configs.Thawed:
|
||||
desired = 0
|
||||
default:
|
||||
return errors.Errorf("unknown freezer state %+v", state)
|
||||
}
|
||||
supportedErr := supportsFreezer(dirPath)
|
||||
if supportedErr != nil && desired != 0 {
|
||||
// can ignore error if desired == 1
|
||||
return errors.Wrap(supportedErr, "freezer not supported")
|
||||
}
|
||||
return freezeWithInt(dirPath, desired)
|
||||
}
|
||||
|
||||
func supportsFreezer(dirPath string) error {
|
||||
_, err := fscommon.ReadFile(dirPath, "cgroup.freeze")
|
||||
return err
|
||||
}
|
||||
|
||||
// freeze writes desired int to "cgroup.freeze".
|
||||
func freezeWithInt(dirPath string, desired int) error {
|
||||
desiredS := strconv.Itoa(desired)
|
||||
if err := fscommon.WriteFile(dirPath, "cgroup.freeze", desiredS); err != nil {
|
||||
return err
|
||||
}
|
||||
got, err := fscommon.ReadFile(dirPath, "cgroup.freeze")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if gotS := strings.TrimSpace(string(got)); gotS != desiredS {
|
||||
return errors.Errorf("expected \"cgroup.freeze\" in %q to be %q, got %q", dirPath, desiredS, gotS)
|
||||
}
|
||||
return nil
|
||||
}
|
214
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
generated
vendored
Normal file
214
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
generated
vendored
Normal file
|
@ -0,0 +1,214 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// NewManager creates a manager for cgroup v2 unified hierarchy.
|
||||
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope".
|
||||
// If dirPath is empty, it is automatically set using config.
|
||||
func NewManager(config *configs.Cgroup, dirPath string, rootless bool) (cgroups.Manager, error) {
|
||||
if config == nil {
|
||||
config = &configs.Cgroup{}
|
||||
}
|
||||
if dirPath != "" {
|
||||
if filepath.Clean(dirPath) != dirPath || !filepath.IsAbs(dirPath) {
|
||||
return nil, errors.Errorf("invalid dir path %q", dirPath)
|
||||
}
|
||||
} else {
|
||||
var err error
|
||||
dirPath, err = defaultDirPath(config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
controllers, err := detectControllers(dirPath)
|
||||
if err != nil && !rootless {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
m := &manager{
|
||||
config: config,
|
||||
dirPath: dirPath,
|
||||
controllers: controllers,
|
||||
rootless: rootless,
|
||||
}
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func detectControllers(dirPath string) (map[string]struct{}, error) {
|
||||
if err := os.MkdirAll(dirPath, 0755); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
controllersPath, err := securejoin.SecureJoin(dirPath, "cgroup.controllers")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
controllersData, err := ioutil.ReadFile(controllersPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
controllersFields := strings.Fields(string(controllersData))
|
||||
controllers := make(map[string]struct{}, len(controllersFields))
|
||||
for _, c := range controllersFields {
|
||||
controllers[c] = struct{}{}
|
||||
}
|
||||
return controllers, nil
|
||||
}
|
||||
|
||||
type manager struct {
|
||||
config *configs.Cgroup
|
||||
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
|
||||
dirPath string
|
||||
// controllers is content of "cgroup.controllers" file.
|
||||
// excludes pseudo-controllers ("devices" and "freezer").
|
||||
controllers map[string]struct{}
|
||||
rootless bool
|
||||
}
|
||||
|
||||
func (m *manager) Apply(pid int) error {
|
||||
if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil && !m.rootless {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) GetPids() ([]int, error) {
|
||||
return cgroups.GetPids(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *manager) GetAllPids() ([]int, error) {
|
||||
return cgroups.GetAllPids(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *manager) GetStats() (*cgroups.Stats, error) {
|
||||
var (
|
||||
st cgroups.Stats
|
||||
errs []error
|
||||
)
|
||||
// pids (since kernel 4.5)
|
||||
if _, ok := m.controllers["pids"]; ok {
|
||||
if err := statPids(m.dirPath, &st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
} else {
|
||||
if err := statPidsWithoutController(m.dirPath, &st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// memory (since kenrel 4.5)
|
||||
if _, ok := m.controllers["memory"]; ok {
|
||||
if err := statMemory(m.dirPath, &st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// io (since kernel 4.5)
|
||||
if _, ok := m.controllers["io"]; ok {
|
||||
if err := statIo(m.dirPath, &st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// cpu (since kernel 4.15)
|
||||
if _, ok := m.controllers["cpu"]; ok {
|
||||
if err := statCpu(m.dirPath, &st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
if len(errs) > 0 && !m.rootless {
|
||||
return &st, errors.Errorf("error while statting cgroup v2: %+v", errs)
|
||||
}
|
||||
return &st, nil
|
||||
}
|
||||
|
||||
func (m *manager) Freeze(state configs.FreezerState) error {
|
||||
if err := setFreezer(m.dirPath, state); err != nil {
|
||||
return err
|
||||
}
|
||||
m.config.Resources.Freezer = state
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) Destroy() error {
|
||||
return os.RemoveAll(m.dirPath)
|
||||
}
|
||||
|
||||
// GetPaths is for compatibility purpose and should be removed in future
|
||||
func (m *manager) GetPaths() map[string]string {
|
||||
paths := map[string]string{
|
||||
// pseudo-controller for compatibility
|
||||
"devices": m.dirPath,
|
||||
"freezer": m.dirPath,
|
||||
}
|
||||
for c := range m.controllers {
|
||||
paths[c] = m.dirPath
|
||||
}
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *manager) GetUnifiedPath() (string, error) {
|
||||
return m.dirPath, nil
|
||||
}
|
||||
|
||||
func (m *manager) Set(container *configs.Config) error {
|
||||
if container == nil || container.Cgroups == nil {
|
||||
return nil
|
||||
}
|
||||
var errs []error
|
||||
// pids (since kernel 4.5)
|
||||
if _, ok := m.controllers["pids"]; ok {
|
||||
if err := setPids(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// memory (since kernel 4.5)
|
||||
if _, ok := m.controllers["memory"]; ok {
|
||||
if err := setMemory(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// io (since kernel 4.5)
|
||||
if _, ok := m.controllers["io"]; ok {
|
||||
if err := setIo(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// cpu (since kernel 4.15)
|
||||
if _, ok := m.controllers["cpu"]; ok {
|
||||
if err := setCpu(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// devices (since kernel 4.15, pseudo-controller)
|
||||
if err := setDevices(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// cpuset (since kernel 5.0)
|
||||
if _, ok := m.controllers["cpuset"]; ok {
|
||||
if err := setCpuset(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// freezer (since kernel 5.2, pseudo-controller)
|
||||
if err := setFreezer(m.dirPath, container.Cgroups.Freezer); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if len(errs) > 0 && !m.rootless {
|
||||
return errors.Errorf("error while setting cgroup v2: %+v", errs)
|
||||
}
|
||||
m.config = container.Cgroups
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.config, nil
|
||||
}
|
124
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
generated
vendored
Normal file
124
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
generated
vendored
Normal file
|
@ -0,0 +1,124 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func setIo(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.BlkioWeight != 0 {
|
||||
filename := "io.bfq.weight"
|
||||
if err := fscommon.WriteFile(dirPath, filename, strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice {
|
||||
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice {
|
||||
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("wbps")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice {
|
||||
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("riops")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice {
|
||||
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("wiops")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error) {
|
||||
ret := map[string][]string{}
|
||||
p := filepath.Join(dirPath, name)
|
||||
f, err := os.Open(p)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
parts := strings.Fields(line)
|
||||
if len(parts) < 2 {
|
||||
continue
|
||||
}
|
||||
ret[parts[0]] = parts[1:]
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func statIo(dirPath string, stats *cgroups.Stats) error {
|
||||
// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
|
||||
var ioServiceBytesRecursive []cgroups.BlkioStatEntry
|
||||
values, err := readCgroup2MapFile(dirPath, "io.stat")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for k, v := range values {
|
||||
d := strings.Split(k, ":")
|
||||
if len(d) != 2 {
|
||||
continue
|
||||
}
|
||||
minor, err := strconv.ParseUint(d[0], 10, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
major, err := strconv.ParseUint(d[1], 10, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, item := range v {
|
||||
d := strings.Split(item, "=")
|
||||
if len(d) != 2 {
|
||||
continue
|
||||
}
|
||||
op := d[0]
|
||||
|
||||
// Accommodate the cgroup v1 naming
|
||||
switch op {
|
||||
case "rbytes":
|
||||
op = "read"
|
||||
case "wbytes":
|
||||
op = "write"
|
||||
}
|
||||
|
||||
value, err := strconv.ParseUint(d[1], 10, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
entry := cgroups.BlkioStatEntry{
|
||||
Op: op,
|
||||
Major: major,
|
||||
Minor: minor,
|
||||
Value: value,
|
||||
}
|
||||
ioServiceBytesRecursive = append(ioServiceBytesRecursive, entry)
|
||||
}
|
||||
}
|
||||
stats.BlkioStats = cgroups.BlkioStats{IoServiceBytesRecursive: ioServiceBytesRecursive}
|
||||
return nil
|
||||
}
|
103
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
generated
vendored
Normal file
103
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
generated
vendored
Normal file
|
@ -0,0 +1,103 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
func setMemory(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.MemorySwap != 0 {
|
||||
if err := fscommon.WriteFile(dirPath, "memory.swap.max", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.Memory != 0 {
|
||||
if err := fscommon.WriteFile(dirPath, "memory.max", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// cgroup.Resources.KernelMemory is ignored
|
||||
|
||||
if cgroup.Resources.MemoryReservation != 0 {
|
||||
if err := fscommon.WriteFile(dirPath, "memory.low", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func statMemory(dirPath string, stats *cgroups.Stats) error {
|
||||
// Set stats from memory.stat.
|
||||
statsFile, err := os.Open(filepath.Join(dirPath, "memory.stat"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer statsFile.Close()
|
||||
|
||||
sc := bufio.NewScanner(statsFile)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to parse memory.stat (%q)", sc.Text())
|
||||
}
|
||||
stats.MemoryStats.Stats[t] = v
|
||||
}
|
||||
stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"]
|
||||
|
||||
memoryUsage, err := getMemoryDataV2(dirPath, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.Usage = memoryUsage
|
||||
swapUsage, err := getMemoryDataV2(dirPath, "swap")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.SwapUsage = swapUsage
|
||||
|
||||
stats.MemoryStats.UseHierarchy = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
|
||||
memoryData := cgroups.MemoryData{}
|
||||
|
||||
moduleName := "memory"
|
||||
if name != "" {
|
||||
moduleName = strings.Join([]string{"memory", name}, ".")
|
||||
}
|
||||
usage := strings.Join([]string{moduleName, "current"}, ".")
|
||||
limit := strings.Join([]string{moduleName, "max"}, ".")
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, errors.Wrapf(err, "failed to parse %s", usage)
|
||||
}
|
||||
memoryData.Usage = value
|
||||
|
||||
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, errors.Wrapf(err, "failed to parse %s", limit)
|
||||
}
|
||||
memoryData.Limit = value
|
||||
|
||||
return memoryData, nil
|
||||
}
|
90
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
generated
vendored
Normal file
90
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
generated
vendored
Normal file
|
@ -0,0 +1,90 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func setPids(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.PidsLimit != 0 {
|
||||
// "max" is the fallback value.
|
||||
limit := "max"
|
||||
|
||||
if cgroup.Resources.PidsLimit > 0 {
|
||||
limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10)
|
||||
}
|
||||
|
||||
if err := fscommon.WriteFile(dirPath, "pids.max", limit); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func isNOTSUP(err error) bool {
|
||||
switch err := err.(type) {
|
||||
case *os.PathError:
|
||||
return err.Err == unix.ENOTSUP
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func statPidsWithoutController(dirPath string, stats *cgroups.Stats) error {
|
||||
// if the controller is not enabled, let's read PIDS from cgroups.procs
|
||||
// (or threads if cgroup.threads is enabled)
|
||||
contents, err := ioutil.ReadFile(filepath.Join(dirPath, "cgroup.procs"))
|
||||
if err != nil && isNOTSUP(err) {
|
||||
contents, err = ioutil.ReadFile(filepath.Join(dirPath, "cgroup.threads"))
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
pids := make(map[string]string)
|
||||
for _, i := range strings.Split(string(contents), "\n") {
|
||||
if i != "" {
|
||||
pids[i] = i
|
||||
}
|
||||
}
|
||||
stats.PidsStats.Current = uint64(len(pids))
|
||||
stats.PidsStats.Limit = 0
|
||||
return nil
|
||||
}
|
||||
|
||||
func statPids(dirPath string, stats *cgroups.Stats) error {
|
||||
current, err := fscommon.GetCgroupParamUint(dirPath, "pids.current")
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to parse pids.current")
|
||||
}
|
||||
|
||||
maxString, err := fscommon.GetCgroupParamString(dirPath, "pids.max")
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to parse pids.max")
|
||||
}
|
||||
|
||||
// Default if pids.max == "max" is 0 -- which represents "no limit".
|
||||
var max uint64
|
||||
if maxString != "max" {
|
||||
max, err = fscommon.ParseUint(maxString, 10, 64)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q",
|
||||
maxString, filepath.Join(dirPath, "pids.max"))
|
||||
}
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = current
|
||||
stats.PidsStats.Limit = max
|
||||
return nil
|
||||
}
|
36
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/fscommon.go
generated
vendored
Normal file
36
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/fscommon.go
generated
vendored
Normal file
|
@ -0,0 +1,36 @@
|
|||
// +build linux
|
||||
|
||||
package fscommon
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
func WriteFile(dir, file, data string) error {
|
||||
if dir == "" {
|
||||
return errors.Errorf("no directory specified for %s", file)
|
||||
}
|
||||
path, err := securejoin.SecureJoin(dir, file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := ioutil.WriteFile(path, []byte(data), 0700); err != nil {
|
||||
return errors.Wrapf(err, "failed to write %q to %q", data, path)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func ReadFile(dir, file string) (string, error) {
|
||||
if dir == "" {
|
||||
return "", errors.Errorf("no directory specified for %s", file)
|
||||
}
|
||||
path, err := securejoin.SecureJoin(dir, file)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
data, err := ioutil.ReadFile(path)
|
||||
return string(data), err
|
||||
}
|
|
@ -1,6 +1,6 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
package fscommon
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
@ -18,7 +18,7 @@ var (
|
|||
|
||||
// Saturates negative values at zero and returns a uint64.
|
||||
// Due to kernel bugs, some of the memory cgroup stats can be negative.
|
||||
func parseUint(s string, base, bitSize int) (uint64, error) {
|
||||
func ParseUint(s string, base, bitSize int) (uint64, error) {
|
||||
value, err := strconv.ParseUint(s, base, bitSize)
|
||||
if err != nil {
|
||||
intValue, intErr := strconv.ParseInt(s, base, bitSize)
|
||||
|
@ -38,11 +38,11 @@ func parseUint(s string, base, bitSize int) (uint64, error) {
|
|||
|
||||
// Parses a cgroup param and returns as name, value
|
||||
// i.e. "io_service_bytes 1234" will return as io_service_bytes, 1234
|
||||
func getCgroupParamKeyValue(t string) (string, uint64, error) {
|
||||
func GetCgroupParamKeyValue(t string) (string, uint64, error) {
|
||||
parts := strings.Fields(t)
|
||||
switch len(parts) {
|
||||
case 2:
|
||||
value, err := parseUint(parts[1], 10, 64)
|
||||
value, err := ParseUint(parts[1], 10, 64)
|
||||
if err != nil {
|
||||
return "", 0, fmt.Errorf("unable to convert param value (%q) to uint64: %v", parts[1], err)
|
||||
}
|
||||
|
@ -54,7 +54,7 @@ func getCgroupParamKeyValue(t string) (string, uint64, error) {
|
|||
}
|
||||
|
||||
// Gets a single uint64 value from the specified cgroup file.
|
||||
func getCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) {
|
||||
func GetCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) {
|
||||
fileName := filepath.Join(cgroupPath, cgroupFile)
|
||||
contents, err := ioutil.ReadFile(fileName)
|
||||
if err != nil {
|
||||
|
@ -65,7 +65,7 @@ func getCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) {
|
|||
return math.MaxUint64, nil
|
||||
}
|
||||
|
||||
res, err := parseUint(trimmed, 10, 64)
|
||||
res, err := ParseUint(trimmed, 10, 64)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("unable to parse %q as a uint from Cgroup file %q", string(contents), fileName)
|
||||
}
|
||||
|
@ -73,7 +73,7 @@ func getCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) {
|
|||
}
|
||||
|
||||
// Gets a string value from the specified cgroup file
|
||||
func getCgroupParamString(cgroupPath, cgroupFile string) (string, error) {
|
||||
func GetCgroupParamString(cgroupPath, cgroupFile string) (string, error) {
|
||||
contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile))
|
||||
if err != nil {
|
||||
return "", err
|
10
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go
generated
vendored
10
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go
generated
vendored
|
@ -1,4 +1,4 @@
|
|||
// +build !linux static_build
|
||||
// +build !linux
|
||||
|
||||
package systemd
|
||||
|
||||
|
@ -42,6 +42,10 @@ func (m *Manager) GetPaths() map[string]string {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetUnifiedPath() (string, error) {
|
||||
return "", fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
@ -57,3 +61,7 @@ func (m *Manager) Freeze(state configs.FreezerState) error {
|
|||
func Freeze(c *configs.Cgroup, state configs.FreezerState) error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
|
10
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
generated
vendored
10
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
generated
vendored
|
@ -1,4 +1,4 @@
|
|||
// +build linux,!static_build
|
||||
// +build linux
|
||||
|
||||
package systemd
|
||||
|
||||
|
@ -297,6 +297,10 @@ func (m *LegacyManager) GetPaths() map[string]string {
|
|||
return paths
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetUnifiedPath() (string, error) {
|
||||
return "", errors.New("unified path is only supported when running in unified mode")
|
||||
}
|
||||
|
||||
func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
|
||||
path, err := getSubsystemPath(c, subsystem)
|
||||
if err != nil {
|
||||
|
@ -524,3 +528,7 @@ func isUnitExists(err error) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.Cgroups, nil
|
||||
}
|
||||
|
|
131
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/unified_hierarchy.go
generated
vendored
131
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/unified_hierarchy.go
generated
vendored
|
@ -1,4 +1,4 @@
|
|||
// +build linux,!static_build
|
||||
// +build linux
|
||||
|
||||
package systemd
|
||||
|
||||
|
@ -14,8 +14,9 @@ import (
|
|||
|
||||
systemdDbus "github.com/coreos/go-systemd/dbus"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
|
@ -25,15 +26,6 @@ type UnifiedManager struct {
|
|||
Paths map[string]string
|
||||
}
|
||||
|
||||
var unifiedSubsystems = subsystemSet{
|
||||
&fs.CpusetGroupV2{},
|
||||
&fs.FreezerGroupV2{},
|
||||
&fs.CpuGroupV2{},
|
||||
&fs.MemoryGroupV2{},
|
||||
&fs.IOGroupV2{},
|
||||
&fs.PidsGroupV2{},
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.Cgroups
|
||||
|
@ -159,19 +151,19 @@ func (m *UnifiedManager) Apply(pid int) error {
|
|||
return err
|
||||
}
|
||||
|
||||
paths := make(map[string]string)
|
||||
for _, s := range unifiedSubsystems {
|
||||
subsystemPath, err := getSubsystemPath(m.Cgroups, s.Name())
|
||||
if err != nil {
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
paths[s.Name()] = subsystemPath
|
||||
path, err := getSubsystemPath(m.Cgroups, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Paths = map[string]string{
|
||||
"pids": path,
|
||||
"memory": path,
|
||||
"io": path,
|
||||
"cpu": path,
|
||||
"devices": path,
|
||||
"cpuset": path,
|
||||
"freezer": path,
|
||||
}
|
||||
m.Paths = paths
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -195,7 +187,24 @@ func (m *UnifiedManager) GetPaths() map[string]string {
|
|||
m.mu.Unlock()
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetUnifiedPath() (string, error) {
|
||||
unifiedPath := ""
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
for k, v := range m.Paths {
|
||||
if unifiedPath == "" {
|
||||
unifiedPath = v
|
||||
} else if v != unifiedPath {
|
||||
return unifiedPath,
|
||||
errors.Errorf("expected %q path to be unified path %q, got %q", k, unifiedPath, v)
|
||||
}
|
||||
}
|
||||
if unifiedPath == "" {
|
||||
// FIXME: unified path could be detected even when no controller is available
|
||||
return unifiedPath, errors.New("cannot detect unified path")
|
||||
}
|
||||
return unifiedPath, nil
|
||||
}
|
||||
func createCgroupsv2Path(path string) (Err error) {
|
||||
content, err := ioutil.ReadFile("/sys/fs/cgroup/cgroup.controllers")
|
||||
if err != nil {
|
||||
|
@ -250,27 +259,24 @@ func joinCgroupsV2(c *configs.Cgroup, pid int) error {
|
|||
return createCgroupsv2Path(path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) fsManager() (cgroups.Manager, error) {
|
||||
path, err := m.GetUnifiedPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fs2.NewManager(m.Cgroups, path, false)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Freeze(state configs.FreezerState) error {
|
||||
path, err := getSubsystemPath(m.Cgroups, "freezer")
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
prevState := m.Cgroups.Resources.Freezer
|
||||
m.Cgroups.Resources.Freezer = state
|
||||
freezer, err := unifiedSubsystems.Get("freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = freezer.Set(path, m.Cgroups)
|
||||
if err != nil {
|
||||
m.Cgroups.Resources.Freezer = prevState
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
return fsMgr.Freeze(state)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetPids() ([]int, error) {
|
||||
path, err := getSubsystemPath(m.Cgroups, "devices")
|
||||
path, err := m.GetUnifiedPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -278,7 +284,7 @@ func (m *UnifiedManager) GetPids() ([]int, error) {
|
|||
}
|
||||
|
||||
func (m *UnifiedManager) GetAllPids() ([]int, error) {
|
||||
path, err := getSubsystemPath(m.Cgroups, "devices")
|
||||
path, err := m.GetUnifiedPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -286,44 +292,21 @@ func (m *UnifiedManager) GetAllPids() ([]int, error) {
|
|||
}
|
||||
|
||||
func (m *UnifiedManager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := cgroups.NewStats()
|
||||
for name, path := range m.Paths {
|
||||
sys, err := unifiedSubsystems.Get(name)
|
||||
if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
|
||||
continue
|
||||
}
|
||||
if err := sys.GetStats(path, stats); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
return fsMgr.GetStats()
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Set(container *configs.Config) error {
|
||||
// If Paths are set, then we are just joining cgroups paths
|
||||
// and there is no need to set any values.
|
||||
if m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, sys := range unifiedSubsystems {
|
||||
// Get the subsystem path, but don't error out for not found cgroups.
|
||||
path, err := getSubsystemPath(container.Cgroups, sys.Name())
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if m.Paths["cpu"] != "" {
|
||||
if err := fs.CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return fsMgr.Set(container)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.Cgroups, nil
|
||||
}
|
||||
|
|
|
@ -20,8 +20,9 @@ import (
|
|||
)
|
||||
|
||||
const (
|
||||
CgroupNamePrefix = "name="
|
||||
CgroupProcesses = "cgroup.procs"
|
||||
CgroupNamePrefix = "name="
|
||||
CgroupProcesses = "cgroup.procs"
|
||||
unifiedMountpoint = "/sys/fs/cgroup"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -40,7 +41,7 @@ var HugePageSizeUnitList = []string{"B", "KB", "MB", "GB", "TB", "PB"}
|
|||
func IsCgroup2UnifiedMode() bool {
|
||||
isUnifiedOnce.Do(func() {
|
||||
var st syscall.Statfs_t
|
||||
if err := syscall.Statfs("/sys/fs/cgroup", &st); err != nil {
|
||||
if err := syscall.Statfs(unifiedMountpoint, &st); err != nil {
|
||||
panic("cannot statfs cgroup root")
|
||||
}
|
||||
isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC
|
||||
|
@ -50,6 +51,9 @@ func IsCgroup2UnifiedMode() bool {
|
|||
|
||||
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
|
||||
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return unifiedMountpoint, nil
|
||||
}
|
||||
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
|
||||
return mnt, err
|
||||
}
|
||||
|
@ -235,8 +239,8 @@ func GetCgroupMounts(all bool) ([]Mount, error) {
|
|||
return nil, err
|
||||
}
|
||||
m := Mount{
|
||||
Mountpoint: "/sys/fs/cgroup",
|
||||
Root: "/sys/fs/cgroup",
|
||||
Mountpoint: unifiedMountpoint,
|
||||
Root: unifiedMountpoint,
|
||||
Subsystems: availableControllers,
|
||||
}
|
||||
return []Mount{m}, nil
|
||||
|
@ -262,6 +266,21 @@ func GetCgroupMounts(all bool) ([]Mount, error) {
|
|||
|
||||
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
|
||||
func GetAllSubsystems() ([]string, error) {
|
||||
// /proc/cgroups is meaningless for v2
|
||||
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#deprecated-v1-core-features
|
||||
if IsCgroup2UnifiedMode() {
|
||||
// "pseudo" controllers do not appear in /sys/fs/cgroup/cgroup.controllers.
|
||||
// - devices: implemented in kernel 4.15
|
||||
// - freezer: implemented in kernel 5.2
|
||||
// We assume these are always available, as it is hard to detect availability.
|
||||
pseudo := []string{"devices", "freezer"}
|
||||
data, err := ioutil.ReadFile("/sys/fs/cgroup/cgroup.controllers")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
subsystems := append(pseudo, strings.Fields(string(data))...)
|
||||
return subsystems, nil
|
||||
}
|
||||
f, err := os.Open("/proc/cgroups")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
|
@ -265,22 +265,24 @@ func (c *linuxContainer) Exec() error {
|
|||
|
||||
func (c *linuxContainer) exec() error {
|
||||
path := filepath.Join(c.root, execFifoFilename)
|
||||
pid := c.initProcess.pid()
|
||||
blockingFifoOpenCh := awaitFifoOpen(path)
|
||||
for {
|
||||
select {
|
||||
case result := <-blockingFifoOpenCh:
|
||||
return handleFifoResult(result)
|
||||
|
||||
fifoOpen := make(chan struct{})
|
||||
select {
|
||||
case <-awaitProcessExit(c.initProcess.pid(), fifoOpen):
|
||||
return errors.New("container process is already dead")
|
||||
case result := <-awaitFifoOpen(path):
|
||||
close(fifoOpen)
|
||||
if result.err != nil {
|
||||
return result.err
|
||||
case <-time.After(time.Millisecond * 100):
|
||||
stat, err := system.Stat(pid)
|
||||
if err != nil || stat.State == system.Zombie {
|
||||
// could be because process started, ran, and completed between our 100ms timeout and our system.Stat() check.
|
||||
// see if the fifo exists and has data (with a non-blocking open, which will succeed if the writing process is complete).
|
||||
if err := handleFifoResult(fifoOpen(path, false)); err != nil {
|
||||
return errors.New("container process is already dead")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
f := result.file
|
||||
defer f.Close()
|
||||
if err := readFromExecFifo(f); err != nil {
|
||||
return err
|
||||
}
|
||||
return os.Remove(path)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -295,38 +297,39 @@ func readFromExecFifo(execFifo io.Reader) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func awaitProcessExit(pid int, exit <-chan struct{}) <-chan struct{} {
|
||||
isDead := make(chan struct{})
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-exit:
|
||||
return
|
||||
case <-time.After(time.Millisecond * 100):
|
||||
stat, err := system.Stat(pid)
|
||||
if err != nil || stat.State == system.Zombie {
|
||||
close(isDead)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
return isDead
|
||||
}
|
||||
|
||||
func awaitFifoOpen(path string) <-chan openResult {
|
||||
fifoOpened := make(chan openResult)
|
||||
go func() {
|
||||
f, err := os.OpenFile(path, os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
fifoOpened <- openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")}
|
||||
return
|
||||
}
|
||||
fifoOpened <- openResult{file: f}
|
||||
result := fifoOpen(path, true)
|
||||
fifoOpened <- result
|
||||
}()
|
||||
return fifoOpened
|
||||
}
|
||||
|
||||
func fifoOpen(path string, block bool) openResult {
|
||||
flags := os.O_RDONLY
|
||||
if !block {
|
||||
flags |= syscall.O_NONBLOCK
|
||||
}
|
||||
f, err := os.OpenFile(path, flags, 0)
|
||||
if err != nil {
|
||||
return openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")}
|
||||
}
|
||||
return openResult{file: f}
|
||||
}
|
||||
|
||||
func handleFifoResult(result openResult) error {
|
||||
if result.err != nil {
|
||||
return result.err
|
||||
}
|
||||
f := result.file
|
||||
defer f.Close()
|
||||
if err := readFromExecFifo(f); err != nil {
|
||||
return err
|
||||
}
|
||||
return os.Remove(f.Name())
|
||||
}
|
||||
|
||||
type openResult struct {
|
||||
file *os.File
|
||||
err error
|
||||
|
@ -940,7 +943,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
|||
|
||||
// Since a container can be C/R'ed multiple times,
|
||||
// the checkpoint directory may already exist.
|
||||
if err := os.Mkdir(criuOpts.ImagesDirectory, 0755); err != nil && !os.IsExist(err) {
|
||||
if err := os.Mkdir(criuOpts.ImagesDirectory, 0700); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -948,7 +951,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
|||
criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
|
||||
}
|
||||
|
||||
if err := os.Mkdir(criuOpts.WorkDirectory, 0755); err != nil && !os.IsExist(err) {
|
||||
if err := os.Mkdir(criuOpts.WorkDirectory, 0700); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -1111,7 +1114,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
|||
return err
|
||||
}
|
||||
|
||||
err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0655)
|
||||
err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0600)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -1246,7 +1249,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
|||
}
|
||||
// Since a container can be C/R'ed multiple times,
|
||||
// the work directory may already exist.
|
||||
if err := os.Mkdir(criuOpts.WorkDirectory, 0655); err != nil && !os.IsExist(err) {
|
||||
if err := os.Mkdir(criuOpts.WorkDirectory, 0700); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
workDir, err := os.Open(criuOpts.WorkDirectory)
|
||||
|
@ -1824,7 +1827,7 @@ func (c *linuxContainer) isPaused() (bool, error) {
|
|||
data, err := ioutil.ReadFile(filepath.Join(fcg, filename))
|
||||
if err != nil {
|
||||
// If freezer cgroup is not mounted, the container would just be not paused.
|
||||
if os.IsNotExist(err) {
|
||||
if os.IsNotExist(err) || err == syscall.ENODEV {
|
||||
return false, nil
|
||||
}
|
||||
return false, newSystemErrorWithCause(err, "checking if container is paused")
|
||||
|
|
|
@ -14,12 +14,14 @@ import (
|
|||
"github.com/cyphar/filepath-securejoin"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs/validate"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
"github.com/opencontainers/runc/libcontainer/mount"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
@ -59,10 +61,37 @@ func SystemdCgroups(l *LinuxFactory) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func getUnifiedPath(paths map[string]string) string {
|
||||
unifiedPath := ""
|
||||
for k, v := range paths {
|
||||
if unifiedPath == "" {
|
||||
unifiedPath = v
|
||||
} else if v != unifiedPath {
|
||||
panic(errors.Errorf("expected %q path to be unified path %q, got %q", k, unifiedPath, v))
|
||||
}
|
||||
}
|
||||
// can be empty
|
||||
return unifiedPath
|
||||
}
|
||||
|
||||
func cgroupfs2(l *LinuxFactory, rootless bool) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
m, err := fs2.NewManager(config, getUnifiedPath(paths), rootless)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return m
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Cgroupfs is an options func to configure a LinuxFactory to return containers
|
||||
// that use the native cgroups filesystem implementation to create and manage
|
||||
// cgroups.
|
||||
func Cgroupfs(l *LinuxFactory) error {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return cgroupfs2(l, false)
|
||||
}
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &fs.Manager{
|
||||
Cgroups: config,
|
||||
|
@ -79,6 +108,9 @@ func Cgroupfs(l *LinuxFactory) error {
|
|||
// during rootless container (including euid=0 in userns) setup (while still allowing cgroup usage if
|
||||
// they've been set up properly).
|
||||
func RootlessCgroupfs(l *LinuxFactory) error {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return cgroupfs2(l, true)
|
||||
}
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &fs.Manager{
|
||||
Cgroups: config,
|
||||
|
|
|
@ -127,6 +127,12 @@ func finalizeNamespace(config *initConfig) error {
|
|||
return errors.Wrap(err, "close exec fds")
|
||||
}
|
||||
|
||||
if config.Cwd != "" {
|
||||
if err := unix.Chdir(config.Cwd); err != nil {
|
||||
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err)
|
||||
}
|
||||
}
|
||||
|
||||
capabilities := &configs.Capabilities{}
|
||||
if config.Capabilities != nil {
|
||||
capabilities = config.Capabilities
|
||||
|
@ -154,11 +160,6 @@ func finalizeNamespace(config *initConfig) error {
|
|||
if err := w.ApplyCaps(); err != nil {
|
||||
return errors.Wrap(err, "apply caps")
|
||||
}
|
||||
if config.Cwd != "" {
|
||||
if err := unix.Chdir(config.Cwd); err != nil {
|
||||
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/types"
|
||||
"github.com/vishvananda/netlink"
|
||||
)
|
||||
|
||||
|
@ -37,8 +38,8 @@ func getStrategy(tpe string) (networkStrategy, error) {
|
|||
}
|
||||
|
||||
// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo.
|
||||
func getNetworkInterfaceStats(interfaceName string) (*NetworkInterface, error) {
|
||||
out := &NetworkInterface{Name: interfaceName}
|
||||
func getNetworkInterfaceStats(interfaceName string) (*types.NetworkInterface, error) {
|
||||
out := &types.NetworkInterface{Name: interfaceName}
|
||||
// This can happen if the network runtime information is missing - possible if the
|
||||
// container was created by an old version of libcontainer.
|
||||
if interfaceName == "" {
|
||||
|
|
|
@ -279,8 +279,14 @@ func mountCgroupV2(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
|||
if err := os.MkdirAll(cgroupPath, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return unix.Mount(m.Source, cgroupPath, "cgroup2", uintptr(m.Flags), m.Data)
|
||||
if err := unix.Mount(m.Source, cgroupPath, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
|
||||
// when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
|
||||
if err == unix.EPERM || err == unix.EBUSY {
|
||||
return unix.Mount("/sys/fs/cgroup", cgroupPath, "", uintptr(m.Flags)|unix.MS_BIND, "")
|
||||
}
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error {
|
||||
|
@ -293,6 +299,18 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
|||
|
||||
switch m.Device {
|
||||
case "proc", "sysfs":
|
||||
// If the destination already exists and is not a directory, we bail
|
||||
// out This is to avoid mounting through a symlink or similar -- which
|
||||
// has been a "fun" attack scenario in the past.
|
||||
// TODO: This won't be necessary once we switch to libpathrs and we can
|
||||
// stop all of these symlink-exchange attacks.
|
||||
if fi, err := os.Lstat(dest); err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
} else if fi.Mode()&os.ModeDir == 0 {
|
||||
return fmt.Errorf("filesystem %q must be mounted on ordinary directory", m.Device)
|
||||
}
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -43,7 +43,8 @@ var mountPropagationMapping = map[string]int{
|
|||
"": 0,
|
||||
}
|
||||
|
||||
var allowedDevices = []*configs.Device{
|
||||
// AllowedDevices is exposed for devicefilter_test.go
|
||||
var AllowedDevices = []*configs.Device{
|
||||
// allow mknod for any device
|
||||
{
|
||||
Type: 'c',
|
||||
|
@ -195,7 +196,7 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
|
|||
if err := createDevices(spec, config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c, err := createCgroupConfig(opts)
|
||||
c, err := CreateCgroupConfig(opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -296,7 +297,7 @@ func createLibcontainerMount(cwd string, m specs.Mount) *configs.Mount {
|
|||
}
|
||||
}
|
||||
|
||||
func createCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) {
|
||||
func CreateCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) {
|
||||
var (
|
||||
myCgroupPath string
|
||||
|
||||
|
@ -341,7 +342,7 @@ func createCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) {
|
|||
|
||||
// In rootless containers, any attempt to make cgroup changes is likely to fail.
|
||||
// libcontainer will validate this but ignores the error.
|
||||
c.Resources.AllowedDevices = allowedDevices
|
||||
c.Resources.AllowedDevices = AllowedDevices
|
||||
if spec.Linux != nil {
|
||||
r := spec.Linux.Resources
|
||||
if r == nil {
|
||||
|
@ -495,7 +496,7 @@ func createCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) {
|
|||
}
|
||||
}
|
||||
// append the default allowed devices to the end of the list
|
||||
c.Resources.Devices = append(c.Resources.Devices, allowedDevices...)
|
||||
c.Resources.Devices = append(c.Resources.Devices, AllowedDevices...)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -1,15 +0,0 @@
|
|||
package libcontainer
|
||||
|
||||
type NetworkInterface struct {
|
||||
// Name is the name of the network interface.
|
||||
Name string
|
||||
|
||||
RxBytes uint64
|
||||
RxPackets uint64
|
||||
RxErrors uint64
|
||||
RxDropped uint64
|
||||
TxBytes uint64
|
||||
TxPackets uint64
|
||||
TxErrors uint64
|
||||
TxDropped uint64
|
||||
}
|
|
@ -1,10 +1,13 @@
|
|||
package libcontainer
|
||||
|
||||
import "github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
import "github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
"github.com/opencontainers/runc/types"
|
||||
)
|
||||
|
||||
type Stats struct {
|
||||
Interfaces []*NetworkInterface
|
||||
Interfaces []*types.NetworkInterface
|
||||
CgroupStats *cgroups.Stats
|
||||
IntelRdtStats *intelrdt.Stats
|
||||
}
|
||||
|
|
|
@ -0,0 +1,130 @@
|
|||
package types
|
||||
|
||||
// Event struct for encoding the event data to json.
|
||||
type Event struct {
|
||||
Type string `json:"type"`
|
||||
ID string `json:"id"`
|
||||
Data interface{} `json:"data,omitempty"`
|
||||
}
|
||||
|
||||
// stats is the runc specific stats structure for stability when encoding and decoding stats.
|
||||
type Stats struct {
|
||||
CPU Cpu `json:"cpu"`
|
||||
Memory Memory `json:"memory"`
|
||||
Pids Pids `json:"pids"`
|
||||
Blkio Blkio `json:"blkio"`
|
||||
Hugetlb map[string]Hugetlb `json:"hugetlb"`
|
||||
IntelRdt IntelRdt `json:"intel_rdt"`
|
||||
NetworkInterfaces []*NetworkInterface `json:"network_interfaces"`
|
||||
}
|
||||
|
||||
type Hugetlb struct {
|
||||
Usage uint64 `json:"usage,omitempty"`
|
||||
Max uint64 `json:"max,omitempty"`
|
||||
Failcnt uint64 `json:"failcnt"`
|
||||
}
|
||||
|
||||
type BlkioEntry struct {
|
||||
Major uint64 `json:"major,omitempty"`
|
||||
Minor uint64 `json:"minor,omitempty"`
|
||||
Op string `json:"op,omitempty"`
|
||||
Value uint64 `json:"value,omitempty"`
|
||||
}
|
||||
|
||||
type Blkio struct {
|
||||
IoServiceBytesRecursive []BlkioEntry `json:"ioServiceBytesRecursive,omitempty"`
|
||||
IoServicedRecursive []BlkioEntry `json:"ioServicedRecursive,omitempty"`
|
||||
IoQueuedRecursive []BlkioEntry `json:"ioQueueRecursive,omitempty"`
|
||||
IoServiceTimeRecursive []BlkioEntry `json:"ioServiceTimeRecursive,omitempty"`
|
||||
IoWaitTimeRecursive []BlkioEntry `json:"ioWaitTimeRecursive,omitempty"`
|
||||
IoMergedRecursive []BlkioEntry `json:"ioMergedRecursive,omitempty"`
|
||||
IoTimeRecursive []BlkioEntry `json:"ioTimeRecursive,omitempty"`
|
||||
SectorsRecursive []BlkioEntry `json:"sectorsRecursive,omitempty"`
|
||||
}
|
||||
|
||||
type Pids struct {
|
||||
Current uint64 `json:"current,omitempty"`
|
||||
Limit uint64 `json:"limit,omitempty"`
|
||||
}
|
||||
|
||||
type Throttling struct {
|
||||
Periods uint64 `json:"periods,omitempty"`
|
||||
ThrottledPeriods uint64 `json:"throttledPeriods,omitempty"`
|
||||
ThrottledTime uint64 `json:"throttledTime,omitempty"`
|
||||
}
|
||||
|
||||
type CpuUsage struct {
|
||||
// Units: nanoseconds.
|
||||
Total uint64 `json:"total,omitempty"`
|
||||
Percpu []uint64 `json:"percpu,omitempty"`
|
||||
Kernel uint64 `json:"kernel"`
|
||||
User uint64 `json:"user"`
|
||||
}
|
||||
|
||||
type Cpu struct {
|
||||
Usage CpuUsage `json:"usage,omitempty"`
|
||||
Throttling Throttling `json:"throttling,omitempty"`
|
||||
}
|
||||
|
||||
type MemoryEntry struct {
|
||||
Limit uint64 `json:"limit"`
|
||||
Usage uint64 `json:"usage,omitempty"`
|
||||
Max uint64 `json:"max,omitempty"`
|
||||
Failcnt uint64 `json:"failcnt"`
|
||||
}
|
||||
|
||||
type Memory struct {
|
||||
Cache uint64 `json:"cache,omitempty"`
|
||||
Usage MemoryEntry `json:"usage,omitempty"`
|
||||
Swap MemoryEntry `json:"swap,omitempty"`
|
||||
Kernel MemoryEntry `json:"kernel,omitempty"`
|
||||
KernelTCP MemoryEntry `json:"kernelTCP,omitempty"`
|
||||
Raw map[string]uint64 `json:"raw,omitempty"`
|
||||
}
|
||||
|
||||
type L3CacheInfo struct {
|
||||
CbmMask string `json:"cbm_mask,omitempty"`
|
||||
MinCbmBits uint64 `json:"min_cbm_bits,omitempty"`
|
||||
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||
}
|
||||
|
||||
type MemBwInfo struct {
|
||||
BandwidthGran uint64 `json:"bandwidth_gran,omitempty"`
|
||||
DelayLinear uint64 `json:"delay_linear,omitempty"`
|
||||
MinBandwidth uint64 `json:"min_bandwidth,omitempty"`
|
||||
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||
}
|
||||
|
||||
type IntelRdt struct {
|
||||
// The read-only L3 cache information
|
||||
L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"`
|
||||
|
||||
// The read-only L3 cache schema in root
|
||||
L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"`
|
||||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||
|
||||
// The read-only memory bandwidth information
|
||||
MemBwInfo *MemBwInfo `json:"mem_bw_info,omitempty"`
|
||||
|
||||
// The read-only memory bandwidth schema in root
|
||||
MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"`
|
||||
|
||||
// The memory bandwidth schema in 'container_id' group
|
||||
MemBwSchema string `json:"mem_bw_schema,omitempty"`
|
||||
}
|
||||
|
||||
type NetworkInterface struct {
|
||||
// Name is the name of the network interface.
|
||||
Name string
|
||||
|
||||
RxBytes uint64
|
||||
RxPackets uint64
|
||||
RxErrors uint64
|
||||
RxDropped uint64
|
||||
TxBytes uint64
|
||||
TxPackets uint64
|
||||
TxErrors uint64
|
||||
TxDropped uint64
|
||||
}
|
|
@ -26,3 +26,6 @@ golang.org/x/sys 9eafafc0a87e0fd0aeeba439a4573537970c44c7
|
|||
# console dependencies
|
||||
github.com/containerd/console 0650fd9eeb50bab4fc99dceb9f2e14cf58f36e7f
|
||||
github.com/pkg/errors ba968bfe8b2f7e042a574c888954fccecfa385b4 # v0.8.1
|
||||
|
||||
# ebpf dependencies
|
||||
github.com/cilium/ebpf 95b36a581eed7b0f127306ed1d16cc0ddc06cf67
|
||||
|
|
|
@ -149,6 +149,11 @@ github.com/chai2010/gettext-go/gettext/plural
|
|||
github.com/chai2010/gettext-go/gettext/po
|
||||
# github.com/checkpoint-restore/go-criu v0.0.0-20181120144056-17b0214f6c48
|
||||
github.com/checkpoint-restore/go-criu/rpc
|
||||
# github.com/cilium/ebpf v0.0.0-20191025125908-95b36a581eed
|
||||
github.com/cilium/ebpf
|
||||
github.com/cilium/ebpf/asm
|
||||
github.com/cilium/ebpf/internal
|
||||
github.com/cilium/ebpf/internal/unix
|
||||
# github.com/container-storage-interface/spec v1.2.0
|
||||
github.com/container-storage-interface/spec/lib/go/csi
|
||||
# github.com/containerd/cgroups v0.0.0-00010101000000-000000000000 => github.com/containerd/cgroups v0.0.0-20190717030353-c4b9ac5c7601
|
||||
|
@ -656,13 +661,17 @@ github.com/opencontainers/go-digest
|
|||
github.com/opencontainers/image-spec/identity
|
||||
github.com/opencontainers/image-spec/specs-go
|
||||
github.com/opencontainers/image-spec/specs-go/v1
|
||||
# github.com/opencontainers/runc v1.0.0-rc9
|
||||
# github.com/opencontainers/runc v1.0.0-rc10
|
||||
github.com/opencontainers/runc
|
||||
github.com/opencontainers/runc/contrib/cmd/recvtty
|
||||
github.com/opencontainers/runc/libcontainer
|
||||
github.com/opencontainers/runc/libcontainer/apparmor
|
||||
github.com/opencontainers/runc/libcontainer/cgroups
|
||||
github.com/opencontainers/runc/libcontainer/cgroups/ebpf
|
||||
github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter
|
||||
github.com/opencontainers/runc/libcontainer/cgroups/fs
|
||||
github.com/opencontainers/runc/libcontainer/cgroups/fs2
|
||||
github.com/opencontainers/runc/libcontainer/cgroups/fscommon
|
||||
github.com/opencontainers/runc/libcontainer/cgroups/systemd
|
||||
github.com/opencontainers/runc/libcontainer/configs
|
||||
github.com/opencontainers/runc/libcontainer/configs/validate
|
||||
|
@ -678,6 +687,7 @@ github.com/opencontainers/runc/libcontainer/stacktrace
|
|||
github.com/opencontainers/runc/libcontainer/system
|
||||
github.com/opencontainers/runc/libcontainer/user
|
||||
github.com/opencontainers/runc/libcontainer/utils
|
||||
github.com/opencontainers/runc/types
|
||||
# github.com/opencontainers/runtime-spec v1.0.0 => github.com/opencontainers/runtime-spec v0.0.0-20180911193056-5684b8af48c1
|
||||
github.com/opencontainers/runtime-spec/specs-go
|
||||
# github.com/opencontainers/selinux v1.3.1-0.20190929122143-5215b1806f52
|
||||
|
|
Loading…
Reference in New Issue