mirror of https://github.com/k3s-io/k3s
vendor: bump runc to f000fe11
parent
f77a0706d0
commit
2b64276536
|
@ -2795,83 +2795,83 @@
|
|||
},
|
||||
{
|
||||
"ImportPath": "github.com/opencontainers/runc/libcontainer",
|
||||
"Comment": "v1.0.0-rc5-46-g871ba2e58e2431",
|
||||
"Rev": "871ba2e58e24314d1fab4517a80410191ba5ad01"
|
||||
"Comment": "v1.0.0-rc5-176-gf000fe11ece1b7",
|
||||
"Rev": "f000fe11ece1b79f744edd9c8e1a53ba0f5e0f24"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/opencontainers/runc/libcontainer/apparmor",
|
||||
"Comment": "v1.0.0-rc5-46-g871ba2e58e2431",
|
||||
"Rev": "871ba2e58e24314d1fab4517a80410191ba5ad01"
|
||||
"Comment": "v1.0.0-rc5-176-gf000fe11ece1b7",
|
||||
"Rev": "f000fe11ece1b79f744edd9c8e1a53ba0f5e0f24"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/opencontainers/runc/libcontainer/cgroups",
|
||||
"Comment": "v1.0.0-rc5-46-g871ba2e58e2431",
|
||||
"Rev": "871ba2e58e24314d1fab4517a80410191ba5ad01"
|
||||
"Comment": "v1.0.0-rc5-176-gf000fe11ece1b7",
|
||||
"Rev": "f000fe11ece1b79f744edd9c8e1a53ba0f5e0f24"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/opencontainers/runc/libcontainer/cgroups/fs",
|
||||
"Comment": "v1.0.0-rc5-46-g871ba2e58e2431",
|
||||
"Rev": "871ba2e58e24314d1fab4517a80410191ba5ad01"
|
||||
"Comment": "v1.0.0-rc5-176-gf000fe11ece1b7",
|
||||
"Rev": "f000fe11ece1b79f744edd9c8e1a53ba0f5e0f24"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/opencontainers/runc/libcontainer/cgroups/systemd",
|
||||
"Comment": "v1.0.0-rc5-46-g871ba2e58e2431",
|
||||
"Rev": "871ba2e58e24314d1fab4517a80410191ba5ad01"
|
||||
"Comment": "v1.0.0-rc5-176-gf000fe11ece1b7",
|
||||
"Rev": "f000fe11ece1b79f744edd9c8e1a53ba0f5e0f24"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/opencontainers/runc/libcontainer/configs",
|
||||
"Comment": "v1.0.0-rc5-46-g871ba2e58e2431",
|
||||
"Rev": "871ba2e58e24314d1fab4517a80410191ba5ad01"
|
||||
"Comment": "v1.0.0-rc5-176-gf000fe11ece1b7",
|
||||
"Rev": "f000fe11ece1b79f744edd9c8e1a53ba0f5e0f24"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/opencontainers/runc/libcontainer/configs/validate",
|
||||
"Comment": "v1.0.0-rc5-46-g871ba2e58e2431",
|
||||
"Rev": "871ba2e58e24314d1fab4517a80410191ba5ad01"
|
||||
"Comment": "v1.0.0-rc5-176-gf000fe11ece1b7",
|
||||
"Rev": "f000fe11ece1b79f744edd9c8e1a53ba0f5e0f24"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/opencontainers/runc/libcontainer/criurpc",
|
||||
"Comment": "v1.0.0-rc5-46-g871ba2e58e2431",
|
||||
"Rev": "871ba2e58e24314d1fab4517a80410191ba5ad01"
|
||||
"Comment": "v1.0.0-rc5-176-gf000fe11ece1b7",
|
||||
"Rev": "f000fe11ece1b79f744edd9c8e1a53ba0f5e0f24"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/opencontainers/runc/libcontainer/intelrdt",
|
||||
"Comment": "v1.0.0-rc5-46-g871ba2e58e2431",
|
||||
"Rev": "871ba2e58e24314d1fab4517a80410191ba5ad01"
|
||||
"Comment": "v1.0.0-rc5-176-gf000fe11ece1b7",
|
||||
"Rev": "f000fe11ece1b79f744edd9c8e1a53ba0f5e0f24"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/opencontainers/runc/libcontainer/keys",
|
||||
"Comment": "v1.0.0-rc5-46-g871ba2e58e2431",
|
||||
"Rev": "871ba2e58e24314d1fab4517a80410191ba5ad01"
|
||||
"Comment": "v1.0.0-rc5-176-gf000fe11ece1b7",
|
||||
"Rev": "f000fe11ece1b79f744edd9c8e1a53ba0f5e0f24"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/opencontainers/runc/libcontainer/mount",
|
||||
"Comment": "v1.0.0-rc5-46-g871ba2e58e2431",
|
||||
"Rev": "871ba2e58e24314d1fab4517a80410191ba5ad01"
|
||||
"Comment": "v1.0.0-rc5-176-gf000fe11ece1b7",
|
||||
"Rev": "f000fe11ece1b79f744edd9c8e1a53ba0f5e0f24"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/opencontainers/runc/libcontainer/seccomp",
|
||||
"Comment": "v1.0.0-rc5-46-g871ba2e58e2431",
|
||||
"Rev": "871ba2e58e24314d1fab4517a80410191ba5ad01"
|
||||
"Comment": "v1.0.0-rc5-176-gf000fe11ece1b7",
|
||||
"Rev": "f000fe11ece1b79f744edd9c8e1a53ba0f5e0f24"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/opencontainers/runc/libcontainer/stacktrace",
|
||||
"Comment": "v1.0.0-rc5-46-g871ba2e58e2431",
|
||||
"Rev": "871ba2e58e24314d1fab4517a80410191ba5ad01"
|
||||
"Comment": "v1.0.0-rc5-176-gf000fe11ece1b7",
|
||||
"Rev": "f000fe11ece1b79f744edd9c8e1a53ba0f5e0f24"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/opencontainers/runc/libcontainer/system",
|
||||
"Comment": "v1.0.0-rc5-46-g871ba2e58e2431",
|
||||
"Rev": "871ba2e58e24314d1fab4517a80410191ba5ad01"
|
||||
"Comment": "v1.0.0-rc5-176-gf000fe11ece1b7",
|
||||
"Rev": "f000fe11ece1b79f744edd9c8e1a53ba0f5e0f24"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/opencontainers/runc/libcontainer/user",
|
||||
"Comment": "v1.0.0-rc5-46-g871ba2e58e2431",
|
||||
"Rev": "871ba2e58e24314d1fab4517a80410191ba5ad01"
|
||||
"Comment": "v1.0.0-rc5-176-gf000fe11ece1b7",
|
||||
"Rev": "f000fe11ece1b79f744edd9c8e1a53ba0f5e0f24"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/opencontainers/runc/libcontainer/utils",
|
||||
"Comment": "v1.0.0-rc5-46-g871ba2e58e2431",
|
||||
"Rev": "871ba2e58e24314d1fab4517a80410191ba5ad01"
|
||||
"Comment": "v1.0.0-rc5-176-gf000fe11ece1b7",
|
||||
"Rev": "f000fe11ece1b79f744edd9c8e1a53ba0f5e0f24"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/opencontainers/runtime-spec/specs-go",
|
||||
|
|
|
@ -53,6 +53,7 @@ go_library(
|
|||
"//vendor/github.com/opencontainers/runc/libcontainer/system:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/user:go_default_library",
|
||||
"//vendor/github.com/opencontainers/selinux/go-selinux/label:go_default_library",
|
||||
"//vendor/github.com/pkg/errors:go_default_library",
|
||||
"//vendor/github.com/sirupsen/logrus:go_default_library",
|
||||
"//vendor/github.com/syndtr/gocapability/capability:go_default_library",
|
||||
"//vendor/github.com/vishvananda/netlink:go_default_library",
|
||||
|
|
|
@ -323,6 +323,7 @@ generated when building libcontainer with docker.
|
|||
|
||||
## Copyright and license
|
||||
|
||||
Code and documentation copyright 2014 Docker, inc. Code released under the Apache 2.0 license.
|
||||
Docs released under Creative commons.
|
||||
|
||||
Code and documentation copyright 2014 Docker, inc.
|
||||
The code and documentation are released under the [Apache 2.0 license](../LICENSE).
|
||||
The documentation is also released under Creative Commons Attribution 4.0 International License.
|
||||
You may obtain a copy of the license, titled CC-BY-4.0, at http://creativecommons.org/licenses/by/4.0/.
|
||||
|
|
|
@ -156,17 +156,21 @@ init process will block waiting for the parent to finish setup.
|
|||
|
||||
### IntelRdt
|
||||
|
||||
Intel platforms with new Xeon CPU support Intel Resource Director Technology
|
||||
(RDT). Cache Allocation Technology (CAT) is a sub-feature of RDT, which
|
||||
currently supports L3 cache resource allocation.
|
||||
Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
|
||||
Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are
|
||||
two sub-features of RDT.
|
||||
|
||||
This feature provides a way for the software to restrict cache allocation to a
|
||||
defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
|
||||
The different subsets are identified by class of service (CLOS) and each CLOS
|
||||
has a capacity bitmask (CBM).
|
||||
Cache Allocation Technology (CAT) provides a way for the software to restrict
|
||||
cache allocation to a defined 'subset' of L3 cache which may be overlapping
|
||||
with other 'subsets'. The different subsets are identified by class of
|
||||
service (CLOS) and each CLOS has a capacity bitmask (CBM).
|
||||
|
||||
It can be used to handle L3 cache resource allocation for containers if
|
||||
hardware and kernel support Intel RDT/CAT.
|
||||
Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle
|
||||
over memory bandwidth for the software. A user controls the resource by
|
||||
indicating the percentage of maximum memory bandwidth.
|
||||
|
||||
It can be used to handle L3 cache and memory bandwidth resource allocation
|
||||
for containers if hardware and kernel support Intel RDT CAT and MBA features.
|
||||
|
||||
In Linux 4.10 kernel or newer, the interface is defined and exposed via
|
||||
"resource control" filesystem, which is a "cgroup-like" interface.
|
||||
|
@ -175,6 +179,9 @@ Comparing with cgroups, it has similar process management lifecycle and
|
|||
interfaces in a container. But unlike cgroups' hierarchy, it has single level
|
||||
filesystem layout.
|
||||
|
||||
CAT and MBA features were introduced in Linux kernels 4.10 and 4.12, respectively, via
|
||||
"resource control" filesystem.
|
||||
|
||||
Intel RDT "resource control" filesystem hierarchy:
|
||||
```
|
||||
mount -t resctrl resctrl /sys/fs/resctrl
|
||||
|
@ -182,59 +189,85 @@ tree /sys/fs/resctrl
|
|||
/sys/fs/resctrl/
|
||||
|-- info
|
||||
| |-- L3
|
||||
| |-- cbm_mask
|
||||
| |-- min_cbm_bits
|
||||
| | |-- cbm_mask
|
||||
| | |-- min_cbm_bits
|
||||
| | |-- num_closids
|
||||
| |-- MB
|
||||
| |-- bandwidth_gran
|
||||
| |-- delay_linear
|
||||
| |-- min_bandwidth
|
||||
| |-- num_closids
|
||||
|-- cpus
|
||||
|-- ...
|
||||
|-- schemata
|
||||
|-- tasks
|
||||
|-- <container_id>
|
||||
|-- cpus
|
||||
|-- ...
|
||||
|-- schemata
|
||||
|-- tasks
|
||||
|
||||
```
|
||||
|
||||
For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
|
||||
resource constraints.
|
||||
For runc, we can make use of `tasks` and `schemata` configuration for L3
|
||||
cache and memory bandwidth resource constraints.
|
||||
|
||||
The file `tasks` has a list of tasks that belong to this group (e.g.,
|
||||
"<container_id>" group). Tasks can be added to a group by writing the task ID
|
||||
to the "tasks" file (which will automatically remove them from the previous
|
||||
to the "tasks" file (which will automatically remove them from the previous
|
||||
group to which they belonged). New tasks created by fork(2) and clone(2) are
|
||||
added to the same group as their parent. If a pid is not in any sub group, it
|
||||
is in root group.
|
||||
added to the same group as their parent.
|
||||
|
||||
The file `schemata` has allocation masks/values for L3 cache on each socket,
|
||||
which contains L3 cache id and capacity bitmask (CBM).
|
||||
The file `schemata` has a list of all the resources available to this group.
|
||||
Each resource (L3 cache, memory bandwidth) has its own line and format.
|
||||
|
||||
L3 cache schema:
|
||||
It has allocation bitmasks/values for L3 cache on each socket, which
|
||||
contains L3 cache id and capacity bitmask (CBM).
|
||||
```
|
||||
Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
```
|
||||
For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
|
||||
Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
||||
For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0"
|
||||
which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
||||
|
||||
The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
|
||||
be set is less than the max bit. The max number of bits in the CBM varies among
|
||||
supported Intel Xeon platforms. In Intel RDT "resource control" filesystem
|
||||
layout, the CBM in a group should be a subset of the CBM in root. Kernel will
|
||||
check if it is valid when writing. e.g., 0xfffff in root indicates the max bits
|
||||
of CBM is 20 bits, which maps to the entire L3 cache capacity. Some valid CBM
|
||||
values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc.
|
||||
supported Intel CPU models. Kernel will check if it is valid when writing.
|
||||
e.g., default value 0xfffff in root indicates the max bits of CBM is 20
|
||||
bits, which maps to the entire L3 cache capacity. Some valid CBM values to
|
||||
set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc.
|
||||
|
||||
For more information about Intel RDT/CAT kernel interface:
|
||||
Memory bandwidth schema:
|
||||
It has allocation values for memory bandwidth on each socket, which contains
|
||||
L3 cache id and memory bandwidth percentage.
|
||||
```
|
||||
Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
```
|
||||
For example, on a two-socket machine, the schema line could be "MB:0=20;1=70"
|
||||
|
||||
The minimum bandwidth percentage value for each CPU model is predefined and
|
||||
can be looked up through "info/MB/min_bandwidth". The bandwidth granularity
|
||||
that is allocated is also dependent on the CPU model and can be looked up at
|
||||
"info/MB/bandwidth_gran". The available bandwidth control steps are:
|
||||
min_bw + N * bw_gran. Intermediate values are rounded to the next control
|
||||
step available on the hardware.
|
||||
|
||||
For more information about Intel RDT kernel interface:
|
||||
https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
|
||||
|
||||
An example for runc:
|
||||
```
|
||||
An example for runc:
|
||||
Consider a two-socket machine with two L3 caches where the default CBM is
|
||||
0xfffff and the max CBM length is 20 bits. With this configuration, tasks
|
||||
inside the container only have access to the "upper" 80% of L3 cache id 0 and
|
||||
the "lower" 50% of L3 cache id 1:
|
||||
0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10%
|
||||
with a memory bandwidth granularity of 10%.
|
||||
|
||||
Tasks inside the container only have access to the "upper" 7/11 of L3 cache
|
||||
on socket 0 and the "lower" 5/11 of L3 cache on socket 1, and may use a
|
||||
maximum memory bandwidth of 20% on socket 0 and 70% on socket 1.
|
||||
|
||||
"linux": {
|
||||
"intelRdt": {
|
||||
"l3CacheSchema": "L3:0=ffff0;1=3ff"
|
||||
}
|
||||
"intelRdt": {
|
||||
"closID": "guaranteed_group",
|
||||
"l3CacheSchema": "L3:0=7f0;1=1f",
|
||||
"memBwSchema": "MB:0=20;1=70"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@ go_library(
|
|||
"freezer.go",
|
||||
"fs_unsupported.go",
|
||||
"hugetlb.go",
|
||||
"kmem.go",
|
||||
"memory.go",
|
||||
"name.go",
|
||||
"net_cls.go",
|
||||
|
@ -29,6 +30,7 @@ go_library(
|
|||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/system:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/utils:go_default_library",
|
||||
"//vendor/github.com/pkg/errors:go_default_library",
|
||||
"//vendor/golang.org/x/sys/unix:go_default_library",
|
||||
],
|
||||
"//conditions:default": [],
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
|
@ -14,6 +13,8 @@ import (
|
|||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -35,7 +36,7 @@ var (
|
|||
HugePageSizes, _ = cgroups.GetHugePageSize()
|
||||
)
|
||||
|
||||
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
||||
var errSubsystemDoesNotExist = fmt.Errorf("cgroup: subsystem does not exist")
|
||||
|
||||
type subsystemSet []subsystem
|
||||
|
||||
|
@ -62,9 +63,10 @@ type subsystem interface {
|
|||
}
|
||||
|
||||
type Manager struct {
|
||||
mu sync.Mutex
|
||||
Cgroups *configs.Cgroup
|
||||
Paths map[string]string
|
||||
mu sync.Mutex
|
||||
Cgroups *configs.Cgroup
|
||||
Rootless bool // ignore permission-related errors
|
||||
Paths map[string]string
|
||||
}
|
||||
|
||||
// The absolute path to the root of the cgroup hierarchies.
|
||||
|
@ -100,6 +102,33 @@ type cgroupData struct {
|
|||
pid int
|
||||
}
|
||||
|
||||
// isIgnorableError returns whether err is a permission error (in the loose
|
||||
// sense of the word). This includes EROFS (which for an unprivileged user is
|
||||
// basically a permission error) and EACCES (for similar reasons) as well as
|
||||
// the normal EPERM.
|
||||
func isIgnorableError(rootless bool, err error) bool {
|
||||
// We do not ignore errors if we are root.
|
||||
if !rootless {
|
||||
return false
|
||||
}
|
||||
// Is it an ordinary EPERM?
|
||||
if os.IsPermission(errors.Cause(err)) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Try to handle other errnos.
|
||||
var errno error
|
||||
switch err := errors.Cause(err).(type) {
|
||||
case *os.PathError:
|
||||
errno = err.Err
|
||||
case *os.LinkError:
|
||||
errno = err.Err
|
||||
case *os.SyscallError:
|
||||
errno = err.Err
|
||||
}
|
||||
return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) (err error) {
|
||||
if m.Cgroups == nil {
|
||||
return nil
|
||||
|
@ -145,11 +174,11 @@ func (m *Manager) Apply(pid int) (err error) {
|
|||
m.Paths[sys.Name()] = p
|
||||
|
||||
if err := sys.Apply(d); err != nil {
|
||||
if os.IsPermission(err) && m.Cgroups.Path == "" {
|
||||
// If we didn't set a cgroup path, then let's defer the error here
|
||||
// until we know whether we have set limits or not.
|
||||
// If we hadn't set limits, then it's ok that we couldn't join this cgroup, because
|
||||
// it will have the same limits as its parent.
|
||||
// In the case of rootless (including euid=0 in userns), where an explicit cgroup path hasn't
|
||||
// been set, we don't bail on error in case of permission problems.
|
||||
// Cases where limits have been set (and we couldn't create our own
|
||||
// cgroup) are handled by Set.
|
||||
if isIgnorableError(m.Rootless, err) && m.Cgroups.Path == "" {
|
||||
delete(m.Paths, sys.Name())
|
||||
continue
|
||||
}
|
||||
|
@ -207,9 +236,16 @@ func (m *Manager) Set(container *configs.Config) error {
|
|||
for _, sys := range subsystems {
|
||||
path := paths[sys.Name()]
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
if m.Rootless && sys.Name() == "devices" {
|
||||
continue
|
||||
}
|
||||
// When m.Rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
|
||||
// However, errors from other subsystems are not ignored.
|
||||
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if path == "" {
|
||||
// cgroup never applied
|
||||
return fmt.Errorf("cannot set limits on the %s cgroup, as the container has not joined it", sys.Name())
|
||||
// We never created a path for this cgroup, so we cannot set
|
||||
// limits for it (though we have already tried at this point).
|
||||
return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -46,11 +46,7 @@ func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error
|
|||
}
|
||||
// because we are not using d.join we need to place the pid into the procs file
|
||||
// unlike the other subsystems
|
||||
if err := cgroups.WriteCgroupProc(path, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
return cgroups.WriteCgroupProc(path, pid)
|
||||
}
|
||||
|
||||
func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
|
||||
|
@ -83,11 +79,7 @@ func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
return err
|
||||
}
|
||||
}
|
||||
if err := s.SetRtSched(path, cgroup); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
return s.SetRtSched(path, cgroup)
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Remove(d *cgroupData) error {
|
||||
|
|
|
@ -77,18 +77,14 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
|
|||
// The logic is, if user specified cpuset configs, use these
|
||||
// specified configs, otherwise, inherit from parent. This makes
|
||||
// cpuset configs work correctly with 'cpuset.cpu_exclusive', and
|
||||
// keep backward compatbility.
|
||||
// keep backward compatibility.
|
||||
if err := s.ensureCpusAndMems(dir, cgroup); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// because we are not using d.join we need to place the pid into the procs file
|
||||
// unlike the other subsystems
|
||||
if err := cgroups.WriteCgroupProc(dir, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
return cgroups.WriteCgroupProc(dir, pid)
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) {
|
||||
|
|
55
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/kmem.go
generated
vendored
Normal file
55
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/kmem.go
generated
vendored
Normal file
|
@ -0,0 +1,55 @@
|
|||
// +build linux,!nokmem
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"syscall" // for Errno type only
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
|
||||
|
||||
func EnableKernelMemoryAccounting(path string) error {
|
||||
// Check if kernel memory is enabled
|
||||
// We have to limit the kernel memory here as it won't be accounted at all
|
||||
// until a limit is set on the cgroup and limit cannot be set once the
|
||||
// cgroup has children, or if there are already tasks in the cgroup.
|
||||
for _, i := range []int64{1, -1} {
|
||||
if err := setKernelMemory(path, i); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setKernelMemory(path string, kernelMemoryLimit int64) error {
|
||||
if path == "" {
|
||||
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
|
||||
}
|
||||
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
|
||||
// kernel memory is not enabled on the system so we should do nothing
|
||||
return nil
|
||||
}
|
||||
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
|
||||
// Check if the error number returned by the syscall is "EBUSY"
|
||||
// The EBUSY signal is returned on attempts to write to the
|
||||
// memory.kmem.limit_in_bytes file if the cgroup has children or
|
||||
// once tasks have been attached to the cgroup
|
||||
if pathErr, ok := err.(*os.PathError); ok {
|
||||
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
|
||||
if errNo == unix.EBUSY {
|
||||
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
|
||||
}
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
|
||||
}
|
||||
return nil
|
||||
}
|
11
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/kmem_disabled.go
generated
vendored
Normal file
11
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/kmem_disabled.go
generated
vendored
Normal file
|
@ -0,0 +1,11 @@
|
|||
// +build linux,nokmem
|
||||
|
||||
package fs
|
||||
|
||||
// EnableKernelMemoryAccounting is the stub compiled under this file's
// "linux,nokmem" build constraint: it ignores path and always returns nil.
func EnableKernelMemoryAccounting(path string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// setKernelMemory is the stub compiled under this file's "linux,nokmem" build
// constraint: it ignores both arguments, writes nothing, and always returns nil.
func setKernelMemory(path string, kernelMemoryLimit int64) error {
|
||||
return nil
|
||||
}
|
|
@ -5,23 +5,18 @@ package fs
|
|||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall" // only for Errno
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
|
||||
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
|
||||
cgroupMemoryLimit = "memory.limit_in_bytes"
|
||||
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
|
||||
cgroupMemoryLimit = "memory.limit_in_bytes"
|
||||
)
|
||||
|
||||
type MemoryGroup struct {
|
||||
|
@ -67,44 +62,6 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
|
|||
return nil
|
||||
}
|
||||
|
||||
func EnableKernelMemoryAccounting(path string) error {
|
||||
// Check if kernel memory is enabled
|
||||
// We have to limit the kernel memory here as it won't be accounted at all
|
||||
// until a limit is set on the cgroup and limit cannot be set once the
|
||||
// cgroup has children, or if there are already tasks in the cgroup.
|
||||
for _, i := range []int64{1, -1} {
|
||||
if err := setKernelMemory(path, i); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setKernelMemory(path string, kernelMemoryLimit int64) error {
|
||||
if path == "" {
|
||||
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
|
||||
}
|
||||
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
|
||||
// kernel memory is not enabled on the system so we should do nothing
|
||||
return nil
|
||||
}
|
||||
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
|
||||
// Check if the error number returned by the syscall is "EBUSY"
|
||||
// The EBUSY signal is returned on attempts to write to the
|
||||
// memory.kmem.limit_in_bytes file if the cgroup has children or
|
||||
// once tasks have been attached to the cgroup
|
||||
if pathErr, ok := err.(*os.PathError); ok {
|
||||
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
|
||||
if errNo == unix.EBUSY {
|
||||
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
|
||||
}
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
|
||||
// If the memory update is set to -1 we should also
|
||||
// set swap to -1, it means unlimited memory.
|
||||
|
|
27
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
generated
vendored
27
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
generated
vendored
|
@ -5,6 +5,7 @@ package systemd
|
|||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
@ -295,13 +296,19 @@ func (m *Manager) Apply(pid int) error {
|
|||
|
||||
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
|
||||
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
|
||||
cpuQuotaPerSecUSec := uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
|
||||
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||
// corresponds to USEC_INFINITY in systemd
|
||||
// if USEC_INFINITY is provided, CPUQuota is left unbound by systemd
|
||||
// always setting a property value ensures we can apply a quota and remove it later
|
||||
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
||||
if c.Resources.CpuQuota > 0 {
|
||||
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||
cpuQuotaPerSecUSec = uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
|
||||
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||
}
|
||||
}
|
||||
properties = append(properties,
|
||||
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
||||
|
@ -312,6 +319,12 @@ func (m *Manager) Apply(pid int) error {
|
|||
newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
|
||||
}
|
||||
|
||||
if c.Resources.PidsLimit > 0 {
|
||||
properties = append(properties,
|
||||
newProp("TasksAccounting", true),
|
||||
newProp("TasksMax", uint64(c.Resources.PidsLimit)))
|
||||
}
|
||||
|
||||
// We have to set kernel memory here, as we can't change it once
|
||||
// processes have been attached to the cgroup.
|
||||
if c.Resources.KernelMemory != 0 {
|
||||
|
|
|
@ -13,7 +13,7 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/docker/go-units"
|
||||
units "github.com/docker/go-units"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -103,7 +103,7 @@ func FindCgroupMountpointDir() (string, error) {
|
|||
}
|
||||
|
||||
if postSeparatorFields[0] == "cgroup" {
|
||||
// Check that the mount is properly formated.
|
||||
// Check that the mount is properly formatted.
|
||||
if numPostFields < 3 {
|
||||
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
|
||||
}
|
||||
|
@ -151,19 +151,20 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount,
|
|||
Root: fields[3],
|
||||
}
|
||||
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
|
||||
if !ss[opt] {
|
||||
seen, known := ss[opt]
|
||||
if !known || (!all && seen) {
|
||||
continue
|
||||
}
|
||||
ss[opt] = true
|
||||
if strings.HasPrefix(opt, cgroupNamePrefix) {
|
||||
m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
|
||||
} else {
|
||||
m.Subsystems = append(m.Subsystems, opt)
|
||||
}
|
||||
if !all {
|
||||
numFound++
|
||||
opt = opt[len(cgroupNamePrefix):]
|
||||
}
|
||||
m.Subsystems = append(m.Subsystems, opt)
|
||||
numFound++
|
||||
}
|
||||
if len(m.Subsystems) > 0 || all {
|
||||
res = append(res, m)
|
||||
}
|
||||
res = append(res, m)
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, err
|
||||
|
@ -187,7 +188,7 @@ func GetCgroupMounts(all bool) ([]Mount, error) {
|
|||
|
||||
allMap := make(map[string]bool)
|
||||
for s := range allSubsystems {
|
||||
allMap[s] = true
|
||||
allMap[s] = false
|
||||
}
|
||||
return getCgroupMountsHelper(allMap, f, all)
|
||||
}
|
||||
|
@ -262,7 +263,7 @@ func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
|
|||
}
|
||||
|
||||
// This is needed for nested containers, because in /proc/self/cgroup we
|
||||
// see pathes from host, which don't exist in container.
|
||||
// see paths from host, which don't exist in container.
|
||||
relCgroup, err := filepath.Rel(root, cgroup)
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
|
|
@ -141,9 +141,10 @@ type Config struct {
|
|||
|
||||
// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
|
||||
// for a process. Valid values are in the range [-1000, 1000], where processes with
|
||||
// higher scores are preferred for being killed.
|
||||
// higher scores are preferred for being killed. If it is unset then we don't touch the current
|
||||
// value.
|
||||
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
|
||||
OomScoreAdj int `json:"oom_score_adj"`
|
||||
OomScoreAdj *int `json:"oom_score_adj,omitempty"`
|
||||
|
||||
// UidMappings is an array of User ID mappings for User Namespaces
|
||||
UidMappings []IDMap `json:"uid_mappings"`
|
||||
|
@ -185,12 +186,19 @@ type Config struct {
|
|||
// callers keyring in this case.
|
||||
NoNewKeyring bool `json:"no_new_keyring"`
|
||||
|
||||
// Rootless specifies whether the container is a rootless container.
|
||||
Rootless bool `json:"rootless"`
|
||||
|
||||
// IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into
|
||||
// to limit the resources (e.g., L3 cache) the container has available
|
||||
// IntelRdt specifies settings for Intel RDT group that the container is placed into
|
||||
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
|
||||
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
|
||||
|
||||
// RootlessEUID is set when the runc was launched with non-zero EUID.
|
||||
// Note that RootlessEUID is set to false when launched with EUID=0 in userns.
|
||||
// When RootlessEUID is set, runc creates a new userns for the container.
|
||||
// (config.json needs to contain userns settings)
|
||||
RootlessEUID bool `json:"rootless_euid,omitempty"`
|
||||
|
||||
// RootlessCgroups is set when unlikely to have the full access to cgroups.
|
||||
// When RootlessCgroups is set, cgroups errors are ignored.
|
||||
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
|
||||
}
|
||||
|
||||
type Hooks struct {
|
||||
|
|
|
@ -4,4 +4,8 @@ type IntelRdt struct {
|
|||
// The schema for L3 cache id and capacity bitmask (CBM)
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||
|
||||
// The schema of memory bandwidth percentage per L3 cache id
|
||||
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
MemBwSchema string `json:"memBwSchema,omitempty"`
|
||||
}
|
||||
|
|
46
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
generated
vendored
46
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
generated
vendored
|
@ -2,23 +2,18 @@ package validate
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"reflect"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var (
|
||||
geteuid = os.Geteuid
|
||||
getegid = os.Getegid
|
||||
)
|
||||
|
||||
func (v *ConfigValidator) rootless(config *configs.Config) error {
|
||||
if err := rootlessMappings(config); err != nil {
|
||||
// rootlessEUID makes sure that the config can be applied when runc
|
||||
// is being executed as a non-root user (euid != 0) in the current user namespace.
|
||||
func (v *ConfigValidator) rootlessEUID(config *configs.Config) error {
|
||||
if err := rootlessEUIDMappings(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := rootlessMount(config); err != nil {
|
||||
if err := rootlessEUIDMount(config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -38,11 +33,9 @@ func hasIDMapping(id int, mappings []configs.IDMap) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
func rootlessMappings(config *configs.Config) error {
|
||||
if euid := geteuid(); euid != 0 {
|
||||
if !config.Namespaces.Contains(configs.NEWUSER) {
|
||||
return fmt.Errorf("rootless containers require user namespaces")
|
||||
}
|
||||
func rootlessEUIDMappings(config *configs.Config) error {
|
||||
if !config.Namespaces.Contains(configs.NEWUSER) {
|
||||
return fmt.Errorf("rootless container requires user namespaces")
|
||||
}
|
||||
|
||||
if len(config.UidMappings) == 0 {
|
||||
|
@ -51,34 +44,13 @@ func rootlessMappings(config *configs.Config) error {
|
|||
if len(config.GidMappings) == 0 {
|
||||
return fmt.Errorf("rootless containers requires at least one GID mapping")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cgroup verifies that the user isn't trying to set any cgroup limits or paths.
|
||||
func rootlessCgroup(config *configs.Config) error {
|
||||
// Nothing set at all.
|
||||
if config.Cgroups == nil || config.Cgroups.Resources == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Used for comparing to the zero value.
|
||||
left := reflect.ValueOf(*config.Cgroups.Resources)
|
||||
right := reflect.Zero(left.Type())
|
||||
|
||||
// This is all we need to do, since specconv won't add cgroup options in
|
||||
// rootless mode.
|
||||
if !reflect.DeepEqual(left.Interface(), right.Interface()) {
|
||||
return fmt.Errorf("cannot specify resource limits in rootless container")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// mount verifies that the user isn't trying to set up any mounts they don't have
|
||||
// the rights to do. In addition, it makes sure that no mount has a `uid=` or
|
||||
// `gid=` option that doesn't resolve to root.
|
||||
func rootlessMount(config *configs.Config) error {
|
||||
func rootlessEUIDMount(config *configs.Config) error {
|
||||
// XXX: We could whitelist allowed devices at this point, but I'm not
|
||||
// convinced that's a good idea. The kernel is the best arbiter of
|
||||
// access control.
|
||||
|
|
33
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
generated
vendored
33
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
generated
vendored
|
@ -44,8 +44,8 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
|
|||
if err := v.intelrdt(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Rootless {
|
||||
if err := v.rootless(config); err != nil {
|
||||
if config.RootlessEUID {
|
||||
if err := v.rootlessEUID(config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -151,6 +151,16 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
|
|||
return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", s)
|
||||
}
|
||||
}
|
||||
if config.Namespaces.Contains(configs.NEWUTS) {
|
||||
switch s {
|
||||
case "kernel.domainname":
|
||||
// This is namespaced and there's no explicit OCI field for it.
|
||||
continue
|
||||
case "kernel.hostname":
|
||||
// This is namespaced but there's a conflicting (dedicated) OCI field for it.
|
||||
return fmt.Errorf("sysctl %q is not allowed as it conflicts with the OCI %q field", s, "hostname")
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("sysctl %q is not in a separate kernel namespace", s)
|
||||
}
|
||||
|
||||
|
@ -159,11 +169,22 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
|
|||
|
||||
func (v *ConfigValidator) intelrdt(config *configs.Config) error {
|
||||
if config.IntelRdt != nil {
|
||||
if !intelrdt.IsEnabled() {
|
||||
return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled")
|
||||
if !intelrdt.IsCatEnabled() && !intelrdt.IsMbaEnabled() {
|
||||
return fmt.Errorf("intelRdt is specified in config, but Intel RDT is not supported or enabled")
|
||||
}
|
||||
if config.IntelRdt.L3CacheSchema == "" {
|
||||
return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
|
||||
|
||||
if !intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema != "" {
|
||||
return fmt.Errorf("intelRdt.l3CacheSchema is specified in config, but Intel RDT/CAT is not enabled")
|
||||
}
|
||||
if !intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema != "" {
|
||||
return fmt.Errorf("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled")
|
||||
}
|
||||
|
||||
if intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema == "" {
|
||||
return fmt.Errorf("Intel RDT/CAT is enabled and intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
|
||||
}
|
||||
if intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema == "" {
|
||||
return fmt.Errorf("Intel RDT/MBA is enabled and intelRdt is specified in config, but intelRdt.memBwSchema is empty")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -28,7 +28,6 @@ import (
|
|||
|
||||
"github.com/golang/protobuf/proto"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/syndtr/gocapability/capability"
|
||||
"github.com/vishvananda/netlink/nl"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
@ -60,7 +59,8 @@ type State struct {
|
|||
|
||||
// Platform specific fields below here
|
||||
|
||||
// Specifies if the container was started under the rootless mode.
|
||||
// Specified if the container was started under the rootless mode.
|
||||
// Set to true if BaseState.Config.RootlessEUID && BaseState.Config.RootlessCgroups
|
||||
Rootless bool `json:"rootless"`
|
||||
|
||||
// Path to all the cgroups setup for a container. Key is cgroup subsystem name
|
||||
|
@ -225,17 +225,13 @@ func (c *linuxContainer) Set(config configs.Config) error {
|
|||
func (c *linuxContainer) Start(process *Process) error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
status, err := c.currentStatus()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if status == Stopped {
|
||||
if process.Init {
|
||||
if err := c.createExecFifo(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := c.start(process, status == Stopped); err != nil {
|
||||
if status == Stopped {
|
||||
if err := c.start(process); err != nil {
|
||||
if process.Init {
|
||||
c.deleteExecFifo()
|
||||
}
|
||||
return err
|
||||
|
@ -244,17 +240,10 @@ func (c *linuxContainer) Start(process *Process) error {
|
|||
}
|
||||
|
||||
func (c *linuxContainer) Run(process *Process) error {
|
||||
c.m.Lock()
|
||||
status, err := c.currentStatus()
|
||||
if err != nil {
|
||||
c.m.Unlock()
|
||||
return err
|
||||
}
|
||||
c.m.Unlock()
|
||||
if err := c.Start(process); err != nil {
|
||||
return err
|
||||
}
|
||||
if status == Stopped {
|
||||
if process.Init {
|
||||
return c.exec()
|
||||
}
|
||||
return nil
|
||||
|
@ -335,8 +324,8 @@ type openResult struct {
|
|||
err error
|
||||
}
|
||||
|
||||
func (c *linuxContainer) start(process *Process, isInit bool) error {
|
||||
parent, err := c.newParentProcess(process, isInit)
|
||||
func (c *linuxContainer) start(process *Process) error {
|
||||
parent, err := c.newParentProcess(process)
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "creating new parent process")
|
||||
}
|
||||
|
@ -349,7 +338,7 @@ func (c *linuxContainer) start(process *Process, isInit bool) error {
|
|||
}
|
||||
// generate a timestamp indicating when the container was started
|
||||
c.created = time.Now().UTC()
|
||||
if isInit {
|
||||
if process.Init {
|
||||
c.state = &createdState{
|
||||
c: c,
|
||||
}
|
||||
|
@ -411,10 +400,7 @@ func (c *linuxContainer) createExecFifo() error {
|
|||
return err
|
||||
}
|
||||
unix.Umask(oldMask)
|
||||
if err := os.Chown(fifoName, rootuid, rootgid); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
return os.Chown(fifoName, rootuid, rootgid)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) deleteExecFifo() {
|
||||
|
@ -439,7 +425,7 @@ func (c *linuxContainer) includeExecFifo(cmd *exec.Cmd) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProcess, error) {
|
||||
func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
|
||||
parentPipe, childPipe, err := utils.NewSockPair("init")
|
||||
if err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "creating new init pipe")
|
||||
|
@ -448,7 +434,7 @@ func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProces
|
|||
if err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "creating new command template")
|
||||
}
|
||||
if !doInit {
|
||||
if !p.Init {
|
||||
return c.newSetnsProcess(p, cmd, parentPipe, childPipe)
|
||||
}
|
||||
|
||||
|
@ -473,6 +459,7 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
|
|||
if cmd.SysProcAttr == nil {
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{}
|
||||
}
|
||||
cmd.Env = append(cmd.Env, fmt.Sprintf("GOMAXPROCS=%s", os.Getenv("GOMAXPROCS")))
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, p.ExtraFiles...)
|
||||
if p.ConsoleSocket != nil {
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, p.ConsoleSocket)
|
||||
|
@ -533,14 +520,15 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
|
|||
return nil, err
|
||||
}
|
||||
return &setnsProcess{
|
||||
cmd: cmd,
|
||||
cgroupPaths: c.cgroupManager.GetPaths(),
|
||||
intelRdtPath: state.IntelRdtPath,
|
||||
childPipe: childPipe,
|
||||
parentPipe: parentPipe,
|
||||
config: c.newInitConfig(p),
|
||||
process: p,
|
||||
bootstrapData: data,
|
||||
cmd: cmd,
|
||||
cgroupPaths: c.cgroupManager.GetPaths(),
|
||||
rootlessCgroups: c.config.RootlessCgroups,
|
||||
intelRdtPath: state.IntelRdtPath,
|
||||
childPipe: childPipe,
|
||||
parentPipe: parentPipe,
|
||||
config: c.newInitConfig(p),
|
||||
process: p,
|
||||
bootstrapData: data,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
@ -556,7 +544,8 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
|
|||
PassedFilesCount: len(process.ExtraFiles),
|
||||
ContainerId: c.ID(),
|
||||
NoNewPrivileges: c.config.NoNewPrivileges,
|
||||
Rootless: c.config.Rootless,
|
||||
RootlessEUID: c.config.RootlessEUID,
|
||||
RootlessCgroups: c.config.RootlessCgroups,
|
||||
AppArmorProfile: c.config.AppArmorProfile,
|
||||
ProcessLabel: c.config.ProcessLabel,
|
||||
Rlimits: c.config.Rlimits,
|
||||
|
@ -624,16 +613,16 @@ func (c *linuxContainer) Resume() error {
|
|||
|
||||
func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
|
||||
// XXX(cyphar): This requires cgroups.
|
||||
if c.config.Rootless {
|
||||
return nil, fmt.Errorf("cannot get OOM notifications from rootless container")
|
||||
if c.config.RootlessCgroups {
|
||||
logrus.Warn("getting OOM notifications may fail if you don't have the full access to cgroups")
|
||||
}
|
||||
return notifyOnOOM(c.cgroupManager.GetPaths())
|
||||
}
|
||||
|
||||
func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) {
|
||||
// XXX(cyphar): This requires cgroups.
|
||||
if c.config.Rootless {
|
||||
return nil, fmt.Errorf("cannot get memory pressure notifications from rootless container")
|
||||
if c.config.RootlessCgroups {
|
||||
logrus.Warn("getting memory pressure notifications may fail if you don't have the full access to cgroups")
|
||||
}
|
||||
return notifyMemoryPressure(c.cgroupManager.GetPaths(), level)
|
||||
}
|
||||
|
@ -668,7 +657,7 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.
|
|||
Features: criuFeat,
|
||||
}
|
||||
|
||||
err := c.criuSwrk(nil, req, criuOpts, false)
|
||||
err := c.criuSwrk(nil, req, criuOpts, false, nil)
|
||||
if err != nil {
|
||||
logrus.Debugf("%s", err)
|
||||
return fmt.Errorf("CRIU feature check failed")
|
||||
|
@ -781,7 +770,7 @@ func (c *linuxContainer) checkCriuVersion(minVersion int) error {
|
|||
Type: &t,
|
||||
}
|
||||
|
||||
err := c.criuSwrk(nil, req, nil, false)
|
||||
err := c.criuSwrk(nil, req, nil, false, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("CRIU version check failed: %s", err)
|
||||
}
|
||||
|
@ -877,12 +866,11 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
|||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
|
||||
// Checkpoint is unlikely to work if os.Geteuid() != 0 || system.RunningInUserNS().
|
||||
// (CLI prints a warning)
|
||||
// TODO(avagin): Figure out how to make this work nicely. CRIU 2.0 has
|
||||
// support for doing unprivileged dumps, but the setup of
|
||||
// rootless containers might make this complicated.
|
||||
if c.config.Rootless {
|
||||
return fmt.Errorf("cannot checkpoint a rootless container")
|
||||
}
|
||||
|
||||
// criu 1.5.2 => 10502
|
||||
if err := c.checkCriuVersion(10502); err != nil {
|
||||
|
@ -939,6 +927,33 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
|||
LazyPages: proto.Bool(criuOpts.LazyPages),
|
||||
}
|
||||
|
||||
// If the container is running in a network namespace and has
|
||||
// a path to the network namespace configured, we will dump
|
||||
// that network namespace as an external namespace and we
|
||||
// will expect that the namespace exists during restore.
|
||||
// This basically means that CRIU will ignore the namespace
|
||||
// and expect it to be set up correctly.
|
||||
nsPath := c.config.Namespaces.PathOf(configs.NEWNET)
|
||||
if nsPath != "" {
|
||||
// For this to work we need at least criu 3.11.0 => 31100.
|
||||
// As there was already a successful version check we will
|
||||
// not error out if it fails. runc will just behave as it used
|
||||
// to do and ignore external network namespaces.
|
||||
err := c.checkCriuVersion(31100)
|
||||
if err == nil {
|
||||
// CRIU expects the information about an external namespace
|
||||
// like this: --external net[<inode>]:<key>
|
||||
// This <key> is always 'extRootNetNS'.
|
||||
var netns syscall.Stat_t
|
||||
err = syscall.Stat(nsPath, &netns)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
criuExternal := fmt.Sprintf("net[%d]:extRootNetNS", netns.Ino)
|
||||
rpcOpts.External = append(rpcOpts.External, criuExternal)
|
||||
}
|
||||
}
|
||||
|
||||
fcg := c.cgroupManager.GetPaths()["freezer"]
|
||||
if fcg != "" {
|
||||
rpcOpts.FreezeCgroup = proto.String(fcg)
|
||||
|
@ -1043,7 +1058,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
|||
}
|
||||
}
|
||||
|
||||
err = c.criuSwrk(nil, req, criuOpts, false)
|
||||
err = c.criuSwrk(nil, req, criuOpts, false, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -1087,11 +1102,12 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
|||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
|
||||
var extraFiles []*os.File
|
||||
|
||||
// Restore is unlikely to work if os.Geteuid() != 0 || system.RunningInUserNS().
|
||||
// (CLI prints a warning)
|
||||
// TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have
|
||||
// support for unprivileged restore at the moment.
|
||||
if c.config.Rootless {
|
||||
return fmt.Errorf("cannot restore a rootless container")
|
||||
}
|
||||
|
||||
// criu 1.5.2 => 10502
|
||||
if err := c.checkCriuVersion(10502); err != nil {
|
||||
|
@ -1161,6 +1177,38 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
|||
},
|
||||
}
|
||||
|
||||
// Same as during checkpointing. If the container has a specific network namespace
|
||||
// assigned to it, this now expects that the checkpoint will be restored in an
|
||||
// already created network namespace.
|
||||
nsPath := c.config.Namespaces.PathOf(configs.NEWNET)
|
||||
if nsPath != "" {
|
||||
// For this to work we need at least criu 3.11.0 => 31100.
|
||||
// As there was already a successful version check we will
|
||||
// not error out if it fails. runc will just behave as it used
|
||||
// to do and ignore external network namespaces.
|
||||
err := c.checkCriuVersion(31100)
|
||||
if err == nil {
|
||||
// CRIU wants the information about an existing network namespace
|
||||
// like this: --inherit-fd fd[<fd>]:<key>
|
||||
// The <key> needs to be the same as during checkpointing.
|
||||
// We are always using 'extRootNetNS' as the key in this.
|
||||
netns, err := os.Open(nsPath)
|
||||
defer netns.Close()
|
||||
if err != nil {
|
||||
logrus.Errorf("If a specific network namespace is defined it must exist: %s", err)
|
||||
return fmt.Errorf("Requested network namespace %v does not exist", nsPath)
|
||||
}
|
||||
inheritFd := new(criurpc.InheritFd)
|
||||
inheritFd.Key = proto.String("extRootNetNS")
|
||||
// The offset of four is necessary because 0, 1, 2 and 3 are already
|
||||
// used by stdin, stdout, stderr, 'criu swrk' socket.
|
||||
inheritFd.Fd = proto.Int32(int32(4 + len(extraFiles)))
|
||||
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
|
||||
// All open FDs need to be transferred to CRIU via extraFiles
|
||||
extraFiles = append(extraFiles, netns)
|
||||
}
|
||||
}
|
||||
|
||||
for _, m := range c.config.Mounts {
|
||||
switch m.Device {
|
||||
case "bind":
|
||||
|
@ -1219,7 +1267,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
|||
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
|
||||
}
|
||||
}
|
||||
return c.criuSwrk(process, req, criuOpts, true)
|
||||
return c.criuSwrk(process, req, criuOpts, true, extraFiles)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
|
||||
|
@ -1249,7 +1297,7 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error {
|
||||
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool, extraFiles []*os.File) error {
|
||||
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -1290,6 +1338,9 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
|||
cmd.Stderr = process.Stderr
|
||||
}
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, criuServer)
|
||||
if extraFiles != nil {
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, extraFiles...)
|
||||
}
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
return err
|
||||
|
@ -1664,7 +1715,7 @@ func (c *linuxContainer) currentState() (*State, error) {
|
|||
InitProcessStartTime: startTime,
|
||||
Created: c.created,
|
||||
},
|
||||
Rootless: c.config.Rootless,
|
||||
Rootless: c.config.RootlessEUID && c.config.RootlessCgroups,
|
||||
CgroupPaths: c.cgroupManager.GetPaths(),
|
||||
IntelRdtPath: intelRdtPath,
|
||||
NamespacePaths: make(map[configs.NamespaceType]string),
|
||||
|
@ -1765,7 +1816,7 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
|
|||
if !joinExistingUser {
|
||||
// write uid mappings
|
||||
if len(c.config.UidMappings) > 0 {
|
||||
if c.config.Rootless && c.newuidmapPath != "" {
|
||||
if c.config.RootlessEUID && c.newuidmapPath != "" {
|
||||
r.AddData(&Bytemsg{
|
||||
Type: UidmapPathAttr,
|
||||
Value: []byte(c.newuidmapPath),
|
||||
|
@ -1791,38 +1842,33 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
|
|||
Type: GidmapAttr,
|
||||
Value: b,
|
||||
})
|
||||
if c.config.Rootless && c.newgidmapPath != "" {
|
||||
if c.config.RootlessEUID && c.newgidmapPath != "" {
|
||||
r.AddData(&Bytemsg{
|
||||
Type: GidmapPathAttr,
|
||||
Value: []byte(c.newgidmapPath),
|
||||
})
|
||||
}
|
||||
if requiresRootOrMappingTool(c.config) {
|
||||
// check if we have CAP_SETGID to setgroup properly
|
||||
pid, err := capability.NewPid(0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !pid.Get(capability.EFFECTIVE, capability.CAP_SETGID) {
|
||||
r.AddData(&Boolmsg{
|
||||
Type: SetgroupAttr,
|
||||
Value: true,
|
||||
})
|
||||
}
|
||||
r.AddData(&Boolmsg{
|
||||
Type: SetgroupAttr,
|
||||
Value: true,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// write oom_score_adj
|
||||
r.AddData(&Bytemsg{
|
||||
Type: OomScoreAdjAttr,
|
||||
Value: []byte(fmt.Sprintf("%d", c.config.OomScoreAdj)),
|
||||
})
|
||||
if c.config.OomScoreAdj != nil {
|
||||
// write oom_score_adj
|
||||
r.AddData(&Bytemsg{
|
||||
Type: OomScoreAdjAttr,
|
||||
Value: []byte(fmt.Sprintf("%d", *c.config.OomScoreAdj)),
|
||||
})
|
||||
}
|
||||
|
||||
// write rootless
|
||||
r.AddData(&Boolmsg{
|
||||
Type: RootlessAttr,
|
||||
Value: c.config.Rootless,
|
||||
Type: RootlessEUIDAttr,
|
||||
Value: c.config.RootlessEUID,
|
||||
})
|
||||
|
||||
return bytes.NewReader(r.Serialize()), nil
|
||||
|
|
|
@ -11,6 +11,7 @@ import (
|
|||
"runtime/debug"
|
||||
"strconv"
|
||||
|
||||
"github.com/cyphar/filepath-securejoin"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
|
@ -59,9 +60,9 @@ func SystemdCgroups(l *LinuxFactory) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// Cgroupfs is an options func to configure a LinuxFactory to return
|
||||
// containers that use the native cgroups filesystem implementation to
|
||||
// create and manage cgroups.
|
||||
// Cgroupfs is an options func to configure a LinuxFactory to return containers
|
||||
// that use the native cgroups filesystem implementation to create and manage
|
||||
// cgroups.
|
||||
func Cgroupfs(l *LinuxFactory) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &fs.Manager{
|
||||
|
@ -72,9 +73,26 @@ func Cgroupfs(l *LinuxFactory) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// RootlessCgroupfs is an options func to configure a LinuxFactory to return
|
||||
// containers that use the native cgroups filesystem implementation to create
|
||||
// and manage cgroups. The difference between RootlessCgroupfs and Cgroupfs is
|
||||
// that RootlessCgroupfs can transparently handle permission errors that occur
|
||||
// during rootless container (including euid=0 in userns) setup (while still allowing cgroup usage if
|
||||
// they've been set up properly).
|
||||
func RootlessCgroupfs(l *LinuxFactory) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &fs.Manager{
|
||||
Cgroups: config,
|
||||
Rootless: true,
|
||||
Paths: paths,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// IntelRdtFs is an options func to configure a LinuxFactory to return
|
||||
// containers that use the Intel RDT "resource control" filesystem to
|
||||
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
|
||||
// create and manage Intel RDT resources (e.g., L3 cache, memory bandwidth).
|
||||
func IntelRdtFs(l *LinuxFactory) error {
|
||||
l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager {
|
||||
return &intelrdt.IntelRdtManager{
|
||||
|
@ -178,7 +196,10 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
|
|||
if err := l.Validator.Validate(config); err != nil {
|
||||
return nil, newGenericError(err, ConfigInvalid)
|
||||
}
|
||||
containerRoot := filepath.Join(l.Root, id)
|
||||
containerRoot, err := securejoin.SecureJoin(l.Root, id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if _, err := os.Stat(containerRoot); err == nil {
|
||||
return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse)
|
||||
} else if !os.IsNotExist(err) {
|
||||
|
@ -201,7 +222,7 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
|
|||
newgidmapPath: l.NewgidmapPath,
|
||||
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
|
||||
}
|
||||
if intelrdt.IsEnabled() {
|
||||
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
|
||||
c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
|
||||
}
|
||||
c.state = &stoppedState{c: c}
|
||||
|
@ -212,7 +233,14 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
|
|||
if l.Root == "" {
|
||||
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
|
||||
}
|
||||
containerRoot := filepath.Join(l.Root, id)
|
||||
// When loading, we need to check whether the id is valid.
|
||||
if err := l.validateID(id); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
containerRoot, err := securejoin.SecureJoin(l.Root, id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
state, err := l.loadState(containerRoot, id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -240,7 +268,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
|
|||
if err := c.refreshState(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if intelrdt.IsEnabled() {
|
||||
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
|
||||
c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
|
||||
}
|
||||
return c, nil
|
||||
|
@ -322,7 +350,11 @@ func (l *LinuxFactory) StartInitialization() (err error) {
|
|||
}
|
||||
|
||||
func (l *LinuxFactory) loadState(root, id string) (*State, error) {
|
||||
f, err := os.Open(filepath.Join(root, stateFilename))
|
||||
stateFilePath, err := securejoin.SecureJoin(root, stateFilename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
f, err := os.Open(stateFilePath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, newGenericError(fmt.Errorf("container %q does not exist", id), ContainerNotExists)
|
||||
|
@ -338,7 +370,7 @@ func (l *LinuxFactory) loadState(root, id string) (*State, error) {
|
|||
}
|
||||
|
||||
func (l *LinuxFactory) validateID(id string) error {
|
||||
if !idRegex.MatchString(id) {
|
||||
if !idRegex.MatchString(id) || string(os.PathSeparator)+id != utils.CleanPath(string(os.PathSeparator)+id) {
|
||||
return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
|
||||
}
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@ import (
|
|||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
|
@ -20,6 +21,7 @@ import (
|
|||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink"
|
||||
)
|
||||
|
@ -64,7 +66,8 @@ type initConfig struct {
|
|||
CreateConsole bool `json:"create_console"`
|
||||
ConsoleWidth uint16 `json:"console_width"`
|
||||
ConsoleHeight uint16 `json:"console_height"`
|
||||
Rootless bool `json:"rootless"`
|
||||
RootlessEUID bool `json:"rootless_euid,omitempty"`
|
||||
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
|
||||
}
|
||||
|
||||
type initer interface {
|
||||
|
@ -121,7 +124,7 @@ func finalizeNamespace(config *initConfig) error {
|
|||
// inherited are marked close-on-exec so they stay out of the
|
||||
// container
|
||||
if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil {
|
||||
return err
|
||||
return errors.Wrap(err, "close exec fds")
|
||||
}
|
||||
|
||||
capabilities := &configs.Capabilities{}
|
||||
|
@ -136,20 +139,20 @@ func finalizeNamespace(config *initConfig) error {
|
|||
}
|
||||
// drop capabilities in bounding set before changing user
|
||||
if err := w.ApplyBoundingSet(); err != nil {
|
||||
return err
|
||||
return errors.Wrap(err, "apply bounding set")
|
||||
}
|
||||
// preserve existing capabilities while we change users
|
||||
if err := system.SetKeepCaps(); err != nil {
|
||||
return err
|
||||
return errors.Wrap(err, "set keep caps")
|
||||
}
|
||||
if err := setupUser(config); err != nil {
|
||||
return err
|
||||
return errors.Wrap(err, "setup user")
|
||||
}
|
||||
if err := system.ClearKeepCaps(); err != nil {
|
||||
return err
|
||||
return errors.Wrap(err, "clear keep caps")
|
||||
}
|
||||
if err := w.ApplyCaps(); err != nil {
|
||||
return err
|
||||
return errors.Wrap(err, "apply caps")
|
||||
}
|
||||
if config.Cwd != "" {
|
||||
if err := unix.Chdir(config.Cwd); err != nil {
|
||||
|
@ -217,11 +220,7 @@ func syncParentReady(pipe io.ReadWriter) error {
|
|||
}
|
||||
|
||||
// Wait for parent to give the all-clear.
|
||||
if err := readSync(pipe, procRun); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
return readSync(pipe, procRun)
|
||||
}
|
||||
|
||||
// syncParentHooks sends to the given pipe a JSON payload which indicates that
|
||||
|
@ -234,11 +233,7 @@ func syncParentHooks(pipe io.ReadWriter) error {
|
|||
}
|
||||
|
||||
// Wait for parent to give the all-clear.
|
||||
if err := readSync(pipe, procResume); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
return readSync(pipe, procResume)
|
||||
}
|
||||
|
||||
// setupUser changes the groups, gid, and uid for the user inside the container
|
||||
|
@ -282,7 +277,7 @@ func setupUser(config *initConfig) error {
|
|||
return fmt.Errorf("cannot set gid to unmapped user in user namespace")
|
||||
}
|
||||
|
||||
if config.Rootless {
|
||||
if config.RootlessEUID {
|
||||
// We cannot set any additional groups in a rootless container and thus
|
||||
// we bail if the user asked us to do so. TODO: We currently can't do
|
||||
// this check earlier, but if libcontainer.Process.User was typesafe
|
||||
|
@ -298,11 +293,18 @@ func setupUser(config *initConfig) error {
|
|||
return err
|
||||
}
|
||||
|
||||
setgroups, err := ioutil.ReadFile("/proc/self/setgroups")
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
// This isn't allowed in an unprivileged user namespace since Linux 3.19.
|
||||
// There's nothing we can do about /etc/group entries, so we silently
|
||||
// ignore setting groups here (since the user didn't explicitly ask us to
|
||||
// set the group).
|
||||
if !config.Rootless {
|
||||
allowSupGroups := !config.RootlessEUID && strings.TrimSpace(string(setgroups)) != "deny"
|
||||
|
||||
if allowSupGroups {
|
||||
suppGroups := append(execUser.Sgids, addGroups...)
|
||||
if err := unix.Setgroups(suppGroups); err != nil {
|
||||
return err
|
||||
|
|
|
@ -16,20 +16,25 @@ import (
|
|||
)
|
||||
|
||||
/*
|
||||
* About Intel RDT/CAT feature:
|
||||
* About Intel RDT features:
|
||||
* Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
|
||||
* Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3
|
||||
* Cache is the only resource that is supported in RDT.
|
||||
* Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are
|
||||
* two sub-features of RDT.
|
||||
*
|
||||
* This feature provides a way for the software to restrict cache allocation to a
|
||||
* defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
|
||||
* The different subsets are identified by class of service (CLOS) and each CLOS
|
||||
* has a capacity bitmask (CBM).
|
||||
* Cache Allocation Technology (CAT) provides a way for the software to restrict
|
||||
* cache allocation to a defined 'subset' of L3 cache which may be overlapping
|
||||
* with other 'subsets'. The different subsets are identified by class of
|
||||
* service (CLOS) and each CLOS has a capacity bitmask (CBM).
|
||||
*
|
||||
* For more information about Intel RDT/CAT can be found in the section 17.17
|
||||
* of Intel Software Developer Manual.
|
||||
* Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle
|
||||
* over memory bandwidth for the software. A user controls the resource by
|
||||
* indicating the percentage of maximum memory bandwidth.
|
||||
*
|
||||
* About Intel RDT/CAT kernel interface:
|
||||
* More details about Intel RDT CAT and MBA can be found in the section 17.18
|
||||
* of Intel Software Developer Manual:
|
||||
* https://software.intel.com/en-us/articles/intel-sdm
|
||||
*
|
||||
* About Intel RDT kernel interface:
|
||||
* In Linux 4.10 kernel or newer, the interface is defined and exposed via
|
||||
* "resource control" filesystem, which is a "cgroup-like" interface.
|
||||
*
|
||||
|
@ -37,59 +42,86 @@ import (
|
|||
* interfaces in a container. But unlike cgroups' hierarchy, it has single level
|
||||
* filesystem layout.
|
||||
*
|
||||
* CAT and MBA features are introduced in Linux 4.10 and 4.12 kernel via
|
||||
* "resource control" filesystem.
|
||||
*
|
||||
* Intel RDT "resource control" filesystem hierarchy:
|
||||
* mount -t resctrl resctrl /sys/fs/resctrl
|
||||
* tree /sys/fs/resctrl
|
||||
* /sys/fs/resctrl/
|
||||
* |-- info
|
||||
* | |-- L3
|
||||
* | |-- cbm_mask
|
||||
* | |-- min_cbm_bits
|
||||
* | | |-- cbm_mask
|
||||
* | | |-- min_cbm_bits
|
||||
* | | |-- num_closids
|
||||
* | |-- MB
|
||||
* | |-- bandwidth_gran
|
||||
* | |-- delay_linear
|
||||
* | |-- min_bandwidth
|
||||
* | |-- num_closids
|
||||
* |-- cpus
|
||||
* |-- ...
|
||||
* |-- schemata
|
||||
* |-- tasks
|
||||
* |-- <container_id>
|
||||
* |-- cpus
|
||||
* |-- ...
|
||||
* |-- schemata
|
||||
* |-- tasks
|
||||
*
|
||||
* For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
|
||||
* resource constraints.
|
||||
* For runc, we can make use of `tasks` and `schemata` configuration for L3
|
||||
* cache and memory bandwidth resource constraints.
|
||||
*
|
||||
* The file `tasks` has a list of tasks that belongs to this group (e.g.,
|
||||
* The file `tasks` has a list of tasks that belongs to this group (e.g.,
|
||||
* "<container_id>" group). Tasks can be added to a group by writing the task ID
|
||||
* to the "tasks" file (which will automatically remove them from the previous
|
||||
* to the "tasks" file (which will automatically remove them from the previous
|
||||
* group to which they belonged). New tasks created by fork(2) and clone(2) are
|
||||
* added to the same group as their parent. If a pid is not in any sub group, it is
|
||||
* in root group.
|
||||
* added to the same group as their parent.
|
||||
*
|
||||
* The file `schemata` has allocation bitmasks/values for L3 cache on each socket,
|
||||
* which contains L3 cache id and capacity bitmask (CBM).
|
||||
* The file `schemata` has a list of all the resources available to this group.
|
||||
* Each resource (L3 cache, memory bandwidth) has its own line and format.
|
||||
*
|
||||
* L3 cache schema:
|
||||
* It has allocation bitmasks/values for L3 cache on each socket, which
|
||||
* contains L3 cache id and capacity bitmask (CBM).
|
||||
* Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
* For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
|
||||
* For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0"
|
||||
* which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
||||
*
|
||||
* The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
|
||||
* be set is less than the max bit. The max bits in the CBM is varied among
|
||||
* supported Intel Xeon platforms. In Intel RDT "resource control" filesystem
|
||||
* layout, the CBM in a group should be a subset of the CBM in root. Kernel will
|
||||
* check if it is valid when writing. e.g., 0xfffff in root indicates the max bits
|
||||
* of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM
|
||||
* values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
* supported Intel CPU models. Kernel will check if it is valid when writing.
|
||||
* e.g., default value 0xfffff in root indicates the max bits of CBM is 20
|
||||
* bits, which maps to the entire L3 cache capacity. Some valid CBM values to
|
||||
* set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc.
|
||||
*
|
||||
* For more information about Intel RDT/CAT kernel interface:
|
||||
* Memory bandwidth schema:
|
||||
* It has allocation values for memory bandwidth on each socket, which contains
|
||||
* L3 cache id and memory bandwidth percentage.
|
||||
* Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
* For example, on a two-socket machine, the schema line could be "MB:0=20;1=70"
|
||||
*
|
||||
* The minimum bandwidth percentage value for each CPU model is predefined and
|
||||
* can be looked up through "info/MB/min_bandwidth". The bandwidth granularity
|
||||
* that is allocated is also dependent on the CPU model and can be looked up at
|
||||
* "info/MB/bandwidth_gran". The available bandwidth control steps are:
|
||||
* min_bw + N * bw_gran. Intermediate values are rounded to the next control
|
||||
* step available on the hardware.
|
||||
*
|
||||
* For more information about Intel RDT kernel interface:
|
||||
* https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
|
||||
*
|
||||
* An example for runc:
|
||||
* Consider a two-socket machine with two L3 caches where the default CBM is
|
||||
* 0xfffff and the max CBM length is 20 bits. With this configuration, tasks
|
||||
* inside the container only have access to the "upper" 80% of L3 cache id 0 and
|
||||
* the "lower" 50% L3 cache id 1:
|
||||
* 0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10%
|
||||
* with a memory bandwidth granularity of 10%.
|
||||
*
|
||||
* Tasks inside the container only have access to the "upper" 7/11 of L3 cache
|
||||
* on socket 0 and the "lower" 5/11 L3 cache on socket 1, and may use a
|
||||
* maximum memory bandwidth of 20% on socket 0 and 70% on socket 1.
|
||||
*
|
||||
* "linux": {
|
||||
* "intelRdt": {
|
||||
* "l3CacheSchema": "L3:0=ffff0;1=3ff"
|
||||
* "intelRdt": {
|
||||
* "l3CacheSchema": "L3:0=7f0;1=1f",
|
||||
* "memBwSchema": "MB:0=20;1=70"
|
||||
* }
|
||||
* }
|
||||
*/
|
||||
|
@ -129,8 +161,10 @@ var (
|
|||
intelRdtRoot string
|
||||
intelRdtRootLock sync.Mutex
|
||||
|
||||
// The flag to indicate if Intel RDT is supported
|
||||
isEnabled bool
|
||||
// The flag to indicate if Intel RDT/CAT is enabled
|
||||
isCatEnabled bool
|
||||
// The flag to indicate if Intel RDT/MBA is enabled
|
||||
isMbaEnabled bool
|
||||
)
|
||||
|
||||
type intelRdtData struct {
|
||||
|
@ -139,19 +173,35 @@ type intelRdtData struct {
|
|||
pid int
|
||||
}
|
||||
|
||||
// Check if Intel RDT is enabled in init()
|
||||
// Check if Intel RDT sub-features are enabled in init()
|
||||
func init() {
|
||||
// 1. Check if hardware and kernel support Intel RDT/CAT feature
|
||||
// "cat_l3" flag is set if supported
|
||||
isFlagSet, err := parseCpuInfoFile("/proc/cpuinfo")
|
||||
if !isFlagSet || err != nil {
|
||||
isEnabled = false
|
||||
// 1. Check if hardware and kernel support Intel RDT sub-features
|
||||
// "cat_l3" flag for CAT and "mba" flag for MBA
|
||||
isCatFlagSet, isMbaFlagSet, err := parseCpuInfoFile("/proc/cpuinfo")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// 2. Check if Intel RDT "resource control" filesystem is mounted
|
||||
// The user guarantees to mount the filesystem
|
||||
isEnabled = isIntelRdtMounted()
|
||||
if !isIntelRdtMounted() {
|
||||
return
|
||||
}
|
||||
|
||||
// 3. Double check if Intel RDT sub-features are available in
|
||||
// "resource control" filesystem. Intel RDT sub-features can be
|
||||
// selectively disabled or enabled by kernel command line
|
||||
// (e.g., rdt=!l3cat,mba) in 4.14 and newer kernel
|
||||
if isCatFlagSet {
|
||||
if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "L3")); err == nil {
|
||||
isCatEnabled = true
|
||||
}
|
||||
}
|
||||
if isMbaFlagSet {
|
||||
if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "MB")); err == nil {
|
||||
isMbaEnabled = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Return the mount point path of Intel RDT "resource control" filesystem
|
||||
|
@ -177,7 +227,7 @@ func findIntelRdtMountpointDir() (string, error) {
|
|||
}
|
||||
|
||||
if postSeparatorFields[0] == "resctrl" {
|
||||
// Check that the mount is properly formated.
|
||||
// Check that the mount is properly formatted.
|
||||
if numPostFields < 3 {
|
||||
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
|
||||
}
|
||||
|
@ -223,30 +273,40 @@ func isIntelRdtMounted() bool {
|
|||
return true
|
||||
}
|
||||
|
||||
func parseCpuInfoFile(path string) (bool, error) {
|
||||
func parseCpuInfoFile(path string) (bool, bool, error) {
|
||||
isCatFlagSet := false
|
||||
isMbaFlagSet := false
|
||||
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return false, err
|
||||
return false, false, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return false, err
|
||||
return false, false, err
|
||||
}
|
||||
|
||||
text := s.Text()
|
||||
flags := strings.Split(text, " ")
|
||||
line := s.Text()
|
||||
|
||||
// "cat_l3" flag is set if Intel RDT/CAT is supported
|
||||
for _, flag := range flags {
|
||||
if flag == "cat_l3" {
|
||||
return true, nil
|
||||
// Search "cat_l3" and "mba" flags in first "flags" line
|
||||
if strings.Contains(line, "flags") {
|
||||
flags := strings.Split(line, " ")
|
||||
// "cat_l3" flag for CAT and "mba" flag for MBA
|
||||
for _, flag := range flags {
|
||||
switch flag {
|
||||
case "cat_l3":
|
||||
isCatFlagSet = true
|
||||
case "mba":
|
||||
isMbaFlagSet = true
|
||||
}
|
||||
}
|
||||
return isCatFlagSet, isMbaFlagSet, nil
|
||||
}
|
||||
}
|
||||
return false, nil
|
||||
return isCatFlagSet, isMbaFlagSet, nil
|
||||
}
|
||||
|
||||
func parseUint(s string, base, bitSize int) (uint64, error) {
|
||||
|
@ -292,30 +352,6 @@ func getIntelRdtParamString(path, file string) (string, error) {
|
|||
return strings.TrimSpace(string(contents)), nil
|
||||
}
|
||||
|
||||
func readTasksFile(dir string) ([]int, error) {
|
||||
f, err := os.Open(filepath.Join(dir, IntelRdtTasks))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var (
|
||||
s = bufio.NewScanner(f)
|
||||
out = []int{}
|
||||
)
|
||||
|
||||
for s.Scan() {
|
||||
if t := s.Text(); t != "" {
|
||||
pid, err := strconv.Atoi(t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, pid)
|
||||
}
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func writeFile(dir, file, data string) error {
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s", file)
|
||||
|
@ -368,6 +404,57 @@ func getL3CacheInfo() (*L3CacheInfo, error) {
|
|||
return l3CacheInfo, nil
|
||||
}
|
||||
|
||||
// Get the read-only memory bandwidth information
|
||||
func getMemBwInfo() (*MemBwInfo, error) {
|
||||
memBwInfo := &MemBwInfo{}
|
||||
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
|
||||
path := filepath.Join(rootPath, "info", "MB")
|
||||
bandwidthGran, err := getIntelRdtParamUint(path, "bandwidth_gran")
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
delayLinear, err := getIntelRdtParamUint(path, "delay_linear")
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
minBandwidth, err := getIntelRdtParamUint(path, "min_bandwidth")
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
numClosids, err := getIntelRdtParamUint(path, "num_closids")
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
|
||||
memBwInfo.BandwidthGran = bandwidthGran
|
||||
memBwInfo.DelayLinear = delayLinear
|
||||
memBwInfo.MinBandwidth = minBandwidth
|
||||
memBwInfo.NumClosids = numClosids
|
||||
|
||||
return memBwInfo, nil
|
||||
}
|
||||
|
||||
// Get diagnostics for last filesystem operation error from file info/last_cmd_status
|
||||
func getLastCmdStatus() (string, error) {
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
path := filepath.Join(rootPath, "info")
|
||||
lastCmdStatus, err := getIntelRdtParamString(path, "last_cmd_status")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return lastCmdStatus, nil
|
||||
}
|
||||
|
||||
// WriteIntelRdtTasks writes the specified pid into the "tasks" file
|
||||
func WriteIntelRdtTasks(dir string, pid int) error {
|
||||
if dir == "" {
|
||||
|
@ -383,9 +470,14 @@ func WriteIntelRdtTasks(dir string, pid int) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// Check if Intel RDT is enabled
|
||||
func IsEnabled() bool {
|
||||
return isEnabled
|
||||
// IsCatEnabled reports whether Intel RDT/CAT is enabled.
|
||||
func IsCatEnabled() bool {
|
||||
return isCatEnabled
|
||||
}
|
||||
|
||||
// IsMbaEnabled reports whether Intel RDT/MBA is enabled.
|
||||
func IsMbaEnabled() bool {
|
||||
return isMbaEnabled
|
||||
}
|
||||
|
||||
// Get the 'container_id' path in Intel RDT "resource control" filesystem
|
||||
|
@ -452,65 +544,130 @@ func (m *IntelRdtManager) GetStats() (*Stats, error) {
|
|||
defer m.mu.Unlock()
|
||||
stats := NewStats()
|
||||
|
||||
// The read-only L3 cache information
|
||||
l3CacheInfo, err := getL3CacheInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.L3CacheInfo = l3CacheInfo
|
||||
|
||||
// The read-only L3 cache schema in root
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// The read-only L3 cache and memory bandwidth schemata in root
|
||||
tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// L3 cache schema is in the first line
|
||||
schemaRootStrings := strings.Split(tmpRootStrings, "\n")
|
||||
stats.L3CacheSchemaRoot = schemaRootStrings[0]
|
||||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
// The L3 cache and memory bandwidth schemata in 'container_id' group
|
||||
tmpStrings, err := getIntelRdtParamString(m.GetPath(), "schemata")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// L3 cache schema is in the first line
|
||||
schemaStrings := strings.Split(tmpStrings, "\n")
|
||||
stats.L3CacheSchema = schemaStrings[0]
|
||||
|
||||
if IsCatEnabled() {
|
||||
// The read-only L3 cache information
|
||||
l3CacheInfo, err := getL3CacheInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.L3CacheInfo = l3CacheInfo
|
||||
|
||||
// The read-only L3 cache schema in root
|
||||
for _, schemaRoot := range schemaRootStrings {
|
||||
if strings.Contains(schemaRoot, "L3") {
|
||||
stats.L3CacheSchemaRoot = strings.TrimSpace(schemaRoot)
|
||||
}
|
||||
}
|
||||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
for _, schema := range schemaStrings {
|
||||
if strings.Contains(schema, "L3") {
|
||||
stats.L3CacheSchema = strings.TrimSpace(schema)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if IsMbaEnabled() {
|
||||
// The read-only memory bandwidth information
|
||||
memBwInfo, err := getMemBwInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.MemBwInfo = memBwInfo
|
||||
|
||||
// The read-only memory bandwidth schema in root
|
||||
for _, schemaRoot := range schemaRootStrings {
|
||||
if strings.Contains(schemaRoot, "MB") {
|
||||
stats.MemBwSchemaRoot = strings.TrimSpace(schemaRoot)
|
||||
}
|
||||
}
|
||||
|
||||
// The memory bandwidth schema in 'container_id' group
|
||||
for _, schema := range schemaStrings {
|
||||
if strings.Contains(schema, "MB") {
|
||||
stats.MemBwSchema = strings.TrimSpace(schema)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// Set Intel RDT "resource control" filesystem as configured.
|
||||
func (m *IntelRdtManager) Set(container *configs.Config) error {
|
||||
path := m.GetPath()
|
||||
|
||||
// About L3 cache schema file:
|
||||
// The schema has allocation masks/values for L3 cache on each socket,
|
||||
// About L3 cache schema:
|
||||
// It has allocation bitmasks/values for L3 cache on each socket,
|
||||
// which contains L3 cache id and capacity bitmask (CBM).
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
// For example, on a two-socket machine, L3's schema line could be:
|
||||
// L3:0=ff;1=c0
|
||||
// Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
// For example, on a two-socket machine, the schema line could be:
|
||||
// L3:0=ff;1=c0
|
||||
// which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM
|
||||
// is 0xc0.
|
||||
//
|
||||
// About L3 cache CBM validity:
|
||||
// The valid L3 cache CBM is a *contiguous bits set* and number of
|
||||
// bits that can be set is less than the max bit. The max bits in the
|
||||
// CBM is varied among supported Intel Xeon platforms. In Intel RDT
|
||||
// "resource control" filesystem layout, the CBM in a group should
|
||||
// be a subset of the CBM in root. Kernel will check if it is valid
|
||||
// when writing.
|
||||
// e.g., 0xfffff in root indicates the max bits of CBM is 20 bits,
|
||||
// which mapping to entire L3 cache capacity. Some valid CBM values
|
||||
// to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
// CBM is varied among supported Intel CPU models. Kernel will check
|
||||
// if it is valid when writing. e.g., default value 0xfffff in root
|
||||
// indicates the max bits of CBM is 20 bits, which maps to the entire
|
||||
// L3 cache capacity. Some valid CBM values to set in a group:
|
||||
// 0xf, 0xf0, 0x3ff, 0x1f00, etc.
|
||||
//
|
||||
//
|
||||
// About memory bandwidth schema:
|
||||
// It has allocation values for memory bandwidth on each socket, which
|
||||
// contains L3 cache id and memory bandwidth percentage.
|
||||
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
// For example, on a two-socket machine, the schema line could be:
|
||||
// "MB:0=20;1=70"
|
||||
//
|
||||
// The minimum bandwidth percentage value for each CPU model is
|
||||
// predefined and can be looked up through "info/MB/min_bandwidth".
|
||||
// The bandwidth granularity that is allocated is also dependent on
|
||||
// the CPU model and can be looked up at "info/MB/bandwidth_gran".
|
||||
// The available bandwidth control steps are: min_bw + N * bw_gran.
|
||||
// Intermediate values are rounded to the next control step available
|
||||
// on the hardware.
|
||||
if container.IntelRdt != nil {
|
||||
path := m.GetPath()
|
||||
l3CacheSchema := container.IntelRdt.L3CacheSchema
|
||||
if l3CacheSchema != "" {
|
||||
memBwSchema := container.IntelRdt.MemBwSchema
|
||||
|
||||
// Write a single joint schema string to schemata file
|
||||
if l3CacheSchema != "" && memBwSchema != "" {
|
||||
if err := writeFile(path, "schemata", l3CacheSchema+"\n"+memBwSchema); err != nil {
|
||||
return NewLastCmdError(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Write only L3 cache schema string to schemata file
|
||||
if l3CacheSchema != "" && memBwSchema == "" {
|
||||
if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
|
||||
return err
|
||||
return NewLastCmdError(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Write only memory bandwidth schema string to schemata file
|
||||
if l3CacheSchema == "" && memBwSchema != "" {
|
||||
if err := writeFile(path, "schemata", memBwSchema); err != nil {
|
||||
return NewLastCmdError(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -521,11 +678,11 @@ func (m *IntelRdtManager) Set(container *configs.Config) error {
|
|||
func (raw *intelRdtData) join(id string) (string, error) {
|
||||
path := filepath.Join(raw.root, id)
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return "", err
|
||||
return "", NewLastCmdError(err)
|
||||
}
|
||||
|
||||
if err := WriteIntelRdtTasks(path, raw.pid); err != nil {
|
||||
return "", err
|
||||
return "", NewLastCmdError(err)
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
@ -551,3 +708,23 @@ func IsNotFound(err error) bool {
|
|||
_, ok := err.(*NotFoundError)
|
||||
return ok
|
||||
}
|
||||
|
||||
// LastCmdError pairs an error from an Intel RDT filesystem operation with the
// diagnostic text captured from "info/last_cmd_status".
type LastCmdError struct {
	// LastCmdStatus is the diagnostic text that accompanies Err.
	LastCmdStatus string
	// Err is the underlying error being reported.
	Err error
}

// Error implements the error interface. The result is the underlying error
// message followed by ", last_cmd_status: " and the captured status text.
func (e *LastCmdError) Error() string {
	// Plain concatenation on purpose: passing the message to fmt.Sprintf as
	// the format string would mangle any '%' in the error or status text.
	return e.Err.Error() + ", last_cmd_status: " + e.LastCmdStatus
}
|
||||
|
||||
func NewLastCmdError(err error) error {
|
||||
lastCmdStatus, err1 := getLastCmdStatus()
|
||||
if err1 == nil {
|
||||
return &LastCmdError{
|
||||
LastCmdStatus: lastCmdStatus,
|
||||
Err: err,
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -8,6 +8,13 @@ type L3CacheInfo struct {
|
|||
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||
}
|
||||
|
||||
// MemBwInfo holds the read-only memory bandwidth parameters that are read
// from the "info/MB" directory of the resource control filesystem.
type MemBwInfo struct {
	// BandwidthGran is the value of info/MB/bandwidth_gran.
	BandwidthGran uint64 `json:"bandwidth_gran,omitempty"`
	// DelayLinear is the value of info/MB/delay_linear.
	DelayLinear uint64 `json:"delay_linear,omitempty"`
	// MinBandwidth is the value of info/MB/min_bandwidth.
	MinBandwidth uint64 `json:"min_bandwidth,omitempty"`
	// NumClosids is the value of info/MB/num_closids.
	NumClosids uint64 `json:"num_closids,omitempty"`
}
|
||||
|
||||
type Stats struct {
|
||||
// The read-only L3 cache information
|
||||
L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"`
|
||||
|
@ -17,6 +24,15 @@ type Stats struct {
|
|||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||
|
||||
// The read-only memory bandwidth information
|
||||
MemBwInfo *MemBwInfo `json:"mem_bw_info,omitempty"`
|
||||
|
||||
// The read-only memory bandwidth schema in root
|
||||
MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"`
|
||||
|
||||
// The memory bandwidth schema in 'container_id' group
|
||||
MemBwSchema string `json:"mem_bw_schema,omitempty"`
|
||||
}
|
||||
|
||||
func NewStats() *Stats {
|
||||
|
|
|
@ -8,6 +8,7 @@ go_library(
|
|||
visibility = ["//visibility:public"],
|
||||
deps = select({
|
||||
"@io_bazel_rules_go//go/platform:linux": [
|
||||
"//vendor/github.com/pkg/errors:go_default_library",
|
||||
"//vendor/golang.org/x/sys/unix:go_default_library",
|
||||
],
|
||||
"//conditions:default": [],
|
||||
|
|
|
@ -7,6 +7,8 @@ import (
|
|||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
|
@ -15,7 +17,7 @@ type KeySerial uint32
|
|||
func JoinSessionKeyring(name string) (KeySerial, error) {
|
||||
sessKeyId, err := unix.KeyctlJoinSessionKeyring(name)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("could not create session key: %v", err)
|
||||
return 0, errors.Wrap(err, "create session key")
|
||||
}
|
||||
return KeySerial(sessKeyId), nil
|
||||
}
|
||||
|
@ -42,9 +44,5 @@ func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error {
|
|||
|
||||
perm := (uint32(perm64) & mask) | setbits
|
||||
|
||||
if err := unix.KeyctlSetperm(int(ringId), perm); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
return unix.KeyctlSetperm(int(ringId), perm)
|
||||
}
|
||||
|
|
|
@ -10,16 +10,16 @@ import (
|
|||
// list of known message types we want to send to bootstrap program
|
||||
// The number is randomly chosen to not conflict with known netlink types
|
||||
const (
|
||||
InitMsg uint16 = 62000
|
||||
CloneFlagsAttr uint16 = 27281
|
||||
NsPathsAttr uint16 = 27282
|
||||
UidmapAttr uint16 = 27283
|
||||
GidmapAttr uint16 = 27284
|
||||
SetgroupAttr uint16 = 27285
|
||||
OomScoreAdjAttr uint16 = 27286
|
||||
RootlessAttr uint16 = 27287
|
||||
UidmapPathAttr uint16 = 27288
|
||||
GidmapPathAttr uint16 = 27289
|
||||
InitMsg uint16 = 62000
|
||||
CloneFlagsAttr uint16 = 27281
|
||||
NsPathsAttr uint16 = 27282
|
||||
UidmapAttr uint16 = 27283
|
||||
GidmapAttr uint16 = 27284
|
||||
SetgroupAttr uint16 = 27285
|
||||
OomScoreAdjAttr uint16 = 27286
|
||||
RootlessEUIDAttr uint16 = 27287
|
||||
UidmapPathAttr uint16 = 27288
|
||||
GidmapPathAttr uint16 = 27289
|
||||
)
|
||||
|
||||
type Int32msg struct {
|
||||
|
|
|
@ -5,18 +5,15 @@ package libcontainer
|
|||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/vishvananda/netlink"
|
||||
)
|
||||
|
||||
var strategies = map[string]networkStrategy{
|
||||
"veth": &veth{},
|
||||
"loopback": &loopback{},
|
||||
}
|
||||
|
||||
|
@ -103,157 +100,3 @@ func (l *loopback) attach(n *configs.Network) (err error) {
|
|||
func (l *loopback) detach(n *configs.Network) (err error) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// veth is the network strategy built on a bridge plus a veth pair: one end
// is attached to the bridge on the host, the other end is placed inside the
// container's namespace.
type veth struct{}
|
||||
|
||||
func (v *veth) detach(n *configs.Network) (err error) {
|
||||
return netlink.LinkSetMaster(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: n.HostInterfaceName}}, nil)
|
||||
}
|
||||
|
||||
// attach a container network interface to an external network
|
||||
func (v *veth) attach(n *configs.Network) (err error) {
|
||||
brl, err := netlink.LinkByName(n.Bridge)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
br, ok := brl.(*netlink.Bridge)
|
||||
if !ok {
|
||||
return fmt.Errorf("Wrong device type %T", brl)
|
||||
}
|
||||
host, err := netlink.LinkByName(n.HostInterfaceName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := netlink.LinkSetMaster(host, br); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetMTU(host, n.Mtu); err != nil {
|
||||
return err
|
||||
}
|
||||
if n.HairpinMode {
|
||||
if err := netlink.LinkSetHairpin(host, true); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := netlink.LinkSetUp(host); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *veth) create(n *network, nspid int) (err error) {
|
||||
tmpName, err := v.generateTempPeerName()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n.TempVethPeerName = tmpName
|
||||
if n.Bridge == "" {
|
||||
return fmt.Errorf("bridge is not specified")
|
||||
}
|
||||
veth := &netlink.Veth{
|
||||
LinkAttrs: netlink.LinkAttrs{
|
||||
Name: n.HostInterfaceName,
|
||||
TxQLen: n.TxQueueLen,
|
||||
},
|
||||
PeerName: n.TempVethPeerName,
|
||||
}
|
||||
if err := netlink.LinkAdd(veth); err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
netlink.LinkDel(veth)
|
||||
}
|
||||
}()
|
||||
if err := v.attach(&n.Network); err != nil {
|
||||
return err
|
||||
}
|
||||
child, err := netlink.LinkByName(n.TempVethPeerName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netlink.LinkSetNsPid(child, nspid)
|
||||
}
|
||||
|
||||
func (v *veth) generateTempPeerName() (string, error) {
|
||||
return utils.GenerateRandomName("veth", 7)
|
||||
}
|
||||
|
||||
func (v *veth) initialize(config *network) error {
|
||||
peer := config.TempVethPeerName
|
||||
if peer == "" {
|
||||
return fmt.Errorf("peer is not specified")
|
||||
}
|
||||
child, err := netlink.LinkByName(peer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetDown(child); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetName(child, config.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
// get the interface again after we changed the name as the index also changes.
|
||||
if child, err = netlink.LinkByName(config.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.MacAddress != "" {
|
||||
mac, err := net.ParseMAC(config.MacAddress)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetHardwareAddr(child, mac); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
ip, err := netlink.ParseAddr(config.Address)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.AddrAdd(child, ip); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.IPv6Address != "" {
|
||||
ip6, err := netlink.ParseAddr(config.IPv6Address)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.AddrAdd(child, ip6); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := netlink.LinkSetMTU(child, config.Mtu); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetUp(child); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Gateway != "" {
|
||||
gw := net.ParseIP(config.Gateway)
|
||||
if err := netlink.RouteAdd(&netlink.Route{
|
||||
Scope: netlink.SCOPE_UNIVERSE,
|
||||
LinkIndex: child.Attrs().Index,
|
||||
Gw: gw,
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if config.IPv6Gateway != "" {
|
||||
gw := net.ParseIP(config.IPv6Gateway)
|
||||
if err := netlink.RouteAdd(&netlink.Route{
|
||||
Scope: netlink.SCOPE_UNIVERSE,
|
||||
LinkIndex: child.Attrs().Index,
|
||||
Gw: gw,
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -72,6 +72,9 @@ type Process struct {
|
|||
// ConsoleSocket provides the masterfd console.
|
||||
ConsoleSocket *os.File
|
||||
|
||||
// Init specifies whether the process is the first process in the container.
|
||||
Init bool
|
||||
|
||||
ops processOperations
|
||||
}
|
||||
|
||||
|
|
|
@ -46,15 +46,16 @@ type parentProcess interface {
|
|||
}
|
||||
|
||||
type setnsProcess struct {
|
||||
cmd *exec.Cmd
|
||||
parentPipe *os.File
|
||||
childPipe *os.File
|
||||
cgroupPaths map[string]string
|
||||
intelRdtPath string
|
||||
config *initConfig
|
||||
fds []string
|
||||
process *Process
|
||||
bootstrapData io.Reader
|
||||
cmd *exec.Cmd
|
||||
parentPipe *os.File
|
||||
childPipe *os.File
|
||||
cgroupPaths map[string]string
|
||||
rootlessCgroups bool
|
||||
intelRdtPath string
|
||||
config *initConfig
|
||||
fds []string
|
||||
process *Process
|
||||
bootstrapData io.Reader
|
||||
}
|
||||
|
||||
func (p *setnsProcess) startTime() (uint64, error) {
|
||||
|
@ -86,7 +87,7 @@ func (p *setnsProcess) start() (err error) {
|
|||
return newSystemErrorWithCause(err, "executing setns process")
|
||||
}
|
||||
if len(p.cgroupPaths) > 0 {
|
||||
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
|
||||
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil && !p.rootlessCgroups {
|
||||
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
|
||||
}
|
||||
}
|
||||
|
@ -537,7 +538,7 @@ func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
|
|||
}
|
||||
fds = append(fds, r.Fd(), w.Fd())
|
||||
p.Stderr, i.Stderr = w, r
|
||||
// change ownership of the pipes incase we are in a user namespace
|
||||
// change ownership of the pipes in case we are in a user namespace
|
||||
for _, fd := range fds {
|
||||
if err := unix.Fchown(int(fd), rootuid, rootgid); err != nil {
|
||||
return nil, err
|
||||
|
|
|
@ -46,6 +46,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
|
|||
return newSystemErrorWithCause(err, "preparing rootfs")
|
||||
}
|
||||
|
||||
setupDev := needsSetupDev(config)
|
||||
for _, m := range config.Mounts {
|
||||
for _, precmd := range m.PremountCmds {
|
||||
if err := mountCmd(precmd); err != nil {
|
||||
|
@ -64,8 +65,6 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
|
|||
}
|
||||
}
|
||||
|
||||
setupDev := needsSetupDev(config)
|
||||
|
||||
if setupDev {
|
||||
if err := createDevices(config); err != nil {
|
||||
return newSystemErrorWithCause(err, "creating device nodes")
|
||||
|
@ -153,6 +152,26 @@ func finalizeRootfs(config *configs.Config) (err error) {
|
|||
return nil
|
||||
}
|
||||
|
||||
// /tmp has to be mounted as private to allow MS_MOVE to work in all situations
|
||||
func prepareTmp(topTmpDir string) (string, error) {
|
||||
tmpdir, err := ioutil.TempDir(topTmpDir, "runctop")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := unix.Mount(tmpdir, tmpdir, "bind", unix.MS_BIND, ""); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := unix.Mount("", tmpdir, "", uintptr(unix.MS_PRIVATE), ""); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return tmpdir, nil
|
||||
}
|
||||
|
||||
func cleanupTmp(tmpdir string) error {
|
||||
unix.Unmount(tmpdir, 0)
|
||||
return os.RemoveAll(tmpdir)
|
||||
}
|
||||
|
||||
func mountCmd(cmd configs.Command) error {
|
||||
command := exec.Command(cmd.Path, cmd.Args[:]...)
|
||||
command.Env = cmd.Env
|
||||
|
@ -200,7 +219,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
|||
}
|
||||
}
|
||||
if copyUp {
|
||||
tmpDir, err = ioutil.TempDir("/tmp", "runctmpdir")
|
||||
tmpdir, err := prepareTmp("/tmp")
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir")
|
||||
}
|
||||
defer cleanupTmp(tmpdir)
|
||||
tmpDir, err = ioutil.TempDir(tmpdir, "runctmpdir")
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
|
||||
}
|
||||
|
@ -397,6 +421,7 @@ func checkMountDestination(rootfs, dest string) error {
|
|||
"/proc/stat",
|
||||
"/proc/swaps",
|
||||
"/proc/uptime",
|
||||
"/proc/loadavg",
|
||||
"/proc/net/dev",
|
||||
}
|
||||
for _, valid := range validDestinations {
|
||||
|
@ -413,7 +438,7 @@ func checkMountDestination(rootfs, dest string) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if path == "." || !strings.HasPrefix(path, "..") {
|
||||
if path != "." && !strings.HasPrefix(path, "..") {
|
||||
return fmt.Errorf("%q cannot be mounted because it is located inside %q", dest, invalid)
|
||||
}
|
||||
}
|
||||
|
@ -803,10 +828,7 @@ func remount(m *configs.Mount, rootfs string) error {
|
|||
if !strings.HasPrefix(dest, rootfs) {
|
||||
dest = filepath.Join(rootfs, dest)
|
||||
}
|
||||
if err := unix.Mount(m.Source, dest, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
return unix.Mount(m.Source, dest, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "")
|
||||
}
|
||||
|
||||
// Do the mount operation followed by additional mounts required to take care
|
||||
|
|
|
@ -5,12 +5,14 @@ package libcontainer
|
|||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||||
"github.com/opencontainers/runc/libcontainer/keys"
|
||||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
@ -28,10 +30,19 @@ func (l *linuxSetnsInit) getSessionRingName() string {
|
|||
}
|
||||
|
||||
func (l *linuxSetnsInit) Init() error {
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
|
||||
if !l.config.Config.NoNewKeyring {
|
||||
// do not inherit the parent's session keyring
|
||||
// Do not inherit the parent's session keyring.
|
||||
if _, err := keys.JoinSessionKeyring(l.getSessionRingName()); err != nil {
|
||||
return err
|
||||
// Same justification as in standard_init_linux.go as to why we
|
||||
// don't bail on ENOSYS.
|
||||
//
|
||||
// TODO(cyphar): And we should have logging here too.
|
||||
if errors.Cause(err) != unix.ENOSYS {
|
||||
return errors.Wrap(err, "join session keyring")
|
||||
}
|
||||
}
|
||||
}
|
||||
if l.config.CreateConsole {
|
||||
|
@ -47,6 +58,10 @@ func (l *linuxSetnsInit) Init() error {
|
|||
return err
|
||||
}
|
||||
}
|
||||
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
defer label.SetProcessLabel("")
|
||||
// Without NoNewPrivileges seccomp is a privileged operation, so we need to
|
||||
// do this before dropping capabilities; otherwise do it as late as possible
|
||||
// just before execve so as few syscalls take place after it as possible.
|
||||
|
@ -61,9 +76,6 @@ func (l *linuxSetnsInit) Init() error {
|
|||
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
// Set seccomp as close to execve as possible, so as few syscalls take
|
||||
// place afterward (reducing the amount of syscalls that users need to
|
||||
// enable in their seccomp profiles).
|
||||
|
|
|
@ -6,6 +6,7 @@ import (
|
|||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"syscall" //only for Exec
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||||
|
@ -14,6 +15,7 @@ import (
|
|||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
@ -43,17 +45,31 @@ func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
|
|||
}
|
||||
|
||||
func (l *linuxStandardInit) Init() error {
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
if !l.config.Config.NoNewKeyring {
|
||||
ringname, keepperms, newperms := l.getSessionRingParams()
|
||||
|
||||
// Do not inherit the parent's session keyring.
|
||||
sessKeyId, err := keys.JoinSessionKeyring(ringname)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Make session keyring searchable.
|
||||
if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
|
||||
return err
|
||||
if sessKeyId, err := keys.JoinSessionKeyring(ringname); err != nil {
|
||||
// If keyrings aren't supported then it is likely we are on an
|
||||
// older kernel (or inside an LXC container). While we could bail,
|
||||
// the security feature we are using here is best-effort (it only
|
||||
// really provides marginal protection since VFS credentials are
|
||||
// the only significant protection of keyrings).
|
||||
//
|
||||
// TODO(cyphar): Log this so people know what's going on, once we
|
||||
// have proper logging in 'runc init'.
|
||||
if errors.Cause(err) != unix.ENOSYS {
|
||||
return errors.Wrap(err, "join session keyring")
|
||||
}
|
||||
} else {
|
||||
// Make session keyring searchable. If we've gotten this far we
|
||||
// bail on any error -- we don't want to have a keyring with bad
|
||||
// permissions.
|
||||
if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
|
||||
return errors.Wrap(err, "mod keyring permissions")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -76,7 +92,7 @@ func (l *linuxStandardInit) Init() error {
|
|||
return err
|
||||
}
|
||||
if err := system.Setctty(); err != nil {
|
||||
return err
|
||||
return errors.Wrap(err, "setctty")
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -89,46 +105,47 @@ func (l *linuxStandardInit) Init() error {
|
|||
|
||||
if hostname := l.config.Config.Hostname; hostname != "" {
|
||||
if err := unix.Sethostname([]byte(hostname)); err != nil {
|
||||
return err
|
||||
return errors.Wrap(err, "sethostname")
|
||||
}
|
||||
}
|
||||
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
|
||||
return err
|
||||
return errors.Wrap(err, "apply apparmor profile")
|
||||
}
|
||||
|
||||
for key, value := range l.config.Config.Sysctl {
|
||||
if err := writeSystemProperty(key, value); err != nil {
|
||||
return err
|
||||
return errors.Wrapf(err, "write sysctl key %s", key)
|
||||
}
|
||||
}
|
||||
for _, path := range l.config.Config.ReadonlyPaths {
|
||||
if err := readonlyPath(path); err != nil {
|
||||
return err
|
||||
return errors.Wrapf(err, "readonly path %s", path)
|
||||
}
|
||||
}
|
||||
for _, path := range l.config.Config.MaskPaths {
|
||||
if err := maskPath(path, l.config.Config.MountLabel); err != nil {
|
||||
return err
|
||||
return errors.Wrapf(err, "mask path %s", path)
|
||||
}
|
||||
}
|
||||
pdeath, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return err
|
||||
return errors.Wrap(err, "get pdeath signal")
|
||||
}
|
||||
if l.config.NoNewPrivileges {
|
||||
if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
|
||||
return err
|
||||
return errors.Wrap(err, "set nonewprivileges")
|
||||
}
|
||||
}
|
||||
// Tell our parent that we're ready to Execv. This must be done before the
|
||||
// Seccomp rules have been applied, because we need to be able to read and
|
||||
// write to a socket.
|
||||
if err := syncParentReady(l.pipe); err != nil {
|
||||
return err
|
||||
return errors.Wrap(err, "sync ready")
|
||||
}
|
||||
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
|
||||
return errors.Wrap(err, "set process label")
|
||||
}
|
||||
defer label.SetProcessLabel("")
|
||||
// Without NoNewPrivileges seccomp is a privileged operation, so we need to
|
||||
// do this before dropping capabilities; otherwise do it as late as possible
|
||||
// just before execve so as few syscalls take place after it as possible.
|
||||
|
@ -143,7 +160,7 @@ func (l *linuxStandardInit) Init() error {
|
|||
// finalizeNamespace can change user/group which clears the parent death
|
||||
// signal, so we restore it here.
|
||||
if err := pdeath.Restore(); err != nil {
|
||||
return err
|
||||
return errors.Wrap(err, "restore pdeath signal")
|
||||
}
|
||||
// Compare the parent from the initial start of the init process and make
|
||||
// sure that it did not change. if the parent changes that means it died
|
||||
|
|
|
@ -41,10 +41,7 @@ type syncT struct {
|
|||
// writeSync is used to write to a synchronisation pipe. An error is returned
|
||||
// if there was a problem writing the payload.
|
||||
func writeSync(pipe io.Writer, sync syncType) error {
|
||||
if err := utils.WriteJSON(pipe, syncT{sync}); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
return utils.WriteJSON(pipe, syncT{sync})
|
||||
}
|
||||
|
||||
// readSync is used to read from a synchronisation pipe. An error is returned
|
||||
|
|
|
@ -17,6 +17,41 @@ go_library(
|
|||
importpath = "github.com/opencontainers/runc/libcontainer/system",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = select({
|
||||
"@io_bazel_rules_go//go/platform:android": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/user:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:darwin": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/user:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:dragonfly": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/user:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:freebsd": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/user:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:linux": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/user:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:nacl": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/user:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:netbsd": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/user:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:openbsd": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/user:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:plan9": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/user:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:solaris": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/user:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:windows": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/user:go_default_library",
|
||||
],
|
||||
"//conditions:default": [],
|
||||
}) + select({
|
||||
"@io_bazel_rules_go//go/platform:linux_386": [
|
||||
"//vendor/golang.org/x/sys/unix:go_default_library",
|
||||
],
|
||||
|
|
|
@ -3,13 +3,12 @@
|
|||
package system
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"syscall" // only for exec
|
||||
"unsafe"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
|
@ -102,34 +101,43 @@ func Setctty() error {
|
|||
}
|
||||
|
||||
// RunningInUserNS detects whether we are currently running in a user namespace.
|
||||
// Copied from github.com/lxc/lxd/shared/util.go
|
||||
// Originally copied from github.com/lxc/lxd/shared/util.go
|
||||
func RunningInUserNS() bool {
|
||||
file, err := os.Open("/proc/self/uid_map")
|
||||
uidmap, err := user.CurrentProcessUIDMap()
|
||||
if err != nil {
|
||||
// This kernel-provided file only exists if user namespaces are supported
|
||||
return false
|
||||
}
|
||||
defer file.Close()
|
||||
return UIDMapInUserNS(uidmap)
|
||||
}
|
||||
|
||||
buf := bufio.NewReader(file)
|
||||
l, _, err := buf.ReadLine()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
line := string(l)
|
||||
var a, b, c int64
|
||||
fmt.Sscanf(line, "%d %d %d", &a, &b, &c)
|
||||
func UIDMapInUserNS(uidmap []user.IDMap) bool {
|
||||
/*
|
||||
* We assume we are in the initial user namespace if we have a full
|
||||
* range - 4294967295 uids starting at uid 0.
|
||||
*/
|
||||
if a == 0 && b == 0 && c == 4294967295 {
|
||||
if len(uidmap) == 1 && uidmap[0].ID == 0 && uidmap[0].ParentID == 0 && uidmap[0].Count == 4294967295 {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// GetParentNSeuid returns the euid within the parent user namespace
|
||||
func GetParentNSeuid() int64 {
|
||||
euid := int64(os.Geteuid())
|
||||
uidmap, err := user.CurrentProcessUIDMap()
|
||||
if err != nil {
|
||||
// This kernel-provided file only exists if user namespaces are supported
|
||||
return euid
|
||||
}
|
||||
for _, um := range uidmap {
|
||||
if um.ID <= euid && euid <= um.ID+um.Count-1 {
|
||||
return um.ParentID + euid - um.ID
|
||||
}
|
||||
}
|
||||
return euid
|
||||
}
|
||||
|
||||
// SetSubreaper sets the value i as the subreaper setting for the calling process
|
||||
func SetSubreaper(i int) error {
|
||||
return unix.Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
|
||||
|
|
|
@ -2,8 +2,26 @@
|
|||
|
||||
package system
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
)
|
||||
|
||||
// RunningInUserNS reports whether the process is running in a user
// namespace. This is the stub used on non-Linux systems: it performs no
// detection and always returns false.
func RunningInUserNS() bool {
	return false
}
|
||||
|
||||
// UIDMapInUserNS is a stub for non-Linux systems
|
||||
// Always returns false
|
||||
func UIDMapInUserNS(uidmap []user.IDMap) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// GetParentNSeuid returns the euid within the parent user namespace.
// On non-Linux systems no translation takes place and the result is always
// os.Geteuid.
//
// NOTE(review): the Linux implementation of this function returns int64
// while this stub returns int; callers that must build on every platform
// cannot rely on a single result type.
func GetParentNSeuid() int {
	euid := os.Geteuid()
	return euid
}
|
||||
|
|
|
@ -5,6 +5,7 @@ package user
|
|||
import (
|
||||
"io"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
@ -114,3 +115,30 @@ func CurrentUser() (User, error) {
|
|||
func CurrentGroup() (Group, error) {
|
||||
return LookupGid(unix.Getgid())
|
||||
}
|
||||
|
||||
func currentUserSubIDs(fileName string) ([]SubID, error) {
|
||||
u, err := CurrentUser()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
filter := func(entry SubID) bool {
|
||||
return entry.Name == u.Name || entry.Name == strconv.Itoa(u.Uid)
|
||||
}
|
||||
return ParseSubIDFileFilter(fileName, filter)
|
||||
}
|
||||
|
||||
func CurrentUserSubUIDs() ([]SubID, error) {
|
||||
return currentUserSubIDs("/etc/subuid")
|
||||
}
|
||||
|
||||
func CurrentUserSubGIDs() ([]SubID, error) {
|
||||
return currentUserSubIDs("/etc/subgid")
|
||||
}
|
||||
|
||||
func CurrentProcessUIDMap() ([]IDMap, error) {
|
||||
return ParseIDMapFile("/proc/self/uid_map")
|
||||
}
|
||||
|
||||
func CurrentProcessGIDMap() ([]IDMap, error) {
|
||||
return ParseIDMapFile("/proc/self/gid_map")
|
||||
}
|
||||
|
|
|
@ -75,12 +75,29 @@ func groupFromOS(g *user.Group) (Group, error) {
|
|||
return newGroup, nil
|
||||
}
|
||||
|
||||
// SubID represents an entry in /etc/sub{u,g}id. The three fields correspond
// to the colon-separated columns of one line, in file order (see subuid(5)).
type SubID struct {
	// Name is the first column: a user name or a numeric id in text form.
	Name string
	// SubID is the second column: the first id of the subordinate range.
	SubID int64
	// Count is the third column: the number of ids in the range.
	Count int64
}
|
||||
|
||||
// IDMap represents an entry in /proc/PID/{u,g}id_map. The three fields
// correspond to the whitespace-separated columns of one line, in file order
// (see user_namespaces(7)).
type IDMap struct {
	// ID is the first id of the mapped range as seen by the process.
	ID int64
	// ParentID is the id that ID corresponds to in the parent namespace.
	ParentID int64
	// Count is the number of consecutive ids covered by the mapping.
	Count int64
}
|
||||
|
||||
func parseLine(line string, v ...interface{}) {
|
||||
if line == "" {
|
||||
parseParts(strings.Split(line, ":"), v...)
|
||||
}
|
||||
|
||||
func parseParts(parts []string, v ...interface{}) {
|
||||
if len(parts) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
parts := strings.Split(line, ":")
|
||||
for i, p := range parts {
|
||||
// Ignore cases where we don't have enough fields to populate the arguments.
|
||||
// Some configuration files like to misbehave.
|
||||
|
@ -96,6 +113,8 @@ func parseLine(line string, v ...interface{}) {
|
|||
case *int:
|
||||
// "numbers", with conversion errors ignored because of some misbehaving configuration files.
|
||||
*e, _ = strconv.Atoi(p)
|
||||
case *int64:
|
||||
*e, _ = strconv.ParseInt(p, 10, 64)
|
||||
case *[]string:
|
||||
// Comma-separated lists.
|
||||
if p != "" {
|
||||
|
@ -105,7 +124,7 @@ func parseLine(line string, v ...interface{}) {
|
|||
}
|
||||
default:
|
||||
// Someone goof'd when writing code using this function. Scream so they can hear us.
|
||||
panic(fmt.Sprintf("parseLine only accepts {*string, *int, *[]string} as arguments! %#v is not a pointer!", e))
|
||||
panic(fmt.Sprintf("parseLine only accepts {*string, *int, *int64, *[]string} as arguments! %#v is not a pointer!", e))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -479,3 +498,111 @@ func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int
|
|||
}
|
||||
return GetAdditionalGroups(additionalGroups, group)
|
||||
}
|
||||
|
||||
func ParseSubIDFile(path string) ([]SubID, error) {
|
||||
subid, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer subid.Close()
|
||||
return ParseSubID(subid)
|
||||
}
|
||||
|
||||
func ParseSubID(subid io.Reader) ([]SubID, error) {
|
||||
return ParseSubIDFilter(subid, nil)
|
||||
}
|
||||
|
||||
func ParseSubIDFileFilter(path string, filter func(SubID) bool) ([]SubID, error) {
|
||||
subid, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer subid.Close()
|
||||
return ParseSubIDFilter(subid, filter)
|
||||
}
|
||||
|
||||
func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) {
|
||||
if r == nil {
|
||||
return nil, fmt.Errorf("nil source for subid-formatted data")
|
||||
}
|
||||
|
||||
var (
|
||||
s = bufio.NewScanner(r)
|
||||
out = []SubID{}
|
||||
)
|
||||
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
line := strings.TrimSpace(s.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// see: man 5 subuid
|
||||
p := SubID{}
|
||||
parseLine(line, &p.Name, &p.SubID, &p.Count)
|
||||
|
||||
if filter == nil || filter(p) {
|
||||
out = append(out, p)
|
||||
}
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func ParseIDMapFile(path string) ([]IDMap, error) {
|
||||
r, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer r.Close()
|
||||
return ParseIDMap(r)
|
||||
}
|
||||
|
||||
func ParseIDMap(r io.Reader) ([]IDMap, error) {
|
||||
return ParseIDMapFilter(r, nil)
|
||||
}
|
||||
|
||||
func ParseIDMapFileFilter(path string, filter func(IDMap) bool) ([]IDMap, error) {
|
||||
r, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer r.Close()
|
||||
return ParseIDMapFilter(r, filter)
|
||||
}
|
||||
|
||||
func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) {
|
||||
if r == nil {
|
||||
return nil, fmt.Errorf("nil source for idmap-formatted data")
|
||||
}
|
||||
|
||||
var (
|
||||
s = bufio.NewScanner(r)
|
||||
out = []IDMap{}
|
||||
)
|
||||
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
line := strings.TrimSpace(s.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// see: man 7 user_namespaces
|
||||
p := IDMap{}
|
||||
parseParts(strings.Fields(line), &p.ID, &p.ParentID, &p.Count)
|
||||
|
||||
if filter == nil || filter(p) {
|
||||
out = append(out, p)
|
||||
}
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
package utils
|
||||
|
||||
import (
|
||||
"crypto/rand"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"os"
|
||||
|
@ -17,19 +15,6 @@ const (
|
|||
exitSignalOffset = 128
|
||||
)
|
||||
|
||||
// GenerateRandomName returns a new name joined with a prefix. The size
// specified is used to truncate the randomly generated value.
//
// The random part is the hex encoding of 32 random bytes, i.e. at most 64
// characters. size is clamped to the range [0, 64], so an out-of-range
// size never causes a slice-bounds panic. An error is returned only when
// the random source cannot be read.
func GenerateRandomName(prefix string, size int) (string, error) {
	id := make([]byte, 32)
	if _, err := io.ReadFull(rand.Reader, id); err != nil {
		return "", err
	}
	switch {
	case size > 64:
		size = 64
	case size < 0:
		// A negative size would panic in the slice expression below.
		size = 0
	}
	return prefix + hex.EncodeToString(id)[:size], nil
}
|
||||
|
||||
// ResolveRootfs ensures that the current working directory is
|
||||
// not a symlink and returns the absolute path to the rootfs
|
||||
func ResolveRootfs(uncleanRootfs string) (string, error) {
|
||||
|
|
Loading…
Reference in New Issue