mirror of https://github.com/k3s-io/k3s
Merge pull request #506 from AkihiroSuda/bump-up-rootlesskit
rootless: use built-in port driver (pull/536/head)
commit
9f4e43fea6
|
@ -13,7 +13,7 @@ import (
|
||||||
"github.com/rootless-containers/rootlesskit/pkg/copyup/tmpfssymlink"
|
"github.com/rootless-containers/rootlesskit/pkg/copyup/tmpfssymlink"
|
||||||
"github.com/rootless-containers/rootlesskit/pkg/network/slirp4netns"
|
"github.com/rootless-containers/rootlesskit/pkg/network/slirp4netns"
|
||||||
"github.com/rootless-containers/rootlesskit/pkg/parent"
|
"github.com/rootless-containers/rootlesskit/pkg/parent"
|
||||||
"github.com/rootless-containers/rootlesskit/pkg/port/socat"
|
portbuiltin "github.com/rootless-containers/rootlesskit/pkg/port/builtin"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -103,7 +103,7 @@ func createParentOpt(stateDir string) (*parent.Opt, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
opt.NetworkDriver = slirp4netns.NewParentDriver(binary, mtu, ipnet, disableHostLoopback, "")
|
opt.NetworkDriver = slirp4netns.NewParentDriver(binary, mtu, ipnet, disableHostLoopback, "")
|
||||||
opt.PortDriver, err = socat.NewParentDriver(&logrusDebugWriter{})
|
opt.PortDriver, err = portbuiltin.NewParentDriver(&logrusDebugWriter{}, stateDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -127,6 +127,7 @@ func createChildOpt() (*child.Opt, error) {
|
||||||
opt.TargetCmd = os.Args
|
opt.TargetCmd = os.Args
|
||||||
opt.PipeFDEnvKey = pipeFD
|
opt.PipeFDEnvKey = pipeFD
|
||||||
opt.NetworkDriver = slirp4netns.NewChildDriver()
|
opt.NetworkDriver = slirp4netns.NewChildDriver()
|
||||||
|
opt.PortDriver = portbuiltin.NewChildDriver(&logrusDebugWriter{})
|
||||||
opt.CopyUpDirs = []string{"/etc", "/run"}
|
opt.CopyUpDirs = []string{"/etc", "/run"}
|
||||||
opt.CopyUpDriver = tmpfssymlink.NewChildDriver()
|
opt.CopyUpDriver = tmpfssymlink.NewChildDriver()
|
||||||
return opt, nil
|
return opt, nil
|
||||||
|
|
|
@ -237,7 +237,7 @@ import:
|
||||||
- package: github.com/robfig/cron
|
- package: github.com/robfig/cron
|
||||||
version: v1-53-gdf38d32658d878
|
version: v1-53-gdf38d32658d878
|
||||||
- package: github.com/rootless-containers/rootlesskit
|
- package: github.com/rootless-containers/rootlesskit
|
||||||
version: 893c1c3de71f54c301fdb85a7c0dd15c1933c159
|
version: v0.4.1
|
||||||
- package: github.com/russross/blackfriday
|
- package: github.com/russross/blackfriday
|
||||||
version: v1.4-2-g300106c228d52c
|
version: v1.4-2-g300106c228d52c
|
||||||
- package: github.com/seccomp/libseccomp-golang
|
- package: github.com/seccomp/libseccomp-golang
|
||||||
|
|
|
@ -126,7 +126,7 @@ gopkg.in/yaml.v2 v2.2.1
|
||||||
github.com/ibuildthecloud/kvsql 1afc2d8ad7d7e263c1971b05cb37e83aa5562561 https://github.com/erikwilson/rancher-kvsql.git
|
github.com/ibuildthecloud/kvsql 1afc2d8ad7d7e263c1971b05cb37e83aa5562561 https://github.com/erikwilson/rancher-kvsql.git
|
||||||
|
|
||||||
# rootless
|
# rootless
|
||||||
github.com/rootless-containers/rootlesskit 893c1c3de71f54c301fdb85a7c0dd15c1933c159
|
github.com/rootless-containers/rootlesskit v0.4.1
|
||||||
github.com/theckman/go-flock v0.7.1
|
github.com/theckman/go-flock v0.7.1
|
||||||
|
|
||||||
github.com/morikuni/aec 39771216ff4c63d11f5e604076f9c45e8be1067b
|
github.com/morikuni/aec 39771216ff4c63d11f5e604076f9c45e8be1067b
|
||||||
|
|
|
@ -53,6 +53,19 @@
|
||||||
revision = "a7962380ca08b5a188038c69871b8d3fbdf31e89"
|
revision = "a7962380ca08b5a188038c69871b8d3fbdf31e89"
|
||||||
version = "v1.7.0"
|
version = "v1.7.0"
|
||||||
|
|
||||||
|
[[projects]]
|
||||||
|
branch = "master"
|
||||||
|
digest = "1:a1d58d11ad642c9760251e0846ee37ad1237e83f8245c8d310e3eba0d76bb7f4"
|
||||||
|
name = "github.com/insomniacslk/dhcp"
|
||||||
|
packages = [
|
||||||
|
"dhcpv4",
|
||||||
|
"dhcpv4/client4",
|
||||||
|
"iana",
|
||||||
|
"rfc1035label",
|
||||||
|
]
|
||||||
|
pruneopts = "UT"
|
||||||
|
revision = "625d653f51917b167cc2e53ef8fe595e85dd5fa4"
|
||||||
|
|
||||||
[[projects]]
|
[[projects]]
|
||||||
branch = "master"
|
branch = "master"
|
||||||
digest = "1:dd1e851f4e3a5ee3f51613c79a01666e04e5e9289e2da3f6f815c008010fc02f"
|
digest = "1:dd1e851f4e3a5ee3f51613c79a01666e04e5e9289e2da3f6f815c008010fc02f"
|
||||||
|
@ -128,6 +141,18 @@
|
||||||
revision = "392e7fae8f1b0bdbd67dad7237d23f618feb6dbb"
|
revision = "392e7fae8f1b0bdbd67dad7237d23f618feb6dbb"
|
||||||
version = "v0.7.1"
|
version = "v0.7.1"
|
||||||
|
|
||||||
|
[[projects]]
|
||||||
|
branch = "master"
|
||||||
|
digest = "1:35bdf197f9a11e01b3e2d9a35af57cd2b0ed0023d7c4d63a3e1bd7762275a17a"
|
||||||
|
name = "github.com/u-root/u-root"
|
||||||
|
packages = [
|
||||||
|
"pkg/rand",
|
||||||
|
"pkg/ubinary",
|
||||||
|
"pkg/uio",
|
||||||
|
]
|
||||||
|
pruneopts = "UT"
|
||||||
|
revision = "34b144e97033ea76860cc0fa6a69256cd5fe2133"
|
||||||
|
|
||||||
[[projects]]
|
[[projects]]
|
||||||
digest = "1:b24d38b282bacf9791408a080f606370efa3d364e4b5fd9ba0f7b87786d3b679"
|
digest = "1:b24d38b282bacf9791408a080f606370efa3d364e4b5fd9ba0f7b87786d3b679"
|
||||||
name = "github.com/urfave/cli"
|
name = "github.com/urfave/cli"
|
||||||
|
@ -138,9 +163,15 @@
|
||||||
|
|
||||||
[[projects]]
|
[[projects]]
|
||||||
branch = "master"
|
branch = "master"
|
||||||
digest = "1:7ccb2dbb79f60b4e530c7dc3a0b3681b3869cf7ea91c645735e4dd4e6e3264fd"
|
digest = "1:19df0dab53c3fdea922472b4199990146f3db473881fdc59317d8b4800df145a"
|
||||||
name = "golang.org/x/net"
|
name = "golang.org/x/net"
|
||||||
packages = ["context/ctxhttp"]
|
packages = [
|
||||||
|
"bpf",
|
||||||
|
"context/ctxhttp",
|
||||||
|
"internal/iana",
|
||||||
|
"internal/socket",
|
||||||
|
"ipv4",
|
||||||
|
]
|
||||||
pruneopts = "UT"
|
pruneopts = "UT"
|
||||||
revision = "74de082e2cca95839e88aa0aeee5aadf6ce7710f"
|
revision = "74de082e2cca95839e88aa0aeee5aadf6ce7710f"
|
||||||
|
|
||||||
|
@ -162,6 +193,8 @@
|
||||||
"github.com/docker/docker/pkg/idtools",
|
"github.com/docker/docker/pkg/idtools",
|
||||||
"github.com/google/uuid",
|
"github.com/google/uuid",
|
||||||
"github.com/gorilla/mux",
|
"github.com/gorilla/mux",
|
||||||
|
"github.com/insomniacslk/dhcp/dhcpv4",
|
||||||
|
"github.com/insomniacslk/dhcp/dhcpv4/client4",
|
||||||
"github.com/jamescun/tuntap",
|
"github.com/jamescun/tuntap",
|
||||||
"github.com/moby/vpnkit/go/pkg/vmnet",
|
"github.com/moby/vpnkit/go/pkg/vmnet",
|
||||||
"github.com/pkg/errors",
|
"github.com/pkg/errors",
|
||||||
|
|
|
@ -39,3 +39,7 @@
|
||||||
[prune]
|
[prune]
|
||||||
go-tests = true
|
go-tests = true
|
||||||
unused-packages = true
|
unused-packages = true
|
||||||
|
|
||||||
|
[[constraint]]
|
||||||
|
branch = "master"
|
||||||
|
name = "github.com/insomniacslk/dhcp"
|
||||||
|
|
|
@ -1,15 +1,40 @@
|
||||||
# RootlessKit: the gate to the rootless world
|
# RootlessKit: the gate to the rootless world
|
||||||
|
|
||||||
`rootlesskit` is a kind of Linux-native "fake root" utility, made for mainly running [Docker and Kubernetes as an unprivileged user](https://github.com/rootless-containers/usernetes).
|
RootlessKit is a kind of Linux-native "fake root" utility, made mainly for running [Docker and Kubernetes as an unprivileged user](https://github.com/rootless-containers/usernetes), so as to protect the real root on the host from potential container-breakout attacks.
|
||||||
|
|
||||||
`rootlesskit` does an equivalent of [`unshare(1)`](http://man7.org/linux/man-pages/man1/unshare.1.html) and [`newuidmap(1)`](http://man7.org/linux/man-pages/man1/newuidmap.1.html)/[`newgidmap(1)`](http://man7.org/linux/man-pages/man1/newgidmap.1.html) in a single command, for creating unprivileged [`user_namespaces(7)`](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) and [`mount_namespaces(7)`](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) with [`subuid(5)`](http://man7.org/linux/man-pages/man5/subuid.5.html) and [`subgid(5)`](http://man7.org/linux/man-pages/man5/subgid.5.html).
|
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
|
||||||
|
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
|
||||||
|
|
||||||
`rootlesskit` also supports network namespace isolation and userspace NAT using ["slirp"](#slirp).
|
|
||||||
Kernel NAT using SUID-enabled [`lxc-user-nic(1)`](https://linuxcontainers.org/lxc/manpages/man1/lxc-user-nic.1.html) is also on the plan.
|
- [What it actually does](#what-it-actually-does)
|
||||||
|
- [Projects using RootlessKit](#projects-using-rootlesskit)
|
||||||
|
- [Setup](#setup)
|
||||||
|
- [Requirements](#requirements)
|
||||||
|
- [Distribution-specific hints](#distribution-specific-hints)
|
||||||
|
- [Usage](#usage)
|
||||||
|
- [State directory](#state-directory)
|
||||||
|
- [Environment variables](#environment-variables)
|
||||||
|
- [Network Drivers](#network-drivers)
|
||||||
|
- [`--net=host` (default)](#--nethost-default)
|
||||||
|
- [`--net=slirp4netns` (recommended)](#--netslirp4netns-recommended)
|
||||||
|
- [`--net=vpnkit`](#--netvpnkit)
|
||||||
|
- [`--net=lxc-user-nic` (experimental)](#--netlxc-user-nic-experimental)
|
||||||
|
- [Port Drivers](#port-drivers)
|
||||||
|
|
||||||
|
<!-- END doctoc generated TOC please keep comment here to allow auto update -->
|
||||||
|
|
||||||
|
## What it actually does
|
||||||
|
|
||||||
|
RootlessKit creates [`user_namespaces(7)`](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) and [`mount_namespaces(7)`](http://man7.org/linux/man-pages/man7/mount_namespaces.7.html), and executes [`newuidmap(1)`](http://man7.org/linux/man-pages/man1/newuidmap.1.html)/[`newgidmap(1)`](http://man7.org/linux/man-pages/man1/newgidmap.1.html) along with [`subuid(5)`](http://man7.org/linux/man-pages/man5/subuid.5.html) and [`subgid(5)`](http://man7.org/linux/man-pages/man5/subgid.5.html).
|
||||||
|
|
||||||
|
RootlessKit also supports isolating [`network_namespaces(7)`](http://man7.org/linux/man-pages/man7/network_namespaces.7.html) with userspace NAT using ["slirp"](#network-drivers).
|
||||||
|
Kernel NAT using SUID-enabled [`lxc-user-nic(1)`](https://linuxcontainers.org/lxc/manpages/man1/lxc-user-nic.1.html) is also experimentally supported.
|
||||||
|
|
||||||
## Projects using RootlessKit
|
## Projects using RootlessKit
|
||||||
|
|
||||||
|
* [Docker/Moby](https://get.docker.com/rootless)
|
||||||
* [Usernetes](https://github.com/rootless-containers/usernetes): Docker & Kubernetes, installable under a non-root user's `$HOME`.
|
* [Usernetes](https://github.com/rootless-containers/usernetes): Docker & Kubernetes, installable under a non-root user's `$HOME`.
|
||||||
|
* [k3s](https://k3s.io/): Lightweight Kubernetes
|
||||||
* [BuildKit](https://github.com/moby/buildkit): Next-generation `docker build` backend
|
* [BuildKit](https://github.com/moby/buildkit): Next-generation `docker build` backend
|
||||||
|
|
||||||
## Setup
|
## Setup
|
||||||
|
@ -19,22 +44,37 @@ $ go get github.com/rootless-containers/rootlesskit/cmd/rootlesskit
|
||||||
$ go get github.com/rootless-containers/rootlesskit/cmd/rootlessctl
|
$ go get github.com/rootless-containers/rootlesskit/cmd/rootlessctl
|
||||||
```
|
```
|
||||||
|
|
||||||
Requirements:
|
or just run `make` to make binaries under `./bin` directory.
|
||||||
* Some distros such as Debian (excluding Ubuntu) and Arch Linux require `sudo sh -c "echo 1 > /proc/sys/kernel/unprivileged_userns_clone"`.
|
|
||||||
* `newuidmap` and `newgidmap` need to be installed on the host. These commands are provided by the `uidmap` package on most distros.
|
### Requirements
|
||||||
* `/etc/subuid` and `/etc/subgid` should contain >= 65536 sub-IDs. e.g. `penguin:231072:65536`.
|
|
||||||
|
* `newuidmap` and `newgidmap` need to be installed on the host. These commands are provided by the `uidmap` package on most distributions.
|
||||||
|
|
||||||
|
* `/etc/subuid` and `/etc/subgid` should contain at least 65536 sub-IDs, e.g. `penguin:231072:65536`. These files are automatically configured on most distributions.
|
||||||
|
|
||||||
```console
|
```console
|
||||||
$ id -u
|
$ id -u
|
||||||
1001
|
1001
|
||||||
$ whoami
|
$ whoami
|
||||||
penguin
|
penguin
|
||||||
$ grep ^$(whoami): /etc/subuid
|
$ grep "^$(whoami):" /etc/subuid
|
||||||
penguin:231072:65536
|
penguin:231072:65536
|
||||||
$ grep ^$(whoami): /etc/subgid
|
$ grep "^$(whoami):" /etc/subgid
|
||||||
penguin:231072:65536
|
penguin:231072:65536
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Distribution-specific hints
|
||||||
|
|
||||||
|
Debian (excluding Ubuntu):
|
||||||
|
* `sudo sh -c "echo 1 > /proc/sys/kernel/unprivileged_userns_clone"` is required
|
||||||
|
|
||||||
|
Arch Linux:
|
||||||
|
* `sudo sh -c "echo 1 > /proc/sys/kernel/unprivileged_userns_clone"` is required
|
||||||
|
|
||||||
|
RHEL/CentOS 7:
|
||||||
|
* `sudo sh -c "echo 28633 > /proc/sys/user/max_user_namespaces"` is required
|
||||||
|
* [COPR package `vbatts/shadow-utils-newxidmap`](https://copr.fedorainfracloud.org/coprs/vbatts/shadow-utils-newxidmap/) needs to be installed
|
||||||
|
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
|
@ -70,10 +110,10 @@ rootlesskit$ rm /etc/resolv.conf
|
||||||
rootlesskit$ vi /etc/resolv.conf
|
rootlesskit$ vi /etc/resolv.conf
|
||||||
```
|
```
|
||||||
|
|
||||||
You can even create network namespaces with [Slirp](#slirp):
|
You can even create network namespaces with [Slirp](#network-drivers):
|
||||||
|
|
||||||
```console
|
```console
|
||||||
$ rootlesskit --copy-up=/etc --copy-up=/run --net=slirp4netns bash
|
$ rootlesskit --copy-up=/etc --copy-up=/run --net=slirp4netns --disable-host-loopback bash
|
||||||
rootlesskit$ ip netns add foo
|
rootlesskit$ ip netns add foo
|
||||||
...
|
...
|
||||||
```
|
```
|
||||||
|
@ -95,7 +135,7 @@ allow
|
||||||
Full CLI options:
|
Full CLI options:
|
||||||
|
|
||||||
```console
|
```console
|
||||||
$ rootlesskit --help
|
|
||||||
NAME:
|
NAME:
|
||||||
rootlesskit - the gate to the rootless world
|
rootlesskit - the gate to the rootless world
|
||||||
|
|
||||||
|
@ -103,40 +143,37 @@ USAGE:
|
||||||
rootlesskit [global options] command [command options] [arguments...]
|
rootlesskit [global options] command [command options] [arguments...]
|
||||||
|
|
||||||
VERSION:
|
VERSION:
|
||||||
0.3.0-alpha.0
|
0.3.0+dev
|
||||||
|
|
||||||
COMMANDS:
|
COMMANDS:
|
||||||
help, h Shows a list of commands or help for one command
|
help, h Shows a list of commands or help for one command
|
||||||
|
|
||||||
GLOBAL OPTIONS:
|
GLOBAL OPTIONS:
|
||||||
--debug debug mode
|
--debug debug mode
|
||||||
--state-dir value state directory
|
--state-dir value state directory
|
||||||
--net value network driver [host, slirp4netns, vpnkit, vdeplug_slirp] (default: "host")
|
--net value network driver [host, slirp4netns, vpnkit, lxc-user-nic(experimental), vdeplug_slirp(deprecated)] (default: "host")
|
||||||
--slirp4netns-binary value path of slirp4netns binary for --net=slirp4netns (default: "slirp4netns")
|
--slirp4netns-binary value path of slirp4netns binary for --net=slirp4netns (default: "slirp4netns")
|
||||||
--vpnkit-binary value path of VPNKit binary for --net=vpnkit (default: "vpnkit")
|
--vpnkit-binary value path of VPNKit binary for --net=vpnkit (default: "vpnkit")
|
||||||
--mtu value MTU for non-host network (default: 65520 for slirp4netns, 1500 for others) (default: 0)
|
--lxc-user-nic-binary value path of lxc-user-nic binary for --net=lxc-user-nic (default: "/usr/lib/x86_64-linux-gnu/lxc/lxc-user-nic")
|
||||||
--cidr value CIDR for slirp4netns network (default: 10.0.2.0/24, requires slirp4netns v0.3.0+ for custom CIDR)
|
--lxc-user-nic-bridge value lxc-user-nic bridge name (default: "lxcbr0")
|
||||||
--disable-host-loopback prohibit connecting to 127.0.0.1:* on the host namespace
|
--mtu value MTU for non-host network (default: 65520 for slirp4netns, 1500 for others) (default: 0)
|
||||||
--copy-up value mount a filesystem and copy-up the contents. e.g. "--copy-up=/etc" (typically required for non-host network)
|
--cidr value CIDR for slirp4netns network (default: 10.0.2.0/24, requires slirp4netns v0.3.0+ for custom CIDR)
|
||||||
--copy-up-mode value copy-up mode [tmpfs+symlink] (default: "tmpfs+symlink")
|
--disable-host-loopback prohibit connecting to 127.0.0.1:* on the host namespace
|
||||||
--port-driver value port driver for non-host network. [none, socat] (default: "none")
|
--copy-up value mount a filesystem and copy-up the contents. e.g. "--copy-up=/etc" (typically required for non-host network)
|
||||||
--help, -h show help
|
--copy-up-mode value copy-up mode [tmpfs+symlink] (default: "tmpfs+symlink")
|
||||||
--version, -v print the version
|
--port-driver value port driver for non-host network. [none, socat, slirp4netns, builtin(experimental)] (default: "none")
|
||||||
```
|
--help, -h show help
|
||||||
|
--version, -v print the version
|
||||||
## Building from source
|
|
||||||
`rootlesskit` and `rootlessctl` can be built from source using:
|
|
||||||
|
|
||||||
```
|
|
||||||
make
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## State directory
|
## State directory
|
||||||
|
|
||||||
The following files will be created in the `--state-dir` directory:
|
The following files will be created in the state directory, which can be specified with `--state-dir`:
|
||||||
* `lock`: lock file
|
* `lock`: lock file
|
||||||
* `child_pid`: decimal PID text that can be used for `nsenter(1)`.
|
* `child_pid`: decimal PID text that can be used for `nsenter(1)`.
|
||||||
* `api.sock`: REST API socket for `rootlessctl`. See [Port forwarding](#port-forwarding) section.
|
* `api.sock`: REST API socket for `rootlessctl`. See [Port Drivers](#port-drivers) section.
|
||||||
|
|
||||||
|
If `--state-dir` is not specified, RootlessKit creates a temporary state directory on `/tmp` and removes it on exit.
|
||||||
|
|
||||||
Undocumented files are subject to change.
|
Undocumented files are subject to change.
|
||||||
|
|
||||||
|
@ -147,20 +184,95 @@ The following environment variables will be set for the child process:
|
||||||
|
|
||||||
Undocumented environment variables are subject to change.
|
Undocumented environment variables are subject to change.
|
||||||
|
|
||||||
## Slirp
|
## Network Drivers
|
||||||
|
|
||||||
Remarks:
|
RootlessKit provides several drivers for providing network connectivity:
|
||||||
* Specifying `--copy-up=/etc` is highly recommended unless `/etc/resolv.conf` is statically configured. Otherwise `/etc/resolv.conf` will be invalidated when it is recreated on the host, typically by NetworkManager or systemd-resolved.
|
|
||||||
|
|
||||||
Currently there are three slirp implementations supported by rootlesskit:
|
* `--net=host`: use host network namespace (default)
|
||||||
* `--net=slirp4netns`, using [slirp4netns](https://github.com/rootless-containers/slirp4netns) (recommended)
|
* `--net=slirp4netns`: use [slirp4netns](https://github.com/rootless-containers/slirp4netns) (recommended)
|
||||||
* `--net=vpnkit`, using [VPNKit](https://github.com/moby/vpnkit)
|
* `--net=vpnkit`: use [VPNKit](https://github.com/moby/vpnkit)
|
||||||
* `--net=vdeplug_slirp`, using [vdeplug_slirp](https://github.com/rd235/vdeplug_slirp)
|
* `--net=lxc-user-nic`: use `lxc-user-nic` (experimental)
|
||||||
|
* `--net=vdeplug_slirp`: use [vdeplug_slirp](https://github.com/rd235/vdeplug_slirp) (deprecated)
|
||||||
|
|
||||||
Usage:
|
[Benchmark (Aug 28, 2018)](https://github.com/rootless-containers/rootlesskit/pull/16):
|
||||||
|
|
||||||
|
| Implementation | MTU=1500 | MTU=4000 | MTU=16384 | MTU=65520
|
||||||
|
|---------------------------------|------------|-------------|-------------|------------
|
||||||
|
|(rootful veth) |(52.1 Gbps) | (45.4 Gbps) | (43.6 Gbps) | (51.5 Gbps)
|
||||||
|
|`rootlesskit --net=slirp4netns` | 1.07 Gbps | 2.78 Gbps | 4.55 Gbps | 9.21 Gbps
|
||||||
|
|`rootlesskit --net=vpnkit` | 514 Mbps | 526 Mbps | 540 Mbps |(Unsupported)
|
||||||
|
|`rootlesskit --net=vdeplug_slirp`| 763 Mbps |(Unsupported)|(Unsupported)|(Unsupported)
|
||||||
|
|
|
||||||
|
|
||||||
|
`--net=lxc-user-nic` is as fast as rootful veth.
|
||||||
|
|
||||||
|
### `--net=host` (default)
|
||||||
|
|
||||||
|
`--net=host` does not isolate the network namespace from the host.
|
||||||
|
|
||||||
|
Pros:
|
||||||
|
* No performance overhead
|
||||||
|
* Supports ICMP Echo (`ping`) when `/proc/sys/net/ipv4/ping_group_range` is configured
|
||||||
|
|
||||||
|
Cons:
|
||||||
|
* No permission for network-namespaced operations, e.g. creating iptables rules, running `tcpdump`
|
||||||
|
|
||||||
|
To route ICMP Echo packets (`ping`), you need to write the range of GIDs to [`net.ipv4.ping_group_range`](http://man7.org/linux/man-pages/man7/icmp.7.html).
|
||||||
|
|
||||||
```console
|
```console
|
||||||
$ rootlesskit --state-dir=/run/user/1001/rootlesskit/foo --net=slirp4netns --copy-up=/etc bash
|
$ sudo sh -c "echo 0 2147483647 > /proc/sys/net/ipv4/ping_group_range"
|
||||||
|
```
|
||||||
|
|
||||||
|
### `--net=slirp4netns` (recommended)
|
||||||
|
|
||||||
|
`--net=slirp4netns` isolates the network namespace from the host and launches [slirp4netns](https://github.com/rootless-containers/slirp4netns) for providing usermode networking.
|
||||||
|
|
||||||
|
Pros:
|
||||||
|
* Possible to perform network-namespaced operations, e.g. creating iptables rules, running `tcpdump`
|
||||||
|
* Supports ICMP Echo (`ping`) when `/proc/sys/net/ipv4/ping_group_range` is configured
|
||||||
|
|
||||||
|
Cons:
|
||||||
|
* Extra performance overhead (but still faster than `--net=vpnkit`)
|
||||||
|
* Supports only TCP, UDP, and ICMP Echo packets
|
||||||
|
|
||||||
|
|
||||||
|
To use `--net=slirp4netns`, you need to install slirp4netns.
|
||||||
|
v0.3.0 or later is recommended.
|
||||||
|
|
||||||
|
```console
|
||||||
|
$ sudo dnf install slirp4netns
|
||||||
|
```
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
```console
|
||||||
|
$ sudo apt-get install slirp4netns
|
||||||
|
```
|
||||||
|
|
||||||
|
If a binary package is not available for your distribution, install from source:
|
||||||
|
|
||||||
|
```console
|
||||||
|
$ git clone https://github.com/rootless-containers/slirp4netns
|
||||||
|
$ cd slirp4netns
|
||||||
|
$ ./autogen.sh && ./configure && make
|
||||||
|
$ cp slirp4netns ~/bin
|
||||||
|
```
|
||||||
|
|
||||||
|
The network is configured as follows by default:
|
||||||
|
* IP: 10.0.2.100/24
|
||||||
|
* Gateway: 10.0.2.2
|
||||||
|
* DNS: 10.0.2.3
|
||||||
|
|
||||||
|
The network configuration can be changed by specifying custom CIDR, e.g. `--cidr=10.0.3.0/24` (requires slirp4netns v0.3.0+).
|
||||||
|
|
||||||
|
Specifying `--copy-up=/etc` is highly recommended unless `/etc/resolv.conf` on the host is statically configured. Otherwise `/etc/resolv.conf` in the RootlessKit's mount namespace will be unmounted when `/etc/resolv.conf` on the host is recreated, typically by NetworkManager or systemd-resolved.
|
||||||
|
|
||||||
|
It is also highly recommended to specify `--disable-host-loopback`. Otherwise ports listening on 127.0.0.1 in the host are accessible as 10.0.2.2 in the RootlessKit's network namespace.
|
||||||
|
|
||||||
|
Example session:
|
||||||
|
|
||||||
|
```console
|
||||||
|
$ rootlesskit --net=slirp4netns --copy-up=/etc --disable-host-loopback bash
|
||||||
rootlesskit$ ip a
|
rootlesskit$ ip a
|
||||||
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
|
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
|
||||||
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
|
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
|
||||||
|
@ -168,13 +280,13 @@ rootlesskit$ ip a
|
||||||
valid_lft forever preferred_lft forever
|
valid_lft forever preferred_lft forever
|
||||||
inet6 ::1/128 scope host
|
inet6 ::1/128 scope host
|
||||||
valid_lft forever preferred_lft forever
|
valid_lft forever preferred_lft forever
|
||||||
2: tap0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
|
2: tap0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 65520 qdisc fq_codel state UP group default qlen 1000
|
||||||
link/ether 42:b6:8d:e4:02:c4 brd ff:ff:ff:ff:ff:ff
|
link/ether 46:dc:8d:09:fd:f2 brd ff:ff:ff:ff:ff:ff
|
||||||
inet 10.0.2.100/24 scope global tap0
|
inet 10.0.2.100/24 scope global tap0
|
||||||
valid_lft forever preferred_lft forever
|
valid_lft forever preferred_lft forever
|
||||||
inet6 fe80::40b6:8dff:fee4:2c4/64 scope link
|
inet6 fe80::44dc:8dff:fe09:fdf2/64 scope link
|
||||||
valid_lft forever preferred_lft forever
|
valid_lft forever preferred_lft forever
|
||||||
rootlesskit$ ip r
|
rootlesskit$ ip r
|
||||||
default via 10.0.2.2 dev tap0
|
default via 10.0.2.2 dev tap0
|
||||||
10.0.2.0/24 dev tap0 proto kernel scope link src 10.0.2.100
|
10.0.2.0/24 dev tap0 proto kernel scope link src 10.0.2.100
|
||||||
rootlesskit$ cat /etc/resolv.conf
|
rootlesskit$ cat /etc/resolv.conf
|
||||||
|
@ -183,23 +295,70 @@ rootlesskit$ curl https://www.google.com
|
||||||
<!doctype html><html ...>...</html>
|
<!doctype html><html ...>...</html>
|
||||||
```
|
```
|
||||||
|
|
||||||
Default network configuration for `--net=slirp4netns` and `--net=vdeplug_slirp`:
|
|
||||||
* IP: 10.0.2.100/24
|
|
||||||
* Gateway: 10.0.2.2
|
|
||||||
* DNS: 10.0.2.3
|
|
||||||
* Host: 10.0.2.2, 10.0.2.3
|
|
||||||
|
|
||||||
Default network configuration for `--net=vpnkit`:
|
### `--net=vpnkit`
|
||||||
|
|
||||||
|
`--net=vpnkit` isolates the network namespace from the host and launches [VPNKit](https://github.com/moby/vpnkit) for providing usermode networking.
|
||||||
|
|
||||||
|
Pros:
|
||||||
|
* Possible to perform network-namespaced operations, e.g. creating iptables rules, running `tcpdump`
|
||||||
|
|
||||||
|
Cons:
|
||||||
|
* Extra performance overhead
|
||||||
|
* Supports only TCP and UDP packets. No support for ICMP Echo (`ping`) unlike `--net=slirp4netns`, even if `/proc/sys/net/ipv4/ping_group_range` is configured.
|
||||||
|
|
||||||
|
To use `--net=vpnkit`, you need to install VPNKit.
|
||||||
|
|
||||||
|
```console
|
||||||
|
$ git clone https://github.com/moby/vpnkit.git
|
||||||
|
$ cd vpnkit
|
||||||
|
$ make
|
||||||
|
$ cp vpnkit.exe ~/bin/vpnkit
|
||||||
|
```
|
||||||
|
|
||||||
|
The network is configured as follows by default:
|
||||||
* IP: 192.168.65.3/24
|
* IP: 192.168.65.3/24
|
||||||
* Gateway: 192.168.65.1
|
* Gateway: 192.168.65.1
|
||||||
* DNS: 192.168.65.1
|
* DNS: 192.168.65.1
|
||||||
* Host: 192.168.65.2
|
|
||||||
|
|
||||||
`--net=slirp4netns` supports specifying custom CIDR, e.g. `--cidr=10.0.3.0/24` (requires slirp4netns v0.3.0+)
|
As in `--net=slirp4netns`, specifying `--copy-up=/etc` and `--disable-host-loopback` is highly recommended.
|
||||||
|
If `--disable-host-loopback` is not specified, ports listening on 127.0.0.1 in the host are accessible as 192.168.65.2 in the RootlessKit's network namespace.
|
||||||
|
|
||||||
It is highly recommended to disable host loopback address by specyfing `--disable-host-loopback`.
|
### `--net=lxc-user-nic` (experimental)
|
||||||
|
|
||||||
### Port forwarding
|
`--net=lxc-user-nic` isolates the network namespace from the host and launches the [`lxc-user-nic(1)`](https://linuxcontainers.org/lxc/manpages/man1/lxc-user-nic.1.html) SUID binary for providing kernel-mode NAT.
|
||||||
|
|
||||||
|
Pros:
|
||||||
|
* No performance overhead
|
||||||
|
* Possible to perform network-namespaced operations, e.g. creating iptables rules, running `tcpdump`
|
||||||
|
* Supports ICMP Echo (`ping`) without `/proc/sys/net/ipv4/ping_group_range` configuration
|
||||||
|
|
||||||
|
Cons:
|
||||||
|
* Less secure
|
||||||
|
* Needs `/etc/lxc/lxc-usernet` configuration
|
||||||
|
|
||||||
|
To use `lxc-user-nic`, you need to install `liblxc-common` package:
|
||||||
|
```console
|
||||||
|
$ sudo apt-get install liblxc-common
|
||||||
|
```
|
||||||
|
|
||||||
|
You also need to set up [`/etc/lxc/lxc-usernet`](https://linuxcontainers.org/lxc/manpages/man5/lxc-usernet.5.html):
|
||||||
|
```
|
||||||
|
# USERNAME TYPE BRIDGE COUNT
|
||||||
|
penguin veth lxcbr0 1
|
||||||
|
```
|
||||||
|
|
||||||
|
The `COUNT` value needs to be increased to run multiple RootlessKit instances with `--net=lxc-user-nic` simultaneously.
|
||||||
|
|
||||||
|
It may take a few seconds to configure the interface using DHCP.
|
||||||
|
|
||||||
|
If you start and stop RootlessKit too frequently, you might use up all available DHCP addresses.
|
||||||
|
You might need to reset `/var/lib/misc/dnsmasq.lxcbr0.leases` and restart the `lxc-net` service.
|
||||||
|
|
||||||
|
Currently, the MAC address is always set to a random address.
|
||||||
|
|
||||||
|
|
||||||
|
## Port Drivers
|
||||||
|
|
||||||
`rootlessctl` can be used for exposing the ports in the network namespace to the host network namespace.
|
`rootlessctl` can be used for exposing the ports in the network namespace to the host network namespace.
|
||||||
You also need to launch `rootlesskit` with `--port-driver=(socat|slirp4netns|builtin)`. `builtin` is the fastest but currently experimental.
|
You also need to launch `rootlesskit` with `--port-driver=(socat|slirp4netns|builtin)`. `builtin` is the fastest but currently experimental.
|
||||||
|
@ -207,7 +366,7 @@ You also need to launch `rootlesskit` with `--port-driver=(socat|slirp4netns|bui
|
||||||
For example, to expose 80 in the child as 8080 in the parent:
|
For example, to expose 80 in the child as 8080 in the parent:
|
||||||
|
|
||||||
```console
|
```console
|
||||||
$ rootlesskit --state-dir=/run/user/1001/rootlesskit/foo --net=slirp4netns --copy-up=/etc --port-driver=socat bash
|
$ rootlesskit --state-dir=/run/user/1001/rootlesskit/foo --net=slirp4netns --disable-host-loopback --copy-up=/etc --port-driver=builtin bash
|
||||||
rootlesskit$ rootlessctl --socket=/run/user/1001/rootlesskit/foo/api.sock add-ports 0.0.0.0:8080:80/tcp
|
rootlesskit$ rootlessctl --socket=/run/user/1001/rootlesskit/foo/api.sock add-ports 0.0.0.0:8080:80/tcp
|
||||||
1
|
1
|
||||||
rootlesskit$ rootlessctl --socket=/run/user/1001/rootlesskit/foo/api.sock list-ports
|
rootlesskit$ rootlessctl --socket=/run/user/1001/rootlesskit/foo/api.sock list-ports
|
||||||
|
@ -217,67 +376,9 @@ rootlesskit$ rootlessctl --socket=/run/user/1001/rootlesskit/foo/api.sock remove
|
||||||
1
|
1
|
||||||
```
|
```
|
||||||
|
|
||||||
You can also expose the ports manually without using the API socket.
|
You can also expose ports using `socat` and `nsenter` instead of RootlessKit's port drivers.
|
||||||
```console
|
```console
|
||||||
$ pid=$(cat /run/user/1001/rootlesskit/foo/child_pid)
|
$ pid=$(cat /run/user/1001/rootlesskit/foo/child_pid)
|
||||||
$ socat -t -- TCP-LISTEN:8080,reuseaddr,fork EXEC:"nsenter -U -n -t $pid socat -t -- STDIN TCP4\:127.0.0.1\:80"
|
$ socat -t -- TCP-LISTEN:8080,reuseaddr,fork EXEC:"nsenter -U -n -t $pid socat -t -- STDIN TCP4\:127.0.0.1\:80"
|
||||||
```
|
```
|
||||||
|
|
||||||
### Routing ping packets
|
|
||||||
|
|
||||||
To route ping packets, you need to set up `net.ipv4.ping_group_range` properly.
|
|
||||||
|
|
||||||
```console
|
|
||||||
$ sudo sh -c "echo 0 2147483647 > /proc/sys/net/ipv4/ping_group_range"
|
|
||||||
```
|
|
||||||
|
|
||||||
Note: routing ping packets is not supported for `--net=vpnkit`.
|
|
||||||
|
|
||||||
### Annex: benchmark (MTU=1500)
|
|
||||||
|
|
||||||
Aug 1, 2018, on Travis: https://travis-ci.org/rootless-containers/rootlesskit/builds/410721610
|
|
||||||
|
|
||||||
* `--net=slirp4netns`: 1.07 Gbits/sec
|
|
||||||
* `--net=vpnkit`: 528 Mbits/sec
|
|
||||||
* `--net=vdeplug_slirp`: 771 Mbits/sec
|
|
||||||
|
|
||||||
Note: slirp4netns can reach 8.18 Gbits/sec with MTU=65520: https://github.com/rootless-containers/slirp4netns/pull/20
|
|
||||||
|
|
||||||
### Annex: how to install `slirp4netns` (required for `--net=slirp4netns`)
|
|
||||||
|
|
||||||
See also https://github.com/rootless-containers/slirp4netns
|
|
||||||
|
|
||||||
```console
|
|
||||||
$ git clone https://github.com/rootless-containers/slirp4netns
|
|
||||||
$ cd slirp4netns
|
|
||||||
$ ./autogen.sh && ./configure && make
|
|
||||||
$ cp slirp4netns ~/bin
|
|
||||||
```
|
|
||||||
|
|
||||||
RPM is also available for Fedora: https://rpms.remirepo.net/rpmphp/zoom.php?rpm=slirp4netns
|
|
||||||
|
|
||||||
```console
|
|
||||||
$ sudo dnf install slirp4netns
|
|
||||||
```
|
|
||||||
|
|
||||||
### Annex: how to install VPNKit (required for `--net=vpnkit`)
|
|
||||||
|
|
||||||
See also https://github.com/moby/vpnkit
|
|
||||||
|
|
||||||
```console
|
|
||||||
$ git clone https://github.com/moby/vpnkit.git
|
|
||||||
$ cd vpnkit
|
|
||||||
$ make
|
|
||||||
$ cp vpnkit.exe ~/bin/vpnkit
|
|
||||||
```
|
|
||||||
|
|
||||||
### Annex: how to install `vdeplug_slirp` (required for `--net=vdeplug_slirp`)
|
|
||||||
|
|
||||||
You need to install the following components:
|
|
||||||
|
|
||||||
* https://github.com/rd235/s2argv-execs
|
|
||||||
* https://github.com/rd235/vdeplug4 (depends on `s2argv-execs`)
|
|
||||||
* https://github.com/rd235/libslirp
|
|
||||||
* https://github.com/rd235/vdeplug_slirp (depends on `vdeplug4` and `libslirp`)
|
|
||||||
|
|
||||||
Please refer to the README of each component.
|
|
||||||
|
|
|
@ -4,11 +4,13 @@ import (
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
"runtime"
|
||||||
"strconv"
|
"strconv"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
"github.com/rootless-containers/rootlesskit/pkg/common"
|
"github.com/rootless-containers/rootlesskit/pkg/common"
|
||||||
"github.com/rootless-containers/rootlesskit/pkg/copyup"
|
"github.com/rootless-containers/rootlesskit/pkg/copyup"
|
||||||
|
@ -76,12 +78,12 @@ func activateLoopback() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func activateTap(tap, ip string, netmask int, gateway string, mtu int) error {
|
func activateDev(dev, ip string, netmask int, gateway string, mtu int) error {
|
||||||
cmds := [][]string{
|
cmds := [][]string{
|
||||||
{"ip", "link", "set", tap, "up"},
|
{"ip", "link", "set", dev, "up"},
|
||||||
{"ip", "link", "set", "dev", tap, "mtu", strconv.Itoa(mtu)},
|
{"ip", "link", "set", "dev", dev, "mtu", strconv.Itoa(mtu)},
|
||||||
{"ip", "addr", "add", ip + "/" + strconv.Itoa(netmask), "dev", tap},
|
{"ip", "addr", "add", ip + "/" + strconv.Itoa(netmask), "dev", dev},
|
||||||
{"ip", "route", "add", "default", "via", gateway, "dev", tap},
|
{"ip", "route", "add", "default", "via", gateway, "dev", dev},
|
||||||
}
|
}
|
||||||
if err := common.Execs(os.Stderr, os.Environ(), cmds); err != nil {
|
if err := common.Execs(os.Stderr, os.Environ(), cmds); err != nil {
|
||||||
return errors.Wrapf(err, "executing %v", cmds)
|
return errors.Wrapf(err, "executing %v", cmds)
|
||||||
|
@ -119,11 +121,11 @@ func setupNet(msg common.Message, etcWasCopied bool, driver network.ChildDriver)
|
||||||
if err := activateLoopback(); err != nil {
|
if err := activateLoopback(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
tap, err := driver.ConfigureTap(msg.Network)
|
dev, err := driver.ConfigureNetworkChild(&msg.Network)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if err := activateTap(tap, msg.Network.IP, msg.Network.Netmask, msg.Network.Gateway, msg.Network.MTU); err != nil {
|
if err := activateDev(dev, msg.Network.IP, msg.Network.Netmask, msg.Network.Gateway, msg.Network.MTU); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if etcWasCopied {
|
if etcWasCopied {
|
||||||
|
@ -187,6 +189,14 @@ func Child(opt Opt) error {
|
||||||
if msg.Stage != 1 {
|
if msg.Stage != 1 {
|
||||||
return errors.Errorf("expected stage 1, got stage %d", msg.Stage)
|
return errors.Errorf("expected stage 1, got stage %d", msg.Stage)
|
||||||
}
|
}
|
||||||
|
// The parent calls child with Pdeathsig, but it is cleared when newuidmap SUID binary is called
|
||||||
|
// https://github.com/rootless-containers/rootlesskit/issues/65#issuecomment-492343646
|
||||||
|
runtime.LockOSThread()
|
||||||
|
err = unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0)
|
||||||
|
runtime.UnlockOSThread()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
os.Unsetenv(opt.PipeFDEnvKey)
|
os.Unsetenv(opt.PipeFDEnvKey)
|
||||||
if err := pipeR.Close(); err != nil {
|
if err := pipeR.Close(); err != nil {
|
||||||
return errors.Wrapf(err, "failed to close fd %d", pipeFD)
|
return errors.Wrapf(err, "failed to close fd %d", pipeFD)
|
||||||
|
|
|
@ -22,6 +22,7 @@ type Message1 struct {
|
||||||
|
|
||||||
// NetworkMessage is empty for HostNetwork.
|
// NetworkMessage is empty for HostNetwork.
|
||||||
type NetworkMessage struct {
|
type NetworkMessage struct {
|
||||||
|
Dev string
|
||||||
IP string
|
IP string
|
||||||
Netmask int
|
Netmask int
|
||||||
Gateway string
|
Gateway string
|
||||||
|
|
|
@ -14,5 +14,7 @@ type ParentDriver interface {
|
||||||
|
|
||||||
// ChildDriver is called from the child namespace
|
// ChildDriver is called from the child namespace
|
||||||
type ChildDriver interface {
|
type ChildDriver interface {
|
||||||
ConfigureTap(netmsg common.NetworkMessage) (tap string, err error)
|
// netmsg MAY be modified.
|
||||||
|
// devName is like "tap" or "eth0"
|
||||||
|
ConfigureNetworkChild(netmsg *common.NetworkMessage) (devName string, err error)
|
||||||
}
|
}
|
||||||
|
|
10
vendor/github.com/rootless-containers/rootlesskit/pkg/network/slirp4netns/slirp4netns.go
generated
vendored
10
vendor/github.com/rootless-containers/rootlesskit/pkg/network/slirp4netns/slirp4netns.go
generated
vendored
|
@ -41,8 +41,6 @@ func NewParentDriver(binary string, mtu int, ipnet *net.IPNet, disableHostLoopba
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const opaqueTap = "slirp4netns.tap"
|
|
||||||
|
|
||||||
type parentDriver struct {
|
type parentDriver struct {
|
||||||
binary string
|
binary string
|
||||||
mtu int
|
mtu int
|
||||||
|
@ -87,10 +85,8 @@ func (d *parentDriver) ConfigureNetwork(childPID int, stateDir string) (*common.
|
||||||
return nil, common.Seq(cleanups), errors.Wrapf(err, "executing %v", cmd)
|
return nil, common.Seq(cleanups), errors.Wrapf(err, "executing %v", cmd)
|
||||||
}
|
}
|
||||||
netmsg := common.NetworkMessage{
|
netmsg := common.NetworkMessage{
|
||||||
|
Dev: tap,
|
||||||
MTU: d.mtu,
|
MTU: d.mtu,
|
||||||
Opaque: map[string]string{
|
|
||||||
opaqueTap: tap,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
if d.ipnet != nil {
|
if d.ipnet != nil {
|
||||||
// TODO: get the actual configuration via slirp4netns API?
|
// TODO: get the actual configuration via slirp4netns API?
|
||||||
|
@ -126,8 +122,8 @@ func NewChildDriver() network.ChildDriver {
|
||||||
type childDriver struct {
|
type childDriver struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *childDriver) ConfigureTap(netmsg common.NetworkMessage) (string, error) {
|
func (d *childDriver) ConfigureNetworkChild(netmsg *common.NetworkMessage) (string, error) {
|
||||||
tap := netmsg.Opaque[opaqueTap]
|
tap := netmsg.Dev
|
||||||
if tap == "" {
|
if tap == "" {
|
||||||
return "", errors.New("could not determine the preconfigured tap")
|
return "", errors.New("could not determine the preconfigured tap")
|
||||||
}
|
}
|
||||||
|
|
487
vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/builtin.go
generated
vendored
Normal file
487
vendor/github.com/rootless-containers/rootlesskit/pkg/port/builtin/builtin.go
generated
vendored
Normal file
|
@ -0,0 +1,487 @@
|
||||||
|
package builtin
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"io/ioutil"
|
||||||
|
"net"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sync"
|
||||||
|
"syscall"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/pkg/errors"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
|
"github.com/rootless-containers/rootlesskit/pkg/msgutil"
|
||||||
|
"github.com/rootless-containers/rootlesskit/pkg/port"
|
||||||
|
"github.com/rootless-containers/rootlesskit/pkg/port/portutil"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Keys of the opaque string map that the parent driver hands to the child
// driver (filled in by OpaqueForChild, looked up by RunChildDriver).
const (
|
||||||
|
// opaqueKeySocketPath is the key for the path of the UNIX socket the child listens on.
opaqueKeySocketPath = "builtin.socketpath"
|
||||||
|
// opaqueKeyChildReadyPipePath is the key for the path of the FIFO used to signal child readiness.
opaqueKeyChildReadyPipePath = "builtin.readypipepath"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NewParentDriver for builtin driver.
|
||||||
|
func NewParentDriver(logWriter io.Writer, stateDir string) (port.ParentDriver, error) {
|
||||||
|
// TODO: consider using socketpair FD instead of socket file
|
||||||
|
socketPath := filepath.Join(stateDir, ".bp.sock")
|
||||||
|
childReadyPipePath := filepath.Join(stateDir, ".bp-ready.pipe")
|
||||||
|
// remove the path just incase the previous rootlesskit instance crashed
|
||||||
|
if err := os.RemoveAll(childReadyPipePath); err != nil {
|
||||||
|
return nil, errors.Wrapf(err, "cannot remove %s", childReadyPipePath)
|
||||||
|
}
|
||||||
|
if err := syscall.Mkfifo(childReadyPipePath, 0600); err != nil {
|
||||||
|
return nil, errors.Wrapf(err, "cannot mkfifo %s", childReadyPipePath)
|
||||||
|
}
|
||||||
|
d := driver{
|
||||||
|
logWriter: logWriter,
|
||||||
|
socketPath: socketPath,
|
||||||
|
childReadyPipePath: childReadyPipePath,
|
||||||
|
ports: make(map[int]*port.Status, 0),
|
||||||
|
stoppers: make(map[int]func() error, 0),
|
||||||
|
nextID: 1,
|
||||||
|
}
|
||||||
|
return &d, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// driver is the parent-side state of the builtin port driver.
type driver struct {
|
||||||
|
// logWriter is passed to the forwarding routines for their diagnostics.
logWriter io.Writer
|
||||||
|
// socketPath is the UNIX socket dialed to reach the child driver.
socketPath string
|
||||||
|
// childReadyPipePath is the FIFO read in RunParentDriver to wait for the child.
childReadyPipePath string
|
||||||
|
// mu is held while ports, stoppers and nextID are accessed.
mu sync.Mutex
|
||||||
|
// ports maps a port ID to its status, as reported by ListPorts.
ports map[int]*port.Status
|
||||||
|
// stoppers maps a port ID to the function that stops its forwarding routines.
stoppers map[int]func() error
|
||||||
|
// nextID is the ID that the next successful AddPort will assign.
nextID int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *driver) OpaqueForChild() map[string]string {
|
||||||
|
return map[string]string{
|
||||||
|
opaqueKeySocketPath: d.socketPath,
|
||||||
|
opaqueKeyChildReadyPipePath: d.childReadyPipePath,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *driver) RunParentDriver(initComplete chan struct{}, quit <-chan struct{}, _ *port.ChildContext) error {
|
||||||
|
childReadyPipeR, err := os.OpenFile(d.childReadyPipePath, os.O_RDONLY, os.ModeNamedPipe)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if _, err = ioutil.ReadAll(childReadyPipeR); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
childReadyPipeR.Close()
|
||||||
|
var dialer net.Dialer
|
||||||
|
conn, err := dialer.Dial("unix", d.socketPath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = initiate(conn.(*net.UnixConn))
|
||||||
|
conn.Close()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
initComplete <- struct{}{}
|
||||||
|
<-quit
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *driver) AddPort(ctx context.Context, spec port.Spec) (*port.Status, error) {
|
||||||
|
d.mu.Lock()
|
||||||
|
err := portutil.ValidatePortSpec(spec, d.ports)
|
||||||
|
d.mu.Unlock()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
routineStopCh := make(chan struct{})
|
||||||
|
routineStop := func() error {
|
||||||
|
close(routineStopCh)
|
||||||
|
return nil // FIXME
|
||||||
|
}
|
||||||
|
switch spec.Proto {
|
||||||
|
case "tcp":
|
||||||
|
err = startTCPRoutines(d.socketPath, spec, routineStopCh, d.logWriter)
|
||||||
|
case "udp":
|
||||||
|
err = startUDPRoutines(d.socketPath, spec, routineStopCh, d.logWriter)
|
||||||
|
default:
|
||||||
|
// NOTREACHED
|
||||||
|
return nil, errors.New("spec was not validated?")
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
d.mu.Lock()
|
||||||
|
id := d.nextID
|
||||||
|
st := port.Status{
|
||||||
|
ID: id,
|
||||||
|
Spec: spec,
|
||||||
|
}
|
||||||
|
d.ports[id] = &st
|
||||||
|
d.stoppers[id] = routineStop
|
||||||
|
d.nextID++
|
||||||
|
d.mu.Unlock()
|
||||||
|
return &st, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *driver) ListPorts(ctx context.Context) ([]port.Status, error) {
|
||||||
|
var ports []port.Status
|
||||||
|
d.mu.Lock()
|
||||||
|
for _, p := range d.ports {
|
||||||
|
ports = append(ports, *p)
|
||||||
|
}
|
||||||
|
d.mu.Unlock()
|
||||||
|
return ports, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *driver) RemovePort(ctx context.Context, id int) error {
|
||||||
|
d.mu.Lock()
|
||||||
|
defer d.mu.Unlock()
|
||||||
|
stop, ok := d.stoppers[id]
|
||||||
|
if !ok {
|
||||||
|
return errors.Errorf("unknown id: %d", id)
|
||||||
|
}
|
||||||
|
err := stop()
|
||||||
|
delete(d.stoppers, id)
|
||||||
|
delete(d.ports, id)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func initiate(c *net.UnixConn) error {
|
||||||
|
req := request{
|
||||||
|
Type: requestTypeInit,
|
||||||
|
}
|
||||||
|
if _, err := msgutil.MarshalToWriter(c, &req); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := c.CloseWrite(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
var rep reply
|
||||||
|
if _, err := msgutil.UnmarshalFromReader(c, &rep); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return c.CloseRead()
|
||||||
|
}
|
||||||
|
|
||||||
|
func connectToChild(socketPath string, spec port.Spec) (int, error) {
|
||||||
|
var dialer net.Dialer
|
||||||
|
conn, err := dialer.Dial("unix", socketPath)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
defer conn.Close()
|
||||||
|
c := conn.(*net.UnixConn)
|
||||||
|
req := request{
|
||||||
|
Type: requestTypeConnect,
|
||||||
|
Proto: spec.Proto,
|
||||||
|
Port: spec.ChildPort,
|
||||||
|
}
|
||||||
|
if _, err := msgutil.MarshalToWriter(c, &req); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
if err := c.CloseWrite(); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
oobSpace := unix.CmsgSpace(4)
|
||||||
|
oob := make([]byte, oobSpace)
|
||||||
|
_, oobN, _, _, err := c.ReadMsgUnix(nil, oob)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
if oobN != oobSpace {
|
||||||
|
return 0, errors.Errorf("expected OOB space %d, got %d", oobSpace, oobN)
|
||||||
|
}
|
||||||
|
oob = oob[:oobN]
|
||||||
|
fd, err := parseFDFromOOB(oob)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
if err := c.CloseRead(); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
return fd, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func connectToChildWithRetry(socketPath string, spec port.Spec, retries int) (int, error) {
|
||||||
|
for i := 0; i < retries; i++ {
|
||||||
|
fd, err := connectToChild(socketPath, spec)
|
||||||
|
if i == retries-1 && err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
if err == nil {
|
||||||
|
return fd, err
|
||||||
|
}
|
||||||
|
// TODO: backoff
|
||||||
|
time.Sleep(time.Duration(i*5) * time.Millisecond)
|
||||||
|
}
|
||||||
|
// NOT REACHED
|
||||||
|
return 0, errors.New("reached max retry")
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseFDFromOOB(oob []byte) (int, error) {
|
||||||
|
scms, err := unix.ParseSocketControlMessage(oob)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
if len(scms) != 1 {
|
||||||
|
return 0, errors.Errorf("unexpected scms: %v", scms)
|
||||||
|
}
|
||||||
|
scm := scms[0]
|
||||||
|
fds, err := unix.ParseUnixRights(&scm)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
if len(fds) != 1 {
|
||||||
|
return 0, errors.Errorf("unexpected fds: %v", fds)
|
||||||
|
}
|
||||||
|
return fds[0], nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func startTCPRoutines(socketPath string, spec port.Spec, stopCh <-chan struct{}, logWriter io.Writer) error {
|
||||||
|
ln, err := net.Listen("tcp", fmt.Sprintf("%s:%d", spec.ParentIP, spec.ParentPort))
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(logWriter, "listen: %v\n", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
newConns := make(chan net.Conn)
|
||||||
|
go func() {
|
||||||
|
for {
|
||||||
|
c, err := ln.Accept()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(logWriter, "accept: %v\n", err)
|
||||||
|
close(newConns)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
newConns <- c
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
go func() {
|
||||||
|
defer ln.Close()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case c, ok := <-newConns:
|
||||||
|
if !ok {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
go func() {
|
||||||
|
if err := copyConnToChild(c, socketPath, spec, stopCh); err != nil {
|
||||||
|
fmt.Fprintf(logWriter, "copyConnToChild: %v\n", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
case <-stopCh:
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
// no wait
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func startUDPRoutines(socketPath string, spec port.Spec, stopCh <-chan struct{}, logWriter io.Writer) error {
|
||||||
|
addr, err := net.ResolveUDPAddr("udp", fmt.Sprintf("%s:%d", spec.ParentIP, spec.ParentPort))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
c, err := net.ListenUDP("udp", addr)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
go func() {
|
||||||
|
if err := copyConnToChild(c, socketPath, spec, stopCh); err != nil {
|
||||||
|
fmt.Fprintf(logWriter, "copyConnToChild: %v\n", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
// no wait
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func copyConnToChild(c net.Conn, socketPath string, spec port.Spec, stopCh <-chan struct{}) error {
|
||||||
|
defer c.Close()
|
||||||
|
// get fd from the child as an SCM_RIGHTS cmsg
|
||||||
|
fd, err := connectToChildWithRetry(socketPath, spec, 10)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
f := os.NewFile(uintptr(fd), "")
|
||||||
|
defer f.Close()
|
||||||
|
fc, err := net.FileConn(f)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer fc.Close()
|
||||||
|
bicopy(c, fc, stopCh)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// bicopy relays data in both directions between x and y. It returns after
// both directions have finished; closing quit forces that by closing both
// connections. x and y are always closed on return.
//
// bicopy is based on libnetwork/cmd/proxy/tcp_proxy.go .
// NOTE: sendfile(2) cannot be used for sockets
func bicopy(x, y net.Conn, quit <-chan struct{}) {
	var pending sync.WaitGroup

	// pump copies src into dst until the copy ends, then half-closes the
	// TCP endpoints so the peer sees the end of the stream.
	pump := func(dst, src net.Conn) {
		defer pending.Done()
		io.Copy(dst, src)
		if tcpSrc, isTCP := src.(*net.TCPConn); isTCP {
			tcpSrc.CloseRead()
		}
		if tcpDst, isTCP := dst.(*net.TCPConn); isTCP {
			tcpDst.CloseWrite()
		}
	}

	pending.Add(2)
	go pump(x, y)
	go pump(y, x)

	drained := make(chan struct{})
	go func() {
		pending.Wait()
		close(drained)
	}()

	select {
	case <-drained:
	case <-quit:
	}
	// Closing both ends unblocks any copy that is still running.
	x.Close()
	y.Close()
	<-drained
}
|
||||||
|
|
||||||
|
// Values of request.Type, dispatched on by the child driver's routine.
const (
|
||||||
|
// requestTypeInit is sent once by the parent as the initial handshake.
requestTypeInit = "init"
|
||||||
|
// requestTypeConnect asks the child to connect to a port and pass back the descriptor.
requestTypeConnect = "connect"
|
||||||
|
)
|
||||||
|
|
||||||
|
// request is the message the parent sends to the child over the UNIX socket.
// request and response are encoded as JSON with uint32le length header.
|
||||||
|
type request struct {
|
||||||
|
Type string // "init" or "connect"
|
||||||
|
Proto string // "tcp" or "udp"
|
||||||
|
// Port is the port the child dials on 127.0.0.1 for a "connect" request.
Port int
|
||||||
|
}
|
||||||
|
|
||||||
|
// reply is what the child sends back when handling a request failed;
// Error then holds the error text.
// may contain FD as OOB
|
||||||
|
type reply struct {
|
||||||
|
Error string
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewChildDriver(logWriter io.Writer) port.ChildDriver {
|
||||||
|
return &childDriver{
|
||||||
|
logWriter: logWriter,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// childDriver is the child-side half of the builtin port driver.
type childDriver struct {
|
||||||
|
// logWriter is set by NewChildDriver.
logWriter io.Writer
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *childDriver) RunChildDriver(opaque map[string]string, quit <-chan struct{}) error {
|
||||||
|
socketPath := opaque[opaqueKeySocketPath]
|
||||||
|
if socketPath == "" {
|
||||||
|
return errors.New("socket path not set")
|
||||||
|
}
|
||||||
|
childReadyPipePath := opaque[opaqueKeyChildReadyPipePath]
|
||||||
|
if childReadyPipePath == "" {
|
||||||
|
return errors.New("child ready pipe path not set")
|
||||||
|
}
|
||||||
|
childReadyPipeW, err := os.OpenFile(childReadyPipePath, os.O_WRONLY, os.ModeNamedPipe)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
ln, err := net.ListenUnix("unix", &net.UnixAddr{
|
||||||
|
Name: socketPath,
|
||||||
|
Net: "unix",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// write nothing, just close
|
||||||
|
if err = childReadyPipeW.Close(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
stopAccept := make(chan struct{}, 1)
|
||||||
|
go func() {
|
||||||
|
<-quit
|
||||||
|
stopAccept <- struct{}{}
|
||||||
|
ln.Close()
|
||||||
|
}()
|
||||||
|
for {
|
||||||
|
c, err := ln.AcceptUnix()
|
||||||
|
if err != nil {
|
||||||
|
select {
|
||||||
|
case <-stopAccept:
|
||||||
|
return nil
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
go func() {
|
||||||
|
if rerr := d.routine(c); rerr != nil {
|
||||||
|
rep := reply{
|
||||||
|
Error: rerr.Error(),
|
||||||
|
}
|
||||||
|
msgutil.MarshalToWriter(c, &rep)
|
||||||
|
}
|
||||||
|
c.Close()
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *childDriver) routine(c *net.UnixConn) error {
|
||||||
|
var req request
|
||||||
|
if _, err := msgutil.UnmarshalFromReader(c, &req); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
switch req.Type {
|
||||||
|
case requestTypeInit:
|
||||||
|
return d.handleConnectInit(c, &req)
|
||||||
|
case requestTypeConnect:
|
||||||
|
return d.handleConnectRequest(c, &req)
|
||||||
|
default:
|
||||||
|
return errors.Errorf("unknown request type %q", req.Type)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *childDriver) handleConnectInit(c *net.UnixConn, req *request) error {
|
||||||
|
_, err := msgutil.MarshalToWriter(c, nil)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *childDriver) handleConnectRequest(c *net.UnixConn, req *request) error {
|
||||||
|
switch req.Proto {
|
||||||
|
case "tcp":
|
||||||
|
case "udp":
|
||||||
|
default:
|
||||||
|
return errors.Errorf("unknown proto: %q", req.Proto)
|
||||||
|
}
|
||||||
|
var dialer net.Dialer
|
||||||
|
targetConn, err := dialer.Dial(req.Proto, fmt.Sprintf("127.0.0.1:%d", req.Port))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer targetConn.Close() // no effect on duplicated FD
|
||||||
|
targetConnFiler, ok := targetConn.(filer)
|
||||||
|
if !ok {
|
||||||
|
return errors.Errorf("unknown target connection: %+v", targetConn)
|
||||||
|
}
|
||||||
|
targetConnFile, err := targetConnFiler.File()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
oob := unix.UnixRights(int(targetConnFile.Fd()))
|
||||||
|
f, err := c.File()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = unix.Sendmsg(int(f.Fd()), []byte("dummy"), oob, nil, 0)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// filer is implemented by *net.TCPConn and *net.UDPConn
// It is used to obtain the *os.File whose descriptor is sent to the parent.
|
||||||
|
type filer interface {
|
||||||
|
File() (f *os.File, err error)
|
||||||
|
}
|
|
@ -1,218 +0,0 @@
|
||||||
package socat
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"net"
|
|
||||||
"os"
|
|
||||||
"os/exec"
|
|
||||||
"sync"
|
|
||||||
"syscall"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/pkg/errors"
|
|
||||||
|
|
||||||
"github.com/rootless-containers/rootlesskit/pkg/port"
|
|
||||||
"github.com/rootless-containers/rootlesskit/pkg/port/portutil"
|
|
||||||
)
|
|
||||||
|
|
||||||
func NewParentDriver(logWriter io.Writer) (port.ParentDriver, error) {
|
|
||||||
if _, err := exec.LookPath("socat"); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if _, err := exec.LookPath("nsenter"); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
d := driver{
|
|
||||||
logWriter: logWriter,
|
|
||||||
ports: make(map[int]*port.Status, 0),
|
|
||||||
stoppers: make(map[int]func() error, 0),
|
|
||||||
nextID: 1,
|
|
||||||
}
|
|
||||||
return &d, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// driver is the parent-side state of the socat port driver.
type driver struct {
|
|
||||||
// logWriter receives the output of the socat commands and driver messages.
logWriter io.Writer
|
|
||||||
// mu is held while ports, stoppers and nextID are accessed.
mu sync.Mutex
|
|
||||||
// childPID is set in RunParentDriver and used as the nsenter target.
childPID int
|
|
||||||
// ports maps a port ID to its status, as reported by ListPorts.
ports map[int]*port.Status
|
|
||||||
// stoppers maps a port ID to the function that stops its socat routine.
stoppers map[int]func() error
|
|
||||||
// nextID is the ID that the next AddPort will assign.
nextID int
|
|
||||||
}
|
|
||||||
|
|
||||||
// OpaqueForChild returns nil: nothing needs to be handed to the child.
func (d *driver) OpaqueForChild() map[string]string {
|
|
||||||
// NOP, as this driver does not have child-side logic.
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (d *driver) RunParentDriver(initComplete chan struct{}, quit <-chan struct{}, cctx *port.ChildContext) error {
|
|
||||||
if cctx == nil || cctx.PID <= 0 {
|
|
||||||
return errors.New("child PID not set")
|
|
||||||
}
|
|
||||||
d.childPID = cctx.PID
|
|
||||||
initComplete <- struct{}{}
|
|
||||||
<-quit
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (d *driver) AddPort(ctx context.Context, spec port.Spec) (*port.Status, error) {
|
|
||||||
if d.childPID <= 0 {
|
|
||||||
return nil, errors.New("child PID not set")
|
|
||||||
}
|
|
||||||
d.mu.Lock()
|
|
||||||
err := portutil.ValidatePortSpec(spec, d.ports)
|
|
||||||
d.mu.Unlock()
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
cf := func() (*exec.Cmd, error) {
|
|
||||||
return createSocatCmd(ctx, spec, d.logWriter, d.childPID)
|
|
||||||
}
|
|
||||||
routineErrorCh := make(chan error)
|
|
||||||
routineStopCh := make(chan struct{})
|
|
||||||
routineStop := func() error {
|
|
||||||
close(routineStopCh)
|
|
||||||
return <-routineErrorCh
|
|
||||||
}
|
|
||||||
go portRoutine(cf, routineStopCh, routineErrorCh, d.logWriter)
|
|
||||||
d.mu.Lock()
|
|
||||||
id := d.nextID
|
|
||||||
st := port.Status{
|
|
||||||
ID: id,
|
|
||||||
Spec: spec,
|
|
||||||
}
|
|
||||||
d.ports[id] = &st
|
|
||||||
d.stoppers[id] = routineStop
|
|
||||||
d.nextID++
|
|
||||||
d.mu.Unlock()
|
|
||||||
return &st, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (d *driver) ListPorts(ctx context.Context) ([]port.Status, error) {
|
|
||||||
var ports []port.Status
|
|
||||||
d.mu.Lock()
|
|
||||||
for _, p := range d.ports {
|
|
||||||
ports = append(ports, *p)
|
|
||||||
}
|
|
||||||
d.mu.Unlock()
|
|
||||||
return ports, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (d *driver) RemovePort(ctx context.Context, id int) error {
|
|
||||||
d.mu.Lock()
|
|
||||||
defer d.mu.Unlock()
|
|
||||||
stop, ok := d.stoppers[id]
|
|
||||||
if !ok {
|
|
||||||
return errors.Errorf("unknown port id: %d", id)
|
|
||||||
}
|
|
||||||
err := stop()
|
|
||||||
delete(d.stoppers, id)
|
|
||||||
delete(d.ports, id)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func createSocatCmd(ctx context.Context, spec port.Spec, logWriter io.Writer, childPID int) (*exec.Cmd, error) {
|
|
||||||
if spec.Proto != "tcp" && spec.Proto != "udp" {
|
|
||||||
return nil, errors.Errorf("unsupported proto: %s", spec.Proto)
|
|
||||||
}
|
|
||||||
ipStr := "0.0.0.0"
|
|
||||||
if spec.ParentIP != "" {
|
|
||||||
ip := net.ParseIP(spec.ParentIP)
|
|
||||||
if ip == nil {
|
|
||||||
return nil, errors.Errorf("unsupported parentIP: %s", spec.ParentIP)
|
|
||||||
}
|
|
||||||
ip = ip.To4()
|
|
||||||
if ip == nil {
|
|
||||||
return nil, errors.Errorf("unsupported parentIP (v6?): %s", spec.ParentIP)
|
|
||||||
}
|
|
||||||
ipStr = ip.String()
|
|
||||||
}
|
|
||||||
if spec.ParentPort < 1 || spec.ParentPort > 65535 {
|
|
||||||
return nil, errors.Errorf("unsupported parentPort: %d", spec.ParentPort)
|
|
||||||
}
|
|
||||||
if spec.ChildPort < 1 || spec.ChildPort > 65535 {
|
|
||||||
return nil, errors.Errorf("unsupported childPort: %d", spec.ChildPort)
|
|
||||||
}
|
|
||||||
var cmd *exec.Cmd
|
|
||||||
switch spec.Proto {
|
|
||||||
case "tcp":
|
|
||||||
cmd = exec.CommandContext(ctx,
|
|
||||||
"socat",
|
|
||||||
fmt.Sprintf("TCP-LISTEN:%d,bind=%s,reuseaddr,fork,rcvbuf=65536,sndbuf=65536", spec.ParentPort, ipStr),
|
|
||||||
fmt.Sprintf("EXEC:\"%s\",nofork",
|
|
||||||
fmt.Sprintf("nsenter -U -n --preserve-credentials -t %d socat STDIN TCP4:127.0.0.1:%d", childPID, spec.ChildPort)))
|
|
||||||
case "udp":
|
|
||||||
cmd = exec.CommandContext(ctx,
|
|
||||||
"socat",
|
|
||||||
fmt.Sprintf("UDP-LISTEN:%d,bind=%s,reuseaddr,fork,rcvbuf=65536,sndbuf=65536", spec.ParentPort, ipStr),
|
|
||||||
fmt.Sprintf("EXEC:\"%s\",nofork",
|
|
||||||
fmt.Sprintf("nsenter -U -n --preserve-credentials -t %d socat STDIN UDP4:127.0.0.1:%d", childPID, spec.ChildPort)))
|
|
||||||
}
|
|
||||||
cmd.Env = os.Environ()
|
|
||||||
cmd.Stdout = logWriter
|
|
||||||
cmd.Stderr = logWriter
|
|
||||||
cmd.SysProcAttr = &syscall.SysProcAttr{
|
|
||||||
Pdeathsig: syscall.SIGKILL,
|
|
||||||
}
|
|
||||||
return cmd, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// cmdFactory creates a fresh, not yet started command for each (re)start.
type cmdFactory func() (*exec.Cmd, error)

// portRoutine keeps the command produced by cf running until stopCh is
// closed, restarting it whenever it exits.
//
// Exactly one of the following happens on errWCh before the routine returns:
//   - the error is sent if cf or cmd.Start fails;
//   - the last exit error is sent if stopCh closes while waiting to retry;
//   - the channel is closed if stopCh closes while the command is running
//     (the command is killed with SIGKILL first).
//
// Progress messages are written to logWriter.
func portRoutine(cf cmdFactory, stopCh <-chan struct{}, errWCh chan error, logWriter io.Writer) {
	retry := 0
	// Buffered: when the command is killed on stop nobody receives the Wait
	// result any more, and an unbuffered send would leak the Wait goroutine.
	// At most one result is in flight at any time, so capacity 1 is enough.
	doneCh := make(chan error, 1)
	for {
		cmd, err := cf()
		if err != nil {
			errWCh <- err
			return
		}
		cmdDesc := fmt.Sprintf("%s %v", cmd.Path, cmd.Args)
		fmt.Fprintf(logWriter, "[exec] starting cmd %s\n", cmdDesc)
		if err := cmd.Start(); err != nil {
			errWCh <- err
			return
		}
		pid := cmd.Process.Pid
		go func() {
			doneCh <- cmd.Wait()
		}()
		select {
		case err := <-doneCh:
			// even if err == nil (unexpected for socat), continue the loop
			retry++
			// NOTE(review): the modulo makes the delay wrap around to 0 after
			// 300 retries rather than staying at 30s; confirm whether a cap
			// was intended.
			sleepDuration := time.Duration((retry*100)%(30*1000)) * time.Millisecond
			fmt.Fprintf(logWriter, "[exec] retrying cmd %s after sleeping %v, count=%d, err=%v\n",
				cmdDesc, sleepDuration, retry, err)
			select {
			case <-time.After(sleepDuration):
			case <-stopCh:
				errWCh <- err
				return
			}
		case <-stopCh:
			fmt.Fprintf(logWriter, "[exec] killing cmd %s pid %d\n", cmdDesc, pid)
			syscall.Kill(pid, syscall.SIGKILL)
			fmt.Fprintf(logWriter, "[exec] killed cmd %s pid %d\n", cmdDesc, pid)
			close(errWCh)
			return
		}
	}
}
|
|
||||||
|
|
||||||
// NewChildDriver creates the child-side socat port driver.
func NewChildDriver() port.ChildDriver {
|
|
||||||
return &childDriver{}
|
|
||||||
}
|
|
||||||
|
|
||||||
// childDriver is the child-side half of the socat port driver; it holds no state.
type childDriver struct {
|
|
||||||
}
|
|
||||||
|
|
||||||
// RunChildDriver does no work: it blocks until quit is signalled and then
// returns nil. The opaque map is ignored.
func (d *childDriver) RunChildDriver(opaque map[string]string, quit <-chan struct{}) error {
|
|
||||||
// NOP
|
|
||||||
<-quit
|
|
||||||
return nil
|
|
||||||
}
|
|
Loading…
Reference in New Issue