mirror of https://github.com/k3s-io/k3s
Merge pull request #25177 from euank/rkt-alternate-stage1
Automatic merge from submit-queue rkt: Support alternate stage1's via annotation This provides a basic implementation for setting a stage1 on a per-pod basis via an annotation. This provides a basic implementation for setting a stage1 on a per-pod basis via an annotation. See discussion here for how this approach was arrived at: https://github.com/kubernetes/kubernetes/issues/23944#issuecomment-212653776 It's possible this feature should be gated behind additional knobs, such as a kubelet flag to filter allowed stage1s, or a check akin to what priviliged gets in the apiserver. Currently, it checks `AllowPrivileged`, as a means to let people disable this feature, though overloading it as stage1 and privileged isn't ideal. Fixes #23944 Testing done (note, unfortunately done with some additional ./cluster changes merged in): ``` $ cat examples/stage1-fly/fly-me-to-the-moon.yaml apiVersion: v1 kind: Pod metadata: labels: name: exit name: exit-fast annotations: {"rkt.alpha.kubernetes.io/stage1-name-override": "coreos.com/rkt/stage1-fly:1.3.0"} spec: restartPolicy: Never containers: - name: exit image: busybox command: ["sh", "-c", "ps aux"] $ kubectl create -f examples/stage1-fly $ ssh core@minion systemctl status -l --no-pager k8s_2f169b2e-c32a-49e9-a5fb-29ae1f6b4783.service ... failed ... May 04 23:33:03 minion rkt[2525]: stage0: error writing /etc/rkt-resolv.conf: open /var/lib/rkt/pods/run/2f169b2e-c32a-49e9-a5fb-29ae1f6b4783/stage1/rootfs/etc/rkt-resolv.conf: no such file or directory ... # Restart kubelet with allow-privileged=false $ kubectl create -f examples/stage1-fly $ kubectl describe exit-fast ... 1m 19s 5 {kubelet euank-e2e-test-minion-dv3u} spec.containers{exit} Warning Failed Failed to create rkt container with error: cannot make "exit-fast_default(17050ce9-1252-11e6-a52a-42010af00002)": running a custom stage1 requires a privileged security context .... ``` Note as well that the "success" here is rkt spitting out an [error message](https://github.com/coreos/rkt/issues/2141) which indicates that the right stage1 was being used at least. cc @yifan-gu @aaronlevypull/6/head
commit
025b017277
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
Copyright 2016 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// Generated via `mockgen k8s.io/kubernetes/pkg/kubelet/rkt VolumeGetter > mock_rkt/mock_volume_getter.go`
|
||||
// Edited to include required boilerplate
|
||||
// Source: k8s.io/kubernetes/pkg/kubelet/rkt (interfaces: VolumeGetter)
|
||||
|
||||
package mock_rkt
|
||||
|
||||
import (
|
||||
gomock "github.com/golang/mock/gomock"
|
||||
container "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
types "k8s.io/kubernetes/pkg/types"
|
||||
)
|
||||
|
||||
// Mock of VolumeGetter interface
|
||||
type MockVolumeGetter struct {
|
||||
ctrl *gomock.Controller
|
||||
recorder *_MockVolumeGetterRecorder
|
||||
}
|
||||
|
||||
// Recorder for MockVolumeGetter (not exported)
|
||||
type _MockVolumeGetterRecorder struct {
|
||||
mock *MockVolumeGetter
|
||||
}
|
||||
|
||||
func NewMockVolumeGetter(ctrl *gomock.Controller) *MockVolumeGetter {
|
||||
mock := &MockVolumeGetter{ctrl: ctrl}
|
||||
mock.recorder = &_MockVolumeGetterRecorder{mock}
|
||||
return mock
|
||||
}
|
||||
|
||||
func (_m *MockVolumeGetter) EXPECT() *_MockVolumeGetterRecorder {
|
||||
return _m.recorder
|
||||
}
|
||||
|
||||
func (_m *MockVolumeGetter) GetVolumes(_param0 types.UID) (container.VolumeMap, bool) {
|
||||
ret := _m.ctrl.Call(_m, "GetVolumes", _param0)
|
||||
ret0, _ := ret[0].(container.VolumeMap)
|
||||
ret1, _ := ret[1].(bool)
|
||||
return ret0, ret1
|
||||
}
|
||||
|
||||
func (_mr *_MockVolumeGetterRecorder) GetVolumes(arg0 interface{}) *gomock.Call {
|
||||
return _mr.mock.ctrl.RecordCall(_mr.mock, "GetVolumes", arg0)
|
||||
}
|
|
@ -41,6 +41,7 @@ import (
|
|||
"golang.org/x/net/context"
|
||||
"google.golang.org/grpc"
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
"k8s.io/kubernetes/pkg/capabilities"
|
||||
"k8s.io/kubernetes/pkg/client/record"
|
||||
"k8s.io/kubernetes/pkg/credentialprovider"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
|
@ -89,7 +90,18 @@ const (
|
|||
k8sRktContainerHashAnno = "rkt.kubernetes.io/container-hash"
|
||||
k8sRktRestartCountAnno = "rkt.kubernetes.io/restart-count"
|
||||
k8sRktTerminationMessagePathAnno = "rkt.kubernetes.io/termination-message-path"
|
||||
dockerPrefix = "docker://"
|
||||
|
||||
// TODO(euank): This has significant security concerns as a stage1 image is
|
||||
// effectively root.
|
||||
// Furthermore, this (using an annotation) is a hack to pass an extra
|
||||
// non-portable argument in. It should not be relied on to be stable.
|
||||
// In the future, this might be subsumed by a first-class api object, or by a
|
||||
// kitchen-sink params object (#17064).
|
||||
// See discussion in #23944
|
||||
// Also, do we want more granularity than path-at-the-kubelet-level and
|
||||
// image/name-at-the-pod-level?
|
||||
k8sRktStage1NameAnno = "rkt.alpha.kubernetes.io/stage1-name-override"
|
||||
dockerPrefix = "docker://"
|
||||
|
||||
authDir = "auth.d"
|
||||
dockerAuthTemplate = `{"rktKind":"dockerAuth","rktVersion":"v1","registries":[%q],"credentials":{"user":%q,"password":%q}}`
|
||||
|
@ -130,7 +142,7 @@ type Runtime struct {
|
|||
runtimeHelper kubecontainer.RuntimeHelper
|
||||
recorder record.EventRecorder
|
||||
livenessManager proberesults.Manager
|
||||
volumeGetter volumeGetter
|
||||
volumeGetter VolumeGetter
|
||||
imagePuller kubecontainer.ImagePuller
|
||||
runner kubecontainer.HandlerRunner
|
||||
execer utilexec.Interface
|
||||
|
@ -154,7 +166,7 @@ type Runtime struct {
|
|||
var _ kubecontainer.Runtime = &Runtime{}
|
||||
|
||||
// TODO(yifan): Remove this when volumeManager is moved to separate package.
|
||||
type volumeGetter interface {
|
||||
type VolumeGetter interface {
|
||||
GetVolumes(podUID types.UID) (kubecontainer.VolumeMap, bool)
|
||||
}
|
||||
|
||||
|
@ -181,7 +193,7 @@ func New(
|
|||
containerRefManager *kubecontainer.RefManager,
|
||||
podGetter podGetter,
|
||||
livenessManager proberesults.Manager,
|
||||
volumeGetter volumeGetter,
|
||||
volumeGetter VolumeGetter,
|
||||
httpClient kubetypes.HttpGetter,
|
||||
networkPlugin network.NetworkPlugin,
|
||||
hairpinMode bool,
|
||||
|
@ -264,10 +276,8 @@ func New(
|
|||
}
|
||||
|
||||
func (r *Runtime) buildCommand(args ...string) *exec.Cmd {
|
||||
cmd := exec.Command(r.config.Path)
|
||||
cmd.Args = append(cmd.Args, r.config.buildGlobalOptions()...)
|
||||
cmd.Args = append(cmd.Args, args...)
|
||||
return cmd
|
||||
allArgs := append(r.config.buildGlobalOptions(), args...)
|
||||
return exec.Command(r.config.Path, allArgs...)
|
||||
}
|
||||
|
||||
// convertToACName converts a string into ACName.
|
||||
|
@ -285,7 +295,8 @@ func (r *Runtime) RunCommand(args ...string) ([]string, error) {
|
|||
|
||||
var stdout, stderr bytes.Buffer
|
||||
cmd := r.buildCommand(args...)
|
||||
cmd.Stdout, cmd.Stderr = &stdout, &stderr
|
||||
cmd.Stdout = &stdout
|
||||
cmd.Stderr = &stderr
|
||||
if err := cmd.Run(); err != nil {
|
||||
return nil, fmt.Errorf("failed to run %v: %v\nstdout: %v\nstderr: %v", args, err, stdout.String(), stderr.String())
|
||||
}
|
||||
|
@ -595,14 +606,19 @@ func (r *Runtime) makePodManifest(pod *api.Pod, pullSecrets []api.Secret) (*appc
|
|||
}
|
||||
}
|
||||
|
||||
requiresPrivileged := false
|
||||
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktKubeletAnno), k8sRktKubeletAnnoValue)
|
||||
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktUIDAnno), string(pod.UID))
|
||||
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktNameAnno), pod.Name)
|
||||
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktNamespaceAnno), pod.Namespace)
|
||||
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktRestartCountAnno), strconv.Itoa(restartCount))
|
||||
if stage1Name, ok := pod.Annotations[k8sRktStage1NameAnno]; ok {
|
||||
requiresPrivileged = true
|
||||
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktStage1NameAnno), stage1Name)
|
||||
}
|
||||
|
||||
for _, c := range pod.Spec.Containers {
|
||||
err := r.newAppcRuntimeApp(pod, c, pullSecrets, manifest)
|
||||
err := r.newAppcRuntimeApp(pod, c, requiresPrivileged, pullSecrets, manifest)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -707,7 +723,11 @@ func (r *Runtime) makeContainerLogMount(opts *kubecontainer.RunContainerOptions,
|
|||
return &mnt, nil
|
||||
}
|
||||
|
||||
func (r *Runtime) newAppcRuntimeApp(pod *api.Pod, c api.Container, pullSecrets []api.Secret, manifest *appcschema.PodManifest) error {
|
||||
func (r *Runtime) newAppcRuntimeApp(pod *api.Pod, c api.Container, requiresPrivileged bool, pullSecrets []api.Secret, manifest *appcschema.PodManifest) error {
|
||||
if requiresPrivileged && !capabilities.Get().AllowPrivileged {
|
||||
return fmt.Errorf("cannot make %q: running a custom stage1 requires a privileged security context", format.Pod(pod))
|
||||
}
|
||||
|
||||
if err, _ := r.imagePuller.PullImage(pod, &c, pullSecrets); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
@ -950,6 +970,27 @@ func (r *Runtime) cleanupPodNetwork(pod *api.Pod) error {
|
|||
return teardownErr
|
||||
}
|
||||
|
||||
func (r *Runtime) preparePodArgs(manifest *appcschema.PodManifest, manifestFileName string) []string {
|
||||
// Order of precedence for the stage1:
|
||||
// 1) pod annotation (stage1 name)
|
||||
// 2) kubelet configured stage1 (stage1 path)
|
||||
// 3) empty; whatever rkt's compiled to default to
|
||||
stage1ImageCmd := ""
|
||||
if r.config.Stage1Image != "" {
|
||||
stage1ImageCmd = "--stage1-path=" + r.config.Stage1Image
|
||||
}
|
||||
if stage1Name, ok := manifest.Annotations.Get(k8sRktStage1NameAnno); ok {
|
||||
stage1ImageCmd = "--stage1-name=" + stage1Name
|
||||
}
|
||||
|
||||
// Run 'rkt prepare' to get the rkt UUID.
|
||||
cmds := []string{"prepare", "--quiet", "--pod-manifest", manifestFileName}
|
||||
if stage1ImageCmd != "" {
|
||||
cmds = append(cmds, stage1ImageCmd)
|
||||
}
|
||||
return cmds
|
||||
}
|
||||
|
||||
// preparePod will:
|
||||
//
|
||||
// 1. Invoke 'rkt prepare' to prepare the pod, and get the rkt pod uuid.
|
||||
|
@ -958,7 +999,7 @@ func (r *Runtime) cleanupPodNetwork(pod *api.Pod) error {
|
|||
// On success, it will return a string that represents name of the unit file
|
||||
// and the runtime pod.
|
||||
func (r *Runtime) preparePod(pod *api.Pod, pullSecrets []api.Secret, netnsName string) (string, *kubecontainer.Pod, error) {
|
||||
// Generate the pod manifest from the pod spec.
|
||||
// Generate the appc pod manifest from the k8s pod spec.
|
||||
manifest, err := r.makePodManifest(pod, pullSecrets)
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
|
@ -986,12 +1027,8 @@ func (r *Runtime) preparePod(pod *api.Pod, pullSecrets []api.Secret, netnsName s
|
|||
return "", nil, err
|
||||
}
|
||||
|
||||
// Run 'rkt prepare' to get the rkt UUID.
|
||||
cmds := []string{"prepare", "--quiet", "--pod-manifest", manifestFile.Name()}
|
||||
if r.config.Stage1Image != "" {
|
||||
cmds = append(cmds, "--stage1-path", r.config.Stage1Image)
|
||||
}
|
||||
output, err := r.cli.RunCommand(cmds...)
|
||||
prepareCmd := r.preparePodArgs(manifest, manifestFile.Name())
|
||||
output, err := r.RunCommand(prepareCmd...)
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
|
@ -1809,7 +1846,7 @@ func (r *Runtime) ExecInContainer(containerID kubecontainer.ContainerID, cmd []s
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
args := append([]string{}, "enter", fmt.Sprintf("--app=%s", id.appName), id.uuid)
|
||||
args := []string{"enter", fmt.Sprintf("--app=%s", id.appName), id.uuid}
|
||||
args = append(args, cmd...)
|
||||
command := r.buildCommand(args...)
|
||||
|
||||
|
|
|
@ -36,8 +36,10 @@ import (
|
|||
kubetesting "k8s.io/kubernetes/pkg/kubelet/container/testing"
|
||||
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
||||
"k8s.io/kubernetes/pkg/kubelet/rkt/mock_os"
|
||||
"k8s.io/kubernetes/pkg/kubelet/rkt/mock_rkt"
|
||||
"k8s.io/kubernetes/pkg/types"
|
||||
"k8s.io/kubernetes/pkg/util/errors"
|
||||
utilexec "k8s.io/kubernetes/pkg/util/exec"
|
||||
utiltesting "k8s.io/kubernetes/pkg/util/testing"
|
||||
)
|
||||
|
||||
|
@ -1194,6 +1196,11 @@ func TestGenerateRunCommand(t *testing.T) {
|
|||
for i, tt := range tests {
|
||||
testCaseHint := fmt.Sprintf("test case #%d", i)
|
||||
rkt.runtimeHelper = &fakeRuntimeHelper{tt.dnsServers, tt.dnsSearches, tt.hostName, "", tt.err}
|
||||
rkt.execer = &utilexec.FakeExec{CommandScript: []utilexec.FakeCommandAction{func(cmd string, args ...string) utilexec.Cmd {
|
||||
return utilexec.InitFakeCmd(&utilexec.FakeCmd{}, cmd, args...)
|
||||
}}}
|
||||
|
||||
// a command should be created of this form, but the returned command shouldn't be called (asserted by having no expectations on it)
|
||||
|
||||
result, err := rkt.generateRunCommand(tt.pod, tt.uuid, tt.netnsName)
|
||||
assert.Equal(t, tt.err, err, testCaseHint)
|
||||
|
@ -1617,3 +1624,137 @@ func TestGarbageCollect(t *testing.T) {
|
|||
getter.pods = make(map[types.UID]*api.Pod)
|
||||
}
|
||||
}
|
||||
|
||||
type annotationsByName []appctypes.Annotation
|
||||
|
||||
func (a annotationsByName) Len() int { return len(a) }
|
||||
func (a annotationsByName) Less(x, y int) bool { return a[x].Name < a[y].Name }
|
||||
func (a annotationsByName) Swap(x, y int) { a[x], a[y] = a[y], a[x] }
|
||||
|
||||
func TestMakePodManifestAnnotations(t *testing.T) {
|
||||
ctrl := gomock.NewController(t)
|
||||
defer ctrl.Finish()
|
||||
|
||||
mockVolumeGetter := mock_rkt.NewMockVolumeGetter(ctrl)
|
||||
|
||||
fr := newFakeRktInterface()
|
||||
fs := newFakeSystemd()
|
||||
r := &Runtime{apisvc: fr, systemd: fs, volumeGetter: mockVolumeGetter}
|
||||
|
||||
testCases := []struct {
|
||||
in *api.Pod
|
||||
out *appcschema.PodManifest
|
||||
outerr error
|
||||
}{
|
||||
{
|
||||
in: &api.Pod{
|
||||
ObjectMeta: api.ObjectMeta{
|
||||
UID: "uid-1",
|
||||
Name: "name-1",
|
||||
Namespace: "namespace-1",
|
||||
Annotations: map[string]string{
|
||||
k8sRktStage1NameAnno: "stage1-override-img",
|
||||
},
|
||||
},
|
||||
},
|
||||
out: &appcschema.PodManifest{
|
||||
Annotations: []appctypes.Annotation{
|
||||
{
|
||||
Name: appctypes.ACIdentifier(k8sRktStage1NameAnno),
|
||||
Value: "stage1-override-img",
|
||||
},
|
||||
{
|
||||
Name: appctypes.ACIdentifier(k8sRktUIDAnno),
|
||||
Value: "uid-1",
|
||||
},
|
||||
{
|
||||
Name: appctypes.ACIdentifier(k8sRktNameAnno),
|
||||
Value: "name-1",
|
||||
},
|
||||
{
|
||||
Name: appctypes.ACIdentifier(k8sRktKubeletAnno),
|
||||
Value: "true",
|
||||
},
|
||||
{
|
||||
Name: appctypes.ACIdentifier(k8sRktNamespaceAnno),
|
||||
Value: "namespace-1",
|
||||
},
|
||||
{
|
||||
Name: appctypes.ACIdentifier(k8sRktRestartCountAnno),
|
||||
Value: "0",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for i, testCase := range testCases {
|
||||
hint := fmt.Sprintf("case #%d", i)
|
||||
mockVolumeGetter.EXPECT().GetVolumes(gomock.Any()).Return(kubecontainer.VolumeMap{}, true)
|
||||
|
||||
result, err := r.makePodManifest(testCase.in, []api.Secret{})
|
||||
assert.Equal(t, err, testCase.outerr, hint)
|
||||
if err == nil {
|
||||
sort.Sort(annotationsByName(result.Annotations))
|
||||
sort.Sort(annotationsByName(testCase.out.Annotations))
|
||||
assert.Equal(t, result.Annotations, testCase.out.Annotations, hint)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPreparePodArgs(t *testing.T) {
|
||||
r := &Runtime{
|
||||
config: &Config{},
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
manifest appcschema.PodManifest
|
||||
stage1Config string
|
||||
cmd []string
|
||||
}{
|
||||
{
|
||||
appcschema.PodManifest{
|
||||
Annotations: appctypes.Annotations{
|
||||
{
|
||||
Name: k8sRktStage1NameAnno,
|
||||
Value: "stage1-image",
|
||||
},
|
||||
},
|
||||
},
|
||||
"",
|
||||
[]string{"prepare", "--quiet", "--pod-manifest", "file", "--stage1-name=stage1-image"},
|
||||
},
|
||||
{
|
||||
appcschema.PodManifest{
|
||||
Annotations: appctypes.Annotations{
|
||||
{
|
||||
Name: k8sRktStage1NameAnno,
|
||||
Value: "stage1-image",
|
||||
},
|
||||
},
|
||||
},
|
||||
"stage1-path",
|
||||
[]string{"prepare", "--quiet", "--pod-manifest", "file", "--stage1-name=stage1-image"},
|
||||
},
|
||||
{
|
||||
appcschema.PodManifest{
|
||||
Annotations: appctypes.Annotations{},
|
||||
},
|
||||
"stage1-path",
|
||||
[]string{"prepare", "--quiet", "--pod-manifest", "file", "--stage1-path=stage1-path"},
|
||||
},
|
||||
{
|
||||
appcschema.PodManifest{
|
||||
Annotations: appctypes.Annotations{},
|
||||
},
|
||||
"",
|
||||
[]string{"prepare", "--quiet", "--pod-manifest", "file"},
|
||||
},
|
||||
}
|
||||
|
||||
for i, testCase := range testCases {
|
||||
r.config.Stage1Image = testCase.stage1Config
|
||||
cmd := r.preparePodArgs(&testCase.manifest, "file")
|
||||
assert.Equal(t, testCase.cmd, cmd, fmt.Sprintf("Test case #%d", i))
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue