Merge pull request #25177 from euank/rkt-alternate-stage1

Automatic merge from submit-queue

rkt: Support alternate stage1's via annotation

This provides a basic implementation for setting a stage1 on a per-pod
basis via an annotation.

This provides a basic implementation for setting a stage1 on a per-pod
basis via an annotation. See discussion here for how this approach was arrived at: https://github.com/kubernetes/kubernetes/issues/23944#issuecomment-212653776

It's possible this feature should be gated behind additional knobs, such
as a kubelet flag to filter allowed stage1s, or a check akin to what
privileged gets in the apiserver.
Currently, it checks `AllowPrivileged`, as a means to let people disable
this feature, though overloading it as stage1 and privileged isn't
ideal.

Fixes #23944

Testing done (note, unfortunately done with some additional ./cluster changes merged in):

```
$ cat examples/stage1-fly/fly-me-to-the-moon.yaml
apiVersion: v1
kind: Pod
metadata:
  labels:
    name: exit
  name: exit-fast
  annotations: {"rkt.alpha.kubernetes.io/stage1-name-override": "coreos.com/rkt/stage1-fly:1.3.0"}
spec:
  restartPolicy: Never
  containers:
    - name: exit
      image: busybox
      command: ["sh", "-c", "ps aux"]
$ kubectl create -f examples/stage1-fly
$ ssh core@minion systemctl status -l --no-pager k8s_2f169b2e-c32a-49e9-a5fb-29ae1f6b4783.service
...
failed
...
May 04 23:33:03 minion rkt[2525]: stage0: error writing /etc/rkt-resolv.conf: open /var/lib/rkt/pods/run/2f169b2e-c32a-49e9-a5fb-29ae1f6b4783/stage1/rootfs/etc/rkt-resolv.conf: no such file or directory
...
# Restart kubelet with allow-privileged=false
$ kubectl create -f examples/stage1-fly
$ kubectl describe exit-fast
...
  1m		19s		5	{kubelet euank-e2e-test-minion-dv3u}	spec.containers{exit}	Warning		Failed		Failed to create rkt container with error: cannot make "exit-fast_default(17050ce9-1252-11e6-a52a-42010af00002)": running a custom stage1 requires a privileged security context
....
```

Note as well that the "success" in the first run is rkt emitting an [error message](https://github.com/coreos/rkt/issues/2141), which at least indicates that the requested stage1 was being used.

cc @yifan-gu @aaronlevy
pull/6/head
k8s-merge-robot 2016-05-25 07:52:17 -07:00
commit 025b017277
3 changed files with 256 additions and 19 deletions

View File

@ -0,0 +1,59 @@
/*
Copyright 2016 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Generated via `mockgen k8s.io/kubernetes/pkg/kubelet/rkt VolumeGetter > mock_rkt/mock_volume_getter.go`
// Edited to include required boilerplate
// Source: k8s.io/kubernetes/pkg/kubelet/rkt (interfaces: VolumeGetter)
package mock_rkt
import (
gomock "github.com/golang/mock/gomock"
container "k8s.io/kubernetes/pkg/kubelet/container"
types "k8s.io/kubernetes/pkg/types"
)
// MockVolumeGetter is a gomock-based mock of the VolumeGetter interface.
// Construct it with NewMockVolumeGetter and register expected calls through
// the recorder returned by EXPECT.
type MockVolumeGetter struct {
// ctrl is the gomock controller that mocked calls are dispatched to.
ctrl *gomock.Controller
// recorder is the handle returned by EXPECT for registering expected calls.
recorder *_MockVolumeGetterRecorder
}
// _MockVolumeGetterRecorder is the expectation recorder for MockVolumeGetter
// (not exported). Its methods forward to the controller of the mock it wraps.
type _MockVolumeGetterRecorder struct {
// mock is the MockVolumeGetter this recorder belongs to.
mock *MockVolumeGetter
}
// NewMockVolumeGetter returns a MockVolumeGetter bound to ctrl, with its
// expectation recorder already pointing back at the new mock.
func NewMockVolumeGetter(ctrl *gomock.Controller) *MockVolumeGetter {
	rec := &_MockVolumeGetterRecorder{}
	m := &MockVolumeGetter{ctrl: ctrl, recorder: rec}
	rec.mock = m
	return m
}
// EXPECT returns the recorder used to register expected calls on this mock.
func (m *MockVolumeGetter) EXPECT() *_MockVolumeGetterRecorder {
	rec := m.recorder
	return rec
}
// GetVolumes forwards the call to the gomock controller and unpacks the two
// configured return values. A return value of the wrong dynamic type is
// replaced by the zero value of the expected type rather than panicking.
func (m *MockVolumeGetter) GetVolumes(_param0 types.UID) (container.VolumeMap, bool) {
	results := m.ctrl.Call(m, "GetVolumes", _param0)
	volumes, _ := results[0].(container.VolumeMap)
	found, _ := results[1].(bool)
	return volumes, found
}
// GetVolumes registers an expected GetVolumes call with the given argument
// (or matcher) on the underlying mock's controller and returns the resulting
// *gomock.Call so further expectations can be chained on it.
func (rec *_MockVolumeGetterRecorder) GetVolumes(arg0 interface{}) *gomock.Call {
	target := rec.mock
	return target.ctrl.RecordCall(target, "GetVolumes", arg0)
}

View File

@ -41,6 +41,7 @@ import (
"golang.org/x/net/context" "golang.org/x/net/context"
"google.golang.org/grpc" "google.golang.org/grpc"
"k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/capabilities"
"k8s.io/kubernetes/pkg/client/record" "k8s.io/kubernetes/pkg/client/record"
"k8s.io/kubernetes/pkg/credentialprovider" "k8s.io/kubernetes/pkg/credentialprovider"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
@ -89,7 +90,18 @@ const (
k8sRktContainerHashAnno = "rkt.kubernetes.io/container-hash" k8sRktContainerHashAnno = "rkt.kubernetes.io/container-hash"
k8sRktRestartCountAnno = "rkt.kubernetes.io/restart-count" k8sRktRestartCountAnno = "rkt.kubernetes.io/restart-count"
k8sRktTerminationMessagePathAnno = "rkt.kubernetes.io/termination-message-path" k8sRktTerminationMessagePathAnno = "rkt.kubernetes.io/termination-message-path"
dockerPrefix = "docker://"
// TODO(euank): This has significant security concerns as a stage1 image is
// effectively root.
// Furthermore, this (using an annotation) is a hack to pass an extra
// non-portable argument in. It should not be relied on to be stable.
// In the future, this might be subsumed by a first-class api object, or by a
// kitchen-sink params object (#17064).
// See discussion in #23944
// Also, do we want more granularity than path-at-the-kubelet-level and
// image/name-at-the-pod-level?
k8sRktStage1NameAnno = "rkt.alpha.kubernetes.io/stage1-name-override"
dockerPrefix = "docker://"
authDir = "auth.d" authDir = "auth.d"
dockerAuthTemplate = `{"rktKind":"dockerAuth","rktVersion":"v1","registries":[%q],"credentials":{"user":%q,"password":%q}}` dockerAuthTemplate = `{"rktKind":"dockerAuth","rktVersion":"v1","registries":[%q],"credentials":{"user":%q,"password":%q}}`
@ -130,7 +142,7 @@ type Runtime struct {
runtimeHelper kubecontainer.RuntimeHelper runtimeHelper kubecontainer.RuntimeHelper
recorder record.EventRecorder recorder record.EventRecorder
livenessManager proberesults.Manager livenessManager proberesults.Manager
volumeGetter volumeGetter volumeGetter VolumeGetter
imagePuller kubecontainer.ImagePuller imagePuller kubecontainer.ImagePuller
runner kubecontainer.HandlerRunner runner kubecontainer.HandlerRunner
execer utilexec.Interface execer utilexec.Interface
@ -154,7 +166,7 @@ type Runtime struct {
var _ kubecontainer.Runtime = &Runtime{} var _ kubecontainer.Runtime = &Runtime{}
// TODO(yifan): Remove this when volumeManager is moved to separate package. // TODO(yifan): Remove this when volumeManager is moved to separate package.
type volumeGetter interface { type VolumeGetter interface {
GetVolumes(podUID types.UID) (kubecontainer.VolumeMap, bool) GetVolumes(podUID types.UID) (kubecontainer.VolumeMap, bool)
} }
@ -181,7 +193,7 @@ func New(
containerRefManager *kubecontainer.RefManager, containerRefManager *kubecontainer.RefManager,
podGetter podGetter, podGetter podGetter,
livenessManager proberesults.Manager, livenessManager proberesults.Manager,
volumeGetter volumeGetter, volumeGetter VolumeGetter,
httpClient kubetypes.HttpGetter, httpClient kubetypes.HttpGetter,
networkPlugin network.NetworkPlugin, networkPlugin network.NetworkPlugin,
hairpinMode bool, hairpinMode bool,
@ -264,10 +276,8 @@ func New(
} }
func (r *Runtime) buildCommand(args ...string) *exec.Cmd { func (r *Runtime) buildCommand(args ...string) *exec.Cmd {
cmd := exec.Command(r.config.Path) allArgs := append(r.config.buildGlobalOptions(), args...)
cmd.Args = append(cmd.Args, r.config.buildGlobalOptions()...) return exec.Command(r.config.Path, allArgs...)
cmd.Args = append(cmd.Args, args...)
return cmd
} }
// convertToACName converts a string into ACName. // convertToACName converts a string into ACName.
@ -285,7 +295,8 @@ func (r *Runtime) RunCommand(args ...string) ([]string, error) {
var stdout, stderr bytes.Buffer var stdout, stderr bytes.Buffer
cmd := r.buildCommand(args...) cmd := r.buildCommand(args...)
cmd.Stdout, cmd.Stderr = &stdout, &stderr cmd.Stdout = &stdout
cmd.Stderr = &stderr
if err := cmd.Run(); err != nil { if err := cmd.Run(); err != nil {
return nil, fmt.Errorf("failed to run %v: %v\nstdout: %v\nstderr: %v", args, err, stdout.String(), stderr.String()) return nil, fmt.Errorf("failed to run %v: %v\nstdout: %v\nstderr: %v", args, err, stdout.String(), stderr.String())
} }
@ -595,14 +606,19 @@ func (r *Runtime) makePodManifest(pod *api.Pod, pullSecrets []api.Secret) (*appc
} }
} }
requiresPrivileged := false
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktKubeletAnno), k8sRktKubeletAnnoValue) manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktKubeletAnno), k8sRktKubeletAnnoValue)
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktUIDAnno), string(pod.UID)) manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktUIDAnno), string(pod.UID))
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktNameAnno), pod.Name) manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktNameAnno), pod.Name)
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktNamespaceAnno), pod.Namespace) manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktNamespaceAnno), pod.Namespace)
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktRestartCountAnno), strconv.Itoa(restartCount)) manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktRestartCountAnno), strconv.Itoa(restartCount))
if stage1Name, ok := pod.Annotations[k8sRktStage1NameAnno]; ok {
requiresPrivileged = true
manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktStage1NameAnno), stage1Name)
}
for _, c := range pod.Spec.Containers { for _, c := range pod.Spec.Containers {
err := r.newAppcRuntimeApp(pod, c, pullSecrets, manifest) err := r.newAppcRuntimeApp(pod, c, requiresPrivileged, pullSecrets, manifest)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -707,7 +723,11 @@ func (r *Runtime) makeContainerLogMount(opts *kubecontainer.RunContainerOptions,
return &mnt, nil return &mnt, nil
} }
func (r *Runtime) newAppcRuntimeApp(pod *api.Pod, c api.Container, pullSecrets []api.Secret, manifest *appcschema.PodManifest) error { func (r *Runtime) newAppcRuntimeApp(pod *api.Pod, c api.Container, requiresPrivileged bool, pullSecrets []api.Secret, manifest *appcschema.PodManifest) error {
if requiresPrivileged && !capabilities.Get().AllowPrivileged {
return fmt.Errorf("cannot make %q: running a custom stage1 requires a privileged security context", format.Pod(pod))
}
if err, _ := r.imagePuller.PullImage(pod, &c, pullSecrets); err != nil { if err, _ := r.imagePuller.PullImage(pod, &c, pullSecrets); err != nil {
return nil return nil
} }
@ -950,6 +970,27 @@ func (r *Runtime) cleanupPodNetwork(pod *api.Pod) error {
return teardownErr return teardownErr
} }
// preparePodArgs builds the argument list for 'rkt prepare' for the given pod
// manifest file.
//
// At most one stage1 flag is appended, chosen in this order of precedence:
//  1. the stage1 name from the pod manifest annotation (--stage1-name=...)
//  2. the stage1 path configured on the kubelet (--stage1-path=...)
//  3. nothing, leaving rkt to use its compiled-in default
func (r *Runtime) preparePodArgs(manifest *appcschema.PodManifest, manifestFileName string) []string {
	configuredPath := r.config.Stage1Image
	overrideName, overridden := manifest.Annotations.Get(k8sRktStage1NameAnno)

	args := []string{"prepare", "--quiet", "--pod-manifest", manifestFileName}
	switch {
	case overridden:
		// The annotation wins even when a kubelet-level stage1 is configured.
		args = append(args, "--stage1-name="+overrideName)
	case configuredPath != "":
		args = append(args, "--stage1-path="+configuredPath)
	}
	return args
}
// preparePod will: // preparePod will:
// //
// 1. Invoke 'rkt prepare' to prepare the pod, and get the rkt pod uuid. // 1. Invoke 'rkt prepare' to prepare the pod, and get the rkt pod uuid.
@ -958,7 +999,7 @@ func (r *Runtime) cleanupPodNetwork(pod *api.Pod) error {
// On success, it will return a string that represents name of the unit file // On success, it will return a string that represents name of the unit file
// and the runtime pod. // and the runtime pod.
func (r *Runtime) preparePod(pod *api.Pod, pullSecrets []api.Secret, netnsName string) (string, *kubecontainer.Pod, error) { func (r *Runtime) preparePod(pod *api.Pod, pullSecrets []api.Secret, netnsName string) (string, *kubecontainer.Pod, error) {
// Generate the pod manifest from the pod spec. // Generate the appc pod manifest from the k8s pod spec.
manifest, err := r.makePodManifest(pod, pullSecrets) manifest, err := r.makePodManifest(pod, pullSecrets)
if err != nil { if err != nil {
return "", nil, err return "", nil, err
@ -986,12 +1027,8 @@ func (r *Runtime) preparePod(pod *api.Pod, pullSecrets []api.Secret, netnsName s
return "", nil, err return "", nil, err
} }
// Run 'rkt prepare' to get the rkt UUID. prepareCmd := r.preparePodArgs(manifest, manifestFile.Name())
cmds := []string{"prepare", "--quiet", "--pod-manifest", manifestFile.Name()} output, err := r.RunCommand(prepareCmd...)
if r.config.Stage1Image != "" {
cmds = append(cmds, "--stage1-path", r.config.Stage1Image)
}
output, err := r.cli.RunCommand(cmds...)
if err != nil { if err != nil {
return "", nil, err return "", nil, err
} }
@ -1809,7 +1846,7 @@ func (r *Runtime) ExecInContainer(containerID kubecontainer.ContainerID, cmd []s
if err != nil { if err != nil {
return err return err
} }
args := append([]string{}, "enter", fmt.Sprintf("--app=%s", id.appName), id.uuid) args := []string{"enter", fmt.Sprintf("--app=%s", id.appName), id.uuid}
args = append(args, cmd...) args = append(args, cmd...)
command := r.buildCommand(args...) command := r.buildCommand(args...)

View File

@ -36,8 +36,10 @@ import (
kubetesting "k8s.io/kubernetes/pkg/kubelet/container/testing" kubetesting "k8s.io/kubernetes/pkg/kubelet/container/testing"
"k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/rkt/mock_os" "k8s.io/kubernetes/pkg/kubelet/rkt/mock_os"
"k8s.io/kubernetes/pkg/kubelet/rkt/mock_rkt"
"k8s.io/kubernetes/pkg/types" "k8s.io/kubernetes/pkg/types"
"k8s.io/kubernetes/pkg/util/errors" "k8s.io/kubernetes/pkg/util/errors"
utilexec "k8s.io/kubernetes/pkg/util/exec"
utiltesting "k8s.io/kubernetes/pkg/util/testing" utiltesting "k8s.io/kubernetes/pkg/util/testing"
) )
@ -1194,6 +1196,11 @@ func TestGenerateRunCommand(t *testing.T) {
for i, tt := range tests { for i, tt := range tests {
testCaseHint := fmt.Sprintf("test case #%d", i) testCaseHint := fmt.Sprintf("test case #%d", i)
rkt.runtimeHelper = &fakeRuntimeHelper{tt.dnsServers, tt.dnsSearches, tt.hostName, "", tt.err} rkt.runtimeHelper = &fakeRuntimeHelper{tt.dnsServers, tt.dnsSearches, tt.hostName, "", tt.err}
rkt.execer = &utilexec.FakeExec{CommandScript: []utilexec.FakeCommandAction{func(cmd string, args ...string) utilexec.Cmd {
return utilexec.InitFakeCmd(&utilexec.FakeCmd{}, cmd, args...)
}}}
// a command should be created of this form, but the returned command shouldn't be called (asserted by having no expectations on it)
result, err := rkt.generateRunCommand(tt.pod, tt.uuid, tt.netnsName) result, err := rkt.generateRunCommand(tt.pod, tt.uuid, tt.netnsName)
assert.Equal(t, tt.err, err, testCaseHint) assert.Equal(t, tt.err, err, testCaseHint)
@ -1617,3 +1624,137 @@ func TestGarbageCollect(t *testing.T) {
getter.pods = make(map[types.UID]*api.Pod) getter.pods = make(map[types.UID]*api.Pod)
} }
} }
// annotationsByName implements sort.Interface, ordering appc annotations by
// their Name so that annotation slices can be compared independent of order.
type annotationsByName []appctypes.Annotation

// Len reports the number of annotations.
func (list annotationsByName) Len() int {
	return len(list)
}

// Less reports whether the annotation at index x sorts before the one at y.
func (list annotationsByName) Less(x, y int) bool {
	left, right := list[x].Name, list[y].Name
	return left < right
}

// Swap exchanges the annotations at indices x and y.
func (list annotationsByName) Swap(x, y int) {
	tmp := list[x]
	list[x] = list[y]
	list[y] = tmp
}
// TestMakePodManifestAnnotations verifies that makePodManifest copies the
// kubelet bookkeeping annotations, and the per-pod stage1 override annotation,
// from the k8s pod onto the generated appc pod manifest.
func TestMakePodManifestAnnotations(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()

	mockVolumeGetter := mock_rkt.NewMockVolumeGetter(ctrl)

	fr := newFakeRktInterface()
	fs := newFakeSystemd()
	r := &Runtime{apisvc: fr, systemd: fs, volumeGetter: mockVolumeGetter}

	testCases := []struct {
		in     *api.Pod
		out    *appcschema.PodManifest
		outerr error
	}{
		{
			in: &api.Pod{
				ObjectMeta: api.ObjectMeta{
					UID:       "uid-1",
					Name:      "name-1",
					Namespace: "namespace-1",
					Annotations: map[string]string{
						k8sRktStage1NameAnno: "stage1-override-img",
					},
				},
			},
			out: &appcschema.PodManifest{
				Annotations: []appctypes.Annotation{
					{
						Name:  appctypes.ACIdentifier(k8sRktStage1NameAnno),
						Value: "stage1-override-img",
					},
					{
						Name:  appctypes.ACIdentifier(k8sRktUIDAnno),
						Value: "uid-1",
					},
					{
						Name:  appctypes.ACIdentifier(k8sRktNameAnno),
						Value: "name-1",
					},
					{
						Name:  appctypes.ACIdentifier(k8sRktKubeletAnno),
						Value: "true",
					},
					{
						Name:  appctypes.ACIdentifier(k8sRktNamespaceAnno),
						Value: "namespace-1",
					},
					{
						Name:  appctypes.ACIdentifier(k8sRktRestartCountAnno),
						Value: "0",
					},
				},
			},
		},
	}

	for i, testCase := range testCases {
		hint := fmt.Sprintf("case #%d", i)
		mockVolumeGetter.EXPECT().GetVolumes(gomock.Any()).Return(kubecontainer.VolumeMap{}, true)

		result, err := r.makePodManifest(testCase.in, []api.Secret{})
		// assert.Equal takes (expected, actual); keeping that order makes the
		// failure output label the two values correctly.
		assert.Equal(t, testCase.outerr, err, hint)
		if err == nil {
			// Annotation order is not part of the contract, so sort both
			// sides by name before comparing.
			sort.Sort(annotationsByName(result.Annotations))
			sort.Sort(annotationsByName(testCase.out.Annotations))
			assert.Equal(t, testCase.out.Annotations, result.Annotations, hint)
		}
	}
}
// TestPreparePodArgs checks the 'rkt prepare' arguments produced by
// preparePodArgs for every combination of the stage1 override annotation on
// the manifest and the Stage1Image configured on the runtime.
func TestPreparePodArgs(t *testing.T) {
	rt := &Runtime{config: &Config{}}

	// Fixture builders: each call returns a fresh manifest.
	withOverride := func() appcschema.PodManifest {
		return appcschema.PodManifest{
			Annotations: appctypes.Annotations{
				{Name: k8sRktStage1NameAnno, Value: "stage1-image"},
			},
		}
	}
	withoutOverride := func() appcschema.PodManifest {
		return appcschema.PodManifest{Annotations: appctypes.Annotations{}}
	}
	// expect returns the fixed 'rkt prepare' prefix followed by any extra flags.
	expect := func(extra ...string) []string {
		return append([]string{"prepare", "--quiet", "--pod-manifest", "file"}, extra...)
	}

	scenarios := []struct {
		manifest     appcschema.PodManifest
		stage1Config string
		cmd          []string
	}{
		{
			// Annotation only.
			manifest:     withOverride(),
			stage1Config: "",
			cmd:          expect("--stage1-name=stage1-image"),
		},
		{
			// Annotation takes precedence over the configured path.
			manifest:     withOverride(),
			stage1Config: "stage1-path",
			cmd:          expect("--stage1-name=stage1-image"),
		},
		{
			// Configured path only.
			manifest:     withoutOverride(),
			stage1Config: "stage1-path",
			cmd:          expect("--stage1-path=stage1-path"),
		},
		{
			// Neither: no stage1 flag at all.
			manifest:     withoutOverride(),
			stage1Config: "",
			cmd:          expect(),
		},
	}

	for idx := range scenarios {
		scenario := scenarios[idx]
		rt.config.Stage1Image = scenario.stage1Config
		got := rt.preparePodArgs(&scenario.manifest, "file")
		assert.Equal(t, scenario.cmd, got, fmt.Sprintf("Test case #%d", idx))
	}
}