Share /var/lib/kubernetes on startup

Kubelet makes sure that /var/lib/kubelet is rshared when it starts.
If not, it bind-mounts it with rshared propagation to containers
that mount volumes to /var/lib/kubelet can benefit from mount propagation.
pull/6/head
Jan Safranek 2017-08-30 16:45:04 +02:00
parent 5030391c07
commit d9500105d8
15 changed files with 362 additions and 86 deletions

View File

@ -75,6 +75,10 @@ func (mi *fakeMountInterface) PathIsDevice(pathname string) (bool, error) {
return true, nil
}
func (mi *fakeMountInterface) MakeRShared(path string) error {
return nil
}
func fakeContainerMgrMountInt() mount.Interface {
return &fakeMountInterface{
[]mount.MountPoint{

View File

@ -1157,6 +1157,9 @@ func (kl *Kubelet) setupDataDirs() error {
if err := os.MkdirAll(kl.getRootDir(), 0750); err != nil {
return fmt.Errorf("error creating root directory: %v", err)
}
if err := kl.mounter.MakeRShared(kl.getRootDir()); err != nil {
return fmt.Errorf("error configuring root directory: %v", err)
}
if err := os.MkdirAll(kl.getPodsDir(), 0750); err != nil {
return fmt.Errorf("error creating pods directory: %v", err)
}

View File

@ -160,6 +160,7 @@ func newTestKubeletWithImageList(
kubelet.recorder = fakeRecorder
kubelet.kubeClient = fakeKubeClient
kubelet.os = &containertest.FakeOS{}
kubelet.mounter = &mount.FakeMounter{}
kubelet.hostname = testKubeletHostname
kubelet.nodeName = types.NodeName(testKubeletHostname)

View File

@ -46,6 +46,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/status"
statustest "k8s.io/kubernetes/pkg/kubelet/status/testing"
"k8s.io/kubernetes/pkg/kubelet/volumemanager"
"k8s.io/kubernetes/pkg/util/mount"
"k8s.io/kubernetes/pkg/volume"
volumetest "k8s.io/kubernetes/pkg/volume/testing"
)
@ -127,6 +128,7 @@ func TestRunOnce(t *testing.T) {
kb.evictionManager = evictionManager
kb.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)
kb.mounter = &mount.FakeMounter{}
if err := kb.setupDataDirs(); err != nil {
t.Errorf("Failed to init data dirs: %v", err)
}

View File

@ -7,7 +7,10 @@ load(
go_library(
name = "go_default_library",
srcs = ["writer.go"],
srcs = [
"consistentread.go",
"writer.go",
],
deps = ["//vendor/github.com/golang/glog:go_default_library"],
)

View File

@ -0,0 +1,45 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package io
import (
"bytes"
"fmt"
"io/ioutil"
)
// ConsistentRead repeatedly reads a file until it gets the same content twice.
// This is useful when reading files in /proc that are larger than page size
// and kernel may modify them between individual read() syscalls.
func ConsistentRead(filename string, attempts int) ([]byte, error) {
oldContent, err := ioutil.ReadFile(filename)
if err != nil {
return nil, err
}
for i := 0; i < attempts; i++ {
newContent, err := ioutil.ReadFile(filename)
if err != nil {
return nil, err
}
if bytes.Compare(oldContent, newContent) == 0 {
return newContent, nil
}
// Files are different, continue reading
oldContent = newContent
}
return nil, fmt.Errorf("could not get consistent content of %s after %d attempts", filename, attempts)
}

View File

@ -27,6 +27,7 @@ go_library(
"//vendor/k8s.io/utils/exec:go_default_library",
] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"//pkg/util/io:go_default_library",
"//vendor/golang.org/x/sys/unix:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
],

View File

@ -171,3 +171,7 @@ func (f *FakeMounter) PathIsDevice(pathname string) (bool, error) {
func (f *FakeMounter) GetDeviceNameFromMount(mountPath, pluginDir string) (string, error) {
return getDeviceNameFromMount(f, mountPath, pluginDir)
}
func (f *FakeMounter) MakeRShared(path string) error {
return nil
}

View File

@ -67,6 +67,9 @@ type Interface interface {
// GetDeviceNameFromMount finds the device name by checking the mount path
// to get the global mount path which matches its plugin directory
GetDeviceNameFromMount(mountPath, pluginDir string) (string, error)
// MakeRShared checks that given path is on a mount with 'rshared' mount
// propagation. If not, it bind-mounts the path as rshared.
MakeRShared(path string) error
}
// Exec executes command where mount utilities are. This can be either the host,

View File

@ -19,10 +19,7 @@ limitations under the License.
package mount
import (
"bufio"
"fmt"
"hash/fnv"
"io"
"os"
"os/exec"
"strconv"
@ -32,6 +29,7 @@ import (
"github.com/golang/glog"
"golang.org/x/sys/unix"
"k8s.io/apimachinery/pkg/util/sets"
utilio "k8s.io/kubernetes/pkg/util/io"
utilexec "k8s.io/utils/exec"
)
@ -42,6 +40,8 @@ const (
expectedNumFieldsPerLine = 6
// Location of the mount file to use
procMountsPath = "/proc/mounts"
// Location of the mountinfo file
procMountInfoPath = "/proc/self/mountinfo"
)
const (
@ -333,76 +333,54 @@ func (mounter *Mounter) GetDeviceNameFromMount(mountPath, pluginDir string) (str
}
func listProcMounts(mountFilePath string) ([]MountPoint, error) {
hash1, err := readProcMounts(mountFilePath, nil)
content, err := utilio.ConsistentRead(mountFilePath, maxListTries)
if err != nil {
return nil, err
}
for i := 0; i < maxListTries; i++ {
mps := []MountPoint{}
hash2, err := readProcMounts(mountFilePath, &mps)
if err != nil {
return nil, err
}
if hash1 == hash2 {
// Success
return mps, nil
}
hash1 = hash2
}
return nil, fmt.Errorf("failed to get a consistent snapshot of %v after %d tries", mountFilePath, maxListTries)
return parseProcMounts(content)
}
// readProcMounts reads the given mountFilePath (normally /proc/mounts) and produces a hash
// of the contents. If the out argument is not nil, this fills it with MountPoint structs.
func readProcMounts(mountFilePath string, out *[]MountPoint) (uint32, error) {
file, err := os.Open(mountFilePath)
if err != nil {
return 0, err
}
defer file.Close()
return readProcMountsFrom(file, out)
}
func readProcMountsFrom(file io.Reader, out *[]MountPoint) (uint32, error) {
hash := fnv.New32a()
scanner := bufio.NewReader(file)
for {
line, err := scanner.ReadString('\n')
if err == io.EOF {
break
func parseProcMounts(content []byte) ([]MountPoint, error) {
out := []MountPoint{}
lines := strings.Split(string(content), "\n")
for _, line := range lines {
if line == "" {
// the last split() item is empty string following the last \n
continue
}
fields := strings.Fields(line)
if len(fields) != expectedNumFieldsPerLine {
return 0, fmt.Errorf("wrong number of fields (expected %d, got %d): %s", expectedNumFieldsPerLine, len(fields), line)
return nil, fmt.Errorf("wrong number of fields (expected %d, got %d): %s", expectedNumFieldsPerLine, len(fields), line)
}
fmt.Fprintf(hash, "%s", line)
if out != nil {
mp := MountPoint{
Device: fields[0],
Path: fields[1],
Type: fields[2],
Opts: strings.Split(fields[3], ","),
}
freq, err := strconv.Atoi(fields[4])
if err != nil {
return 0, err
}
mp.Freq = freq
pass, err := strconv.Atoi(fields[5])
if err != nil {
return 0, err
}
mp.Pass = pass
*out = append(*out, mp)
mp := MountPoint{
Device: fields[0],
Path: fields[1],
Type: fields[2],
Opts: strings.Split(fields[3], ","),
}
freq, err := strconv.Atoi(fields[4])
if err != nil {
return nil, err
}
mp.Freq = freq
pass, err := strconv.Atoi(fields[5])
if err != nil {
return nil, err
}
mp.Pass = pass
out = append(out, mp)
}
return hash.Sum32(), nil
return out, nil
}
func (mounter *Mounter) MakeRShared(path string) error {
mountCmd := defaultMountCommand
mountArgs := []string{}
return doMakeRShared(path, procMountInfoPath, mountCmd, mountArgs)
}
// formatAndMount uses unix utils to format and mount the given disk
@ -502,3 +480,97 @@ func (mounter *SafeFormatAndMount) getDiskFormat(disk string) (string, error) {
// and MD RAID are reported as FSTYPE and caught above).
return "unknown data, probably partitions", nil
}
// isShared returns true, if given path is on a mount point that has shared
// mount propagation.
func isShared(path string, filename string) (bool, error) {
infos, err := parseMountInfo(filename)
if err != nil {
return false, err
}
// process /proc/xxx/mountinfo in backward order and find the first mount
// point that is prefix of 'path' - that's the mount where path resides
var info *mountInfo
for i := len(infos) - 1; i >= 0; i-- {
if strings.HasPrefix(path, infos[i].mountPoint) {
info = &infos[i]
break
}
}
if info == nil {
return false, fmt.Errorf("cannot find mount point for %q", path)
}
// parse optional parameters
for _, opt := range info.optional {
if strings.HasPrefix(opt, "shared:") {
return true, nil
}
}
return false, nil
}
type mountInfo struct {
mountPoint string
// list of "optional parameters", mount propagation is one of them
optional []string
}
// parseMountInfo parses /proc/xxx/mountinfo.
func parseMountInfo(filename string) ([]mountInfo, error) {
content, err := utilio.ConsistentRead(filename, maxListTries)
if err != nil {
return []mountInfo{}, err
}
contentStr := string(content)
infos := []mountInfo{}
for _, line := range strings.Split(contentStr, "\n") {
if line == "" {
// the last split() item is empty string following the last \n
continue
}
fields := strings.Fields(line)
if len(fields) < 7 {
return nil, fmt.Errorf("wrong number of fields in (expected %d, got %d): %s", 8, len(fields), line)
}
info := mountInfo{
mountPoint: fields[4],
optional: []string{},
}
for i := 6; i < len(fields) && fields[i] != "-"; i++ {
info.optional = append(info.optional, fields[i])
}
infos = append(infos, info)
}
return infos, nil
}
// doMakeRShared is common implementation of MakeRShared on Linux. It checks if
// path is shared and bind-mounts it as rshared if needed. mountCmd and
// mountArgs are expected to contain mount-like command, doMakeRShared will add
// '--bind <path> <path>' and '--make-rshared <path>' to mountArgs.
func doMakeRShared(path string, mountInfoFilename string, mountCmd string, mountArgs []string) error {
shared, err := isShared(path, mountInfoFilename)
if err != nil {
return err
}
if shared {
glog.V(4).Infof("Directory %s is already on a shared mount", path)
return nil
}
glog.V(2).Infof("Bind-mounting %q with shared mount propagation", path)
// mount --bind /var/lib/kubelet /var/lib/kubelet
if err := syscall.Mount(path, path, "" /*fstype*/, syscall.MS_BIND, "" /*data*/); err != nil {
return fmt.Errorf("failed to bind-mount %s: %v", path, err)
}
// mount --make-rshared /var/lib/kubelet
if err := syscall.Mount(path, path, "" /*fstype*/, syscall.MS_SHARED|syscall.MS_REC, "" /*data*/); err != nil {
return fmt.Errorf("failed to make %s rshared: %v", path, err)
}
return nil
}

View File

@ -19,32 +19,23 @@ limitations under the License.
package mount
import (
"io/ioutil"
"os"
"path/filepath"
"reflect"
"strings"
"testing"
)
func TestReadProcMountsFrom(t *testing.T) {
successCase :=
`/dev/0 /path/to/0 type0 flags 0 0
/dev/1 /path/to/1 type1 flags 1 1
/dev/2 /path/to/2 type2 flags,1,2=3 2 2
`
/dev/1 /path/to/1 type1 flags 1 1
/dev/2 /path/to/2 type2 flags,1,2=3 2 2
`
// NOTE: readProcMountsFrom has been updated to using fnv.New32a()
hash, err := readProcMountsFrom(strings.NewReader(successCase), nil)
mounts, err := parseProcMounts([]byte(successCase))
if err != nil {
t.Errorf("expected success")
}
if hash != 0xa290ff0b {
t.Errorf("expected 0xa290ff0b, got %#x", hash)
}
mounts := []MountPoint{}
hash, err = readProcMountsFrom(strings.NewReader(successCase), &mounts)
if err != nil {
t.Errorf("expected success")
}
if hash != 0xa290ff0b {
t.Errorf("expected 0xa290ff0b, got %#x", hash)
t.Errorf("expected success, got %v", err)
}
if len(mounts) != 3 {
t.Fatalf("expected 3 mounts, got %d", len(mounts))
@ -68,7 +59,7 @@ func TestReadProcMountsFrom(t *testing.T) {
"/dev/2 /path/to/mount type flags 0 b\n",
}
for _, ec := range errorCases {
_, err := readProcMountsFrom(strings.NewReader(ec), &mounts)
_, err := parseProcMounts([]byte(ec))
if err == nil {
t.Errorf("expected error")
}
@ -208,3 +199,126 @@ func TestGetMountRefsByDev(t *testing.T) {
}
}
}
func writeFile(content string) (string, string, error) {
tempDir, err := ioutil.TempDir("", "mounter_shared_test")
if err != nil {
return "", "", err
}
filename := filepath.Join(tempDir, "mountinfo")
err = ioutil.WriteFile(filename, []byte(content), 0600)
if err != nil {
os.RemoveAll(tempDir)
return "", "", err
}
return tempDir, filename, nil
}
func TestIsSharedSuccess(t *testing.T) {
successMountInfo :=
`62 0 253:0 / / rw,relatime shared:1 - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered
76 62 8:1 / /boot rw,relatime shared:29 - ext4 /dev/sda1 rw,seclabel,data=ordered
78 62 0:41 / /tmp rw,nosuid,nodev shared:30 - tmpfs tmpfs rw,seclabel
80 62 0:42 / /var/lib/nfs/rpc_pipefs rw,relatime shared:31 - rpc_pipefs sunrpc rw
82 62 0:43 / /var/lib/foo rw,relatime shared:32 - tmpfs tmpfs rw
83 63 0:44 / /var/lib/bar rw,relatime - tmpfs tmpfs rw
227 62 253:0 /var/lib/docker/devicemapper /var/lib/docker/devicemapper rw,relatime - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered
224 62 253:0 /var/lib/docker/devicemapper/test/shared /var/lib/docker/devicemapper/test/shared rw,relatime master:1 shared:44 - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered
`
tempDir, filename, err := writeFile(successMountInfo)
if err != nil {
t.Fatalf("cannot create temporary file: %v", err)
}
defer os.RemoveAll(tempDir)
tests := []struct {
name string
path string
expectedResult bool
}{
{
// /var/lib/kubelet is a directory on mount '/' that is shared
// This is the most common case.
"shared",
"/var/lib/kubelet",
true,
},
{
// 8a2a... is a directory on mount /var/lib/docker/devicemapper
// that is private.
"private",
"/var/lib/docker/devicemapper/mnt/8a2a5c19eefb06d6f851dfcb240f8c113427f5b49b19658b5c60168e88267693/",
false,
},
{
// 'directory' is a directory on mount
// /var/lib/docker/devicemapper/test/shared that is shared, but one
// of its parent is private.
"nested-shared",
"/var/lib/docker/devicemapper/test/shared/my/test/directory",
true,
},
{
// /var/lib/foo is a mount point and it's shared
"shared-mount",
"/var/lib/foo",
true,
},
{
// /var/lib/bar is a mount point and it's private
"private-mount",
"/var/lib/bar",
false,
},
}
for _, test := range tests {
ret, err := isShared(test.path, filename)
if err != nil {
t.Errorf("test %s got unexpected error: %v", test.name, err)
}
if ret != test.expectedResult {
t.Errorf("test %s expected %v, got %v", test.name, test.expectedResult, ret)
}
}
}
func TestIsSharedFailure(t *testing.T) {
errorTests := []struct {
name string
content string
}{
{
// the first line is too short
name: "too-short-line",
content: `62 0 253:0 / / rw,relatime
76 62 8:1 / /boot rw,relatime shared:29 - ext4 /dev/sda1 rw,seclabel,data=ordered
78 62 0:41 / /tmp rw,nosuid,nodev shared:30 - tmpfs tmpfs rw,seclabel
80 62 0:42 / /var/lib/nfs/rpc_pipefs rw,relatime shared:31 - rpc_pipefs sunrpc rw
227 62 253:0 /var/lib/docker/devicemapper /var/lib/docker/devicemapper rw,relatime - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered
224 62 253:0 /var/lib/docker/devicemapper/test/shared /var/lib/docker/devicemapper/test/shared rw,relatime master:1 shared:44 - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered
`,
},
{
// there is no root mount
name: "no-root-mount",
content: `76 62 8:1 / /boot rw,relatime shared:29 - ext4 /dev/sda1 rw,seclabel,data=ordered
78 62 0:41 / /tmp rw,nosuid,nodev shared:30 - tmpfs tmpfs rw,seclabel
80 62 0:42 / /var/lib/nfs/rpc_pipefs rw,relatime shared:31 - rpc_pipefs sunrpc rw
227 62 253:0 /var/lib/docker/devicemapper /var/lib/docker/devicemapper rw,relatime - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered
224 62 253:0 /var/lib/docker/devicemapper/test/shared /var/lib/docker/devicemapper/test/shared rw,relatime master:1 shared:44 - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered
`,
},
}
for _, test := range errorTests {
tempDir, filename, err := writeFile(test.content)
if err != nil {
t.Fatalf("cannot create temporary file: %v", err)
}
defer os.RemoveAll(tempDir)
_, err = isShared("/", filename)
if err == nil {
t.Errorf("test %q: expected error, got none", test.name)
}
}
}

View File

@ -67,6 +67,10 @@ func (mounter *Mounter) PathIsDevice(pathname string) (bool, error) {
return true, nil
}
func (mounter *Mounter) MakeRShared(path string) error {
return nil
}
func (mounter *SafeFormatAndMount) formatAndMount(source string, target string, fstype string, options []string) error {
return nil
}

View File

@ -90,9 +90,11 @@ func NewNsenterMounter() *NsenterMounter {
var _ = Interface(&NsenterMounter{})
const (
hostRootFsPath = "/rootfs"
hostProcMountsPath = "/rootfs/proc/1/mounts"
nsenterPath = "nsenter"
hostRootFsPath = "/rootfs"
hostProcMountsPath = "/rootfs/proc/1/mounts"
hostProcMountinfoPath = "/rootfs/proc/1/mountinfo"
hostMountNamespacePath = "/rootfs/proc/1/ns/mnt"
nsenterPath = "nsenter"
)
// Mount runs mount(8) in the host's root mount namespace. Aside from this
@ -164,7 +166,7 @@ func (n *NsenterMounter) makeNsenterArgs(source, target, fstype string, options
}
nsenterArgs := []string{
"--mount=/rootfs/proc/1/ns/mnt",
"--mount=" + hostMountNamespacePath,
"--",
mountCmd,
}
@ -176,7 +178,7 @@ func (n *NsenterMounter) makeNsenterArgs(source, target, fstype string, options
// Unmount runs umount(8) in the host's mount namespace.
func (n *NsenterMounter) Unmount(target string) error {
args := []string{
"--mount=/rootfs/proc/1/ns/mnt",
"--mount=" + hostMountNamespacePath,
"--",
n.absHostPath("umount"),
target,
@ -225,7 +227,7 @@ func (n *NsenterMounter) IsLikelyNotMountPoint(file string) (bool, error) {
// the first of multiple possible mountpoints using --first-only.
// Also add fstype output to make sure that the output of target file will give the full path
// TODO: Need more refactoring for this function. Track the solution with issue #26996
args := []string{"--mount=/rootfs/proc/1/ns/mnt", "--", n.absHostPath("findmnt"), "-o", "target,fstype", "--noheadings", "--first-only", "--target", file}
args := []string{"--mount=" + hostMountNamespacePath, "--", n.absHostPath("findmnt"), "-o", "target,fstype", "--noheadings", "--first-only", "--target", file}
glog.V(5).Infof("findmnt command: %v %v", nsenterPath, args)
exec := exec.New()
@ -290,3 +292,13 @@ func (n *NsenterMounter) absHostPath(command string) string {
}
return path
}
func (n *NsenterMounter) MakeRShared(path string) error {
nsenterCmd := nsenterPath
nsenterArgs := []string{
"--mount=" + hostMountNamespacePath,
"--",
n.absHostPath("mount"),
}
return doMakeRShared(path, hostProcMountinfoPath, nsenterCmd, nsenterArgs)
}

View File

@ -61,3 +61,7 @@ func (*NsenterMounter) PathIsDevice(pathname string) (bool, error) {
func (*NsenterMounter) GetDeviceNameFromMount(mountPath, pluginDir string) (string, error) {
return "", nil
}
func (*NsenterMounter) MakeRShared(path string) error {
return nil
}

View File

@ -66,6 +66,10 @@ func (mounter *fakeMounter) IsLikelyNotMountPoint(file string) (bool, error) {
return true, nil
}
func (mounter *fakeMounter) MakeRShared(path string) error {
return nil
}
func TestRemoveAllOneFilesystem(t *testing.T) {
tests := []struct {
name string