Ubernetes Lite: Volumes can dictate zone scheduling
For AWS EBS, a volume can only be attached to a node in the same AZ. The scheduler must therefore detect when a pod references a volume, and ensure that the pod is scheduled on a node in the same AZ as that volume. So that the scheduler need not query the cloud provider on every scheduling decision, and to support decoupled operation (e.g. bare metal), we tag the volume with our placement labels. On AWS this happens automatically, by means of an admission controller, whenever a PersistentVolume backed by an EBS volume is created. Support for labeling GCE PVs will follow. Pods that specify a volume directly (i.e. without using a PersistentVolumeClaim) will not currently be scheduled correctly (i.e. they will be scheduled without zone-awareness).
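To make the flow concrete, here is a minimal sketch (not part of this commit) of the labeling rule: the admission controller asks the cloud provider for the volume's zone and region and writes them onto the PV as labels, which the scheduler later matches against node labels. The label keys below are illustrative stand-ins; the real key strings are the values of the `unversioned.LabelZoneFailureDomain` and `unversioned.LabelZoneRegion` constants used in the diff.

```go
package main

import "fmt"

func main() {
	// Stand-in label keys; the real keys are whatever
	// unversioned.LabelZoneFailureDomain and unversioned.LabelZoneRegion resolve to.
	const zoneLabel = "example.io/zone"
	const regionLabel = "example.io/region"

	az := "us-east-1a"       // reported by the cloud provider for the EBS volume
	region := az[:len(az)-1] // same lexical derivation as azToRegion in the AWS provider below

	// What the PersistentVolumeLabel admission controller attaches to the PV:
	pvLabels := map[string]string{zoneLabel: az, regionLabel: region}
	fmt.Println(pvLabels)

	// The NoVolumeZoneConflict predicate later requires a candidate node to
	// carry matching values for these keys before the pod may schedule there.
}
```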
parent 7743a4ca89
commit f9a6ac077e
@@ -126,7 +126,7 @@ if [[ "${ENABLE_NODE_AUTOSCALER}" == "true" ]]; then
 fi
 
 # Admission Controllers to invoke prior to persisting objects in cluster
-ADMISSION_CONTROL=NamespaceLifecycle,LimitRanger,SecurityContextDeny,ServiceAccount,ResourceQuota
+ADMISSION_CONTROL=NamespaceLifecycle,LimitRanger,SecurityContextDeny,ServiceAccount,ResourceQuota,PersistentVolumeLabel
 
 # Optional: Enable/disable public IP assignment for minions.
 # Important Note: disable only if you have setup a NAT instance for internet access and configured appropriate routes!

@@ -122,7 +122,7 @@ if [[ "${ENABLE_NODE_AUTOSCALER}" == "true" ]]; then
 fi
 
 # Admission Controllers to invoke prior to persisting objects in cluster
-ADMISSION_CONTROL=NamespaceLifecycle,LimitRanger,SecurityContextDeny,ServiceAccount,ResourceQuota
+ADMISSION_CONTROL=NamespaceLifecycle,LimitRanger,SecurityContextDeny,ServiceAccount,ResourceQuota,PersistentVolumeLabel
 
 # Optional: Enable/disable public IP assignment for minions.
 # Important Note: disable only if you have setup a NAT instance for internet access and configured appropriate routes!

@@ -33,6 +33,7 @@ import (
 	_ "k8s.io/kubernetes/plugin/pkg/admission/namespace/autoprovision"
 	_ "k8s.io/kubernetes/plugin/pkg/admission/namespace/exists"
 	_ "k8s.io/kubernetes/plugin/pkg/admission/namespace/lifecycle"
+	_ "k8s.io/kubernetes/plugin/pkg/admission/persistentvolume/label"
 	_ "k8s.io/kubernetes/plugin/pkg/admission/resourcequota"
 	_ "k8s.io/kubernetes/plugin/pkg/admission/securitycontext/scdeny"
 	_ "k8s.io/kubernetes/plugin/pkg/admission/serviceaccount"

@@ -51,7 +51,7 @@ kube-apiserver
 ### Options
 
 ```
-      --admission-control="AlwaysAdmit": Ordered list of plug-ins to do admission control of resources into cluster. Comma-delimited list of: AlwaysAdmit, AlwaysDeny, AlwaysPullImages, DenyEscalatingExec, DenyExecOnPrivileged, InitialResources, LimitRanger, NamespaceAutoProvision, NamespaceExists, NamespaceLifecycle, ResourceQuota, SecurityContextDeny, ServiceAccount
+      --admission-control="AlwaysAdmit": Ordered list of plug-ins to do admission control of resources into cluster. Comma-delimited list of: AlwaysAdmit, AlwaysDeny, AlwaysPullImages, DenyEscalatingExec, DenyExecOnPrivileged, InitialResources, LimitRanger, NamespaceAutoProvision, NamespaceExists, NamespaceLifecycle, PersistentVolumeLabel, ResourceQuota, SecurityContextDeny, ServiceAccount
       --admission-control-config-file="": File with admission control configuration.
       --advertise-address=<nil>: The IP address on which to advertise the apiserver to members of the cluster. This address must be reachable by the rest of the cluster. If blank, the --bind-address will be used. If --bind-address is unspecified, the host's default interface will be used.
       --allow-privileged[=false]: If true, allow privileged containers.

@@ -41,6 +41,7 @@ For each unscheduled Pod, the Kubernetes scheduler tries to find a node across t
 The purpose of filtering the nodes is to filter out the nodes that do not meet certain requirements of the Pod. For example, if the free resource on a node (measured by the capacity minus the sum of the resource requests of all the Pods that already run on the node) is less than the Pod's required resource, the node should not be considered in the ranking phase so it is filtered out. Currently, there are several "predicates" implementing different filtering policies, including:
 
 - `NoDiskConflict`: Evaluate if a pod can fit due to the volumes it requests, and those that are already mounted.
+- `NoVolumeZoneConflict`: Evaluate if the volumes a pod requests are available on the node, given the Zone restrictions.
 - `PodFitsResources`: Check if the free resource (CPU and Memory) meets the requirement of the Pod. The free resource is measured by the capacity minus the sum of requests of all Pods on the node. To learn more about the resource QoS in Kubernetes, please check [QoS proposal](../proposals/resource-qos.md).
 - `PodFitsHostPorts`: Check if any HostPort required by the Pod is already occupied on the node.
 - `PodFitsHost`: Filter out all nodes except the one specified in the PodSpec's NodeName field.

@@ -5,6 +5,7 @@
 		{"name" : "PodFitsPorts"},
 		{"name" : "PodFitsResources"},
 		{"name" : "NoDiskConflict"},
+		{"name" : "NoVolumeZoneConflict"},
 		{"name" : "MatchNodeSelector"},
 		{"name" : "HostName"}
 	],

@@ -417,3 +417,42 @@ func (s *StoreToJobLister) GetPodJobs(pod *api.Pod) (jobs []extensions.Job, err
 	}
 	return
 }
+
+// Typed wrapper around a store of PersistentVolumes
+type StoreToPVFetcher struct {
+	Store
+}
+
+// GetPersistentVolumeInfo returns cached data for the PersistentVolume 'id'.
+func (s *StoreToPVFetcher) GetPersistentVolumeInfo(id string) (*api.PersistentVolume, error) {
+	o, exists, err := s.Get(&api.PersistentVolume{ObjectMeta: api.ObjectMeta{Name: id}})
+
+	if err != nil {
+		return nil, fmt.Errorf("error retrieving PersistentVolume '%v' from cache: %v", id, err)
+	}
+
+	if !exists {
+		return nil, fmt.Errorf("PersistentVolume '%v' is not in cache", id)
+	}
+
+	return o.(*api.PersistentVolume), nil
+}
+
+// Typed wrapper around a store of PersistentVolumeClaims
+type StoreToPVCFetcher struct {
+	Store
+}
+
+// GetPersistentVolumeClaimInfo returns cached data for the PersistentVolumeClaim 'id'.
+func (s *StoreToPVCFetcher) GetPersistentVolumeClaimInfo(namespace string, id string) (*api.PersistentVolumeClaim, error) {
+	o, exists, err := s.Get(&api.PersistentVolumeClaim{ObjectMeta: api.ObjectMeta{Namespace: namespace, Name: id}})
+	if err != nil {
+		return nil, fmt.Errorf("error retrieving PersistentVolumeClaim '%s/%s' from cache: %v", namespace, id, err)
+	}
+
+	if !exists {
+		return nil, fmt.Errorf("PersistentVolumeClaim '%s/%s' is not in cache", namespace, id)
+	}
+
+	return o.(*api.PersistentVolumeClaim), nil
+}

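A hypothetical usage sketch of the new fetchers (not from this commit): the scheduler factory further down wires the store to a reflector, whereas here it is filled by hand to show the lookup path. Import paths are as they appear in this tree.

```go
package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/client/cache"
)

func main() {
	store := cache.NewStore(cache.MetaNamespaceKeyFunc)
	pvFetcher := &cache.StoreToPVFetcher{Store: store}

	// Populate the cache manually; in the scheduler a reflector keeps it fresh.
	store.Add(&api.PersistentVolume{ObjectMeta: api.ObjectMeta{Name: "pv-1"}})

	if pv, err := pvFetcher.GetPersistentVolumeInfo("pv-1"); err == nil {
		fmt.Println(pv.Name) // cache hit
	}
	if _, err := pvFetcher.GetPersistentVolumeInfo("pv-2"); err != nil {
		fmt.Println(err) // "PersistentVolume 'pv-2' is not in cache"
	}
}
```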
@@ -45,6 +45,7 @@ import (
 	"k8s.io/kubernetes/pkg/util/sets"
 
 	"github.com/golang/glog"
+	"k8s.io/kubernetes/pkg/api/unversioned"
 )
 
 const ProviderName = "aws"

@@ -158,6 +159,9 @@ type Volumes interface {
 	// Create a volume with the specified options
 	CreateVolume(volumeOptions *VolumeOptions) (volumeName string, err error)
 	DeleteVolume(volumeName string) error
+
+	// Get labels to apply to volume on creation
+	GetVolumeLabels(volumeName string) (map[string]string, error)
 }
 
 // InstanceGroups is an interface for managing cloud-managed instance groups / autoscaling instance groups

@@ -510,6 +514,16 @@ func isRegionValid(region string) bool {
 	return false
 }
 
+// Derives the region from a valid az name.
+// Returns an error if the az is known invalid (empty)
+func azToRegion(az string) (string, error) {
+	if len(az) < 1 {
+		return "", fmt.Errorf("invalid (empty) AZ")
+	}
+	region := az[:len(az)-1]
+	return region, nil
+}
+
 // newAWSCloud creates a new instance of AWSCloud.
 // AWSProvider and instanceId are primarily for tests
 func newAWSCloud(config io.Reader, awsServices AWSServices) (*AWSCloud, error) {

@@ -527,7 +541,10 @@ func newAWSCloud(config io.Reader, awsServices AWSServices) (*AWSCloud, error) {
 	if len(zone) <= 1 {
 		return nil, fmt.Errorf("invalid AWS zone in config file: %s", zone)
 	}
-	regionName := zone[:len(zone)-1]
+	regionName, err := azToRegion(zone)
+	if err != nil {
+		return nil, err
+	}
 
 	valid := isRegionValid(regionName)
 	if !valid {

@@ -1274,6 +1291,32 @@ func (aws *AWSCloud) DeleteVolume(volumeName string) error {
 	return awsDisk.deleteVolume()
 }
 
+// Implements Volumes.GetVolumeLabels
+func (c *AWSCloud) GetVolumeLabels(volumeName string) (map[string]string, error) {
+	awsDisk, err := newAWSDisk(c, volumeName)
+	if err != nil {
+		return nil, err
+	}
+	info, err := awsDisk.getInfo()
+	if err != nil {
+		return nil, err
+	}
+	labels := make(map[string]string)
+	az := aws.StringValue(info.AvailabilityZone)
+	if az == "" {
+		return nil, fmt.Errorf("volume did not have AZ information: %q", info.VolumeId)
+	}
+
+	labels[unversioned.LabelZoneFailureDomain] = az
+	region, err := azToRegion(az)
+	if err != nil {
+		return nil, err
+	}
+	labels[unversioned.LabelZoneRegion] = region
+
+	return labels, nil
+}
+
 func (v *AWSCloud) Configure(name string, spec *api.NodeSpec) error {
 	return nil
 }

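The region derivation above is purely lexical: an AWS availability zone name is its region name plus a one-letter suffix. A standalone sketch of the same rule:

```go
package main

import "fmt"

// azToRegion mirrors the helper added above: strip the trailing zone letter.
func azToRegion(az string) (string, error) {
	if len(az) < 1 {
		return "", fmt.Errorf("invalid (empty) AZ")
	}
	return az[:len(az)-1], nil
}

func main() {
	for _, az := range []string{"us-east-1a", "eu-west-1c"} {
		region, _ := azToRegion(az)
		fmt.Printf("%s -> %s\n", az, region) // us-east-1a -> us-east-1, eu-west-1c -> eu-west-1
	}
}
```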
@@ -0,0 +1,131 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package label
+
+import (
+	"fmt"
+	"io"
+	"sync"
+
+	"k8s.io/kubernetes/pkg/admission"
+	"k8s.io/kubernetes/pkg/api"
+	client "k8s.io/kubernetes/pkg/client/unversioned"
+	"k8s.io/kubernetes/pkg/cloudprovider"
+	"k8s.io/kubernetes/pkg/cloudprovider/providers/aws"
+)
+
+func init() {
+	admission.RegisterPlugin("PersistentVolumeLabel", func(client client.Interface, config io.Reader) (admission.Interface, error) {
+		persistentVolumeLabelAdmission := NewPersistentVolumeLabel()
+		return persistentVolumeLabelAdmission, nil
+	})
+}
+
+var _ = admission.Interface(&persistentVolumeLabel{})
+
+type persistentVolumeLabel struct {
+	*admission.Handler
+
+	mutex      sync.Mutex
+	ebsVolumes aws.Volumes
+}
+
+// NewPersistentVolumeLabel returns an admission.Interface implementation which adds labels to PersistentVolume CREATE requests,
+// based on the labels provided by the underlying cloud provider.
+//
+// As a side effect, the cloud provider may block invalid or non-existent volumes.
+func NewPersistentVolumeLabel() *persistentVolumeLabel {
+	return &persistentVolumeLabel{
+		Handler: admission.NewHandler(admission.Create),
+	}
+}
+
+func (l *persistentVolumeLabel) Admit(a admission.Attributes) (err error) {
+	if a.GetResource() != api.Resource("persistentvolumes") {
+		return nil
+	}
+	obj := a.GetObject()
+	if obj == nil {
+		return nil
+	}
+	volume, ok := obj.(*api.PersistentVolume)
+	if !ok {
+		return nil
+	}
+
+	var volumeLabels map[string]string
+	if volume.Spec.AWSElasticBlockStore != nil {
+		labels, err := l.findAWSEBSLabels(volume)
+		if err != nil {
+			return admission.NewForbidden(a, fmt.Errorf("error querying AWS EBS volume %s: %v", volume.Spec.AWSElasticBlockStore.VolumeID, err))
+		}
+		volumeLabels = labels
+	}
+
+	if len(volumeLabels) != 0 {
+		if volume.Labels == nil {
+			volume.Labels = make(map[string]string)
+		}
+		for k, v := range volumeLabels {
+			// We (silently) replace labels if they are provided.
+			// This should be OK because they are in the kubernetes.io namespace
+			// i.e. we own them
+			volume.Labels[k] = v
+		}
+	}
+
+	return nil
+}
+
+func (l *persistentVolumeLabel) findAWSEBSLabels(volume *api.PersistentVolume) (map[string]string, error) {
+	ebsVolumes, err := l.getEBSVolumes()
+	if err != nil {
+		return nil, err
+	}
+	if ebsVolumes == nil {
+		return nil, fmt.Errorf("unable to build AWS cloud provider for EBS")
+	}
+
+	// TODO: GetVolumeLabels is actually a method on the Volumes interface
+	// If that gets standardized we can refactor to reduce code duplication
+	labels, err := ebsVolumes.GetVolumeLabels(volume.Spec.AWSElasticBlockStore.VolumeID)
+	if err != nil {
+		return nil, err
+	}
+
+	return labels, err
+}
+
+// getEBSVolumes returns the AWS Volumes interface for ebs
+func (l *persistentVolumeLabel) getEBSVolumes() (aws.Volumes, error) {
+	l.mutex.Lock()
+	defer l.mutex.Unlock()
+
+	if l.ebsVolumes == nil {
+		cloudProvider, err := cloudprovider.GetCloudProvider("aws", nil)
+		if err != nil || cloudProvider == nil {
+			return nil, err
+		}
+		awsCloudProvider, ok := cloudProvider.(*aws.AWSCloud)
+		if !ok {
+			// GetCloudProvider has gone very wrong
+			return nil, fmt.Errorf("error retrieving AWS cloud provider")
+		}
+		l.ebsVolumes = awsCloudProvider
+	}
+	return l.ebsVolumes, nil
+}

@@ -0,0 +1,154 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package label
+
+import (
+	"testing"
+
+	"fmt"
+
+	"k8s.io/kubernetes/pkg/admission"
+	"k8s.io/kubernetes/pkg/api"
+	"k8s.io/kubernetes/pkg/cloudprovider/providers/aws"
+)
+
+type mockVolumes struct {
+	volumeLabels      map[string]string
+	volumeLabelsError error
+}
+
+var _ aws.Volumes = &mockVolumes{}
+
+func (v *mockVolumes) AttachDisk(instanceName string, volumeName string, readOnly bool) (string, error) {
+	return "", fmt.Errorf("not implemented")
+}
+
+func (v *mockVolumes) DetachDisk(instanceName string, volumeName string) error {
+	return fmt.Errorf("not implemented")
+}
+
+func (v *mockVolumes) CreateVolume(volumeOptions *aws.VolumeOptions) (volumeName string, err error) {
+	return "", fmt.Errorf("not implemented")
+}
+
+func (v *mockVolumes) DeleteVolume(volumeName string) error {
+	return fmt.Errorf("not implemented")
+}
+
+func (v *mockVolumes) GetVolumeLabels(volumeName string) (map[string]string, error) {
+	return v.volumeLabels, v.volumeLabelsError
+}
+
+func mockVolumeFailure(err error) *mockVolumes {
+	return &mockVolumes{volumeLabelsError: err}
+}
+
+func mockVolumeLabels(labels map[string]string) *mockVolumes {
+	return &mockVolumes{volumeLabels: labels}
+}
+
+// TestAdmission
+func TestAdmission(t *testing.T) {
+	pvHandler := NewPersistentVolumeLabel()
+	handler := admission.NewChainHandler(pvHandler)
+	ignoredPV := api.PersistentVolume{
+		ObjectMeta: api.ObjectMeta{Name: "noncloud", Namespace: "myns"},
+		Spec: api.PersistentVolumeSpec{
+			PersistentVolumeSource: api.PersistentVolumeSource{
+				HostPath: &api.HostPathVolumeSource{
+					Path: "/",
+				},
+			},
+		},
+	}
+	awsPV := api.PersistentVolume{
+		ObjectMeta: api.ObjectMeta{Name: "noncloud", Namespace: "myns"},
+		Spec: api.PersistentVolumeSpec{
+			PersistentVolumeSource: api.PersistentVolumeSource{
+				AWSElasticBlockStore: &api.AWSElasticBlockStoreVolumeSource{
+					VolumeID: "123",
+				},
+			},
+		},
+	}
+
+	// Non-cloud PVs are ignored
+	err := handler.Admit(admission.NewAttributesRecord(&ignoredPV, api.Kind("PersistentVolume"), ignoredPV.Namespace, ignoredPV.Name, api.Resource("persistentvolumes"), "", admission.Create, nil))
+	if err != nil {
+		t.Errorf("Unexpected error returned from admission handler (on ignored pv): %v", err)
+	}
+
+	// We only add labels on creation
+	err = handler.Admit(admission.NewAttributesRecord(&awsPV, api.Kind("PersistentVolume"), awsPV.Namespace, awsPV.Name, api.Resource("persistentvolumes"), "", admission.Delete, nil))
+	if err != nil {
+		t.Errorf("Unexpected error returned from admission handler (when deleting aws pv): %v", err)
+	}
+
+	// Errors from the cloudprovider block creation of the volume
+	pvHandler.ebsVolumes = mockVolumeFailure(fmt.Errorf("invalid volume"))
+	err = handler.Admit(admission.NewAttributesRecord(&awsPV, api.Kind("PersistentVolume"), awsPV.Namespace, awsPV.Name, api.Resource("persistentvolumes"), "", admission.Create, nil))
+	if err == nil {
+		t.Errorf("Expected error when aws pv info fails")
+	}
+
+	// Don't add labels if the cloudprovider doesn't return any
+	labels := make(map[string]string)
+	pvHandler.ebsVolumes = mockVolumeLabels(labels)
+	err = handler.Admit(admission.NewAttributesRecord(&awsPV, api.Kind("PersistentVolume"), awsPV.Namespace, awsPV.Name, api.Resource("persistentvolumes"), "", admission.Create, nil))
+	if err != nil {
+		t.Errorf("Expected no error when creating aws pv")
+	}
+	if len(awsPV.ObjectMeta.Labels) != 0 {
+		t.Errorf("Unexpected number of labels")
+	}
+
+	// Don't panic if the cloudprovider returns nil, nil
+	pvHandler.ebsVolumes = mockVolumeFailure(nil)
+	err = handler.Admit(admission.NewAttributesRecord(&awsPV, api.Kind("PersistentVolume"), awsPV.Namespace, awsPV.Name, api.Resource("persistentvolumes"), "", admission.Create, nil))
+	if err != nil {
+		t.Errorf("Expected no error when cloud provider returns empty labels")
+	}
+
+	// Labels from the cloudprovider should be applied to the volume
+	labels = make(map[string]string)
+	labels["a"] = "1"
+	labels["b"] = "2"
+	pvHandler.ebsVolumes = mockVolumeLabels(labels)
+	err = handler.Admit(admission.NewAttributesRecord(&awsPV, api.Kind("PersistentVolume"), awsPV.Namespace, awsPV.Name, api.Resource("persistentvolumes"), "", admission.Create, nil))
+	if err != nil {
+		t.Errorf("Expected no error when creating aws pv")
+	}
+	if awsPV.Labels["a"] != "1" || awsPV.Labels["b"] != "2" {
+		t.Errorf("Expected label a to be added when creating aws pv")
+	}
+
+	// User-provided labels should be honored, but cloudprovider labels replace them when they overlap
+	awsPV.ObjectMeta.Labels = make(map[string]string)
+	awsPV.ObjectMeta.Labels["a"] = "not1"
+	awsPV.ObjectMeta.Labels["c"] = "3"
+	err = handler.Admit(admission.NewAttributesRecord(&awsPV, api.Kind("PersistentVolume"), awsPV.Namespace, awsPV.Name, api.Resource("persistentvolumes"), "", admission.Create, nil))
+	if err != nil {
+		t.Errorf("Expected no error when creating aws pv")
+	}
+	if awsPV.Labels["a"] != "1" || awsPV.Labels["b"] != "2" {
+		t.Errorf("Expected cloudprovider labels to replace user labels when creating aws pv")
+	}
+	if awsPV.Labels["c"] != "3" {
+		t.Errorf("Expected (non-conflicting) user provided labels to be honored when creating aws pv")
+	}
+
+}

@@ -0,0 +1,19 @@
+/*
+Copyright 2014 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// labels created persistent volumes with zone information
+// as provided by the cloud provider
+package label

@@ -26,12 +26,21 @@ import (
 	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
 
 	"github.com/golang/glog"
+	"k8s.io/kubernetes/pkg/api/unversioned"
 )
 
 type NodeInfo interface {
 	GetNodeInfo(nodeID string) (*api.Node, error)
 }
 
+type PersistentVolumeInfo interface {
+	GetPersistentVolumeInfo(pvID string) (*api.PersistentVolume, error)
+}
+
+type PersistentVolumeClaimInfo interface {
+	GetPersistentVolumeClaimInfo(namespace string, pvcID string) (*api.PersistentVolumeClaim, error)
+}
+
 type StaticNodeInfo struct {
 	*api.NodeList
 }

@@ -136,6 +145,108 @@ func NoDiskConflict(pod *api.Pod, existingPods []*api.Pod, node string) (bool, e
 	return true, nil
 }
 
+type VolumeZoneChecker struct {
+	nodeInfo NodeInfo
+	pvInfo   PersistentVolumeInfo
+	pvcInfo  PersistentVolumeClaimInfo
+}
+
+// VolumeZonePredicate evaluates if a pod can fit due to the volumes it requests, given
+// that some volumes may have zone scheduling constraints. The requirement is that any
+// volume zone-labels must match the equivalent zone-labels on the node. It is OK for
+// the node to have more zone-label constraints (for example, a hypothetical replicated
+// volume might allow region-wide access)
+//
+// Currently this is only supported with PersistentVolumeClaims, and looks to the labels
+// only on the bound PersistentVolume.
+//
+// Working with volumes declared inline in the pod specification (i.e. not
+// using a PersistentVolume) is likely to be harder, as it would require
+// determining the zone of a volume during scheduling, and that is likely to
+// require calling out to the cloud provider. It seems that we are moving away
+// from inline volume declarations anyway.
+func NewVolumeZonePredicate(nodeInfo NodeInfo, pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo) algorithm.FitPredicate {
+	c := &VolumeZoneChecker{
+		nodeInfo: nodeInfo,
+		pvInfo:   pvInfo,
+		pvcInfo:  pvcInfo,
+	}
+	return c.predicate
+}
+
+func (c *VolumeZoneChecker) predicate(pod *api.Pod, existingPods []*api.Pod, nodeID string) (bool, error) {
+	node, err := c.nodeInfo.GetNodeInfo(nodeID)
+	if err != nil {
+		return false, err
+	}
+	if node == nil {
+		return false, fmt.Errorf("node not found: %q", nodeID)
+	}
+
+	nodeConstraints := make(map[string]string)
+	for k, v := range node.ObjectMeta.Labels {
+		if k != unversioned.LabelZoneFailureDomain && k != unversioned.LabelZoneRegion {
+			continue
+		}
+		nodeConstraints[k] = v
+	}
+
+	if len(nodeConstraints) == 0 {
+		// The node has no zone constraints, so we're OK to schedule.
+		// In practice, when using zones, all nodes must be labeled with zone labels.
+		// We want to fast-path this case though.
+		return true, nil
+	}
+
+	namespace := pod.Namespace
+
+	manifest := &(pod.Spec)
+	for i := range manifest.Volumes {
+		volume := &manifest.Volumes[i]
+		if volume.PersistentVolumeClaim != nil {
+			pvcName := volume.PersistentVolumeClaim.ClaimName
+			if pvcName == "" {
+				return false, fmt.Errorf("PersistentVolumeClaim had no name: %q", pvcName)
+			}
+			pvc, err := c.pvcInfo.GetPersistentVolumeClaimInfo(namespace, pvcName)
+			if err != nil {
+				return false, err
+			}
+
+			if pvc == nil {
+				return false, fmt.Errorf("PersistentVolumeClaim was not found: %q", pvcName)
+			}
+
+			pvName := pvc.Spec.VolumeName
+			if pvName == "" {
+				return false, fmt.Errorf("PersistentVolumeClaim is not bound: %q", pvcName)
+			}
+
+			pv, err := c.pvInfo.GetPersistentVolumeInfo(pvName)
+			if err != nil {
+				return false, err
+			}
+
+			if pv == nil {
+				return false, fmt.Errorf("PersistentVolume not found: %q", pvName)
+			}
+
+			for k, v := range pv.ObjectMeta.Labels {
+				if k != unversioned.LabelZoneFailureDomain && k != unversioned.LabelZoneRegion {
+					continue
+				}
+				nodeV, _ := nodeConstraints[k]
+				if v != nodeV {
+					glog.V(2).Infof("Won't schedule pod %q onto node %q due to volume %q (mismatch on %q)", pod.Name, nodeID, pvName, k)
+					return false, nil
+				}
+			}
+		}
+	}
+
+	return true, nil
+}
+
 type ResourceFit struct {
 	info NodeInfo
 }

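A self-contained sketch (not from this commit) of the matching rule the predicate enforces: every zone or region label on the bound PV must equal the node's label under the same key, and a node with no zone labels passes unconditionally. The label key is an illustrative stand-in for the `unversioned` constants.

```go
package main

import "fmt"

// fitsZone reproduces the core comparison from VolumeZoneChecker.predicate:
// for each constrained key, the PV's label value must match the node's value.
func fitsZone(pvLabels, nodeLabels map[string]string, zoneKeys []string) bool {
	nodeConstraints := map[string]string{}
	for _, k := range zoneKeys {
		if v, ok := nodeLabels[k]; ok {
			nodeConstraints[k] = v
		}
	}
	if len(nodeConstraints) == 0 {
		return true // unlabeled node: fast-path, schedulable
	}
	for _, k := range zoneKeys {
		if v, ok := pvLabels[k]; ok && v != nodeConstraints[k] {
			return false // volume pinned to a different zone/region
		}
	}
	return true
}

func main() {
	zoneKeys := []string{"example.io/zone"} // stand-in for unversioned.LabelZoneFailureDomain
	pv := map[string]string{"example.io/zone": "us-east-1a"}
	fmt.Println(fitsZone(pv, map[string]string{"example.io/zone": "us-east-1a"}, zoneKeys)) // true
	fmt.Println(fitsZone(pv, map[string]string{"example.io/zone": "us-east-1b"}, zoneKeys)) // false
	fmt.Println(fitsZone(pv, map[string]string{}, zoneKeys))                                // true (fast-path)
}
```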
@@ -64,6 +64,13 @@ func defaultPredicates() sets.String {
 		),
 		// Fit is determined by non-conflicting disk volumes.
 		factory.RegisterFitPredicate("NoDiskConflict", predicates.NoDiskConflict),
+		// Fit is determined by volume zone requirements.
+		factory.RegisterFitPredicateFactory(
+			"NoVolumeZoneConflict",
+			func(args factory.PluginFactoryArgs) algorithm.FitPredicate {
+				return predicates.NewVolumeZonePredicate(args.NodeInfo, args.PVInfo, args.PVCInfo)
+			},
+		),
 		// Fit is determined by node selector query.
 		factory.RegisterFitPredicateFactory(
 			"MatchNodeSelector",

@@ -58,6 +58,10 @@ type ConfigFactory struct {
 	PodLister algorithm.PodLister
 	// a means to list all nodes
 	NodeLister *cache.StoreToNodeLister
+	// a means to list all PersistentVolumes
+	PVLister *cache.StoreToPVFetcher
+	// a means to list all PersistentVolumeClaims
+	PVCLister *cache.StoreToPVCFetcher
 	// a means to list all services
 	ServiceLister *cache.StoreToServiceLister
 	// a means to list all controllers

@@ -85,6 +89,8 @@ func NewConfigFactory(client *client.Client, rateLimiter util.RateLimiter, sched
 		ScheduledPodLister: &cache.StoreToPodLister{},
 		// Only nodes in the "Ready" condition with status == "True" are schedulable
 		NodeLister:         &cache.StoreToNodeLister{Store: cache.NewStore(cache.MetaNamespaceKeyFunc)},
+		PVLister:           &cache.StoreToPVFetcher{Store: cache.NewStore(cache.MetaNamespaceKeyFunc)},
+		PVCLister:          &cache.StoreToPVCFetcher{Store: cache.NewStore(cache.MetaNamespaceKeyFunc)},
 		ServiceLister:      &cache.StoreToServiceLister{Store: cache.NewStore(cache.MetaNamespaceKeyFunc)},
 		ControllerLister:   &cache.StoreToReplicationControllerLister{Store: cache.NewStore(cache.MetaNamespaceKeyFunc)},
 		StopEverything:     make(chan struct{}),

@@ -188,6 +194,8 @@ func (f *ConfigFactory) CreateFromKeys(predicateKeys, priorityKeys sets.String,
 		// All fit predicates only need to consider schedulable nodes.
 		NodeLister: f.NodeLister.NodeCondition(getNodeConditionPredicate()),
 		NodeInfo:   &predicates.CachedNodeInfo{f.NodeLister},
+		PVInfo:     f.PVLister,
+		PVCInfo:    f.PVCLister,
 	}
 	predicateFuncs, err := getFitPredicateFunctions(predicateKeys, pluginArgs)
 	if err != nil {

@@ -209,6 +217,11 @@ func (f *ConfigFactory) CreateFromKeys(predicateKeys, priorityKeys sets.String,
 	// Nodes may be listed frequently, so provide a local up-to-date cache.
 	cache.NewReflector(f.createNodeLW(), &api.Node{}, f.NodeLister.Store, 0).RunUntil(f.StopEverything)
 
+	// Watch PVs & PVCs
+	// They may be listed frequently for scheduling constraints, so provide a local up-to-date cache.
+	cache.NewReflector(f.createPersistentVolumeLW(), &api.PersistentVolume{}, f.PVLister.Store, 0).RunUntil(f.StopEverything)
+	cache.NewReflector(f.createPersistentVolumeClaimLW(), &api.PersistentVolumeClaim{}, f.PVCLister.Store, 0).RunUntil(f.StopEverything)
+
 	// Watch and cache all service objects. Scheduler needs to find all pods
 	// created by the same services or ReplicationControllers, so that it can spread them correctly.
 	// Cache this locally.

@@ -303,6 +316,16 @@ func (factory *ConfigFactory) createNodeLW() *cache.ListWatch {
 	return cache.NewListWatchFromClient(factory.Client, "nodes", api.NamespaceAll, fields)
 }
 
+// createPersistentVolumeLW returns a cache.ListWatch that gets all changes to persistentVolumes.
+func (factory *ConfigFactory) createPersistentVolumeLW() *cache.ListWatch {
+	return cache.NewListWatchFromClient(factory.Client, "persistentVolumes", api.NamespaceAll, fields.ParseSelectorOrDie(""))
+}
+
+// createPersistentVolumeClaimLW returns a cache.ListWatch that gets all changes to persistentVolumeClaims.
+func (factory *ConfigFactory) createPersistentVolumeClaimLW() *cache.ListWatch {
+	return cache.NewListWatchFromClient(factory.Client, "persistentVolumeClaims", api.NamespaceAll, fields.ParseSelectorOrDie(""))
+}
+
 // Returns a cache.ListWatch that gets all changes to services.
 func (factory *ConfigFactory) createServiceLW() *cache.ListWatch {
 	return cache.NewListWatchFromClient(factory.Client, "services", api.NamespaceAll, fields.ParseSelectorOrDie(""))

@@ -38,6 +38,8 @@ type PluginFactoryArgs struct {
 	algorithm.ControllerLister
 	NodeLister algorithm.NodeLister
 	NodeInfo   predicates.NodeInfo
+	PVInfo     predicates.PersistentVolumeInfo
+	PVCInfo    predicates.PersistentVolumeClaimInfo
 }
 
 // A FitPredicateFactory produces a FitPredicate from the given args.