/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
	"fmt"
	"hash/fnv"
	"io/ioutil"
	"math/rand"
	"os"
	"path"
	"path/filepath"
	"reflect"
	"strconv"
	"strings"

	v1 "k8s.io/api/core/v1"
	storage "k8s.io/api/storage/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime"
	utypes "k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/sets"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/klog"
	"k8s.io/kubernetes/pkg/api/legacyscheme"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
	"k8s.io/kubernetes/pkg/features"
	kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
	"k8s.io/kubernetes/pkg/util/mount"
	utilstrings "k8s.io/kubernetes/pkg/util/strings"
	"k8s.io/kubernetes/pkg/volume"
	"k8s.io/kubernetes/pkg/volume/util/types"
	"k8s.io/kubernetes/pkg/volume/util/volumepathhandler"
)
const (
	// GB - GigaByte size
	GB = 1000 * 1000 * 1000
	// GIB - GibiByte size
	GIB = 1024 * 1024 * 1024

	// readyFileName is the name of the marker file created by SetReady and
	// checked by IsReady.
	readyFileName = "ready"

	// ControllerManagedAttachAnnotation is the key of the annotation on Node
	// objects that indicates attach/detach operations for the node should be
	// managed by the attach/detach controller
	ControllerManagedAttachAnnotation string = "volumes.kubernetes.io/controller-managed-attach-detach"

	// KeepTerminatedPodVolumesAnnotation is the key of the annotation on Node
	// that decides if pod volumes are unmounted when pod is terminated
	KeepTerminatedPodVolumesAnnotation string = "volumes.kubernetes.io/keep-terminated-pod-volumes"

	// VolumeGidAnnotationKey is the key of the annotation on the PersistentVolume
	// object that specifies a supplemental GID.
	VolumeGidAnnotationKey = "pv.beta.kubernetes.io/gid"

	// VolumeDynamicallyCreatedByKey is the key of the annotation on PersistentVolume
	// object created dynamically
	VolumeDynamicallyCreatedByKey = "kubernetes.io/createdby"
)
// VolumeZoneConfig contains config information about zonal volume.
type VolumeZoneConfig struct {
	// NOTE(review): the *Present flags presumably record whether the
	// corresponding zone/zones/replica-zone values below were supplied —
	// confirm at the call sites that populate this struct.
	ZonePresent                bool
	ZonesPresent               bool
	ReplicaZoneFromNodePresent bool
	Zone                       string
	Zones                      string
	ReplicaZoneFromNode        string
}
// IsReady checks for the existence of a regular file
// called 'ready' in the given directory and returns
// true if that file exists.
func IsReady(dir string) bool {
	marker := path.Join(dir, readyFileName)
	info, err := os.Stat(marker)
	if err != nil {
		return false
	}
	if info.Mode().IsRegular() {
		return true
	}
	// Something exists at the marker path but it is not a regular file.
	klog.Errorf("ready-file is not a file: %s", marker)
	return false
}
// SetReady creates a file called 'ready' in the given
// directory. It logs an error if the file cannot be
// created.
func SetReady(dir string) {
	// Make sure the directory exists; a pre-existing directory is fine.
	if err := os.MkdirAll(dir, 0750); err != nil && !os.IsExist(err) {
		klog.Errorf("Can't mkdir %s: %v", dir, err)
		return
	}
	marker := path.Join(dir, readyFileName)
	f, err := os.Create(marker)
	if err != nil {
		klog.Errorf("Can't touch %s: %v", marker, err)
		return
	}
	f.Close()
}
// UnmountPath is a common unmount routine that unmounts the given path and
// deletes the remaining directory if successful.
2019-01-22 20:53:35 +00:00
// TODO: Remove this function and change callers to call mount pkg directly
2019-01-12 04:58:27 +00:00
func UnmountPath ( mountPath string , mounter mount . Interface ) error {
2019-01-22 20:53:35 +00:00
return mount . CleanupMountPoint ( mountPath , mounter , false /* extensiveMountPointCheck */ )
2019-01-12 04:58:27 +00:00
}
// UnmountMountPoint is a common unmount routine that unmounts the given path and
// deletes the remaining directory if successful.
// if extensiveMountPointCheck is true
// IsNotMountPoint will be called instead of IsLikelyNotMountPoint.
// IsNotMountPoint is more expensive but properly handles bind mounts.
2019-01-22 20:53:35 +00:00
// TODO: Change callers to call mount pkg directly
2019-01-12 04:58:27 +00:00
func UnmountMountPoint ( mountPath string , mounter mount . Interface , extensiveMountPointCheck bool ) error {
2019-01-22 20:53:35 +00:00
return mount . CleanupMountPoint ( mountPath , mounter , extensiveMountPointCheck )
2019-01-12 04:58:27 +00:00
}
// PathExists returns true if the specified path exists.
2019-01-22 20:53:35 +00:00
// TODO: Change callers to call mount pkg directly
2019-01-12 04:58:27 +00:00
func PathExists ( path string ) ( bool , error ) {
2019-01-22 20:53:35 +00:00
return mount . PathExists ( path )
2019-01-12 04:58:27 +00:00
}
// IsCorruptedMnt return true if err is about corrupted mount point
2019-01-22 20:53:35 +00:00
// TODO: Change callers to call mount pkg directly
2019-01-12 04:58:27 +00:00
func IsCorruptedMnt ( err error ) bool {
2019-01-22 20:53:35 +00:00
return mount . IsCorruptedMnt ( err )
2019-01-12 04:58:27 +00:00
}
// GetSecretForPod locates secret by name in the pod's namespace and returns secret map
func GetSecretForPod(pod *v1.Pod, secretName string, kubeClient clientset.Interface) (map[string]string, error) {
	result := make(map[string]string)
	if kubeClient == nil {
		return result, fmt.Errorf("Cannot get kube client")
	}
	secretObj, err := kubeClient.CoreV1().Secrets(pod.Namespace).Get(secretName, metav1.GetOptions{})
	if err != nil {
		return result, err
	}
	// Secret data values are []byte; expose them to callers as strings.
	for key, value := range secretObj.Data {
		result[key] = string(value)
	}
	return result, nil
}
// GetSecretForPV locates secret by name and namespace, verifies the secret type, and returns secret map
func GetSecretForPV(secretNamespace, secretName, volumePluginName string, kubeClient clientset.Interface) (map[string]string, error) {
	result := make(map[string]string)
	if kubeClient == nil {
		return result, fmt.Errorf("Cannot get kube client")
	}
	secretObj, err := kubeClient.CoreV1().Secrets(secretNamespace).Get(secretName, metav1.GetOptions{})
	if err != nil {
		return result, err
	}
	// The secret's type must match the requesting volume plugin's name.
	if secretObj.Type != v1.SecretType(volumePluginName) {
		return result, fmt.Errorf("Cannot get secret of type %s", volumePluginName)
	}
	for key, value := range secretObj.Data {
		result[key] = string(value)
	}
	return result, nil
}
// GetClassForVolume locates storage class by persistent volume
func GetClassForVolume(kubeClient clientset.Interface, pv *v1.PersistentVolume) (*storage.StorageClass, error) {
	if kubeClient == nil {
		return nil, fmt.Errorf("Cannot get kube client")
	}
	className := v1helper.GetPersistentVolumeClass(pv)
	if className == "" {
		return nil, fmt.Errorf("Volume has no storage class")
	}
	return kubeClient.StorageV1().StorageClasses().Get(className, metav1.GetOptions{})
}
// CheckNodeAffinity looks at the PV node affinity, and checks if the node has the same corresponding labels
// This ensures that we don't mount a volume that doesn't belong to this node.
// It delegates to checkVolumeNodeAffinity.
func CheckNodeAffinity(pv *v1.PersistentVolume, nodeLabels map[string]string) error {
	return checkVolumeNodeAffinity(pv, nodeLabels)
}
// checkVolumeNodeAffinity returns nil when the PV has no node affinity, or when
// the node labels satisfy the PV's required node selector terms; otherwise an error.
func checkVolumeNodeAffinity(pv *v1.PersistentVolume, nodeLabels map[string]string) error {
	affinity := pv.Spec.NodeAffinity
	if affinity == nil {
		return nil
	}
	if required := affinity.Required; required != nil {
		terms := required.NodeSelectorTerms
		klog.V(10).Infof("Match for Required node selector terms %+v", terms)
		if !v1helper.MatchNodeSelectorTerms(terms, labels.Set(nodeLabels), nil) {
			return fmt.Errorf("No matching NodeSelectorTerms")
		}
	}
	return nil
}
// LoadPodFromFile will read, decode, and return a Pod from a file.
func LoadPodFromFile(filePath string) (*v1.Pod, error) {
	if filePath == "" {
		return nil, fmt.Errorf("file path not specified")
	}
	raw, err := ioutil.ReadFile(filePath)
	if err != nil {
		return nil, fmt.Errorf("failed to read file path %s: %+v", filePath, err)
	}
	if len(raw) == 0 {
		return nil, fmt.Errorf("file was empty: %s", filePath)
	}
	pod := &v1.Pod{}
	// Decode with the legacy scheme's universal decoder.
	if err := runtime.DecodeInto(legacyscheme.Codecs.UniversalDecoder(), raw, pod); err != nil {
		return nil, fmt.Errorf("failed decoding file: %v", err)
	}
	return pod, nil
}
// SelectZoneForVolume is a wrapper around SelectZonesForVolume
// to select a single zone for a volume based on parameters
func SelectZoneForVolume(zoneParameterPresent, zonesParameterPresent bool, zoneParameter string, zonesParameter, zonesWithNodes sets.String, node *v1.Node, allowedTopologies []v1.TopologySelectorTerm, pvcName string) (string, error) {
	zones, err := SelectZonesForVolume(zoneParameterPresent, zonesParameterPresent, zoneParameter, zonesParameter, zonesWithNodes, node, allowedTopologies, pvcName, 1)
	if err != nil {
		return "", err
	}
	// Exactly one replica was requested, so take the single chosen zone.
	if zone, ok := zones.PopAny(); ok {
		return zone, nil
	}
	return "", fmt.Errorf("could not determine a zone to provision volume in")
}
// SelectZonesForVolume selects zones for a volume based on several factors:
// node.zone, allowedTopologies, zone/zones parameters from storageclass,
// zones with active nodes from the cluster. The number of zones = replicas.
//
// Precedence (first applicable source wins):
//  1. node's zone label (returned immediately for single-replica volumes)
//  2. allowedTopologies from the StorageClass
//  3. the "zone" / "zones" StorageClass parameters
//  4. zones that currently have nodes (zonesWithNodes)
// An error is returned when sources conflict or none can provide zones.
func SelectZonesForVolume(zoneParameterPresent, zonesParameterPresent bool, zoneParameter string, zonesParameter, zonesWithNodes sets.String, node *v1.Node, allowedTopologies []v1.TopologySelectorTerm, pvcName string, numReplicas uint32) (sets.String, error) {
	if zoneParameterPresent && zonesParameterPresent {
		return nil, fmt.Errorf("both zone and zones StorageClass parameters must not be used at the same time")
	}

	var zoneFromNode string
	// pick one zone from node if present
	if node != nil {
		// VolumeScheduling implicit since node is not nil
		if zoneParameterPresent || zonesParameterPresent {
			return nil, fmt.Errorf("zone[s] cannot be specified in StorageClass if VolumeBindingMode is set to WaitForFirstConsumer. Please specify allowedTopologies in StorageClass for constraining zones")
		}

		// pick node's zone for one of the replicas
		var ok bool
		zoneFromNode, ok = node.ObjectMeta.Labels[kubeletapis.LabelZoneFailureDomain]
		if !ok {
			return nil, fmt.Errorf("%s Label for node missing", kubeletapis.LabelZoneFailureDomain)
		}
		// if single replica volume and node with zone found, return immediately
		if numReplicas == 1 {
			return sets.NewString(zoneFromNode), nil
		}
	}

	// pick zone from allowedZones if specified
	allowedZones, err := ZonesFromAllowedTopologies(allowedTopologies)
	if err != nil {
		return nil, err
	}

	if (len(allowedTopologies) > 0) && (allowedZones.Len() == 0) {
		return nil, fmt.Errorf("no matchLabelExpressions with %s key found in allowedTopologies. Please specify matchLabelExpressions with %s key", kubeletapis.LabelZoneFailureDomain, kubeletapis.LabelZoneFailureDomain)
	}

	if allowedZones.Len() > 0 {
		// VolumeScheduling implicit since allowedZones present
		if zoneParameterPresent || zonesParameterPresent {
			return nil, fmt.Errorf("zone[s] cannot be specified in StorageClass if allowedTopologies specified")
		}
		// scheduler will guarantee if node != null above, zoneFromNode is member of allowedZones.
		// so if zoneFromNode != "", we can safely assume it is part of allowedZones.
		zones, err := chooseZonesForVolumeIncludingZone(allowedZones, pvcName, zoneFromNode, numReplicas)
		if err != nil {
			return nil, fmt.Errorf("cannot process zones in allowedTopologies: %v", err)
		}
		return zones, nil
	}

	// pick zone from parameters if present
	if zoneParameterPresent {
		if numReplicas > 1 {
			return nil, fmt.Errorf("zone cannot be specified if desired number of replicas for pv is greather than 1. Please specify zones or allowedTopologies to specify desired zones")
		}
		return sets.NewString(zoneParameter), nil
	}

	if zonesParameterPresent {
		if uint32(zonesParameter.Len()) < numReplicas {
			return nil, fmt.Errorf("not enough zones found in zones parameter to provision a volume with %d replicas. Found %d zones, need %d zones", numReplicas, zonesParameter.Len(), numReplicas)
		}
		// directly choose from zones parameter; no zone from node need to be considered
		return ChooseZonesForVolume(zonesParameter, pvcName, numReplicas), nil
	}

	// pick zone from zones with nodes
	if zonesWithNodes.Len() > 0 {
		// If node != null (and thus zoneFromNode != ""), zoneFromNode will be member of zonesWithNodes
		zones, err := chooseZonesForVolumeIncludingZone(zonesWithNodes, pvcName, zoneFromNode, numReplicas)
		if err != nil {
			return nil, fmt.Errorf("cannot process zones where nodes exist in the cluster: %v", err)
		}
		return zones, nil
	}
	return nil, fmt.Errorf("cannot determine zones to provision volume in")
}
// ZonesFromAllowedTopologies returns a list of zones specified in allowedTopologies
func ZonesFromAllowedTopologies(allowedTopologies []v1.TopologySelectorTerm) (sets.String, error) {
	zones := make(sets.String)
	for _, term := range allowedTopologies {
		for _, expression := range term.MatchLabelExpressions {
			// Only the failure-domain zone key is supported here.
			if expression.Key != kubeletapis.LabelZoneFailureDomain {
				return nil, fmt.Errorf("unsupported key found in matchLabelExpressions: %s", expression.Key)
			}
			zones.Insert(expression.Values...)
		}
	}
	return zones, nil
}
// ZonesSetToLabelValue converts zones set to label value
// (zone names joined with the kubelet multi-zone delimiter, unordered).
func ZonesSetToLabelValue(strSet sets.String) string {
	return strings.Join(strSet.UnsortedList(), kubeletapis.LabelMultiZoneDelimiter)
}
// ZonesToSet converts a string containing a comma separated list of zones to set
func ZonesToSet(zonesString string) (sets.String, error) {
	parsed, err := stringToSet(zonesString, ",")
	if err != nil {
		// Wrap the parse error with context about the expected format.
		return nil, fmt.Errorf("error parsing zones %s, must be strings separated by commas: %v", zonesString, err)
	}
	return parsed, nil
}
// LabelZonesToSet converts a PV label value from string containing a delimited list of zones to set
func LabelZonesToSet(labelZonesValue string) (sets.String, error) {
	return stringToSet(labelZonesValue, kubeletapis.LabelMultiZoneDelimiter)
}
// stringToSet converts a string containing a list separated by the specified
// delimiter to a set. An empty (or whitespace-only) element is an error.
func stringToSet(str, delimiter string) (sets.String, error) {
	result := make(sets.String)
	for _, element := range strings.Split(str, delimiter) {
		trimmed := strings.TrimSpace(element)
		if trimmed == "" {
			return make(sets.String), fmt.Errorf(
				"%q separated list (%q) must not contain an empty string",
				delimiter,
				str)
		}
		result.Insert(trimmed)
	}
	return result, nil
}
// LabelZonesToList converts a PV label value from string containing a delimited list of zones to list
func LabelZonesToList(labelZonesValue string) ([]string, error) {
	return stringToList(labelZonesValue, kubeletapis.LabelMultiZoneDelimiter)
}
// stringToList converts a string containing a list separated by the specified
// delimiter to a list. An empty (or whitespace-only) element is an error.
func stringToList(str, delimiter string) ([]string, error) {
	elements := strings.Split(str, delimiter)
	result := make([]string, 0, len(elements))
	for _, element := range elements {
		trimmed := strings.TrimSpace(element)
		if trimmed == "" {
			return nil, fmt.Errorf(
				"%q separated list (%q) must not contain an empty string",
				delimiter,
				str)
		}
		result = append(result, trimmed)
	}
	return result, nil
}
// CalculateTimeoutForVolume calculates time for a Recycler pod to complete a
// recycle operation. The calculation and return value is either the
// minimumTimeout or the timeoutIncrement per Gi of storage size, whichever is
// greater.
func CalculateTimeoutForVolume(minimumTimeout, timeoutIncrement int, pv *v1.PersistentVolume) int64 {
	giQty := resource.MustParse("1Gi")
	pvQty := pv.Spec.Capacity[v1.ResourceStorage]
	// Timeout grows linearly with the volume size in whole Gi units.
	timeout := (pvQty.Value() / giQty.Value()) * int64(timeoutIncrement)
	if floor := int64(minimumTimeout); timeout < floor {
		return floor
	}
	return timeout
}
// RoundUpSize calculates how many allocation units are needed to accommodate
// a volume of given size. E.g. when user wants 1500MiB volume, while AWS EBS
// allocates volumes in gibibyte-sized chunks,
// RoundUpSize(1500 * 1024*1024, 1024*1024*1024) returns '2'
// (2 GiB is the smallest allocatable volume that can hold 1500MiB)
func RoundUpSize(volumeSizeBytes int64, allocationUnitBytes int64) int64 {
	units := volumeSizeBytes / allocationUnitBytes
	// Any positive remainder means one more unit is needed.
	if volumeSizeBytes%allocationUnitBytes > 0 {
		units++
	}
	return units
}
// RoundUpToGB rounds up given quantity to chunks of GB
func RoundUpToGB(size resource.Quantity) int64 {
	return RoundUpSize(size.Value(), GB)
}
// RoundUpToGiB rounds up given quantity upto chunks of GiB
func RoundUpToGiB(size resource.Quantity) int64 {
	return RoundUpSize(size.Value(), GIB)
}
// RoundUpSizeInt calculates how many allocation units are needed to accommodate
// a volume of given size. It returns an int instead of an int64 and an error if
// there's overflow
func RoundUpSizeInt(volumeSizeBytes int64, allocationUnitBytes int64) (int, error) {
	rounded := RoundUpSize(volumeSizeBytes, allocationUnitBytes)
	asInt := int(rounded)
	// Round-trip through int to detect truncation on 32-bit platforms.
	if int64(asInt) != rounded {
		return 0, fmt.Errorf("capacity %v is too great, casting results in integer overflow", rounded)
	}
	return asInt, nil
}
// RoundUpToGBInt rounds up given quantity to chunks of GB. It returns an
// int instead of an int64 and an error if there's overflow
func RoundUpToGBInt(size resource.Quantity) (int, error) {
	return RoundUpSizeInt(size.Value(), GB)
}
// RoundUpToGiBInt rounds up given quantity upto chunks of GiB. It returns an
// int instead of an int64 and an error if there's overflow
func RoundUpToGiBInt(size resource.Quantity) (int, error) {
	return RoundUpSizeInt(size.Value(), GIB)
}
// GenerateVolumeName returns a PV name with clusterName prefix. The function
// should be used to generate a name of GCE PD or Cinder volume. It basically
// adds "<clusterName>-dynamic-" before the PV name, making sure the resulting
// string fits given length and cuts "dynamic" if not.
func GenerateVolumeName(clusterName, pvName string, maxLength int) string {
	prefix := clusterName + "-dynamic"
	// Truncate the prefix (never the PV name) so that "<prefix>-<pvName>"
	// fits into maxLength; the +1 accounts for the joining dash.
	if overflow := len(prefix) + 1 + len(pvName) - maxLength; overflow > 0 {
		prefix = prefix[:len(prefix)-overflow]
	}
	return prefix + "-" + pvName
}
// GetPath checks if the path from the mounter is empty.
func GetPath(mounter volume.Mounter) (string, error) {
	mountPath := mounter.GetPath()
	if mountPath == "" {
		// Include the concrete mounter type to help identify the offender.
		return "", fmt.Errorf("Path is empty %s", reflect.TypeOf(mounter).String())
	}
	return mountPath, nil
}
// ChooseZoneForVolume implements our heuristics for choosing a zone for volume creation based on the volume name
// Volumes are generally round-robin-ed across all active zones, using the hash of the PVC Name.
// However, if the PVCName ends with `-<integer>`, we will hash the prefix, and then add the integer to the hash.
// This means that a StatefulSet's volumes (`claimname-statefulsetname-id`) will spread across available zones,
// assuming the id values are consecutive.
func ChooseZoneForVolume(zones sets.String, pvcName string) string {
	// No zones available, return empty string.
	if zones.Len() == 0 {
		return ""
	}

	// We create the volume in a zone determined by the name
	// Eventually the scheduler will coordinate placement into an available zone
	hash, index := getPVCNameHashAndIndexOffset(pvcName)

	// zones.List() returns zones in a consistent (sorted) order, so selection
	// is stable for a fixed zone set. We do have a potential failure case where
	// volumes will not be properly spread if the set of zones changes during
	// StatefulSet volume creation, but the zone set is essentially static for
	// clusters, so this is unlikely in practice.
	sortedZones := zones.List()
	chosen := sortedZones[(hash+index)%uint32(len(sortedZones))]
	klog.V(2).Infof("Creating volume for PVC %q; chose zone=%q from zones=%q", pvcName, chosen, sortedZones)
	return chosen
}
// chooseZonesForVolumeIncludingZone is a wrapper around ChooseZonesForVolume that ensures zoneToInclude is chosen
// zoneToInclude can either be empty in which case it is ignored. If non-empty, zoneToInclude is expected to be member of zones.
// numReplicas is expected to be > 0 and <= zones.Len()
func chooseZonesForVolumeIncludingZone(zones sets.String, pvcName, zoneToInclude string, numReplicas uint32) (sets.String, error) {
	if numReplicas == 0 {
		return nil, fmt.Errorf("invalid number of replicas passed")
	}
	if uint32(zones.Len()) < numReplicas {
		return nil, fmt.Errorf("not enough zones found to provision a volume with %d replicas. Need at least %d distinct zones for a volume with %d replicas", numReplicas, numReplicas, numReplicas)
	}
	if zoneToInclude != "" && !zones.Has(zoneToInclude) {
		return nil, fmt.Errorf("zone to be included: %s needs to be member of set: %v", zoneToInclude, zones)
	}
	// Every available zone must be used; the choice is forced.
	if uint32(zones.Len()) == numReplicas {
		return zones, nil
	}
	// Work on a copy so the caller's set is not mutated by the Delete below.
	// (The previous implementation removed zoneToInclude from the input set,
	// leaking a side effect to callers that reuse the set.)
	candidateZones := sets.NewString(zones.UnsortedList()...)
	if zoneToInclude != "" {
		// Reserve a slot for the mandatory zone and pick the rest elsewhere.
		candidateZones.Delete(zoneToInclude)
		numReplicas--
	}
	zonesChosen := ChooseZonesForVolume(candidateZones, pvcName, numReplicas)
	if zoneToInclude != "" {
		zonesChosen.Insert(zoneToInclude)
	}
	return zonesChosen, nil
}
// ChooseZonesForVolume is identical to ChooseZoneForVolume, but selects a multiple zones, for multi-zone disks.
func ChooseZonesForVolume(zones sets.String, pvcName string, numZones uint32) sets.String {
	replicaZones := sets.NewString()
	// No zones available, return empty set.
	if zones.Len() == 0 {
		return replicaZones
	}

	// We create the volume in a zone determined by the name
	// Eventually the scheduler will coordinate placement into an available zone
	hash, index := getPVCNameHashAndIndexOffset(pvcName)

	// zones.List() returns zones in a consistent (sorted) order, so the
	// selection is stable for a fixed zone set. Spreading may degrade if the
	// zone set changes during StatefulSet volume creation, but zone sets are
	// essentially static for clusters.
	sortedZones := zones.List()
	startingIndex := index * numZones
	for i := startingIndex; i < startingIndex+numZones; i++ {
		replicaZones.Insert(sortedZones[(hash+i)%uint32(len(sortedZones))])
	}

	klog.V(2).Infof("Creating volume for replicated PVC %q; chosen zones=%q from zones=%q",
		pvcName, replicaZones.UnsortedList(), sortedZones)
	return replicaZones
}
// getPVCNameHashAndIndexOffset returns an FNV-1 hash of the PVC name plus an
// index offset parsed from a trailing "-<integer>" suffix (0 when absent).
// When such a suffix exists only the name's base is hashed, so that
// StatefulSet-style claim names round-robin across zones while multiple claims
// of the same StatefulSet member land in the same zone. An empty name yields a
// random hash.
func getPVCNameHashAndIndexOffset(pvcName string) (hash uint32, index uint32) {
	if pvcName == "" {
		// We should always be called with a name; this shouldn't happen
		klog.Warningf("No name defined during volume create; choosing random zone")

		hash = rand.Uint32()
	} else {
		hashString := pvcName

		// Heuristic to make sure that volumes in a StatefulSet are spread across zones
		// StatefulSet PVCs are (currently) named ClaimName-StatefulSetName-Id,
		// where Id is an integer index.
		// Note though that if a StatefulSet pod has multiple claims, we need them to be
		// in the same zone, because otherwise the pod will be unable to mount both volumes,
		// and will be unschedulable. So we hash _only_ the "StatefulSetName" portion when
		// it looks like `ClaimName-StatefulSetName-Id`.
		// We continue to round-robin volume names that look like `Name-Id` also; this is a useful
		// feature for users that are creating statefulset-like functionality without using statefulsets.
		lastDash := strings.LastIndexByte(pvcName, '-')
		if lastDash != -1 {
			statefulsetIDString := pvcName[lastDash+1:]
			statefulsetID, err := strconv.ParseUint(statefulsetIDString, 10, 32)
			if err == nil {
				// Offset by the statefulsetID, so we round-robin across zones
				index = uint32(statefulsetID)
				// We still hash the volume name, but only the prefix
				hashString = pvcName[:lastDash]

				// In the special case where it looks like `ClaimName-StatefulSetName-Id`,
				// hash only the StatefulSetName, so that different claims on the same StatefulSet
				// member end up in the same zone.
				// Note that StatefulSetName (and ClaimName) might themselves both have dashes.
				// We actually just take the portion after the final - of ClaimName-StatefulSetName.
				// For our purposes it doesn't much matter (just suboptimal spreading).
				lastDash := strings.LastIndexByte(hashString, '-')
				if lastDash != -1 {
					hashString = hashString[lastDash+1:]
				}

				klog.V(2).Infof("Detected StatefulSet-style volume name %q; index=%d", pvcName, index)
			}
		}

		// We hash the (base) volume name, so we don't bias towards the first N zones
		h := fnv.New32()
		h.Write([]byte(hashString))
		hash = h.Sum32()
	}

	return hash, index
}
// UnmountViaEmptyDir delegates the tear down operation for secret, configmap, git_repo and downwardapi
// to empty_dir
func UnmountViaEmptyDir(dir string, host volume.VolumeHost, volName string, volSpec volume.Spec, podUID utypes.UID) error {
	klog.V(3).Infof("Tearing down volume %v for pod %v at %v", volName, podUID, dir)

	// Wrap EmptyDir, let it do the teardown.
	unmounter, err := host.NewWrapperUnmounter(volName, volSpec, podUID)
	if err != nil {
		return err
	}
	return unmounter.TearDownAt(dir)
}
// MountOptionFromSpec extracts and joins mount options from volume spec with supplied options
func MountOptionFromSpec(spec *volume.Spec, options ...string) []string {
	pv := spec.PersistentVolume
	if pv == nil {
		return options
	}
	// The beta annotation takes precedence over spec.MountOptions.
	if annotated, ok := pv.Annotations[v1.MountOptionAnnotation]; ok {
		return JoinMountOptions(strings.Split(annotated, ","), options)
	}
	if len(pv.Spec.MountOptions) > 0 {
		return JoinMountOptions(pv.Spec.MountOptions, options)
	}
	return options
}
// JoinMountOptions joins mount options eliminating duplicates
func JoinMountOptions(userOptions []string, systemOptions []string) []string {
	// System options are taken as-is; user options are filtered of empties.
	merged := sets.NewString(systemOptions...)
	for _, option := range userOptions {
		if len(option) > 0 {
			merged.Insert(option)
		}
	}
	// List() returns the de-duplicated options in sorted order.
	return merged.List()
}
// ValidateZone returns:
// - an error in case zone is an empty string or contains only any combination of spaces and tab characters
// - nil otherwise
func ValidateZone(zone string) error {
	if strings.TrimSpace(zone) != "" {
		return nil
	}
	return fmt.Errorf("the provided %q zone is not valid, it's an empty string or contains only spaces and tab characters", zone)
}
// AccessModesContains returns whether the requested mode is contained by modes
func AccessModesContains(modes []v1.PersistentVolumeAccessMode, mode v1.PersistentVolumeAccessMode) bool {
	for _, candidate := range modes {
		if candidate == mode {
			return true
		}
	}
	return false
}
// AccessModesContainedInAll returns whether all of the requested modes are contained by modes
func AccessModesContainedInAll(indexedModes []v1.PersistentVolumeAccessMode, requestedModes []v1.PersistentVolumeAccessMode) bool {
	for _, requested := range requestedModes {
		if !AccessModesContains(indexedModes, requested) {
			return false
		}
	}
	return true
}
// GetWindowsPath get a windows path
func GetWindowsPath(path string) string {
	// Flip forward slashes to backslashes.
	winPath := strings.Replace(path, "/", "\\", -1)
	// A rooted path gets the default "c:" drive prefix.
	if strings.HasPrefix(winPath, "\\") {
		winPath = "c:" + winPath
	}
	return winPath
}
// GetUniquePodName returns a unique identifier to reference a pod by
// (the pod's UID wrapped in the UniquePodName type).
func GetUniquePodName(pod *v1.Pod) types.UniquePodName {
	return types.UniquePodName(pod.UID)
}
// GetUniqueVolumeName returns a unique name representing the volume/plugin.
// Caller should ensure that volumeName is a name/ID uniquely identifying the
// actual backing device, directory, path, etc. for a particular volume.
// The returned name can be used to uniquely reference the volume, for example,
// to prevent operations (attach/detach or mount/unmount) from being triggered
// on the same volume.
func GetUniqueVolumeName(pluginName, volumeName string) v1.UniqueVolumeName {
	return v1.UniqueVolumeName(pluginName + "/" + volumeName)
}
2019-01-22 20:53:35 +00:00
// GetUniqueVolumeNameFromSpecWithPod returns a unique volume name with pod
// name included. This is useful to generate different names for different pods
// on same volume.
func GetUniqueVolumeNameFromSpecWithPod (
2019-01-12 04:58:27 +00:00
podName types . UniquePodName , volumePlugin volume . VolumePlugin , volumeSpec * volume . Spec ) v1 . UniqueVolumeName {
return v1 . UniqueVolumeName (
fmt . Sprintf ( "%s/%v-%s" , volumePlugin . GetPluginName ( ) , podName , volumeSpec . Name ( ) ) )
}
// GetUniqueVolumeNameFromSpec uses the given VolumePlugin to generate a unique
// name representing the volume defined in the specified volume spec.
// This returned name can be used to uniquely reference the actual backing
// device, directory, path, etc. referenced by the given volumeSpec.
// If the given plugin does not support the volume spec, this returns an error.
func GetUniqueVolumeNameFromSpec(
	volumePlugin volume.VolumePlugin,
	volumeSpec *volume.Spec) (v1.UniqueVolumeName, error) {
	if volumePlugin == nil {
		return "", fmt.Errorf(
			"volumePlugin should not be nil. volumeSpec.Name=%q",
			volumeSpec.Name())
	}

	volumeName, err := volumePlugin.GetVolumeName(volumeSpec)
	if err != nil || volumeName == "" {
		return "", fmt.Errorf(
			"failed to GetVolumeName from volumePlugin for volumeSpec %q err=%v",
			volumeSpec.Name(),
			err)
	}

	return GetUniqueVolumeName(volumePlugin.GetPluginName(), volumeName), nil
}
// IsPodTerminated checks if pod is terminated
func IsPodTerminated(pod *v1.Pod, podStatus v1.PodStatus) bool {
	switch podStatus.Phase {
	case v1.PodFailed, v1.PodSucceeded:
		return true
	}
	// A deleting pod counts as terminated once no container is running.
	return pod.DeletionTimestamp != nil && notRunning(podStatus.ContainerStatuses)
}
// notRunning returns true if every status is terminated or waiting, or the status list
// is empty.
func notRunning(statuses []v1.ContainerStatus) bool {
	for i := range statuses {
		state := statuses[i].State
		if state.Terminated == nil && state.Waiting == nil {
			return false
		}
	}
	return true
}
// SplitUniqueName splits the unique name to plugin name and volume name strings. It expects the uniqueName to follow
// the format plugin_name/volume_name and the plugin name must be namespaced as described by the plugin interface,
// i.e. namespace/plugin containing exactly one '/'. This means the unique name will always be in the form of
// plugin_namespace/plugin/volume_name, see k8s.io/kubernetes/pkg/volume/plugins.go VolumePlugin interface
// description and pkg/volume/util/volumehelper/volumehelper.go GetUniqueVolumeNameFromSpec that constructs
// the unique volume names.
func SplitUniqueName(uniqueName v1.UniqueVolumeName) (string, string, error) {
	parts := strings.SplitN(string(uniqueName), "/", 3)
	if len(parts) != 3 {
		return "", "", fmt.Errorf("cannot split volume unique name %s to plugin/volume components", uniqueName)
	}
	// Recombine the namespaced plugin name: "<namespace>/<plugin>".
	return parts[0] + "/" + parts[1], parts[2], nil
}
// NewSafeFormatAndMountFromHost creates a new SafeFormatAndMount with Mounter
// and Exec taken from given VolumeHost.
func NewSafeFormatAndMountFromHost(pluginName string, host volume.VolumeHost) *mount.SafeFormatAndMount {
	return &mount.SafeFormatAndMount{
		Interface: host.GetMounter(pluginName),
		Exec:      host.GetExec(pluginName),
	}
}
// GetVolumeMode retrieves VolumeMode from pv.
// If the volume doesn't have PersistentVolume, it's an inline volume,
// should return volumeMode as filesystem to keep existing behavior.
func GetVolumeMode(volumeSpec *volume.Spec) (v1.PersistentVolumeMode, error) {
	if volumeSpec == nil || volumeSpec.PersistentVolume == nil {
		// Inline volume: default to filesystem mode.
		return v1.PersistentVolumeFilesystem, nil
	}
	if mode := volumeSpec.PersistentVolume.Spec.VolumeMode; mode != nil {
		return *mode, nil
	}
	return "", fmt.Errorf("cannot get volumeMode for volume: %v", volumeSpec.Name())
}
// GetPersistentVolumeClaimVolumeMode retrieves VolumeMode from pvc.
// Returns an error when the claim does not carry an explicit VolumeMode.
func GetPersistentVolumeClaimVolumeMode(claim *v1.PersistentVolumeClaim) (v1.PersistentVolumeMode, error) {
	if mode := claim.Spec.VolumeMode; mode != nil {
		return *mode, nil
	}
	return "", fmt.Errorf("cannot get volumeMode from pvc: %v", claim.Name)
}
// GetPersistentVolumeClaimQualifiedName returns a qualified name for pvc,
// combining its namespace and name.
func GetPersistentVolumeClaimQualifiedName(claim *v1.PersistentVolumeClaim) string {
	namespace, name := claim.GetNamespace(), claim.GetName()
	return utilstrings.JoinQualifiedName(namespace, name)
}
// CheckVolumeModeFilesystem checks VolumeMode.
// If the mode is Filesystem, return true otherwise return false.
func CheckVolumeModeFilesystem(volumeSpec *volume.Spec) (bool, error) {
	// With the BlockVolume feature gate off, everything is treated as
	// filesystem mode.
	if !utilfeature.DefaultFeatureGate.Enabled(features.BlockVolume) {
		return true, nil
	}
	volumeMode, err := GetVolumeMode(volumeSpec)
	if err != nil {
		return true, err
	}
	return volumeMode != v1.PersistentVolumeBlock, nil
}
// CheckPersistentVolumeClaimModeBlock checks VolumeMode.
// If the mode is Block, return true otherwise return false.
func CheckPersistentVolumeClaimModeBlock(pvc *v1.PersistentVolumeClaim) bool {
	if !utilfeature.DefaultFeatureGate.Enabled(features.BlockVolume) {
		return false
	}
	mode := pvc.Spec.VolumeMode
	return mode != nil && *mode == v1.PersistentVolumeBlock
}
// IsWindowsUNCPath checks if path is prefixed with \\
// This can be used to skip any processing of paths
// that point to SMB shares, local named pipes and local UNC path
func IsWindowsUNCPath(goos, path string) bool {
	// Only meaningful on Windows; a UNC path starts with `\\`.
	return goos == "windows" && strings.HasPrefix(path, `\\`)
}
// IsWindowsLocalPath checks if path is a local path
// prefixed with "/" or "\" like "/foo/bar" or "\foo\bar"
func IsWindowsLocalPath(goos, path string) bool {
	switch {
	case goos != "windows":
		return false
	case IsWindowsUNCPath(goos, path):
		// UNC paths (\\...) are not local paths.
		return false
	case strings.Contains(path, ":"):
		// A drive letter (or any colon) disqualifies the path.
		return false
	case strings.HasPrefix(path, `/`), strings.HasPrefix(path, `\`):
		return true
	default:
		return false
	}
}
// MakeAbsolutePath convert path to absolute path according to GOOS
func MakeAbsolutePath(goos, path string) string {
	if goos != "windows" {
		// Non-Windows: root the path and clean it.
		return filepath.Clean("/" + path)
	}
	// Windows handling below.
	switch {
	case strings.Contains(path, ":"):
		// If there is a colon, give up.
		return path
	case strings.HasPrefix(path, "/"), strings.HasPrefix(path, "\\"):
		// If there is a slash, but no drive, add 'c:'
		return "c:" + path
	default:
		// Otherwise, add 'c:\'
		return "c:\\" + path
	}
}
// MapBlockVolume is a utility function to provide a common way of mounting
// block device path for a specified volume and pod. This function should be
// called by volume plugins that implements volume.BlockVolumeMapper.Map() method.
func MapBlockVolume(
	devicePath,
	globalMapPath,
	podVolumeMapPath,
	volumeMapName string,
	podUID utypes.UID,
) error {
	blkUtil := volumepathhandler.NewBlockVolumePathHandler()

	// map devicePath to global node path
	if err := blkUtil.MapDevice(devicePath, globalMapPath, string(podUID)); err != nil {
		return err
	}
	// map devicePath to pod volume path
	if err := blkUtil.MapDevice(devicePath, podVolumeMapPath, volumeMapName); err != nil {
		return err
	}
	return nil
}