Merge branch 'master' into upgrade_aliases_branch

Jing Ai 2017-11-20 21:20:45 -08:00
commit 391a9df925
100 changed files with 7468 additions and 2351 deletions

View File

@ -668,6 +668,7 @@ ENABLE_PROMETHEUS_TO_SD: $(yaml-quote ${ENABLE_PROMETHEUS_TO_SD:-false})
if [ -n "${KUBELET_PORT:-}" ]; then

View File

@ -36,6 +36,11 @@ MASTER_ROOT_DISK_SIZE=${MASTER_ROOT_DISK_SIZE:-$(get-master-root-disk-size)}
# An extension to local SSDs allowing users to specify block/fs and SCSI/NVMe devices
# Format of this variable will be "#,scsi/nvme,block/fs" you can specify multiple
# configurations by separating them with a semi-colon, e.g. "2,scsi,fs;1,nvme,block"
# is a request for 2 SCSI formatted and mounted SSDs and 1 NVMe block device SSD.
# Accelerators to be attached to each node. Format "type=<accelerator-type>,count=<accelerator-count>"
# More information on available GPUs here -

View File

@ -25,6 +25,9 @@ set -o errexit
set -o nounset
set -o pipefail
readonly UUID_MNT_PREFIX="/mnt/disks/by-uuid/google-local-ssds"
readonly UUID_BLOCK_PREFIX="/dev/disk/by-uuid/google-local-ssds"
function setup-os-params {
# Reset core_pattern. On GCI, the default core_pattern pipes the core dumps to
# /sbin/crash_reporter which is more restrictive in saving crash dumps. So for
@ -85,11 +88,85 @@ function create-dirs {
# Formats the given device ($1) if needed and mounts it at given mount point
# Gets the total number of $(1) and $(2) type disks specified
# by the user in ${NODE_LOCAL_SSDS_EXT}
function get-local-disk-num() {
local interface="${1}"
local format="${2}"
if [[ ! -z "${NODE_LOCAL_SSDS_EXT:-}" ]]; then
IFS=";" read -r -a ssdgroups <<< "${NODE_LOCAL_SSDS_EXT:-}"
for ssdgroup in "${ssdgroups[@]}"; do
IFS="," read -r -a ssdopts <<< "${ssdgroup}"
local opnum="${ssdopts[0]}"
local opinterface="${ssdopts[1]}"
local opformat="${ssdopts[2]}"
if [[ "${opformat,,}" == "${format,,}" && "${opinterface,,}" == "${interface,,}" ]]; then
# Creates a symlink for the device ($1) inside the directory ($2) so that it may be used as block storage
function safe-block-symlink(){
local device="${1}"
local symdir="${2}"
mkdir -p "${symdir}"
get-or-generate-uuid "${device}"
local myuuid="${retuuid}"
local sym="${symdir}/local-ssd-${myuuid}"
# Do not "mkdir -p ${sym}" as that will cause unintended symlink behavior
ln -s "${device}" "${sym}"
echo "Created a symlink for SSD $ssd at ${sym}"
chmod a+w "${sym}"
# Gets a pregenerated UUID from ${ssdmap} if it exists, otherwise generates a new
# UUID and places it inside ${ssdmap}
function get-or-generate-uuid(){
local device="${1}"
local ssdmap="/home/kubernetes/localssdmap.txt"
echo "Generating or getting UUID from ${ssdmap}"
if [[ ! -e "${ssdmap}" ]]; then
touch "${ssdmap}"
chmod +w "${ssdmap}"
# each line of the ssdmap looks like "${device} persistent-uuid"
if [[ ! -z $(grep ${device} ${ssdmap}) ]]; then
#create symlink based on saved uuid
local myuuid=$(grep ${device} ${ssdmap} | cut -d ' ' -f 2)
# generate new uuid and add it to the map
local myuuid=$(uuidgen)
if [[ ! ${?} -eq 0 ]]; then
echo "Failed to generate valid UUID with uuidgen" >&2
exit 2
echo "${device} ${myuuid}" >> "${ssdmap}"
if [[ -z "${myuuid}" ]]; then
echo "Failed to get a uuid for device ${device} when symlinking." >&2
exit 2
#Formats the given device ($1) if needed and mounts it at given mount point
# ($2).
function safe-format-and-mount() {
local device="${1}"
local mountpoint="${2}"
# Format only if the disk is not already formatted.
if ! tune2fs -l "${device}" ; then
@ -102,18 +179,135 @@ function safe-format-and-mount() {
mount -o discard,defaults "${device}" "${mountpoint}"
# Local ssds, if present, are mounted at /mnt/disks/ssdN.
# Gets a device's UUID and bind mounts the device's mount point to a location
# under /mnt/disks/by-uuid/
function unique-uuid-bind-mount(){
local mountpoint="${1}"
local actual_device="${2}"
# Trigger udev refresh so that newly formatted devices are propagated in by-uuid
udevadm control --reload-rules
udevadm trigger
udevadm settle
# grep the exact match of actual device, prevents substring matching
local myuuid=$(ls -l /dev/disk/by-uuid/ | grep "/${actual_device}$" | tr -s ' ' | cut -d ' ' -f 9)
# myuuid should be the uuid of the device as found in /dev/disk/by-uuid/
if [[ -z "${myuuid}" ]]; then
echo "Failed to get a uuid for device ${actual_device} when mounting." >&2
exit 2
# bindpoint should be the full path of the to-be-bound device
local bindpoint="${UUID_MNT_PREFIX}-${interface}-fs/local-ssd-${myuuid}"
safe-bind-mount "${mountpoint}" "${bindpoint}"
# Bind mounts device at mountpoint to bindpoint
function safe-bind-mount(){
local mountpoint="${1}"
local bindpoint="${2}"
# Mount device to the mountpoint
mkdir -p "${bindpoint}"
echo "Binding '${mountpoint}' at '${bindpoint}'"
mount --bind "${mountpoint}" "${bindpoint}"
chmod a+w "${bindpoint}"
# Mounts, bindmounts, or symlinks depending on the interface and format
# of the incoming device
function mount-ext(){
local ssd="${1}"
local devicenum="${2}"
local interface="${3}"
local format="${4}"
if [[ -z "${devicenum}" ]]; then
echo "Failed to get the local disk number for device ${ssd}" >&2
exit 2
# TODO: Handle partitioned disks. Right now this code just ignores partitions
if [[ "${format}" == "fs" ]]; then
if [[ "${interface}" == "scsi" ]]; then
local actual_device=$(readlink -f "${ssd}" | cut -d '/' -f 3)
# Error checking
if [[ "${actual_device}" != sd* ]]; then
echo "'actual_device' is not of the correct format. It must be the kernel name of the device, got ${actual_device} instead" >&2
exit 1
local mountpoint="/mnt/disks/ssd${devicenum}"
# This path is required because the existing Google images do not
# expose NVMe devices in /dev/disk/by-id so we are using the /dev/nvme instead
local actual_device=$(echo ${ssd} | cut -d '/' -f 3)
# Error checking
if [[ "${actual_device}" != nvme* ]]; then
echo "'actual_device' is not of the correct format. It must be the kernel name of the device, got ${actual_device} instead" >&2
exit 1
local mountpoint="/mnt/disks/ssd-nvme${devicenum}"
safe-format-and-mount "${ssd}" "${mountpoint}"
# We only do the bindmount if users are using the new local ssd request method
# see
if [[ ! -z "${NODE_LOCAL_SSDS_EXT:-}" ]]; then
unique-uuid-bind-mount "${mountpoint}" "${actual_device}"
elif [[ "${format}" == "block" ]]; then
local symdir="${UUID_BLOCK_PREFIX}-${interface}-block"
safe-block-symlink "${ssd}" "${symdir}"
echo "Disk format must be either fs or block, got ${format}"
# Local ssds, if present, are mounted or symlinked to their appropriate
# locations
function ensure-local-ssds() {
get-local-disk-num "scsi" "block"
local scsiblocknum="${localdisknum}"
local i=0
for ssd in /dev/disk/by-id/google-local-ssd-*; do
if [ -e "${ssd}" ]; then
ssdnum=`echo ${ssd} | sed -e 's/\/dev\/disk\/by-id\/google-local-ssd-\([0-9]*\)/\1/'`
mkdir -p ${ssdmount}
safe-format-and-mount "${ssd}" ${ssdmount}
echo "Mounted local SSD $ssd at ${ssdmount}"
chmod a+w ${ssdmount}
local devicenum=`echo ${ssd} | sed -e 's/\/dev\/disk\/by-id\/google-local-ssd-\([0-9]*\)/\1/'`
if [[ "${i}" -lt "${scsiblocknum}" ]]; then
mount-ext "${ssd}" "${devicenum}" "scsi" "block"
# GKE does not set NODE_LOCAL_SSDS so all non-block devices
# are assumed to be filesystem devices
mount-ext "${ssd}" "${devicenum}" "scsi" "fs"
echo "No local SSD disks found."
echo "No local SCSI SSD disks found."
# The following mounts or symlinks NVMe devices
get-local-disk-num "nvme" "block"
local nvmeblocknum="${localdisknum}"
local i=0
for ssd in /dev/nvme*; do
if [ -e "${ssd}" ]; then
# This workaround to find if the NVMe device is a disk is required because
# the existing Google images do not expose NVMe devices in /dev/disk/by-id
if [[ `udevadm info --query=property --name=${ssd} | grep DEVTYPE | sed "s/DEVTYPE=//"` == "disk" ]]; then
local devicenum=`echo ${ssd} | sed -e 's/\/dev\/nvme0n\([0-9]*\)/\1/'`
if [[ "${i}" -lt "${nvmeblocknum}" ]]; then
mount-ext "${ssd}" "${devicenum}" "nvme" "block"
mount-ext "${ssd}" "${devicenum}" "nvme" "fs"
echo "No local NVMe SSD disks found."

View File

@ -18,6 +18,8 @@
# Use the config file specified in $KUBE_CONFIG_FILE, or default to
readonly GCE_MAX_LOCAL_SSD=8
KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
source "${KUBE_ROOT}/cluster/gce/${KUBE_CONFIG_FILE-""}"
source "${KUBE_ROOT}/cluster/"
@ -37,6 +39,11 @@ else
exit 1
if [[ ${NODE_LOCAL_SSDS:-} -ge 1 ]] && [[ ! -z ${NODE_LOCAL_SSDS_EXT:-} ]] ; then
echo -e "${color_red}Local SSD: Only one of NODE_LOCAL_SSDS and NODE_LOCAL_SSDS_EXT can be specified at once${color_norm}" >&2
exit 2
if [[ "${MASTER_OS_DISTRIBUTION}" == "gci" ]]; then
if [[ "${GCI_VERSION}" == "cos"* ]]; then
@ -546,6 +553,29 @@ function get-template-name-from-version() {
echo "${NODE_INSTANCE_PREFIX}-template-${1}" | cut -c 1-63 | sed 's/[\.\+]/-/g;s/-*$//g'
# validates the NODE_LOCAL_SSDS_EXT variable
function validate-node-local-ssds-ext(){
if [[ -z "${ssdopts[0]}" || -z "${ssdopts[1]}" || -z "${ssdopts[2]}" ]]; then
echo -e "${color_red}Local SSD: NODE_LOCAL_SSDS_EXT is malformed, found ${ssdopts[0]-_},${ssdopts[1]-_},${ssdopts[2]-_} ${color_norm}" >&2
exit 2
if [[ "${ssdopts[1]}" != "scsi" && "${ssdopts[1]}" != "nvme" ]]; then
echo -e "${color_red}Local SSD: Interface must be scsi or nvme, found: ${ssdopts[1]} ${color_norm}" >&2
exit 2
if [[ "${ssdopts[2]}" != "fs" && "${ssdopts[2]}" != "block" ]]; then
echo -e "${color_red}Local SSD: Filesystem type must be fs or block, found: ${ssdopts[2]} ${color_norm}" >&2
exit 2
if [[ "${local_ssd_ext_count}" -gt "${GCE_MAX_LOCAL_SSD}" || "${local_ssd_ext_count}" -lt 1 ]]; then
echo -e "${color_red}Local SSD: Total number of local ssds must range from 1 to 8, found: ${local_ssd_ext_count} ${color_norm}" >&2
exit 2
# Robustly try to create an instance template.
# $1: The name of the instance template.
# $2: The scopes flag.
@ -587,6 +617,19 @@ function create-node-template() {
local local_ssds=""
if [[ ! -z ${NODE_LOCAL_SSDS_EXT:-} ]]; then
IFS=";" read -r -a ssdgroups <<< "${NODE_LOCAL_SSDS_EXT:-}"
for ssdgroup in "${ssdgroups[@]}"
IFS="," read -r -a ssdopts <<< "${ssdgroup}"
validate-node-local-ssds-ext "${ssdopts}"
for i in $(seq ${ssdopts[0]}); do
local_ssds="$local_ssds--local-ssd=interface=${ssdopts[1]} "
if [[ ! -z ${NODE_LOCAL_SSDS+x} ]]; then
# The NODE_LOCAL_SSDS check below fixes issue #49171
# Some versions of seq will count down from 1 if "seq 0" is specified
@ -596,6 +639,7 @@ function create-node-template() {
local network=$(make-gcloud-network-argument \

View File

@ -30,6 +30,7 @@ go_library(

View File

@ -32,6 +32,7 @@ import (
@ -61,6 +62,7 @@ func RegisterAllAdmissionPlugins(plugins *admission.Plugins) {

View File

@ -21,9 +21,13 @@ limitations under the License.
package app
import (
@ -36,6 +40,45 @@ func startCSRSigningController(ctx ControllerContext) (bool, error) {
if ctx.Options.ClusterSigningCertFile == "" || ctx.Options.ClusterSigningKeyFile == "" {
return false, nil
// Deprecation warning for old defaults.
// * If the signing cert and key are the default paths but the files
// exist, warn that the paths need to be specified explicitly in a
// later release and the defaults will be removed. We don't expect this
// to be the case.
// * If the signing cert and key are default paths but the files don't exist,
// bail out of startController without logging.
var keyFileExists, keyUsesDefault, certFileExists, certUsesDefault bool
_, err := os.Stat(ctx.Options.ClusterSigningCertFile)
certFileExists = !os.IsNotExist(err)
certUsesDefault = (ctx.Options.ClusterSigningCertFile == options.DefaultClusterSigningCertFile)
_, err = os.Stat(ctx.Options.ClusterSigningKeyFile)
keyFileExists = !os.IsNotExist(err)
keyUsesDefault = (ctx.Options.ClusterSigningKeyFile == options.DefaultClusterSigningKeyFile)
switch {
case (keyFileExists && keyUsesDefault) || (certFileExists && certUsesDefault):
glog.Warningf("You might be using flag defaulting for --cluster-signing-cert-file and" +
" --cluster-signing-key-file. These defaults are deprecated and will be removed" +
" in a subsequent release. Please pass these options explicitly.")
case (!keyFileExists && keyUsesDefault) && (!certFileExists && certUsesDefault):
// This is what we expect right now if people aren't
// setting up the signing controller. This isn't
// actually a problem since the signer is not a
// required controller.
return false, nil
// Note that '!filesExist && !usesDefaults' is obviously
// operator error. We don't handle this case here and instead
// allow it to be handled by NewCSR... below.
c := ctx.ClientBuilder.ClientOrDie("certificate-controller")
signer, err := signer.NewCSRSigningController(
@ -46,8 +89,7 @@ func startCSRSigningController(ctx ControllerContext) (bool, error) {
if err != nil {
glog.Errorf("Failed to start certificate controller: %v", err)
return false, nil
return false, fmt.Errorf("failed to start certificate controller: %v", err)
go signer.Run(1, ctx.Stop)

View File

@ -39,6 +39,16 @@ import (
const (
// These defaults are deprecated and exported so that we can warn if
// they are being used.
// DefaultClusterSigningCertFile is deprecated. Do not use.
DefaultClusterSigningCertFile = "/etc/kubernetes/ca/ca.pem"
// DefaultClusterSigningKeyFile is deprecated. Do not use.
DefaultClusterSigningKeyFile = "/etc/kubernetes/ca/ca.key"
// CMServer is the main context object for the controller manager.
type CMServer struct {
@ -111,8 +121,8 @@ func NewCMServer() *CMServer {
EnableGarbageCollector: true,
ConcurrentGCSyncs: 20,
GCIgnoredResources: gcIgnoredResources,
ClusterSigningCertFile: "/etc/kubernetes/ca/ca.pem",
ClusterSigningKeyFile: "/etc/kubernetes/ca/ca.key",
ClusterSigningCertFile: DefaultClusterSigningCertFile,
ClusterSigningKeyFile: DefaultClusterSigningKeyFile,
ClusterSigningDuration: metav1.Duration{Duration: helpers.OneYear},
ReconcilerSyncLoopPeriod: metav1.Duration{Duration: 60 * time.Second},
EnableTaintManager: true,

View File

@ -140,7 +140,8 @@ func ValidateArgSelection(cfg *kubeadm.NodeConfiguration, fldPath *field.Path) f
allErrs = append(allErrs, field.Invalid(fldPath, "", "DiscoveryTokenCACertHashes cannot be used with DiscoveryFile"))
if len(cfg.DiscoveryFile) == 0 && len(cfg.DiscoveryTokenCACertHashes) == 0 && !cfg.DiscoveryTokenUnsafeSkipCAVerification {
if len(cfg.DiscoveryFile) == 0 && len(cfg.DiscoveryToken) != 0 &&
len(cfg.DiscoveryTokenCACertHashes) == 0 && !cfg.DiscoveryTokenUnsafeSkipCAVerification {
allErrs = append(allErrs, field.Invalid(fldPath, "", "using token-based discovery without DiscoveryTokenCACertHashes can be unsafe. set --discovery-token-unsafe-skip-ca-verification to continue"))

View File

@ -202,16 +202,16 @@ func EnforceVersionPolicies(flags *applyFlags, versionGetter upgrade.VersionGett
if versionSkewErrs != nil {
if len(versionSkewErrs.Mandatory) > 0 {
return fmt.Errorf("The --version argument is invalid due to these fatal errors: %v", versionSkewErrs.Mandatory)
return fmt.Errorf("The --version argument is invalid due to these fatal errors:\n\n%v\nPlease fix the misalignments highlighted above and try upgrading again", kubeadmutil.FormatErrMsg(versionSkewErrs.Mandatory))
if len(versionSkewErrs.Skippable) > 0 {
// Return the error if the user hasn't specified the --force flag
if !flags.force {
return fmt.Errorf("The --version argument is invalid due to these errors: %v. Can be bypassed if you pass the --force flag", versionSkewErrs.Skippable)
return fmt.Errorf("The --version argument is invalid due to these errors:\n\n%v\nCan be bypassed if you pass the --force flag", kubeadmutil.FormatErrMsg(versionSkewErrs.Skippable))
// Soft errors found, but --force was specified
fmt.Printf("[upgrade/version] Found %d potential version compatibility errors but skipping since the --force flag is set: %v\n", len(versionSkewErrs.Skippable), versionSkewErrs.Skippable)
fmt.Printf("[upgrade/version] Found %d potential version compatibility errors but skipping since the --force flag is set: \n\n%v", len(versionSkewErrs.Skippable), kubeadmutil.FormatErrMsg(versionSkewErrs.Skippable))
return nil

View File

@ -75,3 +75,12 @@ func checkErr(prefix string, err error, handleErr func(string, int)) {
handleErr(err.Error(), DefaultErrorExitCode)
// FormatErrMsg returns a human-readable string describing the slice of errors passed to the function
func FormatErrMsg(errs []error) string {
var errMsg string
for _, err := range errs {
errMsg = fmt.Sprintf("%s\t-%s\n", errMsg, err.Error())
return errMsg

View File

@ -50,3 +50,34 @@ func TestCheckErr(t *testing.T) {
func TestFormatErrMsg(t *testing.T) {
errMsg1 := "specified version to upgrade to v1.9.0-alpha.3 is equal to or lower than the cluster version v1.10.0-alpha.0.69+638add6ddfb6d2. Downgrades are not supported yet"
errMsg2 := "specified version to upgrade to v1.9.0-alpha.3 is higher than the kubeadm version v1.9.0-alpha.1.3121+84178212527295-dirty. Upgrade kubeadm first using the tool you used to install kubeadm"
testCases := []struct {
errs []error
expect string
errs: []error{
expect: "\t-" + errMsg1 + "\n" + "\t-" + errMsg2 + "\n",
errs: []error{
expect: "\t-" + errMsg1 + "\n",
for _, testCase := range testCases {
got := FormatErrMsg(testCase.errs)
if got != testCase.expect {
t.Errorf("FormatErrMsg error, expect: %v, got: %v", testCase.expect, got)

View File

@ -20,6 +20,7 @@ package options
import (
_ "net/http/pprof"
@ -154,6 +155,8 @@ type KubeletFlags struct {
// This will cause the kubelet to listen to inotify events on the lock file,
// releasing it and exiting when another process tries to open that file.
ExitOnLockContention bool
// seccompProfileRoot is the directory path for seccomp profiles.
SeccompProfileRoot string
// minimumGCAge is the minimum age for a finished container before it is
@ -214,6 +217,7 @@ func NewKubeletFlags() *KubeletFlags {
NodeLabels: make(map[string]string),
VolumePluginDir: "/usr/libexec/kubernetes/kubelet-plugins/volume/exec/",
RegisterNode: true,
SeccompProfileRoot: filepath.Join(v1alpha1.DefaultRootDir, "seccomp"),
@ -338,6 +342,7 @@ func (f *KubeletFlags) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&f.VolumePluginDir, "volume-plugin-dir", f.VolumePluginDir, "<Warning: Alpha feature> The full path of the directory in which to search for additional third party volume plugins")
fs.StringVar(&f.LockFilePath, "lock-file", f.LockFilePath, "<Warning: Alpha feature> The path to file for kubelet to use as a lock file.")
fs.BoolVar(&f.ExitOnLockContention, "exit-on-lock-contention", f.ExitOnLockContention, "Whether kubelet should exit upon lock-file contention.")
fs.StringVar(&f.SeccompProfileRoot, "seccomp-profile-root", f.SeccompProfileRoot, "<Warning: Alpha feature> Directory path for seccomp profiles.")
fs.DurationVar(&f.MinimumGCAge.Duration, "minimum-container-ttl-duration", f.MinimumGCAge.Duration, "Minimum age for a finished container before it is garbage collected. Examples: '300ms', '10s' or '2h45m'")
@ -405,7 +410,6 @@ func AddKubeletConfigFlags(fs *pflag.FlagSet, c *kubeletconfig.KubeletConfigurat
"are generated for the public address and saved to the directory passed to --cert-dir.")
fs.StringVar(&c.TLSPrivateKeyFile, "tls-private-key-file", c.TLSPrivateKeyFile, "File containing x509 private key matching --tls-cert-file.")
fs.StringVar(&c.SeccompProfileRoot, "seccomp-profile-root", c.SeccompProfileRoot, "Directory path for seccomp profiles.")
fs.BoolVar(&c.AllowPrivileged, "allow-privileged", c.AllowPrivileged, "If true, allow containers to request privileged mode.")
fs.StringSliceVar(&c.HostNetworkSources, "host-network-sources", c.HostNetworkSources, "Comma-separated list of sources from which the Kubelet allows pods to use of host network.")
fs.StringSliceVar(&c.HostPIDSources, "host-pid-sources", c.HostPIDSources, "Comma-separated list of sources from which the Kubelet allows pods to use the host pid namespace.")

View File

@ -728,7 +728,8 @@ func RunKubelet(kubeFlags *options.KubeletFlags, kubeCfg *kubeletconfiginternal.
if err != nil {
return fmt.Errorf("failed to create kubelet: %v", err)
@ -800,7 +801,8 @@ func CreateAndInitKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
registerSchedulable bool,
nonMasqueradeCIDR string,
keepTerminatedPodVolumes bool,
nodeLabels map[string]string) (k kubelet.Bootstrap, err error) {
nodeLabels map[string]string,
seccompProfileRoot string) (k kubelet.Bootstrap, err error) {
// TODO: block until all sources have delivered at least one update to the channel, or break the sync loop
// up into "per source" synchronizations
@ -832,7 +834,8 @@ func CreateAndInitKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
if err != nil {
return nil, err

View File

@ -13,6 +13,7 @@ go_library(
@ -48,6 +49,7 @@ go_library(
@ -60,10 +62,14 @@ go_test(
library = ":go_default_library",
deps = [

View File

@ -44,13 +44,14 @@ import (
const (
// CloudProviderName is the value used for the --cloud-provider flag
CloudProviderName = "azure"
rateLimitQPSDefault = 1.0
rateLimitBucketDefault = 5
backoffRetriesDefault = 6
backoffExponentDefault = 1.5
backoffDurationDefault = 5 // in seconds
backoffJitterDefault = 1.0
CloudProviderName = "azure"
rateLimitQPSDefault = 1.0
rateLimitBucketDefault = 5
backoffRetriesDefault = 6
backoffExponentDefault = 1.5
backoffDurationDefault = 5 // in seconds
backoffJitterDefault = 1.0
maximumLoadBalancerRuleCount = 148 // According to Azure LB rule default limit
// Config holds the configuration parsed from the --cloud-config flag
@ -113,6 +114,57 @@ type Config struct {
// Use managed service identity for the virtual machine to access Azure ARM APIs
UseManagedIdentityExtension bool `json:"useManagedIdentityExtension"`
// Maximum allowed LoadBalancer Rule Count is the limit enforced by Azure Load balancer
MaximumLoadBalancerRuleCount int `json:"maximumLoadBalancerRuleCount"`
// VirtualMachinesClient defines needed functions for azure compute.VirtualMachinesClient
type VirtualMachinesClient interface {
CreateOrUpdate(resourceGroupName string, VMName string, parameters compute.VirtualMachine, cancel <-chan struct{}) (<-chan compute.VirtualMachine, <-chan error)
Get(resourceGroupName string, VMName string, expand compute.InstanceViewTypes) (result compute.VirtualMachine, err error)
List(resourceGroupName string) (result compute.VirtualMachineListResult, err error)
ListNextResults(lastResults compute.VirtualMachineListResult) (result compute.VirtualMachineListResult, err error)
// InterfacesClient defines needed functions for azure network.InterfacesClient
type InterfacesClient interface {
CreateOrUpdate(resourceGroupName string, networkInterfaceName string, parameters network.Interface, cancel <-chan struct{}) (<-chan network.Interface, <-chan error)
Get(resourceGroupName string, networkInterfaceName string, expand string) (result network.Interface, err error)
// LoadBalancersClient defines needed functions for azure network.LoadBalancersClient
type LoadBalancersClient interface {
CreateOrUpdate(resourceGroupName string, loadBalancerName string, parameters network.LoadBalancer, cancel <-chan struct{}) (<-chan network.LoadBalancer, <-chan error)
Delete(resourceGroupName string, loadBalancerName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error)
Get(resourceGroupName string, loadBalancerName string, expand string) (result network.LoadBalancer, err error)
List(resourceGroupName string) (result network.LoadBalancerListResult, err error)
ListNextResults(lastResult network.LoadBalancerListResult) (result network.LoadBalancerListResult, err error)
// PublicIPAddressesClient defines needed functions for azure network.PublicIPAddressesClient
type PublicIPAddressesClient interface {
CreateOrUpdate(resourceGroupName string, publicIPAddressName string, parameters network.PublicIPAddress, cancel <-chan struct{}) (<-chan network.PublicIPAddress, <-chan error)
Delete(resourceGroupName string, publicIPAddressName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error)
Get(resourceGroupName string, publicIPAddressName string, expand string) (result network.PublicIPAddress, err error)
List(resourceGroupName string) (result network.PublicIPAddressListResult, err error)
ListNextResults(lastResults network.PublicIPAddressListResult) (result network.PublicIPAddressListResult, err error)
// SubnetsClient defines needed functions for azure network.SubnetsClient
type SubnetsClient interface {
CreateOrUpdate(resourceGroupName string, virtualNetworkName string, subnetName string, subnetParameters network.Subnet, cancel <-chan struct{}) (<-chan network.Subnet, <-chan error)
Delete(resourceGroupName string, virtualNetworkName string, subnetName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error)
Get(resourceGroupName string, virtualNetworkName string, subnetName string, expand string) (result network.Subnet, err error)
List(resourceGroupName string, virtualNetworkName string) (result network.SubnetListResult, err error)
// SecurityGroupsClient defines needed functions for azure network.SecurityGroupsClient
type SecurityGroupsClient interface {
CreateOrUpdate(resourceGroupName string, networkSecurityGroupName string, parameters network.SecurityGroup, cancel <-chan struct{}) (<-chan network.SecurityGroup, <-chan error)
Delete(resourceGroupName string, networkSecurityGroupName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error)
Get(resourceGroupName string, networkSecurityGroupName string, expand string) (result network.SecurityGroup, err error)
List(resourceGroupName string) (result network.SecurityGroupListResult, err error)
// Cloud holds the config and clients
@ -120,13 +172,13 @@ type Cloud struct {
Environment azure.Environment
RoutesClient network.RoutesClient
SubnetsClient network.SubnetsClient
InterfacesClient network.InterfacesClient
SubnetsClient SubnetsClient
InterfacesClient InterfacesClient
RouteTablesClient network.RouteTablesClient
LoadBalancerClient network.LoadBalancersClient
PublicIPAddressesClient network.PublicIPAddressesClient
SecurityGroupsClient network.SecurityGroupsClient
VirtualMachinesClient compute.VirtualMachinesClient
LoadBalancerClient LoadBalancersClient
PublicIPAddressesClient PublicIPAddressesClient
SecurityGroupsClient SecurityGroupsClient
VirtualMachinesClient VirtualMachinesClient
StorageAccountClient storage.AccountsClient
DisksClient disk.DisksClient
operationPollRateLimiter flowcontrol.RateLimiter
@ -221,11 +273,12 @@ func NewCloud(configReader io.Reader) (cloudprovider.Interface, error) {
return nil, err
az.SubnetsClient = network.NewSubnetsClient(az.SubscriptionID)
az.SubnetsClient.BaseURI = az.Environment.ResourceManagerEndpoint
az.SubnetsClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken)
az.SubnetsClient.PollingDelay = 5 * time.Second
subnetsClient := network.NewSubnetsClient(az.SubscriptionID)
subnetsClient.BaseURI = az.Environment.ResourceManagerEndpoint
subnetsClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken)
subnetsClient.PollingDelay = 5 * time.Second
az.SubnetsClient = subnetsClient
az.RouteTablesClient = network.NewRouteTablesClient(az.SubscriptionID)
az.RouteTablesClient.BaseURI = az.Environment.ResourceManagerEndpoint
@ -239,35 +292,40 @@ func NewCloud(configReader io.Reader) (cloudprovider.Interface, error) {
az.RoutesClient.PollingDelay = 5 * time.Second
az.InterfacesClient = network.NewInterfacesClient(az.SubscriptionID)
az.InterfacesClient.BaseURI = az.Environment.ResourceManagerEndpoint
az.InterfacesClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken)
az.InterfacesClient.PollingDelay = 5 * time.Second
interfacesClient := network.NewInterfacesClient(az.SubscriptionID)
interfacesClient.BaseURI = az.Environment.ResourceManagerEndpoint
interfacesClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken)
interfacesClient.PollingDelay = 5 * time.Second
az.InterfacesClient = interfacesClient
az.LoadBalancerClient = network.NewLoadBalancersClient(az.SubscriptionID)
az.LoadBalancerClient.BaseURI = az.Environment.ResourceManagerEndpoint
az.LoadBalancerClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken)
az.LoadBalancerClient.PollingDelay = 5 * time.Second
loadBalancerClient := network.NewLoadBalancersClient(az.SubscriptionID)
loadBalancerClient.BaseURI = az.Environment.ResourceManagerEndpoint
loadBalancerClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken)
loadBalancerClient.PollingDelay = 5 * time.Second
az.LoadBalancerClient = loadBalancerClient
az.VirtualMachinesClient = compute.NewVirtualMachinesClient(az.SubscriptionID)
az.VirtualMachinesClient.BaseURI = az.Environment.ResourceManagerEndpoint
az.VirtualMachinesClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken)
az.VirtualMachinesClient.PollingDelay = 5 * time.Second
virtualMachinesClient := compute.NewVirtualMachinesClient(az.SubscriptionID)
virtualMachinesClient.BaseURI = az.Environment.ResourceManagerEndpoint
virtualMachinesClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken)
virtualMachinesClient.PollingDelay = 5 * time.Second
az.VirtualMachinesClient = virtualMachinesClient
az.PublicIPAddressesClient = network.NewPublicIPAddressesClient(az.SubscriptionID)
az.PublicIPAddressesClient.BaseURI = az.Environment.ResourceManagerEndpoint
az.PublicIPAddressesClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken)
az.PublicIPAddressesClient.PollingDelay = 5 * time.Second
publicIPAddressClient := network.NewPublicIPAddressesClient(az.SubscriptionID)
publicIPAddressClient.BaseURI = az.Environment.ResourceManagerEndpoint
publicIPAddressClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken)
publicIPAddressClient.PollingDelay = 5 * time.Second
az.PublicIPAddressesClient = publicIPAddressClient
az.SecurityGroupsClient = network.NewSecurityGroupsClient(az.SubscriptionID)
az.SecurityGroupsClient.BaseURI = az.Environment.ResourceManagerEndpoint
az.SecurityGroupsClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken)
az.SecurityGroupsClient.PollingDelay = 5 * time.Second
securityGroupsClient := network.NewSecurityGroupsClient(az.SubscriptionID)
securityGroupsClient.BaseURI = az.Environment.ResourceManagerEndpoint
securityGroupsClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken)
securityGroupsClient.PollingDelay = 5 * time.Second
az.SecurityGroupsClient = securityGroupsClient
az.StorageAccountClient = storage.NewAccountsClientWithBaseURI(az.Environment.ResourceManagerEndpoint, az.SubscriptionID)
az.StorageAccountClient.Authorizer = autorest.NewBearerAuthorizer(servicePrincipalToken)
@ -327,6 +385,10 @@ func NewCloud(configReader io.Reader) (cloudprovider.Interface, error) {
az.metadata = NewInstanceMetadata()
if az.MaximumLoadBalancerRuleCount == 0 {
az.MaximumLoadBalancerRuleCount = maximumLoadBalancerRuleCount
if err := initDiskControllers(&az); err != nil {
return nil, err

View File

@ -26,11 +26,26 @@ import (
// requestBackoff returns az.resourceRequestBackoff when backoff is enabled
// in the cloud provider. If backoff is disabled, it returns a new Backoff
// object with Steps = 1. This is to make sure that the requested command
// executes at least once.
func (az *Cloud) requestBackoff() (resourceRequestBackoff wait.Backoff) {
if az.CloudProviderBackoff {
return az.resourceRequestBackoff
resourceRequestBackoff = wait.Backoff{
Steps: 1,
return resourceRequestBackoff
// GetVirtualMachineWithRetry invokes az.getVirtualMachine with exponential backoff retry
func (az *Cloud) GetVirtualMachineWithRetry(name types.NodeName) (compute.VirtualMachine, bool, error) {
var machine compute.VirtualMachine
var exists bool
err := wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
var retryErr error
machine, exists, retryErr = az.getVirtualMachine(name)
if retryErr != nil {
@ -46,8 +61,9 @@ func (az *Cloud) GetVirtualMachineWithRetry(name types.NodeName) (compute.Virtua
// VirtualMachineClientGetWithRetry invokes az.VirtualMachinesClient.Get with exponential backoff retry
func (az *Cloud) VirtualMachineClientGetWithRetry(resourceGroup, vmName string, types compute.InstanceViewTypes) (compute.VirtualMachine, error) {
var machine compute.VirtualMachine
err := wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
var retryErr error
machine, retryErr = az.VirtualMachinesClient.Get(resourceGroup, vmName, types)
if retryErr != nil {
glog.Errorf("backoff: failure, will retry,err=%v", retryErr)
@ -59,10 +75,63 @@ func (az *Cloud) VirtualMachineClientGetWithRetry(resourceGroup, vmName string,
return machine, err
// VirtualMachineClientListWithRetry invokes az.VirtualMachinesClient.List with exponential backoff retry
func (az *Cloud) VirtualMachineClientListWithRetry() ([]compute.VirtualMachine, error) {
allNodes := []compute.VirtualMachine{}
var result compute.VirtualMachineListResult
err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
var retryErr error
glog.V(10).Infof("VirtualMachinesClient.List(%v): start", az.ResourceGroup)
result, retryErr = az.VirtualMachinesClient.List(az.ResourceGroup)
glog.V(10).Infof("VirtualMachinesClient.List(%v): end", az.ResourceGroup)
if retryErr != nil {
glog.Errorf("VirtualMachinesClient.List(%v) - backoff: failure, will retry,err=%v",
return false, retryErr
glog.V(2).Infof("VirtualMachinesClient.List(%v) - backoff: success", az.ResourceGroup)
return true, nil
if err != nil {
return nil, err
appendResults := (result.Value != nil && len(*result.Value) > 0)
for appendResults {
allNodes = append(allNodes, *result.Value...)
appendResults = false
// follow the next link to get all the vms for resource group
if result.NextLink != nil {
err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
var retryErr error
glog.V(10).Infof("VirtualMachinesClient.ListNextResults(%v): start", az.ResourceGroup)
result, retryErr = az.VirtualMachinesClient.ListNextResults(result)
glog.V(10).Infof("VirtualMachinesClient.ListNextResults(%v): end", az.ResourceGroup)
if retryErr != nil {
glog.Errorf("VirtualMachinesClient.ListNextResults(%v) - backoff: failure, will retry,err=%v",
az.ResourceGroup, retryErr)
return false, retryErr
glog.V(2).Infof("VirtualMachinesClient.ListNextResults(%v): success", az.ResourceGroup)
return true, nil
if err != nil {
return allNodes, err
appendResults = (result.Value != nil && len(*result.Value) > 0)
return allNodes, err
// GetIPForMachineWithRetry invokes az.getIPForMachine with exponential backoff retry
func (az *Cloud) GetIPForMachineWithRetry(name types.NodeName) (string, error) {
var ip string
err := wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
var retryErr error
ip, retryErr = az.getIPForMachine(name)
if retryErr != nil {
@ -77,7 +146,7 @@ func (az *Cloud) GetIPForMachineWithRetry(name types.NodeName) (string, error) {
// CreateOrUpdateSGWithRetry invokes az.SecurityGroupsClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateSGWithRetry(sg network.SecurityGroup) error {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
glog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%s): start", *sg.Name)
respChan, errChan := az.SecurityGroupsClient.CreateOrUpdate(az.ResourceGroup, *sg.Name, sg, nil)
@ -90,7 +159,7 @@ func (az *Cloud) CreateOrUpdateSGWithRetry(sg network.SecurityGroup) error {
// CreateOrUpdateLBWithRetry invokes az.LoadBalancerClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateLBWithRetry(lb network.LoadBalancer) error {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
glog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%s): start", *lb.Name)
respChan, errChan := az.LoadBalancerClient.CreateOrUpdate(az.ResourceGroup, *lb.Name, lb, nil)
@ -101,9 +170,120 @@ func (az *Cloud) CreateOrUpdateLBWithRetry(lb network.LoadBalancer) error {
// ListLBWithRetry invokes az.LoadBalancerClient.List with exponential backoff retry
func (az *Cloud) ListLBWithRetry() ([]network.LoadBalancer, error) {
allLBs := []network.LoadBalancer{}
var result network.LoadBalancerListResult
err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
var retryErr error
glog.V(10).Infof("LoadBalancerClient.List(%v): start", az.ResourceGroup)
result, retryErr = az.LoadBalancerClient.List(az.ResourceGroup)
glog.V(10).Infof("LoadBalancerClient.List(%v): end", az.ResourceGroup)
if retryErr != nil {
glog.Errorf("LoadBalancerClient.List(%v) - backoff: failure, will retry,err=%v",
return false, retryErr
glog.V(2).Infof("LoadBalancerClient.List(%v) - backoff: success", az.ResourceGroup)
return true, nil
if err != nil {
return nil, err
appendResults := (result.Value != nil && len(*result.Value) > 0)
for appendResults {
allLBs = append(allLBs, *result.Value...)
appendResults = false
// follow the next link to get all the load balancers for resource group
if result.NextLink != nil {
err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
var retryErr error
glog.V(10).Infof("LoadBalancerClient.ListNextResults(%v): start", az.ResourceGroup)
result, retryErr = az.LoadBalancerClient.ListNextResults(result)
glog.V(10).Infof("LoadBalancerClient.ListNextResults(%v): end", az.ResourceGroup)
if retryErr != nil {
glog.Errorf("LoadBalancerClient.ListNextResults(%v) - backoff: failure, will retry,err=%v",
return false, retryErr
glog.V(2).Infof("LoadBalancerClient.ListNextResults(%v) - backoff: success", az.ResourceGroup)
return true, nil
if err != nil {
return allLBs, err
appendResults = (result.Value != nil && len(*result.Value) > 0)
return allLBs, nil
// ListPIPWithRetry lists the PIP resources in az.ResourceGroup with exponential backoff retry
func (az *Cloud) ListPIPWithRetry() ([]network.PublicIPAddress, error) {
allPIPs := []network.PublicIPAddress{}
var result network.PublicIPAddressListResult
err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
var retryErr error
glog.V(10).Infof("PublicIPAddressesClient.List(%v): start", az.ResourceGroup)
result, retryErr = az.PublicIPAddressesClient.List(az.ResourceGroup)
glog.V(10).Infof("PublicIPAddressesClient.List(%v): end", az.ResourceGroup)
if retryErr != nil {
glog.Errorf("PublicIPAddressesClient.List(%v) - backoff: failure, will retry,err=%v",
return false, retryErr
glog.V(2).Infof("PublicIPAddressesClient.List(%v) - backoff: success", az.ResourceGroup)
return true, nil
if err != nil {
return nil, err
appendResults := (result.Value != nil && len(*result.Value) > 0)
for appendResults {
allPIPs = append(allPIPs, *result.Value...)
appendResults = false
// follow the next link to get all the public IP addresses for resource group
if result.NextLink != nil {
err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
var retryErr error
glog.V(10).Infof("PublicIPAddressesClient.ListNextResults(%v): start", az.ResourceGroup)
result, retryErr = az.PublicIPAddressesClient.ListNextResults(result)
glog.V(10).Infof("PublicIPAddressesClient.ListNextResults(%v): end", az.ResourceGroup)
if retryErr != nil {
glog.Errorf("PublicIPAddressesClient.ListNextResults(%v) - backoff: failure, will retry,err=%v",
return false, retryErr
glog.V(2).Infof("PublicIPAddressesClient.ListNextResults(%v) - backoff: success", az.ResourceGroup)
return true, nil
if err != nil {
return allPIPs, err
appendResults = (result.Value != nil && len(*result.Value) > 0)
return allPIPs, nil
// CreateOrUpdatePIPWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdatePIPWithRetry(pip network.PublicIPAddress) error {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
glog.V(10).Infof("PublicIPAddressesClient.CreateOrUpdate(%s): start", *pip.Name)
respChan, errChan := az.PublicIPAddressesClient.CreateOrUpdate(az.ResourceGroup, *pip.Name, pip, nil)
@ -116,7 +296,7 @@ func (az *Cloud) CreateOrUpdatePIPWithRetry(pip network.PublicIPAddress) error {
// CreateOrUpdateInterfaceWithRetry invokes az.InterfacesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateInterfaceWithRetry(nic network.Interface) error {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
glog.V(10).Infof("InterfacesClient.CreateOrUpdate(%s): start", *nic.Name)
respChan, errChan := az.InterfacesClient.CreateOrUpdate(az.ResourceGroup, *nic.Name, nic, nil)
@ -129,7 +309,7 @@ func (az *Cloud) CreateOrUpdateInterfaceWithRetry(nic network.Interface) error {
// DeletePublicIPWithRetry invokes az.PublicIPAddressesClient.Delete with exponential backoff retry
func (az *Cloud) DeletePublicIPWithRetry(pipName string) error {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
glog.V(10).Infof("PublicIPAddressesClient.Delete(%s): start", pipName)
respChan, errChan := az.PublicIPAddressesClient.Delete(az.ResourceGroup, pipName, nil)
@ -142,7 +322,7 @@ func (az *Cloud) DeletePublicIPWithRetry(pipName string) error {
// DeleteLBWithRetry invokes az.LoadBalancerClient.Delete with exponential backoff retry
func (az *Cloud) DeleteLBWithRetry(lbName string) error {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
glog.V(10).Infof("LoadBalancerClient.Delete(%s): start", lbName)
respChan, errChan := az.LoadBalancerClient.Delete(az.ResourceGroup, lbName, nil)
@ -155,7 +335,7 @@ func (az *Cloud) DeleteLBWithRetry(lbName string) error {
// CreateOrUpdateRouteTableWithRetry invokes az.RouteTablesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateRouteTableWithRetry(routeTable network.RouteTable) error {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
glog.V(10).Infof("RouteTablesClient.CreateOrUpdate(%s): start", *routeTable.Name)
respChan, errChan := az.RouteTablesClient.CreateOrUpdate(az.ResourceGroup, az.RouteTableName, routeTable, nil)
@ -168,7 +348,7 @@ func (az *Cloud) CreateOrUpdateRouteTableWithRetry(routeTable network.RouteTable
// CreateOrUpdateRouteWithRetry invokes az.RoutesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateRouteWithRetry(route network.Route) error {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
glog.V(10).Infof("RoutesClient.CreateOrUpdate(%s): start", *route.Name)
respChan, errChan := az.RoutesClient.CreateOrUpdate(az.ResourceGroup, az.RouteTableName, *route.Name, route, nil)
@ -181,7 +361,7 @@ func (az *Cloud) CreateOrUpdateRouteWithRetry(route network.Route) error {
// DeleteRouteWithRetry invokes az.RoutesClient.Delete with exponential backoff retry
func (az *Cloud) DeleteRouteWithRetry(routeName string) error {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
glog.V(10).Infof("RoutesClient.Delete(%s): start", az.RouteTableName)
respChan, errChan := az.RoutesClient.Delete(az.ResourceGroup, az.RouteTableName, routeName, nil)
@ -194,7 +374,7 @@ func (az *Cloud) DeleteRouteWithRetry(routeName string) error {
// CreateOrUpdateVMWithRetry invokes az.VirtualMachinesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateVMWithRetry(vmName string, newVM compute.VirtualMachine) error {
return wait.ExponentialBackoff(az.resourceRequestBackoff, func() (bool, error) {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
glog.V(10).Infof("VirtualMachinesClient.CreateOrUpdate(%s): start", vmName)
respChan, errChan := az.VirtualMachinesClient.CreateOrUpdate(az.ResourceGroup, vmName, newVM, nil)

View File

@ -0,0 +1,623 @@
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
See the License for the specific language governing permissions and
limitations under the License.
package azure
import (
type fakeAzureLBClient struct {
mutex *sync.Mutex
FakeStore map[string]map[string]network.LoadBalancer
func newFakeAzureLBClient() fakeAzureLBClient {
fLBC := fakeAzureLBClient{}
fLBC.FakeStore = make(map[string]map[string]network.LoadBalancer)
fLBC.mutex = &sync.Mutex{}
return fLBC
func (fLBC fakeAzureLBClient) CreateOrUpdate(resourceGroupName string, loadBalancerName string, parameters network.LoadBalancer, cancel <-chan struct{}) (<-chan network.LoadBalancer, <-chan error) {
defer fLBC.mutex.Unlock()
resultChan := make(chan network.LoadBalancer, 1)
errChan := make(chan error, 1)
var result network.LoadBalancer
var err error
defer func() {
resultChan <- result
errChan <- err
if _, ok := fLBC.FakeStore[resourceGroupName]; !ok {
fLBC.FakeStore[resourceGroupName] = make(map[string]network.LoadBalancer)
// For dynamic ip allocation, just fill in the PrivateIPAddress
if parameters.FrontendIPConfigurations != nil {
for idx, config := range *parameters.FrontendIPConfigurations {
if config.PrivateIPAllocationMethod == network.Dynamic {
// Here we randomly assign an IP as the private IP.
// It isn't smart enough to know whether the IP is within the subnet's range.
(*parameters.FrontendIPConfigurations)[idx].PrivateIPAddress = getRandomIPPtr()
fLBC.FakeStore[resourceGroupName][loadBalancerName] = parameters
result = fLBC.FakeStore[resourceGroupName][loadBalancerName]
result.Response.Response = &http.Response{
StatusCode: http.StatusOK,
err = nil
return resultChan, errChan
func (fLBC fakeAzureLBClient) Delete(resourceGroupName string, loadBalancerName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error) {
defer fLBC.mutex.Unlock()
respChan := make(chan autorest.Response, 1)
errChan := make(chan error, 1)
var resp autorest.Response
var err error
defer func() {
respChan <- resp
errChan <- err
if rgLBs, ok := fLBC.FakeStore[resourceGroupName]; ok {
if _, ok := rgLBs[loadBalancerName]; ok {
delete(rgLBs, loadBalancerName)
resp.Response = &http.Response{
StatusCode: http.StatusAccepted,
err = nil
return respChan, errChan
resp.Response = &http.Response{
StatusCode: http.StatusNotFound,
err = autorest.DetailedError{
StatusCode: http.StatusNotFound,
Message: "Not such LB",
return respChan, errChan
func (fLBC fakeAzureLBClient) Get(resourceGroupName string, loadBalancerName string, expand string) (result network.LoadBalancer, err error) {
defer fLBC.mutex.Unlock()
if _, ok := fLBC.FakeStore[resourceGroupName]; ok {
if entity, ok := fLBC.FakeStore[resourceGroupName][loadBalancerName]; ok {
return entity, nil
return result, autorest.DetailedError{
StatusCode: http.StatusNotFound,
Message: "Not such LB",
func (fLBC fakeAzureLBClient) List(resourceGroupName string) (result network.LoadBalancerListResult, err error) {
defer fLBC.mutex.Unlock()
var value []network.LoadBalancer
if _, ok := fLBC.FakeStore[resourceGroupName]; ok {
for _, v := range fLBC.FakeStore[resourceGroupName] {
value = append(value, v)
result.Response.Response = &http.Response{
StatusCode: http.StatusOK,
result.NextLink = nil
result.Value = &value
return result, nil
func (fLBC fakeAzureLBClient) ListNextResults(lastResult network.LoadBalancerListResult) (result network.LoadBalancerListResult, err error) {
defer fLBC.mutex.Unlock()
result.Response.Response = &http.Response{
StatusCode: http.StatusOK,
result.NextLink = nil
result.Value = nil
return result, nil
type fakeAzurePIPClient struct {
mutex *sync.Mutex
FakeStore map[string]map[string]network.PublicIPAddress
SubscriptionID string
const publicIPAddressIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/publicIPAddresses/%s"
// getpublicIPAddressID returns the full identifier of a publicIPAddress.
func getpublicIPAddressID(subscriptionID string, resourceGroupName, pipName string) string {
return fmt.Sprintf(
func newFakeAzurePIPClient(subscriptionID string) fakeAzurePIPClient {
fAPC := fakeAzurePIPClient{}
fAPC.FakeStore = make(map[string]map[string]network.PublicIPAddress)
fAPC.SubscriptionID = subscriptionID
fAPC.mutex = &sync.Mutex{}
return fAPC
func (fAPC fakeAzurePIPClient) CreateOrUpdate(resourceGroupName string, publicIPAddressName string, parameters network.PublicIPAddress, cancel <-chan struct{}) (<-chan network.PublicIPAddress, <-chan error) {
defer fAPC.mutex.Unlock()
resultChan := make(chan network.PublicIPAddress, 1)
errChan := make(chan error, 1)
var result network.PublicIPAddress
var err error
defer func() {
resultChan <- result
errChan <- err
if _, ok := fAPC.FakeStore[resourceGroupName]; !ok {
fAPC.FakeStore[resourceGroupName] = make(map[string]network.PublicIPAddress)
// assign id
pipID := getpublicIPAddressID(fAPC.SubscriptionID, resourceGroupName, publicIPAddressName)
parameters.ID = &pipID
// only create in the case user has not provided
if parameters.PublicIPAddressPropertiesFormat != nil &&
parameters.PublicIPAddressPropertiesFormat.PublicIPAllocationMethod == network.Static {
// assign ip
parameters.IPAddress = getRandomIPPtr()
fAPC.FakeStore[resourceGroupName][publicIPAddressName] = parameters
result = fAPC.FakeStore[resourceGroupName][publicIPAddressName]
result.Response.Response = &http.Response{
StatusCode: http.StatusOK,
err = nil
return resultChan, errChan
func (fAPC fakeAzurePIPClient) Delete(resourceGroupName string, publicIPAddressName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error) {
defer fAPC.mutex.Unlock()
respChan := make(chan autorest.Response, 1)
errChan := make(chan error, 1)
var resp autorest.Response
var err error
defer func() {
respChan <- resp
errChan <- err
if rgPIPs, ok := fAPC.FakeStore[resourceGroupName]; ok {
if _, ok := rgPIPs[publicIPAddressName]; ok {
delete(rgPIPs, publicIPAddressName)
resp.Response = &http.Response{
StatusCode: http.StatusAccepted,
err = nil
return respChan, errChan
resp.Response = &http.Response{
StatusCode: http.StatusNotFound,
err = autorest.DetailedError{
StatusCode: http.StatusNotFound,
Message: "Not such PIP",
return respChan, errChan
func (fAPC fakeAzurePIPClient) Get(resourceGroupName string, publicIPAddressName string, expand string) (result network.PublicIPAddress, err error) {
defer fAPC.mutex.Unlock()
if _, ok := fAPC.FakeStore[resourceGroupName]; ok {
if entity, ok := fAPC.FakeStore[resourceGroupName][publicIPAddressName]; ok {
return entity, nil
return result, autorest.DetailedError{
StatusCode: http.StatusNotFound,
Message: "Not such PIP",
func (fAPC fakeAzurePIPClient) ListNextResults(lastResults network.PublicIPAddressListResult) (result network.PublicIPAddressListResult, err error) {
defer fAPC.mutex.Unlock()
return network.PublicIPAddressListResult{}, nil
func (fAPC fakeAzurePIPClient) List(resourceGroupName string) (result network.PublicIPAddressListResult, err error) {
defer fAPC.mutex.Unlock()
var value []network.PublicIPAddress
if _, ok := fAPC.FakeStore[resourceGroupName]; ok {
for _, v := range fAPC.FakeStore[resourceGroupName] {
value = append(value, v)
result.Response.Response = &http.Response{
StatusCode: http.StatusOK,
result.NextLink = nil
result.Value = &value
return result, nil
type fakeAzureInterfacesClient struct {
mutex *sync.Mutex
FakeStore map[string]map[string]network.Interface
func newFakeAzureInterfacesClient() fakeAzureInterfacesClient {
fIC := fakeAzureInterfacesClient{}
fIC.FakeStore = make(map[string]map[string]network.Interface)
fIC.mutex = &sync.Mutex{}
return fIC
func (fIC fakeAzureInterfacesClient) CreateOrUpdate(resourceGroupName string, networkInterfaceName string, parameters network.Interface, cancel <-chan struct{}) (<-chan network.Interface, <-chan error) {
defer fIC.mutex.Unlock()
resultChan := make(chan network.Interface, 1)
errChan := make(chan error, 1)
var result network.Interface
var err error
defer func() {
resultChan <- result
errChan <- err
if _, ok := fIC.FakeStore[resourceGroupName]; !ok {
fIC.FakeStore[resourceGroupName] = make(map[string]network.Interface)
fIC.FakeStore[resourceGroupName][networkInterfaceName] = parameters
result = fIC.FakeStore[resourceGroupName][networkInterfaceName]
result.Response.Response = &http.Response{
StatusCode: http.StatusOK,
err = nil
return resultChan, errChan
func (fIC fakeAzureInterfacesClient) Get(resourceGroupName string, networkInterfaceName string, expand string) (result network.Interface, err error) {
defer fIC.mutex.Unlock()
if _, ok := fIC.FakeStore[resourceGroupName]; ok {
if entity, ok := fIC.FakeStore[resourceGroupName][networkInterfaceName]; ok {
return entity, nil
return result, autorest.DetailedError{
StatusCode: http.StatusNotFound,
Message: "Not such Interface",
type fakeAzureVirtualMachinesClient struct {
mutex *sync.Mutex
FakeStore map[string]map[string]compute.VirtualMachine
func newFakeAzureVirtualMachinesClient() fakeAzureVirtualMachinesClient {
fVMC := fakeAzureVirtualMachinesClient{}
fVMC.FakeStore = make(map[string]map[string]compute.VirtualMachine)
fVMC.mutex = &sync.Mutex{}
return fVMC
func (fVMC fakeAzureVirtualMachinesClient) CreateOrUpdate(resourceGroupName string, VMName string, parameters compute.VirtualMachine, cancel <-chan struct{}) (<-chan compute.VirtualMachine, <-chan error) {
defer fVMC.mutex.Unlock()
resultChan := make(chan compute.VirtualMachine, 1)
errChan := make(chan error, 1)
var result compute.VirtualMachine
var err error
defer func() {
resultChan <- result
errChan <- err
if _, ok := fVMC.FakeStore[resourceGroupName]; !ok {
fVMC.FakeStore[resourceGroupName] = make(map[string]compute.VirtualMachine)
fVMC.FakeStore[resourceGroupName][VMName] = parameters
result = fVMC.FakeStore[resourceGroupName][VMName]
result.Response.Response = &http.Response{
StatusCode: http.StatusOK,
err = nil
return resultChan, errChan
func (fVMC fakeAzureVirtualMachinesClient) Get(resourceGroupName string, VMName string, expand compute.InstanceViewTypes) (result compute.VirtualMachine, err error) {
defer fVMC.mutex.Unlock()
if _, ok := fVMC.FakeStore[resourceGroupName]; ok {
if entity, ok := fVMC.FakeStore[resourceGroupName][VMName]; ok {
return entity, nil
return result, autorest.DetailedError{
StatusCode: http.StatusNotFound,
Message: "Not such VM",
func (fVMC fakeAzureVirtualMachinesClient) List(resourceGroupName string) (result compute.VirtualMachineListResult, err error) {
defer fVMC.mutex.Unlock()
var value []compute.VirtualMachine
if _, ok := fVMC.FakeStore[resourceGroupName]; ok {
for _, v := range fVMC.FakeStore[resourceGroupName] {
value = append(value, v)
result.Response.Response = &http.Response{
StatusCode: http.StatusOK,
result.NextLink = nil
result.Value = &value
return result, nil
func (fVMC fakeAzureVirtualMachinesClient) ListNextResults(lastResults compute.VirtualMachineListResult) (result compute.VirtualMachineListResult, err error) {
defer fVMC.mutex.Unlock()
return compute.VirtualMachineListResult{}, nil
type fakeAzureSubnetsClient struct {
mutex *sync.Mutex
FakeStore map[string]map[string]network.Subnet
func newFakeAzureSubnetsClient() fakeAzureSubnetsClient {
fASC := fakeAzureSubnetsClient{}
fASC.FakeStore = make(map[string]map[string]network.Subnet)
fASC.mutex = &sync.Mutex{}
return fASC
func (fASC fakeAzureSubnetsClient) CreateOrUpdate(resourceGroupName string, virtualNetworkName string, subnetName string, subnetParameters network.Subnet, cancel <-chan struct{}) (<-chan network.Subnet, <-chan error) {
defer fASC.mutex.Unlock()
resultChan := make(chan network.Subnet, 1)
errChan := make(chan error, 1)
var result network.Subnet
var err error
defer func() {
resultChan <- result
errChan <- err
rgVnet := strings.Join([]string{resourceGroupName, virtualNetworkName}, "AND")
if _, ok := fASC.FakeStore[rgVnet]; !ok {
fASC.FakeStore[rgVnet] = make(map[string]network.Subnet)
fASC.FakeStore[rgVnet][subnetName] = subnetParameters
result = fASC.FakeStore[rgVnet][subnetName]
result.Response.Response = &http.Response{
StatusCode: http.StatusOK,
err = nil
return resultChan, errChan
func (fASC fakeAzureSubnetsClient) Delete(resourceGroupName string, virtualNetworkName string, subnetName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error) {
defer fASC.mutex.Unlock()
respChan := make(chan autorest.Response, 1)
errChan := make(chan error, 1)
var resp autorest.Response
var err error
defer func() {
respChan <- resp
errChan <- err
rgVnet := strings.Join([]string{resourceGroupName, virtualNetworkName}, "AND")
if rgSubnets, ok := fASC.FakeStore[rgVnet]; ok {
if _, ok := rgSubnets[subnetName]; ok {
delete(rgSubnets, subnetName)
resp.Response = &http.Response{
StatusCode: http.StatusAccepted,
err = nil
return respChan, errChan
resp.Response = &http.Response{
StatusCode: http.StatusNotFound,
err = autorest.DetailedError{
StatusCode: http.StatusNotFound,
Message: "Not such Subnet",
return respChan, errChan
func (fASC fakeAzureSubnetsClient) Get(resourceGroupName string, virtualNetworkName string, subnetName string, expand string) (result network.Subnet, err error) {
defer fASC.mutex.Unlock()
rgVnet := strings.Join([]string{resourceGroupName, virtualNetworkName}, "AND")
if _, ok := fASC.FakeStore[rgVnet]; ok {
if entity, ok := fASC.FakeStore[rgVnet][subnetName]; ok {
return entity, nil
return result, autorest.DetailedError{
StatusCode: http.StatusNotFound,
Message: "Not such Subnet",
func (fASC fakeAzureSubnetsClient) List(resourceGroupName string, virtualNetworkName string) (result network.SubnetListResult, err error) {
defer fASC.mutex.Unlock()
rgVnet := strings.Join([]string{resourceGroupName, virtualNetworkName}, "AND")
var value []network.Subnet
if _, ok := fASC.FakeStore[rgVnet]; ok {
for _, v := range fASC.FakeStore[rgVnet] {
value = append(value, v)
result.Response.Response = &http.Response{
StatusCode: http.StatusOK,
result.NextLink = nil
result.Value = &value
return result, nil
type fakeAzureNSGClient struct {
mutex *sync.Mutex
FakeStore map[string]map[string]network.SecurityGroup
func newFakeAzureNSGClient() fakeAzureNSGClient {
fNSG := fakeAzureNSGClient{}
fNSG.FakeStore = make(map[string]map[string]network.SecurityGroup)
fNSG.mutex = &sync.Mutex{}
return fNSG
func (fNSG fakeAzureNSGClient) CreateOrUpdate(resourceGroupName string, networkSecurityGroupName string, parameters network.SecurityGroup, cancel <-chan struct{}) (<-chan network.SecurityGroup, <-chan error) {
defer fNSG.mutex.Unlock()
resultChan := make(chan network.SecurityGroup, 1)
errChan := make(chan error, 1)
var result network.SecurityGroup
var err error
defer func() {
resultChan <- result
errChan <- err
if _, ok := fNSG.FakeStore[resourceGroupName]; !ok {
fNSG.FakeStore[resourceGroupName] = make(map[string]network.SecurityGroup)
fNSG.FakeStore[resourceGroupName][networkSecurityGroupName] = parameters
result = fNSG.FakeStore[resourceGroupName][networkSecurityGroupName]
result.Response.Response = &http.Response{
StatusCode: http.StatusOK,
err = nil
return resultChan, errChan
func (fNSG fakeAzureNSGClient) Delete(resourceGroupName string, networkSecurityGroupName string, cancel <-chan struct{}) (<-chan autorest.Response, <-chan error) {
defer fNSG.mutex.Unlock()
respChan := make(chan autorest.Response, 1)
errChan := make(chan error, 1)
var resp autorest.Response
var err error
defer func() {
respChan <- resp
errChan <- err
if rgSGs, ok := fNSG.FakeStore[resourceGroupName]; ok {
if _, ok := rgSGs[networkSecurityGroupName]; ok {
delete(rgSGs, networkSecurityGroupName)
resp.Response = &http.Response{
StatusCode: http.StatusAccepted,
err = nil
return respChan, errChan
resp.Response = &http.Response{
StatusCode: http.StatusNotFound,
err = autorest.DetailedError{
StatusCode: http.StatusNotFound,
Message: "Not such NSG",
return respChan, errChan
func (fNSG fakeAzureNSGClient) Get(resourceGroupName string, networkSecurityGroupName string, expand string) (result network.SecurityGroup, err error) {
defer fNSG.mutex.Unlock()
if _, ok := fNSG.FakeStore[resourceGroupName]; ok {
if entity, ok := fNSG.FakeStore[resourceGroupName][networkSecurityGroupName]; ok {
return entity, nil
return result, autorest.DetailedError{
StatusCode: http.StatusNotFound,
Message: "Not such NSG",
func (fNSG fakeAzureNSGClient) List(resourceGroupName string) (result network.SecurityGroupListResult, err error) {
defer fNSG.mutex.Unlock()
var value []network.SecurityGroup
if _, ok := fNSG.FakeStore[resourceGroupName]; ok {
for _, v := range fNSG.FakeStore[resourceGroupName] {
value = append(value, v)
result.Response.Response = &http.Response{
StatusCode: http.StatusOK,
result.NextLink = nil
result.Value = &value
return result, nil
func getRandomIPPtr() *string {
return to.StringPtr(fmt.Sprintf("%d.%d.%d.%d", rand.Intn(256), rand.Intn(256), rand.Intn(256), rand.Intn(256)))

View File

@ -48,19 +48,10 @@ func (az *Cloud) NodeAddresses(name types.NodeName) ([]v1.NodeAddress, error) {
return addresses, nil
ip, err := az.getIPForMachine(name)
ip, err := az.GetIPForMachineWithRetry(name)
if err != nil {
if az.CloudProviderBackoff {
glog.V(2).Infof("NodeAddresses(%s) backing off", name)
ip, err = az.GetIPForMachineWithRetry(name)
if err != nil {
glog.V(2).Infof("NodeAddresses(%s) abort backoff", name)
return nil, err
} else {
glog.Errorf("error: az.NodeAddresses, az.getIPForMachine(%s), err=%v", name, err)
return nil, err
glog.V(2).Infof("NodeAddresses(%s) abort backoff", name)
return nil, err
return []v1.NodeAddress{
@ -199,39 +190,6 @@ func (az *Cloud) CurrentNodeName(hostname string) (types.NodeName, error) {
return types.NodeName(hostname), nil
func (az *Cloud) listAllNodesInResourceGroup() ([]compute.VirtualMachine, error) {
allNodes := []compute.VirtualMachine{}
glog.V(10).Infof("VirtualMachinesClient.List(%s): start", az.ResourceGroup)
result, err := az.VirtualMachinesClient.List(az.ResourceGroup)
glog.V(10).Infof("VirtualMachinesClient.List(%s): end", az.ResourceGroup)
if err != nil {
glog.Errorf("error: az.listAllNodesInResourceGroup(), az.VirtualMachinesClient.List(%s), err=%v", az.ResourceGroup, err)
return nil, err
morePages := (result.Value != nil && len(*result.Value) > 1)
for morePages {
allNodes = append(allNodes, *result.Value...)
glog.V(10).Infof("VirtualMachinesClient.ListAllNextResults(%v): start", az.ResourceGroup)
result, err = az.VirtualMachinesClient.ListAllNextResults(result)
glog.V(10).Infof("VirtualMachinesClient.ListAllNextResults(%v): end", az.ResourceGroup)
if err != nil {
glog.Errorf("error: az.listAllNodesInResourceGroup(), az.VirtualMachinesClient.ListAllNextResults(%v), err=%v", result, err)
return nil, err
morePages = (result.Value != nil && len(*result.Value) > 1)
return allNodes, nil
// mapNodeNameToVMName maps a k8s NodeName to an Azure VM Name
// This is a simple string cast.
func mapNodeNameToVMName(nodeName types.NodeName) string {

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,77 @@
# Azure LoadBalancer
The way Azure defines a LoadBalancer is different from GCE or AWS. An Azure LB can have multiple frontend IP refs, whereas GCE and AWS allow only one; if you want more, you need another LB. Because of this, the Public IP is not part of the LB in Azure, and neither is the NSG. However, you cannot delete them in parallel: a Public IP can only be deleted after the LB's frontend IP ref is removed.
The different Azure resources, such as LB, Public IP, and NSG, are in the same tier of Azure resources. We need to make sure there is no coupling between their ensure loops. In other words, each of them would eventually be reconciled regardless of the other resources' state; they should only depend on the service state.
Despite the ideal philosophy above, we have to face reality: the NSG depends on the LB's frontend IP to adjust NSG rules. So when we want to reconcile the NSG, the LB should contain the corresponding frontend IP config.
Also, for Azure, we cannot afford to have more than one service_controller worker, because different services could operate on the same LB and concurrent execution could result in conflicts or unexpected results. AWS and GCE apparently do not have this problem: they use one LB per service, so no such conflict arises.
There are two load balancers per availability set: internal and external. There is a limit on the number of services that can be associated with a single load balancer.
By default the primary load balancer is selected. Services can be annotated to allow auto selection of available load balancers. Service annotations can also be used to provide specific availability sets that host the load balancers. Note that with auto selection or specific availability set selection, when the availability set is lost in case of downtime or cluster scale-down, the services are currently not automatically reassigned to an available load balancer.
Service annotation for auto and specific load balancer mode:
- "service.beta.kubernetes.io/azure-load-balancer-mode" (__auto__|as1,as2...)
## Introduce Functions
- reconcileLoadBalancer(clusterName string, service *v1.Service, nodes []*v1.Node, wantLb bool) (*network.LoadBalancer, error)
- Go through lb's properties, update based on wantLb
- If any change on the lb, no matter if the lb exists or not
- Call az cloud to CreateOrUpdate on this lb, or Delete if nothing left
- return lb, err
- reconcileSecurityGroup(clusterName string, service *v1.Service, lbIP *string, wantLb bool) (*network.SecurityGroup, error)
- Go through the NSG's properties, update based on wantLb
- Use destinationIPAddress as target address if possible
- Consolidate NSG rules if possible
- If there is any change on the NSG (the NSG should always exist)
- Call az cloud to CreateOrUpdate on this NSG
- return sg, err
- reconcilePublicIP(clusterName string, service *v1.Service, wantLb bool) (*network.PublicIPAddress, error)
- List all the public ip in the resource group
- Make sure we only touch Public IP resources that have tags[service] = "namespace/serviceName"
- skip for wantLb && !isInternal && pipName == desiredPipName
- delete other public ip resources if any
- if !isInternal && wantLb
- ensure Public IP with desiredPipName exists
- getServiceLoadBalancer(service *v1.Service, clusterName string, nodes []*v1.Node, wantLb bool) (lb, status, exists, error)
- gets the loadbalancer for the service if it already exists
- If wantLb is TRUE then it selects a new load balancer; the selection helps distribute the services across load balancers
- In case the selected load balancer does not exist, it returns a network.LoadBalancer struct with added metadata (such as name, location) and existsLB set to FALSE
- By default - cluster default LB is returned
## Define interface behaviors
### GetLoadBalancer
- Get LoadBalancer status, return status, error
- return the load balancer status for this service
- it will not create or update or delete any resource
### EnsureLoadBalancer
- Reconcile LB for the flipped service
- Call reconcileLoadBalancer(clusterName, flipedService, nil, false/* wantLb */)
- Reconcile Public IP
- Call reconcilePublicIP(cluster, service, true)
- Reconcile LB's related and owned resources, such as FrontEndIPConfig, Rules, Probe.
- Call reconcileLoadBalancer(clusterName, service, nodes, true /* wantLb */)
- Reconcile NSG rules; this needs to be called after reconcileLB
- Call reconcileSecurityGroup(clusterName, service, lbStatus, true /* wantLb */)
### UpdateLoadBalancer
- Behaves the same as EnsureLoadBalancer
### EnsureLoadBalancerDeleted
- Reconcile NSG first, before reconciling LB, because the SG needs the LB to be there
- Call reconcileSecurityGroup(clusterName, service, nil, false /* wantLb */)
- Reconcile LB's related and owned resources, such as FrontEndIPConfig, Rules, Probe.
- Call reconcileLoadBalancer(clusterName, service, nodes, false)
- Reconcile Public IP, public IP needs related LB reconciled first
- Call reconcilePublicIP(cluster, service, false)

File diff suppressed because it is too large Load Diff

View File

@ -21,6 +21,7 @@ import (
@ -31,6 +32,7 @@ import (
const (
@ -44,6 +46,12 @@ const (
loadBalancerRuleIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/loadBalancers/%s/loadBalancingRules/%s"
loadBalancerProbeIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/loadBalancers/%s/probes/%s"
securityRuleIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/networkSecurityGroups/%s/securityRules/%s"
// InternalLoadBalancerNameSuffix is the suffix appended to internal load balancer names
InternalLoadBalancerNameSuffix = "-internal"
// nodeLabelRole specifies the role of a node
nodeLabelRole = ""
var providerIDRE = regexp.MustCompile(`^` + CloudProviderName + `://(?:.*)/Microsoft.Compute/virtualMachines/(.+)$`)
@ -116,6 +124,143 @@ func (az *Cloud) getSecurityRuleID(securityRuleName string) string {
// returns the full identifier of a publicIPAddress.
func (az *Cloud) getpublicIPAddressID(pipName string) string {
return fmt.Sprintf(
// getLoadBalancerAvailabilitySetNames selects the availability sets that may host
// the load balancer of the given service.
//
// If the service has no load balancer mode annotation, only the primary
// availability set (az.Config.PrimaryAvailabilitySetName) is returned.
// Otherwise the availability sets of the given nodes are looked up through
// getAgentPoolAvailabiliySets. When the mode is not auto, the availability set
// names listed in the service annotation are validated against that list
// (case-insensitively) and the annotated names are returned.
//
// An error is returned when the lookup fails, when no availability set is found
// for the nodes, when a non-auto annotation carries no names, or when an annotated
// availability set is not found.
func (az *Cloud) getLoadBalancerAvailabilitySetNames(service *v1.Service, nodes []*v1.Node) (availabilitySetNames *[]string, err error) {
hasMode, isAuto, serviceAvailabilitySetNames := getServiceLoadBalancerMode(service)
if !hasMode {
// no mode specified in service annotation default to PrimaryAvailabilitySetName
availabilitySetNames = &[]string{az.Config.PrimaryAvailabilitySetName}
return availabilitySetNames, nil
availabilitySetNames, err = az.getAgentPoolAvailabiliySets(nodes)
if err != nil {
glog.Errorf("az.getLoadBalancerAvailabilitySetNames - getAgentPoolAvailabiliySets failed err=(%v)", err)
return nil, err
if len(*availabilitySetNames) == 0 {
glog.Errorf("az.getLoadBalancerAvailabilitySetNames - No availability sets found for nodes in the cluster, node count(%d)", len(nodes))
return nil, fmt.Errorf("No availability sets found for nodes, node count(%d)", len(nodes))
// sort the list to have deterministic selection
if !isAuto {
if serviceAvailabilitySetNames == nil || len(serviceAvailabilitySetNames) == 0 {
return nil, fmt.Errorf("service annotation for LoadBalancerMode is empty, it should have __auto__ or availability sets value")
// validate availability set exists
// NOTE(review): found is declared before the outer loop and no reset is visible
// here, so a match for an earlier name may mask a later missing one — confirm.
var found bool
for sasx := range serviceAvailabilitySetNames {
for asx := range *availabilitySetNames {
if strings.EqualFold((*availabilitySetNames)[asx], serviceAvailabilitySetNames[sasx]) {
found = true
// replace the annotated name with the discovered name so the casing matches
serviceAvailabilitySetNames[sasx] = (*availabilitySetNames)[asx]
if !found {
glog.Errorf("az.getLoadBalancerAvailabilitySetNames - Availability set (%s) in service annotation not found", serviceAvailabilitySetNames[sasx])
return nil, fmt.Errorf("availability set (%s) - not found", serviceAvailabilitySetNames[sasx])
availabilitySetNames = &serviceAvailabilitySetNames
return availabilitySetNames, nil
// getAgentPoolAvailabiliySets lists the virtual machines for the resource group and
// then builds a list of availability set names that match the nodes available to k8s.
// The returned names are lower-cased. An error is returned when the VM listing fails,
// when a node has no entry in the VM-to-availability-set map, or when the availability
// set name cannot be extracted from its ID.
// Note that the log messages of this function use the prefix "az.getNodeAvailabilitySet".
func (az *Cloud) getAgentPoolAvailabiliySets(nodes []*v1.Node) (agentPoolAvailabilitySets *[]string, err error) {
vms, err := az.VirtualMachineClientListWithRetry()
if err != nil {
glog.Errorf("az.getNodeAvailabilitySet - VirtualMachineClientListWithRetry failed, err=%v", err)
return nil, err
// map VM name -> availability set ID, only for VMs that belong to an availability set
vmNameToAvailabilitySetID := make(map[string]string, len(vms))
for vmx := range vms {
vm := vms[vmx]
if vm.AvailabilitySet != nil {
vmNameToAvailabilitySetID[*vm.Name] = *vm.AvailabilitySet.ID
availabilitySetIDs := sets.NewString()
agentPoolAvailabilitySets = &[]string{}
for nx := range nodes {
nodeName := (*nodes[nx]).Name
// NOTE(review): the body of this branch is not visible here; presumably master
// nodes are skipped — confirm.
if isMasterNode(nodes[nx]) {
asID, ok := vmNameToAvailabilitySetID[nodeName]
if !ok {
glog.Errorf("az.getNodeAvailabilitySet - Node(%s) has no availability sets", nodeName)
return nil, fmt.Errorf("Node (%s) - has no availability sets", nodeName)
if availabilitySetIDs.Has(asID) {
// already added in the list
// the availability set name is the last segment of its ID
asName, err := getLastSegment(asID)
if err != nil {
glog.Errorf("az.getNodeAvailabilitySet - Node (%s)- getLastSegment(%s), err=%v", nodeName, asID, err)
return nil, err
// The AvailabilitySet ID is currently upper-cased in a non-deterministic way.
// We keep the name lower case until the ID gets fixed.
asName = strings.ToLower(asName)
*agentPoolAvailabilitySets = append(*agentPoolAvailabilitySets, asName)
return agentPoolAvailabilitySets, nil
// mapLoadBalancerNameToAvailabilitySet derives an availability set name from a load
// balancer name by trimming InternalLoadBalancerNameSuffix from lbName. When lbName
// equals clusterName (case-insensitively), az.Config.PrimaryAvailabilitySetName is used.
// NOTE(review): the comparison uses the untrimmed lbName, so a load balancer named
// clusterName plus the internal suffix does not match clusterName — confirm this is intended.
func (az *Cloud) mapLoadBalancerNameToAvailabilitySet(lbName string, clusterName string) (availabilitySetName string) {
availabilitySetName = strings.TrimSuffix(lbName, InternalLoadBalancerNameSuffix)
if strings.EqualFold(clusterName, lbName) {
availabilitySetName = az.Config.PrimaryAvailabilitySetName
return availabilitySetName
// getLoadBalancerName returns the name of the Azure LoadBalancer resource for the
// given availability set.
//
// For a load balancer, all frontend IPs should reference either a subnet or a
// publicIpAddress. Thus Azure does not allow a mixed type (public and internal)
// load balancer, so the internal load balancer gets a separate name: the name
// prefix with InternalLoadBalancerNameSuffix appended.
//
// The name prefix is clusterName when availabilitySetName equals the primary
// availability set name (case-insensitively), and availabilitySetName otherwise.
func (az *Cloud) getLoadBalancerName(clusterName string, availabilitySetName string, isInternal bool) string {
lbNamePrefix := availabilitySetName
if strings.EqualFold(availabilitySetName, az.Config.PrimaryAvailabilitySetName) {
lbNamePrefix = clusterName
if isInternal {
return fmt.Sprintf("%s%s", lbNamePrefix, InternalLoadBalancerNameSuffix)
return lbNamePrefix
// isMasterNode returns true if the node has a master role label.
// The master role is determined by looking for the label keyed by nodeLabelRole
// with the value "master"; any other value, or a missing label, yields false.
func isMasterNode(node *v1.Node) bool {
if val, ok := node.Labels[nodeLabelRole]; ok && val == "master" {
return true
return false
// returns the deepest child's identifier from a full identifier string.
func getLastSegment(ID string) (string, error) {
parts := strings.Split(ID, "/")
@ -179,16 +324,8 @@ func getPrimaryIPConfig(nic network.Interface) (*network.InterfaceIPConfiguratio
return nil, fmt.Errorf("failed to determine the determine primary ipconfig. nicname=%q", *nic.Name)
// For a load balancer, all frontend ip should reference either a subnet or publicIpAddress.
// Thus Azure do not allow mixed type (public and internal) load balancer.
// So we'd have a separate name for internal load balancer.
// This would be the name for Azure LoadBalancer resource.
func getLoadBalancerName(clusterName string, isInternal bool) string {
if isInternal {
return fmt.Sprintf("%s-internal", clusterName)
return clusterName
func isInternalLoadBalancer(lb *network.LoadBalancer) bool {
return strings.HasSuffix(*lb.Name, InternalLoadBalancerNameSuffix)
func getBackendPoolName(clusterName string) string {
@ -203,6 +340,10 @@ func getLoadBalancerRuleName(service *v1.Service, port v1.ServicePort, subnetNam
func getSecurityRuleName(service *v1.Service, port v1.ServicePort, sourceAddrPrefix string) string {
if useSharedSecurityRule(service) {
safePrefix := strings.Replace(sourceAddrPrefix, "/", "_", -1)
return fmt.Sprintf("shared-%s-%d-%s", port.Protocol, port.Port, safePrefix)
safePrefix := strings.Replace(sourceAddrPrefix, "/", "_", -1)
return fmt.Sprintf("%s-%s-%d-%s", getRulePrefix(service), port.Protocol, port.Port, safePrefix)

View File

@ -40,6 +40,19 @@ func checkResourceExistsFromError(err error) (bool, error) {
return false, v
// ignoreStatusNotFoundFromError filters out "not found" errors.
// It returns nil when err is nil or when err is an autorest.DetailedError whose
// StatusCode is http.StatusNotFound; any other error is returned unchanged.
func ignoreStatusNotFoundFromError(err error) error {
if err == nil {
return nil
v, ok := err.(autorest.DetailedError)
if ok && v.StatusCode == http.StatusNotFound {
return nil
return err
func (az *Cloud) getVirtualMachine(nodeName types.NodeName) (vm compute.VirtualMachine, exists bool, err error) {
var realErr error
@ -103,7 +116,6 @@ func (az *Cloud) getSecurityGroup() (sg network.SecurityGroup, exists bool, err
func (az *Cloud) getAzureLoadBalancer(name string) (lb network.LoadBalancer, exists bool, err error) {
var realErr error
glog.V(10).Infof("LoadBalancerClient.Get(%s): start", name)
lb, err = az.LoadBalancerClient.Get(az.ResourceGroup, name, "")
@ -121,6 +133,25 @@ func (az *Cloud) getAzureLoadBalancer(name string) (lb network.LoadBalancer, exi
return lb, exists, err
// listLoadBalancers lists the load balancers in az.ResourceGroup through
// az.LoadBalancerClient.List. The error of the List call is classified by
// checkResourceExistsFromError: a real error is returned with exists=false, and a
// "does not exist" outcome is reported as exists=false with a nil error.
func (az *Cloud) listLoadBalancers() (lbListResult network.LoadBalancerListResult, exists bool, err error) {
var realErr error
glog.V(10).Infof("LoadBalancerClient.List(%s): start", az.ResourceGroup)
lbListResult, err = az.LoadBalancerClient.List(az.ResourceGroup)
glog.V(10).Infof("LoadBalancerClient.List(%s): end", az.ResourceGroup)
exists, realErr = checkResourceExistsFromError(err)
if realErr != nil {
return lbListResult, false, realErr
if !exists {
return lbListResult, false, nil
return lbListResult, exists, err
func (az *Cloud) getPublicIPAddress(name string) (pip network.PublicIPAddress, exists bool, err error) {
var realErr error

View File

@ -9,6 +9,7 @@ load(
name = "go_default_library",
srcs = [
@ -21,13 +22,15 @@ go_library(

View File

@ -0,0 +1,295 @@
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
See the License for the specific language governing permissions and
limitations under the License.
package vsphere
import (
k8stypes ""
// Stores info about the kubernetes node
type NodeInfo struct {
dataCenter *vclib.Datacenter
vm *vclib.VirtualMachine
vcServer string
type NodeManager struct {
// TODO: replace map with concurrent map when k8s supports go v1.9
// Maps the VC server to VSphereInstance
vsphereInstanceMap map[string]*VSphereInstance
// Maps node name to node info.
nodeInfoMap map[string]*NodeInfo
// Maps node name to node structure
registeredNodes map[string]*v1.Node
// Mutexes
registeredNodesLock sync.RWMutex
nodeInfoLock sync.RWMutex
type NodeDetails struct {
NodeName string
vm *vclib.VirtualMachine
// TODO: Make it configurable in vsphere.conf
const (
func (nm *NodeManager) DiscoverNode(node *v1.Node) error {
type VmSearch struct {
vc string
datacenter *vclib.Datacenter
var mutex = &sync.Mutex{}
var globalErrMutex = &sync.Mutex{}
var queueChannel chan *VmSearch
var wg sync.WaitGroup
var globalErr *error
queueChannel = make(chan *VmSearch, QUEUE_SIZE)
nodeUUID := node.Status.NodeInfo.SystemUUID
vmFound := false
globalErr = nil
setGlobalErr := func(err error) {
globalErr = &err
setVMFound := func(found bool) {
vmFound = found
getVMFound := func() bool {
found := vmFound
return found
go func() {
var datacenterObjs []*vclib.Datacenter
for vc, vsi := range nm.vsphereInstanceMap {
found := getVMFound()
if found == true {
// Create context
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
err := vsi.conn.Connect(ctx)
if err != nil {
glog.V(4).Info("Discovering node error vc:", err)
if vsi.cfg.Datacenters == "" {
datacenterObjs, err = vclib.GetAllDatacenter(ctx, vsi.conn)
if err != nil {
glog.V(4).Info("Discovering node error dc:", err)
} else {
datacenters := strings.Split(vsi.cfg.Datacenters, ",")
for _, dc := range datacenters {
dc = strings.TrimSpace(dc)
if dc == "" {
datacenterObj, err := vclib.GetDatacenter(ctx, vsi.conn, dc)
if err != nil {
glog.V(4).Info("Discovering node error dc:", err)
datacenterObjs = append(datacenterObjs, datacenterObj)
for _, datacenterObj := range datacenterObjs {
found := getVMFound()
if found == true {
glog.V(4).Infof("Finding node %s in vc=%s and datacenter=%s", node.Name, vc, datacenterObj.Name())
queueChannel <- &VmSearch{
vc: vc,
datacenter: datacenterObj,
for i := 0; i < POOL_SIZE; i++ {
go func() {
for res := range queueChannel {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
vm, err := res.datacenter.GetVMByUUID(ctx, nodeUUID)
if err != nil {
glog.V(4).Infof("Error %q while looking for vm=%+v in vc=%s and datacenter=%s",
err, node.Name, vm,, res.datacenter.Name())
if err != vclib.ErrNoVMFound {
} else {
glog.V(4).Infof("Did not find node %s in vc=%s and datacenter=%s",
node.Name,, res.datacenter.Name(), err)
if vm != nil {
glog.V(4).Infof("Found node %s as vm=%+v in vc=%s and datacenter=%s",
node.Name, vm,, res.datacenter.Name())
nodeInfo := &NodeInfo{dataCenter: res.datacenter, vm: vm, vcServer:}
nm.addNodeInfo(node.ObjectMeta.Name, nodeInfo)
for range queueChannel {
if vmFound {
return nil
if globalErr != nil {
return *globalErr
glog.V(4).Infof("Discovery Node: %q vm not found", node.Name)
return vclib.ErrNoVMFound
func (nm *NodeManager) RegisterNode(node *v1.Node) error {
return nil
func (nm *NodeManager) UnRegisterNode(node *v1.Node) error {
return nil
// RediscoverNode looks up the registered node with the given name via GetNode and
// runs DiscoverNode on it again. The error from GetNode or DiscoverNode is returned.
func (nm *NodeManager) RediscoverNode(nodeName k8stypes.NodeName) error {
node, err := nm.GetNode(nodeName)
if err != nil {
return err
return nm.DiscoverNode(&node)
// GetNode returns a copy of the registered node with the given name.
// It returns vclib.ErrNoVMFound when no node is registered under that name.
// NOTE(review): no locking of registeredNodes is visible in this block — confirm
// against the full file.
func (nm *NodeManager) GetNode(nodeName k8stypes.NodeName) (v1.Node, error) {
node := nm.registeredNodes[convertToString(nodeName)]
if node == nil {
return v1.Node{}, vclib.ErrNoVMFound
return *node, nil
func (nm *NodeManager) addNode(node *v1.Node) {
nm.registeredNodes[node.ObjectMeta.Name] = node
func (nm *NodeManager) removeNode(node *v1.Node) {
delete(nm.registeredNodes, node.ObjectMeta.Name)
// GetNodeInfo returns the NodeInfo (datacenter, vm and vc server) recorded for the
// given node name. If no NodeInfo is cached for the node, the node is rediscovered
// first via RediscoverNode and the lookup is repeated.
// This method returns an error if it is unable to find the node in the VCs and DCs
// listed in vSphere.conf.
// The NodeInfo returned may not be updated to reflect the current VM location.
func (nm *NodeManager) GetNodeInfo(nodeName k8stypes.NodeName) (NodeInfo, error) {
// getNodeInfo reads the cached entry for nodeName from nodeInfoMap (nil if absent)
getNodeInfo := func(nodeName k8stypes.NodeName) *NodeInfo {
nodeInfo := nm.nodeInfoMap[convertToString(nodeName)]
return nodeInfo
nodeInfo := getNodeInfo(nodeName)
if nodeInfo == nil {
err := nm.RediscoverNode(nodeName)
if err != nil {
glog.V(4).Infof("error %q node info for node %q not found", err, convertToString(nodeName))
return NodeInfo{}, err
// presumably the rediscovery has populated nodeInfoMap for this node — verify
nodeInfo = getNodeInfo(nodeName)
return *nodeInfo, nil
func (nm *NodeManager) GetNodeDetails() []NodeDetails {
defer nm.nodeInfoLock.RUnlock()
var nodeDetails []NodeDetails
for nodeName, nodeInfo := range nm.nodeInfoMap {
nodeDetails = append(nodeDetails, NodeDetails{nodeName, nodeInfo.vm})
return nodeDetails
func (nm *NodeManager) addNodeInfo(nodeName string, nodeInfo *NodeInfo) {
nm.nodeInfoMap[nodeName] = nodeInfo
// GetVSphereInstance returns a copy of the VSphereInstance registered for the vc
// server of the given node. It returns the error from GetNodeInfo when the node info
// cannot be obtained, and an error when vsphereInstanceMap has no entry for the
// node's vc server.
func (nm *NodeManager) GetVSphereInstance(nodeName k8stypes.NodeName) (VSphereInstance, error) {
nodeInfo, err := nm.GetNodeInfo(nodeName)
if err != nil {
glog.V(4).Infof("node info for node %q not found", convertToString(nodeName))
return VSphereInstance{}, err
vsphereInstance := nm.vsphereInstanceMap[nodeInfo.vcServer]
if vsphereInstance == nil {
return VSphereInstance{}, fmt.Errorf("vSphereInstance for vc server %q not found while looking for node %q", nodeInfo.vcServer, convertToString(nodeName))
return *vsphereInstance, nil

View File

@ -25,6 +25,7 @@ const (
NoDevicesFoundErrMsg = "No devices found"
DiskNotFoundErrMsg = "No vSphere disk ID found"
InvalidVolumeOptionsErrMsg = "VolumeOptions verification failed"
NoVMFoundErrMsg = "No VM found"
// Error constants
@ -34,4 +35,5 @@ var (
ErrNoDevicesFound = errors.New(NoDevicesFoundErrMsg)
ErrNoDiskIDFound = errors.New(DiskNotFoundErrMsg)
ErrInvalidVolumeOptions = errors.New(InvalidVolumeOptionsErrMsg)
ErrNoVMFound = errors.New(NoVMFoundErrMsg)

View File

@ -49,6 +49,22 @@ func GetDatacenter(ctx context.Context, connection *VSphereConnection, datacente
return &dc, nil
// GetAllDatacenter returns all the Datacenter objects found through the given
// connection. It lists datacenters with the "*" pattern and wraps each result in a
// Datacenter. The error from the finder is logged and returned when listing fails.
func GetAllDatacenter(ctx context.Context, connection *VSphereConnection) ([]*Datacenter, error) {
var dc []*Datacenter
finder := find.NewFinder(connection.GoVmomiClient.Client, true)
datacenters, err := finder.DatacenterList(ctx, "*")
if err != nil {
glog.Errorf("Failed to find the datacenter. err: %+v", err)
return nil, err
for _, datacenter := range datacenters {
dc = append(dc, &(Datacenter{datacenter}))
return dc, nil
// GetVMByUUID gets the VM object from the given vmUUID
func (dc *Datacenter) GetVMByUUID(ctx context.Context, vmUUID string) (*VirtualMachine, error) {
s := object.NewSearchIndex(dc.Client())
@ -60,7 +76,7 @@ func (dc *Datacenter) GetVMByUUID(ctx context.Context, vmUUID string) (*VirtualM
if svm == nil {
glog.Errorf("Unable to find VM by UUID. VM UUID: %s", vmUUID)
return nil, fmt.Errorf("Failed to find VM by UUID: %s", vmUUID)
return nil, ErrNoVMFound
virtualMachine := VirtualMachine{object.NewVirtualMachine(dc.Client(), svm.Reference()), dc}
return &virtualMachine, nil
@ -79,6 +95,41 @@ func (dc *Datacenter) GetVMByPath(ctx context.Context, vmPath string) (*VirtualM
return &virtualMachine, nil
// GetAllDatastores gets the datastore URL to DatastoreInfo map for all the datastores in
// the datacenter.
// It lists the datastores with the "*" pattern, retrieves the DatastoreInfoProperty of
// each one through the property collector, and keys the result by the datastore URL.
// An error is returned when listing the datastores or retrieving their properties fails.
func (dc *Datacenter) GetAllDatastores(ctx context.Context) (map[string]*DatastoreInfo, error) {
finder := getFinder(dc)
datastores, err := finder.DatastoreList(ctx, "*")
if err != nil {
glog.Errorf("Failed to get all the datastores. err: %+v", err)
return nil, err
// collect the managed object references of all datastores for the property query
var dsList []types.ManagedObjectReference
for _, ds := range datastores {
dsList = append(dsList, ds.Reference())
var dsMoList []mo.Datastore
pc := property.DefaultCollector(dc.Client())
properties := []string{DatastoreInfoProperty}
err = pc.Retrieve(ctx, dsList, properties, &dsMoList)
if err != nil {
glog.Errorf("Failed to get Datastore managed objects from datastore objects."+
" dsObjList: %+v, properties: %+v, err: %v", dsList, properties, err)
return nil, err
// build the URL -> DatastoreInfo map from the retrieved managed objects
dsURLInfoMap := make(map[string]*DatastoreInfo)
for _, dsMo := range dsMoList {
dsURLInfoMap[dsMo.Info.GetDatastoreInfo().Url] = &DatastoreInfo{
&Datastore{object.NewDatastore(dc.Client(), dsMo.Reference()),
glog.V(9).Infof("dsURLInfoMap : %+v", dsURLInfoMap)
return dsURLInfoMap, nil
// GetDatastoreByPath gets the Datastore object from the given vmDiskPath
func (dc *Datacenter) GetDatastoreByPath(ctx context.Context, vmDiskPath string) (*Datastore, error) {
datastorePathObj := new(object.DatastorePath)
@ -109,6 +160,23 @@ func (dc *Datacenter) GetDatastoreByName(ctx context.Context, name string) (*Dat
return &datastore, nil
// GetResourcePool gets the resource pool of the compute resource at the given path.
// An empty computePath selects the finder's default compute resource.
// The error from the finder is logged and returned when the compute resource
// cannot be resolved.
func (dc *Datacenter) GetResourcePool(ctx context.Context, computePath string) (*object.ResourcePool, error) {
finder := getFinder(dc)
var computeResource *object.ComputeResource
var err error
if computePath == "" {
computeResource, err = finder.DefaultComputeResource(ctx)
} else {
computeResource, err = finder.ComputeResource(ctx, computePath)
if err != nil {
glog.Errorf("Failed to get the ResourcePool for computePath '%s'. err: %+v", computePath, err)
return nil, err
return computeResource.ResourcePool(ctx)
// GetFolderByPath gets the Folder Object from the given folder path
// folderPath should be the full path to folder
func (dc *Datacenter) GetFolderByPath(ctx context.Context, folderPath string) (*Folder, error) {

View File

@ -17,6 +17,7 @@ limitations under the License.
package vclib
import (
@ -32,6 +33,16 @@ type Datastore struct {
Datacenter *Datacenter
// DatastoreInfo is a structure to store the Datastore and its Info.
type DatastoreInfo struct {
Info *types.DatastoreInfo
func (di DatastoreInfo) String() string {
return fmt.Sprintf("Datastore: %+v, datastore URL: %s", di.Datastore, di.Info.Url)
// CreateDirectory creates the directory at location specified by directoryPath.
// If the intermediate level folders do not exist, and the parameter createParents is true, all the non-existent folders are created.
// directoryPath must be in the format "[vsanDatastore] kubevols"

View File

@ -70,13 +70,13 @@ func (diskManager virtualDiskManager) Create(ctx context.Context, datastore *vcl
// Delete implements Disk's Delete interface
func (diskManager virtualDiskManager) Delete(ctx context.Context, datastore *vclib.Datastore) error {
func (diskManager virtualDiskManager) Delete(ctx context.Context, datacenter *vclib.Datacenter) error {
// Create a virtual disk manager
virtualDiskManager := object.NewVirtualDiskManager(datastore.Client())
diskPath := vclib.RemoveClusterFromVDiskPath(diskManager.diskPath)
virtualDiskManager := object.NewVirtualDiskManager(datacenter.Client())
diskPath := vclib.RemoveStorageClusterORFolderNameFromVDiskPath(diskManager.diskPath)
requestTime := time.Now()
// Delete virtual disk
task, err := virtualDiskManager.DeleteVirtualDisk(ctx, diskPath, datastore.Datacenter.Datacenter)
task, err := virtualDiskManager.DeleteVirtualDisk(ctx, diskPath, datacenter.Datacenter)
if err != nil {
glog.Errorf("Failed to delete virtual disk. err: %v", err)
vclib.RecordvSphereMetric(vclib.APIDeleteVolume, requestTime, err)

View File

@ -40,7 +40,7 @@ const (
// VirtualDiskProvider defines interfaces for creating disk
type VirtualDiskProvider interface {
Create(ctx context.Context, datastore *vclib.Datastore) (string, error)
Delete(ctx context.Context, datastore *vclib.Datastore) error
Delete(ctx context.Context, datacenter *vclib.Datacenter) error
// getDiskManager returns vmDiskManager or vdmDiskManager based on given volumeoptions
@ -75,6 +75,6 @@ func (virtualDisk *VirtualDisk) Create(ctx context.Context, datastore *vclib.Dat
// Delete gets appropriate disk manager and calls respective delete method
func (virtualDisk *VirtualDisk) Delete(ctx context.Context, datastore *vclib.Datastore) error {
return getDiskManager(virtualDisk, VirtualDiskDeleteOperation).Delete(ctx, datastore)
func (virtualDisk *VirtualDisk) Delete(ctx context.Context, datacenter *vclib.Datacenter) error {
return getDiskManager(virtualDisk, VirtualDiskDeleteOperation).Delete(ctx, datacenter)

View File

@ -157,7 +157,7 @@ func (vmdisk vmDiskManager) Create(ctx context.Context, datastore *vclib.Datasto
return vmdisk.diskPath, nil
func (vmdisk vmDiskManager) Delete(ctx context.Context, datastore *vclib.Datastore) error {
func (vmdisk vmDiskManager) Delete(ctx context.Context, datacenter *vclib.Datacenter) error {
return fmt.Errorf("vmDiskManager.Delete is not supported")

View File

@ -85,7 +85,7 @@ func (pbmClient *PbmClient) IsDatastoreCompatible(ctx context.Context, storagePo
// GetCompatibleDatastores filters and returns compatible list of datastores for given storage policy id
// For Non Compatible Datastores, fault message with the Datastore Name is also returned
func (pbmClient *PbmClient) GetCompatibleDatastores(ctx context.Context, storagePolicyID string, datastores []*Datastore) ([]*Datastore, string, error) {
func (pbmClient *PbmClient) GetCompatibleDatastores(ctx context.Context, dc *Datacenter, storagePolicyID string, datastores []*DatastoreInfo) ([]*DatastoreInfo, string, error) {
var (
dsMorNameMap = getDsMorNameMap(ctx, datastores)
localizedMessagesForNotCompatibleDatastores = ""
@ -96,7 +96,7 @@ func (pbmClient *PbmClient) GetCompatibleDatastores(ctx context.Context, storage
return nil, "", err
compatibleHubs := compatibilityResult.CompatibleDatastores()
var compatibleDatastoreList []*Datastore
var compatibleDatastoreList []*DatastoreInfo
for _, hub := range compatibleHubs {
compatibleDatastoreList = append(compatibleDatastoreList, getDatastoreFromPlacementHub(datastores, hub))
@ -121,7 +121,7 @@ func (pbmClient *PbmClient) GetCompatibleDatastores(ctx context.Context, storage
// GetPlacementCompatibilityResult gets placement compatibility result based on storage policy requirements.
func (pbmClient *PbmClient) GetPlacementCompatibilityResult(ctx context.Context, storagePolicyID string, datastore []*Datastore) (pbm.PlacementCompatibilityResult, error) {
func (pbmClient *PbmClient) GetPlacementCompatibilityResult(ctx context.Context, storagePolicyID string, datastore []*DatastoreInfo) (pbm.PlacementCompatibilityResult, error) {
var hubs []pbmtypes.PbmPlacementHub
for _, ds := range datastore {
hubs = append(hubs, pbmtypes.PbmPlacementHub{
@ -145,7 +145,7 @@ func (pbmClient *PbmClient) GetPlacementCompatibilityResult(ctx context.Context,
// getDataStoreForPlacementHub returns matching datastore associated with given pbmPlacementHub
func getDatastoreFromPlacementHub(datastore []*Datastore, pbmPlacementHub pbmtypes.PbmPlacementHub) *Datastore {
func getDatastoreFromPlacementHub(datastore []*DatastoreInfo, pbmPlacementHub pbmtypes.PbmPlacementHub) *DatastoreInfo {
for _, ds := range datastore {
if ds.Reference().Type == pbmPlacementHub.HubType && ds.Reference().Value == pbmPlacementHub.HubId {
return ds
@ -155,7 +155,7 @@ func getDatastoreFromPlacementHub(datastore []*Datastore, pbmPlacementHub pbmtyp
// getDsMorNameMap returns map of ds Mor and Datastore Object Name
func getDsMorNameMap(ctx context.Context, datastores []*Datastore) map[string]string {
func getDsMorNameMap(ctx context.Context, datastores []*DatastoreInfo) map[string]string {
dsMorNameMap := make(map[string]string)
for _, ds := range datastores {
dsObjectName, err := ds.ObjectName(ctx)

View File

@ -25,6 +25,8 @@ import (
@ -121,10 +123,10 @@ func getSCSIControllers(vmDevices object.VirtualDeviceList) []*types.VirtualCont
return scsiControllers
// RemoveClusterFromVDiskPath removes the cluster or folder path from the vDiskPath
// RemoveStorageClusterORFolderNameFromVDiskPath removes the cluster or folder path from the vDiskPath
// for vDiskPath [DatastoreCluster/sharedVmfs-0] kubevols/e2e-vmdk-1234.vmdk, return value is [sharedVmfs-0] kubevols/e2e-vmdk-1234.vmdk
// for vDiskPath [sharedVmfs-0] kubevols/e2e-vmdk-1234.vmdk, return value remains same [sharedVmfs-0] kubevols/e2e-vmdk-1234.vmdk
func RemoveClusterFromVDiskPath(vDiskPath string) string {
func RemoveStorageClusterORFolderNameFromVDiskPath(vDiskPath string) string {
datastore := regexp.MustCompile("\\[(.*?)\\]").FindStringSubmatch(vDiskPath)[1]
if filepath.Base(datastore) != datastore {
vDiskPath = strings.Replace(vDiskPath, datastore, filepath.Base(datastore), 1)
@ -172,3 +174,40 @@ func IsValidUUID(uuid string) bool {
r := regexp.MustCompile("^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$")
return r.MatchString(uuid)
// IsManagedObjectNotFoundError returns true if err is a SOAP fault whose VIM fault
// is of type types.ManagedObjectNotFound, and false otherwise.
func IsManagedObjectNotFoundError(err error) bool {
isManagedObjectNotFoundError := false
if soap.IsSoapFault(err) {
_, isManagedObjectNotFoundError = soap.ToSoapFault(err).VimFault().(types.ManagedObjectNotFound)
return isManagedObjectNotFoundError
// VerifyVolumePathsForVM verifies if the volume paths (volPaths) are attached to VM.
// It takes the device list from vmMo.Config.Hardware.Device and delegates to
// VerifyVolumePathsForVMDevices, which records the result in nodeVolumeMap.
// NOTE(review): vmMo.Config is dereferenced without a visible nil check — confirm
// callers always pass a VM with its config populated.
func VerifyVolumePathsForVM(vmMo mo.VirtualMachine, volPaths []string, nodeName string, nodeVolumeMap map[string]map[string]bool) {
// Verify if the volume paths are present on the VM backing virtual disk devices
vmDevices := object.VirtualDeviceList(vmMo.Config.Hardware.Device)
VerifyVolumePathsForVMDevices(vmDevices, volPaths, nodeName, nodeVolumeMap)
// VerifyVolumePathsForVMDevices verifies if the volume paths (volPaths) are attached to VM.
// For every "VirtualDisk" device in vmDevices that has a
// *types.VirtualDiskFlatVer2BackingInfo backing whose FileName is one of volPaths,
// it calls setNodeVolumeMap(nodeVolumeMap, fileName, nodeName, true).
func VerifyVolumePathsForVMDevices(vmDevices object.VirtualDeviceList, volPaths []string, nodeName string, nodeVolumeMap map[string]map[string]bool) {
// set of requested volume paths for constant-time membership checks
volPathsMap := make(map[string]bool)
for _, volPath := range volPaths {
volPathsMap[volPath] = true
// Verify if the volume paths are present on the VM backing virtual disk devices
for _, device := range vmDevices {
if vmDevices.TypeName(device) == "VirtualDisk" {
virtualDevice := device.GetVirtualDevice()
if backing, ok := virtualDevice.Backing.(*types.VirtualDiskFlatVer2BackingInfo); ok {
if volPathsMap[backing.FileName] {
setNodeVolumeMap(nodeVolumeMap, backing.FileName, nodeName, true)

View File

@ -23,6 +23,7 @@ import (
@ -63,7 +64,7 @@ func (vm *VirtualMachine) AttachDisk(ctx context.Context, vmDiskPath string, vol
return "", fmt.Errorf("Not a valid SCSI Controller Type. Valid options are %q", SCSIControllerTypeValidOptions())
vmDiskPathCopy := vmDiskPath
vmDiskPath = RemoveClusterFromVDiskPath(vmDiskPath)
vmDiskPath = RemoveStorageClusterORFolderNameFromVDiskPath(vmDiskPath)
attached, err := vm.IsDiskAttached(ctx, vmDiskPath)
if err != nil {
glog.Errorf("Error occurred while checking if disk is attached on VM: %q. vmDiskPath: %q, err: %+v", vm.InventoryPath, vmDiskPath, err)
@ -75,6 +76,20 @@ func (vm *VirtualMachine) AttachDisk(ctx context.Context, vmDiskPath string, vol
return diskUUID, nil
if volumeOptions.StoragePolicyName != "" {
pbmClient, err := NewPbmClient(ctx, vm.Client())
if err != nil {
glog.Errorf("Error occurred while creating new pbmClient. err: %+v", err)
return "", err
volumeOptions.StoragePolicyID, err = pbmClient.ProfileIDByName(ctx, volumeOptions.StoragePolicyName)
if err != nil {
glog.Errorf("Failed to get Profile ID by name: %s. err: %+v", volumeOptions.StoragePolicyName, err)
return "", err
dsObj, err := vm.Datacenter.GetDatastoreByPath(ctx, vmDiskPathCopy)
if err != nil {
glog.Errorf("Failed to get datastore from vmDiskPath: %q. err: %+v", vmDiskPath, err)
@ -139,7 +154,7 @@ func (vm *VirtualMachine) AttachDisk(ctx context.Context, vmDiskPath string, vol
// DetachDisk detaches the disk specified by vmDiskPath
func (vm *VirtualMachine) DetachDisk(ctx context.Context, vmDiskPath string) error {
vmDiskPath = RemoveClusterFromVDiskPath(vmDiskPath)
vmDiskPath = RemoveStorageClusterORFolderNameFromVDiskPath(vmDiskPath)
device, err := vm.getVirtualDeviceByPath(ctx, vmDiskPath)
if err != nil {
glog.Errorf("Disk ID not found for VM: %q with diskPath: %q", vm.InventoryPath, vmDiskPath)
@ -186,7 +201,7 @@ func (vm *VirtualMachine) IsActive(ctx context.Context) (bool, error) {
// GetAllAccessibleDatastores gets the list of accessible Datastores for the given Virtual Machine
func (vm *VirtualMachine) GetAllAccessibleDatastores(ctx context.Context) ([]*Datastore, error) {
func (vm *VirtualMachine) GetAllAccessibleDatastores(ctx context.Context) ([]*DatastoreInfo, error) {
host, err := vm.HostSystem(ctx)
if err != nil {
glog.Errorf("Failed to get host system for VM: %q. err: %+v", vm.InventoryPath, err)
@ -199,9 +214,28 @@ func (vm *VirtualMachine) GetAllAccessibleDatastores(ctx context.Context) ([]*Da
glog.Errorf("Failed to retrieve datastores for host: %+v. err: %+v", host, err)
return nil, err
var dsObjList []*Datastore
var dsRefList []types.ManagedObjectReference
for _, dsRef := range hostSystemMo.Datastore {
dsObjList = append(dsObjList, &Datastore{object.NewDatastore(vm.Client(), dsRef), vm.Datacenter})
dsRefList = append(dsRefList, dsRef)
var dsMoList []mo.Datastore
pc := property.DefaultCollector(vm.Client())
properties := []string{DatastoreInfoProperty}
err = pc.Retrieve(ctx, dsRefList, properties, &dsMoList)
if err != nil {
glog.Errorf("Failed to get Datastore managed objects from datastore objects."+
" dsObjList: %+v, properties: %+v, err: %v", dsRefList, properties, err)
return nil, err
glog.V(9).Infof("Result dsMoList: %+v", dsMoList)
var dsObjList []*DatastoreInfo
for _, dsMo := range dsMoList {
dsObjList = append(dsObjList,
&Datastore{object.NewDatastore(vm.Client(), dsMo.Reference()),
return dsObjList, nil

File diff suppressed because it is too large Load Diff

View File

@ -39,7 +39,7 @@ func configFromEnv() (cfg VSphereConfig, ok bool) {
cfg.Global.Password = os.Getenv("VSPHERE_PASSWORD")
cfg.Global.Datacenter = os.Getenv("VSPHERE_DATACENTER")
cfg.Network.PublicNetwork = os.Getenv("VSPHERE_PUBLIC_NETWORK")
cfg.Global.Datastore = os.Getenv("VSPHERE_DATASTORE")
cfg.Global.DefaultDatastore = os.Getenv("VSPHERE_DATASTORE")
cfg.Disk.SCSIControllerType = os.Getenv("VSPHERE_SCSICONTROLLER_TYPE")
cfg.Global.WorkingDir = os.Getenv("VSPHERE_WORKING_DIR")
cfg.Global.VMName = os.Getenv("VSPHERE_VM_NAME")
@ -103,7 +103,7 @@ func TestNewVSphere(t *testing.T) {
t.Skipf("No config found in environment")
_, err := newVSphere(cfg)
_, err := newControllerNode(cfg)
if err != nil {
t.Fatalf("Failed to construct/authenticate vSphere: %s", err)
@ -116,7 +116,7 @@ func TestVSphereLogin(t *testing.T) {
// Create vSphere configuration object
vs, err := newVSphere(cfg)
vs, err := newControllerNode(cfg)
if err != nil {
t.Fatalf("Failed to construct/authenticate vSphere: %s", err)
@ -126,11 +126,16 @@ func TestVSphereLogin(t *testing.T) {
defer cancel()
// Create vSphere client
err = vs.conn.Connect(ctx)
var vcInstance *VSphereInstance
if vcInstance, ok = vs.vsphereInstanceMap[cfg.Global.VCenterIP]; !ok {
t.Fatalf("Couldn't get vSphere instance: %s", cfg.Global.VCenterIP)
err = vcInstance.conn.Connect(ctx)
if err != nil {
t.Errorf("Failed to connect to vSphere: %s", err)
defer vs.conn.GoVmomiClient.Logout(ctx)
defer vcInstance.conn.GoVmomiClient.Logout(ctx)
func TestZones(t *testing.T) {
@ -154,7 +159,7 @@ func TestInstances(t *testing.T) {
t.Skipf("No config found in environment")
vs, err := newVSphere(cfg)
vs, err := newControllerNode(cfg)
if err != nil {
t.Fatalf("Failed to construct/authenticate vSphere: %s", err)
@ -213,7 +218,7 @@ func TestVolumes(t *testing.T) {
t.Skipf("No config found in environment")
vs, err := newVSphere(cfg)
vs, err := newControllerNode(cfg)
if err != nil {
t.Fatalf("Failed to construct/authenticate vSphere: %s", err)

View File

@ -28,14 +28,16 @@ import (
k8stypes ""
const (
@ -55,10 +57,28 @@ func GetVSphere() (*VSphere, error) {
return nil, err
vSphereConn.GoVmomiClient = client
vsphereIns := &VSphereInstance{
conn: vSphereConn,
cfg: &VirtualCenterConfig{
User: cfg.Global.User,
Password: cfg.Global.Password,
VCenterPort: cfg.Global.VCenterPort,
Datacenters: cfg.Global.Datacenters,
RoundTripperCount: cfg.Global.RoundTripperCount,
vsphereInsMap := make(map[string]*VSphereInstance)
vsphereInsMap[cfg.Global.VCenterIP] = vsphereIns
// TODO: Initialize nodeManager and set it in VSphere.
vs := &VSphere{
conn: vSphereConn,
cfg: cfg,
localInstanceID: "",
vsphereInstanceMap: vsphereInsMap,
hostName: "",
cfg: cfg,
nodeManager: &NodeManager{
vsphereInstanceMap: vsphereInsMap,
nodeInfoMap: make(map[string]*NodeInfo),
registeredNodes: make(map[string]*v1.Node),
runtime.SetFinalizer(vs, logout)
return vs, nil
@ -70,14 +90,18 @@ func getVSphereConfig() *VSphereConfig {
cfg.Global.VCenterPort = os.Getenv("VSPHERE_VCENTER_PORT")
cfg.Global.User = os.Getenv("VSPHERE_USER")
cfg.Global.Password = os.Getenv("VSPHERE_PASSWORD")
cfg.Global.Datacenter = os.Getenv("VSPHERE_DATACENTER")
cfg.Global.Datastore = os.Getenv("VSPHERE_DATASTORE")
cfg.Global.Datacenters = os.Getenv("VSPHERE_DATACENTER")
cfg.Global.DefaultDatastore = os.Getenv("VSPHERE_DATASTORE")
cfg.Global.WorkingDir = os.Getenv("VSPHERE_WORKING_DIR")
cfg.Global.VMName = os.Getenv("VSPHERE_VM_NAME")
cfg.Global.InsecureFlag = false
if strings.ToLower(os.Getenv("VSPHERE_INSECURE")) == "true" {
cfg.Global.InsecureFlag = true
cfg.Workspace.VCenterIP = cfg.Global.VCenterIP
cfg.Workspace.Datacenter = cfg.Global.Datacenters
cfg.Workspace.DefaultDatastore = cfg.Global.DefaultDatastore
cfg.Workspace.Folder = cfg.Global.WorkingDir
return &cfg
@ -127,49 +151,83 @@ func getvmUUID() (string, error) {
return uuid, nil
// Get all datastores accessible for the virtual machine object.
func getSharedDatastoresInK8SCluster(ctx context.Context, folder *vclib.Folder) ([]*vclib.Datastore, error) {
vmList, err := folder.GetVirtualMachines(ctx)
// Returns the accessible datastores for the given node VM.
func getAccessibleDatastores(ctx context.Context, nodeVmDetail *NodeDetails, nodeManager *NodeManager) ([]*vclib.DatastoreInfo, error) {
accessibleDatastores, err := nodeVmDetail.vm.GetAllAccessibleDatastores(ctx)
if err != nil {
glog.Errorf("Failed to get virtual machines in the kubernetes cluster: %s, err: %+v", folder.InventoryPath, err)
return nil, err
// Check if the node VM is not found which indicates that the node info in the node manager is stale.
// If so, rediscover the node and retry.
if vclib.IsManagedObjectNotFoundError(err) {
glog.V(4).Infof("error %q ManagedObjectNotFound for node %q. Rediscovering...", err, nodeVmDetail.NodeName)
err = nodeManager.RediscoverNode(convertToK8sType(nodeVmDetail.NodeName))
if err == nil {
glog.V(4).Infof("Discovered node %s successfully", nodeVmDetail.NodeName)
nodeInfo, err := nodeManager.GetNodeInfo(convertToK8sType(nodeVmDetail.NodeName))
if err != nil {
glog.V(4).Infof("error %q getting node info for node %+v", err, nodeVmDetail)
return nil, err
accessibleDatastores, err = nodeInfo.vm.GetAllAccessibleDatastores(ctx)
if err != nil {
glog.V(4).Infof("error %q getting accessible datastores for node %+v", err, nodeVmDetail)
return nil, err
} else {
glog.V(4).Infof("error %q rediscovering node %+v", err, nodeVmDetail)
return nil, err
} else {
glog.V(4).Infof("error %q getting accessible datastores for node %+v", err, nodeVmDetail)
return nil, err
if vmList == nil || len(vmList) == 0 {
glog.Errorf("No virtual machines found in the kubernetes cluster: %s", folder.InventoryPath)
return nil, fmt.Errorf("No virtual machines found in the kubernetes cluster: %s", folder.InventoryPath)
return accessibleDatastores, nil
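// getSharedDatastoresInK8SCluster returns the datastores that are accessible from every node VM known to nodeManager, resolved against the datastores of the given datacenter.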
func getSharedDatastoresInK8SCluster(ctx context.Context, dc *vclib.Datacenter, nodeManager *NodeManager) ([]*vclib.DatastoreInfo, error) {
nodeVmDetails := nodeManager.GetNodeDetails()
if nodeVmDetails == nil || len(nodeVmDetails) == 0 {
msg := fmt.Sprintf("Kubernetes node nodeVmDetail details is empty. nodeVmDetails : %+v", nodeVmDetails)
return nil, fmt.Errorf(msg)
index := 0
var sharedDatastores []*vclib.Datastore
for _, vm := range vmList {
vmName, err := vm.ObjectName(ctx)
var sharedDatastores []*vclib.DatastoreInfo
for index, nodeVmDetail := range nodeVmDetails {
glog.V(9).Infof("Getting accessible datastores for node %s", nodeVmDetail.NodeName)
accessibleDatastores, err := getAccessibleDatastores(ctx, &nodeVmDetail, nodeManager)
if err != nil {
return nil, err
if !strings.HasPrefix(vmName, DummyVMPrefixName) {
accessibleDatastores, err := vm.GetAllAccessibleDatastores(ctx)
if err != nil {
return nil, err
if index == 0 {
sharedDatastores = accessibleDatastores
} else {
sharedDatastores = intersect(sharedDatastores, accessibleDatastores)
if len(sharedDatastores) == 0 {
return nil, fmt.Errorf("No shared datastores found in the Kubernetes cluster for nodeVmDetails: %+v", nodeVmDetails)
if index == 0 {
sharedDatastores = accessibleDatastores
} else {
sharedDatastores = intersect(sharedDatastores, accessibleDatastores)
if len(sharedDatastores) == 0 {
return nil, fmt.Errorf("No shared datastores found in the Kubernetes cluster: %s", folder.InventoryPath)
glog.V(9).Infof("sharedDatastores : %+v", sharedDatastores)
sharedDatastores, err := getDatastoresForEndpointVC(ctx, dc, sharedDatastores)
if err != nil {
glog.Errorf("Failed to get shared datastores from endpoint VC. err: %+v", err)
return nil, err
glog.V(9).Infof("sharedDatastores at endpoint VC: %+v", sharedDatastores)
return sharedDatastores, nil
func intersect(list1 []*vclib.Datastore, list2 []*vclib.Datastore) []*vclib.Datastore {
var sharedDs []*vclib.Datastore
func intersect(list1 []*vclib.DatastoreInfo, list2 []*vclib.DatastoreInfo) []*vclib.DatastoreInfo {
glog.V(9).Infof("list1: %+v", list1)
glog.V(9).Infof("list2: %+v", list2)
var sharedDs []*vclib.DatastoreInfo
for _, val1 := range list1 {
// Check if val1 is found in list2
for _, val2 := range list2 {
if val1.Reference().Value == val2.Reference().Value {
// Intersection is performed based on the datastoreUrl as this uniquely identifies the datastore.
if val1.Info.Url == val2.Info.Url {
sharedDs = append(sharedDs, val1)
@ -178,46 +236,42 @@ func intersect(list1 []*vclib.Datastore, list2 []*vclib.Datastore) []*vclib.Data
return sharedDs
// Get the datastores accessible for the virtual machine object.
func getAllAccessibleDatastores(ctx context.Context, client *vim25.Client, vmMo mo.VirtualMachine) ([]string, error) {
host := vmMo.Summary.Runtime.Host
if host == nil {
return nil, errors.New("VM doesn't have a HostSystem")
var hostSystemMo mo.HostSystem
s := object.NewSearchIndex(client)
err := s.Properties(ctx, host.Reference(), []string{DatastoreProperty}, &hostSystemMo)
if err != nil {
return nil, err
var dsRefValues []string
for _, dsRef := range hostSystemMo.Datastore {
dsRefValues = append(dsRefValues, dsRef.Value)
return dsRefValues, nil
// getMostFreeDatastoreName returns the name of the datastore with the most free space among the given compatible datastores.
func getMostFreeDatastoreName(ctx context.Context, client *vim25.Client, dsObjList []*vclib.Datastore) (string, error) {
dsMoList, err := dsObjList[0].Datacenter.GetDatastoreMoList(ctx, dsObjList, []string{DatastoreInfoProperty})
if err != nil {
return "", err
func getMostFreeDatastoreName(ctx context.Context, client *vim25.Client, dsInfoList []*vclib.DatastoreInfo) (string, error) {
var curMax int64
curMax = -1
var index int
for i, dsMo := range dsMoList {
dsFreeSpace := dsMo.Info.GetDatastoreInfo().FreeSpace
for i, dsInfo := range dsInfoList {
dsFreeSpace := dsInfo.Info.GetDatastoreInfo().FreeSpace
if dsFreeSpace > curMax {
curMax = dsFreeSpace
index = i
return dsMoList[index].Info.GetDatastoreInfo().Name, nil
return dsInfoList[index].Info.GetDatastoreInfo().Name, nil
func getPbmCompatibleDatastore(ctx context.Context, client *vim25.Client, storagePolicyName string, folder *vclib.Folder) (string, error) {
pbmClient, err := vclib.NewPbmClient(ctx, client)
// Returns the datastores in the given datacenter by performing lookup based on datastore URL.
func getDatastoresForEndpointVC(ctx context.Context, dc *vclib.Datacenter, sharedDsInfos []*vclib.DatastoreInfo) ([]*vclib.DatastoreInfo, error) {
var datastores []*vclib.DatastoreInfo
allDsInfoMap, err := dc.GetAllDatastores(ctx)
if err != nil {
return nil, err
for _, sharedDsInfo := range sharedDsInfos {
dsInfo, ok := allDsInfoMap[sharedDsInfo.Info.Url]
if ok {
datastores = append(datastores, dsInfo)
} else {
glog.V(4).Infof("Warning: Shared datastore with URL %s does not exist in endpoint VC", sharedDsInfo.Info.Url)
glog.V(9).Infof("Datastore from endpoint VC: %+v", datastores)
return datastores, nil
func getPbmCompatibleDatastore(ctx context.Context, dc *vclib.Datacenter, storagePolicyName string, nodeManager *NodeManager) (string, error) {
pbmClient, err := vclib.NewPbmClient(ctx, dc.Client())
if err != nil {
return "", err
@ -226,35 +280,40 @@ func getPbmCompatibleDatastore(ctx context.Context, client *vim25.Client, storag
glog.Errorf("Failed to get Profile ID by name: %s. err: %+v", storagePolicyName, err)
return "", err
sharedDsList, err := getSharedDatastoresInK8SCluster(ctx, folder)
sharedDs, err := getSharedDatastoresInK8SCluster(ctx, dc, nodeManager)
if err != nil {
glog.Errorf("Failed to get shared datastores from kubernetes cluster: %s. err: %+v", folder.InventoryPath, err)
glog.Errorf("Failed to get shared datastores. err: %+v", err)
return "", err
compatibleDatastores, _, err := pbmClient.GetCompatibleDatastores(ctx, storagePolicyID, sharedDsList)
if len(sharedDs) == 0 {
msg := "No shared datastores found in the endpoint virtual center"
return "", errors.New(msg)
compatibleDatastores, _, err := pbmClient.GetCompatibleDatastores(ctx, dc, storagePolicyID, sharedDs)
if err != nil {
glog.Errorf("Failed to get compatible datastores from datastores : %+v with storagePolicy: %s. err: %+v", sharedDsList, storagePolicyID, err)
glog.Errorf("Failed to get compatible datastores from datastores : %+v with storagePolicy: %s. err: %+v",
sharedDs, storagePolicyID, err)
return "", err
datastore, err := getMostFreeDatastoreName(ctx, client, compatibleDatastores)
glog.V(9).Infof("compatibleDatastores : %+v", compatibleDatastores)
datastore, err := getMostFreeDatastoreName(ctx, dc.Client(), compatibleDatastores)
if err != nil {
glog.Errorf("Failed to get most free datastore from compatible datastores: %+v. err: %+v", compatibleDatastores, err)
return "", err
glog.V(4).Infof("Most free datastore : %+s", datastore)
return datastore, err
func (vs *VSphere) setVMOptions(ctx context.Context, dc *vclib.Datacenter) (*vclib.VMOptions, error) {
func (vs *VSphere) setVMOptions(ctx context.Context, dc *vclib.Datacenter, resourcePoolPath string) (*vclib.VMOptions, error) {
var vmOptions vclib.VMOptions
vm, err := dc.GetVMByPath(ctx, vs.cfg.Global.WorkingDir+"/"+vs.localInstanceID)
resourcePool, err := dc.GetResourcePool(ctx, resourcePoolPath)
if err != nil {
return nil, err
resourcePool, err := vm.GetResourcePool(ctx)
if err != nil {
return nil, err
folder, err := dc.GetFolderByPath(ctx, vs.cfg.Global.WorkingDir)
glog.V(9).Infof("Resource pool path %s, resourcePool %+v", resourcePoolPath, resourcePool)
folder, err := dc.GetFolderByPath(ctx, vs.cfg.Workspace.Folder)
if err != nil {
return nil, err
@ -270,28 +329,27 @@ func (vs *VSphere) cleanUpDummyVMs(dummyVMPrefix string) {
defer cancel()
for {
time.Sleep(CleanUpDummyVMRoutineInterval * time.Minute)
// Ensure client is logged in and session is valid
err := vs.conn.Connect(ctx)
vsi, err := vs.getVSphereInstanceForServer(vs.cfg.Workspace.VCenterIP, ctx)
if err != nil {
glog.V(4).Infof("Failed to connect to VC with err: %+v. Retrying again...", err)
glog.V(4).Infof("Failed to get VSphere instance with err: %+v. Retrying again...", err)
dc, err := vclib.GetDatacenter(ctx, vs.conn, vs.cfg.Global.Datacenter)
dc, err := vclib.GetDatacenter(ctx, vsi.conn, vs.cfg.Workspace.Datacenter)
if err != nil {
glog.V(4).Infof("Failed to get the datacenter: %s from VC. err: %+v", vs.cfg.Global.Datacenter, err)
glog.V(4).Infof("Failed to get the datacenter: %s from VC. err: %+v", vs.cfg.Workspace.Datacenter, err)
// Get the folder reference for global working directory where the dummy VM needs to be created.
vmFolder, err := dc.GetFolderByPath(ctx, vs.cfg.Global.WorkingDir)
vmFolder, err := dc.GetFolderByPath(ctx, vs.cfg.Workspace.Folder)
if err != nil {
glog.V(4).Infof("Unable to get the kubernetes folder: %q reference. err: %+v", vs.cfg.Global.WorkingDir, err)
glog.V(4).Infof("Unable to get the kubernetes folder: %q reference. err: %+v", vs.cfg.Workspace.Folder, err)
// A write lock is acquired to make sure the cleanUp routine doesn't delete any VM's created by ongoing PVC requests.
defer cleanUpDummyVMLock.Lock()
err = diskmanagers.CleanUpDummyVMs(ctx, vmFolder, dc)
if err != nil {
glog.V(4).Infof("Unable to clean up dummy VM's in the kubernetes cluster: %q. err: %+v", vs.cfg.Global.WorkingDir, err)
glog.V(4).Infof("Unable to clean up dummy VM's in the kubernetes cluster: %q. err: %+v", vs.cfg.Workspace.Folder, err)
@ -353,3 +411,118 @@ func setdatastoreFolderIDMap(
folderNameIDMap[folderName] = folderID
func convertVolPathToDevicePath(ctx context.Context, dc *vclib.Datacenter, volPath string) (string, error) {
volPath = vclib.RemoveStorageClusterORFolderNameFromVDiskPath(volPath)
// Get the canonical volume path for volPath.
canonicalVolumePath, err := getcanonicalVolumePath(ctx, dc, volPath)
if err != nil {
glog.Errorf("Failed to get canonical vsphere volume path for volume: %s. err: %+v", volPath, err)
return "", err
// Check if the volume path contains .vmdk extension. If not, add the extension and update the nodeVolumes Map
if len(canonicalVolumePath) > 0 && filepath.Ext(canonicalVolumePath) != ".vmdk" {
canonicalVolumePath += ".vmdk"
return canonicalVolumePath, nil
// convertVolPathsToDevicePaths removes cluster or folder path from volPaths and convert to canonicalPath
func (vs *VSphere) convertVolPathsToDevicePaths(ctx context.Context, nodeVolumes map[k8stypes.NodeName][]string) (map[k8stypes.NodeName][]string, error) {
vmVolumes := make(map[k8stypes.NodeName][]string)
for nodeName, volPaths := range nodeVolumes {
nodeInfo, err := vs.nodeManager.GetNodeInfo(nodeName)
if err != nil {
return nil, err
_, err = vs.getVSphereInstanceForServer(nodeInfo.vcServer, ctx)
if err != nil {
return nil, err
for i, volPath := range volPaths {
deviceVolPath, err := convertVolPathToDevicePath(ctx, nodeInfo.dataCenter, volPath)
if err != nil {
glog.Errorf("Failed to convert vsphere volume path %s to device path for volume %s. err: %+v", volPath, deviceVolPath, err)
return nil, err
volPaths[i] = deviceVolPath
vmVolumes[nodeName] = volPaths
return vmVolumes, nil
// checkDiskAttached verifies volumes are attached to the VMs which are in same vCenter and Datacenter
// Returns nodes if exist any for which VM is not found in that vCenter and Datacenter
func (vs *VSphere) checkDiskAttached(ctx context.Context, nodes []k8stypes.NodeName, nodeVolumes map[k8stypes.NodeName][]string, attached map[string]map[string]bool, retry bool) ([]k8stypes.NodeName, error) {
var nodesToRetry []k8stypes.NodeName
var vmList []*vclib.VirtualMachine
var nodeInfo NodeInfo
var err error
for _, nodeName := range nodes {
nodeInfo, err = vs.nodeManager.GetNodeInfo(nodeName)
if err != nil {
return nodesToRetry, err
vmList = append(vmList, nodeInfo.vm)
// Making sure session is valid
_, err = vs.getVSphereInstanceForServer(nodeInfo.vcServer, ctx)
if err != nil {
return nodesToRetry, err
// If any of the nodes are not present property collector query will fail for entire operation
vmMoList, err := nodeInfo.dataCenter.GetVMMoList(ctx, vmList, []string{"config.hardware.device", "name", "config.uuid"})
if err != nil {
if vclib.IsManagedObjectNotFoundError(err) && !retry {
glog.V(4).Infof("checkDiskAttached: ManagedObjectNotFound for property collector query for nodes: %+v vms: %+v", nodes, vmList)
// Property Collector Query failed
// VerifyVolumePaths per VM
for _, nodeName := range nodes {
nodeInfo, err := vs.nodeManager.GetNodeInfo(nodeName)
if err != nil {
return nodesToRetry, err
devices, err := nodeInfo.vm.VirtualMachine.Device(ctx)
if err != nil {
if vclib.IsManagedObjectNotFoundError(err) {
glog.V(4).Infof("checkDiskAttached: ManagedObjectNotFound for Kubernetes node: %s with vSphere Virtual Machine reference: %v", nodeName, nodeInfo.vm)
nodesToRetry = append(nodesToRetry, nodeName)
return nodesToRetry, err
glog.V(4).Infof("Verifying Volume Paths by devices for node %s and VM %s", nodeName, nodeInfo.vm)
vclib.VerifyVolumePathsForVMDevices(devices, nodeVolumes[nodeName], convertToString(nodeName), attached)
return nodesToRetry, err
vmMoMap := make(map[string]mo.VirtualMachine)
for _, vmMo := range vmMoList {
if vmMo.Config == nil {
glog.Errorf("Config is not available for VM: %q", vmMo.Name)
glog.V(9).Infof("vmMoMap vmname: %q vmuuid: %s", vmMo.Name, strings.ToLower(vmMo.Config.Uuid))
vmMoMap[strings.ToLower(vmMo.Config.Uuid)] = vmMo
glog.V(9).Infof("vmMoMap: +%v", vmMoMap)
for _, nodeName := range nodes {
node, err := vs.nodeManager.GetNode(nodeName)
if err != nil {
return nodesToRetry, err
glog.V(9).Infof("Verifying volume for nodeName: %q with nodeuuid: %s", nodeName, node.Status.NodeInfo.SystemUUID, vmMoMap)
vclib.VerifyVolumePathsForVM(vmMoMap[strings.ToLower(node.Status.NodeInfo.SystemUUID)], nodeVolumes[nodeName], convertToString(nodeName), attached)
return nodesToRetry, nil

View File

@ -34,6 +34,7 @@ filegroup(
name = "all-srcs",
srcs = [

View File

@ -0,0 +1,32 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
name = "go_default_library",
srcs = ["fuzzer.go"],
importpath = "",
visibility = ["//visibility:public"],
deps = [
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],

View File

@ -0,0 +1,100 @@
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
See the License for the specific language governing permissions and
limitations under the License.
package fuzzer
import (
metav1 ""
runtimeserializer ""
kubetypes ""
// Funcs returns the fuzzer functions for the kubeletconfig apis.
// The returned slice holds one function, which overwrites fields of a
// KubeletConfiguration with fixed values. Neither codecs nor the fuzz.Continue
// argument is referenced by the statements visible here.
func Funcs(codecs runtimeserializer.CodecFactory) []interface{} {
return []interface{}{
// provide non-empty values for fields with defaults, so the defaulter doesn't change values during round-trip
// NOTE(review): Address and HealthzBindAddress are assigned empty strings
// below, which looks at odds with "non-empty values" — confirm against the
// v1alpha1 defaults.
func(obj *kubeletconfig.KubeletConfiguration, c fuzz.Continue) {
obj.ConfigTrialDuration = &metav1.Duration{Duration: 10 * time.Minute}
// Authentication and authorization settings.
obj.Authentication.Anonymous.Enabled = true
obj.Authentication.Webhook.Enabled = false
obj.Authentication.Webhook.CacheTTL = metav1.Duration{Duration: 2 * time.Minute}
obj.Authorization.Mode = kubeletconfig.KubeletAuthorizationModeAlwaysAllow
obj.Authorization.Webhook.CacheAuthorizedTTL = metav1.Duration{Duration: 5 * time.Minute}
obj.Authorization.Webhook.CacheUnauthorizedTTL = metav1.Duration{Duration: 30 * time.Second}
obj.Address = ""
obj.CAdvisorPort = 4194
obj.VolumeStatsAggPeriod = metav1.Duration{Duration: time.Minute}
obj.RuntimeRequestTimeout = metav1.Duration{Duration: 2 * time.Minute}
obj.CPUCFSQuota = true
obj.EventBurst = 10
obj.EventRecordQPS = 5
obj.EnableControllerAttachDetach = true
obj.EnableDebuggingHandlers = true
obj.EnableServer = true
obj.FileCheckFrequency = metav1.Duration{Duration: 20 * time.Second}
obj.HealthzBindAddress = ""
obj.HealthzPort = 10248
obj.HostNetworkSources = []string{kubetypes.AllSource}
obj.HostPIDSources = []string{kubetypes.AllSource}
obj.HostIPCSources = []string{kubetypes.AllSource}
obj.HTTPCheckFrequency = metav1.Duration{Duration: 20 * time.Second}
// Image garbage-collection settings.
obj.ImageMinimumGCAge = metav1.Duration{Duration: 2 * time.Minute}
obj.ImageGCHighThresholdPercent = 85
obj.ImageGCLowThresholdPercent = 80
obj.MaxOpenFiles = 1000000
obj.MaxPods = 110
obj.NodeStatusUpdateFrequency = metav1.Duration{Duration: 10 * time.Second}
obj.CPUManagerPolicy = "none"
// Reuses the node status update frequency assigned just above.
obj.CPUManagerReconcilePeriod = obj.NodeStatusUpdateFrequency
obj.OOMScoreAdj = int32(qos.KubeletOOMScoreAdj)
obj.Port = ports.KubeletPort
obj.ReadOnlyPort = ports.KubeletReadOnlyPort
obj.RegistryBurst = 10
obj.RegistryPullQPS = 5
obj.ResolverConfig = kubetypes.ResolvConfDefault
obj.SerializeImagePulls = true
obj.StreamingConnectionIdleTimeout = metav1.Duration{Duration: 4 * time.Hour}
obj.SyncFrequency = metav1.Duration{Duration: 1 * time.Minute}
obj.ContentType = "application/vnd.kubernetes.protobuf"
obj.KubeAPIQPS = 5
obj.KubeAPIBurst = 10
obj.HairpinMode = v1alpha1.PromiscuousBridge
// Eviction thresholds, keyed by signal name.
obj.EvictionHard = map[string]string{
"memory.available": "100Mi",
"nodefs.available": "10%",
"nodefs.inodesFree": "5%",
"imagefs.available": "15%",
obj.EvictionPressureTransitionPeriod = metav1.Duration{Duration: 5 * time.Minute}
obj.MakeIPTablesUtilChains = true
// The values below are taken from exported identifiers of the v1alpha1 package.
obj.IPTablesMasqueradeBit = v1alpha1.DefaultIPTablesMasqueradeBit
obj.IPTablesDropBit = v1alpha1.DefaultIPTablesDropBit
obj.CgroupsPerQOS = true
obj.CgroupDriver = "cgroupfs"
obj.EnforceNodeAllocatable = v1alpha1.DefaultNodeAllocatableEnforcement
// An empty, non-nil map rather than a nil one.
obj.ManifestURLHeader = make(map[string][]string)

View File

@ -25,7 +25,6 @@ func KubeletConfigurationPathRefs(kc *KubeletConfiguration) []*string {
paths = append(paths, &kc.Authentication.X509.ClientCAFile)
paths = append(paths, &kc.TLSCertFile)
paths = append(paths, &kc.TLSPrivateKeyFile)
paths = append(paths, &kc.SeccompProfileRoot)
paths = append(paths, &kc.ResolverConfig)
return paths

View File

@ -132,7 +132,6 @@ var (

View File

@ -1,4 +1,4 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
name = "go_default_library",
@ -26,3 +26,14 @@ filegroup(
tags = ["automanaged"],
visibility = ["//visibility:public"],
name = "go_default_test",
srcs = ["scheme_test.go"],
importpath = "",
library = ":go_default_library",
deps = [

View File

@ -0,0 +1,32 @@
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
See the License for the specific language governing permissions and
limitations under the License.
package scheme
import (
func TestRoundTripTypes(t *testing.T) {
scheme, _, err := NewSchemeAndCodecs()
if err != nil {
t.Fatalf("unexpected error: %v", err)
roundtrip.RoundTripTestForScheme(t, scheme, fuzzer.Funcs)

View File

@ -89,8 +89,6 @@ type KubeletConfiguration struct {
Authentication KubeletAuthentication
// authorization specifies how requests to the Kubelet's server are authorized
Authorization KubeletAuthorization
// seccompProfileRoot is the directory path for seccomp profiles.
SeccompProfileRoot string
// allowPrivileged enables containers to request privileged mode.
// Defaults to false.
AllowPrivileged bool

View File

@ -17,7 +17,6 @@ limitations under the License.
package v1alpha1
import (
metav1 ""
@ -37,14 +36,14 @@ const (
// More details here:
AutoDetectCloudProvider = "auto-detect"
defaultIPTablesMasqueradeBit = 14
defaultIPTablesDropBit = 15
DefaultIPTablesMasqueradeBit = 14
DefaultIPTablesDropBit = 15
var (
zeroDuration = metav1.Duration{}
// Refer to [Node Allocatable]( doc for more information.
defaultNodeAllocatableEnforcement = []string{"pods"}
DefaultNodeAllocatableEnforcement = []string{"pods"}
func addDefaultingFuncs(scheme *kruntime.Scheme) error {
@ -177,9 +176,6 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
if obj.SerializeImagePulls == nil {
obj.SerializeImagePulls = boolVar(true)
if obj.SeccompProfileRoot == "" {
obj.SeccompProfileRoot = filepath.Join(DefaultRootDir, "seccomp")
if obj.StreamingConnectionIdleTimeout == zeroDuration {
obj.StreamingConnectionIdleTimeout = metav1.Duration{Duration: 4 * time.Hour}
@ -214,11 +210,11 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
obj.MakeIPTablesUtilChains = boolVar(true)
if obj.IPTablesMasqueradeBit == nil {
temp := int32(defaultIPTablesMasqueradeBit)
temp := int32(DefaultIPTablesMasqueradeBit)
obj.IPTablesMasqueradeBit = &temp
if obj.IPTablesDropBit == nil {
temp := int32(defaultIPTablesDropBit)
temp := int32(DefaultIPTablesDropBit)
obj.IPTablesDropBit = &temp
if obj.CgroupsPerQOS == nil {
@ -229,7 +225,7 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
obj.CgroupDriver = "cgroupfs"
if obj.EnforceNodeAllocatable == nil {
obj.EnforceNodeAllocatable = defaultNodeAllocatableEnforcement
obj.EnforceNodeAllocatable = DefaultNodeAllocatableEnforcement

View File

@ -89,8 +89,6 @@ type KubeletConfiguration struct {
Authentication KubeletAuthentication `json:"authentication"`
// authorization specifies how requests to the Kubelet's server are authorized
Authorization KubeletAuthorization `json:"authorization"`
// seccompProfileRoot is the directory path for seccomp profiles.
SeccompProfileRoot string `json:"seccompProfileRoot"`
// allowPrivileged enables containers to request privileged mode.
// Defaults to false.
AllowPrivileged *bool `json:"allowPrivileged"`

View File

@ -163,7 +163,6 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_kubeletconfig_KubeletConfigura
if err := Convert_v1alpha1_KubeletAuthorization_To_kubeletconfig_KubeletAuthorization(&in.Authorization, &out.Authorization, s); err != nil {
return err
out.SeccompProfileRoot = in.SeccompProfileRoot
if err := v1.Convert_Pointer_bool_To_bool(&in.AllowPrivileged, &out.AllowPrivileged, s); err != nil {
return err
@ -289,7 +288,6 @@ func autoConvert_kubeletconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigura
if err := Convert_kubeletconfig_KubeletAuthorization_To_v1alpha1_KubeletAuthorization(&in.Authorization, &out.Authorization, s); err != nil {
return err
out.SeccompProfileRoot = in.SeccompProfileRoot
if err := v1.Convert_bool_To_Pointer_bool(&in.AllowPrivileged, &out.AllowPrivileged, s); err != nil {
return err

View File

@ -31,7 +31,7 @@ type cadvisorUnsupported struct {
var _ Interface = new(cadvisorUnsupported)
func New(address string, port uint, imageFsInfoProvider ImageFsInfoProvider, rootPath string) (Interface, error) {
func New(address string, port uint, imageFsInfoProvider ImageFsInfoProvider, rootPath string, usingLegacyStats bool) (Interface, error) {
return &cadvisorUnsupported{}, nil

View File

@ -32,7 +32,7 @@ type cadvisorClient struct {
var _ Interface = new(cadvisorClient)
// New creates a cAdvisor and exports its API on the specified port if port > 0.
func New(address string, port uint, imageFsInfoProvider ImageFsInfoProvider, rootPath string) (Interface, error) {
func New(address string, port uint, imageFsInfoProvider ImageFsInfoProvider, rootPath string, usingLegacyStats bool) (Interface, error) {
client, err := winstats.NewPerfCounterClient()
return &cadvisorClient{winStatsClient: client}, err

View File

@ -128,7 +128,7 @@ type containerManagerImpl struct {
// Interface for QoS cgroup management
qosContainerManager QOSContainerManager
// Interface for exporting and allocating devices reported by device plugins.
devicePluginHandler deviceplugin.Handler
devicePluginManager deviceplugin.Manager
// Interface for CPU affinity management.
cpuManager cpumanager.Manager
@ -274,11 +274,11 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
glog.Infof("Creating device plugin handler: %t", devicePluginEnabled)
glog.Infof("Creating device plugin manager: %t", devicePluginEnabled)
if devicePluginEnabled {
cm.devicePluginHandler, err = deviceplugin.NewHandlerImpl(updateDeviceCapacityFunc)
cm.devicePluginManager, err = deviceplugin.NewManagerImpl(updateDeviceCapacityFunc)
} else {
cm.devicePluginHandler, err = deviceplugin.NewHandlerStub()
cm.devicePluginManager, err = deviceplugin.NewManagerStub()
if err != nil {
return nil, err
@ -597,7 +597,7 @@ func (cm *containerManagerImpl) Start(node *v1.Node,
}, time.Second, stopChan)
// Starts device plugin manager.
if err := cm.devicePluginHandler.Start(deviceplugin.ActivePodsFunc(activePods)); err != nil {
if err := cm.devicePluginManager.Start(deviceplugin.ActivePodsFunc(activePods)); err != nil {
return err
return nil
@ -622,7 +622,7 @@ func (cm *containerManagerImpl) GetResources(pod *v1.Pod, container *v1.Containe
opts := &kubecontainer.RunContainerOptions{}
// Allocate should already be called during predicateAdmitHandler.Admit(),
// just try to fetch device runtime information from cached state here
devOpts := cm.devicePluginHandler.GetDeviceRunContainerOptions(pod, container)
devOpts := cm.devicePluginManager.GetDeviceRunContainerOptions(pod, container)
if devOpts == nil {
return opts, nil
@ -633,7 +633,7 @@ func (cm *containerManagerImpl) GetResources(pod *v1.Pod, container *v1.Containe
func (cm *containerManagerImpl) UpdatePluginResources(node *schedulercache.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
return cm.devicePluginHandler.Allocate(node, attrs)
return cm.devicePluginManager.Allocate(node, attrs)
func (cm *containerManagerImpl) SystemCgroupsLimit() v1.ResourceList {

View File

@ -9,11 +9,10 @@ load(
name = "go_default_library",
srcs = [
@ -49,7 +48,6 @@ filegroup(
name = "go_default_test",
srcs = [

View File

@ -1,365 +0,0 @@
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
See the License for the specific language governing permissions and
limitations under the License.
package deviceplugin
import (
pluginapi ""
// ActivePodsFunc is a function that returns a list of pods to reconcile.
type ActivePodsFunc func() []*v1.Pod
// Handler defines the functions used to manage and access device plugin resources.
type Handler interface {
// Start starts device plugin registration service.
Start(activePods ActivePodsFunc) error
// Devices returns all of registered devices keyed by resourceName.
Devices() map[string][]pluginapi.Device
// Allocate scans through the containers in the pod spec.
// If it finds that a container requires a device plugin resource, it:
// 1. Checks whether it already has this information in its cached state.
// 2. If not, it calls Allocate and populates its cached state afterwards.
// 3. If there is no cached state and Allocate fails, it returns an error.
// 4. Otherwise, it updates allocatableResource in nodeInfo if necessary,
// to make sure it is at least equal to the pod's requested capacity for
// any registered device plugin resource.
Allocate(node *schedulercache.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error
// GetDeviceRunContainerOptions checks whether we have cached containerDevices
// for the passed-in <pod, container> and returns its DeviceRunContainerOptions
// for the found one. An empty struct is returned in case no cached state is found.
GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Container) *DeviceRunContainerOptions
// HandlerImpl implements the actual functionality to manage device plugin resources.
type HandlerImpl struct {
// TODO: consider changing this to an RWMutex.
// devicePluginManager is an implementation of deviceplugin.Manager interface.
devicePluginManager Manager
// activePods is a method for listing active pods on the node
// so the amount of pluginResources requested by existing pods
// could be counted when updating allocated devices
activePods ActivePodsFunc
// devicePluginManagerMonitorCallback is used for updating devices' states in one time call.
// e.g. a new device is advertised, two old devices are deleted and a running device fails.
devicePluginManagerMonitorCallback MonitorCallback
// allDevices contains all of registered resourceNames and their exported device IDs.
allDevices map[string]sets.String
// allocatedDevices contains allocated deviceIds, keyed by resourceName.
allocatedDevices map[string]sets.String
// podDevices contains pod to allocated device mapping.
podDevices podDevices
// NewHandlerImpl creates a HandlerImpl to manage device plugin resources.
// updateCapacityFunc is called to update ContainerManager capacity when
// device capacity changes.
func NewHandlerImpl(updateCapacityFunc func(v1.ResourceList)) (*HandlerImpl, error) {
glog.V(2).Infof("Creating Device Plugin Handler")
handler := &HandlerImpl{
allDevices: make(map[string]sets.String),
allocatedDevices: make(map[string]sets.String),
podDevices: make(podDevices),
deviceManagerMonitorCallback := func(resourceName string, added, updated, deleted []pluginapi.Device) {
var capacity = v1.ResourceList{}
kept := append(updated, added...)
if _, ok := handler.allDevices[resourceName]; !ok {
handler.allDevices[resourceName] = sets.NewString()
// For now, Handler only keeps track of healthy devices.
// We can revisit this later when the need comes to track unhealthy devices here.
for _, dev := range kept {
if dev.Health == pluginapi.Healthy {
} else {
for _, dev := range deleted {
capacity[v1.ResourceName(resourceName)] = *resource.NewQuantity(int64(handler.allDevices[resourceName].Len()), resource.DecimalSI)
mgr, err := NewManagerImpl(pluginapi.KubeletSocket, deviceManagerMonitorCallback)
if err != nil {
return nil, fmt.Errorf("Failed to initialize device plugin manager: %+v", err)
handler.devicePluginManager = mgr
handler.devicePluginManagerMonitorCallback = deviceManagerMonitorCallback
return handler, nil
// Start initializes podDevices and allocatedDevices information from checkpoint-ed state
// and starts device plugin registration service.
func (h *HandlerImpl) Start(activePods ActivePodsFunc) error {
h.activePods = activePods
// Loads in allocatedDevices information from disk.
err := h.readCheckpoint()
if err != nil {
glog.Warningf("Continue after failing to read checkpoint file. Device allocation info may NOT be up-to-date. Err: %v", err)
return h.devicePluginManager.Start()
// Devices returns all of registered devices keyed by resourceName.
func (h *HandlerImpl) Devices() map[string][]pluginapi.Device {
return h.devicePluginManager.Devices()
// Returns list of device Ids we need to allocate with Allocate rpc call.
// Returns empty list in case we don't need to issue the Allocate rpc call.
func (h *HandlerImpl) devicesToAllocate(podUID, contName, resource string, required int) (sets.String, error) {
defer h.Unlock()
needed := required
// Gets list of devices that have already been allocated.
// This can happen if a container restarts for example.
devices := h.podDevices.containerDevices(podUID, contName, resource)
if devices != nil {
glog.V(3).Infof("Found pre-allocated devices for resource %s container %q in Pod %q: %v", resource, contName, podUID, devices.List())
needed = needed - devices.Len()
// A pod's resource is not expected to change once admitted by the API server,
// so just fail loudly here. We can revisit this part if this no longer holds.
if needed != 0 {
return nil, fmt.Errorf("pod %v container %v changed request for resource %v from %v to %v", podUID, contName, resource, devices.Len(), required)
if needed == 0 {
// No change, no work.
return nil, nil
devices = sets.NewString()
// Needs to allocate additional devices.
if h.allocatedDevices[resource] == nil {
h.allocatedDevices[resource] = sets.NewString()
// Gets Devices in use.
devicesInUse := h.allocatedDevices[resource]
// Gets a list of available devices.
available := h.allDevices[resource].Difference(devicesInUse)
if int(available.Len()) < needed {
return nil, fmt.Errorf("requested number of devices unavailable for %s. Requested: %d, Available: %d", resource, needed, available.Len())
allocated := available.UnsortedList()[:needed]
// Updates h.allocatedDevices with allocated devices to prevent them
// from being allocated to other pods/containers, given that we are
// not holding lock during the rpc call.
for _, device := range allocated {
return devices, nil
// allocateContainerResources attempts to allocate all of the required device
// plugin resources for the input container, issues an Allocate rpc request
// for each new device resource requirement, processes their AllocateResponses,
// and updates the cached containerDevices on success.
func (h *HandlerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Container) error {
podUID := string(pod.UID)
contName := container.Name
allocatedDevicesUpdated := false
for k, v := range container.Resources.Limits {
resource := string(k)
needed := int(v.Value())
glog.V(3).Infof("needs %d %s", needed, resource)
if _, registeredResource := h.allDevices[resource]; !registeredResource {
// Updates allocatedDevices to garbage collect any stranded resources
// before doing the device plugin allocation.
if !allocatedDevicesUpdated {
allocatedDevicesUpdated = true
allocDevices, err := h.devicesToAllocate(podUID, contName, resource, needed)
if err != nil {
return err
if allocDevices == nil || len(allocDevices) <= 0 {
// devicePluginManager.Allocate involves RPC calls to the device plugin, which
// could be heavy-weight. Therefore we want to perform this operation outside
// the mutex lock. Note that if the Allocate call fails, we may leave container
// resources partially allocated for the failed container. We rely on
// updateAllocatedDevices() to garbage collect these resources later. Another
// side effect is that if we have X of resource A and Y of resource B in total,
// and two containers, container1 and container2, both require X of resource A
// and Y of resource B, both allocation requests may fail if we serve them in
// mixed order.
// TODO: may revisit this part later if we see inefficient resource allocation
// in real use as the result of this. Should also consider parallelizing device
// plugin Allocate gRPC calls if it becomes common that a container may require
// resources from multiple device plugins.
resp, err := h.devicePluginManager.Allocate(resource, allocDevices.UnsortedList())
if err != nil {
// In case of allocation failure, we want to restore h.allocatedDevices
// to the actual allocated state from h.podDevices.
h.allocatedDevices = h.podDevices.devices()
return err
// Update internal cached podDevices state.
h.podDevices.insert(podUID, contName, resource, allocDevices, resp)
// Checkpoints device to container allocation information.
return h.writeCheckpoint()
// Allocate attempts to allocate all of the required device plugin resources,
// and updates Allocatable resources in nodeInfo if necessary.
func (h *HandlerImpl) Allocate(node *schedulercache.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
pod := attrs.Pod
// TODO: Reuse devices between init containers and regular containers.
for _, container := range pod.Spec.InitContainers {
if err := h.allocateContainerResources(pod, &container); err != nil {
return err
for _, container := range pod.Spec.Containers {
if err := h.allocateContainerResources(pod, &container); err != nil {
return err
// quick return if no pluginResources requested
if _, podRequireDevicePluginResource := h.podDevices[string(pod.UID)]; !podRequireDevicePluginResource {
return nil
return nil
// sanitizeNodeAllocatable scans through allocatedDevices in DevicePluginHandler
// and, if necessary, updates allocatableResource in nodeInfo to be at least equal to
// the allocated capacity. This allows pods that have already been scheduled on
// the node to pass GeneralPredicates admission checking even upon device plugin failure.
func (h *HandlerImpl) sanitizeNodeAllocatable(node *schedulercache.NodeInfo) {
var newAllocatableResource *schedulercache.Resource
allocatableResource := node.AllocatableResource()
if allocatableResource.ScalarResources == nil {
allocatableResource.ScalarResources = make(map[v1.ResourceName]int64)
for resource, devices := range h.allocatedDevices {
needed := devices.Len()
quant, ok := allocatableResource.ScalarResources[v1.ResourceName(resource)]
if ok && int(quant) >= needed {
// Needs to update nodeInfo.AllocatableResource to make sure
// NodeInfo.allocatableResource is at least equal to the capacity already allocated.
if newAllocatableResource == nil {
newAllocatableResource = allocatableResource.Clone()
newAllocatableResource.ScalarResources[v1.ResourceName(resource)] = int64(needed)
if newAllocatableResource != nil {
// GetDeviceRunContainerOptions checks whether we have cached containerDevices
// for the passed-in <pod, container> and returns its DeviceRunContainerOptions
// for the found one. An empty struct is returned in case no cached state is found.
func (h *HandlerImpl) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Container) *DeviceRunContainerOptions {
defer h.Unlock()
return h.podDevices.deviceRunContainerOptions(string(pod.UID), container.Name)
// updateAllocatedDevices gets a list of active pods and then frees any Devices that are bound to
// terminated pods.
func (h *HandlerImpl) updateAllocatedDevices(activePods []*v1.Pod) {
defer h.Unlock()
activePodUids := sets.NewString()
for _, pod := range activePods {
allocatedPodUids := h.podDevices.pods()
podsToBeRemoved := allocatedPodUids.Difference(activePodUids)
if len(podsToBeRemoved) <= 0 {
glog.V(5).Infof("pods to be removed: %v", podsToBeRemoved.List())
// Regenerated allocatedDevices after we update pod allocation information.
h.allocatedDevices = h.podDevices.devices()
// Checkpoints device to container allocation information to disk.
func (h *HandlerImpl) writeCheckpoint() error {
data := h.podDevices.toCheckpointData()
dataJSON, err := json.Marshal(data)
if err != nil {
return err
filepath := h.devicePluginManager.CheckpointFile()
return ioutil.WriteFile(filepath, dataJSON, 0644)
// Reads device to container allocation information from disk, and populates
// h.allocatedDevices accordingly.
func (h *HandlerImpl) readCheckpoint() error {
filepath := h.devicePluginManager.CheckpointFile()
content, err := ioutil.ReadFile(filepath)
if err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to read checkpoint file %q: %v", filepath, err)
glog.V(2).Infof("Read checkpoint file %s\n", filepath)
var data checkpointData
if err := json.Unmarshal(content, &data); err != nil {
return fmt.Errorf("failed to unmarshal checkpoint data: %v", err)
defer h.Unlock()
h.allocatedDevices = h.podDevices.devices()
return nil

View File

@ -1,414 +0,0 @@
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
See the License for the specific language governing permissions and
limitations under the License.
package deviceplugin
import (
metav1 ""
pluginapi ""
func TestUpdateCapacity(t *testing.T) {
var expected = v1.ResourceList{}
as := assert.New(t)
verifyCapacityFunc := func(updates v1.ResourceList) {
as.Equal(expected, updates)
testHandler, err := NewHandlerImpl(verifyCapacityFunc)
devs := []pluginapi.Device{
{ID: "Device1", Health: pluginapi.Healthy},
{ID: "Device2", Health: pluginapi.Healthy},
{ID: "Device3", Health: pluginapi.Unhealthy},
resourceName := "resource1"
// Adds three devices for resource1, two healthy and one unhealthy.
// Expects capacity for resource1 to be 2.
expected[v1.ResourceName(resourceName)] = *resource.NewQuantity(int64(2), resource.DecimalSI)
testHandler.devicePluginManagerMonitorCallback(resourceName, devs, []pluginapi.Device{}, []pluginapi.Device{})
// Deleting an unhealthy device should NOT change capacity.
testHandler.devicePluginManagerMonitorCallback(resourceName, []pluginapi.Device{}, []pluginapi.Device{}, []pluginapi.Device{devs[2]})
// Updating a healthy device to unhealthy should reduce capacity by 1.
expected[v1.ResourceName(resourceName)] = *resource.NewQuantity(int64(1), resource.DecimalSI)
// Deleting a healthy device should reduce capacity by 1.
expected[v1.ResourceName(resourceName)] = *resource.NewQuantity(int64(0), resource.DecimalSI)
// Tests adding another resource.
delete(expected, v1.ResourceName(resourceName))
resourceName2 := "resource2"
expected[v1.ResourceName(resourceName2)] = *resource.NewQuantity(int64(2), resource.DecimalSI)
testHandler.devicePluginManagerMonitorCallback(resourceName2, devs, []pluginapi.Device{}, []pluginapi.Device{})
type stringPairType struct {
value1 string
value2 string
// DevicePluginManager stub to test device Allocation behavior.
type DevicePluginManagerTestStub struct {
// All data structs are keyed by resourceName+DevId
devRuntimeDevices map[string][]stringPairType
devRuntimeMounts map[string][]stringPairType
devRuntimeEnvs map[string][]stringPairType
func NewDevicePluginManagerTestStub() (*DevicePluginManagerTestStub, error) {
return &DevicePluginManagerTestStub{
devRuntimeDevices: make(map[string][]stringPairType),
devRuntimeMounts: make(map[string][]stringPairType),
devRuntimeEnvs: make(map[string][]stringPairType),
}, nil
func (m *DevicePluginManagerTestStub) Start() error {
return nil
func (m *DevicePluginManagerTestStub) Devices() map[string][]pluginapi.Device {
return make(map[string][]pluginapi.Device)
func (m *DevicePluginManagerTestStub) Allocate(resourceName string, devIds []string) (*pluginapi.AllocateResponse, error) {
resp := new(pluginapi.AllocateResponse)
resp.Envs = make(map[string]string)
for _, id := range devIds {
key := resourceName + id
fmt.Printf("Alloc device %v for resource %v\n", id, resourceName)
for _, dev := range m.devRuntimeDevices[key] {
fmt.Printf("Add dev %v %v\n", dev.value1, dev.value2)
resp.Devices = append(resp.Devices, &pluginapi.DeviceSpec{
ContainerPath: dev.value1,
HostPath: dev.value2,
Permissions: "mrw",
for _, mount := range m.devRuntimeMounts[key] {
fmt.Printf("Add mount %v %v\n", mount.value1, mount.value2)
resp.Mounts = append(resp.Mounts, &pluginapi.Mount{
ContainerPath: mount.value1,
HostPath: mount.value2,
ReadOnly: true,
for _, env := range m.devRuntimeEnvs[key] {
fmt.Printf("Add env %v %v\n", env.value1, env.value2)
resp.Envs[env.value1] = env.value2
return resp, nil
func (m *DevicePluginManagerTestStub) Stop() error {
return nil
func (m *DevicePluginManagerTestStub) CheckpointFile() string {
return "/tmp/device-plugin-checkpoint"
func constructDevices(devices []string) sets.String {
ret := sets.NewString()
for _, dev := range devices {
return ret
func constructAllocResp(devices, mounts, envs map[string]string) *pluginapi.AllocateResponse {
resp := &pluginapi.AllocateResponse{}
for k, v := range devices {
resp.Devices = append(resp.Devices, &pluginapi.DeviceSpec{
HostPath: k,
ContainerPath: v,
Permissions: "mrw",
for k, v := range mounts {
resp.Mounts = append(resp.Mounts, &pluginapi.Mount{
ContainerPath: k,
HostPath: v,
ReadOnly: true,
resp.Envs = make(map[string]string)
for k, v := range envs {
resp.Envs[k] = v
return resp
func TestCheckpoint(t *testing.T) {
resourceName1 := ""
resourceName2 := ""
m, err := NewDevicePluginManagerTestStub()
as := assert.New(t)
testHandler := &HandlerImpl{
devicePluginManager: m,
allDevices: make(map[string]sets.String),
allocatedDevices: make(map[string]sets.String),
podDevices: make(podDevices),
testHandler.podDevices.insert("pod1", "con1", resourceName1,
constructDevices([]string{"dev1", "dev2"}),
constructAllocResp(map[string]string{"/dev/r1dev1": "/dev/r1dev1", "/dev/r1dev2": "/dev/r1dev2"},
map[string]string{"/home/r1lib1": "/usr/r1lib1"}, map[string]string{}))
testHandler.podDevices.insert("pod1", "con1", resourceName2,
constructDevices([]string{"dev1", "dev2"}),
constructAllocResp(map[string]string{"/dev/r2dev1": "/dev/r2dev1", "/dev/r2dev2": "/dev/r2dev2"},
map[string]string{"/home/r2lib1": "/usr/r2lib1"},
map[string]string{"r2devices": "dev1 dev2"}))
testHandler.podDevices.insert("pod1", "con2", resourceName1,
constructAllocResp(map[string]string{"/dev/r1dev3": "/dev/r1dev3"},
map[string]string{"/home/r1lib1": "/usr/r1lib1"}, map[string]string{}))
testHandler.podDevices.insert("pod2", "con1", resourceName1,
constructAllocResp(map[string]string{"/dev/r1dev4": "/dev/r1dev4"},
map[string]string{"/home/r1lib1": "/usr/r1lib1"}, map[string]string{}))
expectedPodDevices := testHandler.podDevices
expectedAllocatedDevices := testHandler.podDevices.devices()
err = testHandler.writeCheckpoint()
testHandler.podDevices = make(podDevices)
err = testHandler.readCheckpoint()
as.Equal(len(expectedPodDevices), len(testHandler.podDevices))
for podUID, containerDevices := range expectedPodDevices {
for conName, resources := range containerDevices {
for resource := range resources {
expectedPodDevices.containerDevices(podUID, conName, resource),
testHandler.podDevices.containerDevices(podUID, conName, resource)))
opts1 := expectedPodDevices.deviceRunContainerOptions(podUID, conName)
opts2 := testHandler.podDevices.deviceRunContainerOptions(podUID, conName)
as.Equal(len(opts1.Envs), len(opts2.Envs))
as.Equal(len(opts1.Mounts), len(opts2.Mounts))
as.Equal(len(opts1.Devices), len(opts2.Devices))
as.True(reflect.DeepEqual(expectedAllocatedDevices, testHandler.allocatedDevices))
type activePodsStub struct {
activePods []*v1.Pod
func (a *activePodsStub) getActivePods() []*v1.Pod {
return a.activePods
func (a *activePodsStub) updateActivePods(newPods []*v1.Pod) {
a.activePods = newPods
func TestPodContainerDeviceAllocation(t *testing.T) {
flag.Set("alsologtostderr", fmt.Sprintf("%t", true))
var logLevel string
flag.StringVar(&logLevel, "logLevel", "4", "test")
resourceName1 := ""
resourceQuantity1 := *resource.NewQuantity(int64(2), resource.DecimalSI)
devID1 := "dev1"
devID2 := "dev2"
resourceName2 := ""
resourceQuantity2 := *resource.NewQuantity(int64(1), resource.DecimalSI)
devID3 := "dev3"
devID4 := "dev4"
m, err := NewDevicePluginManagerTestStub()
as := assert.New(t)
monitorCallback := func(resourceName string, added, updated, deleted []pluginapi.Device) {}
podsStub := activePodsStub{
activePods: []*v1.Pod{},
cachedNode := &v1.Node{
Status: v1.NodeStatus{
Allocatable: v1.ResourceList{},
nodeInfo := &schedulercache.NodeInfo{}
testHandler := &HandlerImpl{
devicePluginManager: m,
devicePluginManagerMonitorCallback: monitorCallback,
allDevices: make(map[string]sets.String),
allocatedDevices: make(map[string]sets.String),
podDevices: make(podDevices),
activePods: podsStub.getActivePods,
testHandler.allDevices[resourceName1] = sets.NewString()
testHandler.allDevices[resourceName2] = sets.NewString()
m.devRuntimeDevices[resourceName1+devID1] = append(m.devRuntimeDevices[resourceName1+devID1], stringPairType{"/dev/aaa", "/dev/aaa"})
m.devRuntimeDevices[resourceName1+devID1] = append(m.devRuntimeDevices[resourceName1+devID1], stringPairType{"/dev/bbb", "/dev/bbb"})
m.devRuntimeDevices[resourceName1+devID2] = append(m.devRuntimeDevices[resourceName1+devID2], stringPairType{"/dev/ccc", "/dev/ccc"})
m.devRuntimeMounts[resourceName1+devID1] = append(m.devRuntimeMounts[resourceName1+devID1], stringPairType{"/container_dir1/file1", "host_dir1/file1"})
m.devRuntimeMounts[resourceName1+devID2] = append(m.devRuntimeMounts[resourceName1+devID2], stringPairType{"/container_dir1/file2", "host_dir1/file2"})
m.devRuntimeEnvs[resourceName1+devID2] = append(m.devRuntimeEnvs[resourceName1+devID2], stringPairType{"key1", "val1"})
m.devRuntimeEnvs[resourceName2+devID3] = append(m.devRuntimeEnvs[resourceName2+devID3], stringPairType{"key2", "val2"})
m.devRuntimeEnvs[resourceName2+devID4] = append(m.devRuntimeEnvs[resourceName2+devID4], stringPairType{"key2", "val3"})
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: uuid.NewUUID(),
Spec: v1.PodSpec{
Containers: []v1.Container{
Name: string(uuid.NewUUID()),
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceName(resourceName1): resourceQuantity1,
v1.ResourceName("cpu"): resourceQuantity1,
v1.ResourceName(resourceName2): resourceQuantity2,
err = testHandler.Allocate(nodeInfo, &lifecycle.PodAdmitAttributes{Pod: pod})
runContainerOpts := testHandler.GetDeviceRunContainerOptions(pod, &pod.Spec.Containers[0])
as.Equal(len(runContainerOpts.Devices), 3)
as.Equal(len(runContainerOpts.Mounts), 2)
as.Equal(len(runContainerOpts.Envs), 2)
// Requesting to create a pod without enough resources should fail.
as.Equal(2, testHandler.allocatedDevices[resourceName1].Len())
failPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: uuid.NewUUID(),
Spec: v1.PodSpec{
Containers: []v1.Container{
Name: string(uuid.NewUUID()),
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceName(resourceName1): resourceQuantity2,
err = testHandler.Allocate(nodeInfo, &lifecycle.PodAdmitAttributes{Pod: failPod})
runContainerOpts2 := testHandler.GetDeviceRunContainerOptions(failPod, &failPod.Spec.Containers[0])
// Requesting to create a new pod with a single resourceName2 should succeed.
newPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: uuid.NewUUID(),
Spec: v1.PodSpec{
Containers: []v1.Container{
Name: string(uuid.NewUUID()),
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceName(resourceName2): resourceQuantity2,
err = testHandler.Allocate(nodeInfo, &lifecycle.PodAdmitAttributes{Pod: newPod})
runContainerOpts3 := testHandler.GetDeviceRunContainerOptions(newPod, &newPod.Spec.Containers[0])
as.Equal(1, len(runContainerOpts3.Envs))
func TestSanitizeNodeAllocatable(t *testing.T) {
resourceName1 := ""
devID1 := "dev1"
resourceName2 := ""
devID2 := "dev2"
m, err := NewDevicePluginManagerTestStub()
as := assert.New(t)
monitorCallback := func(resourceName string, added, updated, deleted []pluginapi.Device) {}
testHandler := &HandlerImpl{
devicePluginManager: m,
devicePluginManagerMonitorCallback: monitorCallback,
allDevices: make(map[string]sets.String),
allocatedDevices: make(map[string]sets.String),
podDevices: make(podDevices),
// require one of resource1 and one of resource2
testHandler.allocatedDevices[resourceName1] = sets.NewString()
testHandler.allocatedDevices[resourceName2] = sets.NewString()
cachedNode := &v1.Node{
Status: v1.NodeStatus{
Allocatable: v1.ResourceList{
// has no resource1 and two of resource2
v1.ResourceName(resourceName2): *resource.NewQuantity(int64(2), resource.DecimalSI),
nodeInfo := &schedulercache.NodeInfo{}
allocatableScalarResources := nodeInfo.AllocatableResource().ScalarResources
// allocatable in nodeInfo is less than needed, should update
as.Equal(1, int(allocatableScalarResources[v1.ResourceName(resourceName1)]))
// allocatable in nodeInfo is more than needed, should skip updating
as.Equal(2, int(allocatableScalarResources[v1.ResourceName(resourceName2)]))

View File

@ -32,7 +32,15 @@ import (
// endpoint maps to a single registered device plugin. It is responsible
// for managing gRPC communications with the device plugin and caching
// device states reported by the device plugin.
type endpoint struct {
type endpoint interface {
allocate(devs []string) (*pluginapi.AllocateResponse, error)
getDevices() []pluginapi.Device
callback(resourceName string, added, updated, deleted []pluginapi.Device)
type endpointImpl struct {
client pluginapi.DevicePluginClient
clientConn *grpc.ClientConn
@ -42,30 +50,34 @@ type endpoint struct {
devices map[string]pluginapi.Device
mutex sync.Mutex
callback MonitorCallback
cb monitorCallback
// newEndpoint creates a new endpoint for the given resourceName.
func newEndpoint(socketPath, resourceName string, devices map[string]pluginapi.Device, callback MonitorCallback) (*endpoint, error) {
func newEndpointImpl(socketPath, resourceName string, devices map[string]pluginapi.Device, callback monitorCallback) (*endpointImpl, error) {
client, c, err := dial(socketPath)
if err != nil {
glog.Errorf("Can't create new endpoint with path %s err %v", socketPath, err)
return nil, err
return &endpoint{
return &endpointImpl{
client: client,
clientConn: c,
socketPath: socketPath,
resourceName: resourceName,
devices: devices,
callback: callback,
devices: devices,
cb: callback,
}, nil
func (e *endpoint) getDevices() []pluginapi.Device {
func (e *endpointImpl) callback(resourceName string, added, updated, deleted []pluginapi.Device) {
e.cb(resourceName, added, updated, deleted)
func (e *endpointImpl) getDevices() []pluginapi.Device {
defer e.mutex.Unlock()
var devs []pluginapi.Device
@ -81,11 +93,9 @@ func (e *endpoint) getDevices() []pluginapi.Device {
// blocks on receiving ListAndWatch gRPC stream updates. Each ListAndWatch
// stream update contains a new list of device states. listAndWatch compares the new
// device states with its cached states to get list of new, updated, and deleted devices.
// It then issues a callback to pass this information to the device_plugin_handler which
// It then issues a callback to pass this information to the device manager which
// will adjust the resource available information accordingly.
func (e *endpoint) run() {
glog.V(3).Infof("Starting ListAndWatch")
func (e *endpointImpl) run() {
stream, err := e.client.ListAndWatch(context.Background(), &pluginapi.Empty{})
if err != nil {
glog.Errorf(errListAndWatch, e.resourceName, err)
@ -162,13 +172,13 @@ func (e *endpoint) run() {
// allocate issues Allocate gRPC call to the device plugin.
func (e *endpoint) allocate(devs []string) (*pluginapi.AllocateResponse, error) {
func (e *endpointImpl) allocate(devs []string) (*pluginapi.AllocateResponse, error) {
return e.client.Allocate(context.Background(), &pluginapi.AllocateRequest{
DevicesIDs: devs,
func (e *endpoint) stop() {
func (e *endpointImpl) stop() {

View File

@ -87,7 +87,7 @@ func TestRun(t *testing.T) {
func TestGetDevices(t *testing.T) {
e := endpoint{
e := endpointImpl{
devices: map[string]pluginapi.Device{
"ADeviceId": {ID: "ADeviceId", Health: pluginapi.Healthy},
@ -96,19 +96,19 @@ func TestGetDevices(t *testing.T) {
require.Len(t, devs, 1)
func esetup(t *testing.T, devs []*pluginapi.Device, socket, resourceName string, callback MonitorCallback) (*Stub, *endpoint) {
func esetup(t *testing.T, devs []*pluginapi.Device, socket, resourceName string, callback monitorCallback) (*Stub, *endpointImpl) {
p := NewDevicePluginStub(devs, socket)
err := p.Start()
require.NoError(t, err)
e, err := newEndpoint(socket, "mock", make(map[string]pluginapi.Device), func(n string, a, u, r []pluginapi.Device) {})
e, err := newEndpointImpl(socket, "mock", make(map[string]pluginapi.Device), func(n string, a, u, r []pluginapi.Device) {})
require.NoError(t, err)
return p, e
func ecleanup(t *testing.T, p *Stub, e *endpoint) {
func ecleanup(t *testing.T, p *Stub, e *endpointImpl) {

View File

@ -17,7 +17,9 @@ limitations under the License.
package deviceplugin
import (
@ -28,27 +30,58 @@ import (
v1helper ""
pluginapi ""
// ActivePodsFunc is a function that returns a list of pods to reconcile.
type ActivePodsFunc func() []*v1.Pod
// monitorCallback is the function called when a device's health state changes,
// or new devices are reported, or old devices are deleted.
// Updated contains the most recent state of the Device.
type monitorCallback func(resourceName string, added, updated, deleted []pluginapi.Device)
// ManagerImpl is the structure in charge of managing Device Plugins.
type ManagerImpl struct {
socketname string
socketdir string
endpoints map[string]*endpoint // Key is ResourceName
endpoints map[string]endpoint // Key is ResourceName
mutex sync.Mutex
callback MonitorCallback
server *grpc.Server
// activePods is a method for listing active pods on the node
// so the amount of pluginResources requested by existing pods
// could be counted when updating allocated devices
activePods ActivePodsFunc
// callback is used for updating devices' states in one time call.
// e.g. a new device is advertised, two old devices are deleted and a running device fails.
callback monitorCallback
// allDevices contains all of registered resourceNames and their exported device IDs.
allDevices map[string]sets.String
// allocatedDevices contains allocated deviceIds, keyed by resourceName.
allocatedDevices map[string]sets.String
// podDevices contains pod to allocated device mapping.
podDevices podDevices
// NewManagerImpl creates a new manager on the socket `socketPath`.
// f is the callback that is called when a device becomes unhealthy.
// socketPath is present for testing purposes in production this is pluginapi.KubeletSocket
func NewManagerImpl(socketPath string, f MonitorCallback) (*ManagerImpl, error) {
// NewManagerImpl creates a new manager. updateCapacityFunc is called to
// update ContainerManager capacity when device capacity changes.
func NewManagerImpl(updateCapacityFunc func(v1.ResourceList)) (*ManagerImpl, error) {
return newManagerImpl(updateCapacityFunc, pluginapi.KubeletSocket)
func newManagerImpl(updateCapacityFunc func(v1.ResourceList), socketPath string) (*ManagerImpl, error) {
glog.V(2).Infof("Creating Device Plugin manager at %s", socketPath)
if socketPath == "" || !filepath.IsAbs(socketPath) {
@ -56,13 +89,42 @@ func NewManagerImpl(socketPath string, f MonitorCallback) (*ManagerImpl, error)
dir, file := filepath.Split(socketPath)
return &ManagerImpl{
endpoints: make(map[string]*endpoint),
manager := &ManagerImpl{
endpoints: make(map[string]endpoint),
socketname: file,
socketdir: dir,
allDevices: make(map[string]sets.String),
allocatedDevices: make(map[string]sets.String),
podDevices: make(podDevices),
socketname: file,
socketdir: dir,
callback: f,
}, nil
manager.callback = func(resourceName string, added, updated, deleted []pluginapi.Device) {
var capacity = v1.ResourceList{}
kept := append(updated, added...)
defer manager.mutex.Unlock()
if _, ok := manager.allDevices[resourceName]; !ok {
manager.allDevices[resourceName] = sets.NewString()
// For now, Manager only keeps track of healthy devices.
// We can revisit this later when the need comes to track unhealthy devices here.
for _, dev := range kept {
if dev.Health == pluginapi.Healthy {
} else {
for _, dev := range deleted {
capacity[v1.ResourceName(resourceName)] = *resource.NewQuantity(int64(manager.allDevices[resourceName].Len()), resource.DecimalSI)
return manager, nil
func (m *ManagerImpl) removeContents(dir string) error {
@ -77,7 +139,7 @@ func (m *ManagerImpl) removeContents(dir string) error {
for _, name := range names {
filePath := filepath.Join(dir, name)
if filePath == m.CheckpointFile() {
if filePath == m.checkpointFile() {
stat, err := os.Stat(filePath)
@ -101,15 +163,25 @@ const (
kubeletDevicePluginCheckpoint = "kubelet_internal_checkpoint"
// CheckpointFile returns device plugin checkpoint file path.
func (m *ManagerImpl) CheckpointFile() string {
// checkpointFile returns device plugin checkpoint file path.
func (m *ManagerImpl) checkpointFile() string {
return filepath.Join(m.socketdir, kubeletDevicePluginCheckpoint)
// Start starts the Device Plugin Manager
func (m *ManagerImpl) Start() error {
// Start starts the Device Plugin Manager: it initializes podDevices and
// allocatedDevices information from checkpointed state and then
// starts the device plugin registration service.
func (m *ManagerImpl) Start(activePods ActivePodsFunc) error {
glog.V(2).Infof("Starting Device Plugin manager")
m.activePods = activePods
// Loads in allocatedDevices information from disk.
err := m.readCheckpoint()
if err != nil {
glog.Warningf("Continue after failing to read checkpoint file. Device allocation info may NOT be up-to-date. Err: %v", err)
socketPath := filepath.Join(m.socketdir, m.socketname)
os.MkdirAll(m.socketdir, 0755)
@ -130,6 +202,8 @@ func (m *ManagerImpl) Start() error {
pluginapi.RegisterRegistrationServer(m.server, m)
go m.server.Serve(s)
glog.V(2).Infof("Serving device plugin registration server on %q", socketPath)
return nil
@ -150,22 +224,27 @@ func (m *ManagerImpl) Devices() map[string][]pluginapi.Device {
// Allocate is the call that you can use to allocate a set of devices
// from the registered device plugins.
func (m *ManagerImpl) Allocate(resourceName string, devs []string) (*pluginapi.AllocateResponse, error) {
if len(devs) == 0 {
return nil, nil
func (m *ManagerImpl) Allocate(node *schedulercache.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
pod := attrs.Pod
// TODO: Reuse devices between init containers and regular containers.
for _, container := range pod.Spec.InitContainers {
if err := m.allocateContainerResources(pod, &container); err != nil {
return err
for _, container := range pod.Spec.Containers {
if err := m.allocateContainerResources(pod, &container); err != nil {
return err
glog.V(3).Infof("Recieved allocation request for devices %v for device plugin %s",
devs, resourceName)
e, ok := m.endpoints[resourceName]
if !ok {
return nil, fmt.Errorf("Unknown Device Plugin %s", resourceName)
// quick return if no pluginResources requested
if _, podRequireDevicePluginResource := m.podDevices[string(pod.UID)]; !podRequireDevicePluginResource {
return nil
return e.allocate(devs)
return nil
// Register registers a device plugin.
@ -211,12 +290,16 @@ func (m *ManagerImpl) addEndpoint(r *pluginapi.RegisterRequest) {
if ok && old != nil {
// Pass devices of previous endpoint into re-registered one,
// to avoid potential orphaned devices upon re-registration
existingDevs = old.devices
devices := make(map[string]pluginapi.Device)
for _, device := range old.getDevices() {
devices[device.ID] = device
existingDevs = devices
socketPath := filepath.Join(m.socketdir, r.Endpoint)
e, err := newEndpoint(socketPath, r.ResourceName, existingDevs, m.callback)
e, err := newEndpointImpl(socketPath, r.ResourceName, existingDevs, m.callback)
if err != nil {
glog.Errorf("Failed to dial device plugin with request %v: %v", r, err)
@ -259,3 +342,212 @@ func (m *ManagerImpl) addEndpoint(r *pluginapi.RegisterRequest) {
// Checkpoints device to container allocation information to disk.
func (m *ManagerImpl) writeCheckpoint() error {
data := m.podDevices.toCheckpointData()
dataJSON, err := json.Marshal(data)
if err != nil {
return err
filepath := m.checkpointFile()
return ioutil.WriteFile(filepath, dataJSON, 0644)
// Reads device to container allocation information from disk, and populates
// m.allocatedDevices accordingly.
func (m *ManagerImpl) readCheckpoint() error {
filepath := m.checkpointFile()
content, err := ioutil.ReadFile(filepath)
if err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to read checkpoint file %q: %v", filepath, err)
glog.V(2).Infof("Read checkpoint file %s\n", filepath)
var data checkpointData
if err := json.Unmarshal(content, &data); err != nil {
return fmt.Errorf("failed to unmarshal checkpoint data: %v", err)
defer m.mutex.Unlock()
m.allocatedDevices = m.podDevices.devices()
return nil
// updateAllocatedDevices gets a list of active pods and then frees any Devices that are bound to
// terminated pods.
func (m *ManagerImpl) updateAllocatedDevices(activePods []*v1.Pod) {
defer m.mutex.Unlock()
activePodUids := sets.NewString()
for _, pod := range activePods {
allocatedPodUids := m.podDevices.pods()
podsToBeRemoved := allocatedPodUids.Difference(activePodUids)
if len(podsToBeRemoved) <= 0 {
glog.V(5).Infof("pods to be removed: %v", podsToBeRemoved.List())
// Regenerated allocatedDevices after we update pod allocation information.
m.allocatedDevices = m.podDevices.devices()
// Returns list of device Ids we need to allocate with Allocate rpc call.
// Returns empty list in case we don't need to issue the Allocate rpc call.
func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, required int) (sets.String, error) {
defer m.mutex.Unlock()
needed := required
// Gets list of devices that have already been allocated.
// This can happen if a container restarts for example.
devices := m.podDevices.containerDevices(podUID, contName, resource)
if devices != nil {
glog.V(3).Infof("Found pre-allocated devices for resource %s container %q in Pod %q: %v", resource, contName, podUID, devices.List())
needed = needed - devices.Len()
// A pod's resource is not expected to change once admitted by the API server,
// so just fail loudly here. We can revisit this part if this no longer holds.
if needed != 0 {
return nil, fmt.Errorf("pod %v container %v changed request for resource %v from %v to %v", podUID, contName, resource, devices.Len(), required)
if needed == 0 {
// No change, no work.
return nil, nil
devices = sets.NewString()
// Needs to allocate additional devices.
if m.allocatedDevices[resource] == nil {
m.allocatedDevices[resource] = sets.NewString()
// Gets Devices in use.
devicesInUse := m.allocatedDevices[resource]
// Gets a list of available devices.
available := m.allDevices[resource].Difference(devicesInUse)
if int(available.Len()) < needed {
return nil, fmt.Errorf("requested number of devices unavailable for %s. Requested: %d, Available: %d", resource, needed, available.Len())
allocated := available.UnsortedList()[:needed]
// Updates m.allocatedDevices with allocated devices to prevent them
// from being allocated to other pods/containers, given that we are
// not holding lock during the rpc call.
for _, device := range allocated {
return devices, nil
// allocateContainerResources attempts to allocate all of required device
// plugin resources for the input container, issues an Allocate rpc request
// for each new device resource requirement, processes their AllocateResponses,
// and updates the cached containerDevices on success.
func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Container) error {
podUID := string(pod.UID)
contName := container.Name
allocatedDevicesUpdated := false
for k, v := range container.Resources.Limits {
resource := string(k)
needed := int(v.Value())
glog.V(3).Infof("needs %d %s", needed, resource)
if _, registeredResource := m.allDevices[resource]; !registeredResource {
// Updates allocatedDevices to garbage collect any stranded resources
// before doing the device plugin allocation.
if !allocatedDevicesUpdated {
allocatedDevicesUpdated = true
allocDevices, err := m.devicesToAllocate(podUID, contName, resource, needed)
if err != nil {
return err
if allocDevices == nil || len(allocDevices) <= 0 {
// devicePluginManager.Allocate involves RPC calls to device plugin, which
// could be heavy-weight. Therefore we want to perform this operation outside
// mutex lock. Note if Allocate call fails, we may leave container resources
// partially allocated for the failed container. We rely on updateAllocatedDevices()
// to garbage collect these resources later. Another side effect is that if
// we have X resource A and Y resource B in total, and two containers, container1
// and container2 both require X resource A and Y resource B. Both allocation
// requests may fail if we serve them in mixed order.
// TODO: may revisit this part later if we see inefficient resource allocation
// in real use as the result of this. Should also consider parallelizing device
// plugin Allocate grpc calls if it becomes common that a container may require
// resources from multiple device plugins.
e, ok := m.endpoints[resource]
if !ok {
m.allocatedDevices = m.podDevices.devices()
return fmt.Errorf("Unknown Device Plugin %s", resource)
devs := allocDevices.UnsortedList()
glog.V(3).Infof("Making allocation request for devices %v for device plugin %s", devs, resource)
resp, err := e.allocate(devs)
if err != nil {
// In case of allocation failure, we want to restore m.allocatedDevices
// to the actual allocated state from m.podDevices.
m.allocatedDevices = m.podDevices.devices()
return err
// Update internal cached podDevices state.
m.podDevices.insert(podUID, contName, resource, allocDevices, resp)
// Checkpoints device to container allocation information.
return m.writeCheckpoint()
// GetDeviceRunContainerOptions checks whether we have cached containerDevices
// for the passed-in <pod, container> and returns its DeviceRunContainerOptions
// for the found one. An empty struct is returned in case no cached state is found.
func (m *ManagerImpl) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Container) *DeviceRunContainerOptions {
defer m.mutex.Unlock()
return m.podDevices.deviceRunContainerOptions(string(pod.UID), container.Name)
// sanitizeNodeAllocatable scans through allocatedDevices in the device manager
// and if necessary, updates allocatableResource in nodeInfo to at least equal to
// the allocated capacity. This allows pods that have already been scheduled on
// the node to pass GeneralPredicates admission checking even upon device plugin failure.
func (m *ManagerImpl) sanitizeNodeAllocatable(node *schedulercache.NodeInfo) {
var newAllocatableResource *schedulercache.Resource
allocatableResource := node.AllocatableResource()
if allocatableResource.ScalarResources == nil {
allocatableResource.ScalarResources = make(map[v1.ResourceName]int64)
for resource, devices := range m.allocatedDevices {
needed := devices.Len()
quant, ok := allocatableResource.ScalarResources[v1.ResourceName(resource)]
if ok && int(quant) >= needed {
// Needs to update nodeInfo.AllocatableResource to make sure
// NodeInfo.allocatableResource at least equal to the capacity already allocated.
if newAllocatableResource == nil {
newAllocatableResource = allocatableResource.Clone()
newAllocatableResource.ScalarResources[v1.ResourceName(resource)] = int64(needed)
if newAllocatableResource != nil {

View File

@ -23,30 +23,35 @@ import (
// HandlerStub provides a simple stub implementation for Handler.
type HandlerStub struct{}
// ManagerStub provides a simple stub implementation for the Device Manager.
type ManagerStub struct{}
// NewHandlerStub creates a HandlerStub.
func NewHandlerStub() (*HandlerStub, error) {
return &HandlerStub{}, nil
// NewManagerStub creates a ManagerStub.
func NewManagerStub() (*ManagerStub, error) {
return &ManagerStub{}, nil
// Start simply returns nil.
func (h *HandlerStub) Start(activePods ActivePodsFunc) error {
func (h *ManagerStub) Start(activePods ActivePodsFunc) error {
return nil
// Stop simply returns nil.
func (h *ManagerStub) Stop() error {
return nil
// Devices returns an empty map.
func (h *HandlerStub) Devices() map[string][]pluginapi.Device {
func (h *ManagerStub) Devices() map[string][]pluginapi.Device {
return make(map[string][]pluginapi.Device)
// Allocate simply returns nil.
func (h *HandlerStub) Allocate(node *schedulercache.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
func (h *ManagerStub) Allocate(node *schedulercache.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
return nil
// GetDeviceRunContainerOptions simply returns nil.
func (h *HandlerStub) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Container) *DeviceRunContainerOptions {
func (h *ManagerStub) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Container) *DeviceRunContainerOptions {
return nil

View File

@ -17,13 +17,23 @@ limitations under the License.
package deviceplugin
import (
metav1 ""
pluginapi ""
const (
@ -33,10 +43,8 @@ const (
func TestNewManagerImpl(t *testing.T) {
_, err := NewManagerImpl("", func(n string, a, u, r []pluginapi.Device) {})
require.Error(t, err)
_, err = NewManagerImpl(socketName, func(n string, a, u, r []pluginapi.Device) {})
verifyCapacityFunc := func(updates v1.ResourceList) {}
_, err := newManagerImpl(verifyCapacityFunc, socketName)
require.NoError(t, err)
@ -72,6 +80,7 @@ func TestDevicePluginReRegistration(t *testing.T) {
m, p1 := setup(t, devs, callback)
p1.Register(socketName, testResourceName)
// Wait for the first callback to be issued.
// Wait till the endpoint is added to the manager.
for i := 0; i < 20; i++ {
@ -113,10 +122,17 @@ func TestDevicePluginReRegistration(t *testing.T) {
func setup(t *testing.T, devs []*pluginapi.Device, callback MonitorCallback) (Manager, *Stub) {
m, err := NewManagerImpl(socketName, callback)
func setup(t *testing.T, devs []*pluginapi.Device, callback monitorCallback) (Manager, *Stub) {
updateCapacity := func(v1.ResourceList) {}
m, err := newManagerImpl(updateCapacity, socketName)
require.NoError(t, err)
err = m.Start()
m.callback = callback
activePods := func() []*v1.Pod {
return []*v1.Pod{}
err = m.Start(activePods)
require.NoError(t, err)
p := NewDevicePluginStub(devs, pluginSocketName)
@ -130,3 +146,387 @@ func cleanup(t *testing.T, m Manager, p *Stub) {
func TestUpdateCapacity(t *testing.T) {
var expected = v1.ResourceList{}
as := assert.New(t)
verifyCapacityFunc := func(updates v1.ResourceList) {
as.Equal(expected, updates)
testManager, err := newManagerImpl(verifyCapacityFunc, socketName)
devs := []pluginapi.Device{
{ID: "Device1", Health: pluginapi.Healthy},
{ID: "Device2", Health: pluginapi.Healthy},
{ID: "Device3", Health: pluginapi.Unhealthy},
resourceName := "resource1"
// Adds three devices for resource1, two healthy and one unhealthy.
// Expects capacity for resource1 to be 2.
expected[v1.ResourceName(resourceName)] = *resource.NewQuantity(int64(2), resource.DecimalSI)
testManager.callback(resourceName, devs, []pluginapi.Device{}, []pluginapi.Device{})
// Deleting an unhealthy device should NOT change capacity.
testManager.callback(resourceName, []pluginapi.Device{}, []pluginapi.Device{}, []pluginapi.Device{devs[2]})
// Updating a healthy device to unhealthy should reduce capacity by 1.
expected[v1.ResourceName(resourceName)] = *resource.NewQuantity(int64(1), resource.DecimalSI)
// Deleting a healthy device should reduce capacity by 1.
expected[v1.ResourceName(resourceName)] = *resource.NewQuantity(int64(0), resource.DecimalSI)
// Tests adding another resource.
delete(expected, v1.ResourceName(resourceName))
resourceName2 := "resource2"
expected[v1.ResourceName(resourceName2)] = *resource.NewQuantity(int64(2), resource.DecimalSI)
testManager.callback(resourceName2, devs, []pluginapi.Device{}, []pluginapi.Device{})
type stringPairType struct {
value1 string
value2 string
func constructDevices(devices []string) sets.String {
ret := sets.NewString()
for _, dev := range devices {
return ret
func constructAllocResp(devices, mounts, envs map[string]string) *pluginapi.AllocateResponse {
resp := &pluginapi.AllocateResponse{}
for k, v := range devices {
resp.Devices = append(resp.Devices, &pluginapi.DeviceSpec{
HostPath: k,
ContainerPath: v,
Permissions: "mrw",
for k, v := range mounts {
resp.Mounts = append(resp.Mounts, &pluginapi.Mount{
ContainerPath: k,
HostPath: v,
ReadOnly: true,
resp.Envs = make(map[string]string)
for k, v := range envs {
resp.Envs[k] = v
return resp
func TestCheckpoint(t *testing.T) {
resourceName1 := ""
resourceName2 := ""
testManager := &ManagerImpl{
allDevices: make(map[string]sets.String),
allocatedDevices: make(map[string]sets.String),
podDevices: make(podDevices),
testManager.podDevices.insert("pod1", "con1", resourceName1,
constructDevices([]string{"dev1", "dev2"}),
constructAllocResp(map[string]string{"/dev/r1dev1": "/dev/r1dev1", "/dev/r1dev2": "/dev/r1dev2"},
map[string]string{"/home/r1lib1": "/usr/r1lib1"}, map[string]string{}))
testManager.podDevices.insert("pod1", "con1", resourceName2,
constructDevices([]string{"dev1", "dev2"}),
constructAllocResp(map[string]string{"/dev/r2dev1": "/dev/r2dev1", "/dev/r2dev2": "/dev/r2dev2"},
map[string]string{"/home/r2lib1": "/usr/r2lib1"},
map[string]string{"r2devices": "dev1 dev2"}))
testManager.podDevices.insert("pod1", "con2", resourceName1,
constructAllocResp(map[string]string{"/dev/r1dev3": "/dev/r1dev3"},
map[string]string{"/home/r1lib1": "/usr/r1lib1"}, map[string]string{}))
testManager.podDevices.insert("pod2", "con1", resourceName1,
constructAllocResp(map[string]string{"/dev/r1dev4": "/dev/r1dev4"},
map[string]string{"/home/r1lib1": "/usr/r1lib1"}, map[string]string{}))
expectedPodDevices := testManager.podDevices
expectedAllocatedDevices := testManager.podDevices.devices()
err := testManager.writeCheckpoint()
as := assert.New(t)
testManager.podDevices = make(podDevices)
err = testManager.readCheckpoint()
as.Equal(len(expectedPodDevices), len(testManager.podDevices))
for podUID, containerDevices := range expectedPodDevices {
for conName, resources := range containerDevices {
for resource := range resources {
expectedPodDevices.containerDevices(podUID, conName, resource),
testManager.podDevices.containerDevices(podUID, conName, resource)))
opts1 := expectedPodDevices.deviceRunContainerOptions(podUID, conName)
opts2 := testManager.podDevices.deviceRunContainerOptions(podUID, conName)
as.Equal(len(opts1.Envs), len(opts2.Envs))
as.Equal(len(opts1.Mounts), len(opts2.Mounts))
as.Equal(len(opts1.Devices), len(opts2.Devices))
as.True(reflect.DeepEqual(expectedAllocatedDevices, testManager.allocatedDevices))
type activePodsStub struct {
activePods []*v1.Pod
func (a *activePodsStub) getActivePods() []*v1.Pod {
return a.activePods
func (a *activePodsStub) updateActivePods(newPods []*v1.Pod) {
a.activePods = newPods
type MockEndpoint struct {
allocateFunc func(devs []string) (*pluginapi.AllocateResponse, error)
func (m *MockEndpoint) stop() {}
func (m *MockEndpoint) run() {}
func (m *MockEndpoint) getDevices() []pluginapi.Device {
return []pluginapi.Device{}
func (m *MockEndpoint) callback(resourceName string, added, updated, deleted []pluginapi.Device) {}
func (m *MockEndpoint) allocate(devs []string) (*pluginapi.AllocateResponse, error) {
if m.allocateFunc != nil {
return m.allocateFunc(devs)
return nil, nil
func TestPodContainerDeviceAllocation(t *testing.T) {
flag.Set("alsologtostderr", fmt.Sprintf("%t", true))
var logLevel string
flag.StringVar(&logLevel, "logLevel", "4", "test")
resourceName1 := ""
resourceQuantity1 := *resource.NewQuantity(int64(2), resource.DecimalSI)
devID1 := "dev1"
devID2 := "dev2"
resourceName2 := ""
resourceQuantity2 := *resource.NewQuantity(int64(1), resource.DecimalSI)
devID3 := "dev3"
devID4 := "dev4"
as := require.New(t)
monitorCallback := func(resourceName string, added, updated, deleted []pluginapi.Device) {}
podsStub := activePodsStub{
activePods: []*v1.Pod{},
cachedNode := &v1.Node{
Status: v1.NodeStatus{
Allocatable: v1.ResourceList{},
nodeInfo := &schedulercache.NodeInfo{}
testManager := &ManagerImpl{
callback: monitorCallback,
allDevices: make(map[string]sets.String),
allocatedDevices: make(map[string]sets.String),
endpoints: make(map[string]endpoint),
podDevices: make(podDevices),
activePods: podsStub.getActivePods,
testManager.allDevices[resourceName1] = sets.NewString()
testManager.allDevices[resourceName2] = sets.NewString()
testManager.endpoints[resourceName1] = &MockEndpoint{
allocateFunc: func(devs []string) (*pluginapi.AllocateResponse, error) {
resp := new(pluginapi.AllocateResponse)
resp.Envs = make(map[string]string)
for _, dev := range devs {
switch dev {
case "dev1":
resp.Devices = append(resp.Devices, &pluginapi.DeviceSpec{
ContainerPath: "/dev/aaa",
HostPath: "/dev/aaa",
Permissions: "mrw",
resp.Devices = append(resp.Devices, &pluginapi.DeviceSpec{
ContainerPath: "/dev/bbb",
HostPath: "/dev/bbb",
Permissions: "mrw",
resp.Mounts = append(resp.Mounts, &pluginapi.Mount{
ContainerPath: "/container_dir1/file1",
HostPath: "host_dir1/file1",
ReadOnly: true,
case "dev2":
resp.Devices = append(resp.Devices, &pluginapi.DeviceSpec{
ContainerPath: "/dev/ccc",
HostPath: "/dev/ccc",
Permissions: "mrw",
resp.Mounts = append(resp.Mounts, &pluginapi.Mount{
ContainerPath: "/container_dir1/file2",
HostPath: "host_dir1/file2",
ReadOnly: true,
resp.Envs["key1"] = "val1"
return resp, nil
testManager.endpoints[resourceName2] = &MockEndpoint{
allocateFunc: func(devs []string) (*pluginapi.AllocateResponse, error) {
resp := new(pluginapi.AllocateResponse)
resp.Envs = make(map[string]string)
for _, dev := range devs {
switch dev {
case "dev3":
resp.Envs["key2"] = "val2"
case "dev4":
resp.Envs["key2"] = "val3"
return resp, nil
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: uuid.NewUUID(),
Spec: v1.PodSpec{
Containers: []v1.Container{
Name: string(uuid.NewUUID()),
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceName(resourceName1): resourceQuantity1,
v1.ResourceName("cpu"): resourceQuantity1,
v1.ResourceName(resourceName2): resourceQuantity2,
err := testManager.Allocate(nodeInfo, &lifecycle.PodAdmitAttributes{Pod: pod})
runContainerOpts := testManager.GetDeviceRunContainerOptions(pod, &pod.Spec.Containers[0])
as.Equal(len(runContainerOpts.Devices), 3)
as.Equal(len(runContainerOpts.Mounts), 2)
as.Equal(len(runContainerOpts.Envs), 2)
// Requesting to create a pod without enough resources should fail.
as.Equal(2, testManager.allocatedDevices[resourceName1].Len())
failPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: uuid.NewUUID(),
Spec: v1.PodSpec{
Containers: []v1.Container{
Name: string(uuid.NewUUID()),
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceName(resourceName1): resourceQuantity2,
err = testManager.Allocate(nodeInfo, &lifecycle.PodAdmitAttributes{Pod: failPod})
runContainerOpts2 := testManager.GetDeviceRunContainerOptions(failPod, &failPod.Spec.Containers[0])
// Requesting to create a new pod with a single resourceName2 should succeed.
newPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: uuid.NewUUID(),
Spec: v1.PodSpec{
Containers: []v1.Container{
Name: string(uuid.NewUUID()),
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceName(resourceName2): resourceQuantity2,
err = testManager.Allocate(nodeInfo, &lifecycle.PodAdmitAttributes{Pod: newPod})
runContainerOpts3 := testManager.GetDeviceRunContainerOptions(newPod, &newPod.Spec.Containers[0])
as.Equal(1, len(runContainerOpts3.Envs))
func TestSanitizeNodeAllocatable(t *testing.T) {
resourceName1 := ""
devID1 := "dev1"
resourceName2 := ""
devID2 := "dev2"
as := assert.New(t)
monitorCallback := func(resourceName string, added, updated, deleted []pluginapi.Device) {}
testManager := &ManagerImpl{
callback: monitorCallback,
allDevices: make(map[string]sets.String),
allocatedDevices: make(map[string]sets.String),
podDevices: make(podDevices),
// require one of resource1 and one of resource2
testManager.allocatedDevices[resourceName1] = sets.NewString()
testManager.allocatedDevices[resourceName2] = sets.NewString()
cachedNode := &v1.Node{
Status: v1.NodeStatus{
Allocatable: v1.ResourceList{
// has no resource1 and two of resource2
v1.ResourceName(resourceName2): *resource.NewQuantity(int64(2), resource.DecimalSI),
nodeInfo := &schedulercache.NodeInfo{}
allocatableScalarResources := nodeInfo.AllocatableResource().ScalarResources
// allocatable in nodeInfo is less than needed, should update
as.Equal(1, int(allocatableScalarResources[v1.ResourceName(resourceName1)]))
// allocatable in nodeInfo is more than needed, should skip updating
as.Equal(2, int(allocatableScalarResources[v1.ResourceName(resourceName2)]))

View File

@ -116,6 +116,11 @@ func (pdev podDevices) toCheckpointData() checkpointData {
for conName, resources := range containerDevices {
for resource, devices := range resources {
devIds := devices.deviceIds.UnsortedList()
if devices.allocResp == nil {
glog.Errorf("Can't marshal allocResp for %v %v %v: allocation response is missing", podUID, conName, resource)
allocResp, err := devices.allocResp.Marshal()
if err != nil {
glog.Errorf("Can't marshal allocResp for %v %v %v: %v", podUID, conName, resource, err)

View File

@ -17,34 +17,40 @@ limitations under the License.
package deviceplugin
import (
pluginapi ""
kubecontainer ""
// MonitorCallback is the function called when a device's health state changes,
// or new devices are reported, or old devices are deleted.
// Updated contains the most recent state of the Device.
type MonitorCallback func(resourceName string, added, updated, deleted []pluginapi.Device)
// Manager manages all the Device Plugins running on a node.
type Manager interface {
// Start starts the gRPC Registration service.
Start() error
// Start starts device plugin registration service.
Start(activePods ActivePodsFunc) error
// Devices is the map of devices that have registered themselves
// against the manager.
// The map key is the ResourceName of the device plugins.
Devices() map[string][]pluginapi.Device
// Allocate takes resourceName and list of device Ids, and calls the
// gRPC Allocate on the device plugin matching the resourceName.
Allocate(string, []string) (*pluginapi.AllocateResponse, error)
// Allocate configures and assigns devices to pods. The pods are provided
// through the pod admission attributes in the attrs argument. From the
// requested device resources, Allocate will communicate with the owning
// device plugin to allow setup procedures to take place, and for the
// device plugin to provide runtime settings to use the device (environment
// variables, mount points and device files). The node object is provided
// for the device manager to update the node capacity to reflect the
// currently available devices.
Allocate(node *schedulercache.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error
// Stop stops the manager.
Stop() error
// Returns checkpoint file path.
CheckpointFile() string
// GetDeviceRunContainerOptions checks whether we have cached containerDevices
// for the passed-in <pod, container> and returns its DeviceRunContainerOptions
// for the found one. An empty struct is returned in case no cached state is found.
GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Container) *DeviceRunContainerOptions
// DeviceRunContainerOptions contains the combined container runtime settings to consume its allocated devices.

View File

@ -217,7 +217,8 @@ type Builder func(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
registerSchedulable bool,
nonMasqueradeCIDR string,
keepTerminatedPodVolumes bool,
nodeLabels map[string]string) (Bootstrap, error)
nodeLabels map[string]string,
seccompProfileRoot string) (Bootstrap, error)
// Dependencies is a bin for things we might consider "injected dependencies" -- objects constructed
// at runtime that are necessary for running the Kubelet. This is a temporary solution for grouping
@ -343,7 +344,8 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
registerSchedulable bool,
nonMasqueradeCIDR string,
keepTerminatedPodVolumes bool,
nodeLabels map[string]string) (*Kubelet, error) {
nodeLabels map[string]string,
seccompProfileRoot string) (*Kubelet, error) {
if rootDirectory == "" {
return nil, fmt.Errorf("invalid root directory %q", rootDirectory)
@ -657,7 +659,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
runtime, err := kuberuntime.NewKubeGenericRuntimeManager(

View File

@ -1136,6 +1136,9 @@ func describePersistentVolume(pv *api.PersistentVolume, events *api.EventList) (
w.Write(LEVEL_0, "Reclaim Policy:\t%v\n", pv.Spec.PersistentVolumeReclaimPolicy)
w.Write(LEVEL_0, "Access Modes:\t%s\n", helper.GetAccessModesAsString(pv.Spec.AccessModes))
if pv.Spec.VolumeMode != nil {
w.Write(LEVEL_0, "VolumeMode:\t%v\n", *pv.Spec.VolumeMode)
storage := pv.Spec.Capacity[api.ResourceStorage]
w.Write(LEVEL_0, "Capacity:\t%s\n", storage.String())
w.Write(LEVEL_0, "Message:\t%s\n", pv.Status.Message)
@ -1235,6 +1238,9 @@ func describePersistentVolumeClaim(pvc *api.PersistentVolumeClaim, events *api.E
w.Write(LEVEL_0, "Capacity:\t%s\n", capacity)
w.Write(LEVEL_0, "Access Modes:\t%s\n", accessModes)
if pvc.Spec.VolumeMode != nil {
w.Write(LEVEL_0, "VolumeMode:\t%v\n", *pvc.Spec.VolumeMode)
if events != nil {
DescribeEvents(events, w)
@ -1365,6 +1371,7 @@ func describeContainerProbe(container api.Container, w PrefixWriter) {
func describeContainerVolumes(container api.Container, w PrefixWriter) {
// Show volumeMounts
none := ""
if len(container.VolumeMounts) == 0 {
none = "\t<none>"
@ -1383,6 +1390,14 @@ func describeContainerVolumes(container api.Container, w PrefixWriter) {
w.Write(LEVEL_3, "%s from %s (%s)\n", mount.MountPath, mount.Name, strings.Join(flags, ","))
// Show volumeDevices if exists
if len(container.VolumeDevices) > 0 {
w.Write(LEVEL_2, "Devices:%s\n", none)
for _, device := range container.VolumeDevices {
w.Write(LEVEL_3, "%s from %s\n", device.DevicePath, device.Name)
func describeContainerEnvVars(container api.Container, resolverFn EnvVarResolverFunc, w PrefixWriter) {
@ -3803,6 +3818,20 @@ func (list SortableVolumeMounts) Less(i, j int) bool {
return list[i].MountPath < list[j].MountPath
// SortableVolumeDevices is a slice of api.VolumeDevice whose Len, Swap and
// Less methods order its elements by ascending DevicePath.
type SortableVolumeDevices []api.VolumeDevice
// Len returns the number of devices in the list.
func (list SortableVolumeDevices) Len() int {
return len(list)
// Swap exchanges the devices at indices i and j.
func (list SortableVolumeDevices) Swap(i, j int) {
list[i], list[j] = list[j], list[i]
// Less reports whether the device at index i has a DevicePath that compares
// lower than the DevicePath of the device at index j.
func (list SortableVolumeDevices) Less(i, j int) bool {
return list[i].DevicePath < list[j].DevicePath
// TODO: get rid of this and plumb the caller correctly
func versionedExtensionsClientV1beta1(internalClient clientset.Interface) clientextensionsv1beta1.ExtensionsV1beta1Interface {
if internalClient == nil {

View File

@ -634,6 +634,50 @@ func TestDescribeContainers(t *testing.T) {
expectedElements: []string{"cpu", "1k", "memory", "4G", "storage", "20G"},
// volumeMounts read/write
container: api.Container{
Name: "test",
Image: "image",
VolumeMounts: []api.VolumeMount{
Name: "mounted-volume",
MountPath: "/opt/",
expectedElements: []string{"mounted-volume", "/opt/", "(rw)"},
// volumeMounts readonly
container: api.Container{
Name: "test",
Image: "image",
VolumeMounts: []api.VolumeMount{
Name: "mounted-volume",
MountPath: "/opt/",
ReadOnly: true,
expectedElements: []string{"Mounts", "mounted-volume", "/opt/", "(ro)"},
// volumeDevices
container: api.Container{
Name: "test",
Image: "image",
VolumeDevices: []api.VolumeDevice{
Name: "volume-device",
DevicePath: "/dev/xvda",
expectedElements: []string{"Devices", "volume-device", "/dev/xvda"},
for i, testCase := range testCases {
@ -815,99 +859,237 @@ func TestGetPodsTotalRequests(t *testing.T) {
func TestPersistentVolumeDescriber(t *testing.T) {
tests := map[string]*api.PersistentVolume{
"hostpath": {
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
HostPath: &api.HostPathVolumeSource{Type: new(api.HostPathType)},
block := api.PersistentVolumeBlock
file := api.PersistentVolumeFilesystem
testCases := []struct {
plugin string
pv *api.PersistentVolume
expectedElements []string
unexpectedElements []string
plugin: "hostpath",
pv: &api.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
HostPath: &api.HostPathVolumeSource{Type: new(api.HostPathType)},
unexpectedElements: []string{"VolumeMode", "Filesystem"},
"gce": {
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
GCEPersistentDisk: &api.GCEPersistentDiskVolumeSource{},
plugin: "gce",
pv: &api.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
GCEPersistentDisk: &api.GCEPersistentDiskVolumeSource{},
VolumeMode: &file,
expectedElements: []string{"VolumeMode", "Filesystem"},
"ebs": {
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
AWSElasticBlockStore: &api.AWSElasticBlockStoreVolumeSource{},
plugin: "ebs",
pv: &api.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
AWSElasticBlockStore: &api.AWSElasticBlockStoreVolumeSource{},
unexpectedElements: []string{"VolumeMode", "Filesystem"},
"nfs": {
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
NFS: &api.NFSVolumeSource{},
plugin: "nfs",
pv: &api.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
NFS: &api.NFSVolumeSource{},
unexpectedElements: []string{"VolumeMode", "Filesystem"},
"iscsi": {
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
ISCSI: &api.ISCSIPersistentVolumeSource{},
plugin: "iscsi",
pv: &api.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
ISCSI: &api.ISCSIPersistentVolumeSource{},
VolumeMode: &block,
expectedElements: []string{"VolumeMode", "Block"},
"gluster": {
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
Glusterfs: &api.GlusterfsVolumeSource{},
plugin: "gluster",
pv: &api.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
Glusterfs: &api.GlusterfsVolumeSource{},
unexpectedElements: []string{"VolumeMode", "Filesystem"},
"rbd": {
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
RBD: &api.RBDPersistentVolumeSource{},
plugin: "rbd",
pv: &api.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
RBD: &api.RBDPersistentVolumeSource{},
unexpectedElements: []string{"VolumeMode", "Filesystem"},
"quobyte": {
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
Quobyte: &api.QuobyteVolumeSource{},
plugin: "quobyte",
pv: &api.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
Quobyte: &api.QuobyteVolumeSource{},
unexpectedElements: []string{"VolumeMode", "Filesystem"},
"cinder": {
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
Cinder: &api.CinderVolumeSource{},
plugin: "cinder",
pv: &api.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
Cinder: &api.CinderVolumeSource{},
unexpectedElements: []string{"VolumeMode", "Filesystem"},
"fc": {
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
FC: &api.FCVolumeSource{},
plugin: "fc",
pv: &api.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Spec: api.PersistentVolumeSpec{
PersistentVolumeSource: api.PersistentVolumeSource{
FC: &api.FCVolumeSource{},
VolumeMode: &block,
expectedElements: []string{"VolumeMode", "Block"},
for name, pv := range tests {
fake := fake.NewSimpleClientset(pv)
for _, test := range testCases {
fake := fake.NewSimpleClientset(test.pv)
c := PersistentVolumeDescriber{fake}
str, err := c.Describe("foo", "bar", printers.DescriberSettings{ShowEvents: true})
if err != nil {
t.Errorf("Unexpected error for test %s: %v", name, err)
t.Errorf("Unexpected error for test %s: %v", test.plugin, err)
if str == "" {
t.Errorf("Unexpected empty string for test %s. Expected PV Describer output", name)
t.Errorf("Unexpected empty string for test %s. Expected PV Describer output", test.plugin)
for _, expected := range test.expectedElements {
if !strings.Contains(str, expected) {
t.Errorf("expected to find %q in output: %q", expected, str)
for _, unexpected := range test.unexpectedElements {
if strings.Contains(str, unexpected) {
t.Errorf("unexpected to find %q in output: %q", unexpected, str)
func TestPersistentVolumeClaimDescriber(t *testing.T) {
block := api.PersistentVolumeBlock
file := api.PersistentVolumeFilesystem
goldClassName := "gold"
testCases := []struct {
name string
pvc *api.PersistentVolumeClaim
expectedElements []string
unexpectedElements []string
name: "default",
pvc: &api.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{Namespace: "foo", Name: "bar"},
Spec: api.PersistentVolumeClaimSpec{
VolumeName: "volume1",
StorageClassName: &goldClassName,
Status: api.PersistentVolumeClaimStatus{
Phase: api.ClaimBound,
unexpectedElements: []string{"VolumeMode", "Filesystem"},
name: "filesystem",
pvc: &api.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{Namespace: "foo", Name: "bar"},
Spec: api.PersistentVolumeClaimSpec{
VolumeName: "volume2",
StorageClassName: &goldClassName,
VolumeMode: &file,
Status: api.PersistentVolumeClaimStatus{
Phase: api.ClaimBound,
expectedElements: []string{"VolumeMode", "Filesystem"},
name: "block",
pvc: &api.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{Namespace: "foo", Name: "bar"},
Spec: api.PersistentVolumeClaimSpec{
VolumeName: "volume3",
StorageClassName: &goldClassName,
VolumeMode: &block,
Status: api.PersistentVolumeClaimStatus{
Phase: api.ClaimBound,
expectedElements: []string{"VolumeMode", "Block"},
for _, test := range testCases {
fake := fake.NewSimpleClientset(test.pvc)
c := PersistentVolumeClaimDescriber{fake}
str, err := c.Describe("foo", "bar", printers.DescriberSettings{ShowEvents: true})
if err != nil {
t.Errorf("Unexpected error for test %s: %v",, err)
if str == "" {
t.Errorf("Unexpected empty string for test %s. Expected PVC Describer output",
for _, expected := range test.expectedElements {
if !strings.Contains(str, expected) {
t.Errorf("expected to find %q in output: %q", expected, str)
for _, unexpected := range test.unexpectedElements {
if strings.Contains(str, unexpected) {
t.Errorf("unexpected to find %q in output: %q", unexpected, str)

View File

@ -76,7 +76,7 @@ func (attacher *vsphereVMDKAttacher) Attach(spec *volume.Spec, nodeName types.No
// vsphereCloud.AttachDisk checks if disk is already attached to host and
// succeeds in that case, so no need to do that separately.
diskUUID, err := attacher.vsphereVolumes.AttachDisk(volumeSource.VolumePath, volumeSource.StoragePolicyID, nodeName)
diskUUID, err := attacher.vsphereVolumes.AttachDisk(volumeSource.VolumePath, volumeSource.StoragePolicyName, nodeName)
if err != nil {
glog.Errorf("Error attaching volume %q to node %q: %+v", volumeSource.VolumePath, nodeName, err)
return "", err

View File

@ -19,6 +19,7 @@ filegroup(

View File

@ -0,0 +1,42 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
name = "go_default_library",
srcs = ["admission.go"],
importpath = "",
visibility = ["//visibility:public"],
deps = [
name = "go_default_test",
srcs = ["admission_test.go"],
importpath = "",
library = ":go_default_library",
deps = [
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],

View File

@ -0,0 +1,94 @@
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
package extendedresourcetoleration
import (
// Register is called by the apiserver to register the plugin factory.
func Register(plugins *admission.Plugins) {
plugins.Register("ExtendedResourceToleration", func(config io.Reader) (admission.Interface, error) {
return newExtendedResourceToleration(), nil
// newExtendedResourceToleration creates a new instance of the ExtendedResourceToleration admission controller.
func newExtendedResourceToleration() *plugin {
return &plugin{
Handler: admission.NewHandler(admission.Create, admission.Update),
// Make sure we are implementing the interface.
var _ admission.MutationInterface = &plugin{}
type plugin struct {
// Admit updates the toleration of a pod based on the resources requested by it.
// For every extended resource name requested by the pod's containers or init
// containers, it adds a toleration whose key is that resource name, with
// operator "Exists" and effect "NoSchedule".
// The rationale for this is described in:
func (p *plugin) Admit(attributes admission.Attributes) error {
// Ignore all calls to subresources or resources other than pods.
if len(attributes.GetSubresource()) != 0 || attributes.GetResource().GroupResource() != core.Resource("pods") {
return nil
pod, ok := attributes.GetObject().(*core.Pod)
if !ok {
return errors.NewBadRequest(fmt.Sprintf("expected *core.Pod but got %T", attributes.GetObject()))
resources := sets.String{}
for _, container := range pod.Spec.Containers {
for resourceName := range container.Resources.Requests {
if helper.IsExtendedResourceName(resourceName) {
for _, container := range pod.Spec.InitContainers {
for resourceName := range container.Resources.Requests {
if helper.IsExtendedResourceName(resourceName) {
// Doing .List() so that we get a stable sorted list.
// This allows us to test adding tolerations for multiple extended resources.
for _, resource := range resources.List() {
helper.AddOrUpdateTolerationInPod(pod, &core.Toleration{
Key: resource,
Operator: core.TolerationOpExists,
Effect: core.TaintEffectNoSchedule,
return nil

View File

@ -0,0 +1,382 @@
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
package extendedresourcetoleration
import (
func TestAdmit(t *testing.T) {
plugin := newExtendedResourceToleration()
containerRequestingCPU := core.Container{
Resources: core.ResourceRequirements{
Requests: core.ResourceList{
core.ResourceCPU: *resource.NewQuantity(2, resource.DecimalSI),
containerRequestingMemory := core.Container{
Resources: core.ResourceRequirements{
Requests: core.ResourceList{
core.ResourceMemory: *resource.NewQuantity(2048, resource.DecimalSI),
extendedResource1 := ""
extendedResource2 := ""
containerRequestingExtendedResource1 := core.Container{
Resources: core.ResourceRequirements{
Requests: core.ResourceList{
core.ResourceName(extendedResource1): *resource.NewQuantity(1, resource.DecimalSI),
containerRequestingExtendedResource2 := core.Container{
Resources: core.ResourceRequirements{
Requests: core.ResourceList{
core.ResourceName(extendedResource2): *resource.NewQuantity(2, resource.DecimalSI),
tests := []struct {
description string
requestedPod core.Pod
expectedPod core.Pod
description: "empty pod without any extended resources, expect no change in tolerations",
requestedPod: core.Pod{
Spec: core.PodSpec{},
expectedPod: core.Pod{
Spec: core.PodSpec{},
description: "pod with container without any extended resources, expect no change in tolerations",
requestedPod: core.Pod{
Spec: core.PodSpec{
Containers: []core.Container{
expectedPod: core.Pod{
Spec: core.PodSpec{
Containers: []core.Container{
description: "pod with init container without any extended resources, expect no change in tolerations",
requestedPod: core.Pod{
Spec: core.PodSpec{
InitContainers: []core.Container{
expectedPod: core.Pod{
Spec: core.PodSpec{
InitContainers: []core.Container{
description: "pod with container with extended resource, expect toleration to be added",
requestedPod: core.Pod{
Spec: core.PodSpec{
Containers: []core.Container{
expectedPod: core.Pod{
Spec: core.PodSpec{
Containers: []core.Container{
Tolerations: []core.Toleration{
Key: extendedResource1,
Operator: core.TolerationOpExists,
Effect: core.TaintEffectNoSchedule,
description: "pod with init container with extended resource, expect toleration to be added",
requestedPod: core.Pod{
Spec: core.PodSpec{
InitContainers: []core.Container{
expectedPod: core.Pod{
Spec: core.PodSpec{
InitContainers: []core.Container{
Tolerations: []core.Toleration{
Key: extendedResource2,
Operator: core.TolerationOpExists,
Effect: core.TaintEffectNoSchedule,
description: "pod with existing tolerations and container with extended resource, expect existing tolerations to be preserved and new toleration to be added",
requestedPod: core.Pod{
Spec: core.PodSpec{
Containers: []core.Container{
Tolerations: []core.Toleration{
Key: "foo",
Operator: core.TolerationOpEqual,
Value: "bar",
Effect: core.TaintEffectNoSchedule,
expectedPod: core.Pod{
Spec: core.PodSpec{
Containers: []core.Container{
Tolerations: []core.Toleration{
Key: "foo",
Operator: core.TolerationOpEqual,
Value: "bar",
Effect: core.TaintEffectNoSchedule,
Key: extendedResource1,
Operator: core.TolerationOpExists,
Effect: core.TaintEffectNoSchedule,
description: "pod with multiple extended resources, expect multiple tolerations to be added",
requestedPod: core.Pod{
Spec: core.PodSpec{
Containers: []core.Container{
InitContainers: []core.Container{
expectedPod: core.Pod{
Spec: core.PodSpec{
Containers: []core.Container{
InitContainers: []core.Container{
Tolerations: []core.Toleration{
// Note the order, it's sorted by the Key
Key: extendedResource2,
Operator: core.TolerationOpExists,
Effect: core.TaintEffectNoSchedule,
Key: extendedResource1,
Operator: core.TolerationOpExists,
Effect: core.TaintEffectNoSchedule,
description: "pod with container requesting extended resource and existing correct toleration, expect no change in tolerations",
requestedPod: core.Pod{
Spec: core.PodSpec{
Containers: []core.Container{
Tolerations: []core.Toleration{
Key: extendedResource1,
Operator: core.TolerationOpExists,
Effect: core.TaintEffectNoSchedule,
expectedPod: core.Pod{
Spec: core.PodSpec{
Containers: []core.Container{
Tolerations: []core.Toleration{
Key: extendedResource1,
Operator: core.TolerationOpExists,
Effect: core.TaintEffectNoSchedule,
description: "pod with container requesting extended resource and existing toleration with the same key but different effect and value, expect existing tolerations to be preserved and new toleration to be added",
requestedPod: core.Pod{
Spec: core.PodSpec{
Containers: []core.Container{
Tolerations: []core.Toleration{
Key: extendedResource1,
Operator: core.TolerationOpEqual,
Value: "foo",
Effect: core.TaintEffectNoExecute,
expectedPod: core.Pod{
Spec: core.PodSpec{
Containers: []core.Container{
Tolerations: []core.Toleration{
Key: extendedResource1,
Operator: core.TolerationOpEqual,
Value: "foo",
Effect: core.TaintEffectNoExecute,
Key: extendedResource1,
Operator: core.TolerationOpExists,
Effect: core.TaintEffectNoSchedule,
description: "pod with wildcard toleration and container requesting extended resource, expect existing tolerations to be preserved and new toleration to be added",
requestedPod: core.Pod{
Spec: core.PodSpec{
Containers: []core.Container{
Tolerations: []core.Toleration{
Operator: core.TolerationOpExists,
expectedPod: core.Pod{
Spec: core.PodSpec{
Containers: []core.Container{
Tolerations: []core.Toleration{
Operator: core.TolerationOpExists,
Key: extendedResource1,
Operator: core.TolerationOpExists,
Effect: core.TaintEffectNoSchedule,
for i, test := range tests {
err := plugin.Admit(admission.NewAttributesRecord(&test.requestedPod, nil, core.Kind("Pod").WithVersion("version"), "foo", "name", core.Resource("pods").WithVersion("version"), "", "ignored", nil))
if err != nil {
t.Errorf("[%d: %s] unexpected error %v for pod %+v", i, test.description, err, test.requestedPod)
if !helper.Semantic.DeepEqual(test.expectedPod.Spec.Tolerations, test.requestedPod.Spec.Tolerations) {
t.Errorf("[%d: %s] expected %#v got %#v", i, test.description, test.expectedPod.Spec.Tolerations, test.requestedPod.Spec.Tolerations)
// TestHandles checks which admission operations the plugin reports handling:
// Create and Update are expected to be handled, Delete and Connect are not.
func TestHandles(t *testing.T) {
plugin := newExtendedResourceToleration()
// Each operation maps to the value plugin.Handles is expected to return for it.
tests := map[admission.Operation]bool{
admission.Create: true,
admission.Update: true,
admission.Delete: false,
admission.Connect: false,
for op, expected := range tests {
result := plugin.Handles(op)
if result != expected {
t.Errorf("Unexpected result for operation %s: %v\n", op, result)

View File

@ -169,7 +169,7 @@ func ClusterRoles() []rbac.ClusterRole {
ObjectMeta: metav1.ObjectMeta{Name: "system:basic-user"},
Rules: []rbac.PolicyRule{
// TODO add future selfsubjectrulesreview, project request APIs, project listing APIs
rbac.NewRule("create").Groups(authorizationGroup).Resources("selfsubjectaccessreviews", "selfsubjectrulesreviews").RuleOrDie(),

View File

@ -522,6 +522,7 @@ items:
- selfsubjectaccessreviews
- selfsubjectrulesreviews
- create
- apiVersion:

View File

@ -67,7 +67,11 @@ func NewAdmissionOptions() *AdmissionOptions {
// AddFlags adds flags related to admission for a specific APIServer to the specified FlagSet
func (a *AdmissionOptions) AddFlags(fs *pflag.FlagSet) {
fs.StringSliceVar(&a.PluginNames, "admission-control", a.PluginNames, ""+
"Ordered list of plug-ins to do admission control of resources into cluster. "+
"Admission is divided into two phases. "+
"In the first phase, only mutating admission plugins run. "+
"In the second phase, only validating admission plugins run. "+
"The names in the below list may represent a validating plugin, a mutating plugin, or both. "+
"Within each phase, the plugins will run in the order in which they are passed to this flag. "+
"Comma-delimited list of: "+strings.Join(a.Plugins.Registered(), ", ")+".")
fs.StringVar(&a.ConfigFile, "admission-control-config-file", a.ConfigFile,

View File

@ -69,7 +69,6 @@ go_library(

View File

@ -35,7 +35,6 @@ import (
clientset ""
extensionsclient ""
extensionsinternal ""
deploymentutil ""
@ -87,10 +86,6 @@ var _ = SIGDescribe("Deployment", func() {
It("deployment should support rollback", func() {
It("scaled rollout deployment should not block on annotation check", func() {
It("iterative rollouts should eventually progress", func() {
@ -621,159 +616,6 @@ func testRollbackDeployment(f *framework.Framework) {
func testScaledRolloutDeployment(f *framework.Framework) {
ns := f.Namespace.Name
c := f.ClientSet
podLabels := map[string]string{"name": NginxImageName}
replicas := int32(10)
// Create an nginx deployment.
deploymentName := "nginx"
d := framework.NewDeployment(deploymentName, replicas, podLabels, NginxImageName, NginxImage, extensions.RollingUpdateDeploymentStrategyType)
d.Spec.Strategy.RollingUpdate = new(extensions.RollingUpdateDeployment)
d.Spec.Strategy.RollingUpdate.MaxSurge = intOrStrP(3)
d.Spec.Strategy.RollingUpdate.MaxUnavailable = intOrStrP(2)
framework.Logf("Creating deployment %q", deploymentName)
deployment, err := c.ExtensionsV1beta1().Deployments(ns).Create(d)
framework.Logf("Waiting for observed generation %d", deployment.Generation)
Expect(framework.WaitForObservedDeployment(c, ns, deploymentName, deployment.Generation)).NotTo(HaveOccurred())
// Verify that the required pods have come up.
framework.Logf("Waiting for all required pods to come up")
err = framework.VerifyPodsRunning(f.ClientSet, ns, NginxImageName, false, *(deployment.Spec.Replicas))
Expect(err).NotTo(HaveOccurred(), "error in waiting for pods to come up: %v", err)
framework.Logf("Waiting for deployment %q to complete", deployment.Name)
Expect(framework.WaitForDeploymentComplete(c, deployment)).NotTo(HaveOccurred())
first, err := deploymentutil.GetNewReplicaSet(deployment, c.ExtensionsV1beta1())
// Update the deployment with a non-existent image so that the new replica set will be blocked.
framework.Logf("Updating deployment %q with a non-existent image", deploymentName)
deployment, err = framework.UpdateDeploymentWithRetries(c, ns, d.Name, func(update *extensions.Deployment) {
update.Spec.Template.Spec.Containers[0].Image = "nginx:404"
framework.Logf("Waiting for observed generation %d", deployment.Generation)
err = framework.WaitForObservedDeployment(c, ns, deploymentName, deployment.Generation)
deployment, err = c.ExtensionsV1beta1().Deployments(ns).Get(deploymentName, metav1.GetOptions{})
if deployment.Status.AvailableReplicas < deploymentutil.MinAvailable(deployment) {
Expect(fmt.Errorf("Observed %d available replicas, less than min required %d", deployment.Status.AvailableReplicas, deploymentutil.MinAvailable(deployment))).NotTo(HaveOccurred())
framework.Logf("Checking that the replica sets for %q are synced", deploymentName)
second, err := deploymentutil.GetNewReplicaSet(deployment, c.ExtensionsV1beta1())
first, err = c.ExtensionsV1beta1().ReplicaSets(first.Namespace).Get(first.Name, metav1.GetOptions{})
firstCond := replicaSetHasDesiredReplicas(c.ExtensionsV1beta1(), first)
err = wait.PollImmediate(10*time.Millisecond, 1*time.Minute, firstCond)
secondCond := replicaSetHasDesiredReplicas(c.ExtensionsV1beta1(), second)
err = wait.PollImmediate(10*time.Millisecond, 1*time.Minute, secondCond)
framework.Logf("Updating the size (up) and template at the same time for deployment %q", deploymentName)
newReplicas := int32(20)
deployment, err = framework.UpdateDeploymentWithRetries(c, ns, deployment.Name, func(update *extensions.Deployment) {
update.Spec.Replicas = &newReplicas
update.Spec.Template.Spec.Containers[0].Image = NautilusImage
err = framework.WaitForObservedDeployment(c, ns, deploymentName, deployment.Generation)
framework.Logf("Waiting for deployment status to sync (current available: %d, minimum available: %d)", deployment.Status.AvailableReplicas, deploymentutil.MinAvailable(deployment))
Expect(framework.WaitForDeploymentComplete(c, deployment)).NotTo(HaveOccurred())
oldRSs, _, rs, err := deploymentutil.GetAllReplicaSets(deployment, c.ExtensionsV1beta1())
for _, rs := range append(oldRSs, rs) {
framework.Logf("Ensuring replica set %q has the correct desiredReplicas annotation", rs.Name)
desired, ok := deploymentutil.GetDesiredReplicasAnnotation(rs)
if !ok || desired == *(deployment.Spec.Replicas) {
err = fmt.Errorf("unexpected desiredReplicas annotation %d for replica set %q", desired, rs.Name)
// Update the deployment with a non-existent image so that the new replica set will be blocked.
framework.Logf("Updating deployment %q with a non-existent image", deploymentName)
deployment, err = framework.UpdateDeploymentWithRetries(c, ns, d.Name, func(update *extensions.Deployment) {
update.Spec.Template.Spec.Containers[0].Image = "nginx:404"
framework.Logf("Waiting for observed generation %d", deployment.Generation)
err = framework.WaitForObservedDeployment(c, ns, deploymentName, deployment.Generation)
deployment, err = c.ExtensionsV1beta1().Deployments(ns).Get(deploymentName, metav1.GetOptions{})
if deployment.Status.AvailableReplicas < deploymentutil.MinAvailable(deployment) {
Expect(fmt.Errorf("Observed %d available replicas, less than min required %d", deployment.Status.AvailableReplicas, deploymentutil.MinAvailable(deployment))).NotTo(HaveOccurred())
framework.Logf("Checking that the replica sets for %q are synced", deploymentName)
oldRs, err := c.ExtensionsV1beta1().ReplicaSets(rs.Namespace).Get(rs.Name, metav1.GetOptions{})
newRs, err := deploymentutil.GetNewReplicaSet(deployment, c.ExtensionsV1beta1())
oldCond := replicaSetHasDesiredReplicas(c.ExtensionsV1beta1(), oldRs)
err = wait.PollImmediate(10*time.Millisecond, 1*time.Minute, oldCond)
newCond := replicaSetHasDesiredReplicas(c.ExtensionsV1beta1(), newRs)
err = wait.PollImmediate(10*time.Millisecond, 1*time.Minute, newCond)
framework.Logf("Updating the size (down) and template at the same time for deployment %q", deploymentName)
newReplicas = int32(5)
deployment, err = framework.UpdateDeploymentWithRetries(c, ns, deployment.Name, func(update *extensions.Deployment) {
update.Spec.Replicas = &newReplicas
update.Spec.Template.Spec.Containers[0].Image = KittenImage
err = framework.WaitForObservedDeployment(c, ns, deploymentName, deployment.Generation)
framework.Logf("Waiting for deployment status to sync (current available: %d, minimum available: %d)", deployment.Status.AvailableReplicas, deploymentutil.MinAvailable(deployment))
Expect(framework.WaitForDeploymentComplete(c, deployment)).NotTo(HaveOccurred())
oldRSs, _, rs, err = deploymentutil.GetAllReplicaSets(deployment, c.ExtensionsV1beta1())
for _, rs := range append(oldRSs, rs) {
framework.Logf("Ensuring replica set %q has the correct desiredReplicas annotation", rs.Name)
desired, ok := deploymentutil.GetDesiredReplicasAnnotation(rs)
if !ok || desired == *(deployment.Spec.Replicas) {
err = fmt.Errorf("unexpected desiredReplicas annotation %d for replica set %q", desired, rs.Name)
func randomScale(d *extensions.Deployment, i int) {
switch r := rand.Float32(); {
case r < 0.3:
@ -904,17 +746,6 @@ func testIterativeDeployments(f *framework.Framework) {
Expect(framework.WaitForDeploymentWithCondition(c, ns, deploymentName, deploymentutil.NewRSAvailableReason, extensions.DeploymentProgressing)).NotTo(HaveOccurred())
// replicaSetHasDesiredReplicas returns a wait.ConditionFunc that re-fetches the
// given replica set through rsClient on every call. The condition reports true
// once the fetched object's observed generation has reached the generation the
// passed-in replica set had when this function was called and its status
// replica count equals the replica count in its spec. An error from Get is
// returned as the condition's error.
func replicaSetHasDesiredReplicas(rsClient extensionsclient.ReplicaSetsGetter, replicaSet *extensions.ReplicaSet) wait.ConditionFunc {
// Captured once, so later polls compare against the generation seen at call time.
desiredGeneration := replicaSet.Generation
return func() (bool, error) {
rs, err := rsClient.ReplicaSets(replicaSet.Namespace).Get(replicaSet.Name, metav1.GetOptions{})
if err != nil {
return false, err
return rs.Status.ObservedGeneration >= desiredGeneration && rs.Status.Replicas == *(rs.Spec.Replicas), nil
func testDeploymentsControllerRef(f *framework.Framework) {
ns := f.Namespace.Name
c := f.ClientSet
@ -954,16 +785,6 @@ func testDeploymentsControllerRef(f *framework.Framework) {
func waitDeploymentReplicaSetsControllerRef(c clientset.Interface, ns string, uid types.UID, label map[string]string) func() (bool, error) {
return func() (bool, error) {
err := checkDeploymentReplicaSetsControllerRef(c, ns, uid, label)
if err != nil {
return false, nil
return true, nil
func checkDeploymentReplicaSetsControllerRef(c clientset.Interface, ns string, uid types.UID, label map[string]string) error {
rsList := listDeploymentReplicaSets(c, ns, label)
for _, rs := range rsList.Items {

View File

@ -21,6 +21,8 @@ import (
. ""
@ -54,8 +56,22 @@ const (
DirectoryLocalVolumeType LocalVolumeType = "dir"
// creates a tmpfs and mounts it
TmpfsLocalVolumeType LocalVolumeType = "tmpfs"
// tests based on local ssd at /mnt/disks/by-uuid/
GCELocalSSDVolumeType LocalVolumeType = "gce-localssd-scsi-fs"
var setupLocalVolumeMap = map[LocalVolumeType]func(*localTestConfig) *localTestVolume{
GCELocalSSDVolumeType: setupLocalVolumeGCELocalSSD,
TmpfsLocalVolumeType: setupLocalVolumeTmpfs,
DirectoryLocalVolumeType: setupLocalVolumeDirectory,
var cleanupLocalVolumeMap = map[LocalVolumeType]func(*localTestConfig, *localTestVolume){
GCELocalSSDVolumeType: cleanupLocalVolumeGCELocalSSD,
TmpfsLocalVolumeType: cleanupLocalVolumeTmpfs,
DirectoryLocalVolumeType: cleanupLocalVolumeDirectory,
type localTestVolume struct {
// Node that the volume is on
node *v1.Node
@ -199,105 +215,100 @@ var _ = SIGDescribe("PersistentVolumes-local [Feature:LocalPersistentVolumes] [S
LocalVolumeTypes := []LocalVolumeType{DirectoryLocalVolumeType, TmpfsLocalVolumeType}
Context("when two pods mount a local volume at the same time", func() {
It("should be able to write from pod1 and read from pod2", func() {
for _, testVolType := range LocalVolumeTypes {
var testVol *localTestVolume
By(fmt.Sprintf("local-volume-type: %s", testVolType))
testVol = setupLocalVolumePVCPV(config, testVolType)
twoPodsReadWriteTest(config, testVol)
cleanupLocalVolume(config, testVol)
Context("when two pods mount a local volume one after the other", func() {
It("should be able to write from pod1 and read from pod2", func() {
for _, testVolType := range LocalVolumeTypes {
var testVol *localTestVolume
By(fmt.Sprintf("local-volume-type: %s", testVolType))
testVol = setupLocalVolumePVCPV(config, testVolType)
twoPodsReadWriteSerialTest(config, testVol)
cleanupLocalVolume(config, testVol)
Context("when pod using local volume with non-existant path", func() {
ep := &eventPatterns{
reason: "FailedMount",
pattern: make([]string, 2)}
ep.pattern = append(ep.pattern, "MountVolume.SetUp failed")
ep.pattern = append(ep.pattern, "does not exist")
It("should not be able to mount", func() {
for _, testVolType := range LocalVolumeTypes {
By(fmt.Sprintf("local-volume-type: %s", testVolType))
testVol := &localTestVolume{
node: config.node0,
hostDir: "/non-existent/location/nowhere",
localVolumeType: testVolType,
LocalVolumeTypes := []LocalVolumeType{DirectoryLocalVolumeType, TmpfsLocalVolumeType, GCELocalSSDVolumeType}
for _, tempTestVolType := range LocalVolumeTypes {
// Copy the loop variable so that each Ginkgo test closure captures its own value
testVolType := tempTestVolType
ctxString := fmt.Sprintf("when using volume type %s", testVolType)
Context(ctxString, func() {
BeforeEach(func() {
if testVolType == GCELocalSSDVolumeType {
SkipUnlessLocalSSDExists("scsi", "fs", config.node0)
By("Creating local PVC and PV")
createLocalPVCPV(config, testVol)
pod, err := createLocalPod(config, testVol)
checkPodEvents(config, pod.Name, ep)
Context("when pod's node is different from PV's NodeAffinity", func() {
BeforeEach(func() {
if len(config.nodes.Items) < 2 {
framework.Skipf("Runs only when number of nodes >= 2")
ep := &eventPatterns{
reason: "FailedScheduling",
pattern: make([]string, 2)}
ep.pattern = append(ep.pattern, "MatchNodeSelector")
ep.pattern = append(ep.pattern, "NoVolumeNodeConflict")
for _, testVolType := range LocalVolumeTypes {
It("should not be able to mount due to different NodeAffinity", func() {
testPodWithNodeName(config, testVolType, ep, config.nodes.Items[1].Name, makeLocalPodWithNodeAffinity)
It("should not be able to mount due to different NodeSelector", func() {
Context("when two pods mount a local volume at the same time", func() {
It("should be able to write from pod1 and read from pod2", func() {
var testVol *localTestVolume
testVol = setupLocalVolumePVCPV(config, testVolType)
twoPodsReadWriteTest(config, testVol)
cleanupLocalVolume(config, testVol)
Context("when two pods mount a local volume one after the other", func() {
It("should be able to write from pod1 and read from pod2", func() {
var testVol *localTestVolume
testVol = setupLocalVolumePVCPV(config, testVolType)
twoPodsReadWriteSerialTest(config, testVol)
cleanupLocalVolume(config, testVol)
Context("when pod using local volume with non-existant path", func() {
ep := &eventPatterns{
reason: "FailedMount",
pattern: make([]string, 2)}
ep.pattern = append(ep.pattern, "MountVolume.SetUp failed")
ep.pattern = append(ep.pattern, "does not exist")
It("should not be able to mount", func() {
testVol := &localTestVolume{
node: config.node0,
hostDir: "/non-existent/location/nowhere",
localVolumeType: testVolType,
By("Creating local PVC and PV")
createLocalPVCPV(config, testVol)
pod, err := createLocalPod(config, testVol)
checkPodEvents(config, pod.Name, ep)
Context("when pod's node is different from PV's NodeAffinity", func() {
BeforeEach(func() {
if len(config.nodes.Items) < 2 {
framework.Skipf("Runs only when number of nodes >= 2")
ep := &eventPatterns{
reason: "FailedScheduling",
pattern: make([]string, 2)}
ep.pattern = append(ep.pattern, "MatchNodeSelector")
ep.pattern = append(ep.pattern, "NoVolumeNodeConflict")
It("should not be able to mount due to different NodeAffinity", func() {
testPodWithNodeName(config, testVolType, ep, config.nodes.Items[1].Name, makeLocalPodWithNodeAffinity)
It("should not be able to mount due to different NodeSelector", func() {
testPodWithNodeName(config, testVolType, ep, config.nodes.Items[1].Name, makeLocalPodWithNodeSelector)
Context("when pod's node is different from PV's NodeName", func() {
BeforeEach(func() {
if len(config.nodes.Items) < 2 {
framework.Skipf("Runs only when number of nodes >= 2")
ep := &eventPatterns{
reason: "FailedMount",
pattern: make([]string, 2)}
ep.pattern = append(ep.pattern, "NodeSelectorTerm")
ep.pattern = append(ep.pattern, "Storage node affinity check failed")
It("should not be able to mount due to different NodeName", func() {
testPodWithNodeName(config, testVolType, ep, config.nodes.Items[1].Name, makeLocalPodWithNodeName)
testPodWithNodeName(config, testVolType, ep, config.nodes.Items[1].Name, makeLocalPodWithNodeSelector)
Context("when pod's node is different from PV's NodeName", func() {
BeforeEach(func() {
if len(config.nodes.Items) < 2 {
framework.Skipf("Runs only when number of nodes >= 2")
ep := &eventPatterns{
reason: "FailedMount",
pattern: make([]string, 2)}
ep.pattern = append(ep.pattern, "NodeSelectorTerm")
ep.pattern = append(ep.pattern, "Storage node affinity check failed")
for _, testVolType := range LocalVolumeTypes {
It("should not be able to mount due to different NodeName", func() {
testPodWithNodeName(config, testVolType, ep, config.nodes.Items[1].Name, makeLocalPodWithNodeName)
Context("when using local volume provisioner", func() {
var volumePath string
@ -362,7 +373,6 @@ type makeLocalPodWith func(config *localTestConfig, volume *localTestVolume, nod
func testPodWithNodeName(config *localTestConfig, testVolType LocalVolumeType, ep *eventPatterns, nodeName string, makeLocalPodFunc makeLocalPodWith) {
var testVol *localTestVolume
By(fmt.Sprintf("local-volume-type: %s", testVolType))
testVol = setupLocalVolumePVCPV(config, testVolType)
pod := makeLocalPodFunc(config, testVol, nodeName)
@ -486,16 +496,7 @@ func podNodeName(config *localTestConfig, pod *v1.Pod) (string, error) {
return runtimePod.Spec.NodeName, runtimePodErr
// setupLocalVolume sets up a directory to use for a local PV
func setupLocalVolume(config *localTestConfig, localVolumeType LocalVolumeType) *localTestVolume {
testDirName := "local-volume-test-" + string(uuid.NewUUID())
hostDir := filepath.Join(hostBase, testDirName)
if localVolumeType == TmpfsLocalVolumeType {
createAndMountTmpfsLocalVolume(config, hostDir)
// populate volume with testFile containing testFileContent
func setupWriteTestFile(hostDir string, config *localTestConfig, localVolumeType LocalVolumeType) *localTestVolume {
writeCmd, _ := createWriteAndReadCmds(hostDir, testFile, testFileContent)
By(fmt.Sprintf("Creating local volume on node %q at path %q", config.node0.Name, hostDir))
err := framework.IssueSSHCommand(writeCmd, framework.TestContext.Provider, config.node0)
@ -507,7 +508,30 @@ func setupLocalVolume(config *localTestConfig, localVolumeType LocalVolumeType)
// Deletes the PVC/PV, and launches a pod with hostpath volume to remove the test directory
// setupLocalVolumeTmpfs builds a uniquely named directory path under hostBase,
// hands it to createAndMountTmpfsLocalVolume, and then returns the
// localTestVolume produced by setupWriteTestFile for TmpfsLocalVolumeType.
func setupLocalVolumeTmpfs(config *localTestConfig) *localTestVolume {
testDirName := "local-volume-test-" + string(uuid.NewUUID())
hostDir := filepath.Join(hostBase, testDirName)
createAndMountTmpfsLocalVolume(config, hostDir)
// populate volume with testFile containing testFileContent
return setupWriteTestFile(hostDir, config, TmpfsLocalVolumeType)
// setupLocalVolumeGCELocalSSD lists /mnt/disks/by-uuid/google-local-ssds-scsi-fs/
// on config.node0 over SSH, uses the first entry of the listing as the host
// directory, and returns the localTestVolume produced by setupWriteTestFile
// for GCELocalSSDVolumeType.
func setupLocalVolumeGCELocalSSD(config *localTestConfig) *localTestVolume {
res, err := framework.IssueSSHCommandWithResult("ls /mnt/disks/by-uuid/google-local-ssds-scsi-fs/", framework.TestContext.Provider, config.node0)
// NOTE(review): indexing [0] panics when the listing is empty; this relies on
// callers having verified that a disk exists (e.g. via SkipUnlessLocalSSDExists).
dirName := strings.Fields(res.Stdout)[0]
hostDir := "/mnt/disks/by-uuid/google-local-ssds-scsi-fs/" + dirName
// populate volume with testFile containing testFileContent
return setupWriteTestFile(hostDir, config, GCELocalSSDVolumeType)
// setupLocalVolumeDirectory builds a uniquely named directory path under
// hostBase and returns the localTestVolume produced by setupWriteTestFile
// for DirectoryLocalVolumeType.
func setupLocalVolumeDirectory(config *localTestConfig) *localTestVolume {
testDirName := "local-volume-test-" + string(uuid.NewUUID())
hostDir := filepath.Join(hostBase, testDirName)
// populate volume with testFile containing testFileContent
return setupWriteTestFile(hostDir, config, DirectoryLocalVolumeType)
func cleanupLocalVolume(config *localTestConfig, volume *localTestVolume) {
if volume == nil {
@ -519,10 +543,30 @@ func cleanupLocalVolume(config *localTestConfig, volume *localTestVolume) {
framework.Failf("Failed to delete PV and/or PVC: %v", utilerrors.NewAggregate(errs))
if volume.localVolumeType == TmpfsLocalVolumeType {
unmountTmpfsLocalVolume(config, volume.hostDir)
cleanup := cleanupLocalVolumeMap[volume.localVolumeType]
cleanup(config, volume)
// cleanupLocalVolumeGCELocalSSD removes testFile from the volume's host
// directory by running rm over SSH on config.node0. Only the file is removed;
// the host directory itself is left in place.
func cleanupLocalVolumeGCELocalSSD(config *localTestConfig, volume *localTestVolume) {
By("Removing the test directory")
removeCmd := fmt.Sprintf("rm %s", volume.hostDir+"/"+testFile)
err := framework.IssueSSHCommand(removeCmd, framework.TestContext.Provider, config.node0)
// cleanupLocalVolumeTmpfs calls unmountTmpfsLocalVolume for the volume's host
// directory and then removes that directory recursively by running rm -r over
// SSH on config.node0.
func cleanupLocalVolumeTmpfs(config *localTestConfig, volume *localTestVolume) {
unmountTmpfsLocalVolume(config, volume.hostDir)
By("Removing the test directory")
removeCmd := fmt.Sprintf("rm -r %s", volume.hostDir)
err := framework.IssueSSHCommand(removeCmd, framework.TestContext.Provider, config.node0)
// cleanupLocalVolumeDirectory removes the volume's host directory recursively
// by running rm -r over SSH on config.node0.
func cleanupLocalVolumeDirectory(config *localTestConfig, volume *localTestVolume) {
By("Removing the test directory")
removeCmd := fmt.Sprintf("rm -r %s", volume.hostDir)
err := framework.IssueSSHCommand(removeCmd, framework.TestContext.Provider, config.node0)
@ -703,7 +747,9 @@ func podRWCmdExec(pod *v1.Pod, cmd string) string {
// and create local PVC and PV
func setupLocalVolumePVCPV(config *localTestConfig, localVolumeType LocalVolumeType) *localTestVolume {
By("Initializing test volume")
testVol := setupLocalVolume(config, localVolumeType)
setupLocalVolume, ok := setupLocalVolumeMap[localVolumeType]
testVol := setupLocalVolume(config)
By("Creating local PVC and PV")
createLocalPVCPV(config, testVol)
@ -921,3 +967,16 @@ func findLocalPersistentVolume(c clientset.Interface, volumePath string) (*v1.Pe
return nil, fmt.Errorf("Unable to find local persistent volume with path %v", volumePath)
// SkipUnlessLocalSSDExists takes in an ssdInterface (scsi/nvme) and a filesystemType (fs/block)
// and skips the test if no disk of that type exists on the node.
// The check counts the entries of
// /mnt/disks/by-uuid/google-local-ssds-<ssdInterface>-<filesystemType>/ on the
// node over SSH (ls -1 | wc -l) and skips when the count is less than one.
func SkipUnlessLocalSSDExists(ssdInterface, filesystemType string, node *v1.Node) {
ssdCmd := fmt.Sprintf("ls -1 /mnt/disks/by-uuid/google-local-ssds-%s-%s/ | wc -l", ssdInterface, filesystemType)
res, err := framework.IssueSSHCommandWithResult(ssdCmd, framework.TestContext.Provider, node)
// wc -l output may carry surrounding whitespace, so trim it before parsing.
num, err := strconv.Atoi(strings.TrimSpace(res.Stdout))
if num < 1 {
framework.Skipf("Requires at least 1 %s %s localSSD ", ssdInterface, filesystemType)

View File

@ -70,7 +70,7 @@ var _ = SIGDescribe("PersistentVolumes:vsphere", func() {
selector = metav1.SetAsLabelSelector(volLabel)
if vsp == nil {
vsp, err = vsphere.GetVSphere()
vsp, err = getVSphere(c)
if volumePath == "" {
@ -105,7 +105,7 @@ var _ = SIGDescribe("PersistentVolumes:vsphere", func() {
node = types.NodeName(clientPod.Spec.NodeName)
By("Verify disk should be attached to the node")
isAttached, err := verifyVSphereDiskAttached(vsp, volumePath, node)
isAttached, err := verifyVSphereDiskAttached(c, vsp, volumePath, node)
Expect(isAttached).To(BeTrue(), "disk is not attached with the node")
@ -133,7 +133,11 @@ var _ = SIGDescribe("PersistentVolumes:vsphere", func() {
framework.AddCleanupAction(func() {
// Cleanup actions are called even when the tests are skipped, in which case the namespace is left unset.
if len(ns) > 0 && len(volumePath) > 0 {
framework.ExpectNoError(waitForVSphereDiskToDetach(vsp, volumePath, node))
client, err := framework.LoadClientset()
if err != nil {
framework.ExpectNoError(waitForVSphereDiskToDetach(client, vsp, volumePath, node))
@ -213,6 +217,6 @@ var _ = SIGDescribe("PersistentVolumes:vsphere", func() {
By("Verifying Persistent Disk detaches")
waitForVSphereDiskToDetach(vsp, volumePath, node)
waitForVSphereDiskToDetach(c, vsp, volumePath, node)

View File

@ -56,7 +56,7 @@ var _ = SIGDescribe("PersistentVolumes [Feature:ReclaimPolicy]", func() {
AfterEach(func() {
vsp, err := vsphere.GetVSphere()
vsp, err := getVSphere(c)
testCleanupVSpherePersistentVolumeReclaim(vsp, c, ns, volumePath, pv, pvc)
@ -74,7 +74,7 @@ var _ = SIGDescribe("PersistentVolumes [Feature:ReclaimPolicy]", func() {
6. Verify PV is deleted automatically.
It("should delete persistent volume when reclaimPolicy set to delete and associated claim is deleted", func() {
vsp, err := vsphere.GetVSphere()
vsp, err := getVSphere(c)
volumePath, pv, pvc, err = testSetupVSpherePersistentVolumeReclaim(vsp, c, ns, v1.PersistentVolumeReclaimDelete)
@ -104,7 +104,7 @@ var _ = SIGDescribe("PersistentVolumes [Feature:ReclaimPolicy]", func() {
9. Verify PV should be detached from the node and automatically deleted.
It("should not detach and unmount PV when associated pvc with delete as reclaimPolicy is deleted when it is in use by the pod", func() {
vsp, err := vsphere.GetVSphere()
vsp, err := getVSphere(c)
volumePath, pv, pvc, err = testSetupVSpherePersistentVolumeReclaim(vsp, c, ns, v1.PersistentVolumeReclaimDelete)
@ -127,19 +127,19 @@ var _ = SIGDescribe("PersistentVolumes [Feature:ReclaimPolicy]", func() {
Expect(framework.WaitForPersistentVolumePhase(v1.VolumeFailed, c, pv.Name, 1*time.Second, 60*time.Second)).NotTo(HaveOccurred())
By("Verify the volume is attached to the node")
isVolumeAttached, verifyDiskAttachedError := verifyVSphereDiskAttached(vsp, pv.Spec.VsphereVolume.VolumePath, node)
isVolumeAttached, verifyDiskAttachedError := verifyVSphereDiskAttached(c, vsp, pv.Spec.VsphereVolume.VolumePath, node)
By("Verify the volume is accessible and available in the pod")
verifyVSphereVolumesAccessible(pod, []*v1.PersistentVolume{pv}, vsp)
verifyVSphereVolumesAccessible(c, pod, []*v1.PersistentVolume{pv}, vsp)
framework.Logf("Verified that Volume is accessible in the POD after deleting PV claim")
By("Deleting the Pod")
framework.ExpectNoError(framework.DeletePodWithWait(f, c, pod), "Failed to delete pod ", pod.Name)
By("Verify PV is detached from the node after Pod is deleted")
Expect(waitForVSphereDiskToDetach(vsp, pv.Spec.VsphereVolume.VolumePath, types.NodeName(pod.Spec.NodeName))).NotTo(HaveOccurred())
Expect(waitForVSphereDiskToDetach(c, vsp, pv.Spec.VsphereVolume.VolumePath, types.NodeName(pod.Spec.NodeName))).NotTo(HaveOccurred())
By("Verify PV should be deleted automatically")
framework.ExpectNoError(framework.WaitForPersistentVolumeDeleted(c, pv.Name, 1*time.Second, 30*time.Second))
@ -167,7 +167,7 @@ var _ = SIGDescribe("PersistentVolumes [Feature:ReclaimPolicy]", func() {
It("should retain persistent volume when reclaimPolicy set to retain when associated claim is deleted", func() {
var volumeFileContent = "hello from vsphere cloud provider, Random Content is :" + strconv.FormatInt(time.Now().UnixNano(), 10)
vsp, err := vsphere.GetVSphere()
vsp, err := getVSphere(c)
volumePath, pv, pvc, err = testSetupVSpherePersistentVolumeReclaim(vsp, c, ns, v1.PersistentVolumeReclaimRetain)

View File

@ -23,7 +23,6 @@ import (
. ""
clientset ""
vsphere ""
@ -104,7 +103,7 @@ var _ = SIGDescribe("PersistentVolumes [Feature:LabelSelector]", func() {
func testSetupVSpherePVClabelselector(c clientset.Interface, ns string, ssdlabels map[string]string, vvollabels map[string]string) (volumePath string, pv_ssd *v1.PersistentVolume, pvc_ssd *v1.PersistentVolumeClaim, pvc_vvol *v1.PersistentVolumeClaim, err error) {
volumePath = ""
By("creating vmdk")
vsp, err := vsphere.GetVSphere()
vsp, err := getVSphere(c)
volumePath, err = createVSphereVolume(vsp, nil)
if err != nil {
@ -134,7 +133,7 @@ func testSetupVSpherePVClabelselector(c clientset.Interface, ns string, ssdlabel
func testCleanupVSpherePVClabelselector(c clientset.Interface, ns string, volumePath string, pv_ssd *v1.PersistentVolume, pvc_ssd *v1.PersistentVolumeClaim, pvc_vvol *v1.PersistentVolumeClaim) {
By("running testCleanupVSpherePVClabelselector")
if len(volumePath) > 0 {
vsp, err := vsphere.GetVSphere()
vsp, err := getVSphere(c)

View File

@ -53,7 +53,6 @@ import (
metav1 ""
clientset ""
kubeletapis ""
@ -507,7 +506,11 @@ var _ = SIGDescribe("Volumes", func() {
Prefix: "vsphere",
By("creating a test vsphere volume")
vsp, err := vsphere.GetVSphere()
c, err := framework.LoadClientset()
if err != nil {
vsp, err := getVSphere(c)
volumePath, err = createVSphereVolume(vsp, nil)

View File

@ -150,7 +150,7 @@ var _ = SIGDescribe("vcp at scale [Feature:vsphere] ", func() {
scArrays[index] = sc
vsp, err := vsphere.GetVSphere()
vsp, err := getVSphere(client)
volumeCountPerInstance := volumeCount / numberOfInstances
@ -176,7 +176,7 @@ var _ = SIGDescribe("vcp at scale [Feature:vsphere] ", func() {
By("Waiting for volumes to be detached from the node")
err = waitForVSphereDisksToDetach(vsp, nodeVolumeMap)
err = waitForVSphereDisksToDetach(client, vsp, nodeVolumeMap)
for _, pvcClaim := range pvcClaimList {
@ -228,7 +228,7 @@ func VolumeCreateAndAttach(client clientset.Interface, namespace string, sc []*s
nodeVolumeMap[pod.Spec.NodeName] = append(nodeVolumeMap[pod.Spec.NodeName], pv.Spec.VsphereVolume.VolumePath)
By("Verify the volume is accessible and available in the pod")
verifyVSphereVolumesAccessible(pod, persistentvolumes, vsp)
verifyVSphereVolumesAccessible(client, pod, persistentvolumes, vsp)
nodeVolumeMapChan <- nodeVolumeMap

View File

@ -24,7 +24,6 @@ import (
metav1 ""
clientset ""
@ -104,7 +103,7 @@ var _ = SIGDescribe("vsphere statefulset", func() {
statefulsetTester.WaitForStatusReadyReplicas(statefulset, replicas-1)
vsp, err := vsphere.GetVSphere()
vsp, err := getVSphere(client)
// After scale down, verify vsphere volumes are detached from deleted pods
@ -117,7 +116,7 @@ var _ = SIGDescribe("vsphere statefulset", func() {
if volumespec.PersistentVolumeClaim != nil {
vSpherediskPath := getvSphereVolumePathFromClaim(client, statefulset.Namespace, volumespec.PersistentVolumeClaim.ClaimName)
framework.Logf("Waiting for Volume: %q to detach from Node: %q", vSpherediskPath, sspod.Spec.NodeName)
Expect(waitForVSphereDiskToDetach(vsp, vSpherediskPath, types.NodeName(sspod.Spec.NodeName))).NotTo(HaveOccurred())
Expect(waitForVSphereDiskToDetach(client, vsp, vSpherediskPath, types.NodeName(sspod.Spec.NodeName))).NotTo(HaveOccurred())
@ -146,7 +145,7 @@ var _ = SIGDescribe("vsphere statefulset", func() {
framework.Logf("Verify Volume: %q is attached to the Node: %q", vSpherediskPath, sspod.Spec.NodeName)
// Verify scale up has re-attached the same volumes and not introduced new volume
Expect(volumesBeforeScaleDown[vSpherediskPath] == "").To(BeFalse())
isVolumeAttached, verifyDiskAttachedError := verifyVSphereDiskAttached(vsp, vSpherediskPath, types.NodeName(sspod.Spec.NodeName))
isVolumeAttached, verifyDiskAttachedError := verifyVSphereDiskAttached(client, vsp, vSpherediskPath, types.NodeName(sspod.Spec.NodeName))

View File

@ -30,7 +30,6 @@ import (
k8stype ""
clientset ""
@ -135,9 +134,8 @@ var _ = SIGDescribe("vsphere cloud provider stress [Feature:vsphere]", func() {
func PerformVolumeLifeCycleInParallel(f *framework.Framework, client clientset.Interface, namespace string, instanceId string, sc *storageV1.StorageClass, iterations int, wg *sync.WaitGroup) {
defer wg.Done()
defer GinkgoRecover()
vsp, err := vsphere.GetVSphere()
vsp, err := getVSphere(f.ClientSet)
for iterationCount := 0; iterationCount < iterations; iterationCount++ {
logPrefix := fmt.Sprintf("Instance: [%v], Iteration: [%v] :", instanceId, iterationCount+1)
By(fmt.Sprintf("%v Creating PVC using the Storage Class: %v", logPrefix, sc.Name))
@ -164,19 +162,19 @@ func PerformVolumeLifeCycleInParallel(f *framework.Framework, client clientset.I
By(fmt.Sprintf("%v Verifing the volume: %v is attached to the node VM: %v", logPrefix, persistentvolumes[0].Spec.VsphereVolume.VolumePath, pod.Spec.NodeName))
isVolumeAttached, verifyDiskAttachedError := verifyVSphereDiskAttached(vsp, persistentvolumes[0].Spec.VsphereVolume.VolumePath, types.NodeName(pod.Spec.NodeName))
isVolumeAttached, verifyDiskAttachedError := verifyVSphereDiskAttached(client, vsp, persistentvolumes[0].Spec.VsphereVolume.VolumePath, types.NodeName(pod.Spec.NodeName))
By(fmt.Sprintf("%v Verifing the volume: %v is accessible in the pod: %v", logPrefix, persistentvolumes[0].Spec.VsphereVolume.VolumePath, pod.Name))
verifyVSphereVolumesAccessible(pod, persistentvolumes, vsp)
verifyVSphereVolumesAccessible(client, pod, persistentvolumes, vsp)
By(fmt.Sprintf("%v Deleting pod: %v", logPrefix, pod.Name))
err = framework.DeletePodWithWait(f, client, pod)
By(fmt.Sprintf("%v Waiting for volume: %v to be detached from the node: %v", logPrefix, persistentvolumes[0].Spec.VsphereVolume.VolumePath, pod.Spec.NodeName))
err = waitForVSphereDiskToDetach(vsp, persistentvolumes[0].Spec.VsphereVolume.VolumePath, k8stype.NodeName(pod.Spec.NodeName))
err = waitForVSphereDiskToDetach(client, vsp, persistentvolumes[0].Spec.VsphereVolume.VolumePath, k8stype.NodeName(pod.Spec.NodeName))
By(fmt.Sprintf("%v Deleting the Claim: %v", logPrefix, pvclaim.Name))

View File

@ -55,13 +55,13 @@ const (
// Sanity check for vSphere testing. Verify the persistent disk attached to the node.
func verifyVSphereDiskAttached(vsp *vsphere.VSphere, volumePath string, nodeName types.NodeName) (bool, error) {
func verifyVSphereDiskAttached(c clientset.Interface, vsp *vsphere.VSphere, volumePath string, nodeName types.NodeName) (bool, error) {
var (
isAttached bool
err error
if vsp == nil {
vsp, err = vsphere.GetVSphere()
vsp, err = getVSphere(c)
isAttached, err = vsp.DiskIsAttached(volumePath, nodeName)
@ -70,7 +70,7 @@ func verifyVSphereDiskAttached(vsp *vsphere.VSphere, volumePath string, nodeName
// Wait until vsphere volumes are detached from the list of nodes or time out after 5 minutes
func waitForVSphereDisksToDetach(vsp *vsphere.VSphere, nodeVolumes map[k8stype.NodeName][]string) error {
func waitForVSphereDisksToDetach(c clientset.Interface, vsp *vsphere.VSphere, nodeVolumes map[k8stype.NodeName][]string) error {
var (
err error
disksAttached = true
@ -78,7 +78,7 @@ func waitForVSphereDisksToDetach(vsp *vsphere.VSphere, nodeVolumes map[k8stype.N
detachPollTime = 10 * time.Second
if vsp == nil {
vsp, err = vsphere.GetVSphere()
vsp, err = getVSphere(c)
if err != nil {
return err
@ -110,7 +110,7 @@ func waitForVSphereDisksToDetach(vsp *vsphere.VSphere, nodeVolumes map[k8stype.N
// Wait until vsphere vmdk moves to expected state on the given node, or time out after 6 minutes
func waitForVSphereDiskStatus(vsp *vsphere.VSphere, volumePath string, nodeName types.NodeName, expectedState volumeState) error {
func waitForVSphereDiskStatus(c clientset.Interface, vsp *vsphere.VSphere, volumePath string, nodeName types.NodeName, expectedState volumeState) error {
var (
err error
diskAttached bool
@ -130,7 +130,7 @@ func waitForVSphereDiskStatus(vsp *vsphere.VSphere, volumePath string, nodeName
err = wait.Poll(pollTime, timeout, func() (bool, error) {
diskAttached, err = verifyVSphereDiskAttached(vsp, volumePath, nodeName)
diskAttached, err = verifyVSphereDiskAttached(c, vsp, volumePath, nodeName)
if err != nil {
return true, err
@ -154,13 +154,13 @@ func waitForVSphereDiskStatus(vsp *vsphere.VSphere, volumePath string, nodeName
// Wait until vsphere vmdk is attached to the given node or time out after 6 minutes
func waitForVSphereDiskToAttach(vsp *vsphere.VSphere, volumePath string, nodeName types.NodeName) error {
return waitForVSphereDiskStatus(vsp, volumePath, nodeName, volumeStateAttached)
func waitForVSphereDiskToAttach(c clientset.Interface, vsp *vsphere.VSphere, volumePath string, nodeName types.NodeName) error {
return waitForVSphereDiskStatus(c, vsp, volumePath, nodeName, volumeStateAttached)
// Wait until vsphere vmdk is detached from the given node or time out after 6 minutes
func waitForVSphereDiskToDetach(vsp *vsphere.VSphere, volumePath string, nodeName types.NodeName) error {
return waitForVSphereDiskStatus(vsp, volumePath, nodeName, volumeStateDetached)
func waitForVSphereDiskToDetach(c clientset.Interface, vsp *vsphere.VSphere, volumePath string, nodeName types.NodeName) error {
return waitForVSphereDiskStatus(c, vsp, volumePath, nodeName, volumeStateDetached)
// function to create vsphere volume spec with given VMDK volume path, Reclaim Policy and labels
@ -414,12 +414,12 @@ func createEmptyFilesOnVSphereVolume(namespace string, podName string, filePaths
// verify volumes are attached to the node and are accessible in pod
func verifyVSphereVolumesAccessible(pod *v1.Pod, persistentvolumes []*v1.PersistentVolume, vsp *vsphere.VSphere) {
func verifyVSphereVolumesAccessible(c clientset.Interface, pod *v1.Pod, persistentvolumes []*v1.PersistentVolume, vsp *vsphere.VSphere) {
nodeName := pod.Spec.NodeName
namespace := pod.Namespace
for index, pv := range persistentvolumes {
// Verify disks are attached to the node
isAttached, err := verifyVSphereDiskAttached(vsp, pv.Spec.VsphereVolume.VolumePath, k8stype.NodeName(nodeName))
isAttached, err := verifyVSphereDiskAttached(c, vsp, pv.Spec.VsphereVolume.VolumePath, k8stype.NodeName(nodeName))
Expect(isAttached).To(BeTrue(), fmt.Sprintf("disk %v is not attached with the node", pv.Spec.VsphereVolume.VolumePath))
// Verify Volumes are accessible
@ -437,3 +437,23 @@ func getvSphereVolumePathFromClaim(client clientset.Interface, namespace string,
return pv.Spec.VsphereVolume.VolumePath
// addNodesToVCP lists all nodes through the given client and iterates over
// them. It returns the error from the list call if listing fails, and nil
// otherwise.
// NOTE(review): presumably each listed node is registered with vsp inside the
// loop — confirm against the loop body.
func addNodesToVCP(vsp *vsphere.VSphere, c clientset.Interface) error {
nodes, err := c.CoreV1().Nodes().List(metav1.ListOptions{})
if err != nil {
return err
for _, node := range nodes.Items {
return nil
// getVSphere obtains a VSphere instance via vsphere.GetVSphere and then calls
// addNodesToVCP with it and the given client. It returns the instance, or nil
// and the error when vsphere.GetVSphere fails.
func getVSphere(c clientset.Interface) (*vsphere.VSphere, error) {
vsp, err := vsphere.GetVSphere()
if err != nil {
return nil, err
// NOTE(review): the error returned by addNodesToVCP is discarded here, so a
// failed node listing is not reported to the caller.
addNodesToVCP(vsp, c)
return vsp, nil

View File

@ -25,7 +25,6 @@ import (
metav1 ""
clientset ""
@ -69,7 +68,7 @@ var _ = SIGDescribe("Volume Provisioning On Clustered Datastore [Feature:vsphere
It("verify static provisioning on clustered datastore", func() {
var volumePath string
vsp, err := vsphere.GetVSphere()
vsp, err := getVSphere(client)
By("creating a test vsphere volume")
@ -100,7 +99,7 @@ var _ = SIGDescribe("Volume Provisioning On Clustered Datastore [Feature:vsphere
nodeName := types.NodeName(pod.Spec.NodeName)
By("Verifying volume is attached")
isAttached, err := verifyVSphereDiskAttached(vsp, volumePath, nodeName)
isAttached, err := verifyVSphereDiskAttached(client, vsp, volumePath, nodeName)
Expect(isAttached).To(BeTrue(), fmt.Sprintf("disk: %s is not attached with the node: %v", volumePath, nodeName))
@ -109,7 +108,7 @@ var _ = SIGDescribe("Volume Provisioning On Clustered Datastore [Feature:vsphere
By("Waiting for volumes to be detached from the node")
err = waitForVSphereDiskToDetach(vsp, volumePath, nodeName)
err = waitForVSphereDiskToDetach(client, vsp, volumePath, nodeName)

View File

@ -68,7 +68,7 @@ var _ = SIGDescribe("Volume Provisioning on Datastore [Feature:vsphere]", func()
scParameters[DiskFormat] = ThinDisk
err := invokeInvalidDatastoreTestNeg(client, namespace, scParameters)
errorMsg := `Failed to provision volume with StorageClass \"` + DatastoreSCName + `\": datastore '` + InvalidDatastore + `' not found`
errorMsg := `Failed to provision volume with StorageClass \"` + DatastoreSCName + `\": The specified datastore ` + InvalidDatastore + ` is not a shared datastore across node VMs`
if !strings.Contains(err.Error(), errorMsg) {
Expect(err).NotTo(HaveOccurred(), errorMsg)

View File

@ -145,9 +145,9 @@ func invokeTest(f *framework.Framework, client clientset.Interface, namespace st
pod, err := client.CoreV1().Pods(namespace).Create(podSpec)
vsp, err := vsphere.GetVSphere()
vsp, err := getVSphere(client)
verifyVSphereDiskAttached(vsp, pv.Spec.VsphereVolume.VolumePath, k8stype.NodeName(nodeName))
verifyVSphereDiskAttached(client, vsp, pv.Spec.VsphereVolume.VolumePath, k8stype.NodeName(nodeName))
By("Waiting for pod to be running")
Expect(framework.WaitForPodNameRunningInNamespace(client, pod.Name, namespace)).To(Succeed())

View File

@ -97,7 +97,7 @@ func invokeTestForFstype(f *framework.Framework, client clientset.Interface, nam
framework.Logf("Invoking Test for fstype: %s", fstype)
scParameters := make(map[string]string)
scParameters["fstype"] = fstype
vsp, err := vsphere.GetVSphere()
vsp, err := getVSphere(client)
// Create Persistent Volume
@ -117,7 +117,7 @@ func invokeTestForFstype(f *framework.Framework, client clientset.Interface, nam
func invokeTestForInvalidFstype(f *framework.Framework, client clientset.Interface, namespace string, fstype string) {
scParameters := make(map[string]string)
scParameters["fstype"] = fstype
vsp, err := vsphere.GetVSphere()
vsp, err := getVSphere(client)
// Create Persistent Volume
@ -170,12 +170,12 @@ func createPodAndVerifyVolumeAccessible(client clientset.Interface, namespace st
pvclaims = append(pvclaims, pvclaim)
By("Creating pod to attach PV to the node")
// Create pod to attach Volume to Node
pod, err := framework.CreatePod(client, namespace, nil, pvclaims, false, "")
pod, err := framework.CreatePod(client, namespace, nil, pvclaims, false, ExecCommand)
// Asserts: Right disk is attached to the pod
By("Verify the volume is accessible and available in the pod")
verifyVSphereVolumesAccessible(pod, persistentvolumes, vsp)
verifyVSphereVolumesAccessible(client, pod, persistentvolumes, vsp)
return pod
@ -184,7 +184,7 @@ func detachVolume(f *framework.Framework, client clientset.Interface, vsp *vsphe
framework.DeletePodWithWait(f, client, pod)
By("Waiting for volumes to be detached from the node")
waitForVSphereDiskToDetach(vsp, volPath, k8stype.NodeName(pod.Spec.NodeName))
waitForVSphereDiskToDetach(client, vsp, volPath, k8stype.NodeName(pod.Spec.NodeName))
func deleteVolume(client clientset.Interface, pvclaimName string, namespace string) {

View File

@ -27,7 +27,6 @@ import (
clientset ""
@ -79,7 +78,7 @@ var _ = SIGDescribe("Volume Attach Verify [Feature:vsphere][Serial][Disruptive]"
It("verify volume remains attached after master kubelet restart", func() {
vsp, err := vsphere.GetVSphere()
vsp, err := getVSphere(client)
// Create pod on each node
@ -106,7 +105,7 @@ var _ = SIGDescribe("Volume Attach Verify [Feature:vsphere][Serial][Disruptive]"
nodeName := types.NodeName(pod.Spec.NodeName)
By(fmt.Sprintf("Verify volume %s is attached to the pod %v", volumePath, nodeName))
isAttached, err := verifyVSphereDiskAttached(vsp, volumePath, types.NodeName(nodeName))
isAttached, err := verifyVSphereDiskAttached(client, vsp, volumePath, types.NodeName(nodeName))
Expect(isAttached).To(BeTrue(), fmt.Sprintf("disk: %s is not attached with the node", volumePath))
@ -126,7 +125,7 @@ var _ = SIGDescribe("Volume Attach Verify [Feature:vsphere][Serial][Disruptive]"
nodeName := types.NodeName(pod.Spec.NodeName)
By(fmt.Sprintf("After master restart, verify volume %v is attached to the pod %v", volumePath, nodeName))
isAttached, err := verifyVSphereDiskAttached(vsp, volumePaths[i], types.NodeName(nodeName))
isAttached, err := verifyVSphereDiskAttached(client, vsp, volumePaths[i], types.NodeName(nodeName))
Expect(isAttached).To(BeTrue(), fmt.Sprintf("disk: %s is not attached with the node", volumePath))
@ -135,7 +134,7 @@ var _ = SIGDescribe("Volume Attach Verify [Feature:vsphere][Serial][Disruptive]"
By(fmt.Sprintf("Waiting for volume %s to be detached from the node %v", volumePath, nodeName))
err = waitForVSphereDiskToDetach(vsp, volumePath, types.NodeName(nodeName))
err = waitForVSphereDiskToDetach(client, vsp, volumePath, types.NodeName(nodeName))
By(fmt.Sprintf("Deleting volume %s", volumePath))

View File

@ -61,7 +61,7 @@ var _ = SIGDescribe("Node Poweroff [Feature:vsphere] [Slow] [Disruptive]", func(
nodeList := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
Expect(nodeList.Items).NotTo(BeEmpty(), "Unable to find ready and schedulable Node")
Expect(len(nodeList.Items) > 1).To(BeTrue(), "At least 2 nodes are required for this test")
vsp, err = vsphere.GetVSphere()
vsp, err = getVSphere(client)
workingDir = os.Getenv("VSPHERE_WORKING_DIR")
@ -112,7 +112,7 @@ var _ = SIGDescribe("Node Poweroff [Feature:vsphere] [Slow] [Disruptive]", func(
node1 := types.NodeName(pod.Spec.NodeName)
By(fmt.Sprintf("Verify disk is attached to the node: %v", node1))
isAttached, err := verifyVSphereDiskAttached(vsp, volumePath, node1)
isAttached, err := verifyVSphereDiskAttached(client, vsp, volumePath, node1)
Expect(isAttached).To(BeTrue(), "Disk is not attached to the node")
@ -139,11 +139,11 @@ var _ = SIGDescribe("Node Poweroff [Feature:vsphere] [Slow] [Disruptive]", func(
Expect(err).NotTo(HaveOccurred(), "Pod did not fail over to a different node")
By(fmt.Sprintf("Waiting for disk to be attached to the new node: %v", node2))
err = waitForVSphereDiskToAttach(vsp, volumePath, node2)
err = waitForVSphereDiskToAttach(client, vsp, volumePath, node2)
Expect(err).NotTo(HaveOccurred(), "Disk is not attached to the node")
By(fmt.Sprintf("Waiting for disk to be detached from the previous node: %v", node1))
err = waitForVSphereDiskToDetach(vsp, volumePath, node1)
err = waitForVSphereDiskToDetach(client, vsp, volumePath, node1)
Expect(err).NotTo(HaveOccurred(), "Disk is not detached from the node")
By(fmt.Sprintf("Power on the previous node: %v", node1))

View File

@ -75,7 +75,7 @@ var _ = SIGDescribe("Volume Operations Storm [Feature:vsphere]", func() {
volume_ops_scale = DEFAULT_VOLUME_OPS_SCALE
pvclaims = make([]*v1.PersistentVolumeClaim, volume_ops_scale)
vsp, err = vsphere.GetVSphere()
vsp, err = getVSphere(client)
AfterEach(func() {
@ -113,14 +113,14 @@ var _ = SIGDescribe("Volume Operations Storm [Feature:vsphere]", func() {
By("Verify all volumes are accessible and available in the pod")
verifyVSphereVolumesAccessible(pod, persistentvolumes, vsp)
verifyVSphereVolumesAccessible(client, pod, persistentvolumes, vsp)
By("Deleting pod")
framework.ExpectNoError(framework.DeletePodWithWait(f, client, pod))
By("Waiting for volumes to be detached from the node")
for _, pv := range persistentvolumes {
waitForVSphereDiskToDetach(vsp, pv.Spec.VsphereVolume.VolumePath, k8stype.NodeName(pod.Spec.NodeName))
waitForVSphereDiskToDetach(client, vsp, pv.Spec.VsphereVolume.VolumePath, k8stype.NodeName(pod.Spec.NodeName))

View File

@ -28,7 +28,6 @@ import (
storageV1 ""
clientset ""
@ -214,11 +213,11 @@ func invokeVolumeLifeCyclePerformance(f *framework.Framework, client clientset.I
latency[AttachOp] = elapsed.Seconds()
// Verify access to the volumes
vsp, err := vsphere.GetVSphere()
vsp, err := getVSphere(client)
for i, pod := range totalpods {
verifyVSphereVolumesAccessible(pod, totalpvs[i], vsp)
verifyVSphereVolumesAccessible(client, pod, totalpvs[i], vsp)
By("Deleting pods")
@ -237,7 +236,7 @@ func invokeVolumeLifeCyclePerformance(f *framework.Framework, client clientset.I
err = waitForVSphereDisksToDetach(vsp, nodeVolumeMap)
err = waitForVSphereDisksToDetach(client, vsp, nodeVolumeMap)
By("Deleting the PVCs")

View File

@ -57,7 +57,7 @@ var _ = SIGDescribe("Volume Placement", func() {
isNodeLabeled = true
By("creating vmdk")
vsp, err = vsphere.GetVSphere()
vsp, err = getVSphere(c)
volumePath, err := createVSphereVolume(vsp, nil)
@ -285,7 +285,7 @@ var _ = SIGDescribe("Volume Placement", func() {
framework.ExpectNoError(framework.DeletePodWithWait(f, c, podB), "defer: Failed to delete pod ", podB.Name)
By(fmt.Sprintf("wait for volumes to be detached from the node: %v", node1Name))
for _, volumePath := range volumePaths {
framework.ExpectNoError(waitForVSphereDiskToDetach(vsp, volumePath, types.NodeName(node1Name)))
framework.ExpectNoError(waitForVSphereDiskToDetach(c, vsp, volumePath, types.NodeName(node1Name)))
@ -362,7 +362,7 @@ func createPodWithVolumeAndNodeSelector(client clientset.Interface, namespace st
By(fmt.Sprintf("Verify volume is attached to the node:%v", nodeName))
for _, volumePath := range volumePaths {
isAttached, err := verifyVSphereDiskAttached(vsp, volumePath, types.NodeName(nodeName))
isAttached, err := verifyVSphereDiskAttached(client, vsp, volumePath, types.NodeName(nodeName))
Expect(isAttached).To(BeTrue(), "disk:"+volumePath+" is not attached with the node")
@ -385,6 +385,6 @@ func deletePodAndWaitForVolumeToDetach(f *framework.Framework, c clientset.Inter
By("Waiting for volume to be detached from the node")
for _, volumePath := range volumePaths {
framework.ExpectNoError(waitForVSphereDiskToDetach(vsp, volumePath, types.NodeName(nodeName)))
framework.ExpectNoError(waitForVSphereDiskToDetach(c, vsp, volumePath, types.NodeName(nodeName)))

View File

@ -295,16 +295,16 @@ func invokeValidPolicyTest(f *framework.Framework, client clientset.Interface, n
pod, err := framework.CreatePod(client, namespace, nil, pvclaims, false, "")
vsp, err := vsphere.GetVSphere()
vsp, err := getVSphere(client)
By("Verify the volume is accessible and available in the pod")
verifyVSphereVolumesAccessible(pod, persistentvolumes, vsp)
verifyVSphereVolumesAccessible(client, pod, persistentvolumes, vsp)
By("Deleting pod")
framework.DeletePodWithWait(f, client, pod)
By("Waiting for volumes to be detached from the node")
waitForVSphereDiskToDetach(vsp, persistentvolumes[0].Spec.VsphereVolume.VolumePath, k8stype.NodeName(pod.Spec.NodeName))
waitForVSphereDiskToDetach(client, vsp, persistentvolumes[0].Spec.VsphereVolume.VolumePath, k8stype.NodeName(pod.Spec.NodeName))
func invokeInvalidPolicyTestNeg(client clientset.Interface, namespace string, scParameters map[string]string) error {

View File

@ -42,6 +42,7 @@ go_library(

View File

@ -876,3 +876,195 @@ func TestOverlappingDeployments(t *testing.T) {
// Deployment should not block rollout when updating spec replica number and template at the same time.
func TestScaledRolloutDeployment(t *testing.T) {
s, closeFn, rm, dc, informers, c := dcSetup(t)
defer closeFn()
name := "test-scaled-rollout-deployment"
ns := framework.CreateTestingNamespace(name, s, t)
defer framework.DeleteTestingNamespace(ns, s, t)
stopCh := make(chan struct{})
defer close(stopCh)
go rm.Run(5, stopCh)
go dc.Run(5, stopCh)
// Create a deployment with rolling update strategy, max surge = 3, and max unavailable = 2
var err error
replicas := int32(10)
tester := &deploymentTester{t: t, c: c, deployment: newDeployment(name, ns.Name, replicas)}
tester.deployment.Spec.Strategy.RollingUpdate.MaxSurge = intOrStrP(3)
tester.deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = intOrStrP(2)
tester.deployment, err = c.ExtensionsV1beta1().Deployments(ns.Name).Create(tester.deployment)
if err != nil {
t.Fatalf("failed to create deployment %q: %v", name, err)
if err = tester.waitForDeploymentRevisionAndImage("1", fakeImage); err != nil {
if err = tester.waitForDeploymentCompleteAndMarkPodsReady(); err != nil {
t.Fatalf("deployment %q failed to complete: %v", name, err)
// Record current replicaset before starting new rollout
firstRS, err := tester.expectNewReplicaSet()
if err != nil {
// Update the deployment with another new image but do not mark the pods as ready to block new replicaset
fakeImage2 := "fakeimage2"
tester.deployment, err = tester.updateDeployment(func(update *v1beta1.Deployment) {
update.Spec.Template.Spec.Containers[0].Image = fakeImage2
if err != nil {
t.Fatalf("failed updating deployment %q: %v", name, err)
if err = tester.waitForDeploymentRevisionAndImage("2", fakeImage2); err != nil {
// Verify the deployment has minimum available replicas after 2nd rollout
tester.deployment, err = c.ExtensionsV1beta1().Deployments(ns.Name).Get(name, metav1.GetOptions{})
if err != nil {
t.Fatalf("failed to get deployment %q: %v", name, err)
minAvailableReplicas := deploymentutil.MinAvailable(tester.deployment)
if tester.deployment.Status.AvailableReplicas < minAvailableReplicas {
t.Fatalf("deployment %q does not have minimum number of available replicas after 2nd rollout", name)
// Wait for old replicaset of 1st rollout to have desired replicas
firstRS, err = c.ExtensionsV1beta1().ReplicaSets(ns.Name).Get(firstRS.Name, metav1.GetOptions{})
if err != nil {
t.Fatalf("failed to get replicaset %q: %v", firstRS.Name, err)
if err = tester.waitRSStable(firstRS); err != nil {
// Wait for new replicaset of 2nd rollout to have desired replicas
secondRS, err := tester.expectNewReplicaSet()
if err != nil {
if err = tester.waitRSStable(secondRS); err != nil {
// Scale up the deployment and update its image to another new image simultaneously (this time marks all pods as ready)
newReplicas := int32(20)
fakeImage3 := "fakeimage3"
tester.deployment, err = tester.updateDeployment(func(update *v1beta1.Deployment) {
update.Spec.Replicas = &newReplicas
update.Spec.Template.Spec.Containers[0].Image = fakeImage3
if err != nil {
t.Fatalf("failed updating deployment %q: %v", name, err)
if err = tester.waitForDeploymentRevisionAndImage("3", fakeImage3); err != nil {
if err = tester.waitForDeploymentCompleteAndMarkPodsReady(); err != nil {
t.Fatalf("deployment %q failed to complete: %v", name, err)
// Verify every replicaset has correct desiredReplicas annotation after 3rd rollout
thirdRS, err := deploymentutil.GetNewReplicaSet(tester.deployment, c.ExtensionsV1beta1())
if err != nil {
t.Fatalf("failed getting new revision 3 replicaset for deployment %q: %v", name, err)
rss := []*v1beta1.ReplicaSet{firstRS, secondRS, thirdRS}
for _, curRS := range rss {
curRS, err = c.ExtensionsV1beta1().ReplicaSets(ns.Name).Get(curRS.Name, metav1.GetOptions{})
if err != nil {
t.Fatalf("failed to get replicaset when checking desired replicas annotation: %v", err)
desired, ok := deploymentutil.GetDesiredReplicasAnnotation(curRS)
if !ok {
t.Fatalf("failed to retrieve desiredReplicas annotation for replicaset %q", curRS.Name)
if desired != *(tester.deployment.Spec.Replicas) {
t.Fatalf("unexpected desiredReplicas annotation for replicaset %q: expected %d, got %d", curRS.Name, *(tester.deployment.Spec.Replicas), desired)
// Update the deployment with another new image but do not mark the pods as ready to block new replicaset
fakeImage4 := "fakeimage4"
tester.deployment, err = tester.updateDeployment(func(update *v1beta1.Deployment) {
update.Spec.Template.Spec.Containers[0].Image = fakeImage4
if err != nil {
t.Fatalf("failed updating deployment %q: %v", name, err)
if err = tester.waitForDeploymentRevisionAndImage("4", fakeImage4); err != nil {
// Verify the deployment has minimum available replicas after 4th rollout
tester.deployment, err = c.ExtensionsV1beta1().Deployments(ns.Name).Get(name, metav1.GetOptions{})
if err != nil {
t.Fatalf("failed to get deployment %q: %v", name, err)
minAvailableReplicas = deploymentutil.MinAvailable(tester.deployment)
if tester.deployment.Status.AvailableReplicas < minAvailableReplicas {
t.Fatalf("deployment %q does not have minimum number of available replicas after 4th rollout", name)
// Wait for old replicaset of 3rd rollout to have desired replicas
thirdRS, err = c.ExtensionsV1beta1().ReplicaSets(ns.Name).Get(thirdRS.Name, metav1.GetOptions{})
if err != nil {
t.Fatalf("failed to get replicaset %q: %v", thirdRS.Name, err)
if err = tester.waitRSStable(thirdRS); err != nil {
// Wait for new replicaset of 4th rollout to have desired replicas
fourthRS, err := tester.expectNewReplicaSet()
if err != nil {
if err = tester.waitRSStable(fourthRS); err != nil {
// Scale down the deployment and update its image to another new image simultaneously (this time marks all pods as ready)
newReplicas = int32(5)
fakeImage5 := "fakeimage5"
tester.deployment, err = tester.updateDeployment(func(update *v1beta1.Deployment) {
update.Spec.Replicas = &newReplicas
update.Spec.Template.Spec.Containers[0].Image = fakeImage5
if err != nil {
t.Fatalf("failed updating deployment %q: %v", name, err)
if err = tester.waitForDeploymentRevisionAndImage("5", fakeImage5); err != nil {
if err = tester.waitForDeploymentCompleteAndMarkPodsReady(); err != nil {
t.Fatalf("deployment %q failed to complete: %v", name, err)
// Verify every replicaset has correct desiredReplicas annotation after 5th rollout
fifthRS, err := deploymentutil.GetNewReplicaSet(tester.deployment, c.ExtensionsV1beta1())
if err != nil {
t.Fatalf("failed getting new revision 5 replicaset for deployment %q: %v", name, err)
rss = []*v1beta1.ReplicaSet{thirdRS, fourthRS, fifthRS}
for _, curRS := range rss {
curRS, err = c.ExtensionsV1beta1().ReplicaSets(ns.Name).Get(curRS.Name, metav1.GetOptions{})
if err != nil {
t.Fatalf("failed to get replicaset when checking desired replicas annotation: %v", err)
desired, ok := deploymentutil.GetDesiredReplicasAnnotation(curRS)
if !ok {
t.Fatalf("failed to retrieve desiredReplicas annotation for replicaset %q", curRS.Name)
if desired != *(tester.deployment.Spec.Replicas) {
t.Fatalf("unexpected desiredReplicas annotation for replicaset %q: expected %d, got %d", curRS.Name, *(tester.deployment.Spec.Replicas), desired)

View File

@ -26,6 +26,7 @@ import (
metav1 ""
clientset ""
@ -80,7 +81,8 @@ func newDeployment(name, ns string, replicas int32) *v1beta1.Deployment {
Replicas: &replicas,
Selector: &metav1.LabelSelector{MatchLabels: testLabels()},
Strategy: v1beta1.DeploymentStrategy{
Type: v1beta1.RollingUpdateDeploymentStrategyType,
Type: v1beta1.RollingUpdateDeploymentStrategyType,
RollingUpdate: new(v1beta1.RollingUpdateDeployment),
Template: v1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
@ -212,6 +214,11 @@ func markPodReady(c clientset.Interface, ns string, pod *v1.Pod) error {
return err
func intOrStrP(num int) *intstr.IntOrString {
intstr := intstr.FromInt(num)
return &intstr
// markUpdatedPodsReady manually marks updated Deployment pods status to ready,
// until the deployment is complete
func (d *deploymentTester) markUpdatedPodsReady(wg *sync.WaitGroup) {
@ -405,3 +412,7 @@ func (d *deploymentTester) listUpdatedPods() ([]v1.Pod, error) {
return ownedPods, nil
func (d *deploymentTester) waitRSStable(replicaset *v1beta1.ReplicaSet) error {
return testutil.WaitRSStable(d.t, d.c, replicaset, pollInterval, pollTimeout)

View File

@ -18,6 +18,7 @@ go_test(

View File

@ -41,6 +41,7 @@ import (
podutil ""
testutil ""
const (
@ -217,15 +218,8 @@ func createRSsPods(t *testing.T, clientSet clientset.Interface, rss []*v1beta1.R
// Verify .Status.Replicas is equal to .Spec.Replicas
func waitRSStable(t *testing.T, clientSet clientset.Interface, rs *v1beta1.ReplicaSet) {
rsClient := clientSet.Extensions().ReplicaSets(rs.Namespace)
if err := wait.PollImmediate(interval, timeout, func() (bool, error) {
newRS, err := rsClient.Get(rs.Name, metav1.GetOptions{})
if err != nil {
return false, err
return newRS.Status.Replicas == *rs.Spec.Replicas, nil
}); err != nil {
t.Fatalf("Failed to verify .Status.Replicas is equal to .Spec.Replicas for rs %s: %v", rs.Name, err)
if err := testutil.WaitRSStable(t, clientSet, rs, interval, timeout); err != nil {

View File

@ -18,6 +18,7 @@ package utils
import (
extensions ""
@ -50,3 +51,18 @@ func UpdateReplicaSetWithRetries(c clientset.Interface, namespace, name string,
return rs, pollErr
// Verify .Status.Replicas is equal to .Spec.Replicas
func WaitRSStable(t *testing.T, clientSet clientset.Interface, rs *extensions.ReplicaSet, pollInterval, pollTimeout time.Duration) error {
desiredGeneration := rs.Generation
if err := wait.PollImmediate(pollInterval, pollTimeout, func() (bool, error) {
newRS, err := clientSet.ExtensionsV1beta1().ReplicaSets(rs.Namespace).Get(rs.Name, metav1.GetOptions{})
if err != nil {
return false, err
return newRS.Status.ObservedGeneration >= desiredGeneration && newRS.Status.Replicas == *rs.Spec.Replicas, nil
}); err != nil {
return fmt.Errorf("failed to verify .Status.Replicas is equal to .Spec.Replicas for replicaset %q: %v", rs.Name, err)
return nil