Merge pull request #65223 from jsafrane/aws-inaccessible-key

Automatic merge from submit-queue (batch tested with PRs 65187, 65206, 65223, 64752, 65238). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Fixed detection of inaccessible AWS encryption key.

AWS provisioner now checks if created encrypted volume gets "Available" or it gets silently deleted by AWS because StorageClass referenced invalid (e.g. non-existing) KMS key for encryption.

This seems to be the only way how to detect such invalid key, because Kubernetes may not have enough permission to check if the key exists.

**Which issue(s) this PR fixes**
Fixes #62171

**Special notes for your reviewer**:

**Release note**:

```release-note
AWS now checks for validity of ecryption key when creating encrypted volumes. Dynamic provisioning of encrypted volume may get slower due to these checks.
```

/sig aws
/sig storage
@kubernetes/sig-aws-misc
pull/8/head
Kubernetes Submit Queue 2018-06-21 19:48:10 -07:00 committed by GitHub
commit 18b16be622
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 93 additions and 21 deletions

View File

@ -220,6 +220,13 @@ const (
createTagFactor = 2.0
createTagSteps = 9
// encryptedCheck* is configuration of poll for created volume to check
// it has not been silently removed by AWS.
// On a random AWS account (shared among several developers) it took 4s on
// average.
encryptedCheckInterval = 1 * time.Second
encryptedCheckTimeout = 30 * time.Second
// Number of node names that can be added to a filter. The AWS limit is 200
// but we are using a lower limit on purpose
filterNodeLimit = 150
@ -2186,14 +2193,6 @@ func (c *Cloud) CreateDisk(volumeOptions *VolumeOptions) (KubernetesVolumeID, er
request.VolumeType = aws.String(createType)
request.Encrypted = aws.Bool(volumeOptions.Encrypted)
if len(volumeOptions.KmsKeyId) > 0 {
if missing, err := c.checkEncryptionKey(volumeOptions.KmsKeyId); err != nil {
if missing {
// KSM key is missing, provisioning would fail
return "", err
}
// Log checkEncryptionKey error and try provisioning anyway.
glog.Warningf("Cannot check KSM key %s: %v", volumeOptions.KmsKeyId, err)
}
request.KmsKeyId = aws.String(volumeOptions.KmsKeyId)
request.Encrypted = aws.Bool(true)
}
@ -2222,24 +2221,50 @@ func (c *Cloud) CreateDisk(volumeOptions *VolumeOptions) (KubernetesVolumeID, er
return "", fmt.Errorf("error tagging volume %s: %q", volumeName, err)
}
// AWS has a bad habbit of reporting success when creating a volume with
// encryption keys that either don't exists or have wrong permissions.
// Such volume lives for couple of seconds and then it's silently deleted
// by AWS. There is no other check to ensure that given KMS key is correct,
// because Kubernetes may have limited permissions to the key.
if len(volumeOptions.KmsKeyId) > 0 {
err := c.waitUntilVolumeAvailable(volumeName)
if err != nil {
if isAWSErrorVolumeNotFound(err) {
err = fmt.Errorf("failed to create encrypted volume: the volume disappeared after creation, most likely due to inaccessible KMS encryption key")
}
return "", err
}
}
return volumeName, nil
}
// checkEncryptionKey tests that given encryption key exists.
func (c *Cloud) checkEncryptionKey(keyId string) (missing bool, err error) {
input := &kms.DescribeKeyInput{
KeyId: aws.String(keyId),
func (c *Cloud) waitUntilVolumeAvailable(volumeName KubernetesVolumeID) error {
disk, err := newAWSDisk(c, volumeName)
if err != nil {
// Unreachable code
return err
}
_, err = c.kms.DescribeKey(input)
if err == nil {
return false, nil
}
if awsError, ok := err.(awserr.Error); ok {
if awsError.Code() == "NotFoundException" {
return true, fmt.Errorf("KMS key %s not found: %q", keyId, err)
err = wait.Poll(encryptedCheckInterval, encryptedCheckTimeout, func() (done bool, err error) {
vol, err := disk.describeVolume()
if err != nil {
return true, err
}
}
return false, fmt.Errorf("Error checking KSM key %s: %q", keyId, err)
if vol.State != nil {
switch *vol.State {
case "available":
// The volume is Available, it won't be deleted now.
return true, nil
case "creating":
return false, nil
default:
return true, fmt.Errorf("unexpected State of newly created AWS EBS volume %s: %q", volumeName, *vol.State)
}
}
return false, nil
})
return err
}
// DeleteDisk implements Volumes.DeleteDisk

View File

@ -851,6 +851,53 @@ var _ = utils.SIGDescribe("Dynamic Provisioning", func() {
testDynamicProvisioning(test, c, claim, nil)
})
})
Describe("Invalid AWS KMS key", func() {
It("should report an error and create no PV", func() {
framework.SkipUnlessProviderIs("aws")
test := storageClassTest{
name: "AWS EBS with invalid KMS key",
provisioner: "kubernetes.io/aws-ebs",
claimSize: "2Gi",
parameters: map[string]string{"kmsKeyId": "arn:aws:kms:us-east-1:123456789012:key/55555555-5555-5555-5555-555555555555"},
}
By("creating a StorageClass")
suffix := fmt.Sprintf("invalid-aws")
class := newStorageClass(test, ns, suffix)
class, err := c.StorageV1().StorageClasses().Create(class)
Expect(err).NotTo(HaveOccurred())
defer func() {
framework.Logf("deleting storage class %s", class.Name)
framework.ExpectNoError(c.StorageV1().StorageClasses().Delete(class.Name, nil))
}()
By("creating a claim object with a suffix for gluster dynamic provisioner")
claim := newClaim(test, ns, suffix)
claim.Spec.StorageClassName = &class.Name
claim, err = c.CoreV1().PersistentVolumeClaims(claim.Namespace).Create(claim)
Expect(err).NotTo(HaveOccurred())
defer func() {
framework.Logf("deleting claim %q/%q", claim.Namespace, claim.Name)
err = c.CoreV1().PersistentVolumeClaims(claim.Namespace).Delete(claim.Name, nil)
if err != nil && !apierrs.IsNotFound(err) {
framework.Failf("Error deleting claim %q. Error: %v", claim.Name, err)
}
}()
// Watch events until the message about invalid key appears
err = wait.Poll(time.Second, framework.ClaimProvisionTimeout, func() (bool, error) {
events, err := c.CoreV1().Events(claim.Namespace).List(metav1.ListOptions{})
Expect(err).NotTo(HaveOccurred())
for _, event := range events.Items {
if strings.Contains(event.Message, "failed to create encrypted volume: the volume disappeared after creation, most likely due to inaccessible KMS encryption key") {
return true, nil
}
}
return false, nil
})
Expect(err).NotTo(HaveOccurred())
})
})
})
func getDefaultStorageClassName(c clientset.Interface) string {