Merge pull request #67353 from yguo0905/tpu-api-timeout

Automatic merge from submit-queue (batch tested with PRs 67375, 67353). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Allow passing timeout on TPU API operations via context

The caller will provide a context with a timeout attached.

**Release note**:

```release-note
None
```

/assign @vishh
pull/8/head
Kubernetes Submit Queue 2018-08-16 00:54:59 -07:00 committed by GitHub
commit 59fdc02b13
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 14 additions and 6 deletions

View File

@ -65,7 +65,7 @@ func (gce *GCECloud) CreateTPU(ctx context.Context, name, zone string, node *tpu
} }
glog.V(2).Infof("Creating Cloud TPU %q in zone %q with operation %q", name, zone, op.Name) glog.V(2).Infof("Creating Cloud TPU %q in zone %q with operation %q", name, zone, op.Name)
op, err = gce.waitForTPUOp(30*time.Second, 10*time.Minute, op) op, err = gce.waitForTPUOp(ctx, op)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -98,7 +98,7 @@ func (gce *GCECloud) DeleteTPU(ctx context.Context, name, zone string) error {
} }
glog.V(2).Infof("Deleting Cloud TPU %q in zone %q with operation %q", name, zone, op.Name) glog.V(2).Infof("Deleting Cloud TPU %q in zone %q with operation %q", name, zone, op.Name)
op, err = gce.waitForTPUOp(30*time.Second, 10*time.Minute, op) op, err = gce.waitForTPUOp(ctx, op)
if err != nil { if err != nil {
return err return err
} }
@ -133,10 +133,18 @@ func (gce *GCECloud) ListTPUs(ctx context.Context, zone string) ([]*tpuapi.Node,
return response.Nodes, mc.Observe(nil) return response.Nodes, mc.Observe(nil)
} }
// waitForTPUOp checks whether the op is done every interval before the timeout // waitForTPUOp checks whether the op is done every 30 seconds before the ctx
// occurs. // is cancelled.
func (gce *GCECloud) waitForTPUOp(interval, timeout time.Duration, op *tpuapi.Operation) (*tpuapi.Operation, error) { func (gce *GCECloud) waitForTPUOp(ctx context.Context, op *tpuapi.Operation) (*tpuapi.Operation, error) {
if err := wait.PollImmediate(interval, timeout, func() (bool, error) { if err := wait.PollInfinite(30*time.Second, func() (bool, error) {
// Check if context has been cancelled.
select {
case <-ctx.Done():
glog.V(3).Infof("Context for operation %q has been cancelled: %s", op.Name, ctx.Err())
return true, ctx.Err()
default:
}
glog.V(3).Infof("Waiting for operation %q to complete...", op.Name) glog.V(3).Infof("Waiting for operation %q to complete...", op.Name)
start := time.Now() start := time.Now()