[NFC] polish colossalai/engine/gradient_handler/utils.py code style (#2708)

pull/2709/head
CZYCW 2 years ago committed by GitHub
parent 6427c406cf
commit 4ac8bfb072
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,29 +1,30 @@
import torch.distributed as dist
import torch.nn as nn
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
from typing import Iterable
def bucket_allreduce(param_list: Iterable[nn.Parameter], group=None):
# get communication world size
comm_size = dist.get_world_size(group)
# bucketize and all-reduce
buckets = {}
# Pack the buckets.
for param in param_list:
if param.requires_grad and param.grad is not None:
tp = param.data.type()
if tp not in buckets:
buckets[tp] = []
buckets[tp].append(param)
# For each bucket, all-reduce and copy all-reduced grads.
for tp in buckets:
bucket = buckets[tp]
grads = [param.grad.data for param in bucket]
coalesced = _flatten_dense_tensors(grads)
coalesced /= comm_size
dist.all_reduce(coalesced, group=group)
for buf, synced in zip(grads, _unflatten_dense_tensors(coalesced, grads)):
buf.copy_(synced)
from typing import Iterable
import torch.distributed as dist
import torch.nn as nn
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
def bucket_allreduce(param_list: Iterable[nn.Parameter], group=None):
# get communication world size
comm_size = dist.get_world_size(group)
# bucketize and all-reduce
buckets = {}
# Pack the buckets.
for param in param_list:
if param.requires_grad and param.grad is not None:
tp = param.data.type()
if tp not in buckets:
buckets[tp] = []
buckets[tp].append(param)
# For each bucket, all-reduce and copy all-reduced grads.
for tp in buckets:
bucket = buckets[tp]
grads = [param.grad.data for param in bucket]
coalesced = _flatten_dense_tensors(grads)
coalesced /= comm_size
dist.all_reduce(coalesced, group=group)
for buf, synced in zip(grads, _unflatten_dense_tensors(coalesced, grads)):
buf.copy_(synced)

Loading…
Cancel
Save