add zero broadcast_sync

pull/448/head
Qu Wenwen 2023-10-26 16:27:03 +08:00
parent 3c992a2101
commit 15ff413362
1 changed file with 3 additions and 0 deletions

View File

@ -710,6 +710,9 @@ class HybridZeroOptimizer(BaseOptimizer):
with torch.cuda.stream(self._comm_bcast_stream):
self.broadcast_params()
if not self._overlap_sync_param:
torch.cuda.synchronize()
timer("step").stop()
# update gradients may not be needed here, because the sync_params function is used in initialization,