mirror of https://github.com/hpcaitech/ColossalAI
[gemini] quick fix on possible async operation (#5803)
* [gemini] quick fix on possible async operation * [gemini] quick fix on possible async operation pull/5812/head
parent
d9dddf574f
commit
3bcbba9262
|
@ -55,6 +55,15 @@ class GeminiZeROHook(ColoParamOpHook):
|
|||
)
|
||||
|
||||
# prefetch
|
||||
if self._gemini_manager.chunk_manager._prefetch_stream is not None:
|
||||
# This is when prefetch happens the first time and there is no dist.Work to sync,
|
||||
# there is a possibility that the optimizer hasn't finished computation on the default stream,
|
||||
# thus we might prefetch outdated chunks there.
|
||||
#
|
||||
# Other than that, self._gemini_manager.wait_chunks will have synced with default stream
|
||||
# by calling dist.Work.wait() and this line makes no difference.
|
||||
self._gemini_manager.chunk_manager._prefetch_stream.wait_stream(torch.cuda.current_stream())
|
||||
|
||||
with get_accelerator().stream(self._gemini_manager.chunk_manager._prefetch_stream):
|
||||
for chunk in chunks_fetch_async:
|
||||
maybe_work = self._chunk_manager.access_chunk(chunk, async_access=True)
|
||||
|
|
Loading…
Reference in New Issue