Browse Source

[zero] avoid zero hook spam by changing log to debug level (#1137)

pull/1140/head
Frank Lee 2 years ago committed by GitHub
parent
commit
15aab1476e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      colossalai/nn/parallel/data_parallel.py
  2. 2
      colossalai/zero/utils/zero_hook.py

2
colossalai/nn/parallel/data_parallel.py

@@ -181,7 +181,7 @@ class ColoDDPV2(ColoDDP):
def _post_backward(self): def _post_backward(self):
self.chunk_manager.exec_lazy_release() self.chunk_manager.exec_lazy_release()
self._setup_grads_ptr() self._setup_grads_ptr()
self._logger.info( self._logger.debug(
f'layout time: {self.gemini_manager._layout_time}, evict time: {self.gemini_manager._evict_time}, PCIE move vol: {self.gemini_manager._cpu_gpu_move_volume}B' f'layout time: {self.gemini_manager._layout_time}, evict time: {self.gemini_manager._evict_time}, PCIE move vol: {self.gemini_manager._cpu_gpu_move_volume}B'
) )
self.gemini_manager.post_iter() self.gemini_manager.post_iter()

2
colossalai/zero/utils/zero_hook.py

@@ -113,7 +113,7 @@ class ZeroHook(BaseOpHook):
def post_iter(self): def post_iter(self):
if self._stateful_tensor_mgr: if self._stateful_tensor_mgr:
self.logger.info( self.logger.debug(
f"CPU-GPU data moving this iteration {self._stateful_tensor_mgr.cpu_gpu_move_volume/1e9} GB, get layout info time: {self._stateful_tensor_mgr._layout_time}, evict cpu time: {self._stateful_tensor_mgr._evict_time}", f"CPU-GPU data moving this iteration {self._stateful_tensor_mgr.cpu_gpu_move_volume/1e9} GB, get layout info time: {self._stateful_tensor_mgr._layout_time}, evict cpu time: {self._stateful_tensor_mgr._evict_time}",
ranks=[0]) ranks=[0])
self._stateful_tensor_mgr.finish_iter() self._stateful_tensor_mgr.finish_iter()

Loading…
Cancel
Save