diff --git a/colossalai/engine/gradient_handler/_zero_gradient_handler.py b/colossalai/engine/gradient_handler/_zero_gradient_handler.py
index 7be3f8fb3..1d70f250f 100644
--- a/colossalai/engine/gradient_handler/_zero_gradient_handler.py
+++ b/colossalai/engine/gradient_handler/_zero_gradient_handler.py
@@ -5,7 +5,7 @@ from ._base_gradient_handler import BaseGradientHandler
 @GRADIENT_HANDLER.register_module
 class ZeROGradientHandler(BaseGradientHandler):
     """A helper class to handle all-reduce operations in a data parallel group.
-    A all-reduce collective communication will be operated in
+    An all-reduce collective communication will be executed in
     :func:`handle_gradient` among a data parallel group.
     This class is specialized with ZeRO optimization.
     """
diff --git a/colossalai/engine/ophooks/_memtracer_ophook.py b/colossalai/engine/ophooks/_memtracer_ophook.py
index e77c41055..c7b20c340 100644
--- a/colossalai/engine/ophooks/_memtracer_ophook.py
+++ b/colossalai/engine/ophooks/_memtracer_ophook.py
@@ -14,7 +14,6 @@ import math
 class MemTracerOpHook(BaseOpHook):
     """
     Collect GPU memory usage information
-
     :param warmup: This parameter indicates how many iterations to truncate before profiling, defaults to 50
     :type warmup: int
     :param refreshrate: This parameter decides the frequency of write file, defaults to 10
@@ -112,4 +111,4 @@ class MemTracerOpHook(BaseOpHook):
 
     def save_results(self):
         datafile = f"{self._data_prefix}-{self._rank}.pkl"
-        self.async_mem_monitor.save(datafile)
+        self.async_mem_monitor.save(datafile)
\ No newline at end of file
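
For reviewers unfamiliar with the class touched in the first hunk, here is a minimal, hypothetical sketch of what a ZeRO-style gradient handler's `handle_gradient` typically does: all-reduce each parameter gradient across the data parallel group and average it by the group size. The class name, constructor signature, and `dp_group` parameter below are illustrative assumptions, not the actual colossalai implementation.

```python
import torch.distributed as dist


class SketchZeROGradientHandler:
    """Hypothetical sketch: average gradients across a data parallel group.

    Assumes torch.distributed has been initialized and `dp_group` is the
    ProcessGroup spanning the data parallel ranks (None means the default group).
    """

    def __init__(self, optimizer, dp_group=None):
        self._optimizer = optimizer
        self._dp_group = dp_group

    def handle_gradient(self):
        world_size = dist.get_world_size(self._dp_group)
        for group in self._optimizer.param_groups:
            for param in group["params"]:
                if param.grad is not None:
                    # Sum this gradient over all ranks in the group,
                    # then divide to get the average.
                    dist.all_reduce(param.grad, group=self._dp_group)
                    param.grad.div_(world_size)
```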