ColossalAI/colossalai/nn/parallel/gemini_parallel.py

from typing import Optional

import torch

from colossalai.gemini.chunk import init_chunk_manager
from colossalai.gemini.gemini_mgr import GeminiManager
from colossalai.gemini.memory_tracer import MemStats

from .data_parallel import ZeroDDP


class GeminiDDP(ZeroDDP):

    def __init__(self,
                 module: torch.nn.Module,
                 device: torch.device,
                 placement_policy: str = "cpu",
                 pin_memory: bool = False,
                 force_outputs_fp32: bool = False,
                 strict_ddp_mode: bool = False,
                 search_range_mb: int = 32,
                 hidden_dim: Optional[int] = None,
                 min_chunk_size_mb: float = 32,
                 memstats: Optional[MemStats] = None) -> None:
        """
        A torch.Module warpper using ZeRO-DP and Genimi.
        ZeRO is for parallel. Gemini is for memory management.
        WARNING: The class will modify the module inline!

        Example:
            model is initialized under the context of ColoInitContext
            >>> model = GeminiDDP(model, torch.cuda.current_device(), "cuda")
            >>> logits = model(x)
            >>> loss = criterion(logits, labels)
            >>> model.backward(loss)

        Args:
            module (torch.nn.Module): the model to be wrapped.
            device (torch.device): device to place the model.
            placement_policy (str, optional): "cpu", "cuda", "auto". Defaults to "cpu".
            pin_memory (bool, optional): use pin memory on CPU. Defaults to False.
            force_outputs_fp32 (bool, optional): force outputs are fp32. Defaults to False.
            search_range_mb (int, optional): chunk size searching range in MegaByte. Defaults to 32.
            hidden_dim (int, optional): the hidden dimension of DNN.
                Users can provide this argument to speed up searching.
                If users do not know this argument before training, it is ok. We will use a default value 1024.
            min_chunk_size_mb (float, optional): the minimum chunk size in MegaByte.
                If the aggregate size of parameters is still samller than the minimum chunk size,
                all parameters will be compacted into one small chunk.
            memstats (MemStats, optional) the memory statistics collector by a runtime memory tracer.
        """
        # some ugly hotfix for the compatibility with Lightning
        if search_range_mb is None:
            search_range_mb = 32

        chunk_manager = init_chunk_manager(model=module,
                                           init_device=device,
                                           hidden_dim=hidden_dim,
                                           search_range_mb=search_range_mb,
                                           min_chunk_size_mb=min_chunk_size_mb,
                                           strict_ddp_flag=strict_ddp_mode)
        gemini_manager = GeminiManager(placement_policy, chunk_manager, memstats)
        super().__init__(module, gemini_manager, pin_memory, force_outputs_fp32, strict_ddp_mode)
[gemini] add arguments (#2046) * [zero] fix testing parameters * [gemini] add arguments * add docstrings 2022-11-30 08:40:13 +00:00			`from typing import Optional`

[Gemini] make gemini usage simple (#1821) 2022-11-08 07:53:13 +00:00			`import torch`

			`from colossalai.gemini.chunk import init_chunk_manager`
			`from colossalai.gemini.gemini_mgr import GeminiManager`
[Gemini] chunk init using runtime visited param order (#2115) 2022-12-12 10:06:16 +00:00			`from colossalai.gemini.memory_tracer import MemStats`
[Gemini] make gemini usage simple (#1821) 2022-11-08 07:53:13 +00:00
			`from .data_parallel import ZeroDDP`


			`class GeminiDDP(ZeroDDP):`

			`def __init__(self,`
			`module: torch.nn.Module,`
			`device: torch.device,`
			`placement_policy: str = "cpu",`
			`pin_memory: bool = False,`
			`force_outputs_fp32: bool = False,`
[zero] add strict ddp mode (#2508) * [zero] add strict ddp mode * [polish] add comments for strict ddp mode * [zero] fix test error 2023-01-20 06:04:38 +00:00			`strict_ddp_mode: bool = False,`
[gemini] add arguments (#2046) * [zero] fix testing parameters * [gemini] add arguments * add docstrings 2022-11-30 08:40:13 +00:00			`search_range_mb: int = 32,`
			`hidden_dim: Optional[int] = None,`
[gemini] update ddp strict mode (#2518) * [zero] add strict ddp mode for chunk init * [gemini] update gpt example 2023-01-28 06:35:25 +00:00			`min_chunk_size_mb: float = 32,`
[Gemini] chunk init using runtime visited param order (#2115) 2022-12-12 10:06:16 +00:00			`memstats: Optional[MemStats] = None) -> None:`
[Gemini] make gemini usage simple (#1821) 2022-11-08 07:53:13 +00:00			`"""`
[Gemini] add GeminiAdamOptimizer (#1960) 2022-11-16 06:44:28 +00:00			`A torch.Module warpper using ZeRO-DP and Genimi.`
[Gemini] make gemini usage simple (#1821) 2022-11-08 07:53:13 +00:00			`ZeRO is for parallel. Gemini is for memory management.`
[Gemini] add GeminiAdamOptimizer (#1960) 2022-11-16 06:44:28 +00:00			`WARNING: The class will modify the module inline!`
[Gemini] make gemini usage simple (#1821) 2022-11-08 07:53:13 +00:00
			`Example:`
			`model is initialized under the context of ColoInitContext`
			`>>> model = GeminiDDP(model, torch.cuda.current_device(), "cuda")`
			`>>> logits = model(x)`
			`>>> loss = criterion(logits, labels)`
			`>>> model.backward(loss)`

			`Args:`
			`module (torch.nn.Module): the model to be wrapped.`
			`device (torch.device): device to place the model.`
			`placement_policy (str, optional): "cpu", "cuda", "auto". Defaults to "cpu".`
			`pin_memory (bool, optional): use pin memory on CPU. Defaults to False.`
			`force_outputs_fp32 (bool, optional): force outputs are fp32. Defaults to False.`
			`search_range_mb (int, optional): chunk size searching range in MegaByte. Defaults to 32.`
[gemini] add arguments (#2046) * [zero] fix testing parameters * [gemini] add arguments * add docstrings 2022-11-30 08:40:13 +00:00			`hidden_dim (int, optional): the hidden dimension of DNN.`
			`Users can provide this argument to speed up searching.`
			`If users do not know this argument before training, it is ok. We will use a default value 1024.`
			`min_chunk_size_mb (float, optional): the minimum chunk size in MegaByte.`
			`If the aggregate size of parameters is still samller than the minimum chunk size,`
			`all parameters will be compacted into one small chunk.`
[Gemini] chunk init using runtime visited param order (#2115) 2022-12-12 10:06:16 +00:00			`memstats (MemStats, optional) the memory statistics collector by a runtime memory tracer.`
[Gemini] make gemini usage simple (#1821) 2022-11-08 07:53:13 +00:00			`"""`
[hotfix] fix lightning error (#2529) 2023-01-31 02:40:39 +00:00			`# some ugly hotfix for the compatibility with Lightning`
			`if search_range_mb is None:`
			`search_range_mb = 32`

[gemini] add arguments (#2046) * [zero] fix testing parameters * [gemini] add arguments * add docstrings 2022-11-30 08:40:13 +00:00			`chunk_manager = init_chunk_manager(model=module,`
			`init_device=device,`
			`hidden_dim=hidden_dim,`
			`search_range_mb=search_range_mb,`
[gemini] update ddp strict mode (#2518) * [zero] add strict ddp mode for chunk init * [gemini] update gpt example 2023-01-28 06:35:25 +00:00			`min_chunk_size_mb=min_chunk_size_mb,`
			`strict_ddp_flag=strict_ddp_mode)`
[Gemini] chunk init using runtime visited param order (#2115) 2022-12-12 10:06:16 +00:00			`gemini_manager = GeminiManager(placement_policy, chunk_manager, memstats)`
[zero] add strict ddp mode (#2508) * [zero] add strict ddp mode * [polish] add comments for strict ddp mode * [zero] fix test error 2023-01-20 06:04:38 +00:00			`super().__init__(module, gemini_manager, pin_memory, force_outputs_fp32, strict_ddp_mode)`