mirror of https://github.com/hpcaitech/ColossalAI
[docs] change placememt_policy to placement_policy (#3829)

* fix typo colossalai/autochunk auto_parallel amp
* fix typo colossalai/auto_parallel nn utils etc.
* fix typo colossalai/auto_parallel autochunk fx/passes etc.
* fix typo docs/
* change placememt_policy to placement_policy in docs/ and examples/
parent e90fdb1000
commit 518b31c059
@@ -175,11 +175,11 @@ In this way, users can train their models as usual.
 In our latest example, a Gemini + ZeRO DDP model is also defined to reduce overhead and improve efficiency. For the details of this part, please refer to [ZeRO](../features/zero_with_chunk.md). You can combine these two parts to understand our entire training process:

 ```python
-def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placememt_policy: str = "auto"):
+def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placement_policy: str = "auto"):
     from colossalai.nn.parallel import GeminiDDP
     model = GeminiDDP(model,
                       device=get_current_device(),
-                      placement_policy=placememt_policy,
+                      placement_policy=placement_policy,
                       pin_memory=True,
                       search_range_mb=32)
     return model
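For orientation while reading this hunk, here is a minimal, hypothetical usage sketch of the `gemini_zero_dpp` helper patched above. It is not part of the commit; the toy `torch.nn.Linear` model and the bare `ProcessGroup()` default are assumptions standing in for the tutorial's real GPT model and parallel groups.

```python
# Hypothetical usage sketch (not part of this commit): wrap a plain PyTorch
# model with the gemini_zero_dpp helper defined in the hunk above.
import torch
import colossalai
from colossalai.tensor import ProcessGroup

colossalai.launch_from_torch(config={})    # set up the distributed environment
pg = ProcessGroup()                        # default data-parallel group over all ranks

model = torch.nn.Linear(1024, 1024)        # stand-in for the tutorial's GPT model
model = gemini_zero_dpp(model, pg, placement_policy="auto")
```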
@@ -185,23 +185,23 @@ def split_param_col_tp1d(param: ColoParameter, pg: ProcessGroup):
 Define a model which uses Gemini + ZeRO DDP:

 ```python
-def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placememt_policy: str = "auto"):
+def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placement_policy: str = "auto"):
     cai_version = colossalai.__version__
     if version.parse(cai_version) > version.parse("0.1.10"):
         from colossalai.nn.parallel import GeminiDDP
         model = GeminiDDP(model,
                           device=get_current_device(),
-                          placement_policy=placememt_policy,
+                          placement_policy=placement_policy,
                           pin_memory=True,
                           search_range_mb=32)
     elif version.parse(cai_version) <= version.parse("0.1.10") and version.parse(cai_version) >= version.parse("0.1.9"):
         from colossalai.gemini import ChunkManager, GeminiManager
         chunk_size = ChunkManager.search_chunk_size(model, 64 * 1024**2, 32)
-        gemini_manager = GeminiManager(placememt_policy, chunk_manager)
+        gemini_manager = GeminiManager(placement_policy, chunk_manager)
         chunk_manager = ChunkManager(chunk_size,
                                      pg,
                                      enable_distributed_storage=True,
-                                     init_device=GeminiManager.get_default_device(placememt_policy))
+                                     init_device=GeminiManager.get_default_device(placement_policy))
         model = ZeroDDP(model, gemini_manager)
     else:
         raise NotImplemented(f"CAI version {cai_version} is not supported")
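One readability note on the legacy branch in this hunk: `gemini_manager` is built from `chunk_manager` before `chunk_manager` is assigned. A hedged sketch of that 0.1.9–0.1.10 path with the two constructions reordered, using only calls that already appear in the hunk (the `ZeroDDP` import path is an assumption, since the hunk does not show it), would be:

```python
# Sketch of the legacy (0.1.9 <= colossalai <= 0.1.10) branch only, with
# chunk_manager constructed before it is passed to GeminiManager.
# model, pg, and placement_policy are the same names used in the hunk above.
from colossalai.gemini import ChunkManager, GeminiManager
from colossalai.nn.parallel import ZeroDDP   # import path assumed, not shown in the hunk

chunk_size = ChunkManager.search_chunk_size(model, 64 * 1024**2, 32)
chunk_manager = ChunkManager(chunk_size,
                             pg,
                             enable_distributed_storage=True,
                             init_device=GeminiManager.get_default_device(placement_policy))
gemini_manager = GeminiManager(placement_policy, chunk_manager)
model = ZeroDDP(model, gemini_manager)
```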
@@ -159,11 +159,11 @@ for mn, module in model.named_modules():
 In our latest example, a Gemini + ZeRO DDP model is also defined to reduce overhead and improve efficiency. The details of this part can be found in [ZeRO](../features/zero_with_chunk.md); you can combine these two parts to understand our entire training process:

 ```python
-def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placememt_policy: str = "auto"):
+def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placement_policy: str = "auto"):
     from colossalai.nn.parallel import GeminiDDP
     model = GeminiDDP(model,
                       device=get_current_device(),
-                      placement_policy=placememt_policy,
+                      placement_policy=placement_policy,
                       pin_memory=True,
                       search_range_mb=32)
     return model
@@ -185,23 +185,23 @@ def split_param_col_tp1d(param: ColoParameter, pg: ProcessGroup):
 Define a model which uses Gemini + ZeRO DDP:

 ```python
-def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placememt_policy: str = "auto"):
+def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placement_policy: str = "auto"):
     cai_version = colossalai.__version__
     if version.parse(cai_version) > version.parse("0.1.10"):
         from colossalai.nn.parallel import GeminiDDP
         model = GeminiDDP(model,
                           device=get_current_device(),
-                          placement_policy=placememt_policy,
+                          placement_policy=placement_policy,
                           pin_memory=True,
                           search_range_mb=32)
     elif version.parse(cai_version) <= version.parse("0.1.10") and version.parse(cai_version) >= version.parse("0.1.9"):
         from colossalai.gemini import ChunkManager, GeminiManager
         chunk_size = ChunkManager.search_chunk_size(model, 64 * 1024**2, 32)
-        gemini_manager = GeminiManager(placememt_policy, chunk_manager)
+        gemini_manager = GeminiManager(placement_policy, chunk_manager)
         chunk_manager = ChunkManager(chunk_size,
                                      pg,
                                      enable_distributed_storage=True,
-                                     init_device=GeminiManager.get_default_device(placememt_policy))
+                                     init_device=GeminiManager.get_default_device(placement_policy))
         model = ZeroDDP(model, gemini_manager)
     else:
         raise NotImplemented(f"CAI version {cai_version} is not supported")
@@ -340,12 +340,12 @@ def get_full_repo_name(model_id: str, organization: Optional[str] = None, token:


 # Gemini + ZeRO DDP
-def gemini_zero_dpp(model: torch.nn.Module, placememt_policy: str = "auto"):
+def gemini_zero_dpp(model: torch.nn.Module, placement_policy: str = "auto"):
     from colossalai.nn.parallel import GeminiDDP

     model = GeminiDDP(model,
                       device=get_current_device(),
-                      placement_policy=placememt_policy,
+                      placement_policy=placement_policy,
                       pin_memory=True,
                       search_range_mb=64)
     return model
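As a brief usage note for the example script touched in this hunk, `placement_policy` controls where Gemini keeps model data. Below is a hedged sketch of the commonly documented options; `build_model()` is a hypothetical factory, and the exact set of accepted strings is an assumption of this sketch, not something this commit changes.

```python
# Hypothetical illustration of placement_policy choices for gemini_zero_dpp.
model_cuda = gemini_zero_dpp(build_model(), placement_policy="cuda")  # keep parameters on GPU
model_cpu  = gemini_zero_dpp(build_model(), placement_policy="cpu")   # offload parameters to CPU memory
model_auto = gemini_zero_dpp(build_model(), placement_policy="auto")  # move data dynamically based on GPU memory
```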
@@ -342,12 +342,12 @@ def get_full_repo_name(model_id: str, organization: Optional[str] = None, token:


 # Gemini + ZeRO DDP
-def gemini_zero_dpp(model: torch.nn.Module, placememt_policy: str = "auto"):
+def gemini_zero_dpp(model: torch.nn.Module, placement_policy: str = "auto"):
     from colossalai.nn.parallel import GeminiDDP

     model = GeminiDDP(model,
                       device=get_current_device(),
-                      placement_policy=placememt_policy,
+                      placement_policy=placement_policy,
                       pin_memory=True,
                       search_range_mb=64)
     return model
@@ -102,23 +102,23 @@ def get_model_size(model: nn.Module):


 # Gemini + ZeRO DDP
-def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placememt_policy: str = "auto"):
+def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placement_policy: str = "auto"):
     cai_version = colossalai.__version__
     if version.parse(cai_version) > version.parse("0.1.10"):
         from colossalai.nn.parallel import GeminiDDP
         model = GeminiDDP(model,
                           device=get_current_device(),
-                          placement_policy=placememt_policy,
+                          placement_policy=placement_policy,
                           pin_memory=True,
                           search_range_mb=32)
     elif version.parse(cai_version) <= version.parse("0.1.10") and version.parse(cai_version) >= version.parse("0.1.9"):
         from colossalai.gemini import ChunkManager, GeminiManager
         chunk_size = ChunkManager.search_chunk_size(model, 64 * 1024**2, 32)
-        gemini_manager = GeminiManager(placememt_policy, chunk_manager)
+        gemini_manager = GeminiManager(placement_policy, chunk_manager)
         chunk_manager = ChunkManager(chunk_size,
                                      pg,
                                      enable_distributed_storage=True,
-                                     init_device=GeminiManager.get_default_device(placememt_policy))
+                                     init_device=GeminiManager.get_default_device(placement_policy))
         model = ZeroDDP(model, gemini_manager)
     else:
         raise NotImplemented(f"CAI version {cai_version} is not supported")