mirror of https://github.com/hpcaitech/ColossalAI
[docs] change placememt_policy to placement_policy (#3829)
* fix typo colossalai/autochunk auto_parallel amp * fix typo colossalai/auto_parallel nn utils etc. * fix typo colossalai/auto_parallel autochunk fx/passes etc. * fix typo docs/ * change placememt_policy to placement_policy in docs/ and examples/pull/3830/head
parent
e90fdb1000
commit
518b31c059
|
@ -175,11 +175,11 @@ In this way, users can train their models as usual.
|
||||||
In our latest example, a Gemini + ZeRO DDP model is also defined to reduce overhead and improve efficiency. For details of this part, please refer to [ZeRO](../features/zero_with_chunk.md). You can combine these two parts to understand our entire training process:
|
In our latest example, a Gemini + ZeRO DDP model is also defined to reduce overhead and improve efficiency. For details of this part, please refer to [ZeRO](../features/zero_with_chunk.md). You can combine these two parts to understand our entire training process:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placememt_policy: str = "auto"):
|
def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placement_policy: str = "auto"):
|
||||||
from colossalai.nn.parallel import GeminiDDP
|
from colossalai.nn.parallel import GeminiDDP
|
||||||
model = GeminiDDP(model,
|
model = GeminiDDP(model,
|
||||||
device=get_current_device(),
|
device=get_current_device(),
|
||||||
placement_policy=placememt_policy,
|
placement_policy=placement_policy,
|
||||||
pin_memory=True,
|
pin_memory=True,
|
||||||
search_range_mb=32)
|
search_range_mb=32)
|
||||||
return model
|
return model
|
||||||
|
|
|
@ -185,23 +185,23 @@ def split_param_col_tp1d(param: ColoParameter, pg: ProcessGroup):
|
||||||
Define a model which uses Gemini + ZeRO DDP:
|
Define a model which uses Gemini + ZeRO DDP:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placememt_policy: str = "auto"):
|
def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placement_policy: str = "auto"):
|
||||||
cai_version = colossalai.__version__
|
cai_version = colossalai.__version__
|
||||||
if version.parse(cai_version) > version.parse("0.1.10"):
|
if version.parse(cai_version) > version.parse("0.1.10"):
|
||||||
from colossalai.nn.parallel import GeminiDDP
|
from colossalai.nn.parallel import GeminiDDP
|
||||||
model = GeminiDDP(model,
|
model = GeminiDDP(model,
|
||||||
device=get_current_device(),
|
device=get_current_device(),
|
||||||
placement_policy=placememt_policy,
|
placement_policy=placement_policy,
|
||||||
pin_memory=True,
|
pin_memory=True,
|
||||||
search_range_mb=32)
|
search_range_mb=32)
|
||||||
elif version.parse(cai_version) <= version.parse("0.1.10") and version.parse(cai_version) >= version.parse("0.1.9"):
|
elif version.parse(cai_version) <= version.parse("0.1.10") and version.parse(cai_version) >= version.parse("0.1.9"):
|
||||||
from colossalai.gemini import ChunkManager, GeminiManager
|
from colossalai.gemini import ChunkManager, GeminiManager
|
||||||
chunk_size = ChunkManager.search_chunk_size(model, 64 * 1024**2, 32)
|
chunk_size = ChunkManager.search_chunk_size(model, 64 * 1024**2, 32)
|
||||||
gemini_manager = GeminiManager(placememt_policy, chunk_manager)
|
gemini_manager = GeminiManager(placement_policy, chunk_manager)
|
||||||
chunk_manager = ChunkManager(chunk_size,
|
chunk_manager = ChunkManager(chunk_size,
|
||||||
pg,
|
pg,
|
||||||
enable_distributed_storage=True,
|
enable_distributed_storage=True,
|
||||||
init_device=GeminiManager.get_default_device(placememt_policy))
|
init_device=GeminiManager.get_default_device(placement_policy))
|
||||||
model = ZeroDDP(model, gemini_manager)
|
model = ZeroDDP(model, gemini_manager)
|
||||||
else:
|
else:
|
||||||
raise NotImplemented(f"CAI version {cai_version} is not supported")
|
raise NotImplemented(f"CAI version {cai_version} is not supported")
|
||||||
|
|
|
@ -159,11 +159,11 @@ for mn, module in model.named_modules():
|
||||||
在我们最新示例中还定义了一个Gemini + ZeRO DDP 的模型从而减小开销,提升效率。这一部分的详细内容可以参考[ZeRO](../features/zero_with_chunk.md),你可以将这两部分内容结合起来看从而理解我们整个训练流程:
|
在我们最新示例中还定义了一个Gemini + ZeRO DDP 的模型从而减小开销,提升效率。这一部分的详细内容可以参考[ZeRO](../features/zero_with_chunk.md),你可以将这两部分内容结合起来看从而理解我们整个训练流程:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placememt_policy: str = "auto"):
|
def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placement_policy: str = "auto"):
|
||||||
from colossalai.nn.parallel import GeminiDDP
|
from colossalai.nn.parallel import GeminiDDP
|
||||||
model = GeminiDDP(model,
|
model = GeminiDDP(model,
|
||||||
device=get_current_device(),
|
device=get_current_device(),
|
||||||
placement_policy=placememt_policy,
|
placement_policy=placement_policy,
|
||||||
pin_memory=True,
|
pin_memory=True,
|
||||||
search_range_mb=32)
|
search_range_mb=32)
|
||||||
return model
|
return model
|
||||||
|
|
|
@ -185,23 +185,23 @@ def split_param_col_tp1d(param: ColoParameter, pg: ProcessGroup):
|
||||||
定义一个使用 Gemini + ZeRO DDP 的模型:
|
定义一个使用 Gemini + ZeRO DDP 的模型:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placememt_policy: str = "auto"):
|
def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placement_policy: str = "auto"):
|
||||||
cai_version = colossalai.__version__
|
cai_version = colossalai.__version__
|
||||||
if version.parse(cai_version) > version.parse("0.1.10"):
|
if version.parse(cai_version) > version.parse("0.1.10"):
|
||||||
from colossalai.nn.parallel import GeminiDDP
|
from colossalai.nn.parallel import GeminiDDP
|
||||||
model = GeminiDDP(model,
|
model = GeminiDDP(model,
|
||||||
device=get_current_device(),
|
device=get_current_device(),
|
||||||
placement_policy=placememt_policy,
|
placement_policy=placement_policy,
|
||||||
pin_memory=True,
|
pin_memory=True,
|
||||||
search_range_mb=32)
|
search_range_mb=32)
|
||||||
elif version.parse(cai_version) <= version.parse("0.1.10") and version.parse(cai_version) >= version.parse("0.1.9"):
|
elif version.parse(cai_version) <= version.parse("0.1.10") and version.parse(cai_version) >= version.parse("0.1.9"):
|
||||||
from colossalai.gemini import ChunkManager, GeminiManager
|
from colossalai.gemini import ChunkManager, GeminiManager
|
||||||
chunk_size = ChunkManager.search_chunk_size(model, 64 * 1024**2, 32)
|
chunk_size = ChunkManager.search_chunk_size(model, 64 * 1024**2, 32)
|
||||||
gemini_manager = GeminiManager(placememt_policy, chunk_manager)
|
gemini_manager = GeminiManager(placement_policy, chunk_manager)
|
||||||
chunk_manager = ChunkManager(chunk_size,
|
chunk_manager = ChunkManager(chunk_size,
|
||||||
pg,
|
pg,
|
||||||
enable_distributed_storage=True,
|
enable_distributed_storage=True,
|
||||||
init_device=GeminiManager.get_default_device(placememt_policy))
|
init_device=GeminiManager.get_default_device(placement_policy))
|
||||||
model = ZeroDDP(model, gemini_manager)
|
model = ZeroDDP(model, gemini_manager)
|
||||||
else:
|
else:
|
||||||
raise NotImplemented(f"CAI version {cai_version} is not supported")
|
raise NotImplemented(f"CAI version {cai_version} is not supported")
|
||||||
|
|
|
@ -340,12 +340,12 @@ def get_full_repo_name(model_id: str, organization: Optional[str] = None, token:
|
||||||
|
|
||||||
|
|
||||||
# Gemini + ZeRO DDP
|
# Gemini + ZeRO DDP
|
||||||
def gemini_zero_dpp(model: torch.nn.Module, placememt_policy: str = "auto"):
|
def gemini_zero_dpp(model: torch.nn.Module, placement_policy: str = "auto"):
|
||||||
from colossalai.nn.parallel import GeminiDDP
|
from colossalai.nn.parallel import GeminiDDP
|
||||||
|
|
||||||
model = GeminiDDP(model,
|
model = GeminiDDP(model,
|
||||||
device=get_current_device(),
|
device=get_current_device(),
|
||||||
placement_policy=placememt_policy,
|
placement_policy=placement_policy,
|
||||||
pin_memory=True,
|
pin_memory=True,
|
||||||
search_range_mb=64)
|
search_range_mb=64)
|
||||||
return model
|
return model
|
||||||
|
|
|
@ -342,12 +342,12 @@ def get_full_repo_name(model_id: str, organization: Optional[str] = None, token:
|
||||||
|
|
||||||
|
|
||||||
# Gemini + ZeRO DDP
|
# Gemini + ZeRO DDP
|
||||||
def gemini_zero_dpp(model: torch.nn.Module, placememt_policy: str = "auto"):
|
def gemini_zero_dpp(model: torch.nn.Module, placement_policy: str = "auto"):
|
||||||
from colossalai.nn.parallel import GeminiDDP
|
from colossalai.nn.parallel import GeminiDDP
|
||||||
|
|
||||||
model = GeminiDDP(model,
|
model = GeminiDDP(model,
|
||||||
device=get_current_device(),
|
device=get_current_device(),
|
||||||
placement_policy=placememt_policy,
|
placement_policy=placement_policy,
|
||||||
pin_memory=True,
|
pin_memory=True,
|
||||||
search_range_mb=64)
|
search_range_mb=64)
|
||||||
return model
|
return model
|
||||||
|
|
|
@ -102,23 +102,23 @@ def get_model_size(model: nn.Module):
|
||||||
|
|
||||||
|
|
||||||
# Gemini + ZeRO DDP
|
# Gemini + ZeRO DDP
|
||||||
def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placememt_policy: str = "auto"):
|
def gemini_zero_dpp(model: torch.nn.Module, pg: ProcessGroup, placement_policy: str = "auto"):
|
||||||
cai_version = colossalai.__version__
|
cai_version = colossalai.__version__
|
||||||
if version.parse(cai_version) > version.parse("0.1.10"):
|
if version.parse(cai_version) > version.parse("0.1.10"):
|
||||||
from colossalai.nn.parallel import GeminiDDP
|
from colossalai.nn.parallel import GeminiDDP
|
||||||
model = GeminiDDP(model,
|
model = GeminiDDP(model,
|
||||||
device=get_current_device(),
|
device=get_current_device(),
|
||||||
placement_policy=placememt_policy,
|
placement_policy=placement_policy,
|
||||||
pin_memory=True,
|
pin_memory=True,
|
||||||
search_range_mb=32)
|
search_range_mb=32)
|
||||||
elif version.parse(cai_version) <= version.parse("0.1.10") and version.parse(cai_version) >= version.parse("0.1.9"):
|
elif version.parse(cai_version) <= version.parse("0.1.10") and version.parse(cai_version) >= version.parse("0.1.9"):
|
||||||
from colossalai.gemini import ChunkManager, GeminiManager
|
from colossalai.gemini import ChunkManager, GeminiManager
|
||||||
chunk_size = ChunkManager.search_chunk_size(model, 64 * 1024**2, 32)
|
chunk_size = ChunkManager.search_chunk_size(model, 64 * 1024**2, 32)
|
||||||
gemini_manager = GeminiManager(placememt_policy, chunk_manager)
|
gemini_manager = GeminiManager(placement_policy, chunk_manager)
|
||||||
chunk_manager = ChunkManager(chunk_size,
|
chunk_manager = ChunkManager(chunk_size,
|
||||||
pg,
|
pg,
|
||||||
enable_distributed_storage=True,
|
enable_distributed_storage=True,
|
||||||
init_device=GeminiManager.get_default_device(placememt_policy))
|
init_device=GeminiManager.get_default_device(placement_policy))
|
||||||
model = ZeroDDP(model, gemini_manager)
|
model = ZeroDDP(model, gemini_manager)
|
||||||
else:
|
else:
|
||||||
raise NotImplemented(f"CAI version {cai_version} is not supported")
|
raise NotImplemented(f"CAI version {cai_version} is not supported")
|
||||||
|
|
Loading…
Reference in New Issue