[booster] add warning for torch fsdp plugin doc (#3833)
parent 84500b7799
commit 3229f93e30
@@ -3,10 +3,10 @@ from typing import Callable, Iterable, Iterator, List, Optional, Tuple, Union
 import torch
 import torch.nn as nn
+import warnings
 from packaging import version
 from torch.distributed import ProcessGroup
 
 if version.parse(torch.__version__) >= version.parse('1.12.0'):
     from torch.distributed.fsdp import FullStateDictConfig
     from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
@@ -202,6 +202,11 @@ class TorchFSDPPlugin(DPPluginBase):
         # wrap the model with PyTorch FSDP
         fsdp_model = TorchFSDPModel(model, device_id=torch.cuda.current_device(), **self.fsdp_kwargs)
 
+        if len(optimizer.param_groups) > 1:
+            warnings.warn(
+                'TorchFSDPPlugin does not support optimizers that use multiple param groups. The results may not be as expected if used.'
+            )
+
         optimizer.__init__(fsdp_model.parameters(), **optimizer.defaults)
 
         if not isinstance(optimizer, FSDPOptimizerWrapper):
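The warning added above guards the line that re-initializes the user's optimizer from `fsdp_model.parameters()` and `optimizer.defaults`: that call collapses all parameter groups into one and drops any per-group hyperparameters. The snippet below is a minimal sketch of that effect using a plain `torch.optim.SGD` optimizer with no FSDP involved; the model and hyperparameter values are illustrative only.

```python
import torch
import torch.nn as nn

# Illustrative model and optimizer with two param groups that carry
# different per-group hyperparameters.
model = nn.Linear(4, 4)
optimizer = torch.optim.SGD(
    [
        {'params': [model.weight], 'lr': 1e-2},
        {'params': [model.bias], 'lr': 1e-3, 'weight_decay': 0.0},
    ],
    lr=1e-2,
    weight_decay=1e-4,
)
print(len(optimizer.param_groups))  # 2

# This mirrors what TorchFSDPPlugin does after wrapping the model:
# re-initializing from `defaults` leaves a single group, so the
# bias group's custom lr/weight_decay are silently lost.
optimizer.__init__(model.parameters(), **optimizer.defaults)
print(len(optimizer.param_groups))  # 1
```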
@@ -62,8 +62,11 @@ More details can be found in [Pytorch Docs](https://pytorch.org/docs/main/genera
 ### Torch FSDP Plugin
 
 > ⚠ This plugin is not available when the torch version is lower than 1.12.0.
 
 > ⚠ This plugin does not yet support saving/loading sharded model checkpoints.
 
+> ⚠ This plugin does not support optimizers that use multiple param groups.
+
 More details can be found in [Pytorch Docs](https://pytorch.org/docs/main/fsdp.html).
 
 {{ autodoc:colossalai.booster.plugin.TorchFSDPPlugin }}
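For context, the documented plugin is used through the standard Booster workflow. The following is a minimal sketch, assuming torch >= 1.12.0, an available CUDA device, and a distributed launch via `torchrun`; the model, optimizer, and hyperparameters are placeholders, not part of the commit.

```python
import colossalai
import torch
import torch.nn as nn
from colossalai.booster import Booster
from colossalai.booster.plugin import TorchFSDPPlugin

# Assumes the process group environment is provided by torchrun.
colossalai.launch_from_torch(config={})

plugin = TorchFSDPPlugin()
booster = Booster(plugin=plugin)

model = nn.Linear(1024, 1024)
# Use a single param group, as noted in the warning above.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

# boost() wraps the model with FSDP and rebuilds the optimizer around it.
model, optimizer, criterion, _, _ = booster.boost(model, optimizer, criterion)
```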
@@ -62,8 +62,11 @@ Zero-2 does not support local gradient accumulation. If you insist on using it, although gradients can accumulate
 ### Torch FSDP Plugin
 
 > ⚠ This plugin is not available when the torch version is lower than 1.12.0.
 
 > ⚠ This plugin does not yet support saving/loading sharded model checkpoints.
 
+> ⚠ This plugin does not support optimizers that use multiple param groups.
+
 More details can be found in the [Pytorch Docs](https://pytorch.org/docs/main/fsdp.html).
 
 {{ autodoc:colossalai.booster.plugin.TorchFSDPPlugin }}