diff --git a/colossalai/booster/plugin/torch_fsdp_plugin.py b/colossalai/booster/plugin/torch_fsdp_plugin.py
index 340555dc6..8d534ea4c 100644
--- a/colossalai/booster/plugin/torch_fsdp_plugin.py
+++ b/colossalai/booster/plugin/torch_fsdp_plugin.py
@@ -3,10 +3,10 @@ from typing import Callable, Iterable, Iterator, List, Optional, Tuple, Union
 
 import torch
 import torch.nn as nn
+import warnings
 from packaging import version
 from torch.distributed import ProcessGroup
-
 if version.parse(torch.__version__) >= version.parse('1.12.0'):
     from torch.distributed.fsdp import FullStateDictConfig
     from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
@@ -202,6 +202,11 @@ class TorchFSDPPlugin(DPPluginBase):
         # wrap the model with PyTorch FSDP
         fsdp_model = TorchFSDPModel(model, device_id=torch.cuda.current_device(), **self.fsdp_kwargs)
+
+        if len(optimizer.param_groups) > 1:
+            warnings.warn(
+                'TorchFSDPPlugin does not support optimizers that use multiple param groups. The results may not be as expected if used.'
+            )
         optimizer.__init__(fsdp_model.parameters(), **optimizer.defaults)
 
         if not isinstance(optimizer, FSDPOptimizerWrapper):
diff --git a/docs/source/en/basics/booster_plugins.md b/docs/source/en/basics/booster_plugins.md
index 6ed49bfa7..5e2586b83 100644
--- a/docs/source/en/basics/booster_plugins.md
+++ b/docs/source/en/basics/booster_plugins.md
@@ -62,8 +62,11 @@ More details can be found in [Pytorch Docs](https://pytorch.org/docs/main/genera
 ### Torch FSDP Plugin
 
 > ⚠ This plugin is not available when torch version is lower than 1.12.0.
+
 > ⚠ This plugin does not support save/load sharded model checkpoint now.
 
+> ⚠ This plugin does not support optimizers that use multiple parameter groups.
+
 More details can be found in [Pytorch Docs](https://pytorch.org/docs/main/fsdp.html).
 
 {{ autodoc:colossalai.booster.plugin.TorchFSDPPlugin }}
diff --git a/docs/source/zh-Hans/basics/booster_plugins.md b/docs/source/zh-Hans/basics/booster_plugins.md
index 00e7d91e3..5bd88b679 100644
--- a/docs/source/zh-Hans/basics/booster_plugins.md
+++ b/docs/source/zh-Hans/basics/booster_plugins.md
@@ -62,8 +62,11 @@ Zero-2 不支持局部梯度累积。如果您坚持使用，虽然可以积累
 ### Torch FSDP 插件
 
 > ⚠ 如果 torch 版本低于 1.12.0，此插件将不可用。
+
 > ⚠ 该插件现在还不支持保存/加载分片的模型 checkpoint。
 
+> ⚠ 该插件目前还不支持使用多个 param groups 的 optimizer。
+
 更多详细信息，请参阅 [Pytorch 文档](https://pytorch.org/docs/main/fsdp.html).
 
 {{ autodoc:colossalai.booster.plugin.TorchFSDPPlugin }}
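
Why the warning is needed: the plugin code in the first hunk re-initializes the optimizer with `fsdp_model.parameters()` and only `optimizer.defaults`, so any user-defined param groups are collapsed into a single group and their per-group hyperparameters are dropped. Below is a minimal sketch of that behavior in plain PyTorch, with no FSDP or ColossalAI involved; the toy model and learning rates are made up purely for illustration.

```python
import torch
import torch.nn as nn

# Toy model with two sub-modules so we can build two param groups.
model = nn.Sequential(nn.Linear(4, 4), nn.Linear(4, 2))

# Optimizer configured with per-group learning rates.
optimizer = torch.optim.SGD(
    [
        {"params": model[0].parameters(), "lr": 0.1},
        {"params": model[1].parameters(), "lr": 0.01},
    ],
    lr=0.05,  # default lr, stored in optimizer.defaults
)
print(len(optimizer.param_groups))                 # 2
print([g["lr"] for g in optimizer.param_groups])   # [0.1, 0.01]

# Mirrors what the plugin does after wrapping the model with FSDP:
# re-initialize the optimizer from a flat parameter iterator plus the defaults.
optimizer.__init__(model.parameters(), **optimizer.defaults)
print(len(optimizer.param_groups))                 # 1
print([g["lr"] for g in optimizer.param_groups])   # [0.05] -- per-group lrs are lost
```

In short, per-group settings such as different learning rates or weight decay silently fall back to the optimizer defaults, which is why the plugin now warns instead of failing silently.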