mirror of https://github.com/hpcaitech/ColossalAI
[doc] add booster docstring and fix autodoc (#3789)
* [doc] add docstr for booster methods
* [doc] fix autodoc

pull/3795/head

parent 3c07a2846e
commit 72688adb2f
@@ -130,6 +130,12 @@ class Booster:
         return model, optimizer, criterion, dataloader, lr_scheduler

     def backward(self, loss: torch.Tensor, optimizer: Optimizer) -> None:
+        """Backward pass.
+
+        Args:
+            loss (torch.Tensor): The loss to be backpropagated.
+            optimizer (Optimizer): The optimizer to be updated.
+        """
         # TODO: implement this method with plugin
         optimizer.backward(loss)

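The added docstring documents `Booster.backward`, which routes the backward pass through the wrapped optimizer. As a rough illustration, a training step might use it like the hedged sketch below; the function and variable names are placeholders, not part of this commit:

```python
def train_step(booster, model, criterion, optimizer, dataloader):
    # `booster`, `model`, `criterion`, `optimizer`, and `dataloader` are assumed
    # to be the objects returned by an earlier `booster.boost(...)` call.
    for inputs, labels in dataloader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        booster.backward(loss, optimizer)  # instead of loss.backward()
        optimizer.step()
```
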
@@ -146,6 +152,14 @@ class Booster:
         pass

     def no_sync(self, model: nn.Module) -> contextmanager:
+        """Context manager to disable gradient synchronization across DP process groups.
+
+        Args:
+            model (nn.Module): The model whose gradient synchronization is to be disabled.
+
+        Returns:
+            contextmanager: Context to disable gradient synchronization.
+        """
         assert self.plugin is not None, f'no_sync is only enabled when a plugin is provided and the plugin supports no_sync.'
         assert self.plugin.support_no_sync, f'The plugin {self.plugin.__class__.__name__} does not support no_sync.'
         return self.plugin.no_sync(model)

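`no_sync` returns the plugin's context manager for skipping DP gradient all-reduce, which is typically useful for gradient accumulation. A hedged sketch, assuming the active plugin supports `no_sync` (the names and accumulation scheme are illustrative, not from this commit):

```python
def accumulate_gradients(booster, model, criterion, optimizer, micro_batches):
    # Skip DP gradient all-reduce on all but the last micro-batch; the plugin
    # must support no_sync (otherwise the asserts above are triggered).
    for i, (inputs, labels) in enumerate(micro_batches):
        if i < len(micro_batches) - 1:
            with booster.no_sync(model):
                booster.backward(criterion(model(inputs), labels), optimizer)
        else:
            # Last micro-batch: gradients are synchronized across ranks here.
            booster.backward(criterion(model(inputs), labels), optimizer)
    optimizer.step()
    optimizer.zero_grad()
```
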
@@ -25,24 +25,6 @@ Plugin is an important component that manages parallel configuration (eg: The ge

 {{ autodoc:colossalai.booster.Booster }}

-{{ autodoc:colossalai.booster.Booster.boost }}
-
-{{ autodoc:colossalai.booster.Booster.backward }}
-
-{{ autodoc:colossalai.booster.Booster.no_sync }}
-
-{{ autodoc:colossalai.booster.Booster.save_model }}
-
-{{ autodoc:colossalai.booster.Booster.load_model }}
-
-{{ autodoc:colossalai.booster.Booster.save_optimizer }}
-
-{{ autodoc:colossalai.booster.Booster.load_optimizer }}
-
-{{ autodoc:colossalai.booster.Booster.save_lr_scheduler }}
-
-{{ autodoc:colossalai.booster.Booster.load_lr_scheduler }}
-
 ## Usage

 In a typical workflow, you should launch the distributed environment at the beginning of the training script and create the objects you need (such as models, optimizers, loss functions, and data loaders), then call `colossalai.booster` to inject features into these objects. After that, you can use the booster APIs and the returned objects to run the rest of your training process.

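To make the workflow in the paragraph above concrete, here is a hedged end-to-end sketch; the `TorchDDPPlugin` choice, the toy model and optimizer, and the exact `launch_from_torch` arguments are assumptions that may differ across ColossalAI versions:

```python
import colossalai
import torch
from colossalai.booster import Booster
from colossalai.booster.plugin import TorchDDPPlugin  # plugin choice is illustrative

# Launch the distributed environment first (launch arguments depend on how
# the processes are started and on the ColossalAI version in use).
colossalai.launch_from_torch(config={})

# Plain PyTorch objects; the model and optimizer here are placeholders.
model = torch.nn.Linear(32, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
criterion = torch.nn.CrossEntropyLoss()

# Inject plugin features into the objects, then train with the returned ones.
booster = Booster(plugin=TorchDDPPlugin())
model, optimizer, criterion, _, _ = booster.boost(model, optimizer, criterion)
```

The key design point is that `boost` hands back wrapped versions of the same objects, so the rest of the training loop stays close to plain PyTorch while the booster methods (`backward`, `no_sync`, save/load) take over the parallel-specific behavior.
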
@@ -13,20 +13,4 @@ We provide a utility class `colossalai.cluster.DistCoordinator` to coordinate di

 {{ autodoc:colossalai.cluster.DistCoordinator }}

-{{ autodoc:colossalai.cluster.DistCoordinator.is_master }}
-
-{{ autodoc:colossalai.cluster.DistCoordinator.is_node_master }}
-
-{{ autodoc:colossalai.cluster.DistCoordinator.is_last_process }}
-
-{{ autodoc:colossalai.cluster.DistCoordinator.print_on_master }}
-
-{{ autodoc:colossalai.cluster.DistCoordinator.print_on_node_master }}
-
-{{ autodoc:colossalai.cluster.DistCoordinator.priority_execution }}
-
-{{ autodoc:colossalai.cluster.DistCoordinator.destroy }}
-
-{{ autodoc:colossalai.cluster.DistCoordinator.block_all }}
-
-{{ autodoc:colossalai.cluster.DistCoordinator.on_master_only }}
+<!-- doc-test-command: echo -->

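The `DistCoordinator` helpers listed above are thin wrappers around rank checks and barriers. A hedged usage sketch, assuming the distributed environment is already initialized and that these helpers keep the behavior their names imply:

```python
from colossalai.cluster import DistCoordinator

# Assumes torch.distributed is already initialized (e.g. via colossalai.launch).
coordinator = DistCoordinator()

# Only the global master rank prints, avoiding duplicated log lines.
coordinator.print_on_master("starting training")

if coordinator.is_master():
    pass  # e.g. write a checkpoint or log metrics from rank 0 only

# Run a block on the leading process first while the others wait at a barrier,
# e.g. for a one-time dataset download or preprocessing step.
with coordinator.priority_execution():
    pass  # download / preprocess data here
```
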
@@ -25,24 +25,6 @@ The Booster plugin is an important component that manages parallel configuration (eg: the gemini plugin encapsulates ge

 {{ autodoc:colossalai.booster.Booster }}

-{{ autodoc:colossalai.booster.Booster.boost }}
-
-{{ autodoc:colossalai.booster.Booster.backward }}
-
-{{ autodoc:colossalai.booster.Booster.no_sync }}
-
-{{ autodoc:colossalai.booster.Booster.save_model }}
-
-{{ autodoc:colossalai.booster.Booster.load_model }}
-
-{{ autodoc:colossalai.booster.Booster.save_optimizer }}
-
-{{ autodoc:colossalai.booster.Booster.load_optimizer }}
-
-{{ autodoc:colossalai.booster.Booster.save_lr_scheduler }}
-
-{{ autodoc:colossalai.booster.Booster.load_lr_scheduler }}
-
 ## Usage and Examples

 When training with colossalai, you first need to launch the distributed environment at the beginning of the training script and create the objects to be used, such as the model, optimizer, loss function, and data loaders. After that, call `colossalai.booster` to inject features into these objects, and you can then use our booster APIs for the rest of your training workflow.

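The save/load autodoc entries removed above point at checkpointing through the booster. For orientation, a hedged sketch of saving and restoring state; the paths are placeholders, and the exact signatures may take extra optional arguments (e.g. sharding options):

```python
# Hedged checkpointing sketch: `booster`, `model`, and `optimizer` are assumed
# to come from an earlier booster.boost(...) call; the paths are placeholders.
booster.save_model(model, "checkpoints/model.pt")
booster.save_optimizer(optimizer, "checkpoints/optimizer.pt")

# Later, restore the state in place on the boosted objects.
booster.load_model(model, "checkpoints/model.pt")
booster.load_optimizer(optimizer, "checkpoints/optimizer.pt")
```
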
@@ -13,20 +13,4 @@

 {{ autodoc:colossalai.cluster.DistCoordinator }}

-{{ autodoc:colossalai.cluster.DistCoordinator.is_master }}
-
-{{ autodoc:colossalai.cluster.DistCoordinator.is_node_master }}
-
-{{ autodoc:colossalai.cluster.DistCoordinator.is_last_process }}
-
-{{ autodoc:colossalai.cluster.DistCoordinator.print_on_master }}
-
-{{ autodoc:colossalai.cluster.DistCoordinator.print_on_node_master }}
-
-{{ autodoc:colossalai.cluster.DistCoordinator.priority_execution }}
-
-{{ autodoc:colossalai.cluster.DistCoordinator.destroy }}
-
-{{ autodoc:colossalai.cluster.DistCoordinator.block_all }}
-
-{{ autodoc:colossalai.cluster.DistCoordinator.on_master_only }}
+<!-- doc-test-command: echo -->