@@ -27,6 +27,7 @@ from colossalai.utils import (accumulate_gradient, get_current_device,
                               is_using_ddp, is_using_pp, is_using_sequence,
                               sync_model_param)
 from colossalai.zero import convert_to_zero, ShardedOptimizer
+from colossalai.engine.ophooks import register_ophooks_recursively, BaseOpHook
 
 
 def get_default_parser():
@@ -228,6 +229,7 @@ def initialize(model: Union[nn.Module, List[nn.Module]],
                train_dataloader: Optional[Union[Iterable, List[Iterable]]] = None,
                test_dataloader: Optional[Union[Iterable, List[Iterable]]] = None,
                lr_scheduler: _LRScheduler = None,
+               ophooks: List[BaseOpHook] = [],
                verbose: bool = True
                ) -> Tuple[Engine, DataLoader, DataLoader, _LRScheduler]:
     """Core function to wrap the essential training components with our functionality based on the config which is
@@ -412,7 +414,8 @@ def initialize(model: Union[nn.Module, List[nn.Module]],
                     optimizer=optimizer,
                     criterion=criterion,
                     gradient_handlers=gradient_handlers,
-                    clip_grad_norm=clip_grad_norm
+                    clip_grad_norm=clip_grad_norm,
+                    ophook_list=ophooks
                     )
 
     return engine, train_dataloader, test_dataloader, lr_scheduler
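
Usage note (not part of the patch): a minimal sketch of how the new ophooks argument could be used once this change lands. It assumes BaseOpHook requires pre_fwd_exec / post_fwd_exec / pre_bwd_exec / post_bwd_exec / post_iter overrides, and that the script is started with torchrun so colossalai.launch_from_torch can read the distributed environment. The LoggingOpHook class and the toy model/optimizer below are hypothetical placeholders, not part of the library.

import colossalai
import torch
import torch.nn as nn
from colossalai.engine.ophooks import BaseOpHook


class LoggingOpHook(BaseOpHook):
    # Hypothetical hook: print each submodule's class name right before its forward pass.
    def pre_fwd_exec(self, module: nn.Module, *args):
        print(f"forward: {module.__class__.__name__}")

    def post_fwd_exec(self, module: nn.Module, *args):
        pass

    def pre_bwd_exec(self, module: nn.Module, input, output):
        pass

    def post_bwd_exec(self, module: nn.Module, input):
        pass

    def post_iter(self):
        pass


# Placeholder setup; a real script would build these from its own config and dataset.
colossalai.launch_from_torch(config={})
model = nn.Linear(16, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

engine, train_dataloader, test_dataloader, lr_scheduler = colossalai.initialize(
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    ophooks=[LoggingOpHook()],  # new keyword introduced by this patch
)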