mirror of https://github.com/hpcaitech/ColossalAI
ai big-model data-parallelism deep-learning distributed-computing foundation-models heterogeneous-training hpc inference large-scale model-parallelism pipeline-parallelism
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
77 lines
2.0 KiB
77 lines
2.0 KiB
"""Re-export the utility helpers defined in this package's submodules.

Each name imported below from a sibling submodule is also listed in
``__all__``, so it forms the public surface exposed by a star-import of
this package.
"""

from .activation_checkpoint import checkpoint
from .checkpointing import load_checkpoint, save_checkpoint
from .common import (
    clip_grad_norm_fp32,
    conditional_context,
    copy_tensor_parallel_attributes,
    count_zeros_fp32,
    disposable,
    ensure_path_exists,
    free_port,
    is_ddp_ignored,
    is_dp_rank_0,
    is_model_parallel_parameter,
    is_no_pp_or_last_stage,
    is_tp_rank_0,
    is_using_ddp,
    is_using_pp,
    is_using_sequence,
    multi_tensor_applier,
    param_is_not_tensor_parallel_duplicate,
    print_rank_0,
    switch_virtual_pipeline_parallel_rank,
    sync_model_param,
)
from .cuda import empty_cache, get_current_device, set_to_cuda, synchronize
from .data_sampler import DataParallelSampler, get_dataloader
from .memory import (
    colo_device_memory_capacity,
    colo_device_memory_used,
    colo_get_cpu_memory_capacity,
    colo_set_cpu_memory_capacity,
    colo_set_process_memory_fraction,
    report_memory_usage,
)
from .tensor_detector import TensorDetector
from .timer import MultiTimer, Timer

# Public names exported by a star-import. Every entry corresponds to one of
# the imports above; keep the two in sync when adding or removing a helper.
# The original ordering is preserved on purpose.
__all__ = [
    'checkpoint',
    'free_port',
    'print_rank_0',
    'sync_model_param',
    'is_ddp_ignored',
    'is_dp_rank_0',
    'is_tp_rank_0',
    'is_no_pp_or_last_stage',
    'is_using_ddp',
    'is_using_pp',
    'is_using_sequence',
    'conditional_context',
    'is_model_parallel_parameter',
    'clip_grad_norm_fp32',
    'count_zeros_fp32',
    'copy_tensor_parallel_attributes',
    'param_is_not_tensor_parallel_duplicate',
    'get_current_device',
    'synchronize',
    'empty_cache',
    'set_to_cuda',
    'report_memory_usage',
    'colo_device_memory_capacity',
    'colo_device_memory_used',
    'colo_set_process_memory_fraction',
    'Timer',
    'MultiTimer',
    'multi_tensor_applier',
    'DataParallelSampler',
    'get_dataloader',
    'switch_virtual_pipeline_parallel_rank',
    'TensorDetector',
    'load_checkpoint',
    'save_checkpoint',
    'ensure_path_exists',
    'disposable',
    'colo_set_cpu_memory_capacity',
    'colo_get_cpu_memory_capacity',
]