Making large AI models cheaper, faster and more accessible
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

77 lines
2.0 KiB

from .activation_checkpoint import checkpoint
from .checkpointing import load_checkpoint, save_checkpoint
from .common import (
clip_grad_norm_fp32,
conditional_context,
copy_tensor_parallel_attributes,
count_zeros_fp32,
disposable,
ensure_path_exists,
free_port,
is_ddp_ignored,
is_dp_rank_0,
is_model_parallel_parameter,
is_no_pp_or_last_stage,
is_tp_rank_0,
is_using_ddp,
is_using_pp,
is_using_sequence,
multi_tensor_applier,
param_is_not_tensor_parallel_duplicate,
print_rank_0,
switch_virtual_pipeline_parallel_rank,
sync_model_param,
)
from .cuda import empty_cache, get_current_device, set_to_cuda, synchronize
from .data_sampler import DataParallelSampler, get_dataloader
from .memory import (
colo_device_memory_capacity,
colo_device_memory_used,
colo_get_cpu_memory_capacity,
colo_set_cpu_memory_capacity,
colo_set_process_memory_fraction,
report_memory_usage,
)
from .tensor_detector import TensorDetector
from .timer import MultiTimer, Timer
__all__ = [
'checkpoint',
'free_port',
'print_rank_0',
'sync_model_param',
'is_ddp_ignored',
'is_dp_rank_0',
'is_tp_rank_0',
'is_no_pp_or_last_stage',
'is_using_ddp',
'is_using_pp',
'is_using_sequence',
'conditional_context',
'is_model_parallel_parameter',
'clip_grad_norm_fp32',
'count_zeros_fp32',
'copy_tensor_parallel_attributes',
'param_is_not_tensor_parallel_duplicate',
'get_current_device',
'synchronize',
'empty_cache',
'set_to_cuda',
'report_memory_usage',
'colo_device_memory_capacity',
'colo_device_memory_used',
'colo_set_process_memory_fraction',
'Timer',
'MultiTimer',
'multi_tensor_applier',
'DataParallelSampler',
'get_dataloader',
'switch_virtual_pipeline_parallel_rank',
'TensorDetector',
'load_checkpoint',
'save_checkpoint',
'ensure_path_exists',
'disposable',
'colo_set_cpu_memory_capacity',
'colo_get_cpu_memory_capacity',
]