From 7f8b16635b42013b73e1cb1ffdebc07b4d71ac93 Mon Sep 17 00:00:00 2001 From: Hongxin Liu Date: Mon, 29 Apr 2024 10:40:11 +0800 Subject: [PATCH] [misc] refactor launch API and tensor constructor (#5666) * [misc] remove config arg from initialize * [misc] remove old tensor contrusctor * [plugin] add npu support for ddp * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [devops] fix doc test ci * [test] fix test launch * [doc] update launch doc --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .github/workflows/doc_test_on_pr.yml | 2 +- applications/Colossal-LLaMA/train.py | 2 +- .../ColossalChat/benchmarks/benchmark_ppo.py | 2 +- .../examples/training_scripts/train_dpo.py | 2 +- .../examples/training_scripts/train_ppo.py | 2 +- .../examples/training_scripts/train_rm.py | 2 +- .../examples/training_scripts/train_sft.py | 2 +- .../examples/dataset_evaluation/inference.py | 2 +- .../examples/gpt_evaluation/inference.py | 2 +- applications/ColossalMoE/infer.py | 8 ++- .../ColossalMoE/tests/test_mixtral_layer.py | 2 +- .../ColossalMoE/tests/test_moe_checkpoint.py | 2 +- applications/ColossalMoE/train.py | 8 +-- .../auto_parallel/offload/amp_optimizer.py | 2 +- .../offload/base_offload_module.py | 4 +- colossalai/booster/plugin/torch_ddp_plugin.py | 5 +- colossalai/inference/README.md | 2 +- colossalai/initialize.py | 16 +----- .../dynamic_batching/ray_dist_init.py | 2 +- .../legacy/inference/hybridengine/engine.py | 2 +- .../legacy/inference/pipeline/README.md | 34 ++++++------ .../inference/pipeline/benchmark/benchmark.py | 2 +- .../ray_serve/Colossal_Inference_rayserve.py | 2 +- .../torch_serve/Colossal_Inference_Handler.py | 2 +- colossalai/legacy/pipeline/rpc/utils.py | 2 +- colossalai/nn/optimizer/fused_adam.py | 4 +- colossalai/nn/optimizer/hybrid_adam.py | 4 +- colossalai/shardformer/README.md | 2 +- .../examples/convergence_benchmark.py | 2 +- .../examples/performance_benchmark.py | 3 +- colossalai/shardformer/shard/shardformer.py | 2 +- colossalai/tensor/d_tensor/README.md | 2 +- .../train_gpt_using_hybrid_parallelism.md | 2 +- .../train_vit_with_hybrid_parallelism.md | 2 +- docs/source/en/basics/booster_api.md | 2 +- docs/source/en/basics/launch_colossalai.md | 18 ++----- .../gradient_accumulation_with_booster.md | 2 +- .../gradient_clipping_with_booster.md | 2 +- docs/source/en/features/lazy_init.md | 2 +- .../mixed_precision_training_with_booster.md | 10 ++-- docs/source/en/features/nvme_offload.md | 2 +- docs/source/en/features/zero_with_chunk.md | 2 +- .../train_gpt_using_hybrid_parallelism.md | 2 +- .../train_vit_with_hybrid_parallelism.md | 2 +- docs/source/zh-Hans/basics/booster_api.md | 2 +- .../zh-Hans/basics/launch_colossalai.md | 18 ++----- .../gradient_accumulation_with_booster.md | 2 +- .../gradient_clipping_with_booster.md | 2 +- docs/source/zh-Hans/features/lazy_init.md | 2 +- .../mixed_precision_training_with_booster.md | 12 ++--- docs/source/zh-Hans/features/nvme_offload.md | 2 +- .../zh-Hans/features/zero_with_chunk.md | 2 +- .../roberta/pretraining/run_pretraining.py | 4 +- examples/images/dreambooth/debug.py | 2 +- .../dreambooth/train_dreambooth_colossalai.py | 4 +- .../train_dreambooth_colossalai_lora.py | 4 +- examples/images/resnet/train.py | 2 +- examples/images/vit/vit_benchmark.py | 2 +- examples/images/vit/vit_train_demo.py | 2 +- examples/inference/benchmark_llama.py | 2 +- examples/inference/run_llama_inference.py | 2 +- 
examples/language/bert/benchmark.py | 2 +- examples/language/bert/finetune.py | 2 +- .../auto_offload/train_gpt_offload.py | 3 +- .../auto_parallel/auto_parallel_with_gpt.py | 2 +- .../language/gpt/gemini/train_gpt_demo.py | 2 +- .../gpt/hybridparallelism/benchmark.py | 2 +- .../gpt/hybridparallelism/finetune.py | 2 +- examples/language/gpt/titans/train_gpt.py | 4 +- examples/language/grok-1/inference_tp.py | 2 +- examples/language/llama/benchmark.py | 2 +- .../openmoe/benchmark/benchmark_cai.py | 2 +- examples/language/openmoe/train.py | 2 +- examples/language/opt/opt_benchmark.py | 2 +- examples/language/opt/opt_train_demo.py | 2 +- examples/language/palm/train.py | 2 +- .../auto_parallel/auto_ckpt_batchsize_test.py | 2 +- .../auto_parallel/auto_ckpt_solver_test.py | 2 +- .../tutorial/new_api/cifar_resnet/train.py | 2 +- examples/tutorial/new_api/cifar_vit/train.py | 2 +- .../tutorial/new_api/glue_bert/finetune.py | 2 +- examples/tutorial/opt/opt/run_clm.py | 2 +- .../test_C_solver_consistency.py | 2 +- .../test_ckpt_torchvision.py | 4 +- .../test_offload/test_perf.py | 3 +- .../test_bias_addition_forward.py | 4 +- .../test_tensor_shard/test_checkpoint.py | 2 +- .../test_compatibility_with_ddp.py | 2 +- .../test_compatibility_with_gemini.py | 2 +- .../test_gpt/test_runtime_with_gpt_modules.py | 2 +- .../test_binary_elementwise_metainfo.py | 2 +- .../test_metainfo/test_conv_metainfo.py | 4 +- .../test_metainfo/test_linear_metainfo.py | 4 +- .../test_metainfo/test_norm_metainfo.py | 2 +- .../test_metainfo/test_pooling_metainfo.py | 4 +- .../test_node_handler/test_addbmm_handler.py | 4 +- .../test_node_handler/test_addmm_handler.py | 2 +- .../test_batch_norm_handler.py | 2 +- .../test_bias_linear_function_node.py | 2 +- .../test_bias_linear_module_node.py | 2 +- .../test_binary_elementwise_handler.py | 4 +- .../test_node_handler/test_bmm_handler.py | 4 +- .../test_node_handler/test_conv_handler.py | 4 +- .../test_embedding_handler.py | 4 +- .../test_node_handler/test_getitem_handler.py | 2 +- .../test_layer_norm_handler.py | 2 +- .../test_node_handler/test_linear_handler.py | 4 +- .../test_permute_and_transpose_handler.py | 2 +- .../test_node_handler/test_softmax_handler.py | 2 +- .../test_node_handler/test_split_handler.py | 2 +- .../test_node_handler/test_sum_handler.py | 2 +- .../test_node_handler/test_view_handler.py | 2 +- .../test_mixed_precision/test_fp16_torch.py | 2 +- .../test_plugin/test_3d_plugin.py | 2 +- .../test_plugin/test_dp_plugin_base.py | 2 +- .../test_plugin/test_gemini_plugin.py | 2 +- .../test_plugin/test_low_level_zero_plugin.py | 2 +- .../test_plugin/test_torch_ddp_plugin.py | 2 +- .../test_plugin/test_torch_fsdp_plugin.py | 2 +- .../test_gemini_checkpoint_io.py | 3 +- .../test_gemini_torch_compability.py | 3 +- ...st_hybrid_parallel_plugin_checkpoint_io.py | 3 +- .../test_low_level_zero_checkpoint_io.py | 2 +- .../test_plugins_huggingface_compatibility.py | 3 +- .../test_torch_ddp_checkpoint_io.py | 2 +- .../test_torch_fsdp_checkpoint_io.py | 2 +- .../test_cluster/test_device_mesh_manager.py | 2 +- tests/test_cluster/test_process_group_mesh.py | 54 ------------------- tests/test_device/test_alpha_beta.py | 2 +- tests/test_device/test_device_mesh.py | 2 +- tests/test_device/test_extract_alpha_beta.py | 2 +- tests/test_device/test_init_logical_pg.py | 2 +- .../test_search_logical_device_mesh.py | 2 +- .../test_activation_checkpoint_codegen.py | 4 +- ...st_nested_activation_checkpoint_codegen.py | 4 +- .../test_codegen/test_offload_codegen.py | 4 +- 
tests/test_fx/test_parallel_1d.py | 2 +- tests/test_infer/test_hybrid_bloom.py | 6 +-- tests/test_infer/test_hybrid_chatglm2.py | 6 +-- tests/test_infer/test_hybrid_llama.py | 6 +-- tests/test_legacy/test_amp/test_naive_fp16.py | 2 +- tests/test_legacy/test_amp/test_torch_fp16.py | 2 +- .../test_comm/test_boardcast_send_recv_v2.py | 2 +- tests/test_legacy/test_comm/test_comm.py | 2 +- .../test_comm/test_object_list_p2p.py | 2 +- .../test_comm/test_object_list_p2p_v2.py | 2 +- .../test_layers/test_1d/test_1d.py | 2 +- .../test_layers/test_2d/test_2d.py | 2 +- .../test_layers/test_2p5d/test_2p5d.py | 2 +- .../test_layers/test_3d/test_3d.py | 2 +- .../test_layers/test_cache_embedding.py | 2 +- .../test_tensor/core/test_dist_spec_mgr.py | 2 +- .../test_legacy/test_tensor/test_parameter.py | 2 +- .../test_trainer/test_pipeline/test_p2p.py | 2 +- .../test_pipeline/test_pipeline_schedule.py | 2 +- .../test_checkpoint/test_checkpoint_1d.py | 2 +- .../test_checkpoint/test_checkpoint_2d.py | 2 +- .../test_checkpoint/test_checkpoint_2p5d.py | 2 +- .../test_checkpoint/test_checkpoint_3d.py | 2 +- tests/test_legacy/test_utils/test_memory.py | 2 +- .../test_utils/test_norm_gradient_clipping.py | 2 +- tests/test_legacy/test_zero/test_commons.py | 2 +- tests/test_lora/test_lora.py | 3 +- tests/test_moe/test_grad_handler.py | 1 - tests/test_moe/test_kernel.py | 2 +- tests/test_moe/test_moe_ep_tp.py | 2 +- tests/test_moe/test_moe_group.py | 1 - tests/test_moe/test_moe_hybrid_zero.py | 2 +- tests/test_moe/test_moe_load_balance.py | 1 - tests/test_moe/test_moe_zero_fwd_bwd.py | 2 +- tests/test_moe/test_moe_zero_optim.py | 2 +- tests/test_optimizer/test_adam_kernel.py | 2 +- tests/test_pipeline/test_p2p_communication.py | 2 +- .../test_schedule/test_interleaved.py | 2 +- .../test_schedule/test_oneF_oneB.py | 2 +- tests/test_pipeline/test_stage_manager.py | 2 +- .../test_amp_optimizer.py | 4 +- .../test_naive_optimizer.py | 4 +- .../test_zero_optimizer.py | 4 +- .../test_layer/test_dist_crossentropy.py | 2 +- .../test_layer/test_dropout.py | 2 +- .../test_layer/test_embedding.py | 2 +- .../test_gpt2_qkv_fused_linear_1d.py | 2 +- .../test_layer/test_layernorm.py | 2 +- .../test_layer/test_linear_1d.py | 2 +- .../test_layer/test_qkv_fused_linear_1d.py | 2 +- .../test_layer/test_sequence_parallel.py | 2 +- .../test_vocab_parallel_embedding_1d.py | 2 +- .../test_model/test_shard_bert.py | 4 +- .../test_model/test_shard_blip2.py | 1 - .../test_model/test_shard_bloom.py | 4 +- .../test_model/test_shard_chatglm2.py | 2 - .../test_model/test_shard_falcon.py | 4 +- .../test_model/test_shard_gpt2.py | 2 - .../test_model/test_shard_llama.py | 4 +- .../test_model/test_shard_mistral.py | 2 +- .../test_model/test_shard_opt.py | 2 - .../test_model/test_shard_sam.py | 2 +- .../test_model/test_shard_t5.py | 2 - .../test_model/test_shard_vit.py | 4 +- .../test_model/test_shard_whisper.py | 4 +- tests/test_shardformer/test_with_torch_ddp.py | 2 +- tests/test_tensor/test_comm_spec_apply.py | 2 +- .../test_dtensor/test_comm_spec.py | 2 +- .../test_tensor/test_dtensor/test_dtensor.py | 2 +- .../test_dtensor/test_layout_converter.py | 6 +-- tests/test_tensor/test_mix_gather.py | 2 +- tests/test_tensor/test_padded_tensor.py | 2 +- .../test_shape_consistency_apply.py | 2 +- .../test_zero/test_gemini/test_chunk_mgrv2.py | 2 +- tests/test_zero/test_gemini/test_chunkv2.py | 2 +- tests/test_zero/test_gemini/test_fwd_bwd.py | 3 +- .../test_gemini/test_gemini_use_rmt.py | 3 +- .../test_zero/test_gemini/test_grad_accum.py | 3 +- 
tests/test_zero/test_gemini/test_grad_clip.py | 3 +- tests/test_zero/test_gemini/test_inference.py | 3 +- tests/test_zero/test_gemini/test_optim.py | 3 +- tests/test_zero/test_gemini/test_search.py | 2 +- .../test_gemini/test_zeroddp_state_dict.py | 3 +- .../test_gemini/test_zerooptim_state_dict.py | 3 +- .../test_zero/test_low_level/test_grad_acc.py | 2 +- .../test_zero/test_low_level/test_zero1_2.py | 2 +- .../test_low_level/test_zero_ckpt.py | 2 +- 223 files changed, 294 insertions(+), 403 deletions(-) diff --git a/.github/workflows/doc_test_on_pr.yml b/.github/workflows/doc_test_on_pr.yml index 8afc46b87..27f7e76af 100644 --- a/.github/workflows/doc_test_on_pr.yml +++ b/.github/workflows/doc_test_on_pr.yml @@ -56,7 +56,7 @@ jobs: needs: detect-changed-doc runs-on: [self-hosted, gpu] container: - image: hpcaitech/pytorch-cuda:2.0.0-11.7.0 + image: hpcaitech/pytorch-cuda:2.1.0-12.1.0 options: --gpus all --rm timeout-minutes: 20 defaults: diff --git a/applications/Colossal-LLaMA/train.py b/applications/Colossal-LLaMA/train.py index 37e4fcc80..43a360a9a 100644 --- a/applications/Colossal-LLaMA/train.py +++ b/applications/Colossal-LLaMA/train.py @@ -136,7 +136,7 @@ def main() -> None: # ============================== # Initialize Distributed Training # ============================== - colossalai.launch_from_torch({}) + colossalai.launch_from_torch() accelerator = get_accelerator() coordinator = DistCoordinator() diff --git a/applications/ColossalChat/benchmarks/benchmark_ppo.py b/applications/ColossalChat/benchmarks/benchmark_ppo.py index e1b7a313f..00edf0534 100644 --- a/applications/ColossalChat/benchmarks/benchmark_ppo.py +++ b/applications/ColossalChat/benchmarks/benchmark_ppo.py @@ -66,7 +66,7 @@ def benchmark_train(args): # ============================== # Initialize Distributed Training # ============================== - colossalai.launch_from_torch({}) + colossalai.launch_from_torch() coordinator = DistCoordinator() # ====================================================== diff --git a/applications/ColossalChat/examples/training_scripts/train_dpo.py b/applications/ColossalChat/examples/training_scripts/train_dpo.py index b9287eb1a..f06c23a9f 100755 --- a/applications/ColossalChat/examples/training_scripts/train_dpo.py +++ b/applications/ColossalChat/examples/training_scripts/train_dpo.py @@ -37,7 +37,7 @@ def train(args): # ============================== # Initialize Distributed Training # ============================== - colossalai.launch_from_torch({}) + colossalai.launch_from_torch() coordinator = DistCoordinator() # ============================== diff --git a/applications/ColossalChat/examples/training_scripts/train_ppo.py b/applications/ColossalChat/examples/training_scripts/train_ppo.py index 7c91fa347..727cff7ca 100755 --- a/applications/ColossalChat/examples/training_scripts/train_ppo.py +++ b/applications/ColossalChat/examples/training_scripts/train_ppo.py @@ -39,7 +39,7 @@ def train(args): # ============================== # Initialize Distributed Training # ============================== - colossalai.launch_from_torch({}) + colossalai.launch_from_torch() coordinator = DistCoordinator() # ====================================================== diff --git a/applications/ColossalChat/examples/training_scripts/train_rm.py b/applications/ColossalChat/examples/training_scripts/train_rm.py index a0c710f2b..364198c1d 100755 --- a/applications/ColossalChat/examples/training_scripts/train_rm.py +++ b/applications/ColossalChat/examples/training_scripts/train_rm.py @@ -34,7 +34,7 @@ def 
train(args): # ============================== # Initialize Distributed Training # ============================== - colossalai.launch_from_torch({}) + colossalai.launch_from_torch() coordinator = DistCoordinator() # ====================================================== diff --git a/applications/ColossalChat/examples/training_scripts/train_sft.py b/applications/ColossalChat/examples/training_scripts/train_sft.py index fcd1a429c..ae20f2abc 100755 --- a/applications/ColossalChat/examples/training_scripts/train_sft.py +++ b/applications/ColossalChat/examples/training_scripts/train_sft.py @@ -29,7 +29,7 @@ def train(args): # ============================== # Initialize Distributed Training # ============================== - colossalai.launch_from_torch({}) + colossalai.launch_from_torch() coordinator = DistCoordinator() # ============================== diff --git a/applications/ColossalEval/examples/dataset_evaluation/inference.py b/applications/ColossalEval/examples/dataset_evaluation/inference.py index 13bbb12b6..a7307635d 100644 --- a/applications/ColossalEval/examples/dataset_evaluation/inference.py +++ b/applications/ColossalEval/examples/dataset_evaluation/inference.py @@ -81,7 +81,7 @@ def rm_and_merge( def main(args): - colossalai.launch_from_torch(config={}, seed=42) + colossalai.launch_from_torch(seed=42) accelerator = get_accelerator() world_size = dist.get_world_size() diff --git a/applications/ColossalEval/examples/gpt_evaluation/inference.py b/applications/ColossalEval/examples/gpt_evaluation/inference.py index 5b09f9de8..408ba3e7b 100644 --- a/applications/ColossalEval/examples/gpt_evaluation/inference.py +++ b/applications/ColossalEval/examples/gpt_evaluation/inference.py @@ -81,7 +81,7 @@ def rm_and_merge( def main(args): - colossalai.launch_from_torch(config={}, seed=42) + colossalai.launch_from_torch(seed=42) world_size = dist.get_world_size() rank = dist.get_rank() diff --git a/applications/ColossalMoE/infer.py b/applications/ColossalMoE/infer.py index c175fe9e3..543c434d2 100644 --- a/applications/ColossalMoE/infer.py +++ b/applications/ColossalMoE/infer.py @@ -57,7 +57,7 @@ def main(): args = parse_args() # Launch ColossalAI - colossalai.launch_from_torch(config={}, seed=args.seed) + colossalai.launch_from_torch(seed=args.seed) coordinator = DistCoordinator() config = MixtralConfig.from_pretrained(args.model_name) @@ -96,7 +96,11 @@ def main(): if coordinator.rank == 0: text = ["Hello my name is"] else: - text = ["What's the largest country in the world?", "How many people live in China?", "帮我续写这首诗:离离原上草"] + text = [ + "What's the largest country in the world?", + "How many people live in China?", + "帮我续写这首诗:离离原上草", + ] tokenizer.pad_token = tokenizer.unk_token inputs = tokenizer(text, return_tensors="pt", padding=True).to(torch.cuda.current_device()) diff --git a/applications/ColossalMoE/tests/test_mixtral_layer.py b/applications/ColossalMoE/tests/test_mixtral_layer.py index 57589ab20..cbb70f195 100644 --- a/applications/ColossalMoE/tests/test_mixtral_layer.py +++ b/applications/ColossalMoE/tests/test_mixtral_layer.py @@ -50,7 +50,7 @@ def check_mixtral_moe_layer(): def run_dist(rank: int, world_size: int, port: int): - colossalai.launch({}, rank, world_size, "localhost", port) + colossalai.launch(rank, world_size, "localhost", port) check_mixtral_moe_layer() diff --git a/applications/ColossalMoE/tests/test_moe_checkpoint.py b/applications/ColossalMoE/tests/test_moe_checkpoint.py index 822e7410f..074dbf835 100644 --- a/applications/ColossalMoE/tests/test_moe_checkpoint.py 
+++ b/applications/ColossalMoE/tests/test_moe_checkpoint.py @@ -133,7 +133,7 @@ def check_mixtral_moe_layer(): def run_dist(rank: int, world_size: int, port: int): - colossalai.launch({}, rank, world_size, "localhost", port) + colossalai.launch(rank, world_size, "localhost", port) check_mixtral_moe_layer() diff --git a/applications/ColossalMoE/train.py b/applications/ColossalMoE/train.py index 850236726..d2789d644 100644 --- a/applications/ColossalMoE/train.py +++ b/applications/ColossalMoE/train.py @@ -145,7 +145,7 @@ def main(): args = parse_args() # Launch ColossalAI - colossalai.launch_from_torch(config={}, seed=args.seed) + colossalai.launch_from_torch(seed=args.seed) coordinator = DistCoordinator() # Set plugin @@ -195,9 +195,9 @@ def main(): lr_scheduler = CosineAnnealingWarmupLR( optimizer=optimizer, total_steps=args.num_epochs * len(dataloader), - warmup_steps=args.warmup_steps - if args.warmup_steps is not None - else int(args.num_epochs * len(dataloader) * 0.025), + warmup_steps=( + args.warmup_steps if args.warmup_steps is not None else int(args.num_epochs * len(dataloader) * 0.025) + ), eta_min=0.1 * args.lr, ) diff --git a/colossalai/auto_parallel/offload/amp_optimizer.py b/colossalai/auto_parallel/offload/amp_optimizer.py index fe8439269..ab02de7ce 100644 --- a/colossalai/auto_parallel/offload/amp_optimizer.py +++ b/colossalai/auto_parallel/offload/amp_optimizer.py @@ -126,7 +126,7 @@ class AMPOptimizer(OptimizerWrapper): return self.grad_scaler.scale.item() def zero_grad(self, *args, **kwargs): - self.module.overflow_counter = torch.cuda.IntTensor([0]) + self.module.overflow_counter = torch.tensor([0], dtype=torch.int, device=get_accelerator().get_current_device()) return self.optim.zero_grad(set_to_none=True) def step(self, *args, **kwargs): diff --git a/colossalai/auto_parallel/offload/base_offload_module.py b/colossalai/auto_parallel/offload/base_offload_module.py index 60de7743a..8afd29e43 100644 --- a/colossalai/auto_parallel/offload/base_offload_module.py +++ b/colossalai/auto_parallel/offload/base_offload_module.py @@ -4,7 +4,7 @@ from typing import Optional, Set import torch import torch.nn as nn -from colossalai.utils import _cast_float +from colossalai.utils import _cast_float, get_current_device from colossalai.utils.common import free_storage from .region_manager import RegionManager @@ -25,7 +25,7 @@ class BaseOffloadModule: self.model = model self.region_manager = region_manager self.grad_hook_list = [] - self.overflow_counter = torch.cuda.IntTensor([0]) + self.overflow_counter = torch.tensor([0], dtype=torch.int, device=get_current_device()) self.grad_offload_stream = torch.cuda.current_stream() if is_sync else GlobalRuntimeInfo.d2h_stream diff --git a/colossalai/booster/plugin/torch_ddp_plugin.py b/colossalai/booster/plugin/torch_ddp_plugin.py index 482cc4e98..5116446a4 100644 --- a/colossalai/booster/plugin/torch_ddp_plugin.py +++ b/colossalai/booster/plugin/torch_ddp_plugin.py @@ -10,6 +10,7 @@ from colossalai.checkpoint_io import CheckpointIO, GeneralCheckpointIO from colossalai.cluster import DistCoordinator from colossalai.interface import ModelWrapper, OptimizerWrapper from colossalai.quantization import BnbQuantizationConfig, quantize_model +from colossalai.utils import get_current_device from .dp_plugin_base import DPPluginBase @@ -203,7 +204,7 @@ class TorchDDPPlugin(DPPluginBase): return True def supported_devices(self) -> List[str]: - return ["cuda"] + return ["cuda", "npu"] def configure( self, @@ -214,7 +215,7 @@ class 
TorchDDPPlugin(DPPluginBase): lr_scheduler: Optional[LRScheduler] = None, ) -> Tuple[nn.Module, OptimizerWrapper, Callable, DataLoader, LRScheduler]: # cast model to cuda - model = model.cuda() + model = model.to(get_current_device()) # convert model to sync bn model = nn.SyncBatchNorm.convert_sync_batchnorm(model, None) diff --git a/colossalai/inference/README.md b/colossalai/inference/README.md index c2b808155..0bdaf347d 100644 --- a/colossalai/inference/README.md +++ b/colossalai/inference/README.md @@ -114,7 +114,7 @@ import colossalai from transformers import LlamaForCausalLM, LlamaTokenizer #launch distributed environment -colossalai.launch_from_torch(config={}) +colossalai.launch_from_torch() # load original model and tokenizer model = LlamaForCausalLM.from_pretrained("/path/to/model") diff --git a/colossalai/initialize.py b/colossalai/initialize.py index aaeaad382..934555e19 100644 --- a/colossalai/initialize.py +++ b/colossalai/initialize.py @@ -2,20 +2,15 @@ # -*- encoding: utf-8 -*- import os -import warnings -from pathlib import Path -from typing import Dict, Union import torch.distributed as dist from colossalai.accelerator import get_accelerator -from colossalai.context import Config from colossalai.logging import get_dist_logger from colossalai.utils import set_seed def launch( - config: Union[str, Path, Config, Dict], rank: int, world_size: int, host: str, @@ -44,8 +39,6 @@ def launch( Raises: Exception: Raise exception when config type is wrong """ - if rank == 0: - warnings.warn("`config` is deprecated and will be removed soon.") cur_accelerator = get_accelerator() @@ -68,7 +61,6 @@ def launch( def launch_from_slurm( - config: Union[str, Path, Config, Dict], host: str, port: int, backend: str = "nccl", @@ -95,7 +87,6 @@ def launch_from_slurm( ) launch( - config=config, rank=rank, world_size=world_size, host=host, @@ -107,7 +98,6 @@ def launch_from_slurm( def launch_from_openmpi( - config: Union[str, Path, Config, Dict], host: str, port: int, backend: str = "nccl", @@ -135,7 +125,6 @@ def launch_from_openmpi( ) launch( - config=config, local_rank=local_rank, rank=rank, world_size=world_size, @@ -147,9 +136,7 @@ def launch_from_openmpi( ) -def launch_from_torch( - config: Union[str, Path, Config, Dict], backend: str = "nccl", seed: int = 1024, verbose: bool = True -): +def launch_from_torch(backend: str = "nccl", seed: int = 1024, verbose: bool = True): """A wrapper for colossalai.launch for torchrun or torch.distributed.launch by reading rank and world size from the environment variables set by PyTorch @@ -171,7 +158,6 @@ def launch_from_torch( ) launch( - config=config, local_rank=local_rank, rank=rank, world_size=world_size, diff --git a/colossalai/legacy/inference/dynamic_batching/ray_dist_init.py b/colossalai/legacy/inference/dynamic_batching/ray_dist_init.py index 3e40bb0ee..7a74fb949 100644 --- a/colossalai/legacy/inference/dynamic_batching/ray_dist_init.py +++ b/colossalai/legacy/inference/dynamic_batching/ray_dist_init.py @@ -56,7 +56,7 @@ class Worker: # initialize a ray collective group, otherwise colossalai distributed env won't be built successfully collective.init_collective_group(world_size, rank, "nccl", "default") # initialize and set distributed environment - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") ray_serve_logger.info(f"Worker with rank {rank} (world size {world_size}) setting up..") 
log_cuda_info("Worker.setup") diff --git a/colossalai/legacy/inference/hybridengine/engine.py b/colossalai/legacy/inference/hybridengine/engine.py index bc4e4fd19..019a678ce 100644 --- a/colossalai/legacy/inference/hybridengine/engine.py +++ b/colossalai/legacy/inference/hybridengine/engine.py @@ -42,7 +42,7 @@ class CaiInferEngine: import colossalai from transformers import LlamaForCausalLM, LlamaTokenizer - colossalai.launch_from_torch(config={}) + colossalai.launch_from_torch() model = LlamaForCausalLM.from_pretrained("your_path_to_model") tokenizer = LlamaTokenizer.from_pretrained("/home/lczyh/share/models/llama-7b-hf") diff --git a/colossalai/legacy/inference/pipeline/README.md b/colossalai/legacy/inference/pipeline/README.md index f9bb35cc4..cbe96fff0 100644 --- a/colossalai/legacy/inference/pipeline/README.md +++ b/colossalai/legacy/inference/pipeline/README.md @@ -36,7 +36,7 @@ from colossalai.inference.pipeline.policies import LlamaModelInferPolicy import colossalai from transformers import LlamaForCausalLM, LlamaTokenizer -colossalai.launch_from_torch(config={}) +colossalai.launch_from_torch() model = LlamaForCausalLM.from_pretrained("/path/to/model") tokenizer = LlamaTokenizer.from_pretrained("/path/to/model") @@ -57,27 +57,27 @@ We conducted multiple benchmark tests to evaluate the performance. We compared t ### Llama Throughput (tokens/s) | input length=1024, output length=128 #### A10 7b, fp16 -| batch_size(micro_batch size)| 2(1) | 4(2) | 8(4) | 16(8) | 32(8) | 32(16)| -| :---: | :---: | :---: | :---: | :---: | :---: | :---:| -| Pipeline Inference | 40.35 | 77.1 | 139.03 | 232.7 | 257.81 | OOM | -| Hugging Face | 41.43 | 65.30 | 91.93 | 114.62 | OOM| OOM | +| batch_size(micro_batch size) | 2(1) | 4(2) | 8(4) | 16(8) | 32(8) | 32(16) | +|:----------------------------:|:-----:|:-----:|:------:|:------:|:------:|:------:| +| Pipeline Inference | 40.35 | 77.1 | 139.03 | 232.7 | 257.81 | OOM | +| Hugging Face | 41.43 | 65.30 | 91.93 | 114.62 | OOM | OOM | #### A10 13b, fp16 -| batch_size(micro_batch size)| 2(1) | 4(2) | 8(4) | 16(4) | -| :---: | :---: | :---: | :---: | :---: | -| Pipeline Inference | 25.39 | 47.09 | 83.7 | 89.46 | -| Hugging Face | 23.48 | 37.59 | 53.44 | OOM | +| batch_size(micro_batch size) | 2(1) | 4(2) | 8(4) | 16(4) | +|:----------------------------:|:-----:|:-----:|:-----:|:-----:| +| Pipeline Inference | 25.39 | 47.09 | 83.7 | 89.46 | +| Hugging Face | 23.48 | 37.59 | 53.44 | OOM | #### A800 7b, fp16 -| batch_size(micro_batch size) | 2(1) | 4(2) | 8(4) | 16(8) | 32(16) | -| :---: | :---: | :---: | :---: | :---: | :---: | -| Pipeline Inference| 57.97 | 110.13 | 213.33 | 389.86 | 670.12 | -| Hugging Face | 42.44 | 76.5 | 151.97 | 212.88 | 256.13 | +| batch_size(micro_batch size) | 2(1) | 4(2) | 8(4) | 16(8) | 32(16) | +|:----------------------------:|:-----:|:------:|:------:|:------:|:------:| +| Pipeline Inference | 57.97 | 110.13 | 213.33 | 389.86 | 670.12 | +| Hugging Face | 42.44 | 76.5 | 151.97 | 212.88 | 256.13 | #### A800 13b, fp16 -| batch_size(micro_batch size) | 2(1) | 4(2) | 8(4) | 16(8) | 32(16) | -| :---: | :---: | :---: | :---: | :---: | :---: | -| Pipeline Inference | 41.78 | 94.18 | 172.67| 310.75| 470.15 | -| Hugging Face | 36.57 | 68.4 | 105.81 | 139.51 | 166.34 | +| batch_size(micro_batch size) | 2(1) | 4(2) | 8(4) | 16(8) | 32(16) | +|:----------------------------:|:-----:|:-----:|:------:|:------:|:------:| +| Pipeline Inference | 41.78 | 94.18 | 172.67 | 310.75 | 470.15 | +| Hugging Face | 36.57 | 68.4 | 105.81 | 139.51 | 166.34 | diff 
--git a/colossalai/legacy/inference/pipeline/benchmark/benchmark.py b/colossalai/legacy/inference/pipeline/benchmark/benchmark.py index 8392d0a1e..7bb89f4f4 100644 --- a/colossalai/legacy/inference/pipeline/benchmark/benchmark.py +++ b/colossalai/legacy/inference/pipeline/benchmark/benchmark.py @@ -12,7 +12,7 @@ from colossalai.inference.pipeline.policies import LlamaModelInferPolicy GIGABYTE = 1024**3 MEGABYTE = 1024 * 1024 -colossalai.launch_from_torch(config={}) +colossalai.launch_from_torch() def data_gen(batch_size: int = 4, seq_len: int = 512): diff --git a/colossalai/legacy/inference/serving/ray_serve/Colossal_Inference_rayserve.py b/colossalai/legacy/inference/serving/ray_serve/Colossal_Inference_rayserve.py index d758b467c..37e7bae41 100644 --- a/colossalai/legacy/inference/serving/ray_serve/Colossal_Inference_rayserve.py +++ b/colossalai/legacy/inference/serving/ray_serve/Colossal_Inference_rayserve.py @@ -56,7 +56,7 @@ class Worker: # initialize a ray collective group, otherwise colossalai distributed env won't be built successfully collective.init_collective_group(world_size, rank, "nccl", "default") # initialize and set distributed environment - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") ray_serve_logger.info(f"Worker with rank {rank} (world size {world_size}) setting up..") log_cuda_info("Worker.setup") diff --git a/colossalai/legacy/inference/serving/torch_serve/Colossal_Inference_Handler.py b/colossalai/legacy/inference/serving/torch_serve/Colossal_Inference_Handler.py index e07494b8a..bcbdee951 100644 --- a/colossalai/legacy/inference/serving/torch_serve/Colossal_Inference_Handler.py +++ b/colossalai/legacy/inference/serving/torch_serve/Colossal_Inference_Handler.py @@ -98,7 +98,7 @@ class ColossalInferenceHandler(BaseHandler, ABC): self.model.cuda() self.model.eval() - colossalai.launch(config={}, rank=rank, world_size=world_size, host=host, port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host=host, port=port, backend="nccl") logger.info("Initializing TPInferEngine ...") shard_config = ShardConfig( enable_tensor_parallelism=True if self.tp_size > 1 else False, extra_kwargs={"inference_only": True} diff --git a/colossalai/legacy/pipeline/rpc/utils.py b/colossalai/legacy/pipeline/rpc/utils.py index 808de301a..87060ab8a 100644 --- a/colossalai/legacy/pipeline/rpc/utils.py +++ b/colossalai/legacy/pipeline/rpc/utils.py @@ -114,7 +114,7 @@ def run_worker(rank, args, master_func): port = args.master_port backend = "nccl" if device == "cuda" else "gloo" - launch(dict(), rank, world_size, host, int(port), backend, verbose=False) + launch(rank, world_size, host, int(port), backend, verbose=False) ppg.set_global_info( rank=rank, world_size=world_size, diff --git a/colossalai/nn/optimizer/fused_adam.py b/colossalai/nn/optimizer/fused_adam.py index aeb5cc91b..c12551657 100644 --- a/colossalai/nn/optimizer/fused_adam.py +++ b/colossalai/nn/optimizer/fused_adam.py @@ -8,7 +8,7 @@ Licensed under the MIT License. 
""" import torch -from colossalai.utils import multi_tensor_applier +from colossalai.utils import get_current_device, multi_tensor_applier class FusedAdam(torch.optim.Optimizer): @@ -75,7 +75,7 @@ class FusedAdam(torch.optim.Optimizer): fused_optim = FusedOptimizerLoader().load() # Skip buffer - self._dummy_overflow_buf = torch.cuda.IntTensor([0]) + self._dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device=get_current_device()) self.multi_tensor_adam = fused_optim.multi_tensor_adam else: raise RuntimeError("FusedAdam requires cuda extensions") diff --git a/colossalai/nn/optimizer/hybrid_adam.py b/colossalai/nn/optimizer/hybrid_adam.py index c9c1f81bf..417881a0b 100644 --- a/colossalai/nn/optimizer/hybrid_adam.py +++ b/colossalai/nn/optimizer/hybrid_adam.py @@ -3,7 +3,7 @@ from typing import Any, Optional import torch from colossalai.kernel.kernel_loader import FusedOptimizerLoader -from colossalai.utils import multi_tensor_applier +from colossalai.utils import get_current_device, multi_tensor_applier from .cpu_adam import CPUAdam @@ -87,7 +87,7 @@ class HybridAdam(CPUAdam): if torch.cuda.is_available(): fused_optim = FusedOptimizerLoader().load() self.gpu_adam_op = fused_optim.multi_tensor_adam - self._dummy_overflow_buf = torch.cuda.IntTensor([0]) + self._dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device=get_current_device()) @torch.no_grad() def step(self, closure=None, div_scale: float = -1): diff --git a/colossalai/shardformer/README.md b/colossalai/shardformer/README.md index d45421868..47ef98ccf 100644 --- a/colossalai/shardformer/README.md +++ b/colossalai/shardformer/README.md @@ -38,7 +38,7 @@ from transformers import BertForMaskedLM import colossalai # launch colossalai -colossalai.launch_from_torch(config={}) +colossalai.launch_from_torch() # create model config = BertConfig.from_pretrained('bert-base-uncased') diff --git a/colossalai/shardformer/examples/convergence_benchmark.py b/colossalai/shardformer/examples/convergence_benchmark.py index b03e6201d..4caf61eb4 100644 --- a/colossalai/shardformer/examples/convergence_benchmark.py +++ b/colossalai/shardformer/examples/convergence_benchmark.py @@ -28,7 +28,7 @@ def to_device(x: Any, device: torch.device) -> Any: def train(args): - colossalai.launch_from_torch(config={}, seed=42) + colossalai.launch_from_torch(seed=42) coordinator = DistCoordinator() # prepare for data and dataset diff --git a/colossalai/shardformer/examples/performance_benchmark.py b/colossalai/shardformer/examples/performance_benchmark.py index 81215dcdf..cce8b6f3a 100644 --- a/colossalai/shardformer/examples/performance_benchmark.py +++ b/colossalai/shardformer/examples/performance_benchmark.py @@ -1,6 +1,7 @@ """ Shardformer Benchmark """ + import torch import torch.distributed as dist import transformers @@ -84,5 +85,5 @@ def bench_shardformer(BATCH, N_CTX, provider, model_func, dtype=torch.float32, d # start benchmark, command: # torchrun --standalone --nproc_per_node=2 performance_benchmark.py if __name__ == "__main__": - colossalai.launch_from_torch({}) + colossalai.launch_from_torch() bench_shardformer.run(save_path=".", print_data=dist.get_rank() == 0) diff --git a/colossalai/shardformer/shard/shardformer.py b/colossalai/shardformer/shard/shardformer.py index b132f47fd..b3991c4f0 100644 --- a/colossalai/shardformer/shard/shardformer.py +++ b/colossalai/shardformer/shard/shardformer.py @@ -26,7 +26,7 @@ class ShardFormer: import colossalai import torch - colossalai.launch_from_torch(config={}) + colossalai.launch_from_torch() 
org_model = BertForMaskedLM.from_pretrained('bert-base-uncased') shard_config = ShardConfig() diff --git a/colossalai/tensor/d_tensor/README.md b/colossalai/tensor/d_tensor/README.md index 3d862dddb..367db5ccd 100644 --- a/colossalai/tensor/d_tensor/README.md +++ b/colossalai/tensor/d_tensor/README.md @@ -69,7 +69,7 @@ import colossalai from colossalai.device.device_mesh import DeviceMesh from colossalai.tensor.d_tensor import DTensor, ShardingSpec -colossalai.launch_from_torch(config={}) +colossalai.launch_from_torch() # define your device mesh # assume you have 4 GPUs diff --git a/docs/source/en/advanced_tutorials/train_gpt_using_hybrid_parallelism.md b/docs/source/en/advanced_tutorials/train_gpt_using_hybrid_parallelism.md index 0133dfd86..b27f9c811 100644 --- a/docs/source/en/advanced_tutorials/train_gpt_using_hybrid_parallelism.md +++ b/docs/source/en/advanced_tutorials/train_gpt_using_hybrid_parallelism.md @@ -75,7 +75,7 @@ WARMUP_FRACTION = 0.1 we create a distributed environment. ```python # Launch ColossalAI -colossalai.launch_from_torch(config={}, seed=42) +colossalai.launch_from_torch( seed=42) coordinator = DistCoordinator() ``` prepare the dataset. You can use `plugin.prepare_dataloader` to generate a dataloader or customize your own dataloader. diff --git a/docs/source/en/advanced_tutorials/train_vit_with_hybrid_parallelism.md b/docs/source/en/advanced_tutorials/train_vit_with_hybrid_parallelism.md index dfc2cd596..ac4169344 100644 --- a/docs/source/en/advanced_tutorials/train_vit_with_hybrid_parallelism.md +++ b/docs/source/en/advanced_tutorials/train_vit_with_hybrid_parallelism.md @@ -71,7 +71,7 @@ PP_SIZE = 2 Create a distributed environment. ```python # Launch ColossalAI -colossalai.launch_from_torch(config={}, seed=SEEDå) +colossalai.launch_from_torch( seed=SEEDå) coordinator = DistCoordinator() world_size = coordinator.world_size ``` diff --git a/docs/source/en/basics/booster_api.md b/docs/source/en/basics/booster_api.md index 2c75dd9ac..a33be3b49 100644 --- a/docs/source/en/basics/booster_api.md +++ b/docs/source/en/basics/booster_api.md @@ -55,7 +55,7 @@ from colossalai.booster.plugin import TorchDDPPlugin def train(): # launch colossalai - colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host='localhost') + colossalai.launch(rank=rank, world_size=world_size, port=port, host='localhost') # create plugin and objects for training plugin = TorchDDPPlugin() diff --git a/docs/source/en/basics/launch_colossalai.md b/docs/source/en/basics/launch_colossalai.md index 334757ea7..8a6028d6c 100644 --- a/docs/source/en/basics/launch_colossalai.md +++ b/docs/source/en/basics/launch_colossalai.md @@ -87,8 +87,7 @@ import colossalai args = colossalai.get_default_parser().parse_args() # launch distributed environment -colossalai.launch(config=args.config, - rank=args.rank, +colossalai.launch(rank=args.rank, world_size=args.world_size, host=args.host, port=args.port, @@ -106,20 +105,11 @@ First, we need to set the launch method in our code. As this is a wrapper of the use `colossalai.launch_from_torch`. The arguments required for distributed environment such as rank, world size, host and port are all set by the PyTorch launcher and can be read from the environment variable directly. -config.py -```python -BATCH_SIZE = 512 -LEARNING_RATE = 3e-3 -WEIGHT_DECAY = 0.3 -NUM_EPOCHS = 2 -``` train.py ```python import colossalai -colossalai.launch_from_torch( - config="./config.py", -) +colossalai.launch_from_torch() ... 
``` @@ -203,7 +193,6 @@ Do this in your training script: import colossalai colossalai.launch_from_slurm( - config=, host=args.host, port=args.port ) @@ -224,7 +213,6 @@ use them to start the distributed backend. Do this in your train.py: ```python colossalai.launch_from_openmpi( - config=, host=args.host, port=args.port ) @@ -238,3 +226,5 @@ mpirun --hostfile -np python train.py --host diff --git a/docs/source/en/features/gradient_accumulation_with_booster.md b/docs/source/en/features/gradient_accumulation_with_booster.md index ea97dd92e..f1e47e9bb 100644 --- a/docs/source/en/features/gradient_accumulation_with_booster.md +++ b/docs/source/en/features/gradient_accumulation_with_booster.md @@ -45,7 +45,7 @@ We then need to initialize distributed environment. For demo purpose, we uses `l parser = colossalai.get_default_parser() args = parser.parse_args() # launch from torch -colossalai.launch_from_torch(config=dict()) +colossalai.launch_from_torch() ``` ### Step 3. Create training components diff --git a/docs/source/en/features/gradient_clipping_with_booster.md b/docs/source/en/features/gradient_clipping_with_booster.md index 14eee67bc..9f9074e1d 100644 --- a/docs/source/en/features/gradient_clipping_with_booster.md +++ b/docs/source/en/features/gradient_clipping_with_booster.md @@ -61,7 +61,7 @@ We then need to initialize distributed environment. For demo purpose, we uses `l for other initialization methods. ```python -colossalai.launch_from_torch(config=dict()) +colossalai.launch_from_torch() logger = get_dist_logger() ``` diff --git a/docs/source/en/features/lazy_init.md b/docs/source/en/features/lazy_init.md index 160f68767..30b33b52f 100644 --- a/docs/source/en/features/lazy_init.md +++ b/docs/source/en/features/lazy_init.md @@ -29,7 +29,7 @@ from colossalai.booster.plugin import GeminiPlugin from transformers import LlamaForCausalLM, LlamaConfig, BertForPreTraining -colossalai.launch({}) +colossalai.launch() plugin = GeminiPlugin() booster = Booster(plugin) diff --git a/docs/source/en/features/mixed_precision_training_with_booster.md b/docs/source/en/features/mixed_precision_training_with_booster.md index 8e702a578..baaaacddd 100644 --- a/docs/source/en/features/mixed_precision_training_with_booster.md +++ b/docs/source/en/features/mixed_precision_training_with_booster.md @@ -20,10 +20,10 @@ In Colossal-AI, we have incorporated different implementations of mixed precisio 3. 
naive amp | Colossal-AI | support tensor parallel | support pipeline parallel | fp16 extent | -| -------------- | ----------------------- | ------------------------- | ---------------------------------------------------------------------------------------------------- | -| AMP_TYPE.TORCH | ✅ | ❌ | Model parameters, activation, gradients are downcast to fp16 during forward and backward propagation | -| AMP_TYPE.APEX | ❌ | ❌ | More fine-grained, we can choose opt_level O0, O1, O2, O3 | -| AMP_TYPE.NAIVE | ✅ | ✅ | Model parameters, forward and backward operations are all downcast to fp16 | +|----------------|-------------------------|---------------------------|------------------------------------------------------------------------------------------------------| +| AMP_TYPE.TORCH | ✅ | ❌ | Model parameters, activation, gradients are downcast to fp16 during forward and backward propagation | +| AMP_TYPE.APEX | ❌ | ❌ | More fine-grained, we can choose opt_level O0, O1, O2, O3 | +| AMP_TYPE.NAIVE | ✅ | ✅ | Model parameters, forward and backward operations are all downcast to fp16 | The first two rely on the original implementation of PyTorch (version 1.6 and above) and NVIDIA Apex. The last method is similar to Apex O2 level. @@ -164,7 +164,7 @@ parser = colossalai.get_default_parser() args = parser.parse_args() # launch from torch -colossalai.launch_from_torch(config=dict()) +colossalai.launch_from_torch() ``` diff --git a/docs/source/en/features/nvme_offload.md b/docs/source/en/features/nvme_offload.md index 6ed6f2dee..343a1f67e 100644 --- a/docs/source/en/features/nvme_offload.md +++ b/docs/source/en/features/nvme_offload.md @@ -185,7 +185,7 @@ Then we can train GPT model with Gemini. The placement policy of Gemini should b ```python def train_gemini_cpu(nvme_offload_fraction: float = 0.0): - colossalai.launch_from_torch({}) + colossalai.launch_from_torch() config = GPT2Config() with ColoInitContext(device=torch.cuda.current_device()): model = GPT2LMHeadModel(config) diff --git a/docs/source/en/features/zero_with_chunk.md b/docs/source/en/features/zero_with_chunk.md index 62be86488..f0c13830a 100644 --- a/docs/source/en/features/zero_with_chunk.md +++ b/docs/source/en/features/zero_with_chunk.md @@ -174,7 +174,7 @@ def main(): SEQ_LEN = 1024 VOCAB_SIZE = 50257 NUM_STEPS = 10 - colossalai.launch_from_torch(config={}) + colossalai.launch_from_torch() # build criterion criterion = GPTLMLoss() diff --git a/docs/source/zh-Hans/advanced_tutorials/train_gpt_using_hybrid_parallelism.md b/docs/source/zh-Hans/advanced_tutorials/train_gpt_using_hybrid_parallelism.md index cf7d19172..4d4ea8163 100644 --- a/docs/source/zh-Hans/advanced_tutorials/train_gpt_using_hybrid_parallelism.md +++ b/docs/source/zh-Hans/advanced_tutorials/train_gpt_using_hybrid_parallelism.md @@ -62,7 +62,7 @@ plugin = HybridParallelPlugin( ## 创建分布式环境. 
```python # Launch ColossalAI -colossalai.launch_from_torch(config={}, seed=42) +colossalai.launch_from_torch(seed=42) coordinator = DistCoordinator() ``` ## 定义GPT-2模型的训练组件 diff --git a/docs/source/zh-Hans/advanced_tutorials/train_vit_with_hybrid_parallelism.md b/docs/source/zh-Hans/advanced_tutorials/train_vit_with_hybrid_parallelism.md index f32f6c367..c234a3c6e 100644 --- a/docs/source/zh-Hans/advanced_tutorials/train_vit_with_hybrid_parallelism.md +++ b/docs/source/zh-Hans/advanced_tutorials/train_vit_with_hybrid_parallelism.md @@ -70,7 +70,7 @@ PP_SIZE = 2 首先我们创建一个分布式环境 ```python # Launch ColossalAI -colossalai.launch_from_torch(config={}, seed=SEEDå) +colossalai.launch_from_torch(seed=SEEDå) coordinator = DistCoordinator() world_size = coordinator.world_size ``` diff --git a/docs/source/zh-Hans/basics/booster_api.md b/docs/source/zh-Hans/basics/booster_api.md index bb100964d..a9357617d 100644 --- a/docs/source/zh-Hans/basics/booster_api.md +++ b/docs/source/zh-Hans/basics/booster_api.md @@ -60,7 +60,7 @@ from colossalai.booster.plugin import TorchDDPPlugin def train(): # launch colossalai - colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host='localhost') + colossalai.launch(rank=rank, world_size=world_size, port=port, host='localhost') # create plugin and objects for training plugin = TorchDDPPlugin() diff --git a/docs/source/zh-Hans/basics/launch_colossalai.md b/docs/source/zh-Hans/basics/launch_colossalai.md index 39b09deae..a80d16717 100644 --- a/docs/source/zh-Hans/basics/launch_colossalai.md +++ b/docs/source/zh-Hans/basics/launch_colossalai.md @@ -74,8 +74,7 @@ import colossalai args = colossalai.get_default_parser().parse_args() # launch distributed environment -colossalai.launch(config=args.config, - rank=args.rank, +colossalai.launch(rank=args.rank, world_size=args.world_size, host=args.host, port=args.port, @@ -93,20 +92,11 @@ PyTorch自带的启动器需要在每个节点上都启动命令才能启动多 首先,我们需要在代码里指定我们的启动方式。由于这个启动器是PyTorch启动器的封装,那么我们自然而然应该使用`colossalai.launch_from_torch`。 分布式环境所需的参数,如 rank, world size, host 和 port 都是由 PyTorch 启动器设置的,可以直接从环境变量中读取。 -config.py -```python -BATCH_SIZE = 512 -LEARNING_RATE = 3e-3 -WEIGHT_DECAY = 0.3 -NUM_EPOCHS = 2 -``` train.py ```python import colossalai -colossalai.launch_from_torch( - config="./config.py", -) +colossalai.launch_from_torch() ... 
``` @@ -186,7 +176,6 @@ colossalai run --nproc_per_node 4 --hostfile ./hostfile --master_addr host1 --e import colossalai colossalai.launch_from_slurm( - config=, host=args.host, port=args.port ) @@ -206,7 +195,6 @@ srun python train.py --host --port 29500 您可以在您的训练脚本中尝试以下操作。 ```python colossalai.launch_from_openmpi( - config=, host=args.host, port=args.port ) @@ -219,3 +207,5 @@ mpirun --hostfile -np python train.py --host diff --git a/docs/source/zh-Hans/features/gradient_accumulation_with_booster.md b/docs/source/zh-Hans/features/gradient_accumulation_with_booster.md index 824308f94..7ad8fb145 100644 --- a/docs/source/zh-Hans/features/gradient_accumulation_with_booster.md +++ b/docs/source/zh-Hans/features/gradient_accumulation_with_booster.md @@ -46,7 +46,7 @@ parser = colossalai.get_default_parser() args = parser.parse_args() # launch from torch -colossalai.launch_from_torch(config=dict()) +colossalai.launch_from_torch() ``` diff --git a/docs/source/zh-Hans/features/gradient_clipping_with_booster.md b/docs/source/zh-Hans/features/gradient_clipping_with_booster.md index fdec09bf1..b000d4585 100644 --- a/docs/source/zh-Hans/features/gradient_clipping_with_booster.md +++ b/docs/source/zh-Hans/features/gradient_clipping_with_booster.md @@ -61,7 +61,7 @@ from colossalai.nn.lr_scheduler import CosineAnnealingLR 我们需要初始化分布式环境. 为了快速演示,我们使用`launch_from_torch`. 您可以参考 [Launch Colossal-AI](../basics/launch_colossalai.md) ```python -colossalai.launch_from_torch(config=dict()) +colossalai.launch_from_torch() logger = get_dist_logger() ``` diff --git a/docs/source/zh-Hans/features/lazy_init.md b/docs/source/zh-Hans/features/lazy_init.md index 137719c69..c9cc0e4ba 100644 --- a/docs/source/zh-Hans/features/lazy_init.md +++ b/docs/source/zh-Hans/features/lazy_init.md @@ -29,7 +29,7 @@ from colossalai.booster.plugin import GeminiPlugin from transformers import LlamaForCausalLM, LlamaConfig, BertForPreTraining -colossalai.launch({}) +colossalai.launch() plugin = GeminiPlugin() booster = Booster(plugin) diff --git a/docs/source/zh-Hans/features/mixed_precision_training_with_booster.md b/docs/source/zh-Hans/features/mixed_precision_training_with_booster.md index 8e9f614a2..53d9013db 100644 --- a/docs/source/zh-Hans/features/mixed_precision_training_with_booster.md +++ b/docs/source/zh-Hans/features/mixed_precision_training_with_booster.md @@ -19,11 +19,11 @@ AMP 代表自动混合精度训练。 2. apex.amp 3. 
naive amp -| Colossal-AI | 支持张量并行 | 支持流水并行 | fp16 范围 | -| -------------- | ------------ | ------------ | --------------------------------------------------------- | -| AMP_TYPE.TORCH | ✅ | ❌ | 在前向和反向传播期间,模型参数、激活和梯度向下转换至 fp16 | -| AMP_TYPE.APEX | ❌ | ❌ | 更细粒度,我们可以选择 opt_level O0, O1, O2, O3 | -| AMP_TYPE.NAIVE | ✅ | ✅ | 模型参数、前向和反向操作,全都向下转换至 fp16 | +| Colossal-AI | 支持张量并行 | 支持流水并行 | fp16 范围 | +|----------------|--------------|--------------|-------------------------------------------------------| +| AMP_TYPE.TORCH | ✅ | ❌ | 在前向和反向传播期间,模型参数、激活和梯度向下转换至 fp16 | +| AMP_TYPE.APEX | ❌ | ❌ | 更细粒度,我们可以选择 opt_level O0, O1, O2, O3 | +| AMP_TYPE.NAIVE | ✅ | ✅ | 模型参数、前向和反向操作,全都向下转换至 fp16 | 前两个依赖于 PyTorch (1.6 及以上) 和 NVIDIA Apex 的原始实现。最后一种方法类似 Apex O2。在这些方法中,Apex-AMP 与张量并行不兼容。这是因为张量是以张量并行的方式在设备之间拆分的,因此,需要在不同的进程之间进行通信,以检查整个模型权重中是否出现 inf 或 nan。我们修改了 torch amp 实现,使其现在与张量并行兼容。 @@ -153,7 +153,7 @@ parser = colossalai.get_default_parser() args = parser.parse_args() # launch from torch -colossalai.launch_from_torch(config=dict()) +colossalai.launch_from_torch() ``` diff --git a/docs/source/zh-Hans/features/nvme_offload.md b/docs/source/zh-Hans/features/nvme_offload.md index 1feb9dde5..f013e755d 100644 --- a/docs/source/zh-Hans/features/nvme_offload.md +++ b/docs/source/zh-Hans/features/nvme_offload.md @@ -175,7 +175,7 @@ Mem usage: 4968.016 MB ```python def train_gemini_cpu(nvme_offload_fraction: float = 0.0): - colossalai.launch_from_torch({}) + colossalai.launch_from_torch() config = GPT2Config() with ColoInitContext(device=torch.cuda.current_device()): model = GPT2LMHeadModel(config) diff --git a/docs/source/zh-Hans/features/zero_with_chunk.md b/docs/source/zh-Hans/features/zero_with_chunk.md index c4f21c73c..4a4655d60 100644 --- a/docs/source/zh-Hans/features/zero_with_chunk.md +++ b/docs/source/zh-Hans/features/zero_with_chunk.md @@ -174,7 +174,7 @@ def main(): SEQ_LEN = 1024 VOCAB_SIZE = 50257 NUM_STEPS = 10 - colossalai.launch_from_torch(config={}) + colossalai.launch_from_torch() # build criterion criterion = GPTLMLoss() diff --git a/examples/community/roberta/pretraining/run_pretraining.py b/examples/community/roberta/pretraining/run_pretraining.py index 40b11d649..48cde8239 100644 --- a/examples/community/roberta/pretraining/run_pretraining.py +++ b/examples/community/roberta/pretraining/run_pretraining.py @@ -35,12 +35,12 @@ def main(): if args.vscode_debug: colossalai.launch( - config={}, rank=args.rank, world_size=args.world_size, host=args.host, port=args.port, backend=args.backend + rank=args.rank, world_size=args.world_size, host=args.host, port=args.port, backend=args.backend ) args.local_rank = -1 args.log_interval = 1 else: - colossalai.launch_from_torch(config={}) # args.colossal_config + colossalai.launch_from_torch() # args.colossal_config args.local_rank = int(os.environ["LOCAL_RANK"]) logger.info( f"launch_from_torch, world size: {torch.distributed.get_world_size()} | " diff --git a/examples/images/dreambooth/debug.py b/examples/images/dreambooth/debug.py index 8ce4dc3bb..64588e904 100644 --- a/examples/images/dreambooth/debug.py +++ b/examples/images/dreambooth/debug.py @@ -9,7 +9,7 @@ from colossalai.zero import ColoInitContext path = "/data/scratch/diffuser/stable-diffusion-v1-4" -colossalai.launch_from_torch(config={}) +colossalai.launch_from_torch() with ColoInitContext(device="cpu"): vae = AutoencoderKL.from_pretrained( path, diff --git a/examples/images/dreambooth/train_dreambooth_colossalai.py b/examples/images/dreambooth/train_dreambooth_colossalai.py index cc2b2ebc7..2bacb3a04 
100644 --- a/examples/images/dreambooth/train_dreambooth_colossalai.py +++ b/examples/images/dreambooth/train_dreambooth_colossalai.py @@ -372,9 +372,9 @@ def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: def main(args): if args.seed is None: - colossalai.launch_from_torch(config={}) + colossalai.launch_from_torch() else: - colossalai.launch_from_torch(config={}, seed=args.seed) + colossalai.launch_from_torch(seed=args.seed) local_rank = dist.get_rank() world_size = dist.get_world_size() diff --git a/examples/images/dreambooth/train_dreambooth_colossalai_lora.py b/examples/images/dreambooth/train_dreambooth_colossalai_lora.py index 227488abe..c4ef2a34e 100644 --- a/examples/images/dreambooth/train_dreambooth_colossalai_lora.py +++ b/examples/images/dreambooth/train_dreambooth_colossalai_lora.py @@ -371,9 +371,9 @@ def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: def main(args): if args.seed is None: - colossalai.launch_from_torch(config={}) + colossalai.launch_from_torch() else: - colossalai.launch_from_torch(config={}, seed=args.seed) + colossalai.launch_from_torch(seed=args.seed) local_rank = gpc.get_local_rank(ParallelMode.DATA) world_size = gpc.get_world_size(ParallelMode.DATA) diff --git a/examples/images/resnet/train.py b/examples/images/resnet/train.py index 5871bbf87..a53a85180 100644 --- a/examples/images/resnet/train.py +++ b/examples/images/resnet/train.py @@ -128,7 +128,7 @@ def main(): # ============================== # Launch Distributed Environment # ============================== - colossalai.launch_from_torch(config={}) + colossalai.launch_from_torch() coordinator = DistCoordinator() # update the learning rate with linear scaling diff --git a/examples/images/vit/vit_benchmark.py b/examples/images/vit/vit_benchmark.py index fdae9ee01..790bb2b74 100644 --- a/examples/images/vit/vit_benchmark.py +++ b/examples/images/vit/vit_benchmark.py @@ -46,7 +46,7 @@ def main(): args = parse_benchmark_args() # Launch ColossalAI - colossalai.launch_from_torch(config={}, seed=args.seed) + colossalai.launch_from_torch(seed=args.seed) coordinator = DistCoordinator() world_size = coordinator.world_size diff --git a/examples/images/vit/vit_train_demo.py b/examples/images/vit/vit_train_demo.py index 81009b370..a65f89171 100644 --- a/examples/images/vit/vit_train_demo.py +++ b/examples/images/vit/vit_train_demo.py @@ -137,7 +137,7 @@ def main(): args = parse_demo_args() # Launch ColossalAI - colossalai.launch_from_torch(config={}, seed=args.seed) + colossalai.launch_from_torch(seed=args.seed) coordinator = DistCoordinator() world_size = coordinator.world_size diff --git a/examples/inference/benchmark_llama.py b/examples/inference/benchmark_llama.py index 26cac977a..a23ab500a 100644 --- a/examples/inference/benchmark_llama.py +++ b/examples/inference/benchmark_llama.py @@ -136,7 +136,7 @@ def benchmark_inference(args): def hybrid_inference(rank, world_size, port, args): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") benchmark_inference(args) diff --git a/examples/inference/run_llama_inference.py b/examples/inference/run_llama_inference.py index b5228c64e..a4e6fd0a1 100644 --- a/examples/inference/run_llama_inference.py +++ b/examples/inference/run_llama_inference.py @@ -68,7 +68,7 @@ def run_inference(args): def run_tp_pipeline_inference(rank, world_size, port, args): - 
colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_inference(args) diff --git a/examples/language/bert/benchmark.py b/examples/language/bert/benchmark.py index 10bd367fd..9270c1b0c 100644 --- a/examples/language/bert/benchmark.py +++ b/examples/language/bert/benchmark.py @@ -81,7 +81,7 @@ def main(): # ============================== # Launch Distributed Environment # ============================== - colossalai.launch_from_torch(config={}, seed=42) + colossalai.launch_from_torch(seed=42) coordinator = DistCoordinator() # local_batch_size = BATCH_SIZE // coordinator.world_size diff --git a/examples/language/bert/finetune.py b/examples/language/bert/finetune.py index bd6c393a7..7e8c07fdc 100644 --- a/examples/language/bert/finetune.py +++ b/examples/language/bert/finetune.py @@ -202,7 +202,7 @@ def main(): # ============================== # Launch Distributed Environment # ============================== - colossalai.launch_from_torch(config={}, seed=42) + colossalai.launch_from_torch(seed=42) coordinator = DistCoordinator() lr = LEARNING_RATE * coordinator.world_size diff --git a/examples/language/gpt/experiments/auto_offload/train_gpt_offload.py b/examples/language/gpt/experiments/auto_offload/train_gpt_offload.py index b35112498..fbb3a151a 100644 --- a/examples/language/gpt/experiments/auto_offload/train_gpt_offload.py +++ b/examples/language/gpt/experiments/auto_offload/train_gpt_offload.py @@ -94,8 +94,7 @@ def train_gpt(args): def run(rank, world_size, port, args): - config = {} - colossalai.launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") train_gpt(args) diff --git a/examples/language/gpt/experiments/auto_parallel/auto_parallel_with_gpt.py b/examples/language/gpt/experiments/auto_parallel/auto_parallel_with_gpt.py index f3d35dd90..9a33c6598 100644 --- a/examples/language/gpt/experiments/auto_parallel/auto_parallel_with_gpt.py +++ b/examples/language/gpt/experiments/auto_parallel/auto_parallel_with_gpt.py @@ -47,7 +47,7 @@ def get_data(batch_size, seq_len, vocab_size): def main(): disable_existing_loggers() - launch_from_torch(config={}) + launch_from_torch() logger = get_dist_logger() config = transformers.GPT2Config(n_position=SEQ_LENGTH, n_layer=NUM_LAYERS, n_head=NUM_HEADS, n_embd=HIDDEN_DIM) if FP16: diff --git a/examples/language/gpt/gemini/train_gpt_demo.py b/examples/language/gpt/gemini/train_gpt_demo.py index 78d090ba2..4911ff124 100644 --- a/examples/language/gpt/gemini/train_gpt_demo.py +++ b/examples/language/gpt/gemini/train_gpt_demo.py @@ -132,7 +132,7 @@ def main(): PROF_FLAG = False # The flag of profiling, False by default disable_existing_loggers() - colossalai.launch_from_torch(config={}) + colossalai.launch_from_torch() logger = get_dist_logger() logger.info(f"{args.model_type}, {args.distplan}, batch size {BATCH_SIZE}", ranks=[0]) diff --git a/examples/language/gpt/hybridparallelism/benchmark.py b/examples/language/gpt/hybridparallelism/benchmark.py index 1315deae6..8c236b524 100644 --- a/examples/language/gpt/hybridparallelism/benchmark.py +++ b/examples/language/gpt/hybridparallelism/benchmark.py @@ -67,7 +67,7 @@ def main(): parser.add_argument("--cpu_offload", action="store_true", help="Use gradient checkpointing") args = parser.parse_args() - 
colossalai.launch_from_torch({}) + colossalai.launch_from_torch() coordinator = DistCoordinator() def empty_init(): diff --git a/examples/language/gpt/hybridparallelism/finetune.py b/examples/language/gpt/hybridparallelism/finetune.py index 888f47aaa..32b2dfcc0 100644 --- a/examples/language/gpt/hybridparallelism/finetune.py +++ b/examples/language/gpt/hybridparallelism/finetune.py @@ -196,7 +196,7 @@ def main(): # ============================== # Launch Distributed Environment # ============================== - colossalai.launch_from_torch(config={}, seed=42) + colossalai.launch_from_torch(seed=42) coordinator = DistCoordinator() # local_batch_size = BATCH_SIZE // coordinator.world_size diff --git a/examples/language/gpt/titans/train_gpt.py b/examples/language/gpt/titans/train_gpt.py index 565cf1e01..6b45bd33e 100644 --- a/examples/language/gpt/titans/train_gpt.py +++ b/examples/language/gpt/titans/train_gpt.py @@ -36,9 +36,9 @@ def main(): args = parser.parse_args() disable_existing_loggers() if args.from_torch: - colossalai.launch_from_torch(config=args.config) + colossalai.launch_from_torch() else: - colossalai.launch_from_slurm(config=args.config, host=args.host, port=29500, seed=42) + colossalai.launch_from_slurm(host=args.host, port=29500, seed=42) logger = get_dist_logger() data_path = None if args.use_dummy_dataset else os.environ["DATA"] diff --git a/examples/language/grok-1/inference_tp.py b/examples/language/grok-1/inference_tp.py index e10c4929c..f7d7cf864 100644 --- a/examples/language/grok-1/inference_tp.py +++ b/examples/language/grok-1/inference_tp.py @@ -16,7 +16,7 @@ if __name__ == "__main__": parser = get_default_parser() args = parser.parse_args() start = time.time() - colossalai.launch_from_torch({}) + colossalai.launch_from_torch() coordinator = DistCoordinator() plugin = HybridParallelPlugin( tp_size=coordinator.world_size, diff --git a/examples/language/llama/benchmark.py b/examples/language/llama/benchmark.py index f457c08cd..5cc602181 100644 --- a/examples/language/llama/benchmark.py +++ b/examples/language/llama/benchmark.py @@ -78,7 +78,7 @@ def main(): parser.add_argument("--custom-ckpt", action="store_true", help="Customize checkpoint", default=False) args = parser.parse_args() - colossalai.launch_from_torch({}) + colossalai.launch_from_torch() coordinator = DistCoordinator() def empty_init(): diff --git a/examples/language/openmoe/benchmark/benchmark_cai.py b/examples/language/openmoe/benchmark/benchmark_cai.py index a6d5f8bf2..22e0c790b 100644 --- a/examples/language/openmoe/benchmark/benchmark_cai.py +++ b/examples/language/openmoe/benchmark/benchmark_cai.py @@ -146,7 +146,7 @@ def main(): args = parse_args() # Launch ColossalAI - colossalai.launch_from_torch(config={}, seed=args.seed) + colossalai.launch_from_torch(seed=args.seed) coordinator = DistCoordinator() # Set plugin diff --git a/examples/language/openmoe/train.py b/examples/language/openmoe/train.py index 92f4e066a..40f072f13 100644 --- a/examples/language/openmoe/train.py +++ b/examples/language/openmoe/train.py @@ -207,7 +207,7 @@ def main(): args = parse_args() # Launch ColossalAI - colossalai.launch_from_torch(config={}, seed=args.seed) + colossalai.launch_from_torch(seed=args.seed) coordinator = DistCoordinator() test_mode = args.model_name == "test" diff --git a/examples/language/opt/opt_benchmark.py b/examples/language/opt/opt_benchmark.py index d16c9fdf9..c2883d96c 100755 --- a/examples/language/opt/opt_benchmark.py +++ b/examples/language/opt/opt_benchmark.py @@ -46,7 +46,7 @@ def 
main(): args = parse_benchmark_args() # Launch ColossalAI - colossalai.launch_from_torch(config={}, seed=args.seed) + colossalai.launch_from_torch(seed=args.seed) coordinator = DistCoordinator() world_size = coordinator.world_size diff --git a/examples/language/opt/opt_train_demo.py b/examples/language/opt/opt_train_demo.py index 05336bec4..b5b50305c 100644 --- a/examples/language/opt/opt_train_demo.py +++ b/examples/language/opt/opt_train_demo.py @@ -64,7 +64,7 @@ def main(): args = parse_demo_args() # Launch ColossalAI - colossalai.launch_from_torch(config={}, seed=args.seed) + colossalai.launch_from_torch(seed=args.seed) coordinator = DistCoordinator() world_size = coordinator.world_size diff --git a/examples/language/palm/train.py b/examples/language/palm/train.py index 4fac7b507..76a86600b 100644 --- a/examples/language/palm/train.py +++ b/examples/language/palm/train.py @@ -102,7 +102,7 @@ args = parse_args() if args.distplan not in ["colossalai", "pytorch"]: raise TypeError(f"{args.distplan} is error") disable_existing_loggers() -colossalai.launch_from_torch(config={}) +colossalai.launch_from_torch() logger = get_dist_logger() diff --git a/examples/tutorial/auto_parallel/auto_ckpt_batchsize_test.py b/examples/tutorial/auto_parallel/auto_ckpt_batchsize_test.py index 29101ce08..b7a3f4320 100644 --- a/examples/tutorial/auto_parallel/auto_ckpt_batchsize_test.py +++ b/examples/tutorial/auto_parallel/auto_ckpt_batchsize_test.py @@ -20,7 +20,7 @@ def _benchmark(rank, world_size, port): only result in minor performance drop. So at last we might be able to find better training batch size for our model (combine with large batch training optimizer such as LAMB). """ - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = tm.resnet152() gm = symbolic_trace(model) raw_graph = deepcopy(gm.graph) diff --git a/examples/tutorial/auto_parallel/auto_ckpt_solver_test.py b/examples/tutorial/auto_parallel/auto_ckpt_solver_test.py index cd03a9179..81ef7ca03 100644 --- a/examples/tutorial/auto_parallel/auto_ckpt_solver_test.py +++ b/examples/tutorial/auto_parallel/auto_ckpt_solver_test.py @@ -17,7 +17,7 @@ def _benchmark(rank, world_size, port, args): The benchmark will sample in a range of memory budget for each model and output the benchmark summary and data visualization of peak memory vs. budget memory and relative step time vs. peak memory. 
""" - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") if args.model == "resnet50": model = tm.resnet50() data_gen = partial(data_gen_resnet, batch_size=128, shape=(3, 224, 224)) diff --git a/examples/tutorial/new_api/cifar_resnet/train.py b/examples/tutorial/new_api/cifar_resnet/train.py index a4733126f..2b388fe36 100644 --- a/examples/tutorial/new_api/cifar_resnet/train.py +++ b/examples/tutorial/new_api/cifar_resnet/train.py @@ -128,7 +128,7 @@ def main(): # ============================== # Launch Distributed Environment # ============================== - colossalai.launch_from_torch(config={}) + colossalai.launch_from_torch() coordinator = DistCoordinator() # update the learning rate with linear scaling diff --git a/examples/tutorial/new_api/cifar_vit/train.py b/examples/tutorial/new_api/cifar_vit/train.py index ec6c852b5..84245d487 100644 --- a/examples/tutorial/new_api/cifar_vit/train.py +++ b/examples/tutorial/new_api/cifar_vit/train.py @@ -148,7 +148,7 @@ def main(): # ============================== # Launch Distributed Environment # ============================== - colossalai.launch_from_torch(config={}) + colossalai.launch_from_torch() coordinator = DistCoordinator() # update the learning rate with linear scaling diff --git a/examples/tutorial/new_api/glue_bert/finetune.py b/examples/tutorial/new_api/glue_bert/finetune.py index e97c9017f..624783a79 100644 --- a/examples/tutorial/new_api/glue_bert/finetune.py +++ b/examples/tutorial/new_api/glue_bert/finetune.py @@ -125,7 +125,7 @@ def main(): # ============================== # Launch Distributed Environment # ============================== - colossalai.launch_from_torch(config={}, seed=42) + colossalai.launch_from_torch(seed=42) coordinator = DistCoordinator() # local_batch_size = BATCH_SIZE // coordinator.world_size diff --git a/examples/tutorial/opt/opt/run_clm.py b/examples/tutorial/opt/opt/run_clm.py index ae8a0f4a0..cb62f77e1 100644 --- a/examples/tutorial/opt/opt/run_clm.py +++ b/examples/tutorial/opt/opt/run_clm.py @@ -289,7 +289,7 @@ class DummyDataloader: def main(): args = parse_args() disable_existing_loggers() - colossalai.legacy.launch_from_torch(config=dict()) + colossalai.legacy.launch_from_torch() logger = get_dist_logger() is_main_process = dist.get_rank() == 0 diff --git a/tests/test_auto_parallel/test_ckpt_solvers/test_C_solver_consistency.py b/tests/test_auto_parallel/test_ckpt_solvers/test_C_solver_consistency.py index 03bba8e64..14bc7aa57 100644 --- a/tests/test_auto_parallel/test_ckpt_solvers/test_C_solver_consistency.py +++ b/tests/test_auto_parallel/test_ckpt_solvers/test_C_solver_consistency.py @@ -27,7 +27,7 @@ except: def _run_C_solver_consistency_test(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") for M, mem_budget in [(tm.resnet50, 4000), (tm.densenet121, 8080)]: model = M() diff --git a/tests/test_auto_parallel/test_ckpt_solvers/test_ckpt_torchvision.py b/tests/test_auto_parallel/test_ckpt_solvers/test_ckpt_torchvision.py index c46f57f75..19d526524 100644 --- a/tests/test_auto_parallel/test_ckpt_solvers/test_ckpt_torchvision.py +++ b/tests/test_auto_parallel/test_ckpt_solvers/test_ckpt_torchvision.py @@ -75,7 +75,7 @@ def check_backward_consistency( def 
_run_ckpt_solver(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") MODEL_LIST = [tm.densenet121] torch.backends.cudnn.deterministic = True @@ -111,7 +111,7 @@ def test_ckpt_solver(): def _run_ckpt_solver_torch11(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") MODEL_LIST = [tm.densenet121] torch.backends.cudnn.deterministic = True diff --git a/tests/test_auto_parallel/test_offload/test_perf.py b/tests/test_auto_parallel/test_offload/test_perf.py index 373ba28b8..3db7a1925 100644 --- a/tests/test_auto_parallel/test_offload/test_perf.py +++ b/tests/test_auto_parallel/test_offload/test_perf.py @@ -141,8 +141,7 @@ def exam_fwd_bwd(model_name: str, memory_budget: float, solver_name: str): def run_dist(rank, world_size, port): - config = {} - colossalai.launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") exam_fwd_bwd() diff --git a/tests/test_auto_parallel/test_tensor_shard/test_bias_addition_forward.py b/tests/test_auto_parallel/test_tensor_shard/test_bias_addition_forward.py index c41c66745..f39f09d54 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_bias_addition_forward.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_bias_addition_forward.py @@ -42,7 +42,7 @@ class ConvModel(torch.nn.Module): def check_linear_module(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = LinearModel(4, 8).cuda() input = torch.rand(4, 4).cuda() output_compare = model(input) @@ -59,7 +59,7 @@ def check_linear_module(rank, world_size, port): def check_conv_module(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = ConvModel(3, 6, 2).cuda() input = torch.rand(4, 3, 64, 64).cuda() output_compare = model(input) diff --git a/tests/test_auto_parallel/test_tensor_shard/test_checkpoint.py b/tests/test_auto_parallel/test_tensor_shard/test_checkpoint.py index c800f54da..f2b966b10 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_checkpoint.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_checkpoint.py @@ -39,7 +39,7 @@ class GPT2MLPWithCkpt(nn.Module): def check_act_ckpt(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = GPT2MLPWithCkpt(intermediate_size=4 * HIDDEN_SIZE, hidden_size=HIDDEN_SIZE) torch.rand(1, 64, HIDDEN_SIZE) input_sample = { diff --git a/tests/test_auto_parallel/test_tensor_shard/test_compatibility_with_ddp.py b/tests/test_auto_parallel/test_tensor_shard/test_compatibility_with_ddp.py index e8f175326..202f3e3bf 100644 --- 
a/tests/test_auto_parallel/test_tensor_shard/test_compatibility_with_ddp.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_compatibility_with_ddp.py @@ -32,7 +32,7 @@ class MLP(torch.nn.Module): def check_compatibility_with_ddp(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = MLP(4).cuda() if rank in [0, 1]: input = torch.arange(0, 16, dtype=torch.float).reshape(4, 4).cuda() diff --git a/tests/test_auto_parallel/test_tensor_shard/test_compatibility_with_gemini.py b/tests/test_auto_parallel/test_tensor_shard/test_compatibility_with_gemini.py index d57717326..18de92e2a 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_compatibility_with_gemini.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_compatibility_with_gemini.py @@ -34,7 +34,7 @@ class MLP(torch.nn.Module): def check_auto_parallel_with_gemini(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = MLP(4).half().cuda() if rank in [0, 1]: input = torch.arange(0, 16).reshape(4, 4).half().cuda() diff --git a/tests/test_auto_parallel/test_tensor_shard/test_gpt/test_runtime_with_gpt_modules.py b/tests/test_auto_parallel/test_tensor_shard/test_gpt/test_runtime_with_gpt_modules.py index 24968e670..25c5d4ef1 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_gpt/test_runtime_with_gpt_modules.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_gpt/test_runtime_with_gpt_modules.py @@ -73,7 +73,7 @@ def _check_module_grad( def check_attention_layer(rank, model_cls, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") config = transformers.GPT2Config(n_position=64, n_layer=2, n_head=16, n_embd=HIDDEN_DIM) diff --git a/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_binary_elementwise_metainfo.py b/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_binary_elementwise_metainfo.py index ba9e28214..d2f3e3724 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_binary_elementwise_metainfo.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_binary_elementwise_metainfo.py @@ -31,7 +31,7 @@ def _binary_elementwise_mem_test(rank, world_size, port): port: port for initializing process group """ disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = BinaryElementwiseOpModule(token=torch.add, shape=1024).cuda() input = torch.rand(32, 1024).cuda() input.requires_grad = True diff --git a/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_conv_metainfo.py b/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_conv_metainfo.py index 455581545..5495282bc 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_conv_metainfo.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_conv_metainfo.py @@ -31,7 +31,7 @@ def _conv_module_mem_test(rank, world_size, port, bias): port: port for 
initializing process group """ disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = nn.Sequential(nn.Conv2d(4, 64, 3, padding=1, bias=bias)).cuda() input = torch.rand(4, 4, 64, 64).cuda() input.requires_grad = True @@ -72,7 +72,7 @@ def _conv_function_mem_test(rank, world_size, port): port: port for initializing process group """ disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = ConvFunctionModule().cuda() input = torch.rand(4, 4, 64, 64).cuda() input.requires_grad = True diff --git a/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_linear_metainfo.py b/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_linear_metainfo.py index 639870c89..4958bad6b 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_linear_metainfo.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_linear_metainfo.py @@ -30,7 +30,7 @@ def _linear_module_mem_test(rank, world_size, port): port: port for initializing process group """ disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = nn.Sequential(nn.Linear(64, 128, bias=False)).cuda() input = torch.rand(8, 8, 16, 64).cuda() input.requires_grad = True @@ -68,7 +68,7 @@ def _linear_function_mem_test(rank, world_size, port): port: port for initializing process group """ disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = MyModule().cuda() input = torch.rand(8, 8, 16, 64).cuda() input.requires_grad = True diff --git a/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_norm_metainfo.py b/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_norm_metainfo.py index ed809a758..a0b81edab 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_norm_metainfo.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_norm_metainfo.py @@ -25,7 +25,7 @@ def _batchnorm_module_mem_test(rank, world_size, port): port: port for initializing process group """ disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = nn.Sequential(nn.BatchNorm2d(128)).cuda() input = torch.rand(4, 128, 64, 64).cuda() input.requires_grad = True diff --git a/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_pooling_metainfo.py b/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_pooling_metainfo.py index bd1deb40c..92d91383e 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_pooling_metainfo.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_metainfo/test_pooling_metainfo.py @@ -21,7 +21,7 @@ def _adaptiveavgpool_module_mem_test(rank, world_size, port): port: port for initializing process group """ disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, 
backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = nn.Sequential(nn.AdaptiveAvgPool2d((16, 16))).cuda() input = torch.rand(4, 128, 64, 64).cuda() input.requires_grad = True @@ -62,7 +62,7 @@ def _maxpool_module_mem_test(rank, world_size, port): port: port for initializing process group """ disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = nn.Sequential(nn.MaxPool2d((16, 16))).cuda() input = torch.rand(4, 128, 64, 64).cuda() input.requires_grad = True diff --git a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_addbmm_handler.py b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_addbmm_handler.py index 73a15f3ba..a8d2fbdfb 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_addbmm_handler.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_addbmm_handler.py @@ -40,7 +40,7 @@ class AddBMMTorchFunctionModule(nn.Module): def check_2d_device_mesh(rank, world_size, port, module, bias_shape, using_kwargs): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = module(using_kwargs).cuda() physical_mesh_id = torch.arange(0, 4) mesh_shape = (2, 2) @@ -150,7 +150,7 @@ def check_2d_device_mesh(rank, world_size, port, module, bias_shape, using_kwarg def check_1d_device_mesh(rank, module, bias_shape, using_kwargs, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") physical_mesh_id = torch.arange(0, 4) mesh_shape = (1, 4) device_mesh = DeviceMesh(physical_mesh_id, mesh_shape, init_process_group=True) diff --git a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_addmm_handler.py b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_addmm_handler.py index 26f9c4ab1..60eadeff9 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_addmm_handler.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_addmm_handler.py @@ -40,7 +40,7 @@ class AddmmModel_with_param(nn.Module): def check_addmm_function_handler(rank, world_size, port, input_shape, model_cls): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") if model_cls == AddmmModel: model = AddmmModel().cuda() else: diff --git a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_batch_norm_handler.py b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_batch_norm_handler.py index 86df7237a..e52cf28ab 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_batch_norm_handler.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_batch_norm_handler.py @@ -16,7 +16,7 @@ from tests.test_auto_parallel.test_tensor_shard.test_node_handler.utils import n def check_bn_module_handler(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", 
port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = nn.Sequential(nn.BatchNorm2d(16)).cuda() physical_mesh_id = torch.arange(0, 4) diff --git a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bias_linear_function_node.py b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bias_linear_function_node.py index e06625e1c..5982227b6 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bias_linear_function_node.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bias_linear_function_node.py @@ -34,7 +34,7 @@ class LinearModule(torch.nn.Module): def check_linear_module_handler(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = LinearModule(weight_shape=WEIGHT_SHAPE).cuda() physical_mesh_id = torch.arange(0, 4) diff --git a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bias_linear_module_node.py b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bias_linear_module_node.py index 690f0c123..c45e3e014 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bias_linear_module_node.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bias_linear_module_node.py @@ -30,7 +30,7 @@ class LinearModule(torch.nn.Module): def check_linear_module_handler(rank, world_size, port, bias): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = LinearModule(16, 32, bias=bias).cuda() physical_mesh_id = torch.arange(0, 4) diff --git a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_binary_elementwise_handler.py b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_binary_elementwise_handler.py index 5b2e2ab49..ad0d6d18c 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_binary_elementwise_handler.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_binary_elementwise_handler.py @@ -16,7 +16,7 @@ from tests.test_auto_parallel.test_tensor_shard.test_node_handler.utils import n def check_binary_elementwise_handler_with_tensor(rank, world_size, port, op, other_dim): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") class BinaryElementwiseOpModel(nn.Module): def __init__(self, op): @@ -145,7 +145,7 @@ class BEOpModelWithIntConst(nn.Module): def check_binary_elementwise_handler_with_int(rank, world_size, port, op, other_dim, model_cls): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") physical_mesh_id = torch.arange(0, 4) mesh_shape = (2, 2) diff --git a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bmm_handler.py b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bmm_handler.py index 29df12832..ac54f1230 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bmm_handler.py 
+++ b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_bmm_handler.py @@ -26,7 +26,7 @@ class BMMTorchFunctionModule(nn.Module): def check_2d_device_mesh(rank, module, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = module().cuda() physical_mesh_id = torch.arange(0, 4) mesh_shape = (2, 2) @@ -121,7 +121,7 @@ def check_2d_device_mesh(rank, module, world_size, port): def check_1d_device_mesh(rank, module, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = module().cuda() physical_mesh_id = torch.arange(0, 4) mesh_shape = (1, 4) diff --git a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_conv_handler.py b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_conv_handler.py index 8a37dd925..407216f46 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_conv_handler.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_conv_handler.py @@ -16,7 +16,7 @@ from tests.test_auto_parallel.test_tensor_shard.test_node_handler.utils import n def check_conv_module_handler(rank, world_size, port, bias): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = nn.Sequential(nn.Conv2d(4, 16, 3, padding=1, bias=bias)).cuda() # graph(): # %input_1 : torch.Tensor [#users=1] = placeholder[target=input] @@ -153,7 +153,7 @@ class ConvModel(nn.Module): def check_conv_function_handler(rank, world_size, port, bias): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = ConvModel().cuda() physical_mesh_id = torch.arange(0, 4) mesh_shape = (2, 2) diff --git a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_embedding_handler.py b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_embedding_handler.py index 9ac6ba95d..f9a5b40a0 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_embedding_handler.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_embedding_handler.py @@ -33,7 +33,7 @@ class EmbeddingModule(nn.Module): def check_embedding_module_handler(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = EmbeddingModule(num_embeddings=NUM_EMBEDDINGS, embedding_dims=EMBEDDING_DIMS).cuda() # graph(): # %input_1 : torch.Tensor [#users=1] = placeholder[target=input] @@ -150,7 +150,7 @@ class EmbeddingFunction(nn.Module): def check_embedding_function_handler(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = EmbeddingFunction().cuda() physical_mesh_id = 
torch.arange(0, 4) mesh_shape = (2, 2) diff --git a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_getitem_handler.py b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_getitem_handler.py index cf802a228..eb8e8ed3e 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_getitem_handler.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_getitem_handler.py @@ -31,7 +31,7 @@ class GetItemFromTensorModel(nn.Module): def check_getitem_from_tensor_handler(rank, getitem_index, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = GetItemFromTensorModel(getitem_index=getitem_index) diff --git a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_layer_norm_handler.py b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_layer_norm_handler.py index 59a66bc6a..45aae2ea9 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_layer_norm_handler.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_layer_norm_handler.py @@ -17,7 +17,7 @@ from tests.test_auto_parallel.test_tensor_shard.test_node_handler.utils import n def check_ln_module_handler(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = nn.Sequential(nn.LayerNorm(16)).cuda() physical_mesh_id = torch.arange(0, 4) mesh_shape = (2, 2) diff --git a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_linear_handler.py b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_linear_handler.py index da88b735f..ddabdb700 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_linear_handler.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_linear_handler.py @@ -23,7 +23,7 @@ from tests.test_auto_parallel.test_tensor_shard.test_node_handler.utils import n def check_linear_module_handler(rank, world_size, port, bias, input_shape): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = nn.Sequential(nn.Linear(16, 32, bias=bias)).cuda() physical_mesh_id = torch.arange(0, 4) mesh_shape = (2, 2) @@ -171,7 +171,7 @@ class LinearModel(nn.Module): def check_linear_function_handler(rank, world_size, port, bias, input_shape): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = LinearModel().cuda() physical_mesh_id = torch.arange(0, 4) mesh_shape = (2, 2) diff --git a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_permute_and_transpose_handler.py b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_permute_and_transpose_handler.py index 958dc288f..09ad2ae32 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_permute_and_transpose_handler.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_permute_and_transpose_handler.py @@ -51,7 +51,7 @@ class 
LinearReshapeModel(nn.Module): def check_view_handler(rank, world_size, port, call_function, reshape_dims, model_cls): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") if call_function == torch.permute: reshape_dims = reshape_dims[0] elif call_function == torch.transpose: diff --git a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_softmax_handler.py b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_softmax_handler.py index 1a99c32eb..88f34ff10 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_softmax_handler.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_softmax_handler.py @@ -29,7 +29,7 @@ class LinearSplitModel(nn.Module): def check_split_handler(rank, world_size, port, softmax_dim, model_cls): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = model_cls(softmax_dim=softmax_dim).cuda() input = torch.rand(8, 16, 64, 32).to("cuda") diff --git a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_split_handler.py b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_split_handler.py index 0318023c8..225a729ef 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_split_handler.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_split_handler.py @@ -42,7 +42,7 @@ class LinearSplitModel(nn.Module): def check_split_handler(rank, world_size, port, split_size, split_dim, model_cls): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = model_cls(split_size=split_size, split_dim=split_dim).cuda() if model_cls.__name__ == "ConvSplitModel": diff --git a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_sum_handler.py b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_sum_handler.py index cbd3e4704..a79cfdf6f 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_sum_handler.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_sum_handler.py @@ -32,7 +32,7 @@ class LinearSumModel(nn.Module): def check_sum_handler(rank, world_size, port, sum_dims, keepdim): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = LinearSumModel(sum_dims=sum_dims, keepdim=keepdim).cuda() physical_mesh_id = torch.arange(0, 4) mesh_shape = (2, 2) diff --git a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_view_handler.py b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_view_handler.py index 466168c79..de483c997 100644 --- a/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_view_handler.py +++ b/tests/test_auto_parallel/test_tensor_shard/test_node_handler/test_view_handler.py @@ -41,7 +41,7 @@ class LinearViewModel(nn.Module): def check_view_handler(rank, tgt_shape, model_cls, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, 
world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") model = model_cls(tgt_shape).cuda() if model_cls.__name__ == "ConvViewModel": diff --git a/tests/test_booster/test_mixed_precision/test_fp16_torch.py b/tests/test_booster/test_mixed_precision/test_fp16_torch.py index 3aefb3797..f6d6e8303 100644 --- a/tests/test_booster/test_mixed_precision/test_fp16_torch.py +++ b/tests/test_booster/test_mixed_precision/test_fp16_torch.py @@ -9,7 +9,7 @@ from tests.kit.model_zoo import model_zoo def run_torch_amp(rank, world_size, port): # init dist env - colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.launch(rank=rank, world_size=world_size, port=port, host="localhost") sub_model_zoo = model_zoo.get_sub_registry("timm") for name, (model_fn, data_gen_fn, output_transform_fn, _, _) in sub_model_zoo.items(): # dlrm_interactionarch has not parameters, so skip diff --git a/tests/test_booster/test_plugin/test_3d_plugin.py b/tests/test_booster/test_plugin/test_3d_plugin.py index 52cb8c46e..e57cadfd8 100644 --- a/tests/test_booster/test_plugin/test_3d_plugin.py +++ b/tests/test_booster/test_plugin/test_3d_plugin.py @@ -265,7 +265,7 @@ def run_grad_acc_test(test_args): def run_dist(rank, world_size, port, early_stop: bool = True): # init dist env - colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.launch(rank=rank, world_size=world_size, port=port, host="localhost") check_3d_plugin(early_stop=early_stop) run_grad_acc_test() diff --git a/tests/test_booster/test_plugin/test_dp_plugin_base.py b/tests/test_booster/test_plugin/test_dp_plugin_base.py index fceb623fe..a2a4a0c07 100644 --- a/tests/test_booster/test_plugin/test_dp_plugin_base.py +++ b/tests/test_booster/test_plugin/test_dp_plugin_base.py @@ -85,7 +85,7 @@ def check_dataloader_sharding(): def run_dist(rank, world_size, port): # init dist env - colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.launch(rank=rank, world_size=world_size, port=port, host="localhost") check_dataloader_sharding() diff --git a/tests/test_booster/test_plugin/test_gemini_plugin.py b/tests/test_booster/test_plugin/test_gemini_plugin.py index 892144772..b2790c0e7 100644 --- a/tests/test_booster/test_plugin/test_gemini_plugin.py +++ b/tests/test_booster/test_plugin/test_gemini_plugin.py @@ -161,7 +161,7 @@ def check_gemini_plugin( def run_dist(rank, world_size, port, early_stop: bool = True): # init dist env - colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.launch(rank=rank, world_size=world_size, port=port, host="localhost") check_gemini_plugin(early_stop=early_stop) diff --git a/tests/test_booster/test_plugin/test_low_level_zero_plugin.py b/tests/test_booster/test_plugin/test_low_level_zero_plugin.py index cbfad6ef7..4908b2d4f 100644 --- a/tests/test_booster/test_plugin/test_low_level_zero_plugin.py +++ b/tests/test_booster/test_plugin/test_low_level_zero_plugin.py @@ -130,7 +130,7 @@ def check_low_level_zero_lora(stage, model_name, early_stop: bool = True): def run_dist(rank, world_size, port, early_stop: bool = True): # init dist env - colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.launch(rank=rank, world_size=world_size, port=port, host="localhost") 
check_low_level_zero_plugin(early_stop=early_stop) check_low_level_zero_lora(early_stop=early_stop) diff --git a/tests/test_booster/test_plugin/test_torch_ddp_plugin.py b/tests/test_booster/test_plugin/test_torch_ddp_plugin.py index e785843fb..052782047 100644 --- a/tests/test_booster/test_plugin/test_torch_ddp_plugin.py +++ b/tests/test_booster/test_plugin/test_torch_ddp_plugin.py @@ -109,7 +109,7 @@ def check_torch_ddp_no_sync(): def run_dist(rank, world_size, port): # init dist env - colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.launch(rank=rank, world_size=world_size, port=port, host="localhost") check_torch_ddp_plugin() check_torch_ddp_no_sync() diff --git a/tests/test_booster/test_plugin/test_torch_fsdp_plugin.py b/tests/test_booster/test_plugin/test_torch_fsdp_plugin.py index f69807046..90e98f325 100644 --- a/tests/test_booster/test_plugin/test_torch_fsdp_plugin.py +++ b/tests/test_booster/test_plugin/test_torch_fsdp_plugin.py @@ -73,7 +73,7 @@ def check_torch_fsdp_plugin(): def run_dist(rank, world_size, port): # init dist env - colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.launch(rank=rank, world_size=world_size, port=port, host="localhost") check_torch_fsdp_plugin() diff --git a/tests/test_checkpoint_io/test_gemini_checkpoint_io.py b/tests/test_checkpoint_io/test_gemini_checkpoint_io.py index ac6f8caef..ade927e6e 100644 --- a/tests/test_checkpoint_io/test_gemini_checkpoint_io.py +++ b/tests/test_checkpoint_io/test_gemini_checkpoint_io.py @@ -173,8 +173,7 @@ def exam_lazy_from_pretrained(): def run_dist(rank, world_size, port): - config = {} - colossalai.launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") exam_state_dict() exam_state_dict_with_origin() exam_lazy_from_pretrained() diff --git a/tests/test_checkpoint_io/test_gemini_torch_compability.py b/tests/test_checkpoint_io/test_gemini_torch_compability.py index 44a000113..cd313c240 100644 --- a/tests/test_checkpoint_io/test_gemini_torch_compability.py +++ b/tests/test_checkpoint_io/test_gemini_torch_compability.py @@ -163,8 +163,7 @@ def exam_gemini_load_from_torch(shard: bool, model_name: str): def run_dist(rank, world_size, port): - config = {} - colossalai.launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") exam_torch_load_from_gemini() exam_gemini_load_from_torch() diff --git a/tests/test_checkpoint_io/test_hybrid_parallel_plugin_checkpoint_io.py b/tests/test_checkpoint_io/test_hybrid_parallel_plugin_checkpoint_io.py index 4753ab637..1cf94433d 100644 --- a/tests/test_checkpoint_io/test_hybrid_parallel_plugin_checkpoint_io.py +++ b/tests/test_checkpoint_io/test_hybrid_parallel_plugin_checkpoint_io.py @@ -132,8 +132,7 @@ def exam_state_dict(shard: bool, model_name: str, size_per_shard: int, test_conf def run_dist(rank, world_size, port): - config = {} - colossalai.launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") exam_state_dict() diff --git a/tests/test_checkpoint_io/test_low_level_zero_checkpoint_io.py 
b/tests/test_checkpoint_io/test_low_level_zero_checkpoint_io.py index 4073cae0c..119e42e31 100644 --- a/tests/test_checkpoint_io/test_low_level_zero_checkpoint_io.py +++ b/tests/test_checkpoint_io/test_low_level_zero_checkpoint_io.py @@ -172,7 +172,7 @@ def check_low_level_zero_lora_checkpointIO( def run_dist(rank, world_size, port): - colossalai.launch(config=(dict()), rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.launch(rank=rank, world_size=world_size, port=port, host="localhost") check_low_level_zero_checkpointIO() check_low_level_zero_lora_checkpointIO() torch.cuda.empty_cache() diff --git a/tests/test_checkpoint_io/test_plugins_huggingface_compatibility.py b/tests/test_checkpoint_io/test_plugins_huggingface_compatibility.py index 0353ff115..da0d52d06 100644 --- a/tests/test_checkpoint_io/test_plugins_huggingface_compatibility.py +++ b/tests/test_checkpoint_io/test_plugins_huggingface_compatibility.py @@ -68,8 +68,7 @@ def exam_from_pretrained(plugin_type: str, model_name: str, shard=True, size_per def run_dist(rank, world_size, port): - config = {} - colossalai.launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") exam_from_pretrained() diff --git a/tests/test_checkpoint_io/test_torch_ddp_checkpoint_io.py b/tests/test_checkpoint_io/test_torch_ddp_checkpoint_io.py index eeb04df0f..0b9a1605c 100644 --- a/tests/test_checkpoint_io/test_torch_ddp_checkpoint_io.py +++ b/tests/test_checkpoint_io/test_torch_ddp_checkpoint_io.py @@ -61,7 +61,7 @@ def check_torch_ddp_checkpointIO(shard: bool, size_per_shard: int): def run_dist(rank, world_size, port): - colossalai.launch(config=(dict()), rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.launch(rank=rank, world_size=world_size, port=port, host="localhost") check_torch_ddp_checkpointIO() diff --git a/tests/test_checkpoint_io/test_torch_fsdp_checkpoint_io.py b/tests/test_checkpoint_io/test_torch_fsdp_checkpoint_io.py index 1ea70368e..12b70cc04 100644 --- a/tests/test_checkpoint_io/test_torch_fsdp_checkpoint_io.py +++ b/tests/test_checkpoint_io/test_torch_fsdp_checkpoint_io.py @@ -141,7 +141,7 @@ def check_torch_fsdp_ckpt(): def run_dist(rank, world_size, port): # init dist env - colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.launch(rank=rank, world_size=world_size, port=port, host="localhost") check_torch_fsdp_ckpt() diff --git a/tests/test_cluster/test_device_mesh_manager.py b/tests/test_cluster/test_device_mesh_manager.py index ab61cdae5..5d140064b 100644 --- a/tests/test_cluster/test_device_mesh_manager.py +++ b/tests/test_cluster/test_device_mesh_manager.py @@ -6,7 +6,7 @@ from colossalai.testing import spawn def check_device_mesh_manager(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") device_mesh_manager = DeviceMeshManager() # TODO(ver217): this test is strictly relies on hardware, temporary skip it # device_mesh_info_auto = DeviceMeshInfo(physical_ids=[0, 1, 2, 3],) diff --git a/tests/test_cluster/test_process_group_mesh.py b/tests/test_cluster/test_process_group_mesh.py index 3d206622d..3071c0f59 100644 --- a/tests/test_cluster/test_process_group_mesh.py +++ 
b/tests/test_cluster/test_process_group_mesh.py @@ -6,57 +6,6 @@ from colossalai.cluster import ProcessGroupMesh from colossalai.testing import spawn -def check_process_group_mesh_with_gpc(): - from colossalai.legacy.context import ParallelMode - from colossalai.legacy.core import global_context as gpc - - DP_DIM, PP_DIM, TP_DIM = 0, 1, 2 - pg_mesh = ProcessGroupMesh(1, 2, 2) - - # check world size - assert gpc.get_world_size(ParallelMode.TENSOR) == pg_mesh.size( - TP_DIM - ), f"{gpc.get_world_size(ParallelMode.TENSOR)} != {pg_mesh.size(TP_DIM)}" - assert gpc.get_world_size(ParallelMode.PIPELINE) == pg_mesh.size(PP_DIM) - assert gpc.get_world_size(ParallelMode.DATA) == pg_mesh.size(DP_DIM) - - # check locak rank (coordinate) - assert gpc.get_local_rank(ParallelMode.TENSOR) == pg_mesh.coordinate( - TP_DIM - ), f"{gpc.get_local_rank(ParallelMode.TENSOR)} != {pg_mesh.coordinate(TP_DIM)}" - assert gpc.get_local_rank(ParallelMode.PIPELINE) == pg_mesh.coordinate(PP_DIM) - assert gpc.get_local_rank(ParallelMode.DATA) == pg_mesh.coordinate(DP_DIM) - - # check ranks in group - tp_group = pg_mesh.get_group_along_axis(TP_DIM) - assert gpc.get_ranks_in_group(ParallelMode.TENSOR) == pg_mesh.get_ranks_in_group(tp_group) - pp_group = pg_mesh.get_group_along_axis(PP_DIM) - assert gpc.get_ranks_in_group(ParallelMode.PIPELINE) == pg_mesh.get_ranks_in_group(pp_group) - dp_group = pg_mesh.get_group_along_axis(DP_DIM) - assert gpc.get_ranks_in_group(ParallelMode.DATA) == pg_mesh.get_ranks_in_group(dp_group) - - # check prev rank - coord = pg_mesh.coordinate() - if not gpc.is_first_rank(ParallelMode.TENSOR): - assert coord[TP_DIM] != 0 - prev_coord = coord[:TP_DIM] + (coord[TP_DIM] - 1,) + coord[TP_DIM + 1 :] - assert gpc.get_prev_global_rank(ParallelMode.TENSOR) == pg_mesh.ravel(prev_coord, pg_mesh.shape) - if not gpc.is_first_rank(ParallelMode.PIPELINE): - assert coord[PP_DIM] != 0 - prev_coord = coord[:PP_DIM] + (coord[PP_DIM] - 1,) + coord[PP_DIM + 1 :] - assert gpc.get_prev_global_rank(ParallelMode.PIPELINE) == pg_mesh.ravel(prev_coord, pg_mesh.shape) - - # check next rank - if not gpc.is_last_rank(ParallelMode.TENSOR): - assert coord[TP_DIM] != pg_mesh.size(TP_DIM) - 1 - next_coord = coord[:TP_DIM] + (coord[TP_DIM] + 1,) + coord[TP_DIM + 1 :] - assert gpc.get_next_global_rank(ParallelMode.TENSOR) == pg_mesh.ravel(next_coord, pg_mesh.shape) - if not gpc.is_last_rank(ParallelMode.PIPELINE): - assert coord[PP_DIM] != pg_mesh.size(PP_DIM) - 1 - next_coord = coord[:PP_DIM] + (coord[PP_DIM] + 1,) + coord[PP_DIM + 1 :] - assert gpc.get_next_global_rank(ParallelMode.PIPELINE) == pg_mesh.ravel(next_coord, pg_mesh.shape) - - def check_process_group_mesh_with_cases(): DP_DIM, PP_DIM, TP_DIM = 0, 1, 2 DP_SIZE, PP_SIZE, TP_SIZE = 1, 2, 2 @@ -177,14 +126,11 @@ def check_process_group_mesh_with_cases(): def run_dist(rank, world_size, port): colossalai.launch( - config=dict(parallel=dict(data=1, pipeline=2, tensor=dict(mode="1d", size=2))), rank=rank, world_size=world_size, port=port, host="localhost", ) - # TODO(ver217): this function should be removed when gpc is removed - # check_process_group_mesh_with_gpc() check_process_group_mesh_with_cases() diff --git a/tests/test_device/test_alpha_beta.py b/tests/test_device/test_alpha_beta.py index f4a88f79c..3d9c6d7ce 100644 --- a/tests/test_device/test_alpha_beta.py +++ b/tests/test_device/test_alpha_beta.py @@ -8,7 +8,7 @@ from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn def check_alpha_beta(rank, world_size, port, physical_devices): 
disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") profiler = AlphaBetaProfiler(physical_devices) ab_dict = profiler.profile_ab() for _, (alpha, beta) in ab_dict.items(): diff --git a/tests/test_device/test_device_mesh.py b/tests/test_device/test_device_mesh.py index af44af5d9..b2d057273 100644 --- a/tests/test_device/test_device_mesh.py +++ b/tests/test_device/test_device_mesh.py @@ -75,7 +75,7 @@ def check_2d_device_mesh(): def check_init_from_process_group(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") @pytest.mark.dist diff --git a/tests/test_device/test_extract_alpha_beta.py b/tests/test_device/test_extract_alpha_beta.py index 34f2aacc1..7633f59b9 100644 --- a/tests/test_device/test_extract_alpha_beta.py +++ b/tests/test_device/test_extract_alpha_beta.py @@ -8,7 +8,7 @@ from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn def check_extract_alpha_beta(rank, world_size, port, physical_devices): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") profiler = AlphaBetaProfiler(physical_devices) mesh_alpha, mesh_beta = profiler.extract_alpha_beta_for_device_mesh() diff --git a/tests/test_device/test_init_logical_pg.py b/tests/test_device/test_init_logical_pg.py index 3b398a917..d93f65698 100644 --- a/tests/test_device/test_init_logical_pg.py +++ b/tests/test_device/test_init_logical_pg.py @@ -9,7 +9,7 @@ from colossalai.testing import rerun_if_address_is_in_use, spawn def check_layer(rank, world_size, port): - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") physical_mesh_id = torch.arange(0, 4) assert rank == dist.get_rank() diff --git a/tests/test_device/test_search_logical_device_mesh.py b/tests/test_device/test_search_logical_device_mesh.py index d9d4e79c1..a44b8e3d6 100644 --- a/tests/test_device/test_search_logical_device_mesh.py +++ b/tests/test_device/test_search_logical_device_mesh.py @@ -8,7 +8,7 @@ from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn def check_alpha_beta(rank, world_size, port, physical_devices): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") profiler = AlphaBetaProfiler(physical_devices) best_logical_mesh = profiler.search_best_logical_mesh() diff --git a/tests/test_fx/test_codegen/test_activation_checkpoint_codegen.py b/tests/test_fx/test_codegen/test_activation_checkpoint_codegen.py index 10fe98155..8a3e2d6ec 100644 --- a/tests/test_fx/test_codegen/test_activation_checkpoint_codegen.py +++ b/tests/test_fx/test_codegen/test_activation_checkpoint_codegen.py @@ -64,7 +64,7 @@ class MyModule(torch.nn.Module): def _run_act_ckpt_codegen(rank, world_size, port): # launch colossalai to make sure we could execute colossalai.utils.checkpoint currently - colossalai.launch(config={}, rank=rank, world_size=world_size, 
host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") # build model and run forward model = MyModule() @@ -127,7 +127,7 @@ def test_act_ckpt_codegen(): def _run_act_ckpt_python_code_torch11(rank, world_size, port): # launch colossalai to make sure we could execute colossalai.utils.checkpoint currently - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") # build model and run forward model = MyModule() diff --git a/tests/test_fx/test_codegen/test_nested_activation_checkpoint_codegen.py b/tests/test_fx/test_codegen/test_nested_activation_checkpoint_codegen.py index f1e87e5ed..69767db2d 100644 --- a/tests/test_fx/test_codegen/test_nested_activation_checkpoint_codegen.py +++ b/tests/test_fx/test_codegen/test_nested_activation_checkpoint_codegen.py @@ -32,7 +32,7 @@ class MyModule(torch.nn.Module): def _run_act_ckpt_codegen(rank, world_size, port): # launch colossalai to make sure we could execute colossalai.utils.checkpoint currently - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") # build model and run forward model = MyModule() @@ -96,7 +96,7 @@ def test_act_ckpt_codegen(): def _run_act_ckpt_python_code_torch11(rank, world_size, port): # launch colossalai to make sure we could execute colossalai.utils.checkpoint currently - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") # build model and run forward model = MyModule() diff --git a/tests/test_fx/test_codegen/test_offload_codegen.py b/tests/test_fx/test_codegen/test_offload_codegen.py index da1e73ec3..9df4a6899 100644 --- a/tests/test_fx/test_codegen/test_offload_codegen.py +++ b/tests/test_fx/test_codegen/test_offload_codegen.py @@ -66,7 +66,7 @@ def _test_fwd_and_bwd(model: torch.nn.Module, gm: ColoGraphModule, data: torch.T def _run_offload_codegen(rank, world_size, port): # launch colossalai to make sure we could execute colossalai.utils.checkpoint currently - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") # build model and input model = MyNet().cuda() @@ -124,7 +124,7 @@ def test_act_ckpt_codegen(): def _run_offload_codegen_torch11(rank, world_size, port): # launch colossalai to make sure we could execute colossalai.utils.checkpoint currently - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") # build model and input model = MyNet().cuda() diff --git a/tests/test_fx/test_parallel_1d.py b/tests/test_fx/test_parallel_1d.py index 6d890f59d..6b0e12609 100644 --- a/tests/test_fx/test_parallel_1d.py +++ b/tests/test_fx/test_parallel_1d.py @@ -33,7 +33,7 @@ CONFIG = dict(parallel=dict(tensor=dict(mode="1d", size=2))) def check_layer(rank, world_size, port): disable_existing_loggers() - launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, 
backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") input_tensor = torch.rand(2, 16).cuda() model = MLP(16).cuda() symbolic_traced = symbolic_trace(model) diff --git a/tests/test_infer/test_hybrid_bloom.py b/tests/test_infer/test_hybrid_bloom.py index 8cad06dca..ef2aac1d1 100644 --- a/tests/test_infer/test_hybrid_bloom.py +++ b/tests/test_infer/test_hybrid_bloom.py @@ -89,18 +89,18 @@ def run_single_inference_test(tp_size, pp_size, max_output_len, micro_batch_size def check_tp_pp_inference(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_tp_pipeline_inference_test() def check_tp_or_pp_inference(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_tp_inference_test() run_pipeline_inference_test() def check_single_inference(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_single_inference_test diff --git a/tests/test_infer/test_hybrid_chatglm2.py b/tests/test_infer/test_hybrid_chatglm2.py index b53bb25f4..e80b3477f 100644 --- a/tests/test_infer/test_hybrid_chatglm2.py +++ b/tests/test_infer/test_hybrid_chatglm2.py @@ -97,18 +97,18 @@ def run_single_inference_test(tp_size, pp_size, max_output_len, micro_batch_size def check_tp_pp_inference(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_tp_pipeline_inference_test() def check_tp_or_pp_inference(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_tp_inference_test() run_pipeline_inference_test() def check_single_inference(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_single_inference_test diff --git a/tests/test_infer/test_hybrid_llama.py b/tests/test_infer/test_hybrid_llama.py index 30b8b0a99..a99794817 100644 --- a/tests/test_infer/test_hybrid_llama.py +++ b/tests/test_infer/test_hybrid_llama.py @@ -94,18 +94,18 @@ def run_single_inference_test(tp_size, pp_size, max_output_len, micro_batch_size def check_tp_pp_inference(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_tp_pipeline_inference_test() def check_tp_or_pp_inference(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_tp_inference_test() 
run_pipeline_inference_test() def check_single_inference(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_single_inference_test diff --git a/tests/test_legacy/test_amp/test_naive_fp16.py b/tests/test_legacy/test_amp/test_naive_fp16.py index fe16bc4d4..0df6335f5 100644 --- a/tests/test_legacy/test_amp/test_naive_fp16.py +++ b/tests/test_legacy/test_amp/test_naive_fp16.py @@ -77,7 +77,7 @@ def run_naive_amp(): def run_dist(rank, world_size, port): - colossalai.legacy.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.legacy.launch(rank=rank, world_size=world_size, port=port, host="localhost") run_naive_amp() diff --git a/tests/test_legacy/test_amp/test_torch_fp16.py b/tests/test_legacy/test_amp/test_torch_fp16.py index 5e2e1ede5..dc47dfc72 100644 --- a/tests/test_legacy/test_amp/test_torch_fp16.py +++ b/tests/test_legacy/test_amp/test_torch_fp16.py @@ -76,7 +76,7 @@ def run_torch_amp(): def run_dist(rank, world_size, port): - colossalai.legacy.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.legacy.launch(rank=rank, world_size=world_size, port=port, host="localhost") run_torch_amp() diff --git a/tests/test_legacy/test_comm/test_boardcast_send_recv_v2.py b/tests/test_legacy/test_comm/test_boardcast_send_recv_v2.py index bc243631a..bd15e10f3 100644 --- a/tests/test_legacy/test_comm/test_boardcast_send_recv_v2.py +++ b/tests/test_legacy/test_comm/test_boardcast_send_recv_v2.py @@ -16,7 +16,7 @@ torch.manual_seed(123) def check_layer(rank, world_size, port): disable_existing_loggers() - launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl", verbose=False) + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl", verbose=False) rank = gpc.get_local_rank(ParallelMode.PIPELINE) if rank == 0: diff --git a/tests/test_legacy/test_comm/test_comm.py b/tests/test_legacy/test_comm/test_comm.py index 079022e93..75955df69 100644 --- a/tests/test_legacy/test_comm/test_comm.py +++ b/tests/test_legacy/test_comm/test_comm.py @@ -48,7 +48,7 @@ def check_all_reduce(): def check_layer(rank, world_size, port): - launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") assert dist.get_rank() == gpc.get_global_rank() print("Rank {} / {}".format(dist.get_rank(), dist.get_world_size())) diff --git a/tests/test_legacy/test_comm/test_object_list_p2p.py b/tests/test_legacy/test_comm/test_object_list_p2p.py index 69c68c715..1d618a65f 100644 --- a/tests/test_legacy/test_comm/test_object_list_p2p.py +++ b/tests/test_legacy/test_comm/test_object_list_p2p.py @@ -88,7 +88,7 @@ def check_send_recv_forward_backward(): def check_layer(rank, world_size, port): - launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") check_send_recv_forward() check_send_recv_backward() check_send_recv_forward_backward() diff --git a/tests/test_legacy/test_comm/test_object_list_p2p_v2.py b/tests/test_legacy/test_comm/test_object_list_p2p_v2.py index eb05ea483..c272f51f4 100644 --- 
a/tests/test_legacy/test_comm/test_object_list_p2p_v2.py +++ b/tests/test_legacy/test_comm/test_object_list_p2p_v2.py @@ -104,7 +104,7 @@ def check_small_pipeline(): def check_layer(rank, world_size, port): disable_existing_loggers() - launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") disable_existing_loggers() # check_send_recv_forward() diff --git a/tests/test_legacy/test_layers/test_1d/test_1d.py b/tests/test_legacy/test_layers/test_1d/test_1d.py index cebbedd30..9057c2c68 100644 --- a/tests/test_legacy/test_layers/test_1d/test_1d.py +++ b/tests/test_legacy/test_layers/test_1d/test_1d.py @@ -17,7 +17,7 @@ CONFIG = dict( def check_layer(rank, world_size, port): disable_existing_loggers() - launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") check_linear_col() check_linear_row() diff --git a/tests/test_legacy/test_layers/test_2d/test_2d.py b/tests/test_legacy/test_layers/test_2d/test_2d.py index 77a4b281a..5be498f90 100644 --- a/tests/test_legacy/test_layers/test_2d/test_2d.py +++ b/tests/test_legacy/test_layers/test_2d/test_2d.py @@ -50,7 +50,7 @@ def check_layer(): def check_layer_and_operation(rank, world_size, port): disable_existing_loggers() - launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") torch.backends.cuda.matmul.allow_tf32 = False torch.backends.cudnn.allow_tf32 = False diff --git a/tests/test_legacy/test_layers/test_2p5d/test_2p5d.py b/tests/test_legacy/test_layers/test_2p5d/test_2p5d.py index 437a8f8a7..029274570 100644 --- a/tests/test_legacy/test_layers/test_2p5d/test_2p5d.py +++ b/tests/test_legacy/test_layers/test_2p5d/test_2p5d.py @@ -38,7 +38,7 @@ def check_layer(): def check_layer_and_operation(rank, world_size, port): disable_existing_loggers() - launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") torch.backends.cuda.matmul.allow_tf32 = False torch.backends.cudnn.allow_tf32 = False diff --git a/tests/test_legacy/test_layers/test_3d/test_3d.py b/tests/test_legacy/test_layers/test_3d/test_3d.py index 7057e2308..876aa7ba8 100644 --- a/tests/test_legacy/test_layers/test_3d/test_3d.py +++ b/tests/test_legacy/test_layers/test_3d/test_3d.py @@ -44,7 +44,7 @@ def check_layer(): def check_layer_and_operation(rank, world_size, port): disable_existing_loggers() - launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") torch.backends.cuda.matmul.allow_tf32 = False torch.backends.cudnn.allow_tf32 = False torch.backends.cudnn.deterministic = True diff --git a/tests/test_legacy/test_layers/test_cache_embedding.py b/tests/test_legacy/test_layers/test_cache_embedding.py index d64ff56b8..c45097232 100644 --- a/tests/test_legacy/test_layers/test_cache_embedding.py +++ b/tests/test_legacy/test_layers/test_cache_embedding.py @@ -378,7 +378,7 @@ def run_parallel_freq_aware_embed_columnwise(rank, world_size): def run_dist(rank, world_size, port): - colossalai.legacy.launch(config={}, rank=rank, world_size=world_size, 
host="localhost", port=port, backend="nccl") + colossalai.legacy.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") # run_parallel_freq_aware_embed_columnwise(rank, world_size) run_parallel_freq_aware_embed_tablewise(rank, world_size) diff --git a/tests/test_legacy/test_tensor/core/test_dist_spec_mgr.py b/tests/test_legacy/test_tensor/core/test_dist_spec_mgr.py index 506244447..bfedb779c 100644 --- a/tests/test_legacy/test_tensor/core/test_dist_spec_mgr.py +++ b/tests/test_legacy/test_tensor/core/test_dist_spec_mgr.py @@ -48,7 +48,7 @@ def check_mem(): def run_dist(rank, world_size, port): - colossalai.legacy.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.legacy.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") check_mem() run() diff --git a/tests/test_legacy/test_tensor/test_parameter.py b/tests/test_legacy/test_tensor/test_parameter.py index 5217e22cc..eae3e0eb3 100644 --- a/tests/test_legacy/test_tensor/test_parameter.py +++ b/tests/test_legacy/test_tensor/test_parameter.py @@ -9,7 +9,7 @@ from colossalai.testing import free_port @pytest.mark.skip def test_multiinheritance(): - colossalai.legacy.launch(config={}, rank=0, world_size=1, host="localhost", port=free_port(), backend="nccl") + colossalai.legacy.launch(rank=0, world_size=1, host="localhost", port=free_port(), backend="nccl") colo_param = ColoParameter(None, requires_grad=True) assert colo_param.dist_spec.placement.value == "r" assert isinstance(colo_param, ColoTensor) diff --git a/tests/test_legacy/test_trainer/test_pipeline/test_p2p.py b/tests/test_legacy/test_trainer/test_pipeline/test_p2p.py index cab111358..ba8504d06 100644 --- a/tests/test_legacy/test_trainer/test_pipeline/test_p2p.py +++ b/tests/test_legacy/test_trainer/test_pipeline/test_p2p.py @@ -86,7 +86,7 @@ def check_comm(size, rank, prev_rank, next_rank, logger): def run_check(rank, world_size, port): - launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") logger = get_dist_logger() rank = gpc.get_global_rank() prev_rank = gpc.get_prev_global_rank(ParallelMode.PIPELINE) diff --git a/tests/test_legacy/test_trainer/test_pipeline/test_pipeline_schedule.py b/tests/test_legacy/test_trainer/test_pipeline/test_pipeline_schedule.py index cd7fcfe56..ae7b961ae 100644 --- a/tests/test_legacy/test_trainer/test_pipeline/test_pipeline_schedule.py +++ b/tests/test_legacy/test_trainer/test_pipeline/test_pipeline_schedule.py @@ -23,7 +23,7 @@ CONFIG = dict(NUM_MICRO_BATCHES=2, parallel=dict(pipeline=dict(size=2), tensor=d def run_schedule(rank, world_size, port): - launch(config=CONFIG, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") # build model model = resnet18(num_classes=10) diff --git a/tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_1d.py b/tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_1d.py index c07ff132b..e1b2128aa 100644 --- a/tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_1d.py +++ b/tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_1d.py @@ -43,7 +43,7 @@ def check_checkpoint_1d(rank, world_size, port): ) disable_existing_loggers() - launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + 
launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") m1 = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 4)) sd1 = m1.state_dict() diff --git a/tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_2d.py b/tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_2d.py index 2ec1facf2..12747951b 100644 --- a/tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_2d.py +++ b/tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_2d.py @@ -43,7 +43,7 @@ def check_checkpoint_2d(rank, world_size, port): ) disable_existing_loggers() - launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") m1 = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 4)) sd1 = m1.state_dict() diff --git a/tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_2p5d.py b/tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_2p5d.py index a6bf702a8..f7e7b6fad 100644 --- a/tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_2p5d.py +++ b/tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_2p5d.py @@ -43,7 +43,7 @@ def check_checkpoint_2p5d(rank, world_size, port): ) disable_existing_loggers() - launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") m1 = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 4)) sd1 = m1.state_dict() diff --git a/tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_3d.py b/tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_3d.py index 12d928312..05666cc93 100644 --- a/tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_3d.py +++ b/tests/test_legacy/test_utils/test_checkpoint/test_checkpoint_3d.py @@ -43,7 +43,7 @@ def check_checkpoint_3d(rank, world_size, port): ) disable_existing_loggers() - launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") m1 = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 4)) sd1 = m1.state_dict() diff --git a/tests/test_legacy/test_utils/test_memory.py b/tests/test_legacy/test_utils/test_memory.py index 4993df4f3..30fc17b8e 100644 --- a/tests/test_legacy/test_utils/test_memory.py +++ b/tests/test_legacy/test_utils/test_memory.py @@ -14,7 +14,7 @@ def _run_colo_set_process_memory_fraction_and_colo_device_memory_capacity(): def run_dist(rank, world_size, port): - colossalai.legacy.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.legacy.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") _run_colo_set_process_memory_fraction_and_colo_device_memory_capacity() diff --git a/tests/test_legacy/test_utils/test_norm_gradient_clipping.py b/tests/test_legacy/test_utils/test_norm_gradient_clipping.py index 9975cc04f..c5fab49f4 100644 --- a/tests/test_legacy/test_utils/test_norm_gradient_clipping.py +++ b/tests/test_legacy/test_utils/test_norm_gradient_clipping.py @@ -62,7 +62,7 @@ def run_grad_clip_norm(world_size: int, dtype: torch.dtype, device: str, norm_ty def run_dist(rank, world_size, port): disable_existing_loggers() - colossalai.legacy.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.legacy.launch(rank=rank, world_size=world_size, 
host="localhost", port=port, backend="nccl") run_grad_clip_norm(world_size=world_size) diff --git a/tests/test_legacy/test_zero/test_commons.py b/tests/test_legacy/test_zero/test_commons.py index 741f519e1..32b15706d 100644 --- a/tests/test_legacy/test_zero/test_commons.py +++ b/tests/test_legacy/test_zero/test_commons.py @@ -7,7 +7,7 @@ from colossalai.testing import rerun_if_address_is_in_use, spawn def run_tensor_move(rank, world_size, port): - colossalai.legacy.launch(config={}, rank=0, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.legacy.launch(rank=0, world_size=world_size, host="localhost", port=port, backend="nccl") src_t = torch.ones(2, 3).cuda() tgt_t = torch.zeros(2, 3) diff --git a/tests/test_lora/test_lora.py b/tests/test_lora/test_lora.py index 69febff38..b8daf775d 100644 --- a/tests/test_lora/test_lora.py +++ b/tests/test_lora/test_lora.py @@ -96,8 +96,7 @@ def run_lora_test(): def run_dist(rank, world_size, port): - config = {} - colossalai.launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_lora_test() diff --git a/tests/test_moe/test_grad_handler.py b/tests/test_moe/test_grad_handler.py index a349bc5a9..a88f5f9cc 100644 --- a/tests/test_moe/test_grad_handler.py +++ b/tests/test_moe/test_grad_handler.py @@ -16,7 +16,6 @@ DIM = 16 def run_test(rank, world_size, port): colossalai.launch( - config=dict(), rank=rank, world_size=world_size, host="localhost", diff --git a/tests/test_moe/test_kernel.py b/tests/test_moe/test_kernel.py index 62d61a3d4..30122d31a 100644 --- a/tests/test_moe/test_kernel.py +++ b/tests/test_moe/test_kernel.py @@ -20,7 +20,7 @@ def run_routing(rank, world_size, port, rs=2, hidden_size=128, data_type=torch.f # Here we do not need TF32, since it brings absolute error on results torch.backends.cuda.matmul.allow_tf32 = False - colossalai.launch(config=dict(), rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") local_rank = dist.get_rank() MOE_MANAGER.setup(parallel="EP") # MOE environment initialization diff --git a/tests/test_moe/test_moe_ep_tp.py b/tests/test_moe/test_moe_ep_tp.py index 74feeeb59..660fbd358 100644 --- a/tests/test_moe/test_moe_ep_tp.py +++ b/tests/test_moe/test_moe_ep_tp.py @@ -128,7 +128,7 @@ def sync_local_from_ep(local_model: SparseMLP, ep_model: SparseMLP, assert_grad_ def run_test(rank: int, world_size: int, port: int, num_experts: int, batch_size: int, dim: int, config: Dict): assert batch_size % world_size == 0 - colossalai.launch(config=dict(), rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") MOE_MANAGER.__init__() MOE_MANAGER.setup(parallel=None) diff --git a/tests/test_moe/test_moe_group.py b/tests/test_moe/test_moe_group.py index 2f08a335d..b7be54d26 100644 --- a/tests/test_moe/test_moe_group.py +++ b/tests/test_moe/test_moe_group.py @@ -60,7 +60,6 @@ def run_moe_init(expert_parallel): def _run_test(rank, world_size, port, expert_parallel): colossalai.launch( - config=dict(), rank=rank, world_size=world_size, host="localhost", diff --git a/tests/test_moe/test_moe_hybrid_zero.py b/tests/test_moe/test_moe_hybrid_zero.py index 7ada4090f..7932fa8a7 100644 --- 
a/tests/test_moe/test_moe_hybrid_zero.py +++ b/tests/test_moe/test_moe_hybrid_zero.py @@ -81,7 +81,7 @@ def run_zero_optim_test(local_rank, world_size, stage=1): def run_dist(rank, world_size, port): - colossalai.launch(config=dict(), rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_zero_optim_test(rank, world_size, stage=1) run_zero_optim_test(rank, world_size, stage=2) diff --git a/tests/test_moe/test_moe_load_balance.py b/tests/test_moe/test_moe_load_balance.py index 717bb99fb..fae189bac 100644 --- a/tests/test_moe/test_moe_load_balance.py +++ b/tests/test_moe/test_moe_load_balance.py @@ -164,7 +164,6 @@ def run_hybrid_zero_optim_test(local_rank, world_size, stage=1): def run_dist(rank, world_size, port): colossalai.launch( - config=dict(), rank=rank, world_size=world_size, host="localhost", diff --git a/tests/test_moe/test_moe_zero_fwd_bwd.py b/tests/test_moe/test_moe_zero_fwd_bwd.py index 1bff21066..3bb08b49e 100644 --- a/tests/test_moe/test_moe_zero_fwd_bwd.py +++ b/tests/test_moe/test_moe_zero_fwd_bwd.py @@ -61,7 +61,7 @@ def run_zero_test(local_rank, stage=1): def run_dist(rank, world_size, port, stage): - colossalai.launch(config=dict(), rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") seed_all(42 + rank) run_zero_test(rank, stage=stage) diff --git a/tests/test_moe/test_moe_zero_optim.py b/tests/test_moe/test_moe_zero_optim.py index 4f6067aaa..224c5c3b9 100644 --- a/tests/test_moe/test_moe_zero_optim.py +++ b/tests/test_moe/test_moe_zero_optim.py @@ -66,7 +66,7 @@ def run_zero_test(local_rank, stage=1): def run_dist(rank, world_size, port, stage): - colossalai.launch(config=dict(), rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") seed_all(42 + rank) run_zero_test(rank, stage=stage) diff --git a/tests/test_optimizer/test_adam_kernel.py b/tests/test_optimizer/test_adam_kernel.py index 6d932156a..002649905 100644 --- a/tests/test_optimizer/test_adam_kernel.py +++ b/tests/test_optimizer/test_adam_kernel.py @@ -69,7 +69,7 @@ class FusedAdamKernel(AdamKernel): fused_optim = FusedOptimizerLoader().load() self.fused_adam = fused_optim.multi_tensor_adam - self.dummy_overflow_buf = torch.cuda.IntTensor([0]) + self.dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device=get_accelerator().get_current_device()) def update(self, step: int, param: Tensor, grad: Tensor, exp_avg: Tensor, exp_avg_sq: Tensor): multi_tensor_applier( diff --git a/tests/test_pipeline/test_p2p_communication.py b/tests/test_pipeline/test_p2p_communication.py index 6f5e734b7..48a8d12e0 100644 --- a/tests/test_pipeline/test_p2p_communication.py +++ b/tests/test_pipeline/test_p2p_communication.py @@ -71,7 +71,7 @@ def check_p2p_communication(): def run_dist(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.launch(rank=rank, world_size=world_size, port=port, host="localhost") check_p2p_communication() diff --git a/tests/test_pipeline/test_schedule/test_interleaved.py b/tests/test_pipeline/test_schedule/test_interleaved.py index f8820688e..a626b834a 100644 --- a/tests/test_pipeline/test_schedule/test_interleaved.py +++ 
b/tests/test_pipeline/test_schedule/test_interleaved.py @@ -58,7 +58,7 @@ def run_pp( This test is to examine the correctness of interleaved 1F1B, compared with torch. Be aware it contains some hardcodes. """ - colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.launch(rank=rank, world_size=world_size, port=port, host="localhost") # create model seed_all(1453) diff --git a/tests/test_pipeline/test_schedule/test_oneF_oneB.py b/tests/test_pipeline/test_schedule/test_oneF_oneB.py index 590800780..c4bfa7b69 100644 --- a/tests/test_pipeline/test_schedule/test_oneF_oneB.py +++ b/tests/test_pipeline/test_schedule/test_oneF_oneB.py @@ -148,7 +148,7 @@ def run_dist( num_microbatch: int, batch_size: int, ): - colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.launch(rank=rank, world_size=world_size, port=port, host="localhost") examine_pp(num_microbatch, batch_size) diff --git a/tests/test_pipeline/test_stage_manager.py b/tests/test_pipeline/test_stage_manager.py index ed8284b3e..5146a86c8 100644 --- a/tests/test_pipeline/test_stage_manager.py +++ b/tests/test_pipeline/test_stage_manager.py @@ -64,7 +64,7 @@ def check_stage_manager(): def run_dist(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.launch(rank=rank, world_size=world_size, port=port, host="localhost") check_stage_manager() diff --git a/tests/test_shardformer/test_hybrid_parallel_grad_clip_norm/test_amp_optimizer.py b/tests/test_shardformer/test_hybrid_parallel_grad_clip_norm/test_amp_optimizer.py index f652d18e9..b2c81f8ab 100644 --- a/tests/test_shardformer/test_hybrid_parallel_grad_clip_norm/test_amp_optimizer.py +++ b/tests/test_shardformer/test_hybrid_parallel_grad_clip_norm/test_amp_optimizer.py @@ -193,13 +193,13 @@ def run_3d_test(test_config): def check_grad_clip_norm(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_test() def check_grad_clip_norm_3d(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_3d_test() diff --git a/tests/test_shardformer/test_hybrid_parallel_grad_clip_norm/test_naive_optimizer.py b/tests/test_shardformer/test_hybrid_parallel_grad_clip_norm/test_naive_optimizer.py index a749a2966..ee1fd9333 100644 --- a/tests/test_shardformer/test_hybrid_parallel_grad_clip_norm/test_naive_optimizer.py +++ b/tests/test_shardformer/test_hybrid_parallel_grad_clip_norm/test_naive_optimizer.py @@ -151,13 +151,13 @@ def run_3d_test(test_config): def check_grad_clip_norm(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_test() def check_grad_clip_norm_3d(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, 
backend="nccl") run_3d_test() diff --git a/tests/test_shardformer/test_hybrid_parallel_grad_clip_norm/test_zero_optimizer.py b/tests/test_shardformer/test_hybrid_parallel_grad_clip_norm/test_zero_optimizer.py index 41f06a4c3..be257e818 100644 --- a/tests/test_shardformer/test_hybrid_parallel_grad_clip_norm/test_zero_optimizer.py +++ b/tests/test_shardformer/test_hybrid_parallel_grad_clip_norm/test_zero_optimizer.py @@ -183,13 +183,13 @@ def run_3d_test(test_config): def check_grad_clip_norm(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_test() def check_grad_clip_norm_3d(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_3d_test() diff --git a/tests/test_shardformer/test_layer/test_dist_crossentropy.py b/tests/test_shardformer/test_layer/test_dist_crossentropy.py index 414157c22..8ace0e028 100644 --- a/tests/test_shardformer/test_layer/test_dist_crossentropy.py +++ b/tests/test_shardformer/test_layer/test_dist_crossentropy.py @@ -14,7 +14,7 @@ CONFIG = dict( def check_dist_crossentropy(rank, world_size, port, ignore_index): disable_existing_loggers() - colossalai.launch(config=CONFIG, rank=rank, world_size=world_size, port=port, host="localhost", backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, port=port, host="localhost", backend="nccl") # prepare data pred = torch.randn(2, 4, 8, requires_grad=True).cuda() diff --git a/tests/test_shardformer/test_layer/test_dropout.py b/tests/test_shardformer/test_layer/test_dropout.py index 576620e6c..f1e646ed2 100644 --- a/tests/test_shardformer/test_layer/test_dropout.py +++ b/tests/test_shardformer/test_layer/test_dropout.py @@ -56,7 +56,7 @@ def check_dropout_replicated_input(): def run_dist(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") check_dropout_parallel_input() check_dropout_replicated_input() diff --git a/tests/test_shardformer/test_layer/test_embedding.py b/tests/test_shardformer/test_layer/test_embedding.py index 3dbbcd766..3d7dc2088 100644 --- a/tests/test_shardformer/test_layer/test_embedding.py +++ b/tests/test_shardformer/test_layer/test_embedding.py @@ -43,7 +43,7 @@ def check_embedding_1d(lazy_init: bool): def run_dist(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") check_embedding_1d() diff --git a/tests/test_shardformer/test_layer/test_gpt2_qkv_fused_linear_1d.py b/tests/test_shardformer/test_layer/test_gpt2_qkv_fused_linear_1d.py index e9aa0dbed..5aa8584a0 100644 --- a/tests/test_shardformer/test_layer/test_gpt2_qkv_fused_linear_1d.py +++ b/tests/test_shardformer/test_layer/test_gpt2_qkv_fused_linear_1d.py @@ -143,7 +143,7 @@ def check_gpt2_qkv_fused_linear_1d(lazy_init: bool, seq_parallel_mode: bool, ove def run_dist(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, 
host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") # test for linear conv check_gpt2_qkv_fused_linear_1d() diff --git a/tests/test_shardformer/test_layer/test_layernorm.py b/tests/test_shardformer/test_layer/test_layernorm.py index 3eb3bb2e5..b0deff6b8 100644 --- a/tests/test_shardformer/test_layer/test_layernorm.py +++ b/tests/test_shardformer/test_layer/test_layernorm.py @@ -41,7 +41,7 @@ def check_layernorm(lazy_init: bool): def run_dist(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") check_layernorm() diff --git a/tests/test_shardformer/test_layer/test_linear_1d.py b/tests/test_shardformer/test_layer/test_linear_1d.py index 21d3190de..541aa3251 100644 --- a/tests/test_shardformer/test_layer/test_linear_1d.py +++ b/tests/test_shardformer/test_layer/test_linear_1d.py @@ -185,7 +185,7 @@ def run_dist_linear_test(lazy_init, seq_parallel_mode, overlap): def check_dist_linear(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_dist_linear_test() diff --git a/tests/test_shardformer/test_layer/test_qkv_fused_linear_1d.py b/tests/test_shardformer/test_layer/test_qkv_fused_linear_1d.py index 5e996d2ba..dc14fd591 100644 --- a/tests/test_shardformer/test_layer/test_qkv_fused_linear_1d.py +++ b/tests/test_shardformer/test_layer/test_qkv_fused_linear_1d.py @@ -126,7 +126,7 @@ def check_linear_conv_1d_row(lazy_init: bool): def run_dist(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") # test for linear conv check_linear_conv_1d_col() diff --git a/tests/test_shardformer/test_layer/test_sequence_parallel.py b/tests/test_shardformer/test_layer/test_sequence_parallel.py index 13b1a13e7..a6cf61f8f 100644 --- a/tests/test_shardformer/test_layer/test_sequence_parallel.py +++ b/tests/test_shardformer/test_layer/test_sequence_parallel.py @@ -165,7 +165,7 @@ def run_seq_parallel_attn(seq_len, hidden_dim, head_num, batch_size): def check_all2all_attn(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_seq_parallel_attn() diff --git a/tests/test_shardformer/test_layer/test_vocab_parallel_embedding_1d.py b/tests/test_shardformer/test_layer/test_vocab_parallel_embedding_1d.py index 91cc1a987..fdd304256 100644 --- a/tests/test_shardformer/test_layer/test_vocab_parallel_embedding_1d.py +++ b/tests/test_shardformer/test_layer/test_vocab_parallel_embedding_1d.py @@ -45,7 +45,7 @@ def check_vocab_embedding_1d(lazy_init: bool): def run_dist(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") check_vocab_embedding_1d() diff --git a/tests/test_shardformer/test_model/test_shard_bert.py 
b/tests/test_shardformer/test_model/test_shard_bert.py index 919557797..3ec394768 100644 --- a/tests/test_shardformer/test_model/test_shard_bert.py +++ b/tests/test_shardformer/test_model/test_shard_bert.py @@ -231,13 +231,13 @@ def run_bert_3d_test(test_config): def check_bert(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_bert_test() def check_bert_3d(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_bert_3d_test() diff --git a/tests/test_shardformer/test_model/test_shard_blip2.py b/tests/test_shardformer/test_model/test_shard_blip2.py index 2c56b0435..712c5c1e1 100644 --- a/tests/test_shardformer/test_model/test_shard_blip2.py +++ b/tests/test_shardformer/test_model/test_shard_blip2.py @@ -99,7 +99,6 @@ def run_blip2_test( def check_blip2(rank, world_size, port): disable_existing_loggers() colossalai.launch( - config={}, rank=rank, world_size=world_size, host="localhost", diff --git a/tests/test_shardformer/test_model/test_shard_bloom.py b/tests/test_shardformer/test_model/test_shard_bloom.py index cc0786618..6ab0369e0 100644 --- a/tests/test_shardformer/test_model/test_shard_bloom.py +++ b/tests/test_shardformer/test_model/test_shard_bloom.py @@ -209,13 +209,13 @@ def run_bloom_3d_test(test_config): def check_bloom(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_bloom_test() def check_bloom_3d(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_bloom_3d_test() diff --git a/tests/test_shardformer/test_model/test_shard_chatglm2.py b/tests/test_shardformer/test_model/test_shard_chatglm2.py index 376d315c1..6ce020b68 100644 --- a/tests/test_shardformer/test_model/test_shard_chatglm2.py +++ b/tests/test_shardformer/test_model/test_shard_chatglm2.py @@ -259,7 +259,6 @@ def run_chatglm_3d_test(test_config): def check_chatglm(rank, world_size, port): disable_existing_loggers() colossalai.launch( - config={}, rank=rank, world_size=world_size, host="localhost", @@ -272,7 +271,6 @@ def check_chatglm(rank, world_size, port): def check_chatglm_3d(rank, world_size, port): disable_existing_loggers() colossalai.launch( - config={}, rank=rank, world_size=world_size, host="localhost", diff --git a/tests/test_shardformer/test_model/test_shard_falcon.py b/tests/test_shardformer/test_model/test_shard_falcon.py index 5e2efcd80..8074f9d61 100644 --- a/tests/test_shardformer/test_model/test_shard_falcon.py +++ b/tests/test_shardformer/test_model/test_shard_falcon.py @@ -176,13 +176,13 @@ def run_falcon_3d_test(test_config): def check_falcon(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, 
host="localhost", port=port, backend="nccl") run_falcon_test() def check_falcon_3d(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_falcon_3d_test() diff --git a/tests/test_shardformer/test_model/test_shard_gpt2.py b/tests/test_shardformer/test_model/test_shard_gpt2.py index 4aac7f3d4..72ea2b089 100644 --- a/tests/test_shardformer/test_model/test_shard_gpt2.py +++ b/tests/test_shardformer/test_model/test_shard_gpt2.py @@ -275,7 +275,6 @@ def run_gpt2_3d_test(test_config): def check_gpt2(rank, world_size, port): disable_existing_loggers() colossalai.launch( - config={}, rank=rank, world_size=world_size, host="localhost", @@ -288,7 +287,6 @@ def check_gpt2(rank, world_size, port): def check_gpt2_3d(rank, world_size, port): disable_existing_loggers() colossalai.launch( - config={}, rank=rank, world_size=world_size, host="localhost", diff --git a/tests/test_shardformer/test_model/test_shard_llama.py b/tests/test_shardformer/test_model/test_shard_llama.py index 394592688..104ede981 100644 --- a/tests/test_shardformer/test_model/test_shard_llama.py +++ b/tests/test_shardformer/test_model/test_shard_llama.py @@ -319,13 +319,13 @@ def run_llama_3d_test(test_config): def check_llama(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_llama_test() def check_llama_3d(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_llama_3d_test() diff --git a/tests/test_shardformer/test_model/test_shard_mistral.py b/tests/test_shardformer/test_model/test_shard_mistral.py index 05c199814..deced9d56 100644 --- a/tests/test_shardformer/test_model/test_shard_mistral.py +++ b/tests/test_shardformer/test_model/test_shard_mistral.py @@ -170,7 +170,7 @@ def run_mistral_test(test_config): def check_mistral(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_mistral_test() diff --git a/tests/test_shardformer/test_model/test_shard_opt.py b/tests/test_shardformer/test_model/test_shard_opt.py index 523ed879b..b7c77d20b 100644 --- a/tests/test_shardformer/test_model/test_shard_opt.py +++ b/tests/test_shardformer/test_model/test_shard_opt.py @@ -233,7 +233,6 @@ def run_opt_3d_test(test_config): def check_OPTModel(rank, world_size, port): disable_existing_loggers() colossalai.launch( - config={}, rank=rank, world_size=world_size, host="localhost", @@ -246,7 +245,6 @@ def check_OPTModel(rank, world_size, port): def check_opt_3d(rank, world_size, port): disable_existing_loggers() colossalai.launch( - config={}, rank=rank, world_size=world_size, host="localhost", diff --git a/tests/test_shardformer/test_model/test_shard_sam.py b/tests/test_shardformer/test_model/test_shard_sam.py index a8d4cb635..e872d7f7b 100644 --- a/tests/test_shardformer/test_model/test_shard_sam.py +++ 
b/tests/test_shardformer/test_model/test_shard_sam.py @@ -57,7 +57,7 @@ def run_sam_test(enable_fused_normalization, enable_tensor_parallelism, enable_f def check_sam(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_sam_test() diff --git a/tests/test_shardformer/test_model/test_shard_t5.py b/tests/test_shardformer/test_model/test_shard_t5.py index a6fe2dd39..521dc9130 100644 --- a/tests/test_shardformer/test_model/test_shard_t5.py +++ b/tests/test_shardformer/test_model/test_shard_t5.py @@ -222,7 +222,6 @@ def run_t5_3d_test(test_config): def check_t5(rank, world_size, port): disable_existing_loggers() colossalai.launch( - config={}, rank=rank, world_size=world_size, host="localhost", @@ -235,7 +234,6 @@ def check_t5(rank, world_size, port): def check_t5_3d(rank, world_size, port): disable_existing_loggers() colossalai.launch( - config={}, rank=rank, world_size=world_size, host="localhost", diff --git a/tests/test_shardformer/test_model/test_shard_vit.py b/tests/test_shardformer/test_model/test_shard_vit.py index 3a8af2d6d..d33b52b42 100644 --- a/tests/test_shardformer/test_model/test_shard_vit.py +++ b/tests/test_shardformer/test_model/test_shard_vit.py @@ -168,13 +168,13 @@ def run_vit_3d_test(test_config): def check_vit(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_vit_test() def check_vit_3d(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_vit_3d_test() diff --git a/tests/test_shardformer/test_model/test_shard_whisper.py b/tests/test_shardformer/test_model/test_shard_whisper.py index af61e4640..beb2a6761 100644 --- a/tests/test_shardformer/test_model/test_shard_whisper.py +++ b/tests/test_shardformer/test_model/test_shard_whisper.py @@ -196,13 +196,13 @@ def run_whisper_3d_test(test_config): def check_whisper(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_whisper_test() def check_whisper_3d(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_whisper_3d_test() diff --git a/tests/test_shardformer/test_with_torch_ddp.py b/tests/test_shardformer/test_with_torch_ddp.py index 4b741c21b..4735df717 100644 --- a/tests/test_shardformer/test_with_torch_ddp.py +++ b/tests/test_shardformer/test_with_torch_ddp.py @@ -71,7 +71,7 @@ def check_shardformer_with_ddp(lazy_init: bool): def run_dist(rank, world_size, port): disable_existing_loggers() - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, 
backend="nccl") check_shardformer_with_ddp() diff --git a/tests/test_tensor/test_comm_spec_apply.py b/tests/test_tensor/test_comm_spec_apply.py index 5e969b1aa..a2414d949 100644 --- a/tests/test_tensor/test_comm_spec_apply.py +++ b/tests/test_tensor/test_comm_spec_apply.py @@ -178,7 +178,7 @@ def check_all_reduce_in_flatten_device_mesh(device_mesh, rank): def check_comm(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") physical_mesh_id = torch.arange(0, 4) assert rank == dist.get_rank() diff --git a/tests/test_tensor/test_dtensor/test_comm_spec.py b/tests/test_tensor/test_dtensor/test_comm_spec.py index 6d1640b4f..fd9996710 100644 --- a/tests/test_tensor/test_dtensor/test_comm_spec.py +++ b/tests/test_tensor/test_dtensor/test_comm_spec.py @@ -124,7 +124,7 @@ def check_all_reduce_bwd(process_groups_dict, rank): def check_comm(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") physical_mesh_id = torch.arange(0, 4) assert rank == dist.get_rank() diff --git a/tests/test_tensor/test_dtensor/test_dtensor.py b/tests/test_tensor/test_dtensor/test_dtensor.py index 33ae59d01..60efa315e 100644 --- a/tests/test_tensor/test_dtensor/test_dtensor.py +++ b/tests/test_tensor/test_dtensor/test_dtensor.py @@ -21,7 +21,7 @@ class TestModel(torch.nn.Module): def check_dtensor(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") test_model = TestModel(8, 8).to("cuda") original_tensor = torch.rand(4, 8).to("cuda") compare_output = test_model(original_tensor) diff --git a/tests/test_tensor/test_dtensor/test_layout_converter.py b/tests/test_tensor/test_dtensor/test_layout_converter.py index 3bface1d2..6e426d0e8 100644 --- a/tests/test_tensor/test_dtensor/test_layout_converter.py +++ b/tests/test_tensor/test_dtensor/test_layout_converter.py @@ -20,7 +20,7 @@ mesh_shape = (2, 2) def check_one_step_transform(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") # [[0, 1], # [2, 3]] device_mesh = DeviceMesh(physical_mesh_id, mesh_shape, init_process_group=True) @@ -82,7 +82,7 @@ def check_one_step_transform(rank, world_size, port): def check_layout_converting(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") dim_partition_source = {1: [0, 1]} dim_partition_target = {0: [0, 1]} device_mesh = DeviceMesh(physical_mesh_id, mesh_shape, init_process_group=True) @@ -141,7 +141,7 @@ def check_layout_converting(rank, world_size, port): def check_layout_converting_apply(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") 
dim_partition_source = {1: [0, 1]} dim_partition_target = {0: [0, 1]} diff --git a/tests/test_tensor/test_mix_gather.py b/tests/test_tensor/test_mix_gather.py index 7d6f8979d..6dbbe5de6 100644 --- a/tests/test_tensor/test_mix_gather.py +++ b/tests/test_tensor/test_mix_gather.py @@ -296,7 +296,7 @@ def check_two_all_gather_RS01(device_mesh, rank): def check_comm(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") physical_mesh_id = torch.arange(0, 8) assert rank == dist.get_rank() diff --git a/tests/test_tensor/test_padded_tensor.py b/tests/test_tensor/test_padded_tensor.py index 31a267c15..6d19845df 100644 --- a/tests/test_tensor/test_padded_tensor.py +++ b/tests/test_tensor/test_padded_tensor.py @@ -10,7 +10,7 @@ from colossalai.testing import rerun_if_address_is_in_use, spawn def check_padded_tensor(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") original_tensor = torch.rand(32, 64).to("cuda") device_mesh = DeviceMesh(torch.Tensor([0, 1, 2, 3]), (2, 2), init_process_group=True) diff --git a/tests/test_tensor/test_shape_consistency_apply.py b/tests/test_tensor/test_shape_consistency_apply.py index b2bc84edd..8d8d8ef51 100644 --- a/tests/test_tensor/test_shape_consistency_apply.py +++ b/tests/test_tensor/test_shape_consistency_apply.py @@ -11,7 +11,7 @@ from colossalai.testing import rerun_if_address_is_in_use, spawn def check_apply(rank, world_size, port): disable_existing_loggers() - launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") physical_mesh_id = torch.arange(0, 4) mesh_shape = (2, 2) diff --git a/tests/test_zero/test_gemini/test_chunk_mgrv2.py b/tests/test_zero/test_gemini/test_chunk_mgrv2.py index 879eeccde..412a95f6a 100644 --- a/tests/test_zero/test_gemini/test_chunk_mgrv2.py +++ b/tests/test_zero/test_gemini/test_chunk_mgrv2.py @@ -49,7 +49,7 @@ def exam_chunk_memory(keep_gathered, pin_memory): def run_dist(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") exam_chunk_memory() diff --git a/tests/test_zero/test_gemini/test_chunkv2.py b/tests/test_zero/test_gemini/test_chunkv2.py index e4dc569b8..257311328 100644 --- a/tests/test_zero/test_gemini/test_chunkv2.py +++ b/tests/test_zero/test_gemini/test_chunkv2.py @@ -108,7 +108,7 @@ def exam_chunk_basic(init_device, keep_gathered, pin_memory): def run_dist(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") exam_chunk_basic() diff --git a/tests/test_zero/test_gemini/test_fwd_bwd.py b/tests/test_zero/test_gemini/test_fwd_bwd.py index 3a9742e01..d9084fd5a 100644 --- a/tests/test_zero/test_gemini/test_fwd_bwd.py +++ b/tests/test_zero/test_gemini/test_fwd_bwd.py @@ -100,8 +100,7 @@ def exam_gpt_fwd_bwd( def run_dist(rank, world_size, port): - config = {} - 
colossalai.launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") exam_gpt_fwd_bwd() diff --git a/tests/test_zero/test_gemini/test_gemini_use_rmt.py b/tests/test_zero/test_gemini/test_gemini_use_rmt.py index 90ad62d1a..1e49f2851 100644 --- a/tests/test_zero/test_gemini/test_gemini_use_rmt.py +++ b/tests/test_zero/test_gemini/test_gemini_use_rmt.py @@ -80,8 +80,7 @@ def run_gemini_use_rmt(placement_policy, keep_gather, model_name: str, use_grad_ def run_dist(rank, world_size, port): - config = {} - colossalai.launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") run_gemini_use_rmt() diff --git a/tests/test_zero/test_gemini/test_grad_accum.py b/tests/test_zero/test_gemini/test_grad_accum.py index 36a803492..fd0e9fd7c 100644 --- a/tests/test_zero/test_gemini/test_grad_accum.py +++ b/tests/test_zero/test_gemini/test_grad_accum.py @@ -138,8 +138,7 @@ def exam_gemini_grad_acc( def run_dist(rank, world_size, port): - config = {} - colossalai.launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") exam_gemini_grad_acc() diff --git a/tests/test_zero/test_gemini/test_grad_clip.py b/tests/test_zero/test_gemini/test_grad_clip.py index 23b3504fd..0a9bac092 100644 --- a/tests/test_zero/test_gemini/test_grad_clip.py +++ b/tests/test_zero/test_gemini/test_grad_clip.py @@ -117,8 +117,7 @@ def exam_grad_clipping(placement_config, model_name: str, master_weights: bool): def run_dist(rank, world_size, port): - config = {} - colossalai.launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") exam_grad_clipping() diff --git a/tests/test_zero/test_gemini/test_inference.py b/tests/test_zero/test_gemini/test_inference.py index 7f3c7176e..e54804fc5 100644 --- a/tests/test_zero/test_gemini/test_inference.py +++ b/tests/test_zero/test_gemini/test_inference.py @@ -107,8 +107,7 @@ def exam_inference(placement_config: dict, model_name: str, model_init_func: Cal def run_dist(rank, world_size, port): - config = {} - colossalai.launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") exam_inference() diff --git a/tests/test_zero/test_gemini/test_optim.py b/tests/test_zero/test_gemini/test_optim.py index 71bb27b4a..a9366e7bc 100644 --- a/tests/test_zero/test_gemini/test_optim.py +++ b/tests/test_zero/test_gemini/test_optim.py @@ -183,8 +183,7 @@ def exam_tiny_example(placement_config, model_name: str, mixed_precision: torch. 
def run_dist(rank, world_size, port): - config = {} - colossalai.launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") exam_model_step() exam_tiny_example() diff --git a/tests/test_zero/test_gemini/test_search.py b/tests/test_zero/test_gemini/test_search.py index cf3658bf9..9c8c497f3 100644 --- a/tests/test_zero/test_gemini/test_search.py +++ b/tests/test_zero/test_gemini/test_search.py @@ -47,7 +47,7 @@ def exam_chunk_manager(): def run_dist(rank, world_size, port): - colossalai.launch(config={}, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") exam_search_chunk_size() exam_chunk_manager() diff --git a/tests/test_zero/test_gemini/test_zeroddp_state_dict.py b/tests/test_zero/test_gemini/test_zeroddp_state_dict.py index cbf5169fc..23e2d8083 100644 --- a/tests/test_zero/test_gemini/test_zeroddp_state_dict.py +++ b/tests/test_zero/test_gemini/test_zeroddp_state_dict.py @@ -76,8 +76,7 @@ def exam_state_dict(placement_config, keep_gathered, model_name: str, master_wei def run_dist(rank, world_size, port): - config = {} - colossalai.launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") exam_state_dict() diff --git a/tests/test_zero/test_gemini/test_zerooptim_state_dict.py b/tests/test_zero/test_gemini/test_zerooptim_state_dict.py index 87cb1cdfe..8d70ae3b1 100644 --- a/tests/test_zero/test_gemini/test_zerooptim_state_dict.py +++ b/tests/test_zero/test_gemini/test_zerooptim_state_dict.py @@ -68,8 +68,7 @@ def exam_zero_optim_state_dict(placement_config, keep_gathered): def run_dist(rank, world_size, port): - config = {} - colossalai.launch(config=config, rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") + colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl") exam_zero_optim_state_dict() diff --git a/tests/test_zero/test_low_level/test_grad_acc.py b/tests/test_zero/test_low_level/test_grad_acc.py index 11f738615..ed12bb72d 100644 --- a/tests/test_zero/test_low_level/test_grad_acc.py +++ b/tests/test_zero/test_low_level/test_grad_acc.py @@ -130,7 +130,7 @@ def exam_zero_1_grad_acc(sync): def run_dist(rank, world_size, port): - colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.launch(rank=rank, world_size=world_size, port=port, host="localhost") exam_zero_1_grad_acc(sync=True) exam_zero_1_grad_acc(sync=False) diff --git a/tests/test_zero/test_low_level/test_zero1_2.py b/tests/test_zero/test_low_level/test_zero1_2.py index e2196cfbf..06a29bd1d 100644 --- a/tests/test_zero/test_low_level/test_zero1_2.py +++ b/tests/test_zero/test_low_level/test_zero1_2.py @@ -178,7 +178,7 @@ def exam_zero_1_torch_ddp(world_size, dtype: torch.dtype, master_weights: bool): def run_dist(rank, world_size, port): - colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.launch(rank=rank, world_size=world_size, port=port, host="localhost") exam_zero_1_torch_ddp(world_size=world_size) exam_zero_1_2() diff --git a/tests/test_zero/test_low_level/test_zero_ckpt.py b/tests/test_zero/test_low_level/test_zero_ckpt.py index 
e9fc8598a..8543dfba0 100644 --- a/tests/test_zero/test_low_level/test_zero_ckpt.py +++ b/tests/test_zero/test_low_level/test_zero_ckpt.py @@ -103,7 +103,7 @@ def exam_zero_1_torch_ddp_ckpt(): def run_dist(rank, world_size, port): - colossalai.launch(config=dict(), rank=rank, world_size=world_size, port=port, host="localhost") + colossalai.launch(rank=rank, world_size=world_size, port=port, host="localhost") exam_zero_1_torch_ddp_ckpt()
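
The hunks above reduce every per-rank test entry point to the new config-free launch signature. A minimal sketch of the resulting pattern follows; the launch keywords are taken directly from the hunks, while the spawn(fn, nprocs) call pattern and the rerun_if_address_is_in_use decorator are assumptions about the test helpers imported in these files (their call sites are not shown in this part of the patch), and the worker body is only a placeholder:

    import colossalai
    from colossalai.testing import rerun_if_address_is_in_use, spawn

    def run_dist(rank, world_size, port):
        # After this refactor: no `config` argument, only process-group information.
        colossalai.launch(rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
        # ... run the actual distributed checks here ...

    @rerun_if_address_is_in_use()
    def test_example():
        # Assumed helper usage: spawn starts `run_dist` on 2 ranks and forwards rank/world_size/port.
        spawn(run_dist, 2)

The only non-launch change in this part of the patch is the device-agnostic overflow buffer in FusedAdamKernel. A sketch of that constructor is below; the import path for get_accelerator is an assumption, since the import line is not shown in these hunks:

    import torch
    from colossalai.accelerator import get_accelerator  # assumed import path

    # Replaces the CUDA-only torch.cuda.IntTensor([0]) with an accelerator-aware int tensor.
    dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device=get_accelerator().get_current_device())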