diff --git a/tests/components_to_test/albert.py b/tests/components_to_test/albert.py
index 52b2275ec..8924eb2fb 100644
--- a/tests/components_to_test/albert.py
+++ b/tests/components_to_test/albert.py
@@ -27,7 +27,7 @@ def get_training_components():
                               attention_probs_dropout_prob=0.)
         print('building AlbertForSequenceClassification model')
 
-        # adapting huggingface BertForSequenceClassification for single unitest calling interface
+        # adapting huggingface BertForSequenceClassification for single unittest calling interface
         class ModelAdaptor(AlbertForSequenceClassification):
 
             def forward(self, input_ids, labels):
diff --git a/tests/test_booster/test_accelerator.py b/tests/test_booster/test_accelerator.py
index 895c494d0..6f3f66ed4 100644
--- a/tests/test_booster/test_accelerator.py
+++ b/tests/test_booster/test_accelerator.py
@@ -7,8 +7,8 @@ from colossalai.testing import clear_cache_before_run, parameterize
 @clear_cache_before_run()
 @parameterize('device', ['cpu', 'cuda'])
 def test_accelerator(device):
-    acceleartor = Accelerator(device)
+    accelerator = Accelerator(device)
     model = nn.Linear(8, 8)
-    model = acceleartor.configure_model(model)
+    model = accelerator.configure_model(model)
     assert next(model.parameters()).device.type == device
-    del model, acceleartor
+    del model, accelerator
diff --git a/tests/test_booster/test_plugin/test_dp_plugin_base.py b/tests/test_booster/test_plugin/test_dp_plugin_base.py
index 61aeded12..689b334ca 100644
--- a/tests/test_booster/test_plugin/test_dp_plugin_base.py
+++ b/tests/test_booster/test_plugin/test_dp_plugin_base.py
@@ -56,7 +56,7 @@ class DPPluginWrapper(DPPluginBase):
 def check_dataloader_sharding():
     plugin = DPPluginWrapper()
 
-    # create a custom dasetset with 0 to 10
+    # create a custom dataset with 0 to 10
     dataset = TensorDataset(torch.arange(0, 10))
     train_dataloader = plugin.prepare_dataloader(dataset, batch_size=2)
 
diff --git a/tests/test_data_pipeline_tensor_parallel/test_cifar_with_data_pipeline_tensor.py b/tests/test_data_pipeline_tensor_parallel/test_cifar_with_data_pipeline_tensor.py
index 4d63592f1..4992acbd7 100644
--- a/tests/test_data_pipeline_tensor_parallel/test_cifar_with_data_pipeline_tensor.py
+++ b/tests/test_data_pipeline_tensor_parallel/test_cifar_with_data_pipeline_tensor.py
@@ -48,7 +48,7 @@ def run_trainer(rank, world_size, port):
     pipelinable.policy = "uniform"
     model = pipelinable.partition(1, gpc.pipeline_parallel_size, gpc.get_local_rank(ParallelMode.PIPELINE))
 
-    # craete dataloaders
+    # create dataloaders
     root = Path(os.environ['DATA'])
     transform_train = transforms.Compose([
         transforms.RandomCrop(32, padding=4, pad_if_needed=True),
@@ -68,7 +68,7 @@ def run_trainer(rank, world_size, port):
     # create lr scheduler
     lr_scheduler = CosineAnnealingWarmupLR(optimizer=optimizer, total_steps=NUM_EPOCHS, warmup_steps=WARMUP_EPOCHS)
 
-    # intiailize
+    # initialize
     engine, train_dataloader, *_ = colossalai.initialize(model=model,
                                                          optimizer=optimizer,
                                                          criterion=criterion,
diff --git a/tests/test_data_pipeline_tensor_parallel/test_cifar_with_data_pipeline_tensor_v2.py b/tests/test_data_pipeline_tensor_parallel/test_cifar_with_data_pipeline_tensor_v2.py
index 67d2ba5f5..62bbb8f50 100644
--- a/tests/test_data_pipeline_tensor_parallel/test_cifar_with_data_pipeline_tensor_v2.py
+++ b/tests/test_data_pipeline_tensor_parallel/test_cifar_with_data_pipeline_tensor_v2.py
@@ -50,7 +50,7 @@ def run_trainer(rank, world_size, port):
     pipelinable.policy = "uniform"
     model = pipelinable.partition(1, gpc.pipeline_parallel_size, gpc.get_local_rank(ParallelMode.PIPELINE))
 
-    # craete dataloaders
+    # create dataloaders
     root = Path(os.environ['DATA'])
     transform_train = transforms.Compose([
         transforms.RandomCrop(32, padding=4, pad_if_needed=True),
@@ -70,7 +70,7 @@ def run_trainer(rank, world_size, port):
     # create lr scheduler
     lr_scheduler = CosineAnnealingWarmupLR(optimizer=optimizer, total_steps=NUM_EPOCHS, warmup_steps=WARMUP_EPOCHS)
 
-    # intiailize
+    # initialize
     engine, train_dataloader, *_ = colossalai.initialize(model=model,
                                                          optimizer=optimizer,
                                                          criterion=criterion,
diff --git a/tests/test_fx/test_codegen/test_activation_checkpoint_codegen.py b/tests/test_fx/test_codegen/test_activation_checkpoint_codegen.py
index ab483f7e4..bcac2ec42 100644
--- a/tests/test_fx/test_codegen/test_activation_checkpoint_codegen.py
+++ b/tests/test_fx/test_codegen/test_activation_checkpoint_codegen.py
@@ -64,7 +64,7 @@ class MyModule(torch.nn.Module):
 
 
 def _run_act_ckpt_codegen(rank, world_size, port):
-    # launch colossalai to make sure we could execute colossalai.utils.checkpoint currectly
+    # launch colossalai to make sure we could execute colossalai.utils.checkpoint correctly
     colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
 
     # build model and run forward
@@ -122,7 +122,7 @@ def test_act_ckpt_codegen():
 
 
 def _run_act_ckpt_python_code_torch11(rank, world_size, port):
-    # launch colossalai to make sure we could execute colossalai.utils.checkpoint currectly
+    # launch colossalai to make sure we could execute colossalai.utils.checkpoint correctly
     colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
 
     # build model and run forward
diff --git a/tests/test_fx/test_codegen/test_nested_activation_checkpoint_codegen.py b/tests/test_fx/test_codegen/test_nested_activation_checkpoint_codegen.py
index 9064023d4..5b327807a 100644
--- a/tests/test_fx/test_codegen/test_nested_activation_checkpoint_codegen.py
+++ b/tests/test_fx/test_codegen/test_nested_activation_checkpoint_codegen.py
@@ -32,7 +32,7 @@ class MyModule(torch.nn.Module):
 
 
 def _run_act_ckpt_codegen(rank, world_size, port):
-    # launch colossalai to make sure we could execute colossalai.utils.checkpoint currectly
+    # launch colossalai to make sure we could execute colossalai.utils.checkpoint correctly
     colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
 
     # build model and run forward
@@ -89,7 +89,7 @@ def test_act_ckpt_codegen():
 
 
 def _run_act_ckpt_python_code_torch11(rank, world_size, port):
-    # launch colossalai to make sure we could execute colossalai.utils.checkpoint currectly
+    # launch colossalai to make sure we could execute colossalai.utils.checkpoint correctly
     colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
 
     # build model and run forward
diff --git a/tests/test_fx/test_codegen/test_offload_codegen.py b/tests/test_fx/test_codegen/test_offload_codegen.py
index 96e88eb92..c217b9658 100644
--- a/tests/test_fx/test_codegen/test_offload_codegen.py
+++ b/tests/test_fx/test_codegen/test_offload_codegen.py
@@ -56,7 +56,7 @@ def _test_fwd_and_bwd(model: torch.nn.Module, gm: ColoGraphModule, data: torch.T
     fx_out = gm(data)
     assert torch.equal(non_fx_out, fx_out), "fx_out doesn't comply with original output"
 
-    # test barckward
+    # test backward
     loss0 = non_fx_out.sum()
     loss0.backward()
     loss1 = fx_out.sum()
@@ -65,7 +65,7 @@ def _test_fwd_and_bwd(model: torch.nn.Module, gm: ColoGraphModule, data: torch.T
 
 
 def _run_offload_codegen(rank, world_size, port):
-    # launch colossalai to make sure we could execute colossalai.utils.checkpoint currectly
+    # launch colossalai to make sure we could execute colossalai.utils.checkpoint correctly
     colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
 
     # build model and input
@@ -120,7 +120,7 @@ def test_act_ckpt_codegen():
 
 
 def _run_offload_codegen_torch11(rank, world_size, port):
-    # launch colossalai to make sure we could execute colossalai.utils.checkpoint currectly
+    # launch colossalai to make sure we could execute colossalai.utils.checkpoint correctly
     colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
 
     # build model and input
diff --git a/tests/test_layers/test_sequence/test_sequence.py b/tests/test_layers/test_sequence/test_sequence.py
index aac192d7e..60f2d55f4 100644
--- a/tests/test_layers/test_sequence/test_sequence.py
+++ b/tests/test_layers/test_sequence/test_sequence.py
@@ -45,7 +45,7 @@ def check_ring_qk(rank, world_size):
     ring_qk = colossalai.nn.layer.parallel_sequence.RingQK.apply
     sub_a = ring_qk(sub_q, sub_k, batch_size, num_heads, sub_seq_length)
 
-    # check master and distributed attetion scores
+    # check master and distributed attention scores
     sub_master_a = a[:, rank * sub_seq_length:(rank + 1) * sub_seq_length]
     assert torch.allclose(sub_a, sub_master_a, rtol=1e-5, atol=1e-2)
 
diff --git a/tests/test_moe/test_kernel.py b/tests/test_moe/test_kernel.py
index ad9a172b7..39603c158 100644
--- a/tests/test_moe/test_kernel.py
+++ b/tests/test_moe/test_kernel.py
@@ -41,7 +41,7 @@ def run_routing(rank, world_size, port, rs=2, hidden_size=128, data_type=torch.f
     if data_type == torch.float16:
         layer = layer.half()
 
-    # use matrix multiplication instead of COL_MOE_KERNL in MOE dispatch and combine
+    # use matrix multiplication instead of COL_MOE_KERNEL in MOE dispatch and combine
     layer.use_kernel = False
     old_out, _ = layer(tokens)
     ech = old_out.shape
@@ -57,7 +57,7 @@ def run_routing(rank, world_size, port, rs=2, hidden_size=128, data_type=torch.f
         layer.gate_weight.grad.zero_()
 
     layer.use_kernel = True
-    new_out, _ = layer(tokens)    # get ouputs through colossal kernel
+    new_out, _ = layer(tokens)    # get outputs through colossal kernel
 
     if data_type == torch.float32:
         check_equal(old_out, new_out)
diff --git a/tests/test_tensor/model/test_model.py b/tests/test_tensor/model/test_model.py
index 79d70e53c..288bd20e3 100644
--- a/tests/test_tensor/model/test_model.py
+++ b/tests/test_tensor/model/test_model.py
@@ -329,6 +329,6 @@ def test_pretrain_load(world_size):
 
 if __name__ == '__main__':
     # test_model_parameters()
-    # test_colo_optgimizer()
+    # test_colo_optimizer()
     test_model(4)
     # test_pretrain_load(4)
diff --git a/tests/test_trainer/test_pipeline/test_p2p.py b/tests/test_trainer/test_pipeline/test_p2p.py
index cb7a193d2..8ad366133 100644
--- a/tests/test_trainer/test_pipeline/test_p2p.py
+++ b/tests/test_trainer/test_pipeline/test_p2p.py
@@ -90,7 +90,7 @@ def run_check(rank, world_size, port):
     prev_rank = gpc.get_prev_global_rank(ParallelMode.PIPELINE)
     next_rank = gpc.get_next_global_rank(ParallelMode.PIPELINE)
     logger.info('Rank {0}: prev rank {1}, next rank {2}'.format(rank, prev_rank, next_rank))
-    logger.info('Distributed environment is initialzied.')
+    logger.info('Distributed environment is initialized.')
     check_comm(world_size, rank, prev_rank, next_rank, logger)
     gpc.destroy()
 
diff --git a/tests/test_zero/test_gemini/test_chunkv2.py b/tests/test_zero/test_gemini/test_chunkv2.py
index 16764aa6b..1cb31b260 100644
--- a/tests/test_zero/test_gemini/test_chunkv2.py
+++ b/tests/test_zero/test_gemini/test_chunkv2.py
@@ -23,7 +23,7 @@ def add_param(param_list, param_cp_list, *args, **kwargs):
     param_cp_list.append(param.clone())
 
 
-def check_euqal(param, param_cp):
+def check_equal(param, param_cp):
     if param.device != param_cp.device:
         temp = param.data.to(param_cp.device)
     else:
@@ -57,7 +57,7 @@ def exam_chunk_basic(init_device, keep_gathered, pin_memory):
         my_chunk.append_tensor(param)
     assert my_chunk.utilized_size == 597
     for param, param_cp in zip(param_list, param_cp_list):
-        check_euqal(param, param_cp)
+        check_equal(param, param_cp)
     my_chunk.close_chunk()
 
     if keep_gathered is False:
@@ -77,7 +77,7 @@ def exam_chunk_basic(init_device, keep_gathered, pin_memory):
     my_chunk.access_chunk()
     assert my_chunk.device_type == 'cuda'
     for param, param_cp in zip(param_list, param_cp_list):
-        check_euqal(param, param_cp)
+        check_equal(param, param_cp)
     assert my_chunk.tensor_state_cnter[TensorState.HOLD] == 4
 
     my_chunk.tensor_trans_state(param_list[0], TensorState.COMPUTE)