mirror of https://github.com/hpcaitech/ColossalAI
[CI] fix typo with tests/ etc. (#3727)
* fix spelling errors in examples/comminity/
* fix spelling errors in tests/
* fix some spelling errors in tests/, colossalai/, etc.
* fix spelling errors in tests/ etc.

date: 2023.5.10
pull/3735/head
parent 899aa86368
commit 1f73609adb
@@ -27,7 +27,7 @@ def get_training_components():
                                      attention_probs_dropout_prob=0.)
         print('building AlbertForSequenceClassification model')
 
-        # adapting huggingface BertForSequenceClassification for single unitest calling interface
+        # adapting huggingface BertForSequenceClassification for single unittest calling interface
         class ModelAdaptor(AlbertForSequenceClassification):
 
             def forward(self, input_ids, labels):
@@ -7,8 +7,8 @@ from colossalai.testing import clear_cache_before_run, parameterize
 @clear_cache_before_run()
 @parameterize('device', ['cpu', 'cuda'])
 def test_accelerator(device):
-    acceleartor = Accelerator(device)
+    accelerator = Accelerator(device)
     model = nn.Linear(8, 8)
-    model = acceleartor.configure_model(model)
+    model = accelerator.configure_model(model)
     assert next(model.parameters()).device.type == device
-    del model, acceleartor
+    del model, accelerator
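For reference, the corrected test reads naturally as a self-contained script. The sketch below assumes `Accelerator` is importable from `colossalai.booster.accelerator`, which may differ between ColossalAI versions; everything else mirrors the hunk above.

```python
# Minimal sketch of the corrected accelerator test; the import path for
# Accelerator is an assumption and may vary between ColossalAI releases.
import torch.nn as nn

from colossalai.booster.accelerator import Accelerator


def test_accelerator(device: str = 'cpu'):
    accelerator = Accelerator(device)
    model = nn.Linear(8, 8)
    # configure_model is expected to move the module onto the target device
    model = accelerator.configure_model(model)
    assert next(model.parameters()).device.type == device
    del model, accelerator


if __name__ == '__main__':
    test_accelerator('cpu')
```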
@@ -56,7 +56,7 @@ class DPPluginWrapper(DPPluginBase):
 def check_dataloader_sharding():
     plugin = DPPluginWrapper()
 
-    # create a custom dasetset with 0 to 10
+    # create a custom dataset with 0 to 10
     dataset = TensorDataset(torch.arange(0, 10))
     train_dataloader = plugin.prepare_dataloader(dataset, batch_size=2)
 
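The point of `check_dataloader_sharding` is that `prepare_dataloader` should hand each rank a disjoint slice of the dataset. A minimal sketch of that behaviour using plain PyTorch's `DistributedSampler` follows; it illustrates the expected sharding, not the plugin's actual implementation.

```python
# Illustrative sharding with plain PyTorch; DPPluginWrapper.prepare_dataloader
# is expected to produce an equivalent per-rank split.
import torch
from torch.utils.data import DataLoader, DistributedSampler, TensorDataset


def sharded_loader(rank: int, world_size: int, batch_size: int = 2) -> DataLoader:
    dataset = TensorDataset(torch.arange(0, 10))
    sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank, shuffle=False)
    return DataLoader(dataset, batch_size=batch_size, sampler=sampler)


# rank 0 and rank 1 should see disjoint subsets of 0..9
items_rank0 = [x for batch in sharded_loader(0, 2) for x in batch[0].tolist()]
items_rank1 = [x for batch in sharded_loader(1, 2) for x in batch[0].tolist()]
assert not set(items_rank0) & set(items_rank1)
```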
@@ -48,7 +48,7 @@ def run_trainer(rank, world_size, port):
     pipelinable.policy = "uniform"
     model = pipelinable.partition(1, gpc.pipeline_parallel_size, gpc.get_local_rank(ParallelMode.PIPELINE))
 
-    # craete dataloaders
+    # create dataloaders
     root = Path(os.environ['DATA'])
     transform_train = transforms.Compose([
         transforms.RandomCrop(32, padding=4, pad_if_needed=True),
@@ -68,7 +68,7 @@ def run_trainer(rank, world_size, port):
     # create lr scheduler
     lr_scheduler = CosineAnnealingWarmupLR(optimizer=optimizer, total_steps=NUM_EPOCHS, warmup_steps=WARMUP_EPOCHS)
 
-    # intiailize
+    # initialize
     engine, train_dataloader, *_ = colossalai.initialize(model=model,
                                                          optimizer=optimizer,
                                                          criterion=criterion,
@@ -50,7 +50,7 @@ def run_trainer(rank, world_size, port):
     pipelinable.policy = "uniform"
     model = pipelinable.partition(1, gpc.pipeline_parallel_size, gpc.get_local_rank(ParallelMode.PIPELINE))
 
-    # craete dataloaders
+    # create dataloaders
     root = Path(os.environ['DATA'])
     transform_train = transforms.Compose([
         transforms.RandomCrop(32, padding=4, pad_if_needed=True),
@@ -70,7 +70,7 @@ def run_trainer(rank, world_size, port):
     # create lr scheduler
     lr_scheduler = CosineAnnealingWarmupLR(optimizer=optimizer, total_steps=NUM_EPOCHS, warmup_steps=WARMUP_EPOCHS)
 
-    # intiailize
+    # initialize
     engine, train_dataloader, *_ = colossalai.initialize(model=model,
                                                          optimizer=optimizer,
                                                          criterion=criterion,
@@ -64,7 +64,7 @@ class MyModule(torch.nn.Module):
 
 
 def _run_act_ckpt_codegen(rank, world_size, port):
-    # launch colossalai to make sure we could execute colossalai.utils.checkpoint currectly
+    # launch colossalai to make sure we could execute colossalai.utils.checkpoint currently
     colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
 
     # build model and run forward
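Worker functions such as `_run_act_ckpt_codegen(rank, world_size, port)` are written to be spawned once per rank. A minimal sketch of driving such a worker with `torch.multiprocessing` follows; the port value and the `partial` wiring are illustrative, not the repository's exact test harness.

```python
# Illustrative driver for a per-rank worker; the port choice is an assumption.
from functools import partial

import torch.multiprocessing as mp


def run_dist(worker, world_size: int = 2, port: int = 29500):
    # mp.spawn passes the process index (the rank) as the first positional argument
    mp.spawn(partial(worker, world_size=world_size, port=port), nprocs=world_size)
```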
@@ -122,7 +122,7 @@ def test_act_ckpt_codegen():
 
 
 def _run_act_ckpt_python_code_torch11(rank, world_size, port):
-    # launch colossalai to make sure we could execute colossalai.utils.checkpoint currectly
+    # launch colossalai to make sure we could execute colossalai.utils.checkpoint currently
     colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
 
     # build model and run forward
@@ -32,7 +32,7 @@ class MyModule(torch.nn.Module):
 
 
 def _run_act_ckpt_codegen(rank, world_size, port):
-    # launch colossalai to make sure we could execute colossalai.utils.checkpoint currectly
+    # launch colossalai to make sure we could execute colossalai.utils.checkpoint currently
     colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
 
     # build model and run forward
@@ -89,7 +89,7 @@ def test_act_ckpt_codegen():
 
 
 def _run_act_ckpt_python_code_torch11(rank, world_size, port):
-    # launch colossalai to make sure we could execute colossalai.utils.checkpoint currectly
+    # launch colossalai to make sure we could execute colossalai.utils.checkpoint currently
     colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
 
     # build model and run forward
@@ -56,7 +56,7 @@ def _test_fwd_and_bwd(model: torch.nn.Module, gm: ColoGraphModule, data: torch.T
     fx_out = gm(data)
     assert torch.equal(non_fx_out, fx_out), "fx_out doesn't comply with original output"
 
-    # test barckward
+    # test backward
     loss0 = non_fx_out.sum()
     loss0.backward()
     loss1 = fx_out.sum()
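`_test_fwd_and_bwd` checks that a traced graph module reproduces the eager model in both the forward output and the backward pass. A minimal sketch of the same pattern with plain `torch.fx` is given below; the deep copy and the gradient comparison are illustrative details, while the ColossalAI helper works on a `ColoGraphModule` built elsewhere in the test.

```python
# Sketch of a forward/backward equivalence check using plain torch.fx.
import copy

import torch
import torch.fx


def check_fwd_bwd(model: torch.nn.Module, data: torch.Tensor):
    # trace a deep copy so the two versions accumulate separate gradients
    gm = torch.fx.symbolic_trace(copy.deepcopy(model))

    non_fx_out = model(data)
    fx_out = gm(data)
    assert torch.equal(non_fx_out, fx_out), "fx_out doesn't comply with original output"

    # test backward: with identical outputs the summed losses match, so gradients should too
    non_fx_out.sum().backward()
    fx_out.sum().backward()
    for p0, p1 in zip(model.parameters(), gm.parameters()):
        assert torch.allclose(p0.grad, p1.grad)
```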
@@ -65,7 +65,7 @@ def _test_fwd_and_bwd(model: torch.nn.Module, gm: ColoGraphModule, data: torch.T
 
 
 def _run_offload_codegen(rank, world_size, port):
-    # launch colossalai to make sure we could execute colossalai.utils.checkpoint currectly
+    # launch colossalai to make sure we could execute colossalai.utils.checkpoint currently
     colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
 
     # build model and input
@@ -120,7 +120,7 @@ def test_act_ckpt_codegen():
 
 
 def _run_offload_codegen_torch11(rank, world_size, port):
-    # launch colossalai to make sure we could execute colossalai.utils.checkpoint currectly
+    # launch colossalai to make sure we could execute colossalai.utils.checkpoint currently
     colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
 
     # build model and input
@@ -45,7 +45,7 @@ def check_ring_qk(rank, world_size):
     ring_qk = colossalai.nn.layer.parallel_sequence.RingQK.apply
     sub_a = ring_qk(sub_q, sub_k, batch_size, num_heads, sub_seq_length)
 
-    # check master and distributed attetion scores
+    # check master and distributed attention scores
     sub_master_a = a[:, rank * sub_seq_length:(rank + 1) * sub_seq_length]
     assert torch.allclose(sub_a, sub_master_a, rtol=1e-5, atol=1e-2)
 
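The ring-QK check compares each rank's partial attention scores against the matching slice of scores computed on the full sequence. The sketch below shows that reference computation with illustrative shapes; the distributed `RingQK.apply` path is what the test actually exercises.

```python
# Illustrative reference slice for the ring-QK check; tensor shapes are assumptions.
import torch


def reference_sub_scores(q: torch.Tensor, k: torch.Tensor, rank: int, sub_seq_length: int) -> torch.Tensor:
    # q, k: (batch * num_heads, seq_len, head_dim); compute the full attention scores first
    a = torch.matmul(q, k.transpose(-1, -2))
    # each rank owns one contiguous slice of the query dimension
    return a[:, rank * sub_seq_length:(rank + 1) * sub_seq_length]
```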
@@ -41,7 +41,7 @@ def run_routing(rank, world_size, port, rs=2, hidden_size=128, data_type=torch.f
     if data_type == torch.float16:
         layer = layer.half()
 
-    # use matrix multiplication instead of COL_MOE_KERNL in MOE dispatch and combine
+    # use matrix multiplication instead of COL_MOE_KERNEL in MOE dispatch and combine
     layer.use_kernel = False
     old_out, _ = layer(tokens)
     ech = old_out.shape
@@ -57,7 +57,7 @@ def run_routing(rank, world_size, port, rs=2, hidden_size=128, data_type=torch.f
     layer.gate_weight.grad.zero_()
 
     layer.use_kernel = True
-    new_out, _ = layer(tokens)    # get ouputs through colossal kernel
+    new_out, _ = layer(tokens)    # get outputs through colossal kernel
 
     if data_type == torch.float32:
         check_equal(old_out, new_out)
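The routing test runs the MoE layer twice, once with the fused kernel disabled (plain matrix multiplication for dispatch and combine) and once with it enabled, then asserts the two outputs agree. A minimal sketch of that comparison pattern is below; `layer`, `tokens`, and the tolerances are placeholders for objects and settings built earlier in the test.

```python
# Illustrative kernel-vs-matmul comparison; the tolerances are assumptions.
import torch


def compare_moe_outputs(layer, tokens, data_type=torch.float32):
    layer.use_kernel = False
    old_out, _ = layer(tokens)      # reference path: matrix multiplication

    layer.use_kernel = True
    new_out, _ = layer(tokens)      # fused kernel path

    if data_type == torch.float32:
        assert torch.allclose(old_out, new_out, rtol=1e-5, atol=1e-5)
    else:
        assert torch.allclose(old_out, new_out, rtol=1e-3, atol=1e-3)
```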
@@ -329,6 +329,6 @@ def test_pretrain_load(world_size):
 
 if __name__ == '__main__':
     # test_model_parameters()
-    # test_colo_optgimizer()
+    # test_colo_optimizer()
     test_model(4)
     # test_pretrain_load(4)
@@ -90,7 +90,7 @@ def run_check(rank, world_size, port):
     prev_rank = gpc.get_prev_global_rank(ParallelMode.PIPELINE)
     next_rank = gpc.get_next_global_rank(ParallelMode.PIPELINE)
     logger.info('Rank {0}: prev rank {1}, next rank {2}'.format(rank, prev_rank, next_rank))
-    logger.info('Distributed environment is initialzied.')
+    logger.info('Distributed environment is initialized.')
 
     check_comm(world_size, rank, prev_rank, next_rank, logger)
     gpc.destroy()
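`run_check` logs the previous and next ranks that `gpc` reports for the pipeline group. For a simple ring-style ordering those neighbours reduce to modular arithmetic, as the purely illustrative sketch below shows; the real topology, including how the first and last stages are handled, is managed by `gpc`.

```python
# Illustrative neighbour computation for a ring-style pipeline ordering;
# this is not how gpc derives the ranks, only a simple reference picture.
def neighbour_ranks(rank: int, world_size: int) -> tuple[int, int]:
    prev_rank = (rank - 1) % world_size
    next_rank = (rank + 1) % world_size
    return prev_rank, next_rank
```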
@@ -23,7 +23,7 @@ def add_param(param_list, param_cp_list, *args, **kwargs):
     param_cp_list.append(param.clone())
 
 
-def check_euqal(param, param_cp):
+def check_equal(param, param_cp):
     if param.device != param_cp.device:
         temp = param.data.to(param_cp.device)
     else:
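The hunk cuts off right after the `else:` branch. A sketch of what the complete helper plausibly looks like follows; the device handling is exactly what the hunk shows, while the final comparison line is an assumption.

```python
# Plausible full form of the renamed helper; the last line is an assumption
# since the hunk is truncated after the else branch.
import torch


def check_equal(param, param_cp):
    # compare on the copy's device so parameters living on CPU or GPU can be checked
    if param.device != param_cp.device:
        temp = param.data.to(param_cp.device)
    else:
        temp = param.data
    assert torch.equal(temp, param_cp.data)
```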
@@ -57,7 +57,7 @@ def exam_chunk_basic(init_device, keep_gathered, pin_memory):
         my_chunk.append_tensor(param)
     assert my_chunk.utilized_size == 597
     for param, param_cp in zip(param_list, param_cp_list):
-        check_euqal(param, param_cp)
+        check_equal(param, param_cp)
     my_chunk.close_chunk()
 
     if keep_gathered is False:
@@ -77,7 +77,7 @@ def exam_chunk_basic(init_device, keep_gathered, pin_memory):
     my_chunk.access_chunk()
     assert my_chunk.device_type == 'cuda'
     for param, param_cp in zip(param_list, param_cp_list):
-        check_euqal(param, param_cp)
+        check_equal(param, param_cp)
 
     assert my_chunk.tensor_state_cnter[TensorState.HOLD] == 4
     my_chunk.tensor_trans_state(param_list[0], TensorState.COMPUTE)