diff --git a/internlm/utils/evaluation.py b/internlm/utils/evaluation.py index d60ebd2..6128249 100644 --- a/internlm/utils/evaluation.py +++ b/internlm/utils/evaluation.py @@ -112,7 +112,7 @@ def evaluate_on_val_dls( tensor_shape=tensor_shape, metric_hook_list=[val_sche_metric_hook], ): - _, _, loss, _ = trainer.execute_schedule( + _, _, loss, moe_loss = trainer.execute_schedule( batch, forward_only=True, return_loss=True, return_output_label=False ) else: @@ -126,11 +126,11 @@ def evaluate_on_val_dls( grad_accum_batch_size=grad_accum_batch_size, metric_hook_list=[val_sche_metric_hook], ): - _, _, loss, _ = trainer.execute_schedule( + _, _, loss, moe_loss = trainer.execute_schedule( batch, forward_only=True, return_loss=True, return_output_label=False ) if verbose: - val_loss += loss.item() + val_loss += loss.item() - moe_loss.item() assert val_idx != -1 dist.barrier() diff --git a/tests/test_training/test_loss.py b/tests/test_training/test_loss.py index 2f52500..29f891f 100644 --- a/tests/test_training/test_loss.py +++ b/tests/test_training/test_loss.py @@ -186,7 +186,7 @@ def train( # do forward and backward timer("fwd-bwd").start() - _, _, loss = trainer.execute_schedule(batch, forward_only=False, return_loss=True, return_output_label=False) + _, _, loss, _ = trainer.execute_schedule(batch, forward_only=False, return_loss=True, return_output_label=False) if gpc.is_rank_for_log(): assert loss is not None and not math.isnan(loss.item()) global cur_loss_list