mirror of https://github.com/InternLM/InternLM

refactor code

parent b01e20adc8
commit ac243e5b33
@@ -241,7 +241,7 @@ class PipelineScheduler(BaseScheduler):
         return step_id

     def _forward_step(self, engine, input_obj, return_tensors, return_output_label=True,
-                      accum_loss=None, accum_moe_loss=None, moe_loss_coeff:float=1.0):
+                      accum_loss=None, accum_moe_loss=None, moe_loss_coeff=1.0):
         """
         Forward step for passed-in model. If it is the first stage, the input tensor
         is obtained from data_iterator, otherwise the passed-in input_obj is used.
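For orientation, a minimal sketch of the input-selection behavior the docstring describes, with the MoE auxiliary loss scaled by moe_loss_coeff; the helper names here (is_first_stage, data_iter, a model returning an (output, moe_loss) pair) are illustrative assumptions, not this module's actual API:

def forward_step_sketch(model, input_obj, data_iter, is_first_stage, moe_loss_coeff=1.0):
    if is_first_stage:
        # The first pipeline stage reads a fresh microbatch from the data iterator.
        data, label = next(data_iter)
    else:
        # Later stages consume the activations received from the previous stage.
        data, label = input_obj, None
    output, moe_loss = model(data)
    # Scale the MoE auxiliary (load-balancing) loss before it is accumulated.
    return output, label, moe_loss * moe_loss_coeff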
@@ -342,7 +342,7 @@ class PipelineScheduler(BaseScheduler):

         return input_obj_grad

-    def _forward_only_step(self, engine, return_loss=True, return_output_label=True, moe_loss_coeff:float=1.0):
+    def _forward_only_step(self, engine, return_loss=True, return_output_label=True, moe_loss_coeff=1.0):
         """
         This function performs forward only computation process. The scheduling of microbatches is similar to the
         warmup phase, where each microbatch first receives the forward input from the previous stage, then performs
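The forward-only schedule described here reduces to a simple per-microbatch loop. A hedged sketch, assuming point-to-point helpers comm.recv_forward / comm.send_forward that stand in for whatever communication utilities the scheduler actually uses:

def forward_only_sketch(engine, comm, data_iter, num_microbatches, is_first_stage, is_last_stage):
    for _ in range(num_microbatches):
        # The first stage reads data directly; later stages receive activations
        # from the previous pipeline stage.
        input_obj = next(data_iter) if is_first_stage else comm.recv_forward()
        output_obj = engine(input_obj)  # forward pass for one microbatch
        if not is_last_stage:
            # Hand this microbatch's activations to the next stage.
            comm.send_forward(output_obj)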
@@ -415,7 +415,7 @@ class PipelineScheduler(BaseScheduler):

         return output, label, accum_loss, accum_moe_loss

-    def _forward_backward_step(self, engine, return_loss=True, return_output_label=True, moe_loss_coeff:float=1.0):
+    def _forward_backward_step(self, engine, return_loss=True, return_output_label=True, moe_loss_coeff=1.0):
         """
         This function schedules the forward and backward computation of microbatches in the pipeline in a 1F1B manner.
         It consists of three stages: warmup, 1F1B, and cooldown.
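The warmup/1F1B/cooldown split conventionally follows Megatron-style counting: earlier stages run extra warmup forwards so the pipeline fills, then every stage alternates one forward with one backward, and the remaining backwards drain during cooldown. A sketch of that convention, not a verified excerpt of this file:

def one_f_one_b_phases(num_microbatches, num_stages, stage_rank):
    num_warmup = min(num_microbatches, num_stages - stage_rank - 1)
    num_1f1b = num_microbatches - num_warmup  # paired forward/backward steps
    num_cooldown = num_warmup                 # leftover backwards at the end
    return num_warmup, num_1f1b, num_cooldown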
@@ -621,7 +621,7 @@ class PipelineScheduler(BaseScheduler):
         return output, label, accum_loss, accum_moe_loss

     def forward_backward_step(self, engine, data_iter, forward_only=False, return_loss=True,
-                              return_output_label=True, moe_loss_coeff:float=1.0):
+                              return_output_label=True, moe_loss_coeff=1.0):
         """Runs non-interleaved 1F1B schedule, with communication between pipeline stages.
         Returns a tuple with losses if the last stage, an empty tuple otherwise.

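A hypothetical call site for this entry point, assuming a constructed scheduler and engine; per the docstring, the loss entries are only populated on the last pipeline stage:

output, label, loss, moe_loss = scheduler.forward_backward_step(
    engine, data_iter, forward_only=False, return_loss=True,
    return_output_label=True, moe_loss_coeff=1.0,
)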
@@ -753,7 +753,7 @@ class InterleavedPipelineScheduler(PipelineScheduler):
             self.microbatch_offset[model_chunk_id] += self.microbatch_size
         return move_to_device(micro_batch_data)

-    def _forward_step(self, engine, chunk_id, moe_loss_coeff:float=1.0):
+    def _forward_step(self, engine, chunk_id, moe_loss_coeff=1.0):
         """Forward step for passed-in model. If it is the first stage, the input tensor
         is obtained from data_iterator, otherwise the passed-in input_obj is used.
         Returns output tensor. This is a helper function and can be ignored by users.
@@ -1287,7 +1287,7 @@ class InterleavedPipelineScheduler(PipelineScheduler):
         self._run_cooldown_loop(engine, num_microsteps, num_1f1b_micropairs=num_1f1b_micropairs)

     def forward_backward_step(self, engine, data_iter, forward_only=False, return_loss=True,
-                              return_output_label=True, moe_loss_coeff:float=1.0):
+                              return_output_label=True, moe_loss_coeff=1.0):
         """Run interleaved 1F1B schedule (model split into model chunks), with
         communication between pipeline stages as needed.

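Interleaved schedules typically map each microstep to a model chunk by rotating through the chunks in groups of the pipeline size; the Megatron-LM convention is sketched below as an assumption, since the mapping used by this scheduler is not visible in the diff:

def chunk_id_for_microstep(microstep, num_stages, num_model_chunks, forward=True):
    chunk = (microstep // num_stages) % num_model_chunks
    if not forward:
        # Backward passes traverse the chunks in reverse order.
        chunk = num_model_chunks - 1 - chunk
    return chunk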