refactor code

pull/182/head
zhanglei 2023-08-22 10:42:39 +08:00
parent b01e20adc8
commit ac243e5b33
1 changed file with 10 additions and 10 deletions

@@ -241,7 +241,7 @@ class PipelineScheduler(BaseScheduler):
         return step_id

     def _forward_step(self, engine, input_obj, return_tensors, return_output_label=True,
-                      accum_loss=None, accum_moe_loss=None, moe_loss_coeff:float=1.0):
+                      accum_loss=None, accum_moe_loss=None, moe_loss_coeff=1.0):
         """
         Forward step for passed-in model. If it is the first stage, the input tensor
         is obtained from data_iterator, otherwise the passed-in input_obj is used.
@@ -342,7 +342,7 @@ class PipelineScheduler(BaseScheduler):
         return input_obj_grad

-    def _forward_only_step(self, engine, return_loss=True, return_output_label=True, moe_loss_coeff:float=1.0):
+    def _forward_only_step(self, engine, return_loss=True, return_output_label=True, moe_loss_coeff=1.0):
         """
         This function performs forward only computation process. The scheduling of microbatches is similar to the
         warmup phase, where each microbatch first receives the forward input from the previous stage, then performs
@@ -415,7 +415,7 @@ class PipelineScheduler(BaseScheduler):
         return output, label, accum_loss, accum_moe_loss

-    def _forward_backward_step(self, engine, return_loss=True, return_output_label=True, moe_loss_coeff:float=1.0):
+    def _forward_backward_step(self, engine, return_loss=True, return_output_label=True, moe_loss_coeff=1.0):
         """
         This function schedules the forward and backward computation of microbatches in the pipeline in a 1F1B manner.
         It consists of three stages: warmup, 1F1B, and cooldown.
@@ -621,7 +621,7 @@ class PipelineScheduler(BaseScheduler):
         return output, label, accum_loss, accum_moe_loss

     def forward_backward_step(self, engine, data_iter, forward_only=False, return_loss=True,
-                              return_output_label=True, moe_loss_coeff:float=1.0):
+                              return_output_label=True, moe_loss_coeff=1.0):
         """Runs non-interleaved 1F1B schedule, with communication between pipeline stages.

         Returns a tuple with losses if the last stage, an empty tuple otherwise.
@@ -718,7 +718,7 @@ class InterleavedPipelineScheduler(PipelineScheduler):
         self._input_objs = [[] for _ in range(num_chunks)]
         self._output_objs = [[] for _ in range(num_chunks)]
         self._output_obj_grads = [[] for _ in range(num_chunks)]
         self._moe_losses = [[] for _ in range(num_chunks)]
         self._input_obj_shapes = [self.tensor_shape for _ in range(num_chunks)]
         self._output_obj_shapes = [None for _ in range(num_chunks)]
@@ -731,7 +731,7 @@ class InterleavedPipelineScheduler(PipelineScheduler):
         self._input_objs = [[] for _ in range(self._num_chunks)]
         self._output_objs = [[] for _ in range(self._num_chunks)]
         self._output_obj_grads = [[] for _ in range(self._num_chunks)]
         self._moe_losses = [[] for _ in range(self._num_chunks)]
         self._input_obj_shapes = [self.tensor_shape for _ in range(self._num_chunks)]
         self._output_obj_shapes = [None for _ in range(self._num_chunks)]
@@ -753,7 +753,7 @@ class InterleavedPipelineScheduler(PipelineScheduler):
         self.microbatch_offset[model_chunk_id] += self.microbatch_size
         return move_to_device(micro_batch_data)

-    def _forward_step(self, engine, chunk_id, moe_loss_coeff:float=1.0):
+    def _forward_step(self, engine, chunk_id, moe_loss_coeff=1.0):
         """Forward step for passed-in model. If it is the first stage, the input tensor
         is obtained from data_iterator, otherwise the passed-in input_obj is used.
         Returns output tensor. This is a helper function and can be ignored by users.
@@ -1287,7 +1287,7 @@ class InterleavedPipelineScheduler(PipelineScheduler):
         self._run_cooldown_loop(engine, num_microsteps, num_1f1b_micropairs=num_1f1b_micropairs)

     def forward_backward_step(self, engine, data_iter, forward_only=False, return_loss=True,
-                              return_output_label=True, moe_loss_coeff:float=1.0):
+                              return_output_label=True, moe_loss_coeff=1.0):
         """Run interleaved 1F1B schedule (model split into model chunks), with
         communication between pipeline stages as needed.
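
For context on the parameter this commit touches: a coefficient like moe_loss_coeff typically scales the MoE auxiliary (load-balancing) loss before it is accumulated across microbatches. The sketch below is a minimal, hypothetical illustration of that pattern, not this repository's code; the model and accumulator interfaces are assumptions, and only the parameter names (moe_loss_coeff, accum_moe_loss) mirror the signatures in the diff.

import torch

def forward_step_sketch(model, inputs, accum_moe_loss, moe_loss_coeff=1.0):
    """Hypothetical forward step: scale and accumulate the MoE auxiliary loss."""
    # Assumed interface: the model returns its output plus a list of per-layer aux losses.
    output, moe_losses = model(inputs)
    # moe_loss_coeff weights the load-balancing loss; 1.0 (the default) keeps it unscaled.
    moe_loss = sum(moe_losses) * moe_loss_coeff
    # Accumulate a detached copy for reporting, outside the autograd graph.
    accum_moe_loss += moe_loss.detach()
    return output, moe_loss

# Usage with a stand-in model:
model = lambda x: (x * 2, [torch.tensor(0.1), torch.tensor(0.2)])
accum = torch.zeros(1)
out, loss = forward_step_sketch(model, torch.ones(3), accum, moe_loss_coeff=0.5)
# loss == 0.5 * (0.1 + 0.2); accum now holds 0.15

Under this reading, the commit only drops the inline ":float" annotations from the keyword arguments; the default value and the scaling behavior are unchanged.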