diff --git a/colossalai/pipeline/schedule/zero_bubble_pp.py b/colossalai/pipeline/schedule/zero_bubble_pp.py
index 498240878..89c868aae 100644
--- a/colossalai/pipeline/schedule/zero_bubble_pp.py
+++ b/colossalai/pipeline/schedule/zero_bubble_pp.py
@@ -49,7 +49,6 @@ class ZeroBubbleVPipeScheduler(PipelineSchedule):
         overlap_p2p: bool = True,
     ):
         super().__init__(stage_manager)
-        # Not support overlap_p2p so far
         # batch info
         self.num_microbatch = num_microbatch
         self.microbatch_size = microbatch_size
@@ -543,8 +542,6 @@ class ZeroBubbleVPipeScheduler(PipelineSchedule):
             output_obj_grad_ = []

         # For chunk 0 stage 0, use micro_batch as input_obj_; and we don't have to cal microbatch dx.
-        # if model_chunk_id == 0 and self.stage_manager.is_first_stage(ignore_chunk=True):
-        #     return None

         # For loss backward; output_obj is loss; output_obj_grad should be None
         if model_chunk_id == 1 and self.stage_manager.is_first_stage(ignore_chunk=True):
@@ -718,10 +715,8 @@ class ZeroBubbleVPipeScheduler(PipelineSchedule):
         # Do not release_tensor_data loss, release_tensor_data other output_obj;
         if model_chunk_id == 1 and self.stage_manager.is_first_stage(ignore_chunk=True):
             self.output_tensors[model_chunk_id].append(output_obj)
-            # self.output_tensors_dw[model_chunk_id].append(output_obj)
         else:
             self.output_tensors[model_chunk_id].append(output_obj)
-            # self.output_tensors_dw[model_chunk_id].append(output_obj)

         # add output to send_fwd_buffer
         if model_chunk_id == 0:  # chunk 0