diff --git a/colossalai/utils/profiler/comm_profiler.py b/colossalai/utils/profiler/comm_profiler.py index 80f496c47..d672a8e20 100644 --- a/colossalai/utils/profiler/comm_profiler.py +++ b/colossalai/utils/profiler/comm_profiler.py @@ -140,7 +140,7 @@ class CommProfiler(BaseProfiler): kernel_name, code_location, vol = self.pending_metadata self.profiler.__exit__(None, None, None) - if self.profiler.enabled: + if self.profiler.enabled and dist.get_world_size(group) > 1: assert_flag = 0 current_comm_event = None events = self.profiler.function_events diff --git a/colossalai/utils/profiler/pcie_profiler.py b/colossalai/utils/profiler/pcie_profiler.py index 0724dc10e..2d325da69 100644 --- a/colossalai/utils/profiler/pcie_profiler.py +++ b/colossalai/utils/profiler/pcie_profiler.py @@ -80,8 +80,10 @@ class PcieProfiler(BaseProfiler): events = self.profiler.function_events for event in events: if event.name == "aten::_to_copy": - current_comm_event = PcieEvent(1, self.data_size * _get_numel(event.input_shapes[0]), - event.cuda_time_total) + t_shape = event.input_shapes[0] + if len(t_shape) == 0 or event.cuda_time_total == 0: + continue + current_comm_event = PcieEvent(1, self.data_size * _get_numel(t_shape), event.cuda_time_total) self.total_count += current_comm_event.count self.total_pcie_vol += current_comm_event.pcie_vol self.total_cuda_time += current_comm_event.cuda_time