mirror of https://github.com/hpcaitech/ColossalAI
[profiler] Fixed bugs in CommProfiler and PcieProfiler (#377)
parent
b5f43acee3
commit
8c18eb0998
|
@@ -140,7 +140,7 @@ class CommProfiler(BaseProfiler):
|
|||
kernel_name, code_location, vol = self.pending_metadata
|
||||
self.profiler.__exit__(None, None, None)
|
||||
|
||||
if self.profiler.enabled:
|
||||
if self.profiler.enabled and dist.get_world_size(group) > 1:
|
||||
assert_flag = 0
|
||||
current_comm_event = None
|
||||
events = self.profiler.function_events
|
||||
|
|
|
@@ -80,8 +80,10 @@ class PcieProfiler(BaseProfiler):
|
|||
events = self.profiler.function_events
|
||||
for event in events:
|
||||
if event.name == "aten::_to_copy":
|
||||
current_comm_event = PcieEvent(1, self.data_size * _get_numel(event.input_shapes[0]),
|
||||
event.cuda_time_total)
|
||||
t_shape = event.input_shapes[0]
|
||||
if len(t_shape) == 0 or event.cuda_time_total == 0:
|
||||
continue
|
||||
current_comm_event = PcieEvent(1, self.data_size * _get_numel(t_shape), event.cuda_time_total)
|
||||
self.total_count += current_comm_event.count
|
||||
self.total_pcie_vol += current_comm_event.pcie_vol
|
||||
self.total_cuda_time += current_comm_event.cuda_time
|
||||
|
|
Loading…
Reference in New Issue