mirror of https://github.com/hpcaitech/ColossalAI
[profiler] Fixed bugs in CommProfiler and PcieProfiler (#377)
parent b5f43acee3
commit 8c18eb0998
@@ -140,7 +140,7 @@ class CommProfiler(BaseProfiler):
         kernel_name, code_location, vol = self.pending_metadata
         self.profiler.__exit__(None, None, None)

-        if self.profiler.enabled:
+        if self.profiler.enabled and dist.get_world_size(group) > 1:
             assert_flag = 0
             current_comm_event = None
             events = self.profiler.function_events
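
The CommProfiler change only parses communication events when the process group actually spans more than one rank. Below is a minimal sketch of that guard, assuming `dist` is `torch.distributed` and `group` is an optional process group as in the surrounding method; the helper name is hypothetical and not part of the repository, where the check is written inline.

import torch.distributed as dist

def should_parse_comm_events(profiler_enabled: bool, group=None) -> bool:
    # Sketch only: skip event parsing when autograd profiling is off or when
    # the group has a single rank, since no collective traffic is recorded then.
    # Calling this requires an initialized torch.distributed process group.
    return profiler_enabled and dist.get_world_size(group) > 1
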
@@ -80,8 +80,10 @@ class PcieProfiler(BaseProfiler):
             events = self.profiler.function_events
             for event in events:
                 if event.name == "aten::_to_copy":
-                    current_comm_event = PcieEvent(1, self.data_size * _get_numel(event.input_shapes[0]),
-                                                   event.cuda_time_total)
+                    t_shape = event.input_shapes[0]
+                    if len(t_shape) == 0 or event.cuda_time_total == 0:
+                        continue
+                    current_comm_event = PcieEvent(1, self.data_size * _get_numel(t_shape), event.cuda_time_total)
                     self.total_count += current_comm_event.count
                     self.total_pcie_vol += current_comm_event.pcie_vol
                     self.total_cuda_time += current_comm_event.cuda_time
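
The PcieProfiler change skips aten::_to_copy events that carry no input shape or no recorded CUDA time before building a PcieEvent, rather than constructing an event from empty data. A minimal sketch of that guard follows, with PcieEventSketch and numel as hypothetical stand-ins for the repository's PcieEvent and _get_numel; `event` is assumed to be a torch autograd profiler FunctionEvent exposing input_shapes and cuda_time_total.

from dataclasses import dataclass
from functools import reduce
import operator

@dataclass
class PcieEventSketch:           # hypothetical stand-in for PcieEvent
    count: int                   # number of copy events represented
    pcie_vol: int                # bytes moved across PCIe
    cuda_time: int               # CUDA time in microseconds

def numel(shape) -> int:         # hypothetical stand-in for _get_numel
    # Product of the dimensions; 1 for an empty/scalar shape.
    return reduce(operator.mul, shape, 1)

def to_pcie_event(event, data_size: int):
    # Mirror of the guarded branch: ignore events without a usable shape or
    # with zero recorded CUDA time, otherwise record one PCIe transfer.
    t_shape = event.input_shapes[0]
    if len(t_shape) == 0 or event.cuda_time_total == 0:
        return None              # caller skips this event
    return PcieEventSketch(1, data_size * numel(t_shape), event.cuda_time_total)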