From 907ac4a2dc71678efe5929da69dded795696c2ee Mon Sep 17 00:00:00 2001 From: 1SAA Date: Mon, 14 Mar 2022 16:43:21 +0800 Subject: [PATCH] fixed error when no collective communication in CommProfiler --- colossalai/utils/profiler/comm_profiler.py | 11 +++++++---- colossalai/utils/profiler/pcie_profiler.py | 8 ++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/colossalai/utils/profiler/comm_profiler.py b/colossalai/utils/profiler/comm_profiler.py index 93c72cc65..a4f5729c9 100644 --- a/colossalai/utils/profiler/comm_profiler.py +++ b/colossalai/utils/profiler/comm_profiler.py @@ -93,16 +93,16 @@ class CommProfiler(BaseProfiler): dist.reduce = torch_reduce def to_tensorboard(self, writer): - writer.add_text(tag="Collective Communication", text_string=self.result_list("\n\n")) + writer.add_text(tag="Collective Communication", text_string=self.result_str("\n\n")) def to_file(self, filename: Path): with open(filename, "w") as f: - f.write(self.result_list()) + f.write(self.result_str()) def show(self): - print(self.result_list()) + print(self.result_str()) - def result_list(self, sep: str = "\n"): + def result_str(self, sep: str = "\n"): res = [] def append(s: str = None): @@ -114,6 +114,9 @@ class CommProfiler(BaseProfiler): append("Warnning: there exists multiple communication operations in the same time. As a result, " "the profiling result is not accurate.") + if self.total_cuda_time == 0: + return "No collective communication has been called yet!" + append("Collective communication profiling result:") append("total cuda time: {}".format(_format_time(self.total_cuda_time))) append("average bandwidth: {}".format(_format_bandwidth(self.total_comm_vol, self.total_cuda_time))) diff --git a/colossalai/utils/profiler/pcie_profiler.py b/colossalai/utils/profiler/pcie_profiler.py index 3a9ec95b4..526222941 100644 --- a/colossalai/utils/profiler/pcie_profiler.py +++ b/colossalai/utils/profiler/pcie_profiler.py @@ -105,16 +105,16 @@ class PcieProfiler(BaseProfiler): self.profiler = None def to_tensorboard(self, writer): - writer.add_text(tag="Data Transmission", text_string=self.result_list("\n\n")) + writer.add_text(tag="Data Transmission", text_string=self.result_str("\n\n")) def to_file(self, filename: Path): with open(filename, "w") as f: - f.write(self.result_list()) + f.write(self.result_str()) def show(self): - print(self.result_list()) + print(self.result_str()) - def result_list(self, sep: str = "\n"): + def result_str(self, sep: str = "\n"): res = [] def append(s: str = None):