mirror of https://github.com/hpcaitech/ColossalAI
Merge pull request #409 from 1SAA/develop
[hotfix] fixed error when no collective communication in CommProfiler (ref: pull/413/head)
commit
32296cf462
|
@ -93,16 +93,16 @@ class CommProfiler(BaseProfiler):
|
|||
dist.reduce = torch_reduce
|
||||
|
||||
def to_tensorboard(self, writer):
|
||||
writer.add_text(tag="Collective Communication", text_string=self.result_list("\n\n"))
|
||||
writer.add_text(tag="Collective Communication", text_string=self.result_str("\n\n"))
|
||||
|
||||
def to_file(self, filename: Path):
|
||||
with open(filename, "w") as f:
|
||||
f.write(self.result_list())
|
||||
f.write(self.result_str())
|
||||
|
||||
def show(self):
|
||||
print(self.result_list())
|
||||
print(self.result_str())
|
||||
|
||||
def result_list(self, sep: str = "\n"):
|
||||
def result_str(self, sep: str = "\n"):
|
||||
res = []
|
||||
|
||||
def append(s: str = None):
|
||||
|
@ -114,6 +114,9 @@ class CommProfiler(BaseProfiler):
|
|||
append("Warnning: there exists multiple communication operations in the same time. As a result, "
|
||||
"the profiling result is not accurate.")
|
||||
|
||||
if self.total_cuda_time == 0:
|
||||
return "No collective communication has been called yet!"
|
||||
|
||||
append("Collective communication profiling result:")
|
||||
append("total cuda time: {}".format(_format_time(self.total_cuda_time)))
|
||||
append("average bandwidth: {}".format(_format_bandwidth(self.total_comm_vol, self.total_cuda_time)))
|
||||
|
|
|
@ -105,16 +105,16 @@ class PcieProfiler(BaseProfiler):
|
|||
self.profiler = None
|
||||
|
||||
def to_tensorboard(self, writer):
|
||||
writer.add_text(tag="Data Transmission", text_string=self.result_list("\n\n"))
|
||||
writer.add_text(tag="Data Transmission", text_string=self.result_str("\n\n"))
|
||||
|
||||
def to_file(self, filename: Path):
|
||||
with open(filename, "w") as f:
|
||||
f.write(self.result_list())
|
||||
f.write(self.result_str())
|
||||
|
||||
def show(self):
|
||||
print(self.result_list())
|
||||
print(self.result_str())
|
||||
|
||||
def result_list(self, sep: str = "\n"):
|
||||
def result_str(self, sep: str = "\n"):
|
||||
res = []
|
||||
|
||||
def append(s: str = None):
|
||||
|
|
Loading…
Reference in New Issue