from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Optional, Union

from colossalai.core import global_context as gpc


# copied from newer versions of PyTorch to support older versions
def _format_time(time_us):
    """Defines how to format time in FunctionEvent."""
    US_IN_SECOND = 1000.0 * 1000.0
    US_IN_MS = 1000.0
    if time_us >= US_IN_SECOND:
        return '{:.3f}s'.format(time_us / US_IN_SECOND)
    if time_us >= US_IN_MS:
        return '{:.3f}ms'.format(time_us / US_IN_MS)
    return '{:.3f}us'.format(time_us)


# copied from newer versions of PyTorch to support older versions
def _format_memory(nbytes):
    """Returns a formatted memory size string."""
    KB = 1024
    MB = 1024 * KB
    GB = 1024 * MB
    if abs(nbytes) >= GB:
        return '{:.2f} GB'.format(nbytes * 1.0 / GB)
    elif abs(nbytes) >= MB:
        return '{:.2f} MB'.format(nbytes * 1.0 / MB)
    elif abs(nbytes) >= KB:
        return '{:.2f} KB'.format(nbytes * 1.0 / KB)
    else:
        return str(nbytes) + ' B'


def _format_bandwidth(volume: Union[int, float], time_us: Union[int, float]):
    """Returns a formatted bandwidth string, given a volume in bytes and a duration in microseconds."""
    # bytes/us -> MB/s: multiply by 1e6 us per second, divide by 1024**2 bytes per MB
    sec_div_mb = (1000.0 / 1024.0)**2
    mb_per_sec = volume / time_us * sec_div_mb

    if mb_per_sec >= 1024.0:
        return '{:.3f} GB/s'.format(mb_per_sec / 1024.0)
    else:
        return '{:.3f} MB/s'.format(mb_per_sec)


class BaseProfiler(ABC):

    def __init__(self, profiler_name: str, priority: int):
        self.name = profiler_name
        self.priority = priority

    @abstractmethod
    def enable(self):
        pass

    @abstractmethod
    def disable(self):
        pass

    @abstractmethod
    def to_tensorboard(self, writer):
        pass

    @abstractmethod
    def to_file(self, filename: Path):
        pass

    @abstractmethod
    def show(self):
        pass


class ProfilerContext(object):
    """Profiler context manager.

    Usage:
    ```python
    world_size = 4
    inputs = torch.randn(10, 10, dtype=torch.float32, device=get_current_device())
    outputs = torch.empty(world_size, 10, 10, dtype=torch.float32, device=get_current_device())
    outputs_list = list(torch.chunk(outputs, chunks=world_size, dim=0))

    cc_prof = CommProfiler()

    with ProfilerContext([cc_prof]) as prof:
        op = dist.all_reduce(inputs, async_op=True)
        dist.all_gather(outputs_list, inputs)
        op.wait()
        dist.reduce_scatter(inputs, outputs_list)
        dist.broadcast(inputs, 0)
        dist.reduce(inputs, 0)

    prof.show()
    ```
    """

    def __init__(self, profilers: Optional[List[BaseProfiler]] = None, enable: bool = True):
        self.enable = enable
        # run profilers in ascending priority order; tolerate a missing list
        self.profilers = sorted(profilers or [], key=lambda prof: prof.priority)

    def __enter__(self):
        if self.enable:
            for prof in self.profilers:
                prof.enable()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.enable:
            for prof in self.profilers:
                prof.disable()

    def to_tensorboard(self, writer):
        from torch.utils.tensorboard import SummaryWriter

        assert isinstance(writer, SummaryWriter), \
            f'torch.utils.tensorboard.SummaryWriter is required, but found {type(writer)}.'

        for prof in self.profilers:
            prof.to_tensorboard(writer)

    def to_file(self, log_dir: Union[str, Path]):
        if isinstance(log_dir, str):
            log_dir = Path(log_dir)

        if not log_dir.exists():
            log_dir.mkdir(parents=True, exist_ok=True)
        for prof in self.profilers:
            log_file = log_dir.joinpath(f'{prof.name}_rank_{gpc.get_global_rank()}.log')
            prof.to_file(log_file)

    def show(self):
        for prof in self.profilers:
            prof.show()
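

# --- Illustrative sketch, not part of the original module ---
# A minimal BaseProfiler subclass showing how the abstract hooks fit together
# with ProfilerContext. `TimerProfiler` and everything inside it are
# hypothetical names made up for this example; real profilers such as
# CommProfiler live in their own modules.
import time


class TimerProfiler(BaseProfiler):
    """Toy profiler that records wall-clock time spent inside the context."""

    def __init__(self):
        super().__init__(profiler_name='timer', priority=0)
        self.elapsed_us = 0.0
        self._start = None

    def enable(self):
        self._start = time.perf_counter()

    def disable(self):
        self.elapsed_us += (time.perf_counter() - self._start) * 1e6

    def to_tensorboard(self, writer):
        writer.add_text('timer', self.result_str())

    def to_file(self, filename: Path):
        filename.write_text(self.result_str())

    def show(self):
        print(self.result_str())

    def result_str(self):
        return f'elapsed: {_format_time(self.elapsed_us)}'


if __name__ == '__main__':
    # Formatting helpers in isolation; expected outputs shown in comments.
    print(_format_time(2.5 * 1000 * 1000))            # 2.500s
    print(_format_memory(3 * 1024 * 1024))            # 3.00 MB
    print(_format_bandwidth(1024**3, 1000 * 1000))    # 1.000 GB/s (1 GiB in 1 s)

    # Profilers are enabled on entry and disabled on exit of the context.
    with ProfilerContext([TimerProfiler()]) as prof:
        time.sleep(0.1)
    prof.show()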