use rank-based JSON files to avoid inconsistency

pull/717/head
Jie Zhu 2022-04-11 17:00:47 +08:00
parent 9ac531aba5
commit a1d7ab041d
3 changed files with 10 additions and 5 deletions
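Before this change, every rank dumped its profiling results to the same JSON path (e.g. communication.json or pcie.json), so concurrent writers could clobber one another and leave an inconsistent file. Writing to a per-rank path such as worker{rank}.communication.json gives each process its own output. A minimal sketch of the pattern, assuming a plain rank integer in place of colossalai's gpc.get_global_rank() and a hypothetical dump_profile helper:

    import json
    from pathlib import Path

    def dump_profile(data: dict, json_dir: Path, rank: int) -> None:
        # One file per rank, so no two processes ever write to the same path.
        json_dir.mkdir(parents=True, exist_ok=True)
        with open(json_dir.joinpath(f"worker{rank}.communication.json"), "w") as f:
            json.dump(data, f)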


@@ -8,6 +8,7 @@ from torch.distributed import ReduceOp
 from colossalai.utils import get_current_device
 from .prof_utils import BaseProfiler, _format_time, _format_memory, _format_bandwidth
 from typing import List, Optional
+from colossalai.core import global_context as gpc
 import json

 def _get_code_location(depth: int):
@@ -109,8 +110,9 @@ class CommProfiler(BaseProfiler):
             })
         data["events"] = events_list

-        with open(json_dir.joinpath("communication.json"), "w") as f:
+        rank = gpc.get_global_rank()
+        with open(json_dir.joinpath(f"worker{rank}.communication.json"), "w") as f:
             json.dump(data, f)

     def to_file(self, filename: Path):


@@ -4,6 +4,7 @@ from colossalai.engine import Engine
 from colossalai.engine.ophooks import MemTracerOpHook
 from colossalai.utils.profiler import BaseProfiler
 import json
+from colossalai.core import global_context as gpc

 class MemProfiler(BaseProfiler):
     """Wraper of MemOpHook, used to show GPU memory usage through each iteration
@@ -46,7 +47,7 @@ class MemProfiler(BaseProfiler):
                 "cuda_usage": stats
             }
         }
+        rank = gpc.get_global_rank()
+        with open(log_dir.joinpath(f"worker{rank}.memory.json"), "w") as f:
             json.dump(data, f)


@@ -3,6 +3,7 @@ from torch.autograd.profiler import profile
 from .prof_utils import BaseProfiler, _format_time, _format_memory, _format_bandwidth
 from typing import List
 import json
+from colossalai.core import global_context as gpc

 def _get_size(dtype: str):
     if dtype == "fp16":
@@ -121,8 +122,9 @@ class PcieProfiler(BaseProfiler):
                 "count": event.count
             })
         data["events"] = events_list

-        with open(json_dir.joinpath("pcie.json"), "w") as f:
+        rank = gpc.get_global_rank()
+        with open(json_dir.joinpath(f"worker{rank}.pcie.json"), "w") as f:
             json.dump(data, f)

     def to_file(self, filename: Path):
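The per-worker files written above can be merged back together after a run; a minimal sketch, assuming all ranks share one output directory and using a hypothetical collect_worker_reports helper (not part of this commit):

    import json
    from pathlib import Path

    def collect_worker_reports(log_dir: Path, kind: str = "communication") -> dict:
        # Gather the worker0, worker1, ... dumps of one profiler kind into a single dict.
        reports = {}
        for path in sorted(log_dir.glob(f"worker*.{kind}.json")):
            worker_id = path.name.split(".")[0]
            with open(path) as f:
                reports[worker_id] = json.load(f)
        return reports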