2022-10-18 08:31:22 +00:00
|
|
|
from time import time
|
|
|
|
from typing import Optional
|
|
|
|
|
|
|
|
import torch
|
|
|
|
import torch.distributed as dist
|
|
|
|
import torch.nn as nn
|
|
|
|
|
2023-01-11 04:22:45 +00:00
|
|
|
from colossalai.utils import is_ddp_ignored
|
2022-10-18 08:31:22 +00:00
|
|
|
|
2023-04-04 05:48:16 +00:00
|
|
|
from .manager import ChunkManager
|
|
|
|
from .search_utils import search_chunk_configuration
|
|
|
|
|
2022-10-18 08:31:22 +00:00
|
|
|
|
2023-01-11 07:30:09 +00:00
|
|
|
def safe_div(a, b):
    """Divide ``a`` by ``b`` without raising when an operand is zero.

    Args:
        a: Numerator.
        b: Denominator.

    Returns:
        ``0`` when ``a`` or ``b`` equals zero, otherwise ``a / b``.
    """
    # Guard the denominator as well as the numerator: checking only ``a``
    # would still raise ZeroDivisionError for a non-zero ``a`` and ``b == 0``.
    if a == 0 or b == 0:
        return 0
    return a / b
|
|
|
|
|
|
|
|
|
2022-10-18 08:31:22 +00:00
|
|
|
def init_chunk_manager(model: nn.Module,
                       init_device: Optional[torch.device] = None,
                       hidden_dim: Optional[int] = None,
                       verbose: bool = False,
                       **kwargs) -> ChunkManager:
    """Search a chunk configuration for ``model`` and build a ``ChunkManager`` from it.

    Every process taking part must call this function, because it issues
    ``dist.barrier()`` before and after the search and once more before
    constructing the manager.

    Args:
        model: Module handed to ``search_chunk_configuration``.
        init_device: Forwarded as the second argument of ``ChunkManager``.
        hidden_dim: Used as the search interval when truthy; otherwise the
            interval is 1024.
        verbose: When true, rank 0 prints the search time and the used/wasted
            sizes (scaled by 2^20).
        **kwargs: Extra keyword arguments forwarded to
            ``search_chunk_configuration``. The ``"search_interval"`` key is
            always overwritten by the value derived from ``hidden_dim``.

    Returns:
        The ``ChunkManager`` created from the searched configuration.
    """
    # Any caller-supplied "search_interval" is replaced here.
    kwargs["search_interval"] = hidden_dim if hidden_dim else 1024    # defaults to 1024

    # Time the search between two barriers so the span covers all ranks.
    dist.barrier()
    started = time()
    config_dict, total_size, wasted_size = search_chunk_configuration(model, **kwargs)
    dist.barrier()
    elapsed = time() - started

    # Report sizes in units of 2^20.
    scale = 1024**2
    total_size /= scale
    wasted_size /= scale

    if verbose and dist.get_rank() == 0:
        wasted_pct = 100 * safe_div(wasted_size, total_size + wasted_size)
        report = "".join((
            "searching chunk configuration is completed in {:.2f} s.\n".format(elapsed),
            "used number: {:.2f} * 2^20, wasted number: {:.2f} * 2^20\n".format(total_size, wasted_size),
            "total wasted percentage is {:.2f}%".format(wasted_pct),
        ))
        print(report, flush=True)
    # Outside the rank-0 branch: every rank has to reach this barrier.
    dist.barrier()

    return ChunkManager(config_dict, init_device)
|