mirror of https://github.com/hpcaitech/ColossalAI

remove perf log, unrelated file and so on

parent 5c6c5d6be3
commit 1ec92d29af
@@ -83,7 +83,7 @@ class ChunkManager:
         if chunk_group:
             # the chunk group is not empty
             # close the last chunk
-            self.__close_one_chunk(chunk_group[-1])  # chunk[-1] is full: close it so no more tensors are appended, then scatter it across the ZeRO process group
+            self.__close_one_chunk(chunk_group[-1])
 
         if tensor.numel() > chunk_size:
             chunk_size = tensor.numel()
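Below is a minimal, self-contained sketch (mine, not the ColossalAI implementation) of the chunk-sizing rule visible in the context lines of this hunk: a tensor larger than the configured chunk size gets a chunk grown to exactly its own size.

import torch

def pick_chunk_size(tensor: torch.Tensor, default_chunk_size: int) -> int:
    # An oversized tensor cannot share a chunk, so the chunk grows to fit it.
    if tensor.numel() > default_chunk_size:
        return tensor.numel()
    return default_chunk_size

print(pick_chunk_size(torch.zeros(1024), 512))  # 1024: tensor exceeds the default
print(pick_chunk_size(torch.zeros(128), 512))   # 512: default chunk size is kept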
@@ -33,7 +33,7 @@ class GeminiZeROHook(ColoParamOpHook):
         all_chunks = self._chunk_manager.get_chunks(params)
 
         # wait for prefetched chunks, filter those are not prefetched
-        chunks_fetch_sync = self._gemini_manager.wait_chunks(all_chunks)  # the chunks that still have to be fetched now
+        chunks_fetch_sync = self._gemini_manager.wait_chunks(all_chunks)
 
         # transfer state
         for p in params:
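As a reading aid for the call above, here is a hedged, dependency-free sketch of what get_chunks(params) does conceptually (function and argument names here are illustrative, not the ColossalAI API): map each parameter to the chunk holding it and return the distinct chunks in order.

from typing import Dict, List, Sequence

def get_chunks(params: Sequence[str], tensor_to_chunk: Dict[str, str]) -> List[str]:
    chunks: List[str] = []
    for p in params:
        chunk = tensor_to_chunk[p]
        if chunk not in chunks:  # several parameters can live in the same chunk
            chunks.append(chunk)
    return chunks

print(get_chunks(["w1", "w2", "b1"], {"w1": "chunk0", "w2": "chunk0", "b1": "chunk1"}))
# ['chunk0', 'chunk1']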
@@ -125,7 +125,7 @@ class GeminiManager:
                 self._async_works[chunk].wait()
                 del self._async_works[chunk]
             else:
-                non_prefetched_chunks.append(chunk)  # not prefetched earlier, so it must be fetched now
+                non_prefetched_chunks.append(chunk)
         return tuple(non_prefetched_chunks)
 
     def add_work(self, chunk: Chunk, work: dist.Work):
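The hunk above is the synchronization side of prefetching. Here is a hedged sketch of the same bookkeeping, using concurrent.futures futures in place of torch.distributed works so it runs standalone:

from concurrent.futures import Future, ThreadPoolExecutor
from typing import Dict, Iterable, Tuple

def wait_chunks(chunks: Iterable[str], async_works: Dict[str, Future]) -> Tuple[str, ...]:
    non_prefetched = []
    for chunk in chunks:
        if chunk in async_works:
            # The chunk was prefetched earlier: block until its transfer finishes.
            async_works[chunk].result()
            del async_works[chunk]
        else:
            # Not prefetched before, so it still has to be fetched synchronously now.
            non_prefetched.append(chunk)
    return tuple(non_prefetched)

with ThreadPoolExecutor() as pool:
    works = {"chunk0": pool.submit(lambda: None)}  # pretend chunk0 was prefetched
    print(wait_chunks(["chunk0", "chunk1"], works))  # ('chunk1',)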
@@ -113,10 +113,8 @@ class StaticPlacementPolicy(PlacementPolicy):
     def get_prefetch_chunks(self) -> List[Chunk]:
         if self.gemini_manager.is_warmup():  # no prefetch during warmup since we need compute_list
            return []
-        # upper bound on how many async works can be in flight
        can_prefetch = self.max_prefetch - len(self.gemini_manager._async_works)
        prefetch = []
-        # with static placement an OOM is simply an OOM; with dynamic placement we may first have to inspect the current runtime memory, allocating space or evicting chunks
        for i in range(self.gemini_manager.compute_idx + 1, len(self.gemini_manager.compute_list)):
            for chunk in self.gemini_manager.compute_list[i]:
                if len(prefetch) >= can_prefetch:
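A standalone sketch of the look-ahead selection this method performs (simplified: plain strings stand in for chunks, the "already on device" check is assumed, and a return replaces the nested-loop break):

from typing import List, Sequence, Set

def plan_prefetch(compute_list: Sequence[Sequence[str]], compute_idx: int,
                  max_prefetch: int, in_flight_works: int,
                  already_on_device: Set[str]) -> List[str]:
    can_prefetch = max_prefetch - in_flight_works  # budget left for new async works
    prefetch: List[str] = []
    for step in compute_list[compute_idx + 1:]:
        for chunk in step:
            if len(prefetch) >= can_prefetch:
                return prefetch
            if chunk not in already_on_device and chunk not in prefetch:
                prefetch.append(chunk)
    return prefetch

# Two upcoming steps, a budget of 3 - 1 = 2 works, and chunk "a" already resident:
print(plan_prefetch([["a"], ["a", "b"], ["c", "d"]], 0, 3, 1, {"a"}))  # ['b', 'c']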
@@ -1,142 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "import torch.nn as nn"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Linear(in_features=10, out_features=5, bias=False) 50\n",
-      "Linear(in_features=5, out_features=10, bias=False) 50\n",
-      "Linear(in_features=10, out_features=10, bias=False) 100\n"
-     ]
-    }
-   ],
-   "source": [
-    "class Toy(nn.Module):\n",
-    " \n",
-    "    def __init__(self):\n",
-    "        super(Toy, self).__init__()\n",
-    "        self.fc1 = nn.Linear(10,5, bias=False)\n",
-    "        self.m3 = nn.Sequential(nn.Linear(5, 10, bias=False), nn.Linear(10,10, bias=False))\n",
-    "\n",
-    "t = Toy()\n",
-    "for mod in t.modules():\n",
-    "    for p in mod.parameters(recurse=False):\n",
-    "        print(mod, p.numel())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "torch.Size([5, 10]) 50\n",
-      "torch.Size([10, 5]) 50\n",
-      "torch.Size([10, 10]) 100\n"
-     ]
-    }
-   ],
-   "source": [
-    "for p in t.parameters():\n",
-    "    print(p.shape, p.numel())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'224'"
-      ]
-     },
-     "execution_count": 27,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "conf_str = torch.__config__.parallel_info()\n",
-    "inter_str = conf_str.split(\"hardware_concurrency() : \")[1]\n",
-    "max_concurrency = inter_str.split(\"\\n\")[0]\n",
-    "max_concurrency"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0 0\n",
-      "0 1\n",
-      "0 2\n",
-      "1 0\n",
-      "1 1\n",
-      "1 2\n"
-     ]
-    }
-   ],
-   "source": [
-    "for i in range(3):\n",
-    "    for j in range(3):\n",
-    "        print(i, j)\n",
-    "        if i == 1 and j == 2:break\n",
-    "    else:\n",
-    "        continue\n",
-    "    break"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "colossalai-py310",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.14"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
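For context on the deleted notebook above: its second and third cells count parameters once per module via modules() with parameters(recurse=False) and once via a flat parameters() walk. The compact, self-contained check below (mine, not from the notebook) shows why the two tallies agree: the non-recursive per-module walk visits each parameter exactly once.

import torch.nn as nn

model = nn.Sequential(nn.Linear(10, 5, bias=False), nn.Linear(5, 10, bias=False))

# Per-module, non-recursive counting vs. a flat walk over all parameters.
per_module = sum(p.numel() for m in model.modules() for p in m.parameters(recurse=False))
flat = sum(p.numel() for p in model.parameters())
print(per_module, flat)  # 100 100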
@@ -66,18 +66,18 @@ class GPTLMLoss(nn.Module):
 
 
 def get_cpu_mem():
-    return psutil.Process().memory_info().rss / 1024**2  # rss is in bytes; convert to MB
+    return psutil.Process().memory_info().rss / 1024**2
 
 
 def get_gpu_mem():
-    return torch.cuda.memory_allocated() / 1024**2  # convert to MB
+    return torch.cuda.memory_allocated() / 1024**2
 
 
 def get_mem_info(prefix=""):
     return f"{prefix}GPU memory usage: {get_gpu_mem():.2f} MB, CPU memory usage: {get_cpu_mem():.2f} MB"
 
 
-def get_model_size(model: nn.Module):  # returns the model's total parameter count
+def get_model_size(model: nn.Module):
     total_numel = 0
     for module in model.modules():
         for p in module.parameters(recurse=False):
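For readers trying the memory helpers above outside the benchmark script, here is a self-contained variant; the CUDA guard is my addition so the sketch also runs on a CPU-only machine.

import psutil
import torch

def get_cpu_mem() -> float:
    return psutil.Process().memory_info().rss / 1024**2  # rss is reported in bytes -> MB

def get_gpu_mem() -> float:
    if not torch.cuda.is_available():  # guard added for GPU-less environments
        return 0.0
    return torch.cuda.memory_allocated() / 1024**2  # bytes -> MB

print(f"GPU memory usage: {get_gpu_mem():.2f} MB, CPU memory usage: {get_cpu_mem():.2f} MB")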
@@ -26,7 +26,7 @@ PLACEMENT_CONFIGS = [
         "offload_optim_frac": 1.0,
         "offload_param_frac": 1.0,
     },  # zero3-offload-all
-    # {"placement_policy": "auto"},
+    {"placement_policy": "auto"},
 ]
 
 # this model is large enough to slice to chunks
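Re-enabling {"placement_policy": "auto"} adds one more configuration to the test sweep. A hedged sketch of how such a list is typically consumed (the first entry is abridged to the lines shown in this hunk, and the wrapper call is only indicated in a comment since its full signature is not shown here):

PLACEMENT_CONFIGS = [
    {"offload_optim_frac": 1.0, "offload_param_frac": 1.0},  # abridged zero3-offload-all entry
    {"placement_policy": "auto"},  # re-enabled by this commit
]

for placement_config in PLACEMENT_CONFIGS:
    # In the real test, each dict is expanded into keyword arguments of the
    # Gemini wrapper under test before running a short training step.
    print("would exercise Gemini with", placement_config)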