mirror of https://github.com/hpcaitech/ColossalAI

[gemini] polish code (#855)
parent 29159d9b5b
commit f0e654558f
@@ -42,7 +42,7 @@ class StatefulTensorMgr(object):
         by mem_stats_collector, which should belongs to a Sharded Model.
         """
         # find stateful tensor in state COMPUTE
-        cuda_demand = 0
+        cuda_demand = StatefulTensor.GST_MGR.state_mem['cpu'][TensorState.COMPUTE]
         move_to_cuda_tensor_list = []
         hold_cuda_tensor_list = []
         for tensor in self._stateful_tensor_list:
@@ -55,7 +55,6 @@ class StatefulTensorMgr(object):
             elif tensor.device.type == 'cpu':
                 if tensor.state == TensorState.COMPUTE:
                     move_to_cuda_tensor_list.append(tensor)
-                    cuda_demand += colo_tensor_mem_usage(tensor.payload)[1]
             else:
                 raise RuntimeError
         self._cpu_gpu_move_volume += self._tensor_placement_policy.evict_tensors(hold_cuda_tensor_list,
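The two hunks above replace the per-tensor accumulation of cuda_demand (summing colo_tensor_mem_usage over every CPU tensor in COMPUTE state) with a single lookup into the counters that StatefulTensor.GST_MGR already keeps per device and per tensor state, as the diff itself shows. A minimal sketch of that bookkeeping idea, with a hypothetical ToyStateMemTracker standing in for the real manager:

# NOT the actual GeminiMemoryManager: a toy tracker that keeps a running byte
# counter per (device, state), so the CPU-side COMPUTE demand becomes one
# dictionary lookup instead of a per-tensor loop. All names are hypothetical.
from enum import Enum


class TensorState(Enum):
    HOLD = 0
    COMPUTE = 1


class ToyStateMemTracker:

    def __init__(self):
        # state_mem[device_type][state] -> total payload bytes in that bucket
        self.state_mem = {'cpu': {s: 0 for s in TensorState}, 'cuda': {s: 0 for s in TensorState}}

    def register(self, device_type: str, state: TensorState, num_bytes: int):
        self.state_mem[device_type][state] += num_bytes

    def transit(self, device_type: str, old: TensorState, new: TensorState, num_bytes: int):
        self.state_mem[device_type][old] -= num_bytes
        self.state_mem[device_type][new] += num_bytes


tracker = ToyStateMemTracker()
tracker.register('cpu', TensorState.HOLD, 4 * 1024)
tracker.transit('cpu', TensorState.HOLD, TensorState.COMPUTE, 4 * 1024)
# analogue of: cuda_demand = StatefulTensor.GST_MGR.state_mem['cpu'][TensorState.COMPUTE]
cuda_demand = tracker.state_mem['cpu'][TensorState.COMPUTE]
print(cuda_demand)    # 4096 bytes of CPU tensors must reach CUDA before the next compute step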
@@ -66,7 +65,7 @@ class StatefulTensorMgr(object):
         # move COMPUTE tensors to CUDA
         for t in move_to_cuda_tensor_list:
             colo_model_data_tensor_move_inline(t, get_current_device())
-            self._cpu_gpu_move_volume += t.payload.numel() * t.payload.element_size()
+            self._cpu_gpu_move_volume += t.payload_size

     @property
     def cpu_gpu_move_volume(self):
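This hunk (and the -104,15 hunk further down) swaps the inline t.payload.numel() * t.payload.element_size() arithmetic for a single t.payload_size attribute. A minimal sketch of what such a property amounts to, using a hypothetical ToyStatefulTensor rather than the real StatefulTensor:

import torch


class ToyStatefulTensor:
    """Hypothetical stand-in for StatefulTensor, only to illustrate payload_size."""

    def __init__(self, payload: torch.Tensor):
        self.payload = payload

    @property
    def payload_size(self) -> int:
        # the same quantity the old code computed inline at every call site
        return self.payload.numel() * self.payload.element_size()


t = ToyStatefulTensor(torch.zeros(1024, dtype=torch.float16))
assert t.payload_size == t.payload.numel() * t.payload.element_size()    # 2048 bytes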
@@ -76,7 +76,6 @@ class AutoTensorPlacementPolicy(TensorPlacementPolicy):
         Returns:
             int: the volume of memory that is evicted
         """
-        volume = 0
         cuda_capacity = colo_device_memory_capacity(get_current_device())
         used_cuda_model_data = StatefulTensor.GST_MGR.total_mem['cuda']
         if warmup:
@@ -88,11 +87,12 @@ class AutoTensorPlacementPolicy(TensorPlacementPolicy):
             cuda_capacity *= self._steady_cuda_cap_ratio
         total_cuda_model_data = cuda_capacity - max_cuda_non_model_data_per_period
         avail_cuda_model_data = total_cuda_model_data - used_cuda_model_data
+
+        freed_cuda_model_data = 0
         if avail_cuda_model_data < cuda_demand:
             # Move cuda_demand - avail_cuda_model_data volume of tensors
             # to_free_cuda_model_data = cuda_demand - avail_cuda_model_data
             to_free_cuda_model_data = cuda_demand - avail_cuda_model_data
-            freed_cuda_model_data = 0
             to_free_tensor_list = hold_cuda_tensor_list
             if not warmup:
                 next_compute_idx = {t: len(compute_list) for t in hold_cuda_tensor_list}
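This hunk is where the eviction budget is computed: the CUDA capacity (scaled by _steady_cuda_cap_ratio outside warmup) minus the expected non-model data gives the model-data budget, and whatever that budget leaves after the already-resident model data is the room cuda_demand must fit into. A small worked example of that arithmetic with made-up sizes:

# worked example of the eviction budget above; all numbers are invented
cuda_capacity = 16 * 1024**3                        # 16 GiB reported for the device
steady_cuda_cap_ratio = 0.9                         # safety margin outside warmup
max_cuda_non_model_data_per_period = 3 * 1024**3    # activations/buffers expected next period
used_cuda_model_data = 10 * 1024**3                 # model data already resident on CUDA
cuda_demand = 4 * 1024**3                           # COMPUTE tensors that must move to CUDA now

cuda_capacity *= steady_cuda_cap_ratio
total_cuda_model_data = cuda_capacity - max_cuda_non_model_data_per_period    # ~11.4 GiB budget
avail_cuda_model_data = total_cuda_model_data - used_cuda_model_data          # ~1.4 GiB free
# max(0, ...) stands in for the `if avail_cuda_model_data < cuda_demand` guard
to_free_cuda_model_data = max(0, cuda_demand - avail_cuda_model_data)         # ~2.6 GiB to evict
print(f"{to_free_cuda_model_data / 1024**3:.1f} GiB of HOLD tensors must go back to CPU")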
@@ -104,15 +104,14 @@ class AutoTensorPlacementPolicy(TensorPlacementPolicy):
             for t in to_free_tensor_list:
                 if freed_cuda_model_data >= to_free_cuda_model_data:
                     break
-                freed_cuda_model_data += colo_tensor_mem_usage(t)[0]
+                freed_cuda_model_data += t.payload_size
                 colo_model_data_tensor_move_inline(t, torch.device('cpu'))
-                volume += t.payload.numel() * t.payload.element_size()
             if freed_cuda_model_data < to_free_cuda_model_data:
                 raise RuntimeError(
                     f"Adjust layout failed! No enough CUDA memory! Need {to_free_cuda_model_data}, freed {freed_cuda_model_data}"
                 )

-        return volume
+        return freed_cuda_model_data


 class TensorPlacementPolicyFactory:
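Taken together, the last three hunks drop the redundant volume counter: freed_cuda_model_data is initialised once before the branch and returned at method level, so the caller's self._cpu_gpu_move_volume += ...evict_tensors(...) receives 0 whenever nothing needs to be evicted. A hypothetical reduction of that control flow, with toy sizes standing in for tensor payloads:

# toy reduction of the eviction control flow; not the ColossalAI implementation
def evict(avail: int, demand: int, hold_sizes: list) -> int:
    freed = 0
    if avail < demand:
        to_free = demand - avail
        for size in hold_sizes:
            if freed >= to_free:
                break
            freed += size          # one HOLD tensor moved back to CPU
    return freed                   # always an int, possibly 0


assert evict(avail=8, demand=4, hold_sizes=[2, 2]) == 0     # enough room: nothing evicted
assert evict(avail=1, demand=4, hold_sizes=[2, 2]) >= 3     # frees until the 3-byte gap is covered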