mirror of https://github.com/hpcaitech/ColossalAI
[fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages (#1425)
* [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages
* [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages
* [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages
pull/1437/head
parent
89c434a0a6
commit
f20cb4e893
|
@ -114,18 +114,29 @@ class MetaInfoProp(torch.fx.Interpreter):
|
||||||
return TensorMetadata(None, None, False, None, 0, False)
|
return TensorMetadata(None, None, False, None, 0, False)
|
||||||
|
|
||||||
meta = _map_aggregate(result, extract_tensor_meta)
|
meta = _map_aggregate(result, extract_tensor_meta)
|
||||||
|
|
||||||
n.meta['tensor_meta'] = meta
|
n.meta['tensor_meta'] = meta
|
||||||
total_node_size = _compute_node_numel(n.meta['tensor_meta'])
|
|
||||||
# counting the total size of parameters
|
# get byte size for each element
|
||||||
|
size_per_elem_bytes = torch.tensor([], dtype=meta.dtype).element_size()
|
||||||
|
|
||||||
|
# compute the total size of activation tensors
|
||||||
|
total_activation_size = _compute_node_numel(n.meta['tensor_meta'])
|
||||||
|
|
||||||
|
# compute the total size of model parameters
|
||||||
total_param_size = 0
|
total_param_size = 0
|
||||||
if n.op == 'call_module':
|
if n.op == 'call_module':
|
||||||
target_module = n.graph.owning_module.get_submodule(n.target)
|
target_module = n.graph.owning_module.get_submodule(n.target)
|
||||||
for param in target_module.parameters():
|
for param in target_module.parameters():
|
||||||
total_param_size += param.numel()
|
total_param_size += param.numel()
|
||||||
|
|
||||||
total_node_size += total_param_size
|
# compute the total memory cost of activation tensors and model parameters
|
||||||
n.node_size = total_node_size
|
total_activation_size *= size_per_elem_bytes
|
||||||
|
total_param_size *= size_per_elem_bytes
|
||||||
|
|
||||||
|
# TODO: node.node_size is not an original attribute
|
||||||
|
setattr(n, 'node_size', total_activation_size + total_param_size)
|
||||||
|
setattr(n, 'param_size', total_param_size)
|
||||||
|
setattr(n, 'activation_size', total_activation_size)
|
||||||
n.meta['type'] = type(result)
|
n.meta['type'] = type(result)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
|
@ -23,12 +23,24 @@ def test_meta_info_prop():
|
||||||
input_sample = torch.rand(BATCH_SIZE, DIM_IN)
|
input_sample = torch.rand(BATCH_SIZE, DIM_IN)
|
||||||
orig_output = model(input_sample)
|
orig_output = model(input_sample)
|
||||||
gm = symbolic_trace(model)
|
gm = symbolic_trace(model)
|
||||||
|
for node in gm.graph.nodes:
|
||||||
|
assert not hasattr(node,
|
||||||
|
'node_size'), 'The attribute Node.node_size should not exist before MetaInfoProp procedure'
|
||||||
|
assert not hasattr(node,
|
||||||
|
'param_size'), 'The attribute Node.param_size should not exist before MetaInfoProp procedure'
|
||||||
|
assert not hasattr(
|
||||||
|
node,
|
||||||
|
'activation_size'), 'The attribute Node.activation_size should not exist before MetaInfoProp procedure'
|
||||||
MetaInfoProp(gm).run(input_sample)
|
MetaInfoProp(gm).run(input_sample)
|
||||||
for node in gm.graph.nodes:
|
for node in gm.graph.nodes:
|
||||||
if node.op == 'placeholder':
|
if node.op == 'placeholder':
|
||||||
meta_check(node.meta['tensor_meta'], input_sample)
|
meta_check(node.meta['tensor_meta'], input_sample)
|
||||||
if node.op == 'output':
|
if node.op == 'output':
|
||||||
meta_check(node.meta['tensor_meta'], orig_output)
|
meta_check(node.meta['tensor_meta'], orig_output)
|
||||||
|
assert hasattr(node, 'node_size'), 'The attribute Node.node_size should exist after MetaInfoProp procedure'
|
||||||
|
assert hasattr(node, 'param_size'), 'The attribute Node.param_size should exist after MetaInfoProp procedure'
|
||||||
|
assert hasattr(
|
||||||
|
node, 'activation_size'), 'The attribute Node.activation_size should exist after MetaInfoProp procedure'
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
Loading…
Reference in New Issue