From 30dd13c45008ffe9e532f6cffb78f530933e6028 Mon Sep 17 00:00:00 2001
From: Xuanlei Zhao <43881818+oahzxl@users.noreply.github.com>
Date: Mon, 13 Mar 2023 17:42:37 +0800
Subject: [PATCH] [autochunk] support complete benchmark (#3121)

* refactor memory code
* don't log free var memory
* add memory align
* update chunk target
* update setting for new memory
* finish test
* update tracer
* fix typo
* update test
* add unet test
* add bench
* update bench
* update bench
* init
* support vit
* move to cpu
* add cpu benchmark
---
 .../benchmark_autochunk_alphafold.py                        | 4 ++--
 .../test_autochunk_diffuser/benchmark_autochunk_diffuser.py | 6 +++---
 .../benchmark_autochunk_transformer.py                      | 6 +++---
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/test_autochunk/test_autochunk_alphafold/benchmark_autochunk_alphafold.py b/tests/test_autochunk/test_autochunk_alphafold/benchmark_autochunk_alphafold.py
index 896751e40..9a2240d62 100644
--- a/tests/test_autochunk/test_autochunk_alphafold/benchmark_autochunk_alphafold.py
+++ b/tests/test_autochunk/test_autochunk_alphafold/benchmark_autochunk_alphafold.py
@@ -23,7 +23,7 @@ def _benchmark_evoformer_stack_gm(
     get_data: Any,
 ) -> None:
     # build model and input
-    model = get_model()
+    model = get_model().cpu().eval()
     meta_args, concrete_args = get_data(*data_args)
     if concrete_args is None:
         concrete_args = []
@@ -35,7 +35,7 @@ def _benchmark_evoformer_stack_gm(
         concrete_args={k: v for k, v in concrete_args},
     )
     interp = MetaInfoProp(meta_graph)
-    meta_tensors = [MetaTensor(i[1], fake_device="cuda:0") for i in meta_args] + [i[1] for i in concrete_args]
+    meta_tensors = [MetaTensor(i[1], fake_device="cpu") for i in meta_args] + [i[1] for i in concrete_args]
     interp.propagate(*meta_tensors)
     codegen = AutoChunkCodeGen(
         meta_graph,
diff --git a/tests/test_autochunk/test_autochunk_diffuser/benchmark_autochunk_diffuser.py b/tests/test_autochunk/test_autochunk_diffuser/benchmark_autochunk_diffuser.py
index 5c127bd69..6fb7efa7a 100644
--- a/tests/test_autochunk/test_autochunk_diffuser/benchmark_autochunk_diffuser.py
+++ b/tests/test_autochunk/test_autochunk_diffuser/benchmark_autochunk_diffuser.py
@@ -35,10 +35,9 @@ def _benchmark_autochunk_unet_gm(
         meta_args={k: v.to(torch.device("meta")) for k, v in meta_args},
         concrete_args={k: v for k, v in concrete_args},
     )
-    model = model.cuda().eval()
     interp = MetaInfoProp(meta_graph)
     meta_tensors = [i[1] for i in meta_args] + [i[1] for i in concrete_args]
-    meta_tensors = [MetaTensor(i, fake_device="cuda:0") if isinstance(i, torch.Tensor) else i for i in meta_tensors]
+    meta_tensors = [MetaTensor(i, fake_device="cpu") if isinstance(i, torch.Tensor) else i for i in meta_tensors]
     interp.propagate(*meta_tensors)
     codegen = AutoChunkCodeGen(
         meta_graph,
@@ -142,6 +141,7 @@ if __name__ == "__main__":
         port=free_port(),
         backend="nccl",
     )
-    benchmark_autochunk_unet(batch=1, height=224 * 2, width=224 * 2)
     benchmark_autochunk_unet(batch=1, height=224 * 3, width=224 * 3)
     benchmark_autochunk_unet(batch=1, height=224 * 4, width=224 * 4)
+    benchmark_autochunk_unet(batch=1, height=224 * 5, width=224 * 5)
+    benchmark_autochunk_unet(batch=1, height=224 * 6, width=224 * 6)
diff --git a/tests/test_autochunk/test_autochunk_transformer/benchmark_autochunk_transformer.py b/tests/test_autochunk/test_autochunk_transformer/benchmark_autochunk_transformer.py
index 5791af351..63490aaee 100644
--- a/tests/test_autochunk/test_autochunk_transformer/benchmark_autochunk_transformer.py
+++ b/tests/test_autochunk/test_autochunk_transformer/benchmark_autochunk_transformer.py
@@ -22,7 +22,7 @@ def _benchmark_autochunk_gpt_gm(
     data: tuple,
     max_memory: int = None,
 ) -> None:
-    model = model.cuda().eval()
+    model = model.eval().cpu()
 
     # build model and input
     meta_args, concrete_args, sequence = data
@@ -37,7 +37,7 @@ def _benchmark_autochunk_gpt_gm(
     )
     interp = MetaInfoProp(meta_graph)
     meta_tensors = [meta_args[i] if i in meta_args else concrete_args[i] for i in sequence]
-    meta_tensors = [MetaTensor(i, fake_device="cuda:0") if isinstance(i, torch.Tensor) else i for i in meta_tensors]
+    meta_tensors = [MetaTensor(i, fake_device="cpu") if isinstance(i, torch.Tensor) else i for i in meta_tensors]
     interp.propagate(*meta_tensors)
     codegen = AutoChunkCodeGen(
         meta_graph,
@@ -58,7 +58,7 @@ def _benchmark_autochunk_gpt_gm(
     # init inputs
     inputs = [meta_args[i] if i in meta_args else concrete_args[i] for i in sequence]
     inputs = [i.cuda() if isinstance(i, torch.Tensor) else i for i in inputs]
-    model.cuda().eval()
+    model.cuda()
 
     # bench
     para_mem = float(parameter_size(model)) / 1024**2 * 6
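
Note for reviewers: after this change, all three benchmarks share the same CPU-side code-generation pattern, sketched below. This is a minimal sketch under assumptions, not the benchmark code itself: the import paths, the `symbolic_trace` helper, the `build_chunked_graph` wrapper, and `max_memory` as AutoChunkCodeGen's budget keyword are assumptions about ColossalAI's layout around this commit; only the MetaInfoProp / MetaTensor(fake_device="cpu") / AutoChunkCodeGen call sequence is taken from the diff above.

    import torch
    from colossalai.fx import symbolic_trace                             # assumed import path
    from colossalai.fx.passes.meta_info_prop import MetaInfoProp         # assumed import path
    from colossalai.fx.profiler import MetaTensor                        # assumed import path
    from colossalai.autochunk.autochunk_codegen import AutoChunkCodeGen  # assumed import path

    def build_chunked_graph(model, meta_args: dict, max_memory: int = None):
        # Keep the real weights on CPU in eval mode; everything below runs
        # on meta/fake tensors, so no GPU is needed for code generation.
        model = model.cpu().eval()

        # Trace with meta-device inputs so activations are never materialized.
        meta_graph = symbolic_trace(
            model,
            meta_args={k: v.to(torch.device("meta")) for k, v in meta_args.items()},
        )

        # Propagate shape/memory metadata with CPU fake devices; the
        # fake_device="cpu" switch is exactly what this patch changes.
        interp = MetaInfoProp(meta_graph)
        meta_tensors = [
            MetaTensor(v, fake_device="cpu") if isinstance(v, torch.Tensor) else v
            for v in meta_args.values()
        ]
        interp.propagate(*meta_tensors)

        # Let AutoChunk rewrite the graph's codegen under a memory budget
        # (keyword name assumed), then recompile the module's forward.
        meta_graph.graph.set_codegen(AutoChunkCodeGen(meta_graph, max_memory=max_memory))
        meta_graph.recompile()
        return meta_graph

Inputs and the model are moved to GPU with `.cuda()` only right before timing (see the `model.cuda()` hunk in the GPT benchmark), so tracing, shape propagation, and chunk search all stay on CPU.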