From ad00894f7f37c370cb9db162e727302ec633c0f0 Mon Sep 17 00:00:00 2001 From: Ziyue Jiang Date: Fri, 6 Jan 2023 16:03:16 +0800 Subject: [PATCH] polish --- .../gpt/experiments/pipeline_parallel/README.md | 3 ++- .../gpt/experiments/pipeline_parallel/utils.py | 12 ------------ 2 files changed, 2 insertions(+), 13 deletions(-) delete mode 100644 examples/language/gpt/experiments/pipeline_parallel/utils.py diff --git a/examples/language/gpt/experiments/pipeline_parallel/README.md b/examples/language/gpt/experiments/pipeline_parallel/README.md index d158b088d..702e3c8d6 100644 --- a/examples/language/gpt/experiments/pipeline_parallel/README.md +++ b/examples/language/gpt/experiments/pipeline_parallel/README.md @@ -1,4 +1,4 @@ -# Auto-Parallelism with GPT2 +# Pipeline Parallelism Demo with GPT2 ## Requirements @@ -33,5 +33,6 @@ For simplicity, the input data is randonly generated here. ```bash #Run the Pipeline Parallel on GPT with default setting and a dummy dataset. +#You can change the number of GPUs or microbatches in run.sh. bash run.sh ``` diff --git a/examples/language/gpt/experiments/pipeline_parallel/utils.py b/examples/language/gpt/experiments/pipeline_parallel/utils.py deleted file mode 100644 index 782f546dc..000000000 --- a/examples/language/gpt/experiments/pipeline_parallel/utils.py +++ /dev/null @@ -1,12 +0,0 @@ -import torch - - -# Randomly Generated Data -def get_data(batch_size, seq_len, vocab_size): - input_ids = torch.randint(0, vocab_size, (batch_size, seq_len), device=torch.cuda.current_device()) - attention_mask = torch.ones_like(input_ids) - return input_ids, attention_mask - - -def get_tflops(model_numel, batch_size, seq_len, step_time): - return model_numel * batch_size * seq_len * 8 / 1e12 / (step_time + 1e-12)