From ad00894f7f37c370cb9db162e727302ec633c0f0 Mon Sep 17 00:00:00 2001
From: Ziyue Jiang <ziyue.jiang@gmail.com>
Date: Fri, 6 Jan 2023 16:03:16 +0800
Subject: [PATCH] polish

---
 .../gpt/experiments/pipeline_parallel/README.md      |  3 ++-
 .../gpt/experiments/pipeline_parallel/utils.py       | 12 ------------
 2 files changed, 2 insertions(+), 13 deletions(-)
 delete mode 100644 examples/language/gpt/experiments/pipeline_parallel/utils.py

diff --git a/examples/language/gpt/experiments/pipeline_parallel/README.md b/examples/language/gpt/experiments/pipeline_parallel/README.md
index d158b088d..702e3c8d6 100644
--- a/examples/language/gpt/experiments/pipeline_parallel/README.md
+++ b/examples/language/gpt/experiments/pipeline_parallel/README.md
@@ -1,4 +1,4 @@
-# Auto-Parallelism with GPT2
+# Pipeline Parallelism Demo with GPT2
 
 ## Requirements
 
@@ -33,5 +33,6 @@ For simplicity, the input data is randonly generated here.
 
 ```bash
 #Run the Pipeline Parallel on GPT with default setting and a dummy dataset.
+#You can change the number of GPUs or microbatches in run.sh.
 bash run.sh
 ```
diff --git a/examples/language/gpt/experiments/pipeline_parallel/utils.py b/examples/language/gpt/experiments/pipeline_parallel/utils.py
deleted file mode 100644
index 782f546dc..000000000
--- a/examples/language/gpt/experiments/pipeline_parallel/utils.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import torch
-
-
-# Randomly Generated Data
-def get_data(batch_size, seq_len, vocab_size):
-    input_ids = torch.randint(0, vocab_size, (batch_size, seq_len), device=torch.cuda.current_device())
-    attention_mask = torch.ones_like(input_ids)
-    return input_ids, attention_mask
-
-
-def get_tflops(model_numel, batch_size, seq_len, step_time):
-    return model_numel * batch_size * seq_len * 8 / 1e12 / (step_time + 1e-12)