From 573af8418406a319e91be07f58fca798a6e72dbd Mon Sep 17 00:00:00 2001
From: ver217
Date: Tue, 4 Apr 2023 17:32:51 +0800
Subject: [PATCH] [example] update examples related to zero/gemini (#3431)

* [zero] update legacy import

* [zero] update examples

* [example] fix opt tutorial

* [example] fix opt tutorial

* [example] fix opt tutorial

* [example] fix opt tutorial

* [example] fix import
---
 colossalai/zero/legacy/__init__.py            |  3 ++-
 .../roberta/configs/colossalai_ddp.py         |  7 ++++++-
 .../roberta/configs/colossalai_zero.py        |  9 ++++++--
 examples/tutorial/opt/opt/colossalai_zero.py  |  6 +++++-
 examples/tutorial/opt/opt/requirements.txt    |  1 +
 examples/tutorial/opt/opt/run_clm.py          |  6 +++++-
 examples/tutorial/opt/opt/test_ci.sh          | 21 +++++++++++++++++++
 examples/tutorial/opt/test_ci.sh              |  3 +++
 8 files changed, 50 insertions(+), 6 deletions(-)
 create mode 100755 examples/tutorial/opt/opt/test_ci.sh
 create mode 100755 examples/tutorial/opt/test_ci.sh

diff --git a/colossalai/zero/legacy/__init__.py b/colossalai/zero/legacy/__init__.py
index 35570a1f5..3783d38e6 100644
--- a/colossalai/zero/legacy/__init__.py
+++ b/colossalai/zero/legacy/__init__.py
@@ -6,6 +6,7 @@ import torch.nn as nn
 from colossalai.logging import get_dist_logger
 
 from .init_ctx import ZeroInitContext, no_shard_zero_context, no_shard_zero_decrator
+from .shard_utils import BucketTensorShardStrategy, TensorShardStrategy
 from .sharded_model import ShardedModelV2
 from .sharded_optim import ShardedOptimizerV2
 
@@ -40,5 +41,5 @@ def convert_to_zero_v2(model: nn.Module, optimizer: torch.optim.Optimizer, model
 
 __all__ = [
     'convert_to_zero_v2', 'ShardedModelV2', 'ShardedOptimizerV2', 'ZeroInitContext', 'no_shard_zero_context',
-    'no_shard_zero_decrator'
+    'no_shard_zero_decrator', 'TensorShardStrategy', 'BucketTensorShardStrategy'
 ]
diff --git a/examples/language/roberta/configs/colossalai_ddp.py b/examples/language/roberta/configs/colossalai_ddp.py
index c3c59aa40..3146ffc45 100644
--- a/examples/language/roberta/configs/colossalai_ddp.py
+++ b/examples/language/roberta/configs/colossalai_ddp.py
@@ -1,4 +1,9 @@
-from colossalai.zero.shard_utils import TensorShardStrategy
 from colossalai.nn.optimizer import FusedAdam
 
+try:
+    from colossalai.zero.shard_utils import TensorShardStrategy
+except ImportError:
+    # colossalai > 0.2.8
+    from colossalai.zero.legacy import TensorShardStrategy
+
 clip_grad_norm = 1.0
diff --git a/examples/language/roberta/configs/colossalai_zero.py b/examples/language/roberta/configs/colossalai_zero.py
index c5debdce0..bae4c723c 100644
--- a/examples/language/roberta/configs/colossalai_zero.py
+++ b/examples/language/roberta/configs/colossalai_zero.py
@@ -1,6 +1,11 @@
-from colossalai.zero.shard_utils import TensorShardStrategy
 from colossalai.nn.optimizer import FusedAdam
 
+try:
+    from colossalai.zero.shard_utils import TensorShardStrategy
+except ImportError:
+    # colossalai > 0.2.8
+    from colossalai.zero.legacy import TensorShardStrategy
+
 # fp16 = dict(
 #     mode=AMP_TYPE.TORCH,
 # )
@@ -29,4 +34,4 @@ optimizer = dict(
     weight_decay=1e-2,
 )
 
-# 64433
\ No newline at end of file
+# 64433
diff --git a/examples/tutorial/opt/opt/colossalai_zero.py b/examples/tutorial/opt/opt/colossalai_zero.py
index 833745f3e..7c2c15245 100644
--- a/examples/tutorial/opt/opt/colossalai_zero.py
+++ b/examples/tutorial/opt/opt/colossalai_zero.py
@@ -1,4 +1,8 @@
-from colossalai.zero.shard_utils import TensorShardStrategy
+try:
+    from colossalai.zero.shard_utils import TensorShardStrategy
+except ImportError:
+    # colossalai > 0.2.8
+    from colossalai.zero.legacy import TensorShardStrategy
 
 zero = dict(model_config=dict(shard_strategy=TensorShardStrategy(),
                               tensor_placement_policy="auto",
diff --git a/examples/tutorial/opt/opt/requirements.txt b/examples/tutorial/opt/opt/requirements.txt
index c34df7992..d0ed2c717 100644
--- a/examples/tutorial/opt/opt/requirements.txt
+++ b/examples/tutorial/opt/opt/requirements.txt
@@ -4,3 +4,4 @@ datasets >= 1.8.0
 sentencepiece != 0.1.92
 protobuf
 accelerate == 0.13.2
+transformers
diff --git a/examples/tutorial/opt/opt/run_clm.py b/examples/tutorial/opt/opt/run_clm.py
index e618b4d66..fdc86adab 100755
--- a/examples/tutorial/opt/opt/run_clm.py
+++ b/examples/tutorial/opt/opt/run_clm.py
@@ -413,7 +413,11 @@ def main():
     cai_version = colossalai.__version__
     logger.info(f'using Colossal-AI version {cai_version}')
     if version.parse(cai_version) > version.parse("0.1.10"):
-        from colossalai.nn.parallel import GeminiDDP
+        try:
+            from colossalai.nn.parallel import GeminiDDP
+        except ImportError:
+            # this works for unreleased main branch, and this may be released on 0.2.9
+            from colossalai.zero import GeminiDDP
         model = GeminiDDP(model, device=get_current_device(), placement_policy=PLACEMENT_POLICY, pin_memory=True)
     elif version.parse(cai_version) <= version.parse("0.1.10") and version.parse(cai_version) >= version.parse("0.1.9"):
         from colossalai.gemini import ChunkManager, GeminiManager
diff --git a/examples/tutorial/opt/opt/test_ci.sh b/examples/tutorial/opt/opt/test_ci.sh
new file mode 100755
index 000000000..e505da136
--- /dev/null
+++ b/examples/tutorial/opt/opt/test_ci.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+set -xue
+
+pip install -r requirements.txt
+
+BS=8
+MEMCAP=0
+GPUNUM=2
+MODLE="facebook/opt-125m"
+
+torchrun \
+  --nproc_per_node ${GPUNUM} \
+  --master_port 19198 \
+  run_clm.py \
+  -s \
+  --output_dir $PWD \
+  --mem_cap ${MEMCAP} \
+  --model_name_or_path ${MODLE} \
+  --per_device_train_batch_size ${BS} \
+  --num_train_epochs 1
diff --git a/examples/tutorial/opt/test_ci.sh b/examples/tutorial/opt/test_ci.sh
new file mode 100755
index 000000000..8341bb105
--- /dev/null
+++ b/examples/tutorial/opt/test_ci.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+cd opt && bash test_ci.sh
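
The change repeated across the example configs above is a version-compatible import: per the in-diff comments, Colossal-AI releases after 0.2.8 move TensorShardStrategy out of colossalai.zero.shard_utils and re-export it from colossalai.zero.legacy, so each config tries the old path first and falls back to the legacy namespace. A minimal standalone sketch of that pattern in Python, assuming a colossalai installation (the shard_strategy variable name is only illustrative and not part of the patch):

    try:
        # Colossal-AI <= 0.2.8 exposes the shard strategies here
        from colossalai.zero.shard_utils import TensorShardStrategy
    except ImportError:
        # colossalai > 0.2.8: fall back to the legacy namespace
        from colossalai.zero.legacy import TensorShardStrategy

    # The example configs then use the class unchanged, e.g. inside the zero config dict
    shard_strategy = TensorShardStrategy()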