diff --git a/colossalai/zero/gemini/gemini_hook.py b/colossalai/zero/gemini/gemini_hook.py
index bf990d127..e691b423b 100644
--- a/colossalai/zero/gemini/gemini_hook.py
+++ b/colossalai/zero/gemini/gemini_hook.py
@@ -37,18 +37,16 @@ class GeminiZeROHook(ColoParamOpHook):
 
         # transfer state
         for p in params:
-            # TODO(haze188): check状态转换
             self._chunk_manager.trans_tensor_state(p, TensorState.COMPUTE)
         self._gemini_manager.sample_overall_data()
 
         # evit chunks, aware of async fetched
-        # TODO(haze188): 可能我们prefetch的又被淘汰掉, check一下
+        # TODO: check if prefetched chunks will be evicted
         self._gemini_manager.adjust_layout(
             all_chunks, record_anyway=self._gemini_manager.placement_policy.max_prefetch > 0
         )
 
         # fetch the rest synchronously
-        # TODO(haze188): 1. 先prefetch还是先fetch(prefetch是异步,fetch是同步)
         for chunk in chunks_fetch_sync:
             self._chunk_manager.access_chunk(chunk)
 
diff --git a/colossalai/zero/gemini/gemini_mgr.py b/colossalai/zero/gemini/gemini_mgr.py
index 2e96c22f3..85beafd32 100644
--- a/colossalai/zero/gemini/gemini_mgr.py
+++ b/colossalai/zero/gemini/gemini_mgr.py
@@ -154,7 +154,6 @@ class GeminiManager:
 
     def _record_warmup_chunks_order(self, chunks: Tuple[Chunk, ...], record_anyway: bool = False) -> None:
         self._compute_idx += 1
-        # TODO(haze188): _compute_list 记录块的访问顺序
         if self._warmup and (self._placement_policy.need_mem_stats or record_anyway):
             self._compute_list.append(chunks)
 
diff --git a/examples/language/gpt/gemini/demo.ipynb b/examples/language/gpt/gemini/demo.ipynb
new file mode 100644
index 000000000..09953b3a9
--- /dev/null
+++ b/examples/language/gpt/gemini/demo.ipynb
@@ -0,0 +1,142 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Linear(in_features=10, out_features=5, bias=False) 50\n",
+      "Linear(in_features=5, out_features=10, bias=False) 50\n",
+      "Linear(in_features=10, out_features=10, bias=False) 100\n"
+     ]
+    }
+   ],
+   "source": [
+    "class Toy(nn.Module):\n",
+    "    \n",
+    "    def __init__(self):\n",
+    "        super(Toy, self).__init__()\n",
+    "        self.fc1 = nn.Linear(10,5, bias=False)\n",
+    "        self.m3 = nn.Sequential(nn.Linear(5, 10, bias=False), nn.Linear(10,10, bias=False))\n",
+    "\n",
+    "t = Toy()\n",
+    "for mod in t.modules():\n",
+    "    for p in mod.parameters(recurse=False):\n",
+    "        print(mod, p.numel())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([5, 10]) 50\n",
+      "torch.Size([10, 5]) 50\n",
+      "torch.Size([10, 10]) 100\n"
+     ]
+    }
+   ],
+   "source": [
+    "for p in t.parameters():\n",
+    "    print(p.shape, p.numel())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'224'"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "conf_str = torch.__config__.parallel_info()\n",
+    "inter_str = conf_str.split(\"hardware_concurrency() : \")[1]\n",
+    "max_concurrency = inter_str.split(\"\\n\")[0]\n",
+    "max_concurrency"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0 0\n",
+      "0 1\n",
+      "0 2\n",
+      "1 0\n",
+      "1 1\n",
+      "1 2\n"
+     ]
+    }
+   ],
+   "source": [
+    "for i in range(3):\n",
+    "    for j in range(3):\n",
+    "        print(i, j)\n",
+    "        if i == 1 and j == 2:break\n",
+    "    else:\n",
+    "        continue\n",
+    "    break"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "colossalai-py310",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/language/gpt/gemini/run_gemini.sh b/examples/language/gpt/gemini/run_gemini.sh
index 5eaa4af4d..bffd26f59 100644
--- a/examples/language/gpt/gemini/run_gemini.sh
+++ b/examples/language/gpt/gemini/run_gemini.sh
@@ -6,7 +6,7 @@ export DISTPLAN=${DISTPLAN:-"CAI_Gemini"}
 export GPUNUM=${GPUNUM:-1}
 export BATCH_SIZE=${BATCH_SIZE:-16}
 export MODEL_TYPE=${MODEL_TYPE:-"gpt2_medium"}
-export TRAIN_STEP=${TRAIN_STEP:-10}
+export TRAIN_STEP=${TRAIN_STEP:-2}
 
 # export PYTHONPATH=$PWD:$PYTHONPATH
 
diff --git a/examples/language/gpt/gemini/train_gpt_demo.py b/examples/language/gpt/gemini/train_gpt_demo.py
index 6db74231a..667a0c77a 100644
--- a/examples/language/gpt/gemini/train_gpt_demo.py
+++ b/examples/language/gpt/gemini/train_gpt_demo.py
@@ -66,18 +66,18 @@ class GPTLMLoss(nn.Module):
 
 
 def get_cpu_mem():
-    return psutil.Process().memory_info().rss / 1024**2
+    return psutil.Process().memory_info().rss / 1024**2  # rss is in bytes; convert to MB
 
 
 def get_gpu_mem():
-    return torch.cuda.memory_allocated() / 1024**2
+    return torch.cuda.memory_allocated() / 1024**2  # convert to MB
 
 
 def get_mem_info(prefix=""):
     return f"{prefix}GPU memory usage: {get_gpu_mem():.2f} MB, CPU memory usage: {get_cpu_mem():.2f} MB"
 
 
-def get_model_size(model: nn.Module):
+def get_model_size(model: nn.Module):  # get the total number of model parameters
     total_numel = 0
     for module in model.modules():
         for p in module.parameters(recurse=False):
diff --git a/tests/test_zero/test_gemini/test_optim.py b/tests/test_zero/test_gemini/test_optim.py
index 1c914ca0e..4e1fb988b 100644
--- a/tests/test_zero/test_gemini/test_optim.py
+++ b/tests/test_zero/test_gemini/test_optim.py
@@ -26,7 +26,7 @@ PLACEMENT_CONFIGS = [
         "offload_optim_frac": 1.0,
         "offload_param_frac": 1.0,
     },  # zero3-offload-all
-    {"placement_policy": "auto"},
+    # {"placement_policy": "auto"},
 ]
 
 # this model is large enough to slice to chunks