From 13ed4b64411d97a58bf8cb481ef45aef0f7d00f9 Mon Sep 17 00:00:00 2001
From: LuGY <74758262+Gy-Lu@users.noreply.github.com>
Date: Thu, 31 Mar 2022 17:42:20 +0800
Subject: [PATCH] [model zoo] add activation offload for gpt model (#582)

---
 model_zoo/gpt/gpt.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/model_zoo/gpt/gpt.py b/model_zoo/gpt/gpt.py
index df331c5b8..7384cc3b4 100644
--- a/model_zoo/gpt/gpt.py
+++ b/model_zoo/gpt/gpt.py
@@ -165,8 +165,9 @@ class GPTBlock(CheckpointModule):
                  bias: bool = True,
                  apply_post_layernorm: bool = False,
                  fuse_scale_mask_softmax: bool = False,
-                 checkpoint: bool = False):
-        super().__init__(checkpoint)
+                 checkpoint: bool = False,
+                 activation_offload: bool = False):
+        super().__init__(checkpoint, activation_offload)
         self.apply_post_layernorm = apply_post_layernorm
         self.norm1 = col_nn.LayerNorm(normalized_shape=dim, eps=layernorm_epsilon, dtype=dtype)
         self.attn = GPTSelfAttention(dim=dim,
@@ -252,7 +253,8 @@ class GPT(nn.Module):
                  bias: bool = True,
                  apply_post_layernorm: bool = False,
                  fuse_scale_mask_softmax: bool = False,
-                 checkpoint: bool = False) -> None:
+                 checkpoint: bool = False,
+                 activation_offload: bool = False) -> None:
         super().__init__()
         self.embed = GPTEmbedding(embedding_dim=dim,
                                   vocab_size=vocab_size,
@@ -274,6 +276,7 @@ class GPT(nn.Module):
                      apply_post_layernorm=apply_post_layernorm,
                      fuse_scale_mask_softmax=fuse_scale_mask_softmax,
                      checkpoint=checkpoint,
+                     activation_offload=activation_offload
                      ) for _ in range(depth)
         ])
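
Usage note (not part of the patch): a minimal sketch of how the new flag might be
passed through to the model. The import path and the constructor keywords
vocab_size, dim, and depth are taken from the diff context; their values below are
purely illustrative. Since activation_offload is forwarded to CheckpointModule
alongside the checkpoint flag, it presumably only takes effect when checkpointing
is enabled.

    from model_zoo.gpt.gpt import GPT

    # Illustrative configuration; only checkpoint and activation_offload are the
    # subject of this patch, the remaining values are placeholder hyperparameters.
    model = GPT(vocab_size=50304,
                dim=768,
                depth=12,
                checkpoint=True,            # enable activation checkpointing in each GPTBlock
                activation_offload=True)    # new flag: offload checkpointed activations
                                            # (presumably to host memory) during training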