From 13ed4b64411d97a58bf8cb481ef45aef0f7d00f9 Mon Sep 17 00:00:00 2001
From: LuGY <74758262+Gy-Lu@users.noreply.github.com>
Date: Thu, 31 Mar 2022 17:42:20 +0800
Subject: [PATCH] [model zoo] add activation offload for gpt model (#582)

---
 model_zoo/gpt/gpt.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/model_zoo/gpt/gpt.py b/model_zoo/gpt/gpt.py
index df331c5b8..7384cc3b4 100644
--- a/model_zoo/gpt/gpt.py
+++ b/model_zoo/gpt/gpt.py
@@ -165,8 +165,9 @@ class GPTBlock(CheckpointModule):
                  bias: bool = True,
                  apply_post_layernorm: bool = False,
                  fuse_scale_mask_softmax: bool = False,
-                 checkpoint: bool = False):
-        super().__init__(checkpoint)
+                 checkpoint: bool = False,
+                 activation_offload: bool = False):
+        super().__init__(checkpoint, activation_offload)
         self.apply_post_layernorm = apply_post_layernorm
         self.norm1 = col_nn.LayerNorm(normalized_shape=dim, eps=layernorm_epsilon, dtype=dtype)
         self.attn = GPTSelfAttention(dim=dim,
@@ -252,7 +253,8 @@ class GPT(nn.Module):
                  bias: bool = True,
                  apply_post_layernorm: bool = False,
                  fuse_scale_mask_softmax: bool = False,
-                 checkpoint: bool = False) -> None:
+                 checkpoint: bool = False,
+                 activation_offload: bool = False) -> None:
         super().__init__()
         self.embed = GPTEmbedding(embedding_dim=dim,
                                   vocab_size=vocab_size,
@@ -274,6 +276,7 @@ class GPT(nn.Module):
                      apply_post_layernorm=apply_post_layernorm,
                      fuse_scale_mask_softmax=fuse_scale_mask_softmax,
                      checkpoint=checkpoint,
+                     activation_offload=activation_offload
                      ) for _ in range(depth)
         ])
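
Usage note (not part of the patch): a minimal sketch of how the new flag might be
passed through to the model. The import path and the constructor keywords
vocab_size, dim, and depth are taken from the diff context; their values below are
purely illustrative. Since activation_offload is forwarded to CheckpointModule
alongside the checkpoint flag, it presumably only takes effect when checkpointing
is enabled.

    from model_zoo.gpt.gpt import GPT

    # Illustrative configuration; only checkpoint and activation_offload are the
    # subject of this patch, the remaining values are placeholder hyperparameters.
    model = GPT(vocab_size=50304,
                dim=768,
                depth=12,
                checkpoint=True,            # enable activation checkpointing in each GPTBlock
                activation_offload=True)    # new flag: offload checkpointed activations
                                            # (presumably to host memory) during training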