[shardformer/sequence parallel] OPT does not support sequence parallelism yet; add a warning and fix a bug in gpt2 pp (#4488)

1 year ago · 351351a36e
parent 5545114fd8
commit 351351a36e
2 changed files with 5 additions and 1 deletion
--- a/colossalai/shardformer/policies/opt.py
+++ b/colossalai/shardformer/policies/opt.py
@@ -1,3 +1,4 @@
 import warnings
 from functools import partial
 from typing import Callable, Dict, List
@@ -39,6 +40,9 @@ class OPTPolicy(Policy):
        from transformers.models.opt.modeling_opt import OPTAttention, OPTDecoder, OPTDecoderLayer
        policy = {}
        if self.shard_config.enable_sequence_parallelism:
            self.shard_config.enable_sequence_parallelism = False
            warnings.warn("OPT dosen't support sequence parallelism now, will ignore the sequence parallelism flag.")
        if self.shard_config.enable_tensor_parallelism:
            policy[OPTDecoder] = ModulePolicyDescription(sub_module_replacement=[