diff --git a/colossalai/shardformer/layer/qkv_fused_linear.py b/colossalai/shardformer/layer/qkv_fused_linear.py index 0f6595a7c..000934ad9 100644 --- a/colossalai/shardformer/layer/qkv_fused_linear.py +++ b/colossalai/shardformer/layer/qkv_fused_linear.py @@ -695,6 +695,7 @@ class FusedLinear1D_Col(ParallelModule): process_group=process_group, weight=module.weight, bias_=module.bias, + n_fused=n_fused, *args, **kwargs, )