mirror of https://github.com/hpcaitech/ColossalAI
Update flash_attention_patch.py
To stay compatible with a recent change in the Transformers library, which added a new 'padding_mask' argument to the attention layer's forward function: https://github.com/huggingface/transformers/pull/25598
pull/4918/head
parent 611a5a80ca
commit 7768afbad0
@@ -65,6 +65,7 @@ def attention_forward(
     past_key_value: Optional[Tuple[torch.Tensor]] = None,
     output_attentions: bool = False,
     use_cache: bool = False,
+    **kwargs
 ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
     """
     Re-define LLaMA-2 `LlamaAttention` forward method using flash-attention.
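For context, a minimal sketch of the compatibility technique this commit relies on: the patched forward accepts a trailing **kwargs so that the 'padding_mask' keyword newer Transformers versions pass to attention layers is absorbed instead of raising a TypeError, and the function is then monkey-patched onto LlamaAttention. Only LlamaAttention is a real Transformers symbol here; the body of attention_forward and the replace_with_flash_attention helper are illustrative assumptions, not the actual ColossalAI implementation.

from typing import Optional, Tuple

import torch
from transformers.models.llama.modeling_llama import LlamaAttention

# Keep a handle on the stock forward so the sketch can delegate to it
# without recursing once the class has been patched.
_original_llama_forward = LlamaAttention.forward


def attention_forward(
    self: LlamaAttention,
    hidden_states: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    output_attentions: bool = False,
    use_cache: bool = False,
    **kwargs,  # absorbs 'padding_mask' and any future keyword arguments
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
    # A real patch would call a flash-attention kernel here; delegating to
    # the original implementation keeps this illustration runnable.
    return _original_llama_forward(
        self,
        hidden_states,
        attention_mask=attention_mask,
        position_ids=position_ids,
        past_key_value=past_key_value,
        output_attentions=output_attentions,
        use_cache=use_cache,
    )


def replace_with_flash_attention() -> None:
    # Monkey-patch at the class level so every LlamaAttention instance,
    # existing or future, routes through the patched forward.
    LlamaAttention.forward = attention_forward

Calling replace_with_flash_attention() once, before the model is instantiated or loaded, is enough: because the override sits on the class rather than on individual modules, it applies to the attention block of every decoder layer.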