# Optional Triton kernel exports.
#
# HAS_TRITON records whether ``import triton`` succeeded. When it did, the
# kernel entry points are re-exported from the sibling modules and listed in
# ``__all__``; otherwise only HAS_TRITON is defined and a notice is printed.

try:
    # Imported purely as an availability probe; the name is not used below.
    import triton

    HAS_TRITON = True
except ImportError:
    HAS_TRITON = False
    print("Triton is not installed. Please install Triton to use Triton kernels.")

# Import errors may still occur below even when Triton itself is installed.
if HAS_TRITON:
    from .context_attn_unpad import context_attention_unpadded
    from .flash_decoding import flash_decoding_attention
    from .fused_rotary_embedding import fused_rotary_embedding
    from .kvcache_copy import copy_k_to_blocked_cache, copy_kv_to_blocked_cache
    from .no_pad_rotary_embedding import decoding_fused_rotary_embedding, rotary_embedding
    from .rms_layernorm import rms_layernorm
    from .rotary_cache_copy import get_xine_cache
    from .softmax import softmax

    # Kept inside the guard so that ``__all__`` never names symbols that were
    # not imported (which would break ``from <package> import *``).
    __all__ = [
        "context_attention_unpadded",
        "flash_decoding_attention",
        "copy_k_to_blocked_cache",
        "copy_kv_to_blocked_cache",
        "softmax",
        "rms_layernorm",
        "rotary_embedding",
        "fused_rotary_embedding",
        "get_xine_cache",
        "decoding_fused_rotary_embedding",
    ]