Merge pull request #6064 from wangbluo/fix_attn

[sp]: fix the attention kernel for sp
Wang Binluo 2024-09-18 10:08:15 +08:00 committed by GitHub
commit 63314ce4e4
1 changed file with 2 additions and 1 deletion


@@ -121,7 +121,8 @@ class ColoAttention:
             )
         )
         if size >= MEMORY_BOUND:
-            ColoAttention._flash_kernel_dispatch = ColoAttention._flash_kernel_dispatch.load()
+            if isinstance(ColoAttention._flash_kernel_dispatch, KernelLoader):
+                ColoAttention._flash_kernel_dispatch = ColoAttention._flash_kernel_dispatch.load()
         # lazy load
         if isinstance(ColoAttention._kernel_dispatch_map[dtype][mask_type], KernelLoader):
             ColoAttention._kernel_dispatch_map[dtype][mask_type] = ColoAttention._kernel_dispatch_map[dtype][
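
The added guard mirrors the `isinstance` check the same hunk already applies to `_kernel_dispatch_map`: the dispatch attribute starts out as a `KernelLoader` and is overwritten with the loaded kernel on first use, so a bare second call to `.load()` would hit the already-loaded kernel object and fail. A minimal runnable sketch of that lazy-load pattern (this `KernelLoader` and `get_flash_kernel` are simplified stand-ins for illustration, not ColossalAI's actual API):

# Minimal sketch of the lazy-load guard this patch adds. KernelLoader and
# get_flash_kernel() below are simplified stand-ins, not ColossalAI's real API.
class KernelLoader:
    def load(self):
        # Returns the concrete kernel; the loader object is then discarded.
        def flash_attention_kernel(*args, **kwargs):
            raise NotImplementedError("stand-in for a real flash-attention op")

        return flash_attention_kernel


class ColoAttention:
    # Starts out as a loader and is replaced by the loaded kernel on first use.
    _flash_kernel_dispatch = KernelLoader()

    @staticmethod
    def get_flash_kernel():
        # Without the isinstance guard, a second call would invoke .load()
        # on the already-loaded kernel function and raise AttributeError.
        if isinstance(ColoAttention._flash_kernel_dispatch, KernelLoader):
            ColoAttention._flash_kernel_dispatch = ColoAttention._flash_kernel_dispatch.load()
        return ColoAttention._flash_kernel_dispatch


kernel = ColoAttention.get_flash_kernel()  # first call: actually loads
kernel = ColoAttention.get_flash_kernel()  # second call: guard skips .load()

Caching the loaded kernel on the class keeps repeated dispatches cheap while still deferring the kernel load until the first input large enough to cross MEMORY_BOUND, which is what the original `# lazy load` comment describes.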