
Merge pull request #6064 from wangbluo/fix_attn

[sp]: fix the attention kernel for sequence parallelism (sp)
Wang Binluo committed 2 months ago via GitHub
commit 63314ce4e4
1 changed file: colossalai/shardformer/layer/attn.py (+2 -1)
@@ -121,7 +121,8 @@ class ColoAttention:
             )
             if size >= MEMORY_BOUND:
-                ColoAttention._flash_kernel_dispatch = ColoAttention._flash_kernel_dispatch.load()
+                if isinstance(ColoAttention._flash_kernel_dispatch, KernelLoader):
+                    ColoAttention._flash_kernel_dispatch = ColoAttention._flash_kernel_dispatch.load()
         # lazy load
         if isinstance(ColoAttention._kernel_dispatch_map[dtype][mask_type], KernelLoader):
             ColoAttention._kernel_dispatch_map[dtype][mask_type] = ColoAttention._kernel_dispatch_map[dtype][
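The change guards the lazy kernel load: `ColoAttention._flash_kernel_dispatch` starts out as a `KernelLoader` and is replaced by the loaded kernel the first time a large enough input is dispatched. Before this fix, any later call over the memory bound would invoke `.load()` on the already-loaded kernel and fail. A minimal self-contained sketch of that pattern follows; the `KernelLoader` and `dispatch` here are simplified stand-ins, not ColossalAI's real API:

```python
class KernelLoader:
    """Stand-in loader: resolves to the real kernel on first use."""

    def __init__(self, name: str):
        self.name = name

    def load(self):
        # The real library would select and import a flash-attention kernel
        # here; this stand-in just returns a plain function.
        def kernel(*args, **kwargs):
            return f"ran {self.name}"

        return kernel


class ColoAttentionSketch:
    MEMORY_BOUND = 1024
    # Starts as a loader; replaced by the loaded kernel on first dispatch.
    _flash_kernel_dispatch = KernelLoader("flash_attn")

    @classmethod
    def dispatch(cls, size: int):
        if size >= cls.MEMORY_BOUND:
            # The fix: only call .load() while we still hold a KernelLoader.
            # Without this guard, a second oversized call would invoke
            # .load() on a plain function and raise AttributeError.
            if isinstance(cls._flash_kernel_dispatch, KernelLoader):
                cls._flash_kernel_dispatch = cls._flash_kernel_dispatch.load()
            return cls._flash_kernel_dispatch()
        return "ran fallback kernel"


print(ColoAttentionSketch.dispatch(2048))  # first call: loads, then runs
print(ColoAttentionSketch.dispatch(2048))  # second call: runs without re-loading
```

The same `isinstance(..., KernelLoader)` guard already existed for `_kernel_dispatch_map` (the `# lazy load` branch in the diff); the fix applies it to the flash-attention path as well.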
