diff --git a/colossalai/kernel/cuda_native/mha/__init__.py b/colossalai/kernel/cuda_native/mha/__init__.py
new file mode 100644
index 000000000..21fddd512
--- /dev/null
+++ b/colossalai/kernel/cuda_native/mha/__init__.py
@@ -0,0 +1,3 @@
+from .mha import ColoAttention
+
+__all__ = ['ColoAttention']
diff --git a/tests/test_utils/test_flash_attention.py b/tests/test_utils/test_flash_attention.py
index d41ccd832..fbcc45265 100644
--- a/tests/test_utils/test_flash_attention.py
+++ b/tests/test_utils/test_flash_attention.py
@@ -9,7 +9,7 @@ from colossalai.kernel.cuda_native.mha.mem_eff_attn import HAS_MEM_EFF_ATTN
 from colossalai.testing import clear_cache_before_run, parameterize
 
 if HAS_MEM_EFF_ATTN or HAS_FLASH_ATTN:
-    from colossalai.kernel.cuda_native.mha.mha import ColoAttention
+    from colossalai.kernel.cuda_native import ColoAttention
     from colossalai.kernel.cuda_native.scaled_softmax import AttnMaskType
 
 DTYPE = [torch.float16, torch.bfloat16, torch.float32]
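
Note: the test now imports ColoAttention from the package root colossalai.kernel.cuda_native instead of the colossalai.kernel.cuda_native.mha.mha submodule. For that import to resolve, colossalai/kernel/cuda_native/__init__.py must also re-export the symbol; a minimal sketch of that re-export (assumed here, it is not part of the hunks shown above) would be:

    # colossalai/kernel/cuda_native/__init__.py
    # Assumed re-export so callers can write:
    #   from colossalai.kernel.cuda_native import ColoAttention
    from .mha import ColoAttention

    __all__ = ['ColoAttention']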