ColossalAI/colossalai/kernel/cuda_native/scaled_softmax.py

192 lines
6.5 KiB
Python
Raw Normal View History

2021-12-21 04:19:52 +00:00
"""This code from NVIDIA Megatron
with some changes. """
import torch
import torch.nn as nn
import enum
class AttnMaskType(enum.Enum):
padding = 1
causal = 2
class ScaledUpperTriangMaskedSoftmax(torch.autograd.Function):
"""
Fused operation which performs following three operations in sequence
2022-01-21 02:44:30 +00:00
1. Scale the tensor.
2. Apply upper triangular mask (typically used in gpt models).
3. Perform softmax.
2021-12-21 04:19:52 +00:00
"""
@staticmethod
def forward(ctx, inputs, scale):
2022-01-13 08:47:17 +00:00
try:
import colossal_scaled_upper_triang_masked_softmax
except ImportError:
raise RuntimeError('ScaledUpperTriangMaskedSoftmax requires cuda extensions')
2021-12-21 04:19:52 +00:00
scale_t = torch.tensor([scale])
[NFC] Hotfix/format (#984) * [NFC] Polish colossalai/kernel/cuda_native/csrc/multi_tensor_lamb.cu code style. (#937) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/cuda_util.h code style (#939) * [NFC] polish colossalai/kernel/cuda_native/csrc/cpu_adam.cpp code style (#936) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/block_reduce.h code style (#938) * [NFC] polish moe_cuda_kernel.cu code style (#940) Co-authored-by: Xiao Ye <xiaoye2@illinois.edu> * [NFC] polish pre-commit run --files colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax_cuda.cu code style (#943) * [NFC] polish colossalai/kernel/cuda_native/csrc/moe_cuda.cpp code style (#942) * [NFC] polish colossalai/kernel/cuda_native/csrc/cpu_adam.h code style (#945) * [NFC] polish colossalai/kernel/jit/bias_gelu.py code style (#946) Co-authored-by: jnbai <897086360@qq.com> * [NFC] polish colossalai/kernel/cuda_native/csrc/scaled_masked_softmax_cuda.cu code style (#949) Co-authored-by: Jiatong <jiatong.han@u.nus.edu> * [NFC] polish colossalai/builder/pipeline.py code style (#951) * [NFC] polish colossalai/kernel/cuda_native/csrc/multihead_attention_1d.cpp code style (#952) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/cross_entropy.cu code style (#953) Co-authored-by: 何晓昕 <cautious@hexiaoxins-MacBook-Pro.local> * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/softmax_kernels.cu code style (#954) * [NFC] polish colossalai/kernel/cuda_native/scaled_softmax.py code style (#955) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/context.h code style (#956) Co-authored-by: RichardoLuo <14049555596@qq.com> * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/cross_entropy_layer.h code style (#957) * [NFC] polish colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu code style (#958) * [NFC] polish colossalai/kernel/cuda_native/csrc/multihead_attention_1d.h code style (#962) * [NFC] polish colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax.cpp code style (#959) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/general_kernels.cu code style (#963) Co-authored-by: “Arsmart123 <202476410arsmart@gmail.com> * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/softmax.h code style (#964) * [NFC] polish __init__.py code style (#965) * [NFC] polish colossalai/nn/layer/parallel_3d/layers.py code style (#966) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/feed_forward.h (#968) code style * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/dropout.h code style (#970) * [NFC] polish colossalai/nn/layer/parallel_2p5d/layers.py code style (#972) * [NFC] polish colossalai/kernel/cuda_native/csrc/layer_norm_cuda.cpp code style (#973) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/normalize_kernels.cu code style (#974) * [NFC] polish colossalai/kernel/cuda_native/csrc/multi_tensor_scale_kernel.cu code style (#977) * [NFC] polish colossalai/nn/layer/parallel_2d/layers.py code style (#976) * [NFC] polish colossalai/kernel/cuda_native/csrc/multi_tensor_sgd_kernel.cu code style (#978) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/dropout_kernels.cu code style (#979) * [NFC] polish colossalai/kernel/cuda_native/layer_norm.py code style (#980) * [NFC] polish colossalai/nn/layer/utils/common.py code style (#983) Co-authored-by: BoxiangW <45734921+BoxiangW@users.noreply.github.com> Co-authored-by: yuxuan-lou <83441848+yuxuan-lou@users.noreply.github.com> Co-authored-by: Geng Zhang <34452939+zxgx@users.noreply.github.com> Co-authored-by: Maruyama_Aya <38985202+MaruyamaAya@users.noreply.github.com> Co-authored-by: XYE <92607131+Itok2000u@users.noreply.github.com> Co-authored-by: Xiao Ye <xiaoye2@illinois.edu> Co-authored-by: HaoyuQin <79465534+coder-chin@users.noreply.github.com> Co-authored-by: wky <64853922+wangkuangyi@users.noreply.github.com> Co-authored-by: bajiaoyu517 <59548007+bajiaoyu517@users.noreply.github.com> Co-authored-by: luoling-LC <105470086+luoling-LC@users.noreply.github.com> Co-authored-by: jnbai <897086360@qq.com> Co-authored-by: JT.Han <59948448+JThh@users.noreply.github.com> Co-authored-by: Jiatong <jiatong.han@u.nus.edu> Co-authored-by: xyupeng <99191637+xyupeng@users.noreply.github.com> Co-authored-by: Sze-qq <68757353+Sze-qq@users.noreply.github.com> Co-authored-by: Cautiousss <48676630+Cautiousss@users.noreply.github.com> Co-authored-by: 何晓昕 <cautious@hexiaoxins-MacBook-Pro.local> Co-authored-by: Luxios22 <67457897+Luxios22@users.noreply.github.com> Co-authored-by: Wangbo Zhao(黑色枷锁) <56866854+wangbo-zhao@users.noreply.github.com> Co-authored-by: RichardoLuo <50363844+RichardoLuo@users.noreply.github.com> Co-authored-by: RichardoLuo <14049555596@qq.com> Co-authored-by: doubleHU <98150031+huxin711@users.noreply.github.com> Co-authored-by: runluo <68489000+run-qiao@users.noreply.github.com> Co-authored-by: MaxT <854721132@qq.com> Co-authored-by: superhao1995 <804673818@qq.com> Co-authored-by: ziyu huang <huang0ziyu@gmail.com> Co-authored-by: “Arsmart123 <202476410arsmart@gmail.com> Co-authored-by: Yuer867 <62204893+Yuer867@users.noreply.github.com> Co-authored-by: lucasliunju <lucasliunju@gmail.com> Co-authored-by: LuGY <74758262+Gy-Lu@users.noreply.github.com> Co-authored-by: ExtremeViscent <zhangyiqi55732@sina.com> Co-authored-by: Xu Kai <xukai16@foxmail.com> Co-authored-by: Zirui Zhu <zhuzr21@gmail.com> Co-authored-by: Ofey Chan <ofey206@gmail.com> Co-authored-by: DouJS <dujiangsu@163.com> Co-authored-by: Jie Zhu <chore.08-protist@icloud.com> Co-authored-by: shenggan <csg19971016@gmail.com> Co-authored-by: Kai Wang (Victor Kai) <37533040+kaiwang960112@users.noreply.github.com> Co-authored-by: puck_WCR <46049915+WANG-CR@users.noreply.github.com> Co-authored-by: Ziheng Qin <37519855+henryqin1997@users.noreply.github.com>
2022-05-17 01:54:49 +00:00
softmax_results = colossal_scaled_upper_triang_masked_softmax.forward(inputs, scale_t[0])
2021-12-21 04:19:52 +00:00
ctx.save_for_backward(softmax_results, scale_t)
return softmax_results
@staticmethod
def backward(ctx, output_grads):
2022-01-13 08:47:17 +00:00
try:
import colossal_scaled_upper_triang_masked_softmax
except ImportError:
raise RuntimeError('ScaledUpperTriangMaskedSoftmax requires cuda extensions')
2021-12-21 04:19:52 +00:00
softmax_results, scale_t = ctx.saved_tensors
[NFC] Hotfix/format (#984) * [NFC] Polish colossalai/kernel/cuda_native/csrc/multi_tensor_lamb.cu code style. (#937) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/cuda_util.h code style (#939) * [NFC] polish colossalai/kernel/cuda_native/csrc/cpu_adam.cpp code style (#936) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/block_reduce.h code style (#938) * [NFC] polish moe_cuda_kernel.cu code style (#940) Co-authored-by: Xiao Ye <xiaoye2@illinois.edu> * [NFC] polish pre-commit run --files colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax_cuda.cu code style (#943) * [NFC] polish colossalai/kernel/cuda_native/csrc/moe_cuda.cpp code style (#942) * [NFC] polish colossalai/kernel/cuda_native/csrc/cpu_adam.h code style (#945) * [NFC] polish colossalai/kernel/jit/bias_gelu.py code style (#946) Co-authored-by: jnbai <897086360@qq.com> * [NFC] polish colossalai/kernel/cuda_native/csrc/scaled_masked_softmax_cuda.cu code style (#949) Co-authored-by: Jiatong <jiatong.han@u.nus.edu> * [NFC] polish colossalai/builder/pipeline.py code style (#951) * [NFC] polish colossalai/kernel/cuda_native/csrc/multihead_attention_1d.cpp code style (#952) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/cross_entropy.cu code style (#953) Co-authored-by: 何晓昕 <cautious@hexiaoxins-MacBook-Pro.local> * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/softmax_kernels.cu code style (#954) * [NFC] polish colossalai/kernel/cuda_native/scaled_softmax.py code style (#955) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/context.h code style (#956) Co-authored-by: RichardoLuo <14049555596@qq.com> * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/cross_entropy_layer.h code style (#957) * [NFC] polish colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu code style (#958) * [NFC] polish colossalai/kernel/cuda_native/csrc/multihead_attention_1d.h code style (#962) * [NFC] polish colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax.cpp code style (#959) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/general_kernels.cu code style (#963) Co-authored-by: “Arsmart123 <202476410arsmart@gmail.com> * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/softmax.h code style (#964) * [NFC] polish __init__.py code style (#965) * [NFC] polish colossalai/nn/layer/parallel_3d/layers.py code style (#966) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/feed_forward.h (#968) code style * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/dropout.h code style (#970) * [NFC] polish colossalai/nn/layer/parallel_2p5d/layers.py code style (#972) * [NFC] polish colossalai/kernel/cuda_native/csrc/layer_norm_cuda.cpp code style (#973) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/normalize_kernels.cu code style (#974) * [NFC] polish colossalai/kernel/cuda_native/csrc/multi_tensor_scale_kernel.cu code style (#977) * [NFC] polish colossalai/nn/layer/parallel_2d/layers.py code style (#976) * [NFC] polish colossalai/kernel/cuda_native/csrc/multi_tensor_sgd_kernel.cu code style (#978) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/dropout_kernels.cu code style (#979) * [NFC] polish colossalai/kernel/cuda_native/layer_norm.py code style (#980) * [NFC] polish colossalai/nn/layer/utils/common.py code style (#983) Co-authored-by: BoxiangW <45734921+BoxiangW@users.noreply.github.com> Co-authored-by: yuxuan-lou <83441848+yuxuan-lou@users.noreply.github.com> Co-authored-by: Geng Zhang <34452939+zxgx@users.noreply.github.com> Co-authored-by: Maruyama_Aya <38985202+MaruyamaAya@users.noreply.github.com> Co-authored-by: XYE <92607131+Itok2000u@users.noreply.github.com> Co-authored-by: Xiao Ye <xiaoye2@illinois.edu> Co-authored-by: HaoyuQin <79465534+coder-chin@users.noreply.github.com> Co-authored-by: wky <64853922+wangkuangyi@users.noreply.github.com> Co-authored-by: bajiaoyu517 <59548007+bajiaoyu517@users.noreply.github.com> Co-authored-by: luoling-LC <105470086+luoling-LC@users.noreply.github.com> Co-authored-by: jnbai <897086360@qq.com> Co-authored-by: JT.Han <59948448+JThh@users.noreply.github.com> Co-authored-by: Jiatong <jiatong.han@u.nus.edu> Co-authored-by: xyupeng <99191637+xyupeng@users.noreply.github.com> Co-authored-by: Sze-qq <68757353+Sze-qq@users.noreply.github.com> Co-authored-by: Cautiousss <48676630+Cautiousss@users.noreply.github.com> Co-authored-by: 何晓昕 <cautious@hexiaoxins-MacBook-Pro.local> Co-authored-by: Luxios22 <67457897+Luxios22@users.noreply.github.com> Co-authored-by: Wangbo Zhao(黑色枷锁) <56866854+wangbo-zhao@users.noreply.github.com> Co-authored-by: RichardoLuo <50363844+RichardoLuo@users.noreply.github.com> Co-authored-by: RichardoLuo <14049555596@qq.com> Co-authored-by: doubleHU <98150031+huxin711@users.noreply.github.com> Co-authored-by: runluo <68489000+run-qiao@users.noreply.github.com> Co-authored-by: MaxT <854721132@qq.com> Co-authored-by: superhao1995 <804673818@qq.com> Co-authored-by: ziyu huang <huang0ziyu@gmail.com> Co-authored-by: “Arsmart123 <202476410arsmart@gmail.com> Co-authored-by: Yuer867 <62204893+Yuer867@users.noreply.github.com> Co-authored-by: lucasliunju <lucasliunju@gmail.com> Co-authored-by: LuGY <74758262+Gy-Lu@users.noreply.github.com> Co-authored-by: ExtremeViscent <zhangyiqi55732@sina.com> Co-authored-by: Xu Kai <xukai16@foxmail.com> Co-authored-by: Zirui Zhu <zhuzr21@gmail.com> Co-authored-by: Ofey Chan <ofey206@gmail.com> Co-authored-by: DouJS <dujiangsu@163.com> Co-authored-by: Jie Zhu <chore.08-protist@icloud.com> Co-authored-by: shenggan <csg19971016@gmail.com> Co-authored-by: Kai Wang (Victor Kai) <37533040+kaiwang960112@users.noreply.github.com> Co-authored-by: puck_WCR <46049915+WANG-CR@users.noreply.github.com> Co-authored-by: Ziheng Qin <37519855+henryqin1997@users.noreply.github.com>
2022-05-17 01:54:49 +00:00
input_grads = colossal_scaled_upper_triang_masked_softmax.backward(output_grads, softmax_results, scale_t[0])
2021-12-21 04:19:52 +00:00
return input_grads, None
class ScaledMaskedSoftmax(torch.autograd.Function):
"""
Fused operation which performs following three operations in sequence
2022-01-21 02:44:30 +00:00
1. Scale the tensor.
2. Apply the mask.
3. Perform softmax.
2021-12-21 04:19:52 +00:00
"""
@staticmethod
def forward(ctx, inputs, mask, scale):
2022-01-13 08:47:17 +00:00
try:
import colossal_scaled_masked_softmax
except ImportError:
raise RuntimeError('ScaledMaskedSoftmax requires cuda extensions')
2021-12-21 04:19:52 +00:00
scale_t = torch.tensor([scale])
softmax_results = colossal_scaled_masked_softmax.forward(inputs, mask, scale_t[0])
ctx.save_for_backward(softmax_results, scale_t)
return softmax_results
@staticmethod
def backward(ctx, output_grads):
2022-01-13 08:47:17 +00:00
try:
import colossal_scaled_masked_softmax
except ImportError:
raise RuntimeError('ScaledMaskedSoftmax requires cuda extensions')
2021-12-21 04:19:52 +00:00
softmax_results, scale_t = ctx.saved_tensors
[NFC] Hotfix/format (#984) * [NFC] Polish colossalai/kernel/cuda_native/csrc/multi_tensor_lamb.cu code style. (#937) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/cuda_util.h code style (#939) * [NFC] polish colossalai/kernel/cuda_native/csrc/cpu_adam.cpp code style (#936) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/block_reduce.h code style (#938) * [NFC] polish moe_cuda_kernel.cu code style (#940) Co-authored-by: Xiao Ye <xiaoye2@illinois.edu> * [NFC] polish pre-commit run --files colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax_cuda.cu code style (#943) * [NFC] polish colossalai/kernel/cuda_native/csrc/moe_cuda.cpp code style (#942) * [NFC] polish colossalai/kernel/cuda_native/csrc/cpu_adam.h code style (#945) * [NFC] polish colossalai/kernel/jit/bias_gelu.py code style (#946) Co-authored-by: jnbai <897086360@qq.com> * [NFC] polish colossalai/kernel/cuda_native/csrc/scaled_masked_softmax_cuda.cu code style (#949) Co-authored-by: Jiatong <jiatong.han@u.nus.edu> * [NFC] polish colossalai/builder/pipeline.py code style (#951) * [NFC] polish colossalai/kernel/cuda_native/csrc/multihead_attention_1d.cpp code style (#952) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/cross_entropy.cu code style (#953) Co-authored-by: 何晓昕 <cautious@hexiaoxins-MacBook-Pro.local> * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/softmax_kernels.cu code style (#954) * [NFC] polish colossalai/kernel/cuda_native/scaled_softmax.py code style (#955) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/context.h code style (#956) Co-authored-by: RichardoLuo <14049555596@qq.com> * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/cross_entropy_layer.h code style (#957) * [NFC] polish colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu code style (#958) * [NFC] polish colossalai/kernel/cuda_native/csrc/multihead_attention_1d.h code style (#962) * [NFC] polish colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax.cpp code style (#959) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/general_kernels.cu code style (#963) Co-authored-by: “Arsmart123 <202476410arsmart@gmail.com> * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/softmax.h code style (#964) * [NFC] polish __init__.py code style (#965) * [NFC] polish colossalai/nn/layer/parallel_3d/layers.py code style (#966) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/feed_forward.h (#968) code style * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/dropout.h code style (#970) * [NFC] polish colossalai/nn/layer/parallel_2p5d/layers.py code style (#972) * [NFC] polish colossalai/kernel/cuda_native/csrc/layer_norm_cuda.cpp code style (#973) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/normalize_kernels.cu code style (#974) * [NFC] polish colossalai/kernel/cuda_native/csrc/multi_tensor_scale_kernel.cu code style (#977) * [NFC] polish colossalai/nn/layer/parallel_2d/layers.py code style (#976) * [NFC] polish colossalai/kernel/cuda_native/csrc/multi_tensor_sgd_kernel.cu code style (#978) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/dropout_kernels.cu code style (#979) * [NFC] polish colossalai/kernel/cuda_native/layer_norm.py code style (#980) * [NFC] polish colossalai/nn/layer/utils/common.py code style (#983) Co-authored-by: BoxiangW <45734921+BoxiangW@users.noreply.github.com> Co-authored-by: yuxuan-lou <83441848+yuxuan-lou@users.noreply.github.com> Co-authored-by: Geng Zhang <34452939+zxgx@users.noreply.github.com> Co-authored-by: Maruyama_Aya <38985202+MaruyamaAya@users.noreply.github.com> Co-authored-by: XYE <92607131+Itok2000u@users.noreply.github.com> Co-authored-by: Xiao Ye <xiaoye2@illinois.edu> Co-authored-by: HaoyuQin <79465534+coder-chin@users.noreply.github.com> Co-authored-by: wky <64853922+wangkuangyi@users.noreply.github.com> Co-authored-by: bajiaoyu517 <59548007+bajiaoyu517@users.noreply.github.com> Co-authored-by: luoling-LC <105470086+luoling-LC@users.noreply.github.com> Co-authored-by: jnbai <897086360@qq.com> Co-authored-by: JT.Han <59948448+JThh@users.noreply.github.com> Co-authored-by: Jiatong <jiatong.han@u.nus.edu> Co-authored-by: xyupeng <99191637+xyupeng@users.noreply.github.com> Co-authored-by: Sze-qq <68757353+Sze-qq@users.noreply.github.com> Co-authored-by: Cautiousss <48676630+Cautiousss@users.noreply.github.com> Co-authored-by: 何晓昕 <cautious@hexiaoxins-MacBook-Pro.local> Co-authored-by: Luxios22 <67457897+Luxios22@users.noreply.github.com> Co-authored-by: Wangbo Zhao(黑色枷锁) <56866854+wangbo-zhao@users.noreply.github.com> Co-authored-by: RichardoLuo <50363844+RichardoLuo@users.noreply.github.com> Co-authored-by: RichardoLuo <14049555596@qq.com> Co-authored-by: doubleHU <98150031+huxin711@users.noreply.github.com> Co-authored-by: runluo <68489000+run-qiao@users.noreply.github.com> Co-authored-by: MaxT <854721132@qq.com> Co-authored-by: superhao1995 <804673818@qq.com> Co-authored-by: ziyu huang <huang0ziyu@gmail.com> Co-authored-by: “Arsmart123 <202476410arsmart@gmail.com> Co-authored-by: Yuer867 <62204893+Yuer867@users.noreply.github.com> Co-authored-by: lucasliunju <lucasliunju@gmail.com> Co-authored-by: LuGY <74758262+Gy-Lu@users.noreply.github.com> Co-authored-by: ExtremeViscent <zhangyiqi55732@sina.com> Co-authored-by: Xu Kai <xukai16@foxmail.com> Co-authored-by: Zirui Zhu <zhuzr21@gmail.com> Co-authored-by: Ofey Chan <ofey206@gmail.com> Co-authored-by: DouJS <dujiangsu@163.com> Co-authored-by: Jie Zhu <chore.08-protist@icloud.com> Co-authored-by: shenggan <csg19971016@gmail.com> Co-authored-by: Kai Wang (Victor Kai) <37533040+kaiwang960112@users.noreply.github.com> Co-authored-by: puck_WCR <46049915+WANG-CR@users.noreply.github.com> Co-authored-by: Ziheng Qin <37519855+henryqin1997@users.noreply.github.com>
2022-05-17 01:54:49 +00:00
input_grads = colossal_scaled_masked_softmax.backward(output_grads, softmax_results, scale_t[0])
2021-12-21 04:19:52 +00:00
return input_grads, None, None
class FusedScaleMaskSoftmax(nn.Module):
"""
2022-01-21 02:44:30 +00:00
Fused operation: scaling + mask + softmax
2021-12-21 04:19:52 +00:00
Arguments:
2022-01-21 02:44:30 +00:00
input_in_fp16: Flag to indicate if input in fp16 data format.
input_in_bf16: Flag to indicate if input in bf16 data format.
attn_mask_type: Attention mask type (pad or causal)
scaled_masked_softmax_fusion: Flag to indicate user want to use softmax fusion
mask_func: Mask function to be applied.
softmax_in_fp32: If True, softmax in performed at fp32 precision.
scale: Scaling factor used in input tensor scaling.
2021-12-21 04:19:52 +00:00
"""
def __init__(
self,
input_in_fp16,
input_in_bf16,
attn_mask_type,
scaled_masked_softmax_fusion,
mask_func,
softmax_in_fp32,
scale,
):
super(FusedScaleMaskSoftmax, self).__init__()
self.input_in_fp16 = input_in_fp16
self.input_in_bf16 = input_in_bf16
[NFC] Hotfix/format (#984) * [NFC] Polish colossalai/kernel/cuda_native/csrc/multi_tensor_lamb.cu code style. (#937) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/cuda_util.h code style (#939) * [NFC] polish colossalai/kernel/cuda_native/csrc/cpu_adam.cpp code style (#936) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/block_reduce.h code style (#938) * [NFC] polish moe_cuda_kernel.cu code style (#940) Co-authored-by: Xiao Ye <xiaoye2@illinois.edu> * [NFC] polish pre-commit run --files colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax_cuda.cu code style (#943) * [NFC] polish colossalai/kernel/cuda_native/csrc/moe_cuda.cpp code style (#942) * [NFC] polish colossalai/kernel/cuda_native/csrc/cpu_adam.h code style (#945) * [NFC] polish colossalai/kernel/jit/bias_gelu.py code style (#946) Co-authored-by: jnbai <897086360@qq.com> * [NFC] polish colossalai/kernel/cuda_native/csrc/scaled_masked_softmax_cuda.cu code style (#949) Co-authored-by: Jiatong <jiatong.han@u.nus.edu> * [NFC] polish colossalai/builder/pipeline.py code style (#951) * [NFC] polish colossalai/kernel/cuda_native/csrc/multihead_attention_1d.cpp code style (#952) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/cross_entropy.cu code style (#953) Co-authored-by: 何晓昕 <cautious@hexiaoxins-MacBook-Pro.local> * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/softmax_kernels.cu code style (#954) * [NFC] polish colossalai/kernel/cuda_native/scaled_softmax.py code style (#955) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/context.h code style (#956) Co-authored-by: RichardoLuo <14049555596@qq.com> * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/cross_entropy_layer.h code style (#957) * [NFC] polish colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu code style (#958) * [NFC] polish colossalai/kernel/cuda_native/csrc/multihead_attention_1d.h code style (#962) * [NFC] polish colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax.cpp code style (#959) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/general_kernels.cu code style (#963) Co-authored-by: “Arsmart123 <202476410arsmart@gmail.com> * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/softmax.h code style (#964) * [NFC] polish __init__.py code style (#965) * [NFC] polish colossalai/nn/layer/parallel_3d/layers.py code style (#966) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/feed_forward.h (#968) code style * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/dropout.h code style (#970) * [NFC] polish colossalai/nn/layer/parallel_2p5d/layers.py code style (#972) * [NFC] polish colossalai/kernel/cuda_native/csrc/layer_norm_cuda.cpp code style (#973) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/normalize_kernels.cu code style (#974) * [NFC] polish colossalai/kernel/cuda_native/csrc/multi_tensor_scale_kernel.cu code style (#977) * [NFC] polish colossalai/nn/layer/parallel_2d/layers.py code style (#976) * [NFC] polish colossalai/kernel/cuda_native/csrc/multi_tensor_sgd_kernel.cu code style (#978) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/dropout_kernels.cu code style (#979) * [NFC] polish colossalai/kernel/cuda_native/layer_norm.py code style (#980) * [NFC] polish colossalai/nn/layer/utils/common.py code style (#983) Co-authored-by: BoxiangW <45734921+BoxiangW@users.noreply.github.com> Co-authored-by: yuxuan-lou <83441848+yuxuan-lou@users.noreply.github.com> Co-authored-by: Geng Zhang <34452939+zxgx@users.noreply.github.com> Co-authored-by: Maruyama_Aya <38985202+MaruyamaAya@users.noreply.github.com> Co-authored-by: XYE <92607131+Itok2000u@users.noreply.github.com> Co-authored-by: Xiao Ye <xiaoye2@illinois.edu> Co-authored-by: HaoyuQin <79465534+coder-chin@users.noreply.github.com> Co-authored-by: wky <64853922+wangkuangyi@users.noreply.github.com> Co-authored-by: bajiaoyu517 <59548007+bajiaoyu517@users.noreply.github.com> Co-authored-by: luoling-LC <105470086+luoling-LC@users.noreply.github.com> Co-authored-by: jnbai <897086360@qq.com> Co-authored-by: JT.Han <59948448+JThh@users.noreply.github.com> Co-authored-by: Jiatong <jiatong.han@u.nus.edu> Co-authored-by: xyupeng <99191637+xyupeng@users.noreply.github.com> Co-authored-by: Sze-qq <68757353+Sze-qq@users.noreply.github.com> Co-authored-by: Cautiousss <48676630+Cautiousss@users.noreply.github.com> Co-authored-by: 何晓昕 <cautious@hexiaoxins-MacBook-Pro.local> Co-authored-by: Luxios22 <67457897+Luxios22@users.noreply.github.com> Co-authored-by: Wangbo Zhao(黑色枷锁) <56866854+wangbo-zhao@users.noreply.github.com> Co-authored-by: RichardoLuo <50363844+RichardoLuo@users.noreply.github.com> Co-authored-by: RichardoLuo <14049555596@qq.com> Co-authored-by: doubleHU <98150031+huxin711@users.noreply.github.com> Co-authored-by: runluo <68489000+run-qiao@users.noreply.github.com> Co-authored-by: MaxT <854721132@qq.com> Co-authored-by: superhao1995 <804673818@qq.com> Co-authored-by: ziyu huang <huang0ziyu@gmail.com> Co-authored-by: “Arsmart123 <202476410arsmart@gmail.com> Co-authored-by: Yuer867 <62204893+Yuer867@users.noreply.github.com> Co-authored-by: lucasliunju <lucasliunju@gmail.com> Co-authored-by: LuGY <74758262+Gy-Lu@users.noreply.github.com> Co-authored-by: ExtremeViscent <zhangyiqi55732@sina.com> Co-authored-by: Xu Kai <xukai16@foxmail.com> Co-authored-by: Zirui Zhu <zhuzr21@gmail.com> Co-authored-by: Ofey Chan <ofey206@gmail.com> Co-authored-by: DouJS <dujiangsu@163.com> Co-authored-by: Jie Zhu <chore.08-protist@icloud.com> Co-authored-by: shenggan <csg19971016@gmail.com> Co-authored-by: Kai Wang (Victor Kai) <37533040+kaiwang960112@users.noreply.github.com> Co-authored-by: puck_WCR <46049915+WANG-CR@users.noreply.github.com> Co-authored-by: Ziheng Qin <37519855+henryqin1997@users.noreply.github.com>
2022-05-17 01:54:49 +00:00
assert not (self.input_in_fp16
and self.input_in_bf16), "both fp16 and bf16 flags cannot be active at the same time."
2021-12-21 04:19:52 +00:00
self.input_in_float16 = self.input_in_fp16 or self.input_in_bf16
self.attn_mask_type = attn_mask_type
self.scaled_masked_softmax_fusion = scaled_masked_softmax_fusion
self.mask_func = mask_func
self.softmax_in_fp32 = softmax_in_fp32
self.scale = scale
[NFC] Hotfix/format (#984) * [NFC] Polish colossalai/kernel/cuda_native/csrc/multi_tensor_lamb.cu code style. (#937) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/cuda_util.h code style (#939) * [NFC] polish colossalai/kernel/cuda_native/csrc/cpu_adam.cpp code style (#936) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/block_reduce.h code style (#938) * [NFC] polish moe_cuda_kernel.cu code style (#940) Co-authored-by: Xiao Ye <xiaoye2@illinois.edu> * [NFC] polish pre-commit run --files colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax_cuda.cu code style (#943) * [NFC] polish colossalai/kernel/cuda_native/csrc/moe_cuda.cpp code style (#942) * [NFC] polish colossalai/kernel/cuda_native/csrc/cpu_adam.h code style (#945) * [NFC] polish colossalai/kernel/jit/bias_gelu.py code style (#946) Co-authored-by: jnbai <897086360@qq.com> * [NFC] polish colossalai/kernel/cuda_native/csrc/scaled_masked_softmax_cuda.cu code style (#949) Co-authored-by: Jiatong <jiatong.han@u.nus.edu> * [NFC] polish colossalai/builder/pipeline.py code style (#951) * [NFC] polish colossalai/kernel/cuda_native/csrc/multihead_attention_1d.cpp code style (#952) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/cross_entropy.cu code style (#953) Co-authored-by: 何晓昕 <cautious@hexiaoxins-MacBook-Pro.local> * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/softmax_kernels.cu code style (#954) * [NFC] polish colossalai/kernel/cuda_native/scaled_softmax.py code style (#955) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/context.h code style (#956) Co-authored-by: RichardoLuo <14049555596@qq.com> * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/cross_entropy_layer.h code style (#957) * [NFC] polish colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu code style (#958) * [NFC] polish colossalai/kernel/cuda_native/csrc/multihead_attention_1d.h code style (#962) * [NFC] polish colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax.cpp code style (#959) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/general_kernels.cu code style (#963) Co-authored-by: “Arsmart123 <202476410arsmart@gmail.com> * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/softmax.h code style (#964) * [NFC] polish __init__.py code style (#965) * [NFC] polish colossalai/nn/layer/parallel_3d/layers.py code style (#966) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/feed_forward.h (#968) code style * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/dropout.h code style (#970) * [NFC] polish colossalai/nn/layer/parallel_2p5d/layers.py code style (#972) * [NFC] polish colossalai/kernel/cuda_native/csrc/layer_norm_cuda.cpp code style (#973) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/normalize_kernels.cu code style (#974) * [NFC] polish colossalai/kernel/cuda_native/csrc/multi_tensor_scale_kernel.cu code style (#977) * [NFC] polish colossalai/nn/layer/parallel_2d/layers.py code style (#976) * [NFC] polish colossalai/kernel/cuda_native/csrc/multi_tensor_sgd_kernel.cu code style (#978) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/dropout_kernels.cu code style (#979) * [NFC] polish colossalai/kernel/cuda_native/layer_norm.py code style (#980) * [NFC] polish colossalai/nn/layer/utils/common.py code style (#983) Co-authored-by: BoxiangW <45734921+BoxiangW@users.noreply.github.com> Co-authored-by: yuxuan-lou <83441848+yuxuan-lou@users.noreply.github.com> Co-authored-by: Geng Zhang <34452939+zxgx@users.noreply.github.com> Co-authored-by: Maruyama_Aya <38985202+MaruyamaAya@users.noreply.github.com> Co-authored-by: XYE <92607131+Itok2000u@users.noreply.github.com> Co-authored-by: Xiao Ye <xiaoye2@illinois.edu> Co-authored-by: HaoyuQin <79465534+coder-chin@users.noreply.github.com> Co-authored-by: wky <64853922+wangkuangyi@users.noreply.github.com> Co-authored-by: bajiaoyu517 <59548007+bajiaoyu517@users.noreply.github.com> Co-authored-by: luoling-LC <105470086+luoling-LC@users.noreply.github.com> Co-authored-by: jnbai <897086360@qq.com> Co-authored-by: JT.Han <59948448+JThh@users.noreply.github.com> Co-authored-by: Jiatong <jiatong.han@u.nus.edu> Co-authored-by: xyupeng <99191637+xyupeng@users.noreply.github.com> Co-authored-by: Sze-qq <68757353+Sze-qq@users.noreply.github.com> Co-authored-by: Cautiousss <48676630+Cautiousss@users.noreply.github.com> Co-authored-by: 何晓昕 <cautious@hexiaoxins-MacBook-Pro.local> Co-authored-by: Luxios22 <67457897+Luxios22@users.noreply.github.com> Co-authored-by: Wangbo Zhao(黑色枷锁) <56866854+wangbo-zhao@users.noreply.github.com> Co-authored-by: RichardoLuo <50363844+RichardoLuo@users.noreply.github.com> Co-authored-by: RichardoLuo <14049555596@qq.com> Co-authored-by: doubleHU <98150031+huxin711@users.noreply.github.com> Co-authored-by: runluo <68489000+run-qiao@users.noreply.github.com> Co-authored-by: MaxT <854721132@qq.com> Co-authored-by: superhao1995 <804673818@qq.com> Co-authored-by: ziyu huang <huang0ziyu@gmail.com> Co-authored-by: “Arsmart123 <202476410arsmart@gmail.com> Co-authored-by: Yuer867 <62204893+Yuer867@users.noreply.github.com> Co-authored-by: lucasliunju <lucasliunju@gmail.com> Co-authored-by: LuGY <74758262+Gy-Lu@users.noreply.github.com> Co-authored-by: ExtremeViscent <zhangyiqi55732@sina.com> Co-authored-by: Xu Kai <xukai16@foxmail.com> Co-authored-by: Zirui Zhu <zhuzr21@gmail.com> Co-authored-by: Ofey Chan <ofey206@gmail.com> Co-authored-by: DouJS <dujiangsu@163.com> Co-authored-by: Jie Zhu <chore.08-protist@icloud.com> Co-authored-by: shenggan <csg19971016@gmail.com> Co-authored-by: Kai Wang (Victor Kai) <37533040+kaiwang960112@users.noreply.github.com> Co-authored-by: puck_WCR <46049915+WANG-CR@users.noreply.github.com> Co-authored-by: Ziheng Qin <37519855+henryqin1997@users.noreply.github.com>
2022-05-17 01:54:49 +00:00
assert (self.scale is None or softmax_in_fp32), "softmax should be in fp32 when scaled"
2021-12-21 04:19:52 +00:00
def forward(self, input, mask):
# [b, np, sq, sk]
assert input.dim() == 4
if self.is_kernel_available(mask, *input.size()):
return self.forward_fused_softmax(input, mask)
else:
return self.forward_torch_softmax(input, mask)
def is_kernel_available(self, mask, b, np, sq, sk):
attn_batches = b * np
[NFC] Hotfix/format (#984) * [NFC] Polish colossalai/kernel/cuda_native/csrc/multi_tensor_lamb.cu code style. (#937) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/cuda_util.h code style (#939) * [NFC] polish colossalai/kernel/cuda_native/csrc/cpu_adam.cpp code style (#936) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/block_reduce.h code style (#938) * [NFC] polish moe_cuda_kernel.cu code style (#940) Co-authored-by: Xiao Ye <xiaoye2@illinois.edu> * [NFC] polish pre-commit run --files colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax_cuda.cu code style (#943) * [NFC] polish colossalai/kernel/cuda_native/csrc/moe_cuda.cpp code style (#942) * [NFC] polish colossalai/kernel/cuda_native/csrc/cpu_adam.h code style (#945) * [NFC] polish colossalai/kernel/jit/bias_gelu.py code style (#946) Co-authored-by: jnbai <897086360@qq.com> * [NFC] polish colossalai/kernel/cuda_native/csrc/scaled_masked_softmax_cuda.cu code style (#949) Co-authored-by: Jiatong <jiatong.han@u.nus.edu> * [NFC] polish colossalai/builder/pipeline.py code style (#951) * [NFC] polish colossalai/kernel/cuda_native/csrc/multihead_attention_1d.cpp code style (#952) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/cross_entropy.cu code style (#953) Co-authored-by: 何晓昕 <cautious@hexiaoxins-MacBook-Pro.local> * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/softmax_kernels.cu code style (#954) * [NFC] polish colossalai/kernel/cuda_native/scaled_softmax.py code style (#955) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/context.h code style (#956) Co-authored-by: RichardoLuo <14049555596@qq.com> * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/cross_entropy_layer.h code style (#957) * [NFC] polish colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu code style (#958) * [NFC] polish colossalai/kernel/cuda_native/csrc/multihead_attention_1d.h code style (#962) * [NFC] polish colossalai/kernel/cuda_native/csrc/scaled_upper_triang_masked_softmax.cpp code style (#959) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/general_kernels.cu code style (#963) Co-authored-by: “Arsmart123 <202476410arsmart@gmail.com> * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/softmax.h code style (#964) * [NFC] polish __init__.py code style (#965) * [NFC] polish colossalai/nn/layer/parallel_3d/layers.py code style (#966) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/feed_forward.h (#968) code style * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/dropout.h code style (#970) * [NFC] polish colossalai/nn/layer/parallel_2p5d/layers.py code style (#972) * [NFC] polish colossalai/kernel/cuda_native/csrc/layer_norm_cuda.cpp code style (#973) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/normalize_kernels.cu code style (#974) * [NFC] polish colossalai/kernel/cuda_native/csrc/multi_tensor_scale_kernel.cu code style (#977) * [NFC] polish colossalai/nn/layer/parallel_2d/layers.py code style (#976) * [NFC] polish colossalai/kernel/cuda_native/csrc/multi_tensor_sgd_kernel.cu code style (#978) * [NFC] polish colossalai/kernel/cuda_native/csrc/kernels/dropout_kernels.cu code style (#979) * [NFC] polish colossalai/kernel/cuda_native/layer_norm.py code style (#980) * [NFC] polish colossalai/nn/layer/utils/common.py code style (#983) Co-authored-by: BoxiangW <45734921+BoxiangW@users.noreply.github.com> Co-authored-by: yuxuan-lou <83441848+yuxuan-lou@users.noreply.github.com> Co-authored-by: Geng Zhang <34452939+zxgx@users.noreply.github.com> Co-authored-by: Maruyama_Aya <38985202+MaruyamaAya@users.noreply.github.com> Co-authored-by: XYE <92607131+Itok2000u@users.noreply.github.com> Co-authored-by: Xiao Ye <xiaoye2@illinois.edu> Co-authored-by: HaoyuQin <79465534+coder-chin@users.noreply.github.com> Co-authored-by: wky <64853922+wangkuangyi@users.noreply.github.com> Co-authored-by: bajiaoyu517 <59548007+bajiaoyu517@users.noreply.github.com> Co-authored-by: luoling-LC <105470086+luoling-LC@users.noreply.github.com> Co-authored-by: jnbai <897086360@qq.com> Co-authored-by: JT.Han <59948448+JThh@users.noreply.github.com> Co-authored-by: Jiatong <jiatong.han@u.nus.edu> Co-authored-by: xyupeng <99191637+xyupeng@users.noreply.github.com> Co-authored-by: Sze-qq <68757353+Sze-qq@users.noreply.github.com> Co-authored-by: Cautiousss <48676630+Cautiousss@users.noreply.github.com> Co-authored-by: 何晓昕 <cautious@hexiaoxins-MacBook-Pro.local> Co-authored-by: Luxios22 <67457897+Luxios22@users.noreply.github.com> Co-authored-by: Wangbo Zhao(黑色枷锁) <56866854+wangbo-zhao@users.noreply.github.com> Co-authored-by: RichardoLuo <50363844+RichardoLuo@users.noreply.github.com> Co-authored-by: RichardoLuo <14049555596@qq.com> Co-authored-by: doubleHU <98150031+huxin711@users.noreply.github.com> Co-authored-by: runluo <68489000+run-qiao@users.noreply.github.com> Co-authored-by: MaxT <854721132@qq.com> Co-authored-by: superhao1995 <804673818@qq.com> Co-authored-by: ziyu huang <huang0ziyu@gmail.com> Co-authored-by: “Arsmart123 <202476410arsmart@gmail.com> Co-authored-by: Yuer867 <62204893+Yuer867@users.noreply.github.com> Co-authored-by: lucasliunju <lucasliunju@gmail.com> Co-authored-by: LuGY <74758262+Gy-Lu@users.noreply.github.com> Co-authored-by: ExtremeViscent <zhangyiqi55732@sina.com> Co-authored-by: Xu Kai <xukai16@foxmail.com> Co-authored-by: Zirui Zhu <zhuzr21@gmail.com> Co-authored-by: Ofey Chan <ofey206@gmail.com> Co-authored-by: DouJS <dujiangsu@163.com> Co-authored-by: Jie Zhu <chore.08-protist@icloud.com> Co-authored-by: shenggan <csg19971016@gmail.com> Co-authored-by: Kai Wang (Victor Kai) <37533040+kaiwang960112@users.noreply.github.com> Co-authored-by: puck_WCR <46049915+WANG-CR@users.noreply.github.com> Co-authored-by: Ziheng Qin <37519855+henryqin1997@users.noreply.github.com>
2022-05-17 01:54:49 +00:00
if (self.scaled_masked_softmax_fusion # user want to fuse
and self.input_in_float16 # input must be fp16
and mask is not None # mask tensor must not be None
and 16 < sk <= 2048 # sk must be 16 ~ 2048
and sq % 4 == 0 # sq must be divisor of 4
and attn_batches % 4 == 0 # np * b must be divisor of 4
):
2021-12-21 04:19:52 +00:00
if 0 <= sk <= 2048:
batch_per_block = self.get_batch_per_block(sq, sk, b, np)
if self.attn_mask_type == AttnMaskType.causal:
if attn_batches % batch_per_block == 0:
return True
else:
if sq % batch_per_block == 0:
return True
return False
def forward_fused_softmax(self, input, mask):
b, np, sq, sk = input.size()
scale = self.scale if self.scale is not None else 1.0
if self.attn_mask_type == AttnMaskType.causal:
assert sq == sk, "causal mask is only for self attention"
# input is 3D tensor (attn_batches, sq, sk)
input = input.view(-1, sq, sk)
probs = ScaledUpperTriangMaskedSoftmax.apply(input, scale)
return probs.view(b, np, sq, sk)
else:
# input is 4D tensor (b, np, sq, sk)
return ScaledMaskedSoftmax.apply(input, mask, scale)
def forward_torch_softmax(self, input, mask):
if self.input_in_float16 and self.softmax_in_fp32:
input = input.float()
if self.scale is not None:
input = input * self.scale
mask_output = self.mask_func(input, mask) if mask is not None else input
probs = torch.nn.Softmax(dim=-1)(mask_output)
if self.input_in_float16 and self.softmax_in_fp32:
if self.input_in_fp16:
probs = probs.half()
else:
probs = probs.bfloat16()
return probs
@staticmethod
def get_batch_per_block(sq, sk, b, np):
2022-01-13 08:47:17 +00:00
try:
import colossal_scaled_masked_softmax
except ImportError:
raise RuntimeError('ScaledMaskedSoftmax requires cuda extensions')
2021-12-21 04:19:52 +00:00
return colossal_scaled_masked_softmax.get_batch_per_block(sq, sk, b, np)