From 2a915a8b62982972834a45a6542ee8d3fcea7fe7 Mon Sep 17 00:00:00 2001
From: Xu Kai
Date: Thu, 31 Mar 2022 15:13:01 +0800
Subject: [PATCH] fix format (#568)

---
 colossalai/kernel/__init__.py                 |   4 +-
 .../cuda_native/csrc/colossal_C_frontend.cpp  | 102 +++++++-----------
 2 files changed, 41 insertions(+), 65 deletions(-)

diff --git a/colossalai/kernel/__init__.py b/colossalai/kernel/__init__.py
index d3d0be02b..42c95729a 100644
--- a/colossalai/kernel/__init__.py
+++ b/colossalai/kernel/__init__.py
@@ -1,5 +1,3 @@
 from .cuda_native import LayerNorm, FusedScaleMaskSoftmax, MultiHeadAttention
 
-__all__ = [
-    "LayerNorm", "FusedScaleMaskSoftmax", "MultiHeadAttention"
-]
+__all__ = ["LayerNorm", "FusedScaleMaskSoftmax", "MultiHeadAttention"]
diff --git a/colossalai/kernel/cuda_native/csrc/colossal_C_frontend.cpp b/colossalai/kernel/cuda_native/csrc/colossal_C_frontend.cpp
index 735caf54e..17ab1741e 100644
--- a/colossalai/kernel/cuda_native/csrc/colossal_C_frontend.cpp
+++ b/colossalai/kernel/cuda_native/csrc/colossal_C_frontend.cpp
@@ -1,71 +1,49 @@
-// modified from https://github.com/NVIDIA/apex/blob/master/csrc/multi_tensor_adam.cu
+// modified from
+// https://github.com/NVIDIA/apex/blob/master/csrc/multi_tensor_adam.cu
 #include <torch/extension.h>
 
-void multi_tensor_scale_cuda(
-    int chunk_size,
-    at::Tensor noop_flag,
-    std::vector<std::vector<at::Tensor>> tensor_lists,
-    float scale);
+void multi_tensor_scale_cuda(int chunk_size, at::Tensor noop_flag,
+                             std::vector<std::vector<at::Tensor>> tensor_lists,
+                             float scale);
 
-void multi_tensor_sgd_cuda(
-    int chunk_size,
-    at::Tensor noop_flag,
-    std::vector<std::vector<at::Tensor>> tensor_lists,
-    float wd,
-    float momentum,
-    float dampening,
-    float lr,
-    bool nesterov,
-    bool first_run,
-    bool wd_after_momentum,
-    float scale);
+void multi_tensor_sgd_cuda(int chunk_size, at::Tensor noop_flag,
+                           std::vector<std::vector<at::Tensor>> tensor_lists,
+                           float wd, float momentum, float dampening, float lr,
+                           bool nesterov, bool first_run,
+                           bool wd_after_momentum, float scale);
 
-void multi_tensor_adam_cuda(
-    int chunk_size,
-    at::Tensor noop_flag,
-    std::vector<std::vector<at::Tensor>> tensor_lists,
-    const float lr,
-    const float beta1,
-    const float beta2,
-    const float epsilon,
-    const int step,
-    const int mode,
-    const int bias_correction,
-    const float weight_decay);
+void multi_tensor_adam_cuda(int chunk_size, at::Tensor noop_flag,
+                            std::vector<std::vector<at::Tensor>> tensor_lists,
+                            const float lr, const float beta1,
+                            const float beta2, const float epsilon,
+                            const int step, const int mode,
+                            const int bias_correction,
+                            const float weight_decay);
 
-void multi_tensor_lamb_cuda(
-    int chunk_size,
-    at::Tensor noop_flag,
-    std::vector<std::vector<at::Tensor>> tensor_lists,
-    const float lr,
-    const float beta1,
-    const float beta2,
-    const float epsilon,
-    const int step,
-    const int bias_correction,
-    const float weight_decay,
-    const int grad_averaging,
-    const int mode,
-    at::Tensor global_grad_norm,
-    const float max_grad_norm,
-    at::optional<bool> use_nvlamb_python);
+void multi_tensor_lamb_cuda(int chunk_size, at::Tensor noop_flag,
+                            std::vector<std::vector<at::Tensor>> tensor_lists,
+                            const float lr, const float beta1,
+                            const float beta2, const float epsilon,
+                            const int step, const int bias_correction,
+                            const float weight_decay, const int grad_averaging,
+                            const int mode, at::Tensor global_grad_norm,
+                            const float max_grad_norm,
+                            at::optional<bool> use_nvlamb_python);
 
-std::tuple<at::Tensor, at::Tensor> multi_tensor_l2norm_cuda(
-    int chunk_size,
-    at::Tensor noop_flag,
-    std::vector<std::vector<at::Tensor>> tensor_lists,
-    at::optional<bool> per_tensor_python);
+std::tuple<at::Tensor, at::Tensor>
+multi_tensor_l2norm_cuda(int chunk_size, at::Tensor noop_flag,
+                         std::vector<std::vector<at::Tensor>> tensor_lists,
+                         at::optional<bool> per_tensor_python);
 
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
-{
-    m.def("multi_tensor_scale", &multi_tensor_scale_cuda,
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+  m.def("multi_tensor_scale", &multi_tensor_scale_cuda,
         "Fused overflow check + scale for a list of contiguous tensors");
-    m.def("multi_tensor_sgd", &multi_tensor_sgd_cuda,
-      "Fused SGD optimizer for list of contiguous tensors");
-    m.def("multi_tensor_adam", &multi_tensor_adam_cuda,
-      "Compute and apply gradient update to parameters for Adam optimizer");
-    m.def("multi_tensor_lamb", &multi_tensor_lamb_cuda,
-      "Computes and apply update for LAMB optimizer");
-    m.def("multi_tensor_l2norm", &multi_tensor_l2norm_cuda,
-      "Computes L2 norm for a list of contiguous tensors");
+  m.def("multi_tensor_sgd", &multi_tensor_sgd_cuda,
+        "Fused SGD optimizer for list of contiguous tensors");
+  m.def("multi_tensor_adam", &multi_tensor_adam_cuda,
+        "Compute and apply gradient update to parameters for Adam optimizer");
+  m.def("multi_tensor_lamb", &multi_tensor_lamb_cuda,
+        "Computes and apply update for LAMB optimizer");
+  m.def("multi_tensor_l2norm", &multi_tensor_l2norm_cuda,
+        "Computes L2 norm for a list of contiguous tensors");
 }
\ No newline at end of file
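
For reference, a minimal sketch of how these bindings might be called from
Python once the extension is built. The module name `colossal_C`, the chunk
size, and the tensor shapes below are illustrative assumptions, not part of
this patch.

    import torch
    import colossal_C  # assumed import name of the compiled extension

    # Chunk size commonly used with multi-tensor kernels of this kind.
    chunk_size = 2048 * 32
    # The kernels set noop_flag to a non-zero value if an overflow is detected.
    noop_flag = torch.zeros(1, dtype=torch.int, device="cuda")

    src = [torch.randn(1024, device="cuda") for _ in range(4)]
    dst = [torch.empty_like(t) for t in src]

    # Fused overflow check + scale: writes 0.5 * src[i] into dst[i] for every
    # tensor in the two lists with a single fused launch.
    colossal_C.multi_tensor_scale(chunk_size, noop_flag, [src, dst], 0.5)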