diff --git a/colossalai/kernel/cuda_native/csrc/colossal_C_frontend.cpp b/colossalai/kernel/cuda_native/csrc/colossal_C_frontend.cpp index 17ab1741e..a687adc7b 100644 --- a/colossalai/kernel/cuda_native/csrc/colossal_C_frontend.cpp +++ b/colossalai/kernel/cuda_native/csrc/colossal_C_frontend.cpp @@ -30,10 +30,10 @@ void multi_tensor_lamb_cuda(int chunk_size, at::Tensor noop_flag, const float max_grad_norm, at::optional use_nvlamb_python); -std::tuple -multi_tensor_l2norm_cuda(int chunk_size, at::Tensor noop_flag, - std::vector> tensor_lists, - at::optional per_tensor_python); +std::tuple multi_tensor_l2norm_cuda( + int chunk_size, at::Tensor noop_flag, + std::vector> tensor_lists, + at::optional per_tensor_python); PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("multi_tensor_scale", &multi_tensor_scale_cuda,