[NFC] polish colossalai/kernel/cuda_native/csrc/kernels/include/kernels.h code style (#1291)

pull/1298/head
Maruyama_Aya 2022-07-13 10:49:52 +08:00 committed by Frank Lee
parent 552667825b
commit 87f679aeae
1 changed files with 16 additions and 11 deletions

View File

@@ -3,10 +3,11 @@
#include <cuda.h> #include <cuda.h>
#include <cuda_fp16.h> #include <cuda_fp16.h>
#include <curand_kernel.h> #include <curand_kernel.h>
#include <stdexcept>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <stdexcept>
#define MAX_THREADS 1024 #define MAX_THREADS 1024
#define WARP_SIZE 32 #define WARP_SIZE 32
@@ -132,8 +133,9 @@ __forceinline__ __host__ __device__ int flat_3dim(int id1, int id2, int id3,
} }
/* Convert 4-dim tensor index into vector index */ /* Convert 4-dim tensor index into vector index */
__forceinline__ __host__ __device__ int __forceinline__ __host__ __device__ int flat_4dim(int id1, int id2, int id3,
flat_4dim(int id1, int id2, int id3, int id4, int dim2, int dim3, int dim4) { int id4, int dim2, int dim3,
int dim4) {
// return id1*(dim2*dim3*dim4) + id2*(dim3*dim4) + id3*dim4 + id4; // return id1*(dim2*dim3*dim4) + id2*(dim3*dim4) + id3*dim4 + id4;
int res = id4; int res = id4;
@@ -201,9 +203,9 @@ __forceinline__ __host__ __device__ int flat_6dim(int id1, int id2, int id3,
} }
/* Convert vector index to 6-dim tensor index */ /* Convert vector index to 6-dim tensor index */
__forceinline__ __host__ __device__ void __forceinline__ __host__ __device__ void decompose_6dim(
decompose_6dim(int src, int dim1, int dim2, int dim3, int dim4, int dim5, int src, int dim1, int dim2, int dim3, int dim4, int dim5, int *id0,
int *id0, int *id1, int *id2, int *id3, int *id4, int *id5) { int *id1, int *id2, int *id3, int *id4, int *id5) {
*id5 = src % dim5; *id5 = src % dim5;
src /= dim5; src /= dim5;
@@ -221,9 +223,11 @@ decompose_6dim(int src, int dim1, int dim2, int dim3, int dim4, int dim5,
} }
/* Convert vector index to 5-dim tensor index */ /* Convert vector index to 5-dim tensor index */
__forceinline__ __host__ __device__ void __forceinline__ __host__ __device__ void decompose_5dim(int src, int dim1,
decompose_5dim(int src, int dim1, int dim2, int dim3, int dim4, int *id0, int dim2, int dim3,
int *id1, int *id2, int *id3, int *id4) { int dim4, int *id0,
int *id1, int *id2,
int *id3, int *id4) {
*id4 = src % dim4; *id4 = src % dim4;
src /= dim4; src /= dim4;
@@ -253,8 +257,9 @@ __forceinline__ __host__ __device__ void decompose_4dim(int src, int dim1,
} }
/* Convert vector index to 3-dim tensor index */ /* Convert vector index to 3-dim tensor index */
__forceinline__ __host__ __device__ void __forceinline__ __host__ __device__ void decompose_3dim(int src, int dim1,
decompose_3dim(int src, int dim1, int dim2, int *id0, int *id1, int *id2) { int dim2, int *id0,
int *id1, int *id2) {
*id2 = src % dim2; *id2 = src % dim2;
src /= dim2; src /= dim2;