mirror of https://github.com/hpcaitech/ColossalAI
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
76 lines
1.6 KiB
76 lines
1.6 KiB
// Adapted from turboderp exllama: https://github.com/turboderp/exllama
|
|
|
|
#define _cuda_buffers_cu
|
|
#include "cuda_buffers.cuh"
|
|
|
|
// Per-device registry of CudaBuffers pointers, indexed by device index.
// Every slot starts out NULL; prepare_buffers_cuda() stores a pointer in the
// slot for its device, get_buffers() reads a slot, and cleanup_buffers_cuda()
// deletes and resets every non-NULL slot.
CudaBuffers* g_buffers[CUDA_MAX_DEVICES] = {NULL};
|
|
// __constant__ half2 q4_table[16][256];
|
|
// half2 q4_table_host[16][256];
|
|
// bool q4_table_init = false;
|
|
|
|
// Builds the per-device buffer record.
//
// _device          - device index; made the current CUDA device below.
// _temp_state_size - size value recorded alongside _temp_state.
// _temp_state      - caller-provided half buffer; the pointer is stored as given.
// _temp_dq         - caller-provided half buffer; the pointer is stored as given.
//
// The constructor does not allocate or copy the two buffers. It creates three
// auxiliary streams and three events. Return codes of the CUDA calls are not
// inspected, and the previously current device is not restored afterwards.
CudaBuffers::CudaBuffers
(
    int _device,
    int _temp_state_size,
    half* _temp_state,
    half* _temp_dq
) :
    device(_device),
    temp_state_size(_temp_state_size),
    temp_state(_temp_state),
    temp_dq(_temp_dq)
{
    // Streams and events below are created while _device is current.
    cudaSetDevice(_device);

    // Handle tables keep the creation order identical to the member order:
    // streams 1..3 first, then events 1..3.
    cudaStream_t* const stream_slots[] = { &alt_stream_1, &alt_stream_2, &alt_stream_3 };
    cudaEvent_t* const event_slots[] = { &alt_stream_1_done, &alt_stream_2_done, &alt_stream_3_done };

    for (int s = 0; s < 3; ++s)
    {
        cudaStreamCreate(stream_slots[s]);
    }

    for (int e = 0; e < 3; ++e)
    {
        cudaEventCreate(event_slots[e]);
    }
}
|
|
|
|
// Releases the three auxiliary streams and then the three events created by
// the constructor. temp_state and temp_dq are not freed here. Return codes of
// the CUDA calls are not inspected.
CudaBuffers::~CudaBuffers()
{
    // Copies of the handles, listed in the order they must be destroyed.
    const cudaStream_t owned_streams[] = { alt_stream_1, alt_stream_2, alt_stream_3 };
    const cudaEvent_t owned_events[] = { alt_stream_1_done, alt_stream_2_done, alt_stream_3_done };

    for (int s = 0; s < 3; ++s)
    {
        cudaStreamDestroy(owned_streams[s]);
    }

    for (int e = 0; e < 3; ++e)
    {
        cudaEventDestroy(owned_events[e]);
    }
}
|
|
|
|
// Looks up the CudaBuffers registered for a device.
//
// device_index - index into g_buffers.
//
// Returns the pointer stored in g_buffers[device_index], which is NULL when
// nothing has been registered for that device. Also returns NULL when
// device_index is negative or not below CUDA_MAX_DEVICES, instead of reading
// outside the array.
CudaBuffers* get_buffers(const int device_index)
{
    // Guard the lookup: an index outside the array is treated as
    // "no buffers registered" rather than being read out of bounds.
    if (device_index < 0 || device_index >= CUDA_MAX_DEVICES)
    {
        return NULL;
    }

    return g_buffers[device_index];
}
|
|
|
|
// Creates a CudaBuffers for _device and registers it in g_buffers[_device].
//
// _device          - device index; also the slot used in g_buffers.
// _temp_state_size - forwarded to the CudaBuffers constructor.
// _temp_state      - forwarded to the CudaBuffers constructor.
// _temp_dq         - forwarded to the CudaBuffers constructor.
//
// If _device is negative or not below CUDA_MAX_DEVICES the call does nothing,
// because there is no slot to register the buffers in. If a CudaBuffers is
// already registered for _device it is deleted before being replaced, so
// repeated calls no longer leak the earlier object.
// NOTE(review): pointers previously obtained from get_buffers() for this
// device become invalid after a repeated call — confirm no caller caches them.
void prepare_buffers_cuda
(
    int _device,
    int _temp_state_size,
    half* _temp_state,
    half* _temp_dq
)
{
    // Reject indices that would write outside g_buffers.
    if (_device < 0 || _device >= CUDA_MAX_DEVICES)
    {
        return;
    }

    CudaBuffers* buffers = new CudaBuffers
    (
        _device,
        _temp_state_size,
        _temp_state,
        _temp_dq
    );

    // Release any earlier registration for this device; deleting NULL is a no-op.
    delete g_buffers[_device];
    g_buffers[_device] = buffers;
}
|
|
|
|
// Deletes every CudaBuffers registered in g_buffers and resets its slot to
// NULL. Slots that are already NULL are left untouched.
void cleanup_buffers_cuda()
{
    for (int slot = 0; slot < CUDA_MAX_DEVICES; ++slot)
    {
        CudaBuffers*& entry = g_buffers[slot];

        if (entry)
        {
            delete entry;
            entry = NULL;
        }
    }
}
|