mirror of https://github.com/hpcaitech/ColossalAI
76 lines
1.6 KiB
Plaintext
76 lines
1.6 KiB
Plaintext
|
// Adapted from turboderp exllama: https://github.com/turboderp/exllama
|
||
|
|
||
|
#define _cuda_buffers_cu
|
||
|
#include "cuda_buffers.cuh"
|
||
|
|
||
|
// Per-device registry of buffer sets, indexed by device number. Every slot
// starts out NULL; prepare_buffers_cuda() fills a slot, get_buffers() reads
// one, and cleanup_buffers_cuda() deletes and clears all of them.
CudaBuffers* g_buffers[CUDA_MAX_DEVICES] = {NULL};
|
||
|
// __constant__ half2 q4_table[16][256];
|
||
|
// half2 q4_table_host[16][256];
|
||
|
// bool q4_table_init = false;
|
||
|
|
||
|
// Build the buffer set for one device: record the caller-supplied scratch
// pointers and create three auxiliary streams, each with a matching event.
//
//   _device           device number; made current with cudaSetDevice()
//   _temp_state_size  stored as temp_state_size
//   _temp_state       stored as temp_state (pointer only, nothing is copied)
//   _temp_dq          stored as temp_dq (pointer only, nothing is copied)
//
// The return codes of the CUDA runtime calls are not inspected, and the
// current device is left set to _device when the constructor returns.
CudaBuffers::CudaBuffers(int _device, int _temp_state_size, half* _temp_state, half* _temp_dq)
    : device(_device),
      temp_state_size(_temp_state_size),
      temp_state(_temp_state),
      temp_dq(_temp_dq)
{
    // Make _device current before any stream or event is created.
    cudaSetDevice(_device);

    cudaStream_t* const stream_slots[] = {&alt_stream_1, &alt_stream_2, &alt_stream_3};
    cudaEvent_t* const event_slots[] = {&alt_stream_1_done, &alt_stream_2_done, &alt_stream_3_done};
    const int slot_count = 3;

    // All streams first, then all events, in numeric order.
    for (int s = 0; s < slot_count; ++s)
    {
        cudaStreamCreate(stream_slots[s]);
    }
    for (int e = 0; e < slot_count; ++e)
    {
        cudaEventCreate(event_slots[e]);
    }
}
|
||
|
|
||
|
// Release the three auxiliary streams and then their three events, in the
// same order they were created. temp_state and temp_dq are not freed here.
// The return codes of the destroy calls are not inspected.
CudaBuffers::~CudaBuffers()
{
    const cudaStream_t owned_streams[] = {alt_stream_1, alt_stream_2, alt_stream_3};
    const cudaEvent_t owned_events[] = {alt_stream_1_done, alt_stream_2_done, alt_stream_3_done};
    const int handle_count = 3;

    for (int s = 0; s < handle_count; ++s)
    {
        cudaStreamDestroy(owned_streams[s]);
    }
    for (int e = 0; e < handle_count; ++e)
    {
        cudaEventDestroy(owned_events[e]);
    }
}
|
||
|
|
||
|
// Look up the buffer set registered for a device.
//
//   device_index  slot of g_buffers to read
//
// Returns the pointer held in that slot, which is NULL when nothing has been
// registered for the device. Also returns NULL when device_index lies outside
// [0, CUDA_MAX_DEVICES).
CudaBuffers* get_buffers(const int device_index)
{
    // g_buffers has exactly CUDA_MAX_DEVICES entries; reading past either end
    // is undefined behaviour, so report such indices as "no buffers".
    if (device_index < 0 || device_index >= CUDA_MAX_DEVICES)
    {
        return NULL;
    }

    return g_buffers[device_index];
}
|
||
|
|
||
|
// Create a CudaBuffers for _device and register it in g_buffers[_device].
//
//   _device           device number; also the slot of g_buffers that is set
//   _temp_state_size  forwarded to the CudaBuffers constructor
//   _temp_state       forwarded to the CudaBuffers constructor
//   _temp_dq          forwarded to the CudaBuffers constructor
//
// If a buffer set is already registered for _device it is deleted after the
// new one is installed, so repeated calls do not leak the old object or the
// streams and events it owns. Pointers previously returned by get_buffers()
// for this device become invalid at that point.
//
// A _device outside [0, CUDA_MAX_DEVICES) is ignored: nothing is created and
// g_buffers is left untouched.
void prepare_buffers_cuda
(
    int _device,
    int _temp_state_size,
    half* _temp_state,
    half* _temp_dq
)
{
    // Writing outside g_buffers would corrupt unrelated memory.
    if (_device < 0 || _device >= CUDA_MAX_DEVICES)
    {
        return;
    }

    CudaBuffers* previous = g_buffers[_device];

    g_buffers[_device] = new CudaBuffers
    (
        _device,
        _temp_state_size,
        _temp_state,
        _temp_dq
    );

    // delete on a NULL pointer is a no-op, so the first registration for a
    // device needs no special case.
    delete previous;
}
|
||
|
|
||
|
// Delete every registered buffer set and reset its g_buffers slot to NULL.
// Slots that are already NULL are left as they are.
void cleanup_buffers_cuda()
{
    for (int device = 0; device < CUDA_MAX_DEVICES; ++device)
    {
        CudaBuffers*& slot = g_buffers[device];

        if (slot != NULL)
        {
            delete slot;
            slot = NULL;
        }
    }
}
|