mirror of https://github.com/hpcaitech/ColossalAI
[fix]
parent
606603bb88
commit
5b017d6324
|
@ -94,6 +94,7 @@ inference_config = InferenceConfig(
|
|||
max_batch_size=4,
|
||||
max_input_len=1024,
|
||||
max_output_len=512,
|
||||
use_cuda_kernel=True,
|
||||
use_cuda_graph=False, # Turn on if you want to use CUDA Graph to accelerate inference
|
||||
)
|
||||
|
||||
|
|
|
@ -389,6 +389,7 @@ class InferenceEngine:
|
|||
fd_inter_tensor=batch.fd_inter_tensor,
|
||||
batch_size=batch.current_batch_size,
|
||||
is_prompts=batch.is_prompts,
|
||||
use_cuda_kernel=self.inference_config.use_cuda_kernel,
|
||||
use_cuda_graph=use_cuda_graph,
|
||||
kv_seq_len=sequence_lengths.max().item(),
|
||||
head_dim=batch.head_dim,
|
||||
|
|
Loading…
Reference in New Issue