pull/5434/head
Runyu Lu 2024-03-21 15:55:25 +08:00
parent 606603bb88
commit 5b017d6324
2 changed files with 2 additions and 0 deletions

View File

@@ -94,6 +94,7 @@ inference_config = InferenceConfig(
max_batch_size=4,
max_input_len=1024,
max_output_len=512,
use_cuda_kernel=True,
use_cuda_graph=False, # Turn on if you want to use CUDA Graph to accelerate inference
)

View File

@@ -389,6 +389,7 @@ class InferenceEngine:
fd_inter_tensor=batch.fd_inter_tensor,
batch_size=batch.current_batch_size,
is_prompts=batch.is_prompts,
use_cuda_kernel=self.inference_config.use_cuda_kernel,
use_cuda_graph=use_cuda_graph,
kv_seq_len=sequence_lengths.max().item(),
head_dim=batch.head_dim,