# Source: ColossalAI/tests/test_infer/test_dynamic_batching/config.yaml (file-viewer header: 15 lines, 393 B, YAML)

# Settings grouped under `engine_config`: model location, tensor-parallel degree,
# and batch / sequence-length limits.
# NOTE(review): MODEL_PATH looks like a placeholder rather than a real path —
# confirm what substitutes it before this config is loaded.
engine_config:
  model: MODEL_PATH
  tensor_parallel_size: 1
  max_batch_size: 2
  max_input_len: 1024
  max_output_len: 512
# Config for app router deployment.
# Resources assigned to each model replica. This should correspond to Ray AIR
# ScalingConfig.
# NOTE(review): none of the keys in this section name per-replica resources
# (e.g. CPU/GPU counts) — confirm the sentence above still applies here.
router_config:
  max_total_token_num: 4096
  batch_max_tokens: 4096
  disable_log_stats: false
  log_stats_interval: 10
  # NOTE(review): same placeholder value as the engine section's `model` —
  # presumably both must point at the same model; verify against the loader.
  model: MODEL_PATH