# Settings for the inference engine.
engine_config:
  # MODEL_PATH looks like a placeholder -- presumably replaced with a real
  # model location before use; confirm against the consumer of this file.
  model: MODEL_PATH
  tensor_parallel_size: 1
  max_batch_size: 2
  max_input_len: 1024
  max_output_len: 512

# Config for app router deployment.
# Resources assigned to each model replica. This should correspond to Ray AIR
# ScalingConfig.
router_config:
  max_total_token_num: 4096
  batch_max_tokens: 4096
  disable_log_stats: false
  log_stats_interval: 10
  # NOTE(review): nested here based on key order in the original file and by
  # analogy with engine_config.model -- confirm the router expects this key.
  model: MODEL_PATH