# ColossalAI/colossalai/inference/core/request_handler.py
#
# 49 lines
# 1.3 KiB
# Python
# Raw Normal View History

from typing import List
class RequestHandler:
    """
    RequestHandler is the core for handling existing requests and updating the current batch.

    During the generation process, the ``schedule`` function is called each iteration
    to update the current batch.

    Args:
        cache_config: Configuration used to initialize and manage the kv cache.

    Attributes:
        cache_config: The configuration object passed to the constructor, stored as-is.
        waiting_list: Requests that have been added via ``add_sequence``.
        running_list: Starts empty; nothing in this class populates it yet.
    """

    def __init__(self, cache_config) -> None:
        self.cache_config = cache_config
        # Runs before the request lists are created, so ``_init_cache`` must not
        # rely on ``waiting_list`` / ``running_list`` existing.
        self._init_cache()
        self.waiting_list: List["Reqseq"] = []
        self.running_list: List["Reqseq"] = []

    def _init_cache(self):
        """
        Initialize the cache manager with cache config.

        NOTE: currently a placeholder with no implementation.
        """

    def schedule(self):
        """
        The main logic of request handler.

        NOTE: currently a placeholder with no implementation; returns ``None``.
        """

    def add_sequence(self, reqseq: "Reqseq"):
        """
        Add the request to the waiting list.

        Args:
            reqseq: The request to append to the end of ``waiting_list``.
        """
        self.waiting_list.append(reqseq)

    def abort_sequence(self, seq_id: str):
        """
        Abort the request.

        Args:
            seq_id: Identifier of the request to abort.

        NOTE: not implemented yet. The request is looked up but the result is
        discarded and no list is modified.
        """
        # TODO: implement this
        self._find_sequence(seq_id)

    def _find_sequence(self, seq_id: str) -> "Reqseq":
        """
        Find the request by seq_id.

        Args:
            seq_id: Identifier of the request to look up.

        NOTE: currently a placeholder with no implementation, so it returns
        ``None`` despite the annotated return type.
        """

    def check_unfinished_seqs(self) -> bool:
        """
        Report whether any request is still waiting or running.

        Returns:
            ``True`` if ``waiting_list`` or ``running_list`` is non-empty,
            ``False`` otherwise.
        """
        # ``a or b`` evaluates to one of its operands, so without ``bool`` this
        # would hand back an internal list object instead of the annotated bool.
        return bool(self.waiting_list or self.running_list)