from typing import Any

__all__ = ["CacheBlock"]


class CacheBlock:
    """A simplified version of logical cache block used for Paged Attention.

    The block only keeps bookkeeping state: how many references point at it,
    how many of its token slots are allocated, and (optionally) handles to the
    physical key/value caches.

    Args:
        block_id: Unique id of the cache block.
        block_size: Capacity of the block, in number of tokens it can hold.
        elem_size: Element size in bytes.
        k_ptrs: Optional handle for tracking the physical key cache.
        v_ptrs: Optional handle for tracking the physical value cache.
    """

    def __init__(self, block_id: int, block_size: int, elem_size: int, k_ptrs: Any = None, v_ptrs: Any = None):
        # Unique id of a cache block
        self.block_id = block_id

        # size/capacity of the block in terms of the number of tokens it can hold
        self.block_size = block_size

        # element size in bytes
        self.elem_size = elem_size

        # For common cases, the relationships between logical and physical caches are tracked
        # in the KV Cache Manager. Additionally, k, v pointers can optionally be used for
        # tracking the physical cache by the CacheBlock itself.
        self.k_ptrs = k_ptrs
        self.v_ptrs = v_ptrs

        self.ref_count = 0
        # the number of slots that have been allocated (i.e. the number of tokens occupying the block)
        self.allocated_size = 0
        # the token ids whose KV Cache would be written to corresponding physical caches
        # TODO add logics to update token_ids
        self.token_ids = [None] * self.block_size

    @property
    def available_space(self) -> int:
        """Number of token slots that are still free in this block."""
        # `allocated_size` is ensured to be less than or equal to `block_size`
        return self.block_size - self.allocated_size

    def add_ref(self) -> None:
        """Increase the reference count by one."""
        self.ref_count += 1

    def remove_ref(self) -> None:
        """Decrease the reference count by one.

        Raises:
            AssertionError: If the block currently has no reference.
        """
        assert self.ref_count > 0, f"Block#{self.block_id} has no reference to remove."
        self.ref_count -= 1

    def has_ref(self) -> bool:
        """Return True if at least one reference to the block is held."""
        return self.ref_count > 0

    def allocate(self, size: int) -> None:
        """Mark `size` more token slots of this block as allocated.

        Raises:
            AssertionError: If `size` is negative or exceeds `available_space`.
        """
        # A negative size would push `allocated_size` below zero and make
        # `available_space` exceed `block_size`, so reject it up front.
        assert size >= 0, f"Block#{self.block_id} cannot allocate a negative size ({size})."
        assert size <= self.available_space, f"Block#{self.block_id} has no available space to allocate."
        self.allocated_size += size

    def is_empty(self) -> bool:
        """Return True if no slot of the block is allocated."""
        return self.allocated_size < 1

    def clear(self) -> None:
        """Reset the bookkeeping state to what `__init__` sets up.

        The reference count, the allocated size and the recorded token ids are
        reset; `k_ptrs` and `v_ptrs` are left untouched.
        """
        self.ref_count = 0
        self.allocated_size = 0
        # Drop stale token ids so a recycled block does not expose old entries.
        self.token_ids = [None] * self.block_size

    def __repr__(self) -> str:
        return f"CacheBlock#{self.block_id}(ref#{self.ref_count}, allocated#{self.allocated_size})"