Making large AI models cheaper, faster and more accessible
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

58 lines
2.0 KiB

from typing import Any
__all__ = ["CacheBlock"]
class CacheBlock:
"""A simplified version of logical cache block used for Paged Attention."""
def __init__(self, block_id: int, block_size: int, elem_size: int, k_ptrs: Any = None, v_ptrs: Any = None):
# Unique id of a cache block
self.block_id = block_id
# size/capacity of the block in terms of the number of tokens it can hold
self.block_size = block_size
# element size in bytes
self.elem_size = elem_size
# For common cases, we track the relationships between logical and physical caches in KV Cache Manager,
# Additionally, k, v pointers can be optionally used for tracking the physical cache by CacheBlock itself.
self.k_ptrs = k_ptrs
self.v_ptrs = v_ptrs
self.ref_count = 0
# the number of slots that have been allocated (i.e. the number of tokens occupying the block)
self.allocated_size = 0
# the token ids whose KV Cache would be written to corresponding physical caches
# TODO add logics to update token_ids
self.token_ids = [None] * self.block_size
@property
def available_space(self) -> int:
# `allocated_size` is ensured to be less than or equal to `block_size`
return self.block_size - self.allocated_size
def add_ref(self) -> None:
self.ref_count += 1
def remove_ref(self) -> None:
assert self.ref_count > 0, f"Block#{self.block_id} has no reference to remove."
self.ref_count -= 1
def has_ref(self) -> bool:
return self.ref_count > 0
def allocate(self, size: int) -> None:
assert size <= self.available_space, f"Block#{self.block_id} has no available space to allocate."
self.allocated_size += size
def is_empty(self):
return self.allocated_size < 1
def clear(self) -> None:
self.ref_count = 0
self.allocated_size = 0
def __repr__(self):
return f"CacheBlock#{self.block_id}(ref#{self.ref_count}, allocated#{self.allocated_size})"