From 1ded7e81ef08d574798dd98d1f4d33da07b7f4c9 Mon Sep 17 00:00:00 2001
From: FrankLeeeee
Date: Thu, 11 Jan 2024 13:50:45 +0000
Subject: [PATCH] [git] fixed rebased files

---
 colossalai/inference/core/request_handler.py      | 2 +-
 colossalai/inference/modeling/layers/attention.py | 5 +----
 tests/test_infer/test_inference_engine.py         | 2 +-
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/colossalai/inference/core/request_handler.py b/colossalai/inference/core/request_handler.py
index 09443c92a..3928d7d34 100644
--- a/colossalai/inference/core/request_handler.py
+++ b/colossalai/inference/core/request_handler.py
@@ -227,4 +227,4 @@ class RequestHandler:
 
         self.done_list.extend(finish_seqs)
 
-        return finish_seqs
+        return finish_seqs
\ No newline at end of file
diff --git a/colossalai/inference/modeling/layers/attention.py b/colossalai/inference/modeling/layers/attention.py
index b5cb2c073..af4395f4b 100644
--- a/colossalai/inference/modeling/layers/attention.py
+++ b/colossalai/inference/modeling/layers/attention.py
@@ -58,9 +58,6 @@ def convert_kvcache(cache, lengths, block_tables, pad_id=0):
     seq_len = max(lengths)
     padded_cache = []
     for i in range(bsz):
-        cache1 = cache[block_tables[i][: needed_blocks[i] - 1]].permute((0, 3, 1, 2)).reshape(-1, num_heads, head_size)
-        cache2 = cache[block_tables[i][needed_blocks[i] - 1], :, :, : num_remaing_tokens[i]].permute(2, 0, 1)
-
         _cache = torch.cat(
             (
                 cache[block_tables[i][: needed_blocks[i] - 1]].permute((0, 3, 1, 2)).reshape(-1, num_heads, head_size),
@@ -317,4 +314,4 @@ class PagedAttention:
     ):
         return self.pad_decoding_forward(
             q.unsqueeze(1), k.unsqueeze(1), v.unsqueeze(1), k_cache, v_cache, lengths, block_tables
-        )
+        )
\ No newline at end of file
diff --git a/tests/test_infer/test_inference_engine.py b/tests/test_infer/test_inference_engine.py
index bf626d758..4e5d8c733 100644
--- a/tests/test_infer/test_inference_engine.py
+++ b/tests/test_infer/test_inference_engine.py
@@ -81,4 +81,4 @@ def test_inference_engine():
 
 
 if __name__ == "__main__":
-    test_inference_engine()
+    test_inference_engine()
\ No newline at end of file