diff --git a/colossalai/inference/config.py b/colossalai/inference/config.py
index e69de29bb..d274beb14 100644
--- a/colossalai/inference/config.py
+++ b/colossalai/inference/config.py
@@ -0,0 +1,7 @@
+"""
+Our config consists of three parts:
+    1. model_config: the configuration for the model, including `model name`, `model path` and self-defined layers.
+    2. parallel_config: the configuration for parallelizing the model, including `tp_size`, `pp_size`, `world size`, `local rank`, `master port`, `master ip`.
+    3. cache_config: the configuration for initializing and managing the kv cache, including `block size` and `block num`.
+For the convenience of users, we provide a unified config API that wraps all the configs. One can easily construct a colossal_config by setting the needed configs.
+"""
diff --git a/colossalai/inference/core/request_handler.py b/colossalai/inference/core/request_handler.py
index 117625177..e7898879a 100644
--- a/colossalai/inference/core/request_handler.py
+++ b/colossalai/inference/core/request_handler.py
@@ -1,10 +1,51 @@
+from typing import List
+
+
 class RequestHandler:
+    """
+    RequestHandler is the core for handling existing requests and updating the current batch.
+    During the generation process, we call the schedule function at each iteration to update the current batch.
+
+    Args:
+        cache_config: Configuration for initializing and managing the kv cache.
+    """
+
     def __init__(self, cache_config) -> None:
         self.cache_config = cache_config
         self._init_cache()
+        self.waiting_list: List["Reqseq"] = []
+        self.running_list: List["Reqseq"] = []
 
     def _init_cache(self):
-        pass
+        """
+        Initialize the cache manager with the cache config.
+        """
+
+    def schedule(self):
+        """
+        The main logic of the request handler.
+        """
+
+    def add_sequence(self, reqseq: "Reqseq"):
+        """
+        Add the request to the waiting list.
+        """
+        self.waiting_list.append(reqseq)
+
+    def abort_sequence(self, seq_id: str):
+        """
+        Abort the request.
+        """
+        # TODO: implement this
+        self._find_sequence(seq_id)
+        return
+
+    def _find_sequence(self, seq_id: str) -> "Reqseq":
+        """
+        Find the request by seq_id.
+        """
 
-    def schedule(self, request):
-        pass
+    def check_unfinished_seqs(self) -> bool:
+        # Return an explicit bool: the annotated return type is `bool`, while
+        # `self.waiting_list or self.running_list` would leak a list object to callers.
+        return len(self.waiting_list) > 0 or len(self.running_list) > 0
diff --git a/colossalai/inference/readme.md b/colossalai/inference/readme.md
new file mode 100644
index 000000000..301b546ff
--- /dev/null
+++ b/colossalai/inference/readme.md
@@ -0,0 +1,19 @@
+# Colossal-Infer
+## Introduction
+Colossal-Infer is a library for inference of LLMs and MLMs. It is built on top of Colossal AI.
+
+## Structures
+### Overview
+https://n4fyd3ptax.feishu.cn/docx/MhlmdHsGkoeoslx9fqucPO17n9b?openbrd=1&doc_app_id=501&blockId=WCGBdWI9hobOEsxkW5uc8HM6n3b&blockType=whiteboard&blockToken=Cca3wKWk7hPnJxbkCX6cMxPQnqd#WCGBdWI9hobOEsxkW5uc8HM6n3b
+
+## Roadmap
+- [ ] design of structures
+- [ ] Core components
+  - [ ] engine
+  - [ ] request handler
+  - [ ] kv cache manager
+  - [ ] modeling
+  - [ ] custom layers
+  - [ ] online server
+- [ ] supported models
+  - [ ] llama2