class ChatGLM_G(LLM):
    """LangChain LLM wrapper around the ChatGLM-6B (int4) model.

    May not be an optimal implementation.

    NOTE(review): the tokenizer and model are loaded at class-definition
    time (requires network access and a CUDA device at import) and are
    shared by every instance. `history` is also a class-level mutable, so
    all instances share one conversation state — confirm this is intended
    before using more than one instance.
    """

    # Shared tokenizer/model — loaded once when the class body executes.
    tokenizer = AutoTokenizer.from_pretrained(
        "THUDM/chatglm-6b-int4", trust_remote_code=True
    )
    model = (
        AutoModel.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)
        .half()
        .cuda()
    )
    # Conversation history passed back into model.chat() on every call;
    # presumably a list of (prompt, response) pairs — verify against the
    # transformers ChatGLM chat() API.
    history = []

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses to distinguish this LLM implementation."""
        return "ChatGLM_G"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Generate a response for `prompt` and update the shared history.

        Args:
            prompt: The text prompt sent to the model.
            stop: Optional stop tokens; when given, the response is
                truncated at the first occurrence of any of them.

        Returns:
            The model's response text (possibly truncated by `stop`).
        """
        response, updated_history = self.model.chat(
            self.tokenizer, prompt, history=self.history, max_length=10000
        )
        print("history: ", self.history)
        if stop is not None:
            response = enforce_stop_tokens(response, stop)
        # History is only committed after a successful generation.
        self.history = updated_history
        return response

    def __call__(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Direct-call interface; same contract as `_call`.

        The original body was a copy-paste duplicate of `_call` (and was
        left syntactically broken in the patch); delegating keeps the two
        entry points in sync without changing behavior.
        """
        return self._call(prompt, stop)