From aaaf4d7b0eef8a44d308806381f38a8bbd6e27de Mon Sep 17 00:00:00 2001
From: djsaber <60215276+djsaber@users.noreply.github.com>
Date: Fri, 29 Dec 2023 13:03:44 +0800
Subject: [PATCH] fix(chat): fix stream_chat in modeling_internlm(hf) to avoid decode error (#560)

* fixed the issue where the HF model spontaneously conducted multiple rounds
  of Q&A and the stream_chat method generated garbled characters

Signed-off-by: daijun1

* Update modeling_internlm.py

fixed the issue where the HF model spontaneously conducted multiple rounds of
Q&A and the stream_chat method generated garbled characters

* Update modeling_internlm.py

Correct spelling mistakes: chche -> cache

---------

Signed-off-by: daijun1
Co-authored-by: daijun1
---
 tools/transformers/modeling_internlm.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tools/transformers/modeling_internlm.py b/tools/transformers/modeling_internlm.py
index 37f50d1..571971d 100644
--- a/tools/transformers/modeling_internlm.py
+++ b/tools/transformers/modeling_internlm.py
@@ -844,6 +844,7 @@ class InternLMForCausalLM(InternLMPreTrainedModel):
                 self.query = query
                 self.history = history
                 self.response = ""
+                self.cache = []
                 self.received_inputs = False
                 self.queue.put((self.response, history + [(self.query, self.response)]))
 
@@ -858,11 +859,18 @@ class InternLMForCausalLM(InternLMPreTrainedModel):
                     self.received_inputs = True
                     return
 
-                token = self.tokenizer.decode([value[-1]], skip_special_tokens=True)
+                self.cache.extend(value.tolist())
+                token = self.tokenizer.decode(self.cache, skip_special_tokens=True)
+                if "�" in token and len(token) <= 5:
+                    return
+
                 if token.strip() != "":
                     self.response = self.response + token
                     history = self.history + [(self.query, self.response)]
                     self.queue.put((self.response, history))
+                    self.cache = []
+                else:
+                    self.end()
 
             def end(self):
                 self.queue.put(None)
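
Why the cache helps, as a minimal standalone sketch: one multi-byte UTF-8
character (for example a Chinese character) can be split across several token
ids, so decoding each id in isolation (the old [value[-1]] path) yields the
replacement character "�". The snippet below only mimics that buffering idea;
it uses raw UTF-8 bytes as stand-ins for token ids and a plain bytes.decode()
in place of the InternLM tokenizer, so the decode() helper and the byte stream
are illustrative assumptions, not the model's actual API.

    # Stand-in for tokenizer.decode(); a real tokenizer behaves similarly when
    # a multi-byte character is split across ids.
    def decode(ids):
        return bytes(ids).decode("utf-8", errors="replace")

    stream = list("你好".encode("utf-8"))  # 6 bytes encoding 2 Chinese characters

    # Old behaviour: decode each id alone, every chunk comes out garbled.
    print([decode([i]) for i in stream])   # ['�', '�', '�', '�', '�', '�']

    # Patched behaviour: cache ids until they decode cleanly, then flush.
    cache, response = [], ""
    for i in stream:
        cache.append(i)
        token = decode(cache)
        if "\ufffd" in token and len(token) <= 5:  # incomplete character, keep buffering
            continue
        response += token
        cache = []
    print(response)                        # 你好

The len(token) <= 5 guard mirrors the patch: only a short chunk containing "�"
is treated as an incomplete character and kept in the buffer, so longer output
that legitimately contains that character is still emitted.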