fix(chat): fix stream_chat in modeling_internlm(hf) to avoid decode error (#560)

* Fixed the issue where the HF model spontaneously conducted multiple rounds of Q&A and the stream_chat method generated garbled characters

Signed-off-by: daijun1 <daijun1@eccom.com.cn>

* Update modeling_internlm.py

Fixed the issue where the HF model spontaneously conducted multiple rounds of Q&A and the stream_chat method generated garbled characters

* Update modeling_internlm.py

Correct spelling mistakes: chche -> cache

---------

Signed-off-by: daijun1 <daijun1@eccom.com.cn>
Co-authored-by: daijun1 <daijun1@eccom.com.cn>
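
For context, the garbled output comes from decoding one token id at a time: a multi-byte UTF-8 character can be split across several byte-level tokens, and decoding a partial sequence yields the replacement character U+FFFD ("�"). The patch below instead accumulates undecoded ids in a cache and only emits text once it decodes cleanly. A minimal standalone sketch of that approach (the helper name and the generic tokenizer argument are illustrative, not part of the patch):

# Illustrative sketch only: mirrors the caching logic added in this patch.
# `tokenizer` is any Hugging Face tokenizer; `token_ids` is the stream of
# generated ids, delivered one at a time.
def stream_decode(tokenizer, token_ids):
    cache = []
    for tid in token_ids:
        cache.append(tid)
        text = tokenizer.decode(cache, skip_special_tokens=True)
        if "�" in text and len(text) <= 5:
            # Partial multi-byte character: keep the ids and wait for more.
            continue
        yield text
        cache.clear()

Decoding the whole cache, rather than only the newest id, is what lets the tokenizer join the byte pieces back into a complete character before any text is emitted.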
Branch: pull/582/head
Author: djsaber, 2023-12-29 13:03:44 +08:00 (committed by GitHub)
Parent: ac7509389b
Commit: aaaf4d7b0e
1 changed file with 9 additions and 1 deletion


@@ -844,6 +844,7 @@ class InternLMForCausalLM(InternLMPreTrainedModel):
                 self.query = query
                 self.history = history
                 self.response = ""
+                self.cache = []
                 self.received_inputs = False
                 self.queue.put((self.response, history + [(self.query, self.response)]))
@@ -858,11 +859,18 @@ class InternLMForCausalLM(InternLMPreTrainedModel):
                     self.received_inputs = True
                     return
-                token = self.tokenizer.decode([value[-1]], skip_special_tokens=True)
+                self.cache.extend(value.tolist())
+                token = self.tokenizer.decode(self.cache, skip_special_tokens=True)
+                if "�" in token and len(token) <= 5:
+                    return
                 if token.strip() != "<eoa>":
                     self.response = self.response + token
                     history = self.history + [(self.query, self.response)]
                     self.queue.put((self.response, history))
+                    self.cache = []
                 else:
                     self.end()
             def end(self):
                 self.queue.put(None)
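
For reference, a minimal way to exercise the patched stream_chat path. The checkpoint name and device placement are assumptions; any InternLM chat checkpoint loaded with trust_remote_code=True should behave the same, and a non-ASCII prompt is exactly the case the fix addresses:

# Minimal sketch: stream a reply and print only the newly decoded text.
# Assumes a CUDA device and the internlm/internlm-chat-7b checkpoint.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda().eval()

printed = 0
for response, history in model.stream_chat(tokenizer, "你好", history=[]):
    # `response` is cumulative, so slice off what was already printed.
    print(response[printed:], end="", flush=True)
    printed = len(response)
print()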