From aaaf4d7b0eef8a44d308806381f38a8bbd6e27de Mon Sep 17 00:00:00 2001
From: djsaber <60215276+djsaber@users.noreply.github.com>
Date: Fri, 29 Dec 2023 13:03:44 +0800
Subject: [PATCH] fix(chat): fix stream_chat in modeling_internlm(hf) to avoid decode error (#560)

* fixed the issue where the HF model spontaneously conducted multiple rounds
  of Q&A and the stream_chat method generated garbled characters

Signed-off-by: daijun1

* Update modeling_internlm.py

fixed the issue where the HF model spontaneously conducted multiple rounds of
Q&A and the stream_chat method generated garbled characters

* Update modeling_internlm.py

Correct spelling mistakes: chche -> cache

---------

Signed-off-by: daijun1
Co-authored-by: daijun1
---
 tools/transformers/modeling_internlm.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tools/transformers/modeling_internlm.py b/tools/transformers/modeling_internlm.py
index 37f50d1..571971d 100644
--- a/tools/transformers/modeling_internlm.py
+++ b/tools/transformers/modeling_internlm.py
@@ -844,6 +844,7 @@ class InternLMForCausalLM(InternLMPreTrainedModel):
                 self.query = query
                 self.history = history
                 self.response = ""
+                self.cache = []
                 self.received_inputs = False
                 self.queue.put((self.response, history + [(self.query, self.response)]))
 
@@ -858,11 +859,18 @@ class InternLMForCausalLM(InternLMPreTrainedModel):
                     self.received_inputs = True
                     return
 
-                token = self.tokenizer.decode([value[-1]], skip_special_tokens=True)
+                self.cache.extend(value.tolist())
+                token = self.tokenizer.decode(self.cache, skip_special_tokens=True)
+                if "�" in token and len(token) <= 5:
+                    return
+
                 if token.strip() != "":
                     self.response = self.response + token
                     history = self.history + [(self.query, self.response)]
                     self.queue.put((self.response, history))
+                    self.cache = []
+                else:
+                    self.end()
 
             def end(self):
                 self.queue.put(None)
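
Why the cache helps, as a minimal standalone sketch: one multi-byte UTF-8
character (for example a Chinese character) can be split across several token
ids, so decoding each id in isolation (the old [value[-1]] path) yields the
replacement character "�". The snippet below only mimics that buffering idea;
it uses raw UTF-8 bytes as stand-ins for token ids and a plain bytes.decode()
in place of the InternLM tokenizer, so the decode() helper and the byte stream
are illustrative assumptions, not the model's actual API.

    # Stand-in for tokenizer.decode(); a real tokenizer behaves similarly when
    # a multi-byte character is split across ids.
    def decode(ids):
        return bytes(ids).decode("utf-8", errors="replace")

    stream = list("你好".encode("utf-8"))  # 6 bytes encoding 2 Chinese characters

    # Old behaviour: decode each id alone, every chunk comes out garbled.
    print([decode([i]) for i in stream])   # ['�', '�', '�', '�', '�', '�']

    # Patched behaviour: cache ids until they decode cleanly, then flush.
    cache, response = [], ""
    for i in stream:
        cache.append(i)
        token = decode(cache)
        if "\ufffd" in token and len(token) <= 5:  # incomplete character, keep buffering
            continue
        response += token
        cache = []
    print(response)                        # 你好

The len(token) <= 5 guard mirrors the patch: only a short chunk containing "�"
is treated as an incomplete character and kept in the buffer, so longer output
that legitimately contains that character is still emitted.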