fix(chat): fix stream_chat in modeling_internlm(hf) to avoid decode error (#560)

* Fixed the issue where the HF model spontaneously conducted multiple rounds of Q&A and the stream_chat method generated garbled characters

Signed-off-by: daijun1 <daijun1@eccom.com.cn>

* Update modeling_internlm.py

Fixed the issue where the HF model spontaneously conducted multiple rounds of Q&A and the stream_chat method generated garbled characters

* Update modeling_internlm.py

Correct spelling mistakes: chche -> cache

---------

Signed-off-by: daijun1 <daijun1@eccom.com.cn>
Co-authored-by: daijun1 <daijun1@eccom.com.cn>
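
For context, the garbled output comes from decoding one token id at a time: a multi-byte UTF-8 character can be split across several byte-level tokens, and decoding a partial sequence yields the replacement character U+FFFD ("�"). The patch below instead accumulates undecoded ids in a cache and only emits text once it decodes cleanly. A minimal standalone sketch of that approach (the helper name and the generic tokenizer argument are illustrative, not part of the patch):

# Illustrative sketch only: mirrors the caching logic added in this patch.
# `tokenizer` is any Hugging Face tokenizer; `token_ids` is the stream of
# generated ids, delivered one at a time.
def stream_decode(tokenizer, token_ids):
    cache = []
    for tid in token_ids:
        cache.append(tid)
        text = tokenizer.decode(cache, skip_special_tokens=True)
        if "�" in text and len(text) <= 5:
            # Partial multi-byte character: keep the ids and wait for more.
            continue
        yield text
        cache.clear()

Decoding the whole cache, rather than only the newest id, is what lets the tokenizer join the byte pieces back into a complete character before any text is emitted.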
Branch: pull/582/head
Author: djsaber, 2023-12-29 13:03:44 +08:00 (committed by GitHub)
Parent: ac7509389b
Commit: aaaf4d7b0e
1 changed file with 9 additions and 1 deletion


@@ -844,6 +844,7 @@ class InternLMForCausalLM(InternLMPreTrainedModel):
                 self.query = query
                 self.history = history
                 self.response = ""
+                self.cache = []
                 self.received_inputs = False
                 self.queue.put((self.response, history + [(self.query, self.response)]))
@@ -858,11 +859,18 @@ class InternLMForCausalLM(InternLMPreTrainedModel):
                     self.received_inputs = True
                     return
-                token = self.tokenizer.decode([value[-1]], skip_special_tokens=True)
+                self.cache.extend(value.tolist())
+                token = self.tokenizer.decode(self.cache, skip_special_tokens=True)
+                if "�" in token and len(token) <= 5:
+                    return
                 if token.strip() != "<eoa>":
                     self.response = self.response + token
                     history = self.history + [(self.query, self.response)]
                     self.queue.put((self.response, history))
+                    self.cache = []
                 else:
                     self.end()
             def end(self):
                 self.queue.put(None)
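
For reference, a minimal way to exercise the patched stream_chat path. The checkpoint name and device placement are assumptions; any InternLM chat checkpoint loaded with trust_remote_code=True should behave the same, and a non-ASCII prompt is exactly the case the fix addresses:

# Minimal sketch: stream a reply and print only the newly decoded text.
# Assumes a CUDA device and the internlm/internlm-chat-7b checkpoint.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda().eval()

printed = 0
for response, history in model.stream_chat(tokenizer, "你好", history=[]):
    # `response` is cumulative, so slice off what was already printed.
    print(response[printed:], end="", flush=True)
    printed = len(response)
print()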