mirror of https://github.com/hpcaitech/ColossalAI
fix auto loading gpt2 tokenizer (#5279)
parent 5d9a0ae75b
commit 32cb74493a
@@ -136,6 +136,19 @@ class ColossalLLM(LLM):
         """Get the identifying parameters."""
         return {"n": self.n}
 
+    def get_token_ids(self, text: str) -> List[int]:
+        """Return the ordered ids of the tokens in a text.
+
+        Args:
+            text: The string input to tokenize.
+
+        Returns:
+            A list of ids corresponding to the tokens in the text, in order they occur
+                in the text.
+        """
+        # use the colossal llm's tokenizer instead of langchain's cached GPT2 tokenizer
+        return self.api.tokenizer.encode(text)
+
 
 class VllmLLM(LLM):
     """
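Why this change: LangChain's base LLM.get_token_ids falls back to a cached GPT2 tokenizer (lazily downloaded via transformers) when a model does not override it, which is both an unexpected download and the wrong vocabulary for non-GPT2 models. Overriding get_token_ids routes token-id lookup, and therefore LangChain's derived get_num_tokens, through the model's own tokenizer. Below is a minimal sketch of the resulting behaviour, assuming ColossalLLM exposes n and api as fields (as the diff implies); ToyTokenizer and ToyApi are hypothetical stand-ins for the real Colossal inference API object, not part of the commit:

    from typing import List

    # assumes ColossalLLM is importable from the module this diff patches

    class ToyTokenizer:
        # Hypothetical tokenizer exposing the encode() method the override calls.
        def encode(self, text: str) -> List[int]:
            return [ord(c) for c in text]  # toy per-character encoding, illustration only

    class ToyApi:
        # Hypothetical stand-in for the inference API wrapped by ColossalLLM.
        def __init__(self) -> None:
            self.tokenizer = ToyTokenizer()

    llm = ColossalLLM(n=1, api=ToyApi())

    # get_token_ids now encodes with api.tokenizer, and get_num_tokens
    # (which LangChain computes as len(get_token_ids(text))) inherits the
    # fix, so token counting no longer auto-loads the GPT2 tokenizer.
    ids = llm.get_token_ids("hello")
    assert llm.get_num_tokens("hello") == len(ids)

Overriding get_token_ids rather than get_num_tokens keeps both entry points consistent, since LangChain derives the token count from the id list.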